diff options
Diffstat (limited to 'storage')
350 files changed, 19752 insertions, 5389 deletions
diff --git a/storage/Makefile.am b/storage/Makefile.am index 8aa1e4f7dc6..e232547139d 100644 --- a/storage/Makefile.am +++ b/storage/Makefile.am @@ -21,6 +21,3 @@ AUTOMAKE_OPTIONS = foreign EXTRA_DIST = mysql_storage_engine.cmake SUBDIRS = @mysql_se_dirs@ DIST_SUBDIRS = @mysql_se_distdirs@ - -# Don't update the files from bitkeeper -%::SCCS/s.% diff --git a/storage/archive/Makefile.am b/storage/archive/Makefile.am index d092f091798..254c95bf68b 100644 --- a/storage/archive/Makefile.am +++ b/storage/archive/Makefile.am @@ -36,14 +36,14 @@ noinst_PROGRAMS = archive_test archive_reader EXTRA_LTLIBRARIES = ha_archive.la pkgplugin_LTLIBRARIES = @plugin_archive_shared_target@ ha_archive_la_LDFLAGS = -module -rpath $(pkgplugindir) -ha_archive_la_CXXFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN -ha_archive_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_archive_la_CXXFLAGS= -shared $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_archive_la_CFLAGS = -shared $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN ha_archive_la_SOURCES = ha_archive.cc azio.c EXTRA_LIBRARIES = libarchive.a noinst_LIBRARIES = @plugin_archive_static_target@ -libarchive_a_CXXFLAGS = $(AM_CFLAGS) +libarchive_a_CXXFLAGS = $(AM_CXXFLAGS) libarchive_a_CFLAGS = $(AM_CFLAGS) libarchive_a_SOURCES = ha_archive.cc azio.c diff --git a/storage/archive/archive_reader.c b/storage/archive/archive_reader.c index 84d4e318b49..0cf795cefdf 100644 --- a/storage/archive/archive_reader.c +++ b/storage/archive/archive_reader.c @@ -355,15 +355,14 @@ static struct my_option my_long_options[] = 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"set-auto-increment", 'A', "Force auto_increment to start at this or higher value. If no value is given, then sets the next auto_increment value to the highest used value for the auto key + 1.", - (uchar**) &new_auto_increment, - (uchar**) &new_auto_increment, + &new_auto_increment, &new_auto_increment, 0, GET_ULL, OPT_ARG, 0, 0, 0, 0, 0, 0}, {"silent", 's', "Only print errors. 
One can use two -s to make archive_reader very silent.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"tmpdir", 't', "Path for temporary files.", - (uchar**) &opt_tmpdir, + &opt_tmpdir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"version", 'V', "Print version and exit.", diff --git a/storage/archive/azio.c b/storage/archive/azio.c index a24350dd454..fc54d98ab15 100644 --- a/storage/archive/azio.c +++ b/storage/archive/azio.c @@ -150,6 +150,17 @@ int az_open (azio_stream *s, const char *path, int Flags, File fd) } else { + /* Reset values in case of old version of archive file */ + s->rows= 0; + s->forced_flushes= 0; + s->shortest_row= 0; + s->longest_row= 0; + s->auto_increment= 0; + s->check_point= 0; + s->comment_start_pos= 0; + s->comment_length= 0; + s->frm_start_pos= 0; + s->frm_length= 0; check_header(s); /* skip the .az header */ } diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc index e3151d35e23..f3f5788360a 100644 --- a/storage/archive/ha_archive.cc +++ b/storage/archive/ha_archive.cc @@ -293,7 +293,7 @@ int ha_archive::read_data_header(azio_stream *file_to_read) DBUG_PRINT("ha_archive", ("Version %u", data_buffer[1])); if ((data_buffer[0] != (uchar)ARCHIVE_CHECK_HEADER) && - (data_buffer[1] != (uchar)ARCHIVE_VERSION)) + (data_buffer[1] == 1 || data_buffer[1] == 2)) DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); DBUG_RETURN(0); @@ -360,9 +360,19 @@ ARCHIVE_SHARE *ha_archive::get_share(const char *table_name, int *rc) my_free(share, MYF(0)); DBUG_RETURN(NULL); } - stats.auto_increment_value= archive_tmp.auto_increment + 1; - share->rows_recorded= (ha_rows)archive_tmp.rows; - share->crashed= archive_tmp.dirty; + share->version= archive_tmp.version; + if (archive_tmp.version == ARCHIVE_VERSION) + { + stats.auto_increment_value= archive_tmp.auto_increment + 1; + share->rows_recorded= (ha_rows)archive_tmp.rows; + share->crashed= archive_tmp.dirty; + } + else + { + /* Used by repair */ + share->rows_recorded= ~(ha_rows) 0; + 
stats.auto_increment_value= 0; + } /* If archive version is less than 3, It should be upgraded before use. @@ -512,10 +522,19 @@ int ha_archive::open(const char *name, int mode, uint open_options) case 0: break; case HA_ERR_CRASHED_ON_USAGE: + DBUG_PRINT("ha_archive", ("archive table was crashed")); if (open_options & HA_OPEN_FOR_REPAIR) + { + rc= 0; break; + } /* fall through */ case HA_ERR_TABLE_NEEDS_UPGRADE: + if (open_options & HA_OPEN_FOR_REPAIR) + { + rc= 0; + break; + } free_share(); /* fall through */ default: @@ -535,13 +554,6 @@ int ha_archive::open(const char *name, int mode, uint open_options) thr_lock_data_init(&share->lock, &lock, NULL); - DBUG_PRINT("ha_archive", ("archive table was crashed %s", - rc == HA_ERR_CRASHED_ON_USAGE ? "yes" : "no")); - if (rc == HA_ERR_CRASHED_ON_USAGE && open_options & HA_OPEN_FOR_REPAIR) - { - DBUG_RETURN(0); - } - DBUG_RETURN(rc); } @@ -1267,6 +1279,14 @@ int ha_archive::rnd_pos(uchar * buf, uchar *pos) DBUG_RETURN(get_row(&archive, buf)); } +int ha_archive::check_for_upgrade(HA_CHECK_OPT *check_opt) +{ + if (share->version < ARCHIVE_VERSION) + return HA_ADMIN_NEEDS_ALTER; + return 0; +} + + /* This method repairs the meta file. It does this by walking the datafile and rewriting the meta file. 
If EXTENDED repair is requested, we attempt to @@ -1290,10 +1310,11 @@ int ha_archive::repair(THD* thd, HA_CHECK_OPT* check_opt) */ int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt) { - DBUG_ENTER("ha_archive::optimize"); int rc= 0; azio_stream writer; char writer_filename[FN_REFLEN]; + char* frm_string; + DBUG_ENTER("ha_archive::optimize"); init_archive_reader(); @@ -1304,12 +1325,28 @@ int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt) share->archive_write_open= FALSE; } + if (!(frm_string= (char*) malloc(archive.frm_length))) + return ENOMEM; + + azread_frm(&archive, frm_string); + /* Lets create a file to contain the new data */ fn_format(writer_filename, share->table_name, "", ARN, MY_REPLACE_EXT | MY_UNPACK_FILENAME); if (!(azopen(&writer, writer_filename, O_CREAT|O_RDWR|O_BINARY))) - DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); + { + free(frm_string); + DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); + } + + rc= azwrite_frm(&writer, frm_string, archive.frm_length); + free(frm_string); + if (rc) + { + rc= HA_ERR_CRASHED_ON_USAGE; + goto error; + } /* An extended rebuild is a lot more effort. We open up each row and re-record it. @@ -1387,7 +1424,6 @@ int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt) // make the file we just wrote be our data file rc = my_rename(writer_filename,share->data_file_name,MYF(0)); - DBUG_RETURN(rc); error: DBUG_PRINT("ha_archive", ("Failed to recover, error was %d", rc)); @@ -1539,7 +1575,7 @@ void ha_archive::start_bulk_insert(ha_rows rows) Other side of start_bulk_insert, is end_bulk_insert. Here we turn off the bulk insert flag, and set the share dirty so that the next select will call sync for us. 
*/ -int ha_archive::end_bulk_insert(bool table_will_be_deleted) +int ha_archive::end_bulk_insert() { DBUG_ENTER("ha_archive::end_bulk_insert"); bulk_insert= FALSE; diff --git a/storage/archive/ha_archive.h b/storage/archive/ha_archive.h index 22f8302982d..653a13b242d 100644 --- a/storage/archive/ha_archive.h +++ b/storage/archive/ha_archive.h @@ -35,7 +35,7 @@ typedef struct st_archive_record_buffer { typedef struct st_archive_share { char *table_name; char data_file_name[FN_REFLEN]; - uint table_name_length,use_count; + uint table_name_length,use_count, version; pthread_mutex_t mutex; THR_LOCK lock; azio_stream archive_write; /* Archive file we are working with */ @@ -133,8 +133,9 @@ public: int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info); int optimize(THD* thd, HA_CHECK_OPT* check_opt); int repair(THD* thd, HA_CHECK_OPT* check_opt); + int check_for_upgrade(HA_CHECK_OPT *check_opt); void start_bulk_insert(ha_rows rows); - int end_bulk_insert(bool table_will_be_deleted); + int end_bulk_insert(); enum row_type get_row_type() const { return ROW_TYPE_COMPRESSED; diff --git a/storage/blackhole/Makefile.am b/storage/blackhole/Makefile.am index db4f67cf847..148746a9336 100644 --- a/storage/blackhole/Makefile.am +++ b/storage/blackhole/Makefile.am @@ -35,15 +35,13 @@ noinst_HEADERS = ha_blackhole.h EXTRA_LTLIBRARIES = ha_blackhole.la pkgplugin_LTLIBRARIES = @plugin_blackhole_shared_target@ ha_blackhole_la_LDFLAGS=-module -rpath $(pkgplugindir) -ha_blackhole_la_CXXFLAGS=$(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN -ha_blackhole_la_CFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_blackhole_la_CXXFLAGS=-shared $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN ha_blackhole_la_SOURCES=ha_blackhole.cc EXTRA_LIBRARIES = libblackhole.a noinst_LIBRARIES = @plugin_blackhole_static_target@ -libblackhole_a_CXXFLAGS=$(AM_CFLAGS) -libblackhole_a_CFLAGS = $(AM_CFLAGS) +libblackhole_a_CXXFLAGS=$(AM_CXXFLAGS) libblackhole_a_SOURCES= ha_blackhole.cc diff --git a/storage/csv/Makefile.am 
b/storage/csv/Makefile.am index 07ffac88a96..75ad9062984 100644 --- a/storage/csv/Makefile.am +++ b/storage/csv/Makefile.am @@ -32,12 +32,12 @@ noinst_HEADERS = ha_tina.h transparent_file.h EXTRA_LTLIBRARIES = ha_csv.la pkglib_LTLIBRARIES = @plugin_csv_shared_target@ ha_csv_la_LDFLAGS = -module -rpath $(MYSQLLIBdir) -ha_csv_la_CXXFLAGS = $(AM_CFLAGS) -DMYSQL_PLUGIN +ha_csv_la_CXXFLAGS = -shared $(AM_CXXFLAGS) -DMYSQL_PLUGIN ha_csv_la_SOURCES = transparent_file.cc ha_tina.cc EXTRA_LIBRARIES = libcsv.a noinst_LIBRARIES = @plugin_csv_static_target@ -libcsv_a_CXXFLAGS = $(AM_CFLAGS) +libcsv_a_CXXFLAGS = $(AM_CXXFLAGS) libcsv_a_SOURCES = transparent_file.cc ha_tina.cc EXTRA_DIST = CMakeLists.txt plug.in diff --git a/storage/csv/ha_tina.cc b/storage/csv/ha_tina.cc index ffc59140629..8ba425c86cb 100644 --- a/storage/csv/ha_tina.cc +++ b/storage/csv/ha_tina.cc @@ -468,7 +468,7 @@ int ha_tina::encode_quote(uchar *buf) const char *ptr; const char *end_ptr; const bool was_null= (*field)->is_null(); - + /* assistance for backwards compatibility in production builds. note: this will not work for ENUM columns. @@ -480,7 +480,7 @@ int ha_tina::encode_quote(uchar *buf) } (*field)->val_str(&attribute,&attribute); - + if (was_null) (*field)->set_null(); @@ -489,36 +489,39 @@ int ha_tina::encode_quote(uchar *buf) ptr= attribute.ptr(); end_ptr= attribute.length() + ptr; + /* + Ensure that buffer is big enough. 
This will also speed things up + as we don't have to do any new allocation in the loop below + */ + if (buffer.realloc(buffer.length() + attribute.length()*2+2)) + return 0; // Failure + buffer.append('"'); - while (ptr < end_ptr) + for (; ptr < end_ptr; ptr++) { if (*ptr == '"') { buffer.append('\\'); buffer.append('"'); - *ptr++; } else if (*ptr == '\r') { buffer.append('\\'); buffer.append('r'); - *ptr++; } else if (*ptr == '\\') { buffer.append('\\'); buffer.append('\\'); - *ptr++; } else if (*ptr == '\n') { buffer.append('\\'); buffer.append('n'); - *ptr++; } else - buffer.append(*ptr++); + buffer.append(*ptr); } buffer.append('"'); } diff --git a/storage/example/Makefile.am b/storage/example/Makefile.am index 10163e307b1..dc499e2f9dd 100644 --- a/storage/example/Makefile.am +++ b/storage/example/Makefile.am @@ -35,15 +35,12 @@ noinst_HEADERS = ha_example.h EXTRA_LTLIBRARIES = ha_example.la pkgplugin_LTLIBRARIES = @plugin_example_shared_target@ ha_example_la_LDFLAGS = -module -rpath $(pkgplugindir) -L$(top_builddir)/libservices -lmysqlservices -ha_example_la_CXXFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN -ha_example_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_example_la_CXXFLAGS= -shared $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN ha_example_la_SOURCES = ha_example.cc - EXTRA_LIBRARIES = libexample.a noinst_LIBRARIES = @plugin_example_static_target@ -libexample_a_CXXFLAGS = $(AM_CFLAGS) -libexample_a_CFLAGS = $(AM_CFLAGS) +libexample_a_CXXFLAGS = $(AM_CXXFLAGS) libexample_a_SOURCES= ha_example.cc diff --git a/storage/example/ha_example.cc b/storage/example/ha_example.cc index 5059c729ae9..f2aee1de70c 100644 --- a/storage/example/ha_example.cc +++ b/storage/example/ha_example.cc @@ -225,7 +225,7 @@ static int example_done_func(void *p) hash_free(&example_open_tables); pthread_mutex_destroy(&example_mutex); - DBUG_RETURN(0); + DBUG_RETURN(error); } @@ -367,6 +367,7 @@ int ha_example::open(const char *name, int mode, uint test_if_locked) DBUG_RETURN(1); 
thr_lock_data_init(&share->lock,&lock,NULL); +#ifndef DBUG_OFF example_table_options_struct *options= (example_table_options_struct *)table->s->option_struct; @@ -375,6 +376,7 @@ int ha_example::open(const char *name, int mode, uint test_if_locked) "boolparam: %u", (options->strparam ? options->strparam : "<NULL>"), options->ullparam, options->enumparam, options->boolparam)); +#endif DBUG_RETURN(0); } @@ -588,7 +590,7 @@ int ha_example::index_last(uchar *buf) int ha_example::rnd_init(bool scan) { DBUG_ENTER("ha_example::rnd_init"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); + DBUG_RETURN(0); } int ha_example::rnd_end() @@ -896,6 +898,7 @@ ha_rows ha_example::records_in_range(uint inx, key_range *min_key, int ha_example::create(const char *name, TABLE *table_arg, HA_CREATE_INFO *create_info) { +#ifndef DBUG_OFF example_table_options_struct *options= (example_table_options_struct *)table_arg->s->option_struct; DBUG_ENTER("ha_example::create"); @@ -918,9 +921,8 @@ int ha_example::create(const char *name, TABLE *table_arg, (field_options->compex_param_to_parse_it_in_engine ? 
field_options->compex_param_to_parse_it_in_engine : "<NULL>"))); - } - +#endif DBUG_RETURN(0); } diff --git a/storage/federated/Makefile.am b/storage/federated/Makefile.am index 64ea0207017..7853ccedd21 100644 --- a/storage/federated/Makefile.am +++ b/storage/federated/Makefile.am @@ -26,24 +26,22 @@ INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include \ -I$(srcdir) WRAPLIBS= -LDADD = - DEFS = @DEFS@ noinst_HEADERS = ha_federated.h EXTRA_LTLIBRARIES = ha_federated.la pkgplugin_LTLIBRARIES = @plugin_federated_shared_target@ -ha_federated_la_LDFLAGS = -module -rpath $(pkgplugindir) -ha_federated_la_CXXFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN -ha_federated_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN -ha_federated_la_SOURCES = ha_federated.cc +ha_federated_la_LDFLAGS = -module -rpath $(pkgplugindir) \ + -L$(top_builddir)/libservices -lmysqlservices +ha_federated_la_CXXFLAGS= -shared $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_federated_la_CFLAGS = -shared $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_federated_la_SOURCES = ha_federated.cc $(top_srcdir)/mysys/string.c EXTRA_LIBRARIES = libfederated.a noinst_LIBRARIES = @plugin_federated_static_target@ -libfederated_a_CXXFLAGS = $(AM_CFLAGS) -libfederated_a_CFLAGS = $(AM_CFLAGS) +libfederated_a_CXXFLAGS = $(AM_CXXFLAGS) libfederated_a_SOURCES= ha_federated.cc diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc index 2dbac485fbc..fcc178b09a6 100644 --- a/storage/federated/ha_federated.cc +++ b/storage/federated/ha_federated.cc @@ -561,7 +561,6 @@ static int parse_url_error(FEDERATED_SHARE *share, TABLE *table, int error_num) int get_connection(MEM_ROOT *mem_root, FEDERATED_SHARE *share) { int error_num= ER_FOREIGN_SERVER_DOESNT_EXIST; - char error_buffer[FEDERATED_QUERY_BUFFER_SIZE]; FOREIGN_SERVER *server, server_buffer; DBUG_ENTER("ha_federated::get_connection"); @@ -613,10 +612,8 @@ int get_connection(MEM_ROOT *mem_root, FEDERATED_SHARE *share) DBUG_RETURN(0); error: - 
my_sprintf(error_buffer, - (error_buffer, "server name: '%s' doesn't exist!", - share->connection_string)); - my_error(error_num, MYF(0), error_buffer); + my_printf_error(error_num, "server name: '%s' doesn't exist!", + MYF(0), share->connection_string); DBUG_RETURN(error_num); } @@ -1983,12 +1980,12 @@ void ha_federated::start_bulk_insert(ha_rows rows) @retval != 0 Error occured at remote server. Also sets my_errno. */ -int ha_federated::end_bulk_insert(bool abort) +int ha_federated::end_bulk_insert() { int error= 0; DBUG_ENTER("ha_federated::end_bulk_insert"); - if (!abort && bulk_insert.str && bulk_insert.length) + if (!table_will_be_deleted && bulk_insert.str && bulk_insert.length) { if (real_query(bulk_insert.str, bulk_insert.length)) error= stash_remote_error(); @@ -2405,8 +2402,8 @@ int ha_federated::index_read_idx_with_result_set(uchar *buf, uint index, if (real_query(sql_query.ptr(), sql_query.length())) { - my_sprintf(error_buffer, (error_buffer, "error: %d '%s'", - mysql_errno(mysql), mysql_error(mysql))); + sprintf(error_buffer, "error: %d '%s'", + mysql_errno(mysql), mysql_error(mysql)); retval= ER_QUERY_ON_FOREIGN_DATA_SOURCE; goto error; } @@ -2775,7 +2772,6 @@ int ha_federated::rnd_pos(uchar *buf, uchar *pos) int ha_federated::info(uint flag) { - char error_buffer[FEDERATED_QUERY_BUFFER_SIZE]; char status_buf[FEDERATED_QUERY_BUFFER_SIZE]; int error; uint error_code; @@ -2859,9 +2855,8 @@ error: mysql_free_result(result); if (mysql) { - my_sprintf(error_buffer, (error_buffer, ": %d : %s", - mysql_errno(mysql), mysql_error(mysql))); - my_error(error_code, MYF(0), error_buffer); + my_printf_error(error_code, ": %d : %s", MYF(0), + mysql_errno(mysql), mysql_error(mysql)); } else if (remote_error_number != -1 /* error already reported */) @@ -2905,6 +2900,8 @@ int ha_federated::extra(ha_extra_function operation) case HA_EXTRA_INSERT_WITH_UPDATE: insert_dup_update= TRUE; break; + case HA_EXTRA_PREPARE_FOR_DROP: + table_will_be_deleted = TRUE; default: /* 
do nothing */ DBUG_PRINT("info",("unhandled operation: %d", (uint) operation)); @@ -3305,6 +3302,7 @@ int ha_federated::external_lock(THD *thd, int lock_type) } } #endif /* XXX_SUPERCEDED_BY_WL2952 */ + table_will_be_deleted = FALSE; DBUG_RETURN(error); } diff --git a/storage/federated/ha_federated.h b/storage/federated/ha_federated.h index ab31e3c1680..142e0e95e2b 100644 --- a/storage/federated/ha_federated.h +++ b/storage/federated/ha_federated.h @@ -88,7 +88,7 @@ class ha_federated: public handler Array of all stored results we get during a query execution. */ DYNAMIC_ARRAY results; - bool position_called; + bool position_called, table_will_be_deleted; uint fetch_num; // stores the fetch num MYSQL_ROW_OFFSET current_position; // Current position used by ::position() int remote_error_number; @@ -210,7 +210,7 @@ public: int close(void); // required void start_bulk_insert(ha_rows rows); - int end_bulk_insert(bool abort); + int end_bulk_insert(); int write_row(uchar *buf); int update_row(const uchar *old_data, uchar *new_data); int delete_row(const uchar *buf); diff --git a/storage/federated/plug.in.disabled b/storage/federated/plug.in index 23b607d699b..714888b2ebf 100644 --- a/storage/federated/plug.in.disabled +++ b/storage/federated/plug.in @@ -1,5 +1,5 @@ MYSQL_STORAGE_ENGINE(federated,,[Federated Storage Engine], - [Connects to tables on remote MySQL servers], [max,max-no-ndb]) + [Connects to tables on remote MySQL servers], []) MYSQL_PLUGIN_STATIC(federated, [libfederated.a]) MYSQL_PLUGIN_DYNAMIC(federated, [ha_federated.la]) MYSQL_PLUGIN_DEPENDS_ON_MYSQL_INTERNALS(federated, [ha_federated.cc]) diff --git a/storage/federatedx/Makefile.am b/storage/federatedx/Makefile.am index ad1328247ec..0e3249866ea 100644 --- a/storage/federatedx/Makefile.am +++ b/storage/federatedx/Makefile.am @@ -18,16 +18,16 @@ DEFS = @DEFS@ noinst_HEADERS = ha_federatedx.h federatedx_probes.h EXTRA_LTLIBRARIES = ha_federatedx.la -pkgplugin_LTLIBRARIES = @plugin_federated_shared_target@ 
-ha_federatedx_la_LDFLAGS = -module -rpath $(pkgplugindir) -ha_federatedx_la_CXXFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN -ha_federatedx_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN +pkgplugin_LTLIBRARIES = @plugin_federatedx_shared_target@ +ha_federatedx_la_LDFLAGS = -module -rpath $(pkgplugindir) \ + -L$(top_builddir)/libservices -lmysqlservices +ha_federatedx_la_CXXFLAGS= -shared $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_federatedx_la_CFLAGS = -shared $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN EXTRA_LIBRARIES = libfederatedx.a -noinst_LIBRARIES = @plugin_federated_static_target@ -libfederatedx_a_CXXFLAGS = $(AM_CFLAGS) -libfederatedx_a_CFLAGS = $(AM_CFLAGS) +noinst_LIBRARIES = @plugin_federatedx_static_target@ +libfederatedx_a_CXXFLAGS = $(AM_CXXFLAGS) libfederatedx_a_SOURCES= ha_federatedx.cc federatedx_txn.cc \ federatedx_io.cc federatedx_io_null.cc \ federatedx_io_mysql.cc diff --git a/storage/federatedx/federatedx_io_mysql.cc b/storage/federatedx/federatedx_io_mysql.cc index 5245395b060..d6844fab2c6 100644 --- a/storage/federatedx/federatedx_io_mysql.cc +++ b/storage/federatedx/federatedx_io_mysql.cc @@ -1,4 +1,4 @@ -/* +/* Copyright (c) 2007, Antony T Curtis All rights reserved. 
@@ -51,6 +51,12 @@ typedef struct federatedx_savepoint uint flags; } SAVEPT; +struct mysql_position +{ + MYSQL_RES* result; + MYSQL_ROW_OFFSET offset; +}; + class federatedx_io_mysql :public federatedx_io { @@ -76,16 +82,16 @@ public: virtual int error_code(); virtual const char *error_str(); - + void reset(); int commit(); int rollback(); - + int savepoint_set(ulong sp); ulong savepoint_release(ulong sp); ulong savepoint_rollback(ulong sp); void savepoint_restrict(ulong sp); - + ulong last_savepoint() const; ulong actual_savepoint() const; bool is_autocommit() const; @@ -94,7 +100,7 @@ public: uint table_name_length, uint flag); /* resultset operations */ - + virtual void free_result(FEDERATEDX_IO_RESULT *io_result); virtual unsigned int get_num_fields(FEDERATEDX_IO_RESULT *io_result); virtual my_ulonglong get_num_rows(FEDERATEDX_IO_RESULT *io_result); @@ -104,6 +110,12 @@ public: unsigned int column); virtual bool is_column_null(const FEDERATEDX_IO_ROW *row, unsigned int column) const; + + virtual size_t get_ref_length() const; + virtual void mark_position(FEDERATEDX_IO_RESULT *io_result, + void *ref); + virtual int seek_position(FEDERATEDX_IO_RESULT **io_result, + const void *ref); }; @@ -466,14 +478,13 @@ const char *federatedx_io_mysql::error_str() return mysql_error(&mysql); } - FEDERATEDX_IO_RESULT *federatedx_io_mysql::store_result() { FEDERATEDX_IO_RESULT *result; DBUG_ENTER("federatedx_io_mysql::store_result"); - + result= (FEDERATEDX_IO_RESULT *) mysql_store_result(&mysql); - + DBUG_RETURN(result); } @@ -590,3 +601,45 @@ error: free_result(result); return 1; } + + + +size_t federatedx_io_mysql::get_ref_length() const +{ + return sizeof(mysql_position); +} + + +void federatedx_io_mysql::mark_position(FEDERATEDX_IO_RESULT *io_result, + void *ref) +{ + MYSQL_ROWS *tmp= 0; + mysql_position& pos= *reinterpret_cast<mysql_position*>(ref); + pos.result= (MYSQL_RES *) io_result; + + if (pos.result && pos.result->data) + { + for (tmp= pos.result->data->data; + tmp 
&& (tmp->next != pos.result->data_cursor); + tmp= tmp->next) + {} + } + + pos.offset= tmp; +} + +int federatedx_io_mysql::seek_position(FEDERATEDX_IO_RESULT **io_result, + const void *ref) +{ + const mysql_position& pos= *reinterpret_cast<const mysql_position*>(ref); + + if (!pos.result || !pos.offset) + return HA_ERR_END_OF_FILE; + + pos.result->current_row= 0; + pos.result->data_cursor= pos.offset; + *io_result= (FEDERATEDX_IO_RESULT*) pos.result; + + return 0; +} + diff --git a/storage/federatedx/federatedx_io_null.cc b/storage/federatedx/federatedx_io_null.cc index cd8fc3eaf85..49f93ab6546 100644 --- a/storage/federatedx/federatedx_io_null.cc +++ b/storage/federatedx/federatedx_io_null.cc @@ -96,6 +96,11 @@ public: unsigned int column); virtual bool is_column_null(const FEDERATEDX_IO_ROW *row, unsigned int column) const; + virtual size_t get_ref_length() const; + virtual void mark_position(FEDERATEDX_IO_RESULT *io_result, + void *ref); + virtual int seek_position(FEDERATEDX_IO_RESULT **io_result, + const void *ref); }; @@ -275,3 +280,20 @@ bool federatedx_io_null::table_metadata(ha_statistics *stats, return 0; } + +size_t federatedx_io_null::get_ref_length() const +{ + return sizeof(int); +} + + +void federatedx_io_null::mark_position(FEDERATEDX_IO_RESULT *io_result, + void *ref) +{ +} + +int federatedx_io_null::seek_position(FEDERATEDX_IO_RESULT **io_result, + const void *ref) +{ + return 0; +} diff --git a/storage/federatedx/ha_federatedx.cc b/storage/federatedx/ha_federatedx.cc index 9ba1e25a367..2749034cba2 100644 --- a/storage/federatedx/ha_federatedx.cc +++ b/storage/federatedx/ha_federatedx.cc @@ -1717,14 +1717,14 @@ federatedx_txn *ha_federatedx::get_txn(THD *thd, bool no_create) return *txnp; } - + int ha_federatedx::disconnect(handlerton *hton, MYSQL_THD thd) { federatedx_txn *txn= (federatedx_txn *) thd_get_ha_data(thd, hton); delete txn; return 0; } - + /* Used for opening tables. The name will be the name of the file. 
@@ -1756,14 +1756,15 @@ int ha_federatedx::open(const char *name, int mode, uint test_if_locked) free_share(txn, share); DBUG_RETURN(error); } - + + ref_length= io->get_ref_length(); + txn->release(&io); - - ref_length= (table->s->primary_key != MAX_KEY ? - table->key_info[table->s->primary_key].key_length : - table->s->reclength); + DBUG_PRINT("info", ("ref_length: %u", ref_length)); + my_init_dynamic_array(&results, sizeof(FEDERATEDX_IO_RESULT*), 4, 4); + reset(); DBUG_RETURN(0); @@ -1788,8 +1789,9 @@ int ha_federatedx::close(void) DBUG_ENTER("ha_federatedx::close"); /* free the result set */ - if (stored_result) - retval= free_result(); + reset(); + + delete_dynamic(&results); /* Disconnect from mysql */ if (!thd || !(txn= get_txn(thd, true))) @@ -1799,7 +1801,7 @@ int ha_federatedx::close(void) tmp_txn.release(&io); DBUG_ASSERT(io == NULL); - + if ((error= free_share(&tmp_txn, share))) retval= error; } @@ -2143,12 +2145,12 @@ void ha_federatedx::start_bulk_insert(ha_rows rows) @retval != 0 Error occured at remote server. Also sets my_errno. 
*/ -int ha_federatedx::end_bulk_insert(bool abort) +int ha_federatedx::end_bulk_insert() { int error= 0; DBUG_ENTER("ha_federatedx::end_bulk_insert"); - if (bulk_insert.str && bulk_insert.length && !abort) + if (bulk_insert.str && bulk_insert.length && !table_will_be_deleted) { if ((error= txn->acquire(share, FALSE, &io))) DBUG_RETURN(error); @@ -2525,7 +2527,7 @@ int ha_federatedx::index_read_idx(uchar *buf, uint index, const uchar *key, uint key_len, enum ha_rkey_function find_flag) { int retval; - FEDERATEDX_IO_RESULT *io_result; + FEDERATEDX_IO_RESULT *io_result= 0; DBUG_ENTER("ha_federatedx::index_read_idx"); if ((retval= index_read_idx_with_result_set(buf, index, key, @@ -2601,7 +2603,7 @@ int ha_federatedx::index_read_idx_with_result_set(uchar *buf, uint index, if (!(retval= read_next(buf, *result))) DBUG_RETURN(retval); - io->free_result(*result); + insert_dynamic(&results, (uchar*) result); *result= 0; table->status= STATUS_NOT_FOUND; DBUG_RETURN(retval); @@ -2669,10 +2671,7 @@ int ha_federatedx::read_range_first(const key_range *start_key, DBUG_RETURN(retval); if (stored_result) - { - io->free_result(stored_result); - stored_result= 0; - } + (void) free_result(); if (io->query(sql_query.ptr(), sql_query.length())) { @@ -2773,10 +2772,7 @@ int ha_federatedx::rnd_init(bool scan) DBUG_RETURN(error); if (stored_result) - { - io->free_result(stored_result); - stored_result= 0; - } + (void) free_result(); if (io->query(share->select_query, strlen(share->select_query))) @@ -2803,17 +2799,35 @@ int ha_federatedx::rnd_end() int ha_federatedx::free_result() { int error; - federatedx_io *tmp_io= 0, **iop; + DBUG_ENTER("ha_federatedx::free_result"); DBUG_ASSERT(stored_result); - if (!*(iop= &io) && (error= txn->acquire(share, TRUE, (iop= &tmp_io)))) + for (uint i= 0; i < results.elements; ++i) + { + FEDERATEDX_IO_RESULT *result= 0; + get_dynamic(&results, (uchar*) &result, i); + if (result == stored_result) + goto end; + } + if (position_called) { - DBUG_ASSERT(0); 
// Fail when testing - return error; + insert_dynamic(&results, (uchar*) &stored_result); } - (*iop)->free_result(stored_result); + else + { + federatedx_io *tmp_io= 0, **iop; + if (!*(iop= &io) && (error= txn->acquire(share, TRUE, (iop= &tmp_io)))) + { + DBUG_ASSERT(0); // Fail when testing + insert_dynamic(&results, (uchar*) &stored_result); + goto end; + } + (*iop)->free_result(stored_result); + txn->release(&tmp_io); + } +end: stored_result= 0; - txn->release(&tmp_io); - return 0; + position_called= FALSE; + DBUG_RETURN(0); } int ha_federatedx::index_end(void) @@ -2862,8 +2876,8 @@ int ha_federatedx::rnd_next(uchar *buf) SYNOPSIS field_in_record_is_null() - buf byte pointer to record - result mysql result set + buf byte pointer to record + result mysql result set DESCRIPTION This method is a wrapper method that reads one record from a result @@ -2896,24 +2910,43 @@ int ha_federatedx::read_next(uchar *buf, FEDERATEDX_IO_RESULT *result) } -/* - store reference to current row so that we can later find it for - a re-read, update or delete. - - In case of federatedx, a reference is either a primary key or - the whole record. +/** + @brief Store a reference to current row. + + @details During a query execution we may have different result sets (RS), + e.g. for different ranges. All the RS's used are stored in + memory and placed in @c results dynamic array. At the end of + execution all stored RS's are freed at once in the + @c ha_federated::reset(). + So, in case of federated, a reference to current row is a + stored result address and current data cursor position. + As we keep all RS in memory during a query execution, + we can get any record using the reference any time until + @c ha_federated::reset() is called. + TODO: we don't have to store all RS's rows but only those + we call @c ha_federated::position() for, so we can free memory + where we store other rows in the @c ha_federated::index_end(). 
+ + @param[in] record record data (unused) - Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc. */ -void ha_federatedx::position(const uchar *record) +void ha_federatedx::position(const uchar *record __attribute__ ((unused))) { DBUG_ENTER("ha_federatedx::position"); - if (table->s->primary_key != MAX_KEY) - key_copy(ref, (uchar *)record, table->key_info + table->s->primary_key, - ref_length); - else - memcpy(ref, record, ref_length); + + bzero(ref, ref_length); + + if (!stored_result) + DBUG_VOID_RETURN; + + if (txn->acquire(share, TRUE, &io)) + DBUG_VOID_RETURN; + + io->mark_position(stored_result, ref); + + position_called= TRUE; + DBUG_VOID_RETURN; } @@ -2929,23 +2962,26 @@ void ha_federatedx::position(const uchar *record) int ha_federatedx::rnd_pos(uchar *buf, uchar *pos) { - int result; + int retval; + FEDERATEDX_IO_RESULT *result= stored_result; DBUG_ENTER("ha_federatedx::rnd_pos"); ha_statistic_increment(&SSV::ha_read_rnd_count); - if (table->s->primary_key != MAX_KEY) - { - /* We have a primary key, so use index_read_idx to find row */ - result= index_read_idx(buf, table->s->primary_key, pos, - ref_length, HA_READ_KEY_EXACT); - } - else - { - /* otherwise, get the old record ref as obtained in ::position */ - memcpy(buf, pos, ref_length); - result= 0; - } - table->status= result ? 
STATUS_NOT_FOUND : 0; - DBUG_RETURN(result); + + /* We have to move this to 'ref' to get things aligned */ + bmove(ref, pos, ref_length); + + if ((retval= txn->acquire(share, TRUE, &io))) + goto error; + + if ((retval= io->seek_position(&result, ref))) + goto error; + + retval= read_next(buf, result); + DBUG_RETURN(retval); + +error: + table->status= STATUS_NOT_FOUND; + DBUG_RETURN(retval); } @@ -2995,17 +3031,21 @@ int ha_federatedx::rnd_pos(uchar *buf, uchar *pos) int ha_federatedx::info(uint flag) { - char error_buffer[FEDERATEDX_QUERY_BUFFER_SIZE]; uint error_code; + THD *thd= current_thd; + federatedx_txn *tmp_txn; federatedx_io *tmp_io= 0, **iop= 0; DBUG_ENTER("ha_federatedx::info"); error_code= ER_QUERY_ON_FOREIGN_DATA_SOURCE; + // external_lock may not have been called so txn may not be set + tmp_txn= get_txn(thd); + /* we want not to show table status if not needed to do so */ if (flag & (HA_STATUS_VARIABLE | HA_STATUS_CONST | HA_STATUS_AUTO)) { - if (!*(iop= &io) && (error_code= txn->acquire(share, TRUE, (iop= &tmp_io)))) + if (!*(iop= &io) && (error_code= tmp_txn->acquire(share, TRUE, (iop= &tmp_io)))) goto fail; } @@ -3030,25 +3070,23 @@ int ha_federatedx::info(uint flag) If ::info created it's own transaction, close it. 
This happens in case of show table status; */ - txn->release(&tmp_io); + tmp_txn->release(&tmp_io); DBUG_RETURN(0); error: if (iop && *iop) { - my_sprintf(error_buffer, (error_buffer, ": %d : %s", - (*iop)->error_code(), (*iop)->error_str())); - my_error(error_code, MYF(0), error_buffer); + my_printf_error((*iop)->error_code(), "Received error: %d : %s", MYF(0), + (*iop)->error_code(), (*iop)->error_str()); } - else - if (remote_error_number != -1 /* error already reported */) + else if (remote_error_number != -1 /* error already reported */) { error_code= remote_error_number; my_error(error_code, MYF(0), ER(error_code)); } fail: - txn->release(&tmp_io); + tmp_txn->release(&tmp_io); DBUG_RETURN(error_code); } @@ -3085,6 +3123,9 @@ int ha_federatedx::extra(ha_extra_function operation) case HA_EXTRA_INSERT_WITH_UPDATE: insert_dup_update= TRUE; break; + case HA_EXTRA_PREPARE_FOR_DROP: + table_will_be_deleted = TRUE; + break; default: /* do nothing */ DBUG_PRINT("info",("unhandled operation: %d", (uint) operation)); @@ -3105,12 +3146,44 @@ int ha_federatedx::extra(ha_extra_function operation) int ha_federatedx::reset(void) { + int error = 0; + insert_dup_update= FALSE; ignore_duplicates= FALSE; replace_duplicates= FALSE; - return 0; -} + position_called= FALSE; + if (stored_result) + insert_dynamic(&results, (uchar*) &stored_result); + stored_result= 0; + + if (results.elements) + { + federatedx_txn *tmp_txn; + federatedx_io *tmp_io= 0, **iop; + + // external_lock may not have been called so txn may not be set + tmp_txn= get_txn(current_thd); + + if (!*(iop= &io) && (error= tmp_txn->acquire(share, TRUE, (iop= &tmp_io)))) + { + DBUG_ASSERT(0); // Fail when testing + return error; + } + + for (uint i= 0; i < results.elements; ++i) + { + FEDERATEDX_IO_RESULT *result= 0; + get_dynamic(&results, (uchar*) &result, i); + (*iop)->free_result(result); + } + tmp_txn->release(&tmp_io); + reset_dynamic(&results); + } + + return error; + +} /* Used to delete all rows in a table. 
Both for cases of truncate and @@ -3237,7 +3310,7 @@ static int test_connection(MYSQL_THD thd, federatedx_io *io, str.length(0); str.append(STRING_WITH_LEN("SELECT * FROM ")); - append_identifier(thd, &str, share->table_name, + append_identifier(thd, &str, share->table_name, share->table_name_length); str.append(STRING_WITH_LEN(" WHERE 1=0")); @@ -3288,14 +3361,14 @@ int ha_federatedx::create(const char *name, TABLE *table_arg, pthread_mutex_lock(&federatedx_mutex); tmp_share.s= get_server(&tmp_share, NULL); pthread_mutex_unlock(&federatedx_mutex); - + if (tmp_share.s) { tmp_txn= get_txn(thd); if (!(retval= tmp_txn->acquire(&tmp_share, TRUE, &tmp_io))) { retval= test_connection(thd, tmp_io, &tmp_share); - tmp_txn->release(&tmp_io); + tmp_txn->release(&tmp_io); } free_server(tmp_txn, tmp_share.s); } @@ -3394,6 +3467,7 @@ int ha_federatedx::external_lock(MYSQL_THD thd, int lock_type) txn->release(&io); else { + table_will_be_deleted = FALSE; txn= get_txn(thd); if (!(error= txn->acquire(share, lock_type == F_RDLCK, &io)) && (lock_type == F_WRLCK || !io->is_autocommit())) @@ -3495,7 +3569,7 @@ int ha_federatedx::rollback(handlerton *hton, MYSQL_THD thd, bool all) struct st_mysql_storage_engine federatedx_storage_engine= { MYSQL_HANDLERTON_INTERFACE_VERSION }; -mysql_declare_plugin(federated) +mysql_declare_plugin(federatedx) { MYSQL_STORAGE_ENGINE_PLUGIN, &federatedx_storage_engine, @@ -3511,7 +3585,7 @@ mysql_declare_plugin(federated) NULL /* config options */ } mysql_declare_plugin_end; -maria_declare_plugin(federated) +maria_declare_plugin(federatedx) { MYSQL_STORAGE_ENGINE_PLUGIN, &federatedx_storage_engine, diff --git a/storage/federatedx/ha_federatedx.h b/storage/federatedx/ha_federatedx.h index 0fa9df25895..2820f8a6c29 100644 --- a/storage/federatedx/ha_federatedx.h +++ b/storage/federatedx/ha_federatedx.h @@ -1,5 +1,5 @@ -/* -Copyright (c) 2008, Patrick Galbraith +/* +Copyright (c) 2008, Patrick Galbraith All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -43,7 +43,7 @@ class federatedx_io; typedef struct st_fedrated_server { MEM_ROOT mem_root; uint use_count, io_count; - + uchar *key; uint key_length; @@ -74,10 +74,10 @@ typedef struct st_fedrated_server { #include <mysql.h> -/* +/* handler::print_error has a case statement for error numbers. - This value is (10000) is far out of range and will envoke the - default: case. + This value is (10000) is far out of range and will envoke the + default: case. (Current error range is 120-159 from include/my_base.h) */ #define HA_FEDERATEDX_ERROR_WITH_REMOTE_SYSTEM 10000 @@ -158,7 +158,7 @@ public: const char * get_database() const { return server->database; } ushort get_port() const { return server->port; } const char * get_socket() const { return server->socket; } - + static bool handles_scheme(const char *scheme); static federatedx_io *construct(MEM_ROOT *server_root, FEDERATEDX_SERVER *server); @@ -167,7 +167,7 @@ public: { return alloc_root(mem_root, size); } static void operator delete(void *ptr, size_t size) { TRASH(ptr, size); } - + virtual int query(const char *buffer, uint length)=0; virtual FEDERATEDX_IO_RESULT *store_result()=0; @@ -178,25 +178,25 @@ public: virtual int error_code()=0; virtual const char *error_str()=0; - + virtual void reset()=0; virtual int commit()=0; virtual int rollback()=0; - + virtual int savepoint_set(ulong sp)=0; virtual ulong savepoint_release(ulong sp)=0; virtual ulong savepoint_rollback(ulong sp)=0; virtual void savepoint_restrict(ulong sp)=0; - + virtual ulong last_savepoint() const=0; virtual ulong actual_savepoint() const=0; virtual bool is_autocommit() const=0; virtual bool table_metadata(ha_statistics *stats, const char *table_name, uint table_name_length, uint flag) = 0; - + /* resultset operations */ - + virtual void free_result(FEDERATEDX_IO_RESULT *io_result)=0; virtual unsigned int get_num_fields(FEDERATEDX_IO_RESULT *io_result)=0; virtual my_ulonglong 
get_num_rows(FEDERATEDX_IO_RESULT *io_result)=0; @@ -206,6 +206,13 @@ public: unsigned int column)=0; virtual bool is_column_null(const FEDERATEDX_IO_ROW *row, unsigned int column) const=0; + + virtual size_t get_ref_length() const=0; + virtual void mark_position(FEDERATEDX_IO_RESULT *io_result, + void *ref)=0; + virtual int seek_position(FEDERATEDX_IO_RESULT **io_result, + const void *ref)=0; + }; @@ -215,12 +222,12 @@ class federatedx_txn ulong savepoint_level; ulong savepoint_stmt; ulong savepoint_next; - + void release_scan(); public: federatedx_txn(); ~federatedx_txn(); - + bool has_connections() const { return txn_list != NULL; } bool in_transaction() const { return savepoint_next != 0; } int acquire(FEDERATEDX_SHARE *share, bool readonly, federatedx_io **io); @@ -254,12 +261,16 @@ class ha_federatedx: public handler federatedx_txn *txn; federatedx_io *io; FEDERATEDX_IO_RESULT *stored_result; + /** + Array of all stored results we get during a query execution. + */ + DYNAMIC_ARRAY results; + bool position_called; uint fetch_num; // stores the fetch num - FEDERATEDX_IO_OFFSET current_position; // Current position used by ::position() int remote_error_number; char remote_error_buf[FEDERATEDX_QUERY_BUFFER_SIZE]; bool ignore_duplicates, replace_duplicates; - bool insert_dup_update; + bool insert_dup_update, table_will_be_deleted; DYNAMIC_STRING bulk_insert; private: @@ -269,7 +280,7 @@ private: */ uint convert_row_to_internal_format(uchar *buf, FEDERATEDX_IO_ROW *row, FEDERATEDX_IO_RESULT *result); - bool create_where_from_key(String *to, KEY *key_info, + bool create_where_from_key(String *to, KEY *key_info, const key_range *start_key, const key_range *end_key, bool records_in_range, bool eq_range); @@ -348,18 +359,18 @@ public: Talk to Kostja about this - how to get the number of rows * ... disk scan time on other side (block size, size of the row) + network time ... 
- The reason for "records * 1000" is that such a large number forces + The reason for "records * 1000" is that such a large number forces this to use indexes " */ double scan_time() { DBUG_PRINT("info", ("records %lu", (ulong) stats.records)); - return (double)(stats.records*1000); + return (double)(stats.records*1000); } /* The next method will never be called if you do not implement indexes. */ - double read_time(uint index, uint ranges, ha_rows rows) + double read_time(uint index, uint ranges, ha_rows rows) { /* Per Brian, this number is bugus, but this method must be implemented, @@ -379,7 +390,7 @@ public: int close(void); // required void start_bulk_insert(ha_rows rows); - int end_bulk_insert(bool abort); + int end_bulk_insert(); int write_row(uchar *buf); int update_row(const uchar *old_data, uchar *new_data); int delete_row(const uchar *buf); diff --git a/storage/federatedx/plug.in b/storage/federatedx/plug.in index ee2e6af0e94..95afe270f4c 100644 --- a/storage/federatedx/plug.in +++ b/storage/federatedx/plug.in @@ -1,5 +1,5 @@ -MYSQL_STORAGE_ENGINE(federated,,[FederatedX Storage Engine], +MYSQL_STORAGE_ENGINE(federatedx,,[FederatedX Storage Engine], [FederatedX Storage Engine], [max,max-no-ndb]) -MYSQL_PLUGIN_DYNAMIC(federated, [ha_federatedx.la]) -MYSQL_PLUGIN_STATIC(federated, [libfederatedx.a]) -MYSQL_PLUGIN_DEPENDS_ON_MYSQL_INTERNALS(federated, [ha_federatedx.cc]) +MYSQL_PLUGIN_DYNAMIC(federatedx, [ha_federatedx.la]) +MYSQL_PLUGIN_STATIC(federatedx, [libfederatedx.a]) +MYSQL_PLUGIN_DEPENDS_ON_MYSQL_INTERNALS(federatedx, [ha_federatedx.cc]) diff --git a/storage/heap/hp_create.c b/storage/heap/hp_create.c index b6814fc1614..bc8183bf777 100644 --- a/storage/heap/hp_create.c +++ b/storage/heap/hp_create.c @@ -85,8 +85,6 @@ int heap_create(const char *name, uint keys, HP_KEYDEF *keydef, keyinfo->seg[j].type= HA_KEYTYPE_VARTEXT1; /* fall_through */ case HA_KEYTYPE_VARTEXT1: - if (!my_binary_compare(keyinfo->seg[j].charset)) - keyinfo->flag|= 
HA_END_SPACE_KEY; keyinfo->flag|= HA_VAR_LENGTH_KEY; length+= 2; /* Save number of bytes used to store length */ @@ -96,8 +94,6 @@ int heap_create(const char *name, uint keys, HP_KEYDEF *keydef, /* Case-insensitiveness is handled in coll->hash_sort */ /* fall_through */ case HA_KEYTYPE_VARTEXT2: - if (!my_binary_compare(keyinfo->seg[j].charset)) - keyinfo->flag|= HA_END_SPACE_KEY; keyinfo->flag|= HA_VAR_LENGTH_KEY; length+= 2; /* Save number of bytes used to store length */ @@ -111,8 +107,6 @@ int heap_create(const char *name, uint keys, HP_KEYDEF *keydef, default: break; } - if (keyinfo->seg[j].flag & HA_END_SPACE_ARE_EQUAL) - keyinfo->flag|= HA_END_SPACE_KEY; } keyinfo->length= length; length+= keyinfo->rb_tree.size_of_element + diff --git a/storage/heap/hp_rkey.c b/storage/heap/hp_rkey.c index 6eeac6acd7b..27d1114770e 100644 --- a/storage/heap/hp_rkey.c +++ b/storage/heap/hp_rkey.c @@ -63,7 +63,7 @@ int heap_rkey(HP_INFO *info, uchar *record, int inx, const uchar *key, info->update= 0; DBUG_RETURN(my_errno); } - if (!(keyinfo->flag & HA_NOSAME) || (keyinfo->flag & HA_END_SPACE_KEY)) + if (!(keyinfo->flag & HA_NOSAME)) memcpy(info->lastkey, key, (size_t) keyinfo->length); } memcpy(record, pos, (size_t) share->reclength); diff --git a/storage/heap/hp_test2.c b/storage/heap/hp_test2.c index 5c548b6be74..bf06cf03035 100644 --- a/storage/heap/hp_test2.c +++ b/storage/heap/hp_test2.c @@ -406,7 +406,7 @@ int main(int argc, char *argv[]) bmove(record2,record,reclength); if (heap_rsame(file,record,-1) || heap_rsame(file,record2,2)) goto err; - if (bcmp(record2,record,reclength)) + if (memcmp(record2,record,reclength)) { puts("heap_rsame didn't find right record"); goto end; @@ -415,7 +415,7 @@ int main(int argc, char *argv[]) puts("- Test of read through position"); if (heap_rrnd(file,record,position)) goto err; - if (bcmp(record3,record,reclength)) + if (memcmp(record3,record,reclength)) { puts("heap_frnd didn't find right record"); goto end; diff --git 
a/storage/ibmdb2i/Makefile.am b/storage/ibmdb2i/Makefile.am index 768ca15f4cf..b9602e392e0 100644 --- a/storage/ibmdb2i/Makefile.am +++ b/storage/ibmdb2i/Makefile.am @@ -34,7 +34,7 @@ EXTRA_LTLIBRARIES = ha_ibmdb2i.la pkgplugin_LTLIBRARIES = @plugin_ibmdb2i_shared_target@ ha_ibmdb2i_la_LIBADD = -liconv ha_ibmdb2i_la_LDFLAGS = -module -rpath $(MYSQLLIBdir) -ha_ibmdb2i_la_CXXFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_ibmdb2i_la_CXXFLAGS= $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN ha_ibmdb2i_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN ha_ibmdb2i_la_SOURCES = ha_ibmdb2i.cc db2i_ileBridge.cc db2i_conversion.cc \ db2i_blobCollection.cc db2i_file.cc db2i_charsetSupport.cc \ @@ -44,7 +44,7 @@ ha_ibmdb2i_la_SOURCES = ha_ibmdb2i.cc db2i_ileBridge.cc db2i_conversion.cc \ EXTRA_LIBRARIES = libibmdb2i.a noinst_LIBRARIES = @plugin_ibmdb2i_static_target@ -libibmdb2i_a_CXXFLAGS = $(AM_CFLAGS) +libibmdb2i_a_CXXFLAGS = $(AM_CXXFLAGS) libibmdb2i_a_CFLAGS = $(AM_CFLAGS) libibmdb2i_a_SOURCES= $(ha_ibmdb2i_la_SOURCES) diff --git a/storage/ibmdb2i/db2i_file.h b/storage/ibmdb2i/db2i_file.h index ff35a473b05..7b63b18c315 100644 --- a/storage/ibmdb2i/db2i_file.h +++ b/storage/ibmdb2i/db2i_file.h @@ -40,7 +40,6 @@ OF SUCH DAMAGE. #include "db2i_global.h" #include "db2i_ileBridge.h" #include "db2i_validatedPointer.h" -#include "my_atomic.h" #include "db2i_iconv.h" #include "db2i_charsetSupport.h" diff --git a/storage/ibmdb2i/ha_ibmdb2i.cc b/storage/ibmdb2i/ha_ibmdb2i.cc index c007cbaf3e3..f77927421d2 100644 --- a/storage/ibmdb2i/ha_ibmdb2i.cc +++ b/storage/ibmdb2i/ha_ibmdb2i.cc @@ -1158,9 +1158,7 @@ int ha_ibmdb2i::rnd_init(bool scan) rrnAssocHandle= 0; - DBUG_RETURN(0); // MySQL sometimes does not check the return code, causing - // an assert in ha_rnd_end later on if we return a non-zero - // value here. 
+ DBUG_RETURN(0); } int ha_ibmdb2i::rnd_end() diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt.disabled index 5918db7ab11..5918db7ab11 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt.disabled diff --git a/storage/innobase/Makefile.am b/storage/innobase/Makefile.am index a597e3c24e4..29ba83c0668 100644 --- a/storage/innobase/Makefile.am +++ b/storage/innobase/Makefile.am @@ -156,15 +156,15 @@ libinnobase_a_SOURCES= btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c \ ut/ut0ut.c ut/ut0vec.c ut/ut0wqueue.c \ handler/ha_innodb.cc -libinnobase_a_CXXFLAGS= $(AM_CFLAGS) +libinnobase_a_CXXFLAGS= $(AM_CXXFLAGS) libinnobase_a_CFLAGS= $(AM_CFLAGS) EXTRA_LTLIBRARIES= ha_innodb.la pkgplugin_LTLIBRARIES= @plugin_innobase_shared_target@ ha_innodb_la_LDFLAGS= -module -rpath $(pkgplugindir) -ha_innodb_la_CXXFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) -ha_innodb_la_CFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) +ha_innodb_la_CXXFLAGS= -shared $(AM_CXXFLAGS) $(INNODB_DYNAMIC_CFLAGS) +ha_innodb_la_CFLAGS= -shared $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) ha_innodb_la_SOURCES= $(libinnobase_a_SOURCES) EXTRA_DIST= CMakeLists.txt plug.in \ diff --git a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c index 423c08c0569..7533205d695 100644 --- a/storage/innobase/buf/buf0flu.c +++ b/storage/innobase/buf/buf0flu.c @@ -55,6 +55,7 @@ buf_flush_insert_into_flush_list( || (ut_dulint_cmp((UT_LIST_GET_FIRST(buf_pool->flush_list)) ->oldest_modification, block->oldest_modification) <= 0)); + UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE); UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block); @@ -75,6 +76,7 @@ buf_flush_insert_sorted_into_flush_list( buf_block_t* b; ut_ad(mutex_own(&(buf_pool->mutex))); + UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE); prev_b = NULL; b = UT_LIST_GET_FIRST(buf_pool->flush_list); @@ -423,6 +425,7 @@ try_again: goto try_again; } + UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE); 
ut_memcpy(trx_doublewrite->write_buf + UNIV_PAGE_SIZE * trx_doublewrite->first_free, block->frame, UNIV_PAGE_SIZE); diff --git a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c index b8251a99105..d2b59469cdc 100644 --- a/storage/innobase/dict/dict0dict.c +++ b/storage/innobase/dict/dict0dict.c @@ -618,13 +618,11 @@ dict_table_get_on_id( if (ut_dulint_cmp(table_id, DICT_FIELDS_ID) <= 0 || trx->dict_operation_lock_mode == RW_X_LATCH) { - /* It is a system table which will always exist in the table - cache: we avoid acquiring the dictionary mutex, because - if we are doing a rollback to handle an error in TABLE - CREATE, for example, we already have the mutex! */ - ut_ad(mutex_own(&(dict_sys->mutex)) - || trx->dict_operation_lock_mode == RW_X_LATCH); + /* Note: An X latch implies that the transaction + already owns the dictionary mutex. */ + + ut_ad(mutex_own(&dict_sys->mutex)); return(dict_table_get_on_id_low(table_id)); } @@ -2586,25 +2584,28 @@ dict_strip_comments( /* out, own: SQL string stripped from comments; the caller must free this with mem_free()! 
*/ - const char* sql_string) /* in: SQL string */ + const char* sql_string, /* in: SQL string */ + size_t sql_length) /* in: length of sql_string */ { char* str; const char* sptr; + const char* eptr = sql_string + sql_length; char* ptr; /* unclosed quote character (0 if none) */ char quote = 0; - str = mem_alloc(strlen(sql_string) + 1); + str = mem_alloc(sql_length + 1); sptr = sql_string; ptr = str; for (;;) { scan_more: - if (*sptr == '\0') { + if (sptr >= eptr || *sptr == '\0') { +end_of_string: *ptr = '\0'; - ut_a(ptr <= str + strlen(sql_string)); + ut_a(ptr <= str + sql_length); return(str); } @@ -2623,30 +2624,35 @@ scan_more: || (sptr[0] == '-' && sptr[1] == '-' && sptr[2] == ' ')) { for (;;) { + if (++sptr >= eptr) { + goto end_of_string; + } + /* In Unix a newline is 0x0A while in Windows it is 0x0D followed by 0x0A */ - if (*sptr == (char)0x0A - || *sptr == (char)0x0D - || *sptr == '\0') { - + switch (*sptr) { + case (char) 0X0A: + case (char) 0x0D: + case '\0': goto scan_more; } - - sptr++; } } else if (!quote && *sptr == '/' && *(sptr + 1) == '*') { + sptr += 2; for (;;) { - if (*sptr == '*' && *(sptr + 1) == '/') { - - sptr += 2; - - goto scan_more; + if (sptr >= eptr) { + goto end_of_string; } - if (*sptr == '\0') { - + switch (*sptr) { + case '\0': goto scan_more; + case '*': + if (sptr[1] == '/') { + sptr += 2; + goto scan_more; + } } sptr++; @@ -3348,6 +3354,7 @@ dict_create_foreign_constraints( name before it: test.table2; the default database id the database of parameter name */ + size_t sql_length, /* in: length of sql_string */ const char* name, /* in: table full name in the normalized form database_name/table_name */ @@ -3362,7 +3369,7 @@ dict_create_foreign_constraints( ut_a(trx); ut_a(trx->mysql_thd); - str = dict_strip_comments(sql_string); + str = dict_strip_comments(sql_string, sql_length); heap = mem_heap_create(10000); err = dict_create_foreign_constraints_low( @@ -3411,7 +3418,8 @@ dict_foreign_parse_drop_constraints( 
*constraints_to_drop = mem_heap_alloc(heap, 1000 * sizeof(char*)); - str = dict_strip_comments(*(trx->mysql_query_str)); + str = dict_strip_comments(*trx->mysql_query_str, + *trx->mysql_query_len); ptr = str; ut_ad(mutex_own(&(dict_sys->mutex))); diff --git a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c index 65f1c9536bd..d5e7600f4d0 100644 --- a/storage/innobase/dict/dict0load.c +++ b/storage/innobase/dict/dict0load.c @@ -927,6 +927,8 @@ dict_load_table_on_id( ut_ad(mutex_own(&(dict_sys->mutex))); + table = NULL; + /* NOTE that the operation of this function is protected by the dictionary mutex, and therefore no deadlocks can occur with other dictionary operations. */ @@ -953,15 +955,17 @@ dict_load_table_on_id( BTR_SEARCH_LEAF, &pcur, &mtr); rec = btr_pcur_get_rec(&pcur); - if (!btr_pcur_is_on_user_rec(&pcur, &mtr) - || rec_get_deleted_flag(rec, 0)) { + if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) { /* Not found */ + goto func_exit; + } - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(NULL); + /* Find the first record that is not delete marked */ + while (rec_get_deleted_flag(rec, 0)) { + if (!btr_pcur_move_to_next_user_rec(&pcur, &mtr)) { + goto func_exit; + } + rec = btr_pcur_get_rec(&pcur); } /*---------------------------------------------------*/ @@ -974,19 +978,14 @@ dict_load_table_on_id( /* Check if the table id in record is the one searched for */ if (ut_dulint_cmp(table_id, mach_read_from_8(field)) != 0) { - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(NULL); + goto func_exit; } /* Now we get the table name from the record */ field = rec_get_nth_field_old(rec, 1, &len); /* Load the table definition to memory */ table = dict_load_table(mem_heap_strdupl(heap, (char*) field, len)); - +func_exit: btr_pcur_close(&pcur); mtr_commit(&mtr); mem_heap_free(heap); diff --git a/storage/innobase/fsp/fsp0fsp.c b/storage/innobase/fsp/fsp0fsp.c index e1074933fe8..1ec1c262a52 100644 
--- a/storage/innobase/fsp/fsp0fsp.c +++ b/storage/innobase/fsp/fsp0fsp.c @@ -802,12 +802,7 @@ fsp_init_file_page_low( buf_block_align(page)->check_index_page_at_flush = FALSE; -#ifdef UNIV_BASIC_LOG_DEBUG - memset(page, 0xff, UNIV_PAGE_SIZE); -#endif - mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, - ut_dulint_zero); - mach_write_to_8(page + FIL_PAGE_LSN, ut_dulint_zero); + memset(page, 0, UNIV_PAGE_SIZE); } /*************************************************************** diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index ebf01fbc296..d10fcb8d31e 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -583,13 +583,13 @@ thd_is_select( /************************************************************************ Obtain the InnoDB transaction of a MySQL thread. */ inline -trx_t*& +trx_t* thd_to_trx( /*=======*/ /* out: reference to transaction pointer */ THD* thd) /* in: MySQL thread */ { - return(*(trx_t**) thd_ha_data(thd, innodb_hton_ptr)); + return((trx_t*) thd_get_ha_data(thd, innodb_hton_ptr)); } /************************************************************************ @@ -759,6 +759,10 @@ convert_error_code_to_mysql( } else if (error == DB_UNSUPPORTED) { return(HA_ERR_UNSUPPORTED); + } else if (error == DB_INTERRUPTED) { + + my_error(ER_QUERY_INTERRUPTED, MYF(0)); + return(-1); } else { return(-1); // Unknown error } @@ -1140,6 +1144,15 @@ innobase_next_autoinc( return(next_value); } +/** Copy the current SQL statement. 
+* @param[in] thd MySQL client connection +* @param[in/out] trx InnoDB transaction */ +#define INNOBASE_COPY_STMT(thd, trx) do { \ + LEX_STRING* stmt = thd_query_string(thd); \ + (trx)->mysql_query_str = &stmt->str; \ + (trx)->mysql_query_len = &stmt->length; \ +} while (0) + /************************************************************************* Gets the InnoDB transaction handle for a MySQL handler object, creates an InnoDB transaction struct if the corresponding MySQL thread struct still @@ -1151,7 +1164,7 @@ check_trx_exists( /* out: InnoDB transaction handle */ THD* thd) /* in: user thread handle */ { - trx_t*& trx = thd_to_trx(thd); + trx_t* trx = thd_to_trx(thd); ut_ad(thd == current_thd); @@ -1160,11 +1173,14 @@ check_trx_exists( trx = trx_allocate_for_mysql(); trx->mysql_thd = thd; - trx->mysql_query_str = thd_query(thd); + INNOBASE_COPY_STMT(thd, trx); /* Update the info whether we should skip XA steps that eat CPU time */ trx->support_xa = THDVAR(thd, support_xa); + + /* We have a new trx, register with the thread handle */ + thd_set_ha_data(thd, innodb_hton_ptr, trx); } else { if (trx->magic_n != TRX_MAGIC_N) { mem_analyze_corruption(trx); @@ -2469,6 +2485,9 @@ innobase_close_connection( innobase_rollback_trx(trx); + /* Release the lock in thread handler */ + thd_set_ha_data(thd, hton, NULL); + thr_local_free(trx->mysql_thread_id); trx_free_for_mysql(trx); @@ -3223,6 +3242,11 @@ get_innobase_type_from_mysql_type( case MYSQL_TYPE_BLOB: case MYSQL_TYPE_LONG_BLOB: return(DATA_BLOB); + case MYSQL_TYPE_NULL: + /* MySQL currently accepts "NULL" datatype, but will + reject such datatype in the next release. 
We will cope + with it and not trigger assertion failure in 5.1 */ + break; default: assert(0); } @@ -5244,7 +5268,22 @@ create_table_def( field = form->field[i]; col_type = get_innobase_type_from_mysql_type(&unsigned_type, - field); + field); + + if (!col_type) { + push_warning_printf( + (THD*) trx->mysql_thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_CANT_CREATE_TABLE, + "Error creating table '%s' with " + "column '%s'. Please check its " + "column type and try to re-create " + "the table with an appropriate " + "column type.", + table->name, (char*) field->field_name); + goto err_col; + } + if (field->null_ptr) { nulls_allowed = 0; } else { @@ -5301,7 +5340,7 @@ create_table_def( "different column name.", table->name, (char*) field->field_name, (char*) field->field_name); - +err_col: dict_mem_table_free(table); trx_commit_for_mysql(trx); @@ -5578,7 +5617,7 @@ ha_innobase::create( trx = trx_allocate_for_mysql(); trx->mysql_thd = thd; - trx->mysql_query_str = thd_query(thd); + INNOBASE_COPY_STMT(thd, trx); if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) { trx->check_foreigns = FALSE; @@ -5674,8 +5713,10 @@ ha_innobase::create( } if (*trx->mysql_query_str) { - error = row_table_add_foreign_constraints(trx, - *trx->mysql_query_str, norm_name, + error = row_table_add_foreign_constraints( + trx, + *trx->mysql_query_str, *trx->mysql_query_len, + norm_name, create_info->options & HA_LEX_CREATE_TMP_TABLE); error = convert_error_code_to_mysql(error, NULL); @@ -5866,7 +5907,7 @@ ha_innobase::delete_table( trx = trx_allocate_for_mysql(); trx->mysql_thd = thd; - trx->mysql_query_str = thd_query(thd); + INNOBASE_COPY_STMT(thd, trx); if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) { trx->check_foreigns = FALSE; @@ -5955,7 +5996,7 @@ innobase_drop_database( #endif trx = trx_allocate_for_mysql(); trx->mysql_thd = thd; - trx->mysql_query_str = thd_query(thd); + INNOBASE_COPY_STMT(thd, trx); if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) { 
trx->check_foreigns = FALSE; @@ -6025,7 +6066,7 @@ ha_innobase::rename_table( trx = trx_allocate_for_mysql(); trx->mysql_thd = thd; - trx->mysql_query_str = thd_query(thd); + INNOBASE_COPY_STMT(thd, trx); if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) { trx->check_foreigns = FALSE; diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h index 5b3df16875a..eb9199b8955 100644 --- a/storage/innobase/handler/ha_innodb.h +++ b/storage/innobase/handler/ha_innodb.h @@ -210,7 +210,7 @@ the definitions are bracketed with #ifdef INNODB_COMPATIBILITY_HOOKS */ extern "C" { struct charset_info_st *thd_charset(MYSQL_THD thd); -char **thd_query(MYSQL_THD thd); +LEX_STRING *thd_query_string(MYSQL_THD thd); /** Get the file name of the MySQL binlog. * @return the name of the binlog file diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h index ed7ce151718..2be6005622d 100644 --- a/storage/innobase/include/db0err.h +++ b/storage/innobase/include/db0err.h @@ -10,6 +10,8 @@ Created 5/24/1996 Heikki Tuuri #define db0err_h +#define DB_SUCCESS_LOCKED_REC 9 /* like DB_SUCCESS, but a new + explicit record lock was created */ #define DB_SUCCESS 10 /* The following are error codes */ @@ -69,6 +71,9 @@ Created 5/24/1996 Heikki Tuuri a feature that it can't recoginize or work with e.g., FT indexes created by a later version of the engine. 
*/ +#define DB_INTERRUPTED 49 /* the query has been interrupted with + "KILL QUERY N;" */ + /* The following are partial failure codes */ #define DB_FAIL 1000 #define DB_OVERFLOW 1001 diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index 7d5ff09c7a6..e76f23d0767 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -309,6 +309,7 @@ dict_create_foreign_constraints( name before it: test.table2; the default database id the database of parameter name */ + size_t sql_length, /* in: length of sql_string */ const char* name, /* in: table full name in the normalized form database_name/table_name */ diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index beaf17eda01..70b141eafeb 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -292,14 +292,15 @@ lock_sec_rec_modify_check_and_lock( dict_index_t* index, /* in: secondary index */ que_thr_t* thr); /* in: query thread */ /************************************************************************* -Like the counterpart for a clustered index below, but now we read a +Like lock_clust_rec_read_check_and_lock(), but reads a secondary index record. */ ulint lock_sec_rec_read_check_and_lock( /*=============================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, - DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ + /* out: DB_SUCCESS, DB_SUCCESS_LOCKED_REC, + DB_LOCK_WAIT, DB_DEADLOCK, + or DB_QUE_THR_SUSPENDED */ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, does nothing */ rec_t* rec, /* in: user record or page supremum record @@ -324,8 +325,9 @@ lock on the record. 
*/ ulint lock_clust_rec_read_check_and_lock( /*===============================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, - DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ + /* out: DB_SUCCESS, DB_SUCCESS_LOCKED_REC, + DB_LOCK_WAIT, DB_DEADLOCK, + or DB_QUE_THR_SUSPENDED */ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, does nothing */ rec_t* rec, /* in: user record or page supremum record diff --git a/storage/innobase/include/mach0data.ic b/storage/innobase/include/mach0data.ic index dc7918c287b..fceb8017121 100644 --- a/storage/innobase/include/mach0data.ic +++ b/storage/innobase/include/mach0data.ic @@ -19,7 +19,7 @@ mach_write_to_1( ulint n) /* in: ulint integer to be stored, >= 0, < 256 */ { ut_ad(b); - ut_ad(n <= 0xFFUL); + ut_ad((n | 0xFFUL) <= 0xFFUL); b[0] = (byte)n; } @@ -48,7 +48,7 @@ mach_write_to_2( ulint n) /* in: ulint integer to be stored */ { ut_ad(b); - ut_ad(n <= 0xFFFFUL); + ut_ad((n | 0xFFFFUL) <= 0xFFFFUL); b[0] = (byte)(n >> 8); b[1] = (byte)(n); @@ -64,10 +64,7 @@ mach_read_from_2( /* out: ulint integer */ byte* b) /* in: pointer to 2 bytes */ { - ut_ad(b); - return( ((ulint)(b[0]) << 8) - + (ulint)(b[1]) - ); + return(((ulint)(b[0]) << 8) | (ulint)(b[1])); } /************************************************************ @@ -112,7 +109,7 @@ mach_write_to_3( ulint n) /* in: ulint integer to be stored */ { ut_ad(b); - ut_ad(n <= 0xFFFFFFUL); + ut_ad((n | 0xFFFFFFUL) <= 0xFFFFFFUL); b[0] = (byte)(n >> 16); b[1] = (byte)(n >> 8); @@ -131,8 +128,8 @@ mach_read_from_3( { ut_ad(b); return( ((ulint)(b[0]) << 16) - + ((ulint)(b[1]) << 8) - + (ulint)(b[2]) + | ((ulint)(b[1]) << 8) + | (ulint)(b[2]) ); } @@ -166,9 +163,9 @@ mach_read_from_4( { ut_ad(b); return( ((ulint)(b[0]) << 24) - + ((ulint)(b[1]) << 16) - + ((ulint)(b[2]) << 8) - + (ulint)(b[3]) + | ((ulint)(b[1]) << 16) + | ((ulint)(b[2]) << 8) + | (ulint)(b[3]) ); } @@ -670,7 +667,7 @@ mach_read_from_2_little_endian( /* out: unsigned long int */ byte* buf) /* in: from where to read */ { - 
return((ulint)(*buf) + ((ulint)(*(buf + 1))) * 256); + return((ulint)(buf[0]) | ((ulint)(buf[1]) << 8)); } /************************************************************* diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h index 5430190fa51..488177791a4 100644 --- a/storage/innobase/include/row0mysql.h +++ b/storage/innobase/include/row0mysql.h @@ -246,22 +246,20 @@ row_update_for_mysql( row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL handle */ /************************************************************************* -This can only be used when srv_locks_unsafe_for_binlog is TRUE or -session is using a READ COMMITTED isolation level. Before -calling this function we must use trx_reset_new_rec_lock_info() and -trx_register_new_rec_lock() to store the information which new record locks -really were set. This function removes a newly set lock under prebuilt->pcur, -and also under prebuilt->clust_pcur. Currently, this is only used and tested -in the case of an UPDATE or a DELETE statement, where the row lock is of the -LOCK_X type. -Thus, this implements a 'mini-rollback' that releases the latest record -locks we set. */ +This can only be used when srv_locks_unsafe_for_binlog is TRUE or this +session is using a READ COMMITTED or READ UNCOMMITTED isolation level. +Before calling this function row_search_for_mysql() must have +initialized prebuilt->new_rec_locks to store the information which new +record locks really were set. This function removes a newly set +clustered index record lock under prebuilt->pcur or +prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that +releases the latest clustered index record lock we set. 
*/ int row_unlock_for_mysql( /*=================*/ /* out: error code or DB_SUCCESS */ - row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL + row_prebuilt_t* prebuilt, /* in/out: prebuilt struct in MySQL handle */ ibool has_latches_on_recs);/* TRUE if called so that we have the latches on the records under pcur @@ -366,6 +364,7 @@ row_table_add_foreign_constraints( FOREIGN KEY (a, b) REFERENCES table2(c, d), table2 can be written also with the database name before it: test.table2 */ + size_t sql_length, /* in: length of sql_string */ const char* name, /* in: table full name in the normalized form database_name/table_name */ @@ -659,18 +658,17 @@ struct row_prebuilt_struct { ulint new_rec_locks; /* normally 0; if srv_locks_unsafe_for_binlog is TRUE or session is using READ - COMMITTED isolation level, in a - cursor search, if we set a new - record lock on an index, this is - incremented; this is used in - releasing the locks under the - cursors if we are performing an - UPDATE and we determine after - retrieving the row that it does - not need to be locked; thus, - these can be used to implement a - 'mini-rollback' that releases - the latest record locks */ + COMMITTED or READ UNCOMMITTED + isolation level, set in + row_search_for_mysql() if we set a new + record lock on the secondary + or clustered index; this is + used in row_unlock_for_mysql() + when releasing the lock under + the cursor if we determine + after retrieving the row that + it does not need to be locked + ('mini-rollback') */ ulint mysql_prefix_len;/* byte offset of the end of the last requested column */ ulint mysql_row_len; /* length in bytes of a row in the diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h index 6a61330f97e..9430d4cb723 100644 --- a/storage/innobase/include/sync0sync.h +++ b/storage/innobase/include/sync0sync.h @@ -401,7 +401,7 @@ or row lock! */ locked; see e.g. ibuf_bitmap_get_map_page(). 
*/ #define SYNC_DICT_OPERATION 1001 /* table create, drop, etc. reserve - this in X-mode, implicit or backround + this in X-mode; implicit or backround operations purge, rollback, foreign key checks reserve this in S-mode */ #define SYNC_DICT 1000 diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index cdbf1970715..97a47d9f46e 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -444,6 +444,8 @@ struct trx_struct{ char** mysql_query_str;/* pointer to the field in mysqld_thd which contains the pointer to the current SQL query string */ + size_t* mysql_query_len;/* pointer to the length of the + current SQL query string */ const char* mysql_log_file_name; /* if MySQL binlog is used, this field contains a pointer to the latest file diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index ee3a0b27b20..97d022d284e 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -82,9 +82,9 @@ memory is read outside the allocated blocks. */ /* Make a non-inline debug version */ -#ifdef HAVE_purify +#if defined HAVE_VALGRIND # define UNIV_DEBUG_VALGRIND -#endif /* HAVE_purify */ +#endif /* HAVE_VALGRIND */ #if 0 #define UNIV_DEBUG_VALGRIND /* Enable extra Valgrind instrumentation */ @@ -126,11 +126,6 @@ by one. 
*/ /* the above option prevents forcing of log to disk at a buffer page write: it should be tested with this option off; also some ibuf tests are suppressed */ -/* -#define UNIV_BASIC_LOG_DEBUG -*/ - /* the above option enables basic recovery debugging: - new allocated file pages are reset */ #if (!defined(UNIV_DEBUG) && !defined(INSIDE_HA_INNOBASE_CC) && !defined(UNIV_MUST_NOT_INLINE)) /* Definition for inline version */ diff --git a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c index 7df8ea50887..04240960b3a 100644 --- a/storage/innobase/lock/lock0lock.c +++ b/storage/innobase/lock/lock0lock.c @@ -1739,11 +1739,12 @@ ulint lock_rec_enqueue_waiting( /*=====================*/ /* out: DB_LOCK_WAIT, DB_DEADLOCK, or - DB_QUE_THR_SUSPENDED, or DB_SUCCESS; - DB_SUCCESS means that there was a deadlock, - but another transaction was chosen as a - victim, and we got the lock immediately: - no need to wait then */ + DB_QUE_THR_SUSPENDED, or DB_SUCCESS_LOCKED_REC; + DB_SUCCESS_LOCKED_REC means that there + was a deadlock, but another + transaction was chosen as a victim, + and we got the lock immediately: no + need to wait then */ ulint type_mode,/* in: lock mode this transaction is requesting: LOCK_S or LOCK_X, possibly ORed with LOCK_GAP or LOCK_REC_NOT_GAP, ORed @@ -1804,7 +1805,7 @@ lock_rec_enqueue_waiting( if (trx->wait_lock == NULL) { - return(DB_SUCCESS); + return(DB_SUCCESS_LOCKED_REC); } trx->que_state = TRX_QUE_LOCK_WAIT; @@ -1903,6 +1904,16 @@ lock_rec_add_to_queue( return(lock_rec_create(type_mode, rec, index, trx)); } +/** Record locking request status */ +enum lock_rec_req_status { + /** Failed to acquire a lock */ + LOCK_REC_FAIL, + /** Succeeded in acquiring a lock (implicit or already acquired) */ + LOCK_REC_SUCCESS, + /** Explicitly created a new lock */ + LOCK_REC_SUCCESS_CREATED +}; + /************************************************************************* This is a fast routine for locking a record in the most common cases: 
there are no explicit locks on the page, or there is just one lock, owned @@ -1911,10 +1922,10 @@ which does NOT look at implicit locks! Checks lock compatibility within explicit locks. This function sets a normal next-key lock, or in the case of a page supremum record, a gap type lock. */ UNIV_INLINE -ibool +enum lock_rec_req_status lock_rec_lock_fast( /*===============*/ - /* out: TRUE if locking succeeded */ + /* out: whether the locking succeeded */ ibool impl, /* in: if TRUE, no lock is set if no wait is necessary: we assume that the caller will set an implicit lock */ @@ -1950,19 +1961,19 @@ lock_rec_lock_fast( lock_rec_create(mode, rec, index, trx); } - return(TRUE); + return(LOCK_REC_SUCCESS_CREATED); } if (lock_rec_get_next_on_page(lock)) { - return(FALSE); + return(LOCK_REC_FAIL); } if (lock->trx != trx || lock->type_mode != (mode | LOCK_REC) || lock_rec_get_n_bits(lock) <= heap_no) { - return(FALSE); + return(LOCK_REC_FAIL); } if (!impl) { @@ -1971,10 +1982,11 @@ lock_rec_lock_fast( if (!lock_rec_get_nth_bit(lock, heap_no)) { lock_rec_set_nth_bit(lock, heap_no); + return(LOCK_REC_SUCCESS_CREATED); } } - return(TRUE); + return(LOCK_REC_SUCCESS); } /************************************************************************* @@ -1986,8 +1998,9 @@ static ulint lock_rec_lock_slow( /*===============*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, or error - code */ + /* out: DB_SUCCESS, DB_SUCCESS_LOCKED_REC, + DB_LOCK_WAIT, DB_DEADLOCK, + or DB_QUE_THR_SUSPENDED */ ibool impl, /* in: if TRUE, no lock is set if no wait is necessary: we assume that the caller will set an implicit lock */ @@ -1998,7 +2011,6 @@ lock_rec_lock_slow( que_thr_t* thr) /* in: query thread */ { trx_t* trx; - ulint err; ut_ad(mutex_own(&kernel_mutex)); ut_ad((LOCK_MODE_MASK & mode) != LOCK_S @@ -2017,26 +2029,21 @@ lock_rec_lock_slow( /* The trx already has a strong enough lock on rec: do nothing */ - err = DB_SUCCESS; } else if (lock_rec_other_has_conflicting(mode, rec, trx)) { /* If another 
transaction has a non-gap conflicting request in the queue, as this transaction does not have a lock strong enough already granted on the record, we have to wait. */ - err = lock_rec_enqueue_waiting(mode, rec, index, thr); - } else { - if (!impl) { - /* Set the requested lock on the record */ + return(lock_rec_enqueue_waiting(mode, rec, index, thr)); + } else if (!impl) { + /* Set the requested lock on the record */ - lock_rec_add_to_queue(LOCK_REC | mode, rec, index, - trx); - } - - err = DB_SUCCESS; + lock_rec_add_to_queue(LOCK_REC | mode, rec, index, trx); + return(DB_SUCCESS_LOCKED_REC); } - return(err); + return(DB_SUCCESS); } /************************************************************************* @@ -2049,8 +2056,9 @@ static ulint lock_rec_lock( /*==========*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, or error - code */ + /* out: DB_SUCCESS, DB_SUCCESS_LOCKED_REC, + DB_LOCK_WAIT, DB_DEADLOCK, or + DB_QUE_THR_SUSPENDED */ ibool impl, /* in: if TRUE, no lock is set if no wait is necessary: we assume that the caller will set an implicit lock */ @@ -2060,8 +2068,6 @@ lock_rec_lock( dict_index_t* index, /* in: index of record */ que_thr_t* thr) /* in: query thread */ { - ulint err; - ut_ad(mutex_own(&kernel_mutex)); ut_ad((LOCK_MODE_MASK & mode) != LOCK_S || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); @@ -2073,17 +2079,19 @@ lock_rec_lock( || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP || mode - (LOCK_MODE_MASK & mode) == 0); - if (lock_rec_lock_fast(impl, mode, rec, index, thr)) { - - /* We try a simplified and faster subroutine for the most - common cases */ - - err = DB_SUCCESS; - } else { - err = lock_rec_lock_slow(impl, mode, rec, index, thr); + /* We try a simplified and faster subroutine for the most + common cases */ + switch (lock_rec_lock_fast(impl, mode, rec, index, thr)) { + case LOCK_REC_SUCCESS: + return(DB_SUCCESS); + case LOCK_REC_SUCCESS_CREATED: + return(DB_SUCCESS_LOCKED_REC); + case LOCK_REC_FAIL: + 
return(lock_rec_lock_slow(impl, mode, rec, index, thr)); } - return(err); + ut_error; + return(DB_ERROR); } /************************************************************************* @@ -4832,7 +4840,7 @@ lock_rec_insert_check_and_lock( lock = lock_rec_get_first(next_rec); - if (lock == NULL) { + if (UNIV_LIKELY(lock == NULL)) { /* We optimize CPU time usage in the simplest case */ lock_mutex_exit_kernel(); @@ -4840,8 +4848,7 @@ lock_rec_insert_check_and_lock( if (!(index->type & DICT_CLUSTERED)) { /* Update the page max trx id field */ - page_update_max_trx_id(buf_frame_align(rec), - thr_get_trx(thr)->id); + page_update_max_trx_id(buf_frame_align(rec), trx->id); } return(DB_SUCCESS); @@ -4873,11 +4880,16 @@ lock_rec_insert_check_and_lock( lock_mutex_exit_kernel(); - if (!(index->type & DICT_CLUSTERED) && (err == DB_SUCCESS)) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + /* fall through */ + case DB_SUCCESS: + if (index->type & DICT_CLUSTERED) { + break; + } /* Update the page max trx id field */ - page_update_max_trx_id(buf_frame_align(rec), - thr_get_trx(thr)->id); + page_update_max_trx_id(buf_frame_align(rec), trx->id); } #ifdef UNIV_DEBUG @@ -4984,6 +4996,10 @@ lock_clust_rec_modify_check_and_lock( ut_ad(lock_rec_queue_validate(rec, index, offsets)); + if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) { + err = DB_SUCCESS; + } + return(err); } @@ -5043,25 +5059,29 @@ lock_sec_rec_modify_check_and_lock( } #endif /* UNIV_DEBUG */ - if (err == DB_SUCCESS) { + if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) { /* Update the page max trx id field */ - + /* It might not be necessary to do this if + err == DB_SUCCESS (no new lock created), + but it should not cost too much performance. 
*/ page_update_max_trx_id(buf_frame_align(rec), thr_get_trx(thr)->id); + err = DB_SUCCESS; } return(err); } /************************************************************************* -Like the counterpart for a clustered index below, but now we read a +Like lock_clust_rec_read_check_and_lock(), but reads a secondary index record. */ ulint lock_sec_rec_read_check_and_lock( /*=============================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, - DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ + /* out: DB_SUCCESS, DB_SUCCESS_LOCKED_REC, + DB_LOCK_WAIT, DB_DEADLOCK, + or DB_QUE_THR_SUSPENDED */ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, does nothing */ rec_t* rec, /* in: user record or page supremum record @@ -5126,8 +5146,9 @@ lock on the record. */ ulint lock_clust_rec_read_check_and_lock( /*===============================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, - DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ + /* out: DB_SUCCESS, DB_SUCCESS_LOCKED_REC, + DB_LOCK_WAIT, DB_DEADLOCK, + or DB_QUE_THR_SUSPENDED */ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, does nothing */ rec_t* rec, /* in: user record or page supremum record @@ -5206,16 +5227,21 @@ lock_clust_rec_read_check_and_lock_alt( mem_heap_t* tmp_heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; - ulint ret; + ulint err; *offsets_ = (sizeof offsets_) / sizeof *offsets_; offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &tmp_heap); - ret = lock_clust_rec_read_check_and_lock(flags, rec, index, + err = lock_clust_rec_read_check_and_lock(flags, rec, index, offsets, mode, gap_mode, thr); if (tmp_heap) { mem_heap_free(tmp_heap); } - return(ret); + + if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) { + err = DB_SUCCESS; + } + + return(err); } diff --git a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c index 7373a97cfb0..3396d1adf2f 100644 --- a/storage/innobase/os/os0file.c +++ b/storage/innobase/os/os0file.c @@ -3974,6 +3974,9 @@ os_aio_simulated_handle( 
ulint n; ulint i; + /* Fix compiler warning */ + *consecutive_ios = NULL; + segment = os_aio_get_array_and_local_segment(&array, global_segment); restart: diff --git a/storage/innobase/row/row0ins.c b/storage/innobase/row/row0ins.c index 51c295b5098..9786f90fd39 100644 --- a/storage/innobase/row/row0ins.c +++ b/storage/innobase/row/row0ins.c @@ -1114,7 +1114,8 @@ static ulint row_ins_set_shared_rec_lock( /*========================*/ - /* out: DB_SUCCESS or error code */ + /* out: DB_SUCCESS, DB_SUCCESS_LOCKED_REC, + or error code */ ulint type, /* in: LOCK_ORDINARY, LOCK_GAP, or LOCK_REC_NOT_GAP type lock */ rec_t* rec, /* in: record */ @@ -1145,7 +1146,8 @@ static ulint row_ins_set_exclusive_rec_lock( /*===========================*/ - /* out: DB_SUCCESS or error code */ + /* out: DB_SUCCESS, DB_SUCCESS_LOCKED_REC, + or error code */ ulint type, /* in: LOCK_ORDINARY, LOCK_GAP, or LOCK_REC_NOT_GAP type lock */ rec_t* rec, /* in: record */ @@ -1195,9 +1197,7 @@ row_ins_check_foreign_constraint( dict_table_t* check_table; dict_index_t* check_index; ulint n_fields_cmp; - rec_t* rec; btr_pcur_t pcur; - ibool moved; int cmp; ulint err; ulint i; @@ -1328,12 +1328,12 @@ run_again: /* Scan index records and check if there is a matching record */ - for (;;) { - rec = btr_pcur_get_rec(&pcur); + do { + rec_t* rec = btr_pcur_get_rec(&pcur); if (page_rec_is_infimum(rec)) { - goto next_rec; + continue; } offsets = rec_get_offsets(rec, check_index, @@ -1343,12 +1343,13 @@ run_again: err = row_ins_set_shared_rec_lock( LOCK_ORDINARY, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - - break; + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + continue; + default: + goto end_scan; } - - goto next_rec; } cmp = cmp_dtuple_rec(entry, rec, offsets); @@ -1359,9 +1360,12 @@ run_again: err = row_ins_set_shared_rec_lock( LOCK_ORDINARY, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: 
break; + default: + goto end_scan; } } else { /* Found a matching record. Lock only @@ -1372,15 +1376,18 @@ run_again: LOCK_REC_NOT_GAP, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: break; + default: + goto end_scan; } if (check_ref) { err = DB_SUCCESS; - break; + goto end_scan; } else if (foreign->type != 0) { /* There is an ON UPDATE or ON DELETE condition: check them in a separate @@ -1406,7 +1413,7 @@ run_again: err = DB_FOREIGN_DUPLICATE_KEY; } - break; + goto end_scan; } } else { row_ins_foreign_report_err( @@ -1414,48 +1421,39 @@ run_again: thr, foreign, rec, entry); err = DB_ROW_IS_REFERENCED; - break; + goto end_scan; } } - } + } else { + ut_a(cmp < 0); - if (cmp < 0) { err = row_ins_set_shared_rec_lock( LOCK_GAP, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - - break; - } - - if (check_ref) { - err = DB_NO_REFERENCED_ROW; - row_ins_foreign_report_add_err( - trx, foreign, rec, entry); - } else { - err = DB_SUCCESS; + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + if (check_ref) { + err = DB_NO_REFERENCED_ROW; + row_ins_foreign_report_add_err( + trx, foreign, rec, entry); + } else { + err = DB_SUCCESS; + } } - break; + goto end_scan; } + } while (btr_pcur_move_to_next(&pcur, &mtr)); - ut_a(cmp == 0); -next_rec: - moved = btr_pcur_move_to_next(&pcur, &mtr); - - if (!moved) { - if (check_ref) { - rec = btr_pcur_get_rec(&pcur); - row_ins_foreign_report_add_err( - trx, foreign, rec, entry); - err = DB_NO_REFERENCED_ROW; - } else { - err = DB_SUCCESS; - } - - break; - } + if (check_ref) { + row_ins_foreign_report_add_err( + trx, foreign, btr_pcur_get_rec(&pcur), entry); + err = DB_NO_REFERENCED_ROW; + } else { + err = DB_SUCCESS; } +end_scan: btr_pcur_close(&pcur); mtr_commit(&mtr); @@ -1641,10 +1639,8 @@ row_ins_scan_sec_index_for_duplicate( ulint i; int cmp; ulint n_fields_cmp; - rec_t* rec; btr_pcur_t pcur; ulint err = DB_SUCCESS; - ibool 
moved; unsigned allow_duplicates; mtr_t mtr; mem_heap_t* heap = NULL; @@ -1680,12 +1676,12 @@ row_ins_scan_sec_index_for_duplicate( /* Scan index records and check if there is a duplicate */ - for (;;) { - rec = btr_pcur_get_rec(&pcur); + do { + rec_t* rec = btr_pcur_get_rec(&pcur); if (page_rec_is_infimum(rec)) { - goto next_rec; + continue; } offsets = rec_get_offsets(rec, index, offsets, @@ -1706,14 +1702,18 @@ row_ins_scan_sec_index_for_duplicate( LOCK_ORDINARY, rec, index, offsets, thr); } - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: break; + default: + goto end_scan; } if (page_rec_is_supremum(rec)) { - goto next_rec; + continue; } cmp = cmp_dtuple_rec(entry, rec, offsets); @@ -1725,23 +1725,15 @@ row_ins_scan_sec_index_for_duplicate( thr_get_trx(thr)->error_info = index; - break; + goto end_scan; } + } else { + ut_a(cmp < 0); + goto end_scan; } + } while (btr_pcur_move_to_next(&pcur, &mtr)); - if (cmp < 0) { - break; - } - - ut_a(cmp == 0); -next_rec: - moved = btr_pcur_move_to_next(&pcur, &mtr); - - if (!moved) { - break; - } - } - +end_scan: if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } @@ -1837,7 +1829,11 @@ row_ins_duplicate_error_in_clust( cursor->index, offsets, thr); } - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto func_exit; } @@ -1875,7 +1871,11 @@ row_ins_duplicate_error_in_clust( cursor->index, offsets, thr); } - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto func_exit; } diff --git a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c index a0e0ee99775..4a834c4efc2 100644 --- a/storage/innobase/row/row0mysql.c +++ b/storage/innobase/row/row0mysql.c @@ -483,6 +483,7 @@ handle_new_error: } else if (err == DB_ROW_IS_REFERENCED || err == DB_NO_REFERENCED_ROW || err == DB_CANNOT_ADD_CONSTRAINT + || err == DB_INTERRUPTED || 
err == DB_TOO_MANY_CONCURRENT_TRXS) { if (savept) { /* Roll back the latest, possibly incomplete @@ -1454,22 +1455,20 @@ run_again: } /************************************************************************* -This can only be used when srv_locks_unsafe_for_binlog is TRUE or -this session is using a READ COMMITTED isolation level. Before -calling this function we must use trx_reset_new_rec_lock_info() and -trx_register_new_rec_lock() to store the information which new record locks -really were set. This function removes a newly set lock under prebuilt->pcur, -and also under prebuilt->clust_pcur. Currently, this is only used and tested -in the case of an UPDATE or a DELETE statement, where the row lock is of the -LOCK_X type. -Thus, this implements a 'mini-rollback' that releases the latest record -locks we set. */ +This can only be used when srv_locks_unsafe_for_binlog is TRUE or this +session is using a READ COMMITTED or READ UNCOMMITTED isolation level. +Before calling this function row_search_for_mysql() must have +initialized prebuilt->new_rec_locks to store the information which new +record locks really were set. This function removes a newly set +clustered index record lock under prebuilt->pcur or +prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that +releases the latest clustered index record lock we set. 
*/ int row_unlock_for_mysql( /*=================*/ /* out: error code or DB_SUCCESS */ - row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL + row_prebuilt_t* prebuilt, /* in/out: prebuilt struct in MySQL handle */ ibool has_latches_on_recs)/* TRUE if called so that we have the latches on the records under pcur @@ -2103,6 +2102,7 @@ row_table_add_foreign_constraints( FOREIGN KEY (a, b) REFERENCES table2(c, d), table2 can be written also with the database name before it: test.table2 */ + size_t sql_length, /* in: length of sql_string */ const char* name, /* in: table full name in the normalized form database_name/table_name */ @@ -2124,8 +2124,8 @@ row_table_add_foreign_constraints( trx->dict_operation = TRUE; - err = dict_create_foreign_constraints(trx, sql_string, name, - reject_fks); + err = dict_create_foreign_constraints(trx, sql_string, sql_length, + name, reject_fks); if (err == DB_SUCCESS) { /* Check that also referencing constraints are ok */ diff --git a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c index 1d30249c53e..06a19ba7979 100644 --- a/storage/innobase/row/row0sel.c +++ b/storage/innobase/row/row0sel.c @@ -754,8 +754,14 @@ row_sel_get_clust_rec( 0, clust_rec, index, offsets, node->row_lock_mode, lock_type, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS: + case DB_SUCCESS_LOCKED_REC: + /* Declare the variable uninitialized in Valgrind. + It should be set to DB_SUCCESS at func_exit. 
*/ + UNIV_MEM_INVALID(&err, sizeof err); + break; + default: goto err_exit; } } else { @@ -826,7 +832,8 @@ UNIV_INLINE ulint sel_set_rec_lock( /*=============*/ - /* out: DB_SUCCESS or error code */ + /* out: DB_SUCCESS, DB_SUCCESS_LOCKED_REC, + or error code */ rec_t* rec, /* in: record */ dict_index_t* index, /* in: index */ const ulint* offsets,/* in: rec_get_offsets(rec, index) */ @@ -1374,11 +1381,15 @@ rec_loop: node->row_lock_mode, lock_type, thr); - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: /* Note that in this case we will store in pcur the PREDECESSOR of the record we are waiting the lock for */ - goto lock_wait_or_error; } } @@ -1429,8 +1440,12 @@ skip_lock: err = sel_set_rec_lock(rec, index, offsets, node->row_lock_mode, lock_type, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -2452,6 +2467,7 @@ row_sel_field_store_in_mysql_format( byte* pad_ptr; ut_ad(len != UNIV_SQL_NULL); + UNIV_MEM_ASSERT_RW(data, len); if (templ->type == DATA_INT) { /* Convert integer data from Innobase to a little-endian @@ -2605,6 +2621,12 @@ row_sel_store_mysql_rec( prebuilt->blob_heap = NULL; } + /* init null bytes with default values as they might be + left uninitialized in some cases and this uninited bytes + might be copied into mysql record buffer that leads to + valgrind warnings */ + memcpy(mysql_rec, prebuilt->default_rec, prebuilt->null_bitmap_len); + for (i = 0; i < prebuilt->n_template; i++) { templ = prebuilt->mysql_template + i; @@ -2687,6 +2709,9 @@ row_sel_store_mysql_rec( /* MySQL assumes that the field for an SQL NULL value is set to the default value. 
*/ + UNIV_MEM_ASSERT_RW(prebuilt->default_rec + + templ->mysql_col_offset, + templ->mysql_col_len); mysql_rec[templ->mysql_null_byte_offset] |= (byte) templ->mysql_null_bit_mask; memcpy(mysql_rec + templ->mysql_col_offset, @@ -2741,7 +2766,8 @@ static ulint row_sel_get_clust_rec_for_mysql( /*============================*/ - /* out: DB_SUCCESS or error code */ + /* out: DB_SUCCESS, DB_SUCCESS_LOCKED_REC, + or error code */ row_prebuilt_t* prebuilt,/* in: prebuilt struct in the handle */ dict_index_t* sec_index,/* in: secondary index where rec resides */ rec_t* rec, /* in: record in a non-clustered index; if @@ -2822,6 +2848,7 @@ row_sel_get_clust_rec_for_mysql( clust_rec = NULL; + err = DB_SUCCESS; goto func_exit; } @@ -2836,8 +2863,11 @@ row_sel_get_clust_rec_for_mysql( err = lock_clust_rec_read_check_and_lock( 0, clust_rec, clust_index, *offsets, prebuilt->select_lock_type, LOCK_REC_NOT_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS: + case DB_SUCCESS_LOCKED_REC: + break; + default: goto err_exit; } } else { @@ -2896,6 +2926,8 @@ row_sel_get_clust_rec_for_mysql( rec, sec_index, clust_rec, clust_index)); #endif } + + err = DB_SUCCESS; } func_exit: @@ -2908,7 +2940,6 @@ func_exit: btr_pcur_store_position(prebuilt->clust_pcur, mtr); } - err = DB_SUCCESS; err_exit: return(err); } @@ -3007,6 +3038,11 @@ row_sel_pop_cached_row_for_mysql( for (i = 0; i < prebuilt->n_template; i++) { templ = prebuilt->mysql_template + i; +#if 0 /* Some of the cached_rec may legitimately be uninitialized. */ + UNIV_MEM_ASSERT_RW(cached_rec + + templ->mysql_col_offset, + templ->mysql_col_len); +#endif ut_memcpy(buf + templ->mysql_col_offset, cached_rec + templ->mysql_col_offset, templ->mysql_col_len); @@ -3021,6 +3057,11 @@ row_sel_pop_cached_row_for_mysql( } } else { +#if 0 /* Some of the cached_rec may legitimately be uninitialized. 
*/ + UNIV_MEM_ASSERT_RW(prebuilt->fetch_cache + [prebuilt->fetch_cache_first], + prebuilt->mysql_prefix_len); +#endif ut_memcpy(buf, prebuilt->fetch_cache[prebuilt->fetch_cache_first], prebuilt->mysql_prefix_len); @@ -3070,6 +3111,8 @@ row_sel_push_cache_row_for_mysql( } ut_ad(prebuilt->fetch_cache_first == 0); + UNIV_MEM_INVALID(prebuilt->fetch_cache[prebuilt->n_fetch_cached], + prebuilt->mysql_row_len); if (UNIV_UNLIKELY(!row_sel_store_mysql_rec( prebuilt->fetch_cache[ @@ -3610,8 +3653,12 @@ shortcut_fails_too_big_rec: prebuilt->select_lock_type, LOCK_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -3708,8 +3755,12 @@ rec_loop: prebuilt->select_lock_type, LOCK_ORDINARY, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -3840,8 +3891,11 @@ wrong_offs: prebuilt->select_lock_type, LOCK_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -3875,8 +3929,11 @@ wrong_offs: prebuilt->select_lock_type, LOCK_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -3945,15 +4002,21 @@ no_gap_lock: switch (err) { rec_t* old_vers; - case DB_SUCCESS: + case DB_SUCCESS_LOCKED_REC: if (srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) { + || trx->isolation_level + <= TRX_ISO_READ_COMMITTED) { /* Note that a record of prebuilt->index was locked. */ prebuilt->new_rec_locks = 1; } + err = DB_SUCCESS; + case DB_SUCCESS: break; case DB_LOCK_WAIT: + /* Never unlock rows that were part of a conflict. 
*/ + prebuilt->new_rec_locks = 0; + if (UNIV_LIKELY(prebuilt->row_read_type != ROW_READ_TRY_SEMI_CONSISTENT) || unique_search @@ -3983,7 +4046,6 @@ no_gap_lock: if (UNIV_LIKELY(trx->wait_lock != NULL)) { lock_cancel_waiting_and_release( trx->wait_lock); - prebuilt->new_rec_locks = 0; } else { mutex_exit(&kernel_mutex); @@ -3995,9 +4057,6 @@ no_gap_lock: ULINT_UNDEFINED, &heap); err = DB_SUCCESS; - /* Note that a record of - prebuilt->index was locked. */ - prebuilt->new_rec_locks = 1; break; } mutex_exit(&kernel_mutex); @@ -4135,27 +4194,30 @@ requires_clust_rec: err = row_sel_get_clust_rec_for_mysql(prebuilt, index, rec, thr, &clust_rec, &offsets, &heap, &mtr); - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS: + if (clust_rec == NULL) { + /* The record did not exist in the read view */ + ut_ad(prebuilt->select_lock_type == LOCK_NONE); + goto next_rec; + } + break; + case DB_SUCCESS_LOCKED_REC: + ut_a(clust_rec != NULL); + if (srv_locks_unsafe_for_binlog + || trx->isolation_level + <= TRX_ISO_READ_COMMITTED) { + /* Note that the clustered index record + was locked. */ + prebuilt->new_rec_locks = 2; + } + err = DB_SUCCESS; + break; + default: goto lock_wait_or_error; } - if (clust_rec == NULL) { - /* The record did not exist in the read view */ - ut_ad(prebuilt->select_lock_type == LOCK_NONE); - - goto next_rec; - } - - if ((srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE) { - /* Note that both the secondary index record - and the clustered index record were locked. 
*/ - ut_ad(prebuilt->new_rec_locks == 1); - prebuilt->new_rec_locks = 2; - } - if (UNIV_UNLIKELY(rec_get_deleted_flag(clust_rec, comp))) { /* The record is delete marked: we can skip it */ diff --git a/storage/innobase/row/row0undo.c b/storage/innobase/row/row0undo.c index f03f84ed1b0..7f31fd0060c 100644 --- a/storage/innobase/row/row0undo.c +++ b/storage/innobase/row/row0undo.c @@ -272,7 +272,7 @@ row_undo( if (locked_data_dict) { - row_mysql_lock_data_dictionary(trx); + row_mysql_freeze_data_dictionary(trx); } if (node->state == UNDO_NODE_INSERT) { @@ -287,7 +287,7 @@ row_undo( if (locked_data_dict) { - row_mysql_unlock_data_dictionary(trx); + row_mysql_unfreeze_data_dictionary(trx); } /* Do some cleanup */ diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index a2eed3f171c..5b1184fb416 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -1554,12 +1554,16 @@ srv_suspend_mysql_thread( mutex_exit(&kernel_mutex); - if (trx_is_interrupted(trx) - || (srv_lock_wait_timeout < 100000000 - && wait_time > (double)srv_lock_wait_timeout)) { + if (srv_lock_wait_timeout < 100000000 + && wait_time > (double)srv_lock_wait_timeout) { trx->error_state = DB_LOCK_WAIT_TIMEOUT; } + + if (trx_is_interrupted(trx)) { + + trx->error_state = DB_INTERRUPTED; + } #else /* UNIV_HOTBACKUP */ /* This function depends on MySQL code that is not included in InnoDB Hot Backup builds. 
Besides, this function should never diff --git a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c index a7950473a17..9d057110d11 100644 --- a/storage/innobase/srv/srv0start.c +++ b/storage/innobase/srv/srv0start.c @@ -102,20 +102,6 @@ static char* srv_monitor_file_name; #define SRV_MAX_N_PENDING_SYNC_IOS 100 -/* Avoid warnings when using purify */ - -#ifdef HAVE_purify -static int inno_bcmp(register const char *s1, register const char *s2, - register uint len) -{ - while ((len-- != 0) && (*s1++ == *s2++)) - ; - - return(len + 1); -} -#define memcmp(A,B,C) inno_bcmp((A),(B),(C)) -#endif - static char* srv_parse_megabytes( diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c index fae479feddc..545226a5994 100644 --- a/storage/innobase/trx/trx0trx.c +++ b/storage/innobase/trx/trx0trx.c @@ -131,6 +131,8 @@ trx_create( trx->mysql_thd = NULL; trx->mysql_query_str = NULL; + trx->mysql_query_len = NULL; + trx->active_trans = 0; trx->duplicates = 0; @@ -936,6 +938,7 @@ trx_commit_off_kernel( trx->undo_no = ut_dulint_zero; trx->last_sql_stat_start.least_undo_no = ut_dulint_zero; trx->mysql_query_str = NULL; + trx->mysql_query_len = NULL; ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0); ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0); diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index bc69aaca96a..3e802360d23 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,152 @@ +2010-07-27 The InnoDB Team + + * include/mem0pool.h, mem/mem0mem.c, mem/mem0pool.c, srv/srv0start.c: + Fix Bug#55581 shutdown with innodb-use-sys-malloc=0: assert + mutex->magic_n == MUTEX_MAGIC_N. 
+ +2010-06-30 The InnoDB Team + + * btr/btr0sea.c, ha/ha0ha.c, handler/ha_innodb.cc, include/btr0sea.h: + Fix Bug#54311 Crash on CHECK PARTITION after concurrent LOAD DATA + and adaptive_hash_index=OFF + +2010-06-29 The InnoDB Team + * row/row0row.c, row/row0undo.c, row/row0upd.c: + Fix Bug#54408 txn rollback after recovery: row0umod.c:673 + dict_table_get_format(index->table) + +2010-06-29 The InnoDB Team + + * btr/btr0cur.c, include/btr0cur.h, + include/row0mysql.h, row/row0merge.c, row/row0sel.c: + Fix Bug#54358 READ UNCOMMITTED access failure of off-page DYNAMIC + or COMPRESSED columns + +2010-06-24 The InnoDB Team + + * handler/ha_innodb.cc: + Fix Bug#54679 alter table causes compressed row_format to revert + to compact + +2010-06-22 The InnoDB Team + + * dict/dict0dict.c, dict/dict0mem.c, include/dict0mem.h, + include/univ.i, page/page0zip.c, row/row0merge.c: + Fix Bug#47991 InnoDB Dictionary Cache memory usage increases + indefinitely when renaming tables + +2010-06-22 The InnoDB Team + + * handler/ha_innodb.cc: + Fix Bug#54686: "field->col->mtype == type" assertion error at + row/row0sel.c + +2010-06-22 The InnoDB Team + + * handler/ha_innodb.cc, innodb_bug54044.result, innodb_bug54044.test: + Fix Bug#54044 Create temporary tables and using innodb crashes. + +2010-06-21 The InnoDB Team + + * dict/dict0load.c, fil/fil0fil.c: + Fix Bug#54658: InnoDB: Warning: allocated tablespace %lu, + old maximum was 0 (introduced in Bug #53578 fix) + +2010-06-16 The InnoDB Team + + * row/row0merge.c: + Fix Bug#54330 Broken fast index creation + +2010-06-10 The InnoDB Team + + * include/log0log.ic, row/row0ins.c, row/row0purge.c, + row/row0uins.c, row/row0umod.c, row/row0upd.c: + Fix Bug#39168 ERROR: the age of the last checkpoint ... 
exceeds + the log group capacity + +2010-06-08 The InnoDB Team + + * dict/dict0load.c: + Fix Bug#54009 Server crashes when data is selected from non backed + up table for InnoDB plugin + +2010-06-02 The InnoDB Team + + * include/db0err.h, include/lock0lock.h, include/row0mysql.h, + lock/lock0lock.c, row/row0ins.c, row/row0mysql.c, row/row0sel.c: + Fix Bug#53674 InnoDB: Error: unlock row could not find a + 4 mode lock on the record + +2010-06-01 The InnoDB Team + + * include/sync0rw.h, sync/sync0rw.c: + Fix Bug#48197 Concurrent rw_lock_free may cause assertion failure + +2010-06-01 The InnoDB Team + + * row/row0umod.c: + Fix Bug#53812 assert row/row0umod.c line 660 in txn rollback + after crash recovery + +2010-05-25 The InnoDB Team + + * handler/ha_innodb.cc, include/row0mysql.h, row/row0mysql.c: + Fix Bug#53592: crash replacing duplicates into table after fast + alter table added unique key + +2010-05-24 The InnoDB Team + + * dict/dict0boot.c, dict/dict0crea.c, fil/fil0fil.c, + include/dict0boot.h, include/fil0fil.h, row/row0mysql.c: + Fix Bug#53578: assert on invalid page access, in fil_io() + +2010-05-14 The InnoDB Team + * mysql-test/innodb_bug48024.test, mysql-test/innodb_bug48024.result, + dict/dict0dict.c, handler/ha_innodb.cc, handler/ha_innodb.h, + include/dict0dict.h, include/ha_prototypes.h, include/row0mysql.h, + include/trx0trx.h, row/row0mysql.c, trx/trx0i_s.c, trx/trx0trx.c: + Fix Bug#48024 Innodb doesn't work with multi-statements + Fix Bug#53644 InnoDB thinks that /*/ starts and ends a comment + +2010-05-12 The InnoDB Team + + * handler/handler0alter.cc: + Fix Bug#53591 crash with fast alter table and text/blob prefix + primary key + +2010-05-12 The InnoDB Team + + * row/row0merge.c: + Fix Bug#53471 row_merge_drop_temp_indexes() refers freed memory, SEGVs + +2010-05-11 The InnoDB Team + + * mysql-test/innodb_bug53290.test, mysql-test/innodb_bug53290.result, + include/rem0cmp.h, rem/rem0cmp.c, row/row0merge.c: + Fix Bug#53290 wrong duplicate key 
error when adding a unique index + via fast alter table + +2010-05-11 The InnoDB Team + * buf/buf0lru.c, include/buf0buf.ic: + Fix Bug#53307 valgrind: warnings in main.partition_innodb_plugin + +2010-05-05 The InnoDB Team + + * row/row0merge.c: + Fix Bug#53256 in a stress test, assert dict/dict0dict.c:815 + table2 == NULL + +2010-05-05 The InnoDB Team + + * handler/ha_innodb.cc: + Fix Bug#53165 Setting innodb_change_buffering=DEFAULT produces + incorrect result + +2010-05-04 The InnoDB Team + + * fsp/fsp0fsp.c: + Fix Bug#53306 valgrind: warnings in innodb.innodb + 2010-05-03 The InnoDB Team * buf0buf.c: @@ -48,12 +197,6 @@ Only check the record size at index creation time when innodb_strict_mode is set or when ROW_FORMAT is DYNAMIC or COMPRESSED. -2010-04-20 The InnoDB Team - - * btr/btr0btr.c, include/univ.i: - Implement UNIV_BTR_AVOID_COPY, for avoiding writes when a B-tree - node is split at the first or last record. - 2010-04-15 The InnoDB Team * trx/trx0rec.c: @@ -72,6 +215,10 @@ * mysql-test/innodb_bug38231.test: Remove non-determinism in the test case. 
+2010-03-29 The InnoDB Team + + InnoDB Plugin 1.0.7 released + 2010-03-18 The InnoDB Team * CMakeLists.txt: @@ -194,6 +341,14 @@ Fix Bug#49497 Error 1467 (ER_AUTOINC_READ_FAILED) on inserting a negative value +2010-01-28 The InnoDB Team + * handler/ha_innodb.h, handler/ha_innodb.cc, + handler/handler0alter.cc, + mysql-test/innodb_bug47622.test, + mysql-test/innodb_bug47622.result: + Fix Bug#47622 the new index is added before the existing ones + in MySQL, but after one in SE + 2010-01-27 The InnoDB Team * include/row0mysql.h, log/log0recv.c, row/row0mysql.c: diff --git a/storage/innodb_plugin/Makefile.am b/storage/innodb_plugin/Makefile.am index 1d0dd936895..1bed796f0d4 100644 --- a/storage/innodb_plugin/Makefile.am +++ b/storage/innodb_plugin/Makefile.am @@ -325,15 +325,15 @@ libinnobase_a_SOURCES= \ ut/ut0vec.c \ ut/ut0wqueue.c -libinnobase_a_CXXFLAGS= $(AM_CFLAGS) +libinnobase_a_CXXFLAGS= $(AM_CXXFLAGS) libinnobase_a_CFLAGS= $(AM_CFLAGS) EXTRA_LTLIBRARIES= ha_innodb_plugin.la pkgplugin_LTLIBRARIES= @plugin_innodb_plugin_shared_target@ -ha_innodb_plugin_la_LDFLAGS= -module -rpath $(pkgplugindir) -ha_innodb_plugin_la_CXXFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) -ha_innodb_plugin_la_CFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) +ha_innodb_plugin_la_LDFLAGS= -module -rpath $(pkgplugindir) -L$(top_builddir)/libservices -lmysqlservices +ha_innodb_plugin_la_CXXFLAGS= -shared $(AM_CXXFLAGS) $(INNODB_DYNAMIC_CFLAGS) +ha_innodb_plugin_la_CFLAGS= -shared $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) ha_innodb_plugin_la_SOURCES= $(libinnobase_a_SOURCES) EXTRA_DIST= CMakeLists.txt plug.in \ diff --git a/storage/innodb_plugin/btr/btr0btr.c b/storage/innodb_plugin/btr/btr0btr.c index 96fcc2ed821..02677e0a71c 100644 --- a/storage/innodb_plugin/btr/btr0btr.c +++ b/storage/innodb_plugin/btr/btr0btr.c @@ -2000,6 +2000,7 @@ func_start: goto insert_empty; } } else if (UNIV_UNLIKELY(insert_left)) { + ut_a(n_iterations > 0); first_rec = page_rec_get_next(page_get_infimum_rec(page)); 
move_limit = page_rec_get_next(btr_cur_get_rec(cursor)); } else { @@ -2046,17 +2047,7 @@ insert_empty: } /* 5. Move then the records to the new page */ - if (direction == FSP_DOWN -#ifdef UNIV_BTR_AVOID_COPY - && page_rec_is_supremum(move_limit)) { - /* Instead of moving all records, make the new page - the empty page. */ - - left_block = block; - right_block = new_block; - } else if (direction == FSP_DOWN -#endif /* UNIV_BTR_AVOID_COPY */ - ) { + if (direction == FSP_DOWN) { /* fputs("Split left\n", stderr); */ if (0 @@ -2099,14 +2090,6 @@ insert_empty: right_block = block; lock_update_split_left(right_block, left_block); -#ifdef UNIV_BTR_AVOID_COPY - } else if (!split_rec) { - /* Instead of moving all records, make the new page - the empty page. */ - - left_block = new_block; - right_block = block; -#endif /* UNIV_BTR_AVOID_COPY */ } else { /* fputs("Split right\n", stderr); */ diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c index 0e603fdca8f..7fa7d42320a 100644 --- a/storage/innodb_plugin/btr/btr0cur.c +++ b/storage/innodb_plugin/btr/btr0cur.c @@ -1959,9 +1959,8 @@ any_extern: err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info, thr, mtr, &roll_ptr); if (err != DB_SUCCESS) { -err_exit: - mem_heap_free(heap); - return(err); + + goto err_exit; } /* Ok, we may do the replacement. 
Store on the page infimum the @@ -2007,9 +2006,10 @@ err_exit: page_cur_move_to_next(page_cursor); + err = DB_SUCCESS; +err_exit: mem_heap_free(heap); - - return(DB_SUCCESS); + return(err); } /*************************************************************//** @@ -3871,6 +3871,8 @@ btr_store_big_rec_extern_fields( field_ref += local_len; } extern_len = big_rec_vec->fields[i].len; + UNIV_MEM_ASSERT_RW(big_rec_vec->fields[i].data, + extern_len); ut_a(extern_len > 0); @@ -4507,6 +4509,7 @@ btr_copy_blob_prefix( mtr_commit(&mtr); if (page_no == FIL_NULL || copy_len != part_len) { + UNIV_MEM_ASSERT_RW(buf, copied_len); return(copied_len); } @@ -4690,6 +4693,7 @@ btr_copy_externally_stored_field_prefix_low( space_id, page_no, offset); inflateEnd(&d_stream); mem_heap_free(heap); + UNIV_MEM_ASSERT_RW(buf, d_stream.total_out); return(d_stream.total_out); } else { return(btr_copy_blob_prefix(buf, len, space_id, @@ -4810,7 +4814,7 @@ btr_copy_externally_stored_field( /*******************************************************************//** Copies an externally stored field of a record to mem heap. -@return the field copied to heap */ +@return the field copied to heap, or NULL if the field is incomplete */ UNIV_INTERN byte* btr_rec_copy_externally_stored_field( @@ -4840,6 +4844,18 @@ btr_rec_copy_externally_stored_field( data = rec_get_nth_field(rec, offsets, no, &local_len); + ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); + + if (UNIV_UNLIKELY + (!memcmp(data + local_len - BTR_EXTERN_FIELD_REF_SIZE, + field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) { + /* The externally stored field was not written yet. + This record should only be seen by + recv_recovery_rollback_active() or any + TRX_ISO_READ_UNCOMMITTED transactions. 
*/ + return(NULL); + } + return(btr_copy_externally_stored_field(len, data, zip_size, local_len, heap)); } diff --git a/storage/innodb_plugin/btr/btr0sea.c b/storage/innodb_plugin/btr/btr0sea.c index ef7afeb1039..ac7248fef20 100644 --- a/storage/innodb_plugin/btr/btr0sea.c +++ b/storage/innodb_plugin/btr/btr0sea.c @@ -46,6 +46,7 @@ Created 2/17/1996 Heikki Tuuri /** Flag: has the search system been enabled? Protected by btr_search_latch and btr_search_enabled_mutex. */ UNIV_INTERN char btr_search_enabled = TRUE; +UNIV_INTERN ibool btr_search_fully_disabled = FALSE; /** Mutex protecting btr_search_enabled */ static mutex_t btr_search_enabled_mutex; @@ -182,6 +183,7 @@ void btr_search_sys_free(void) /*=====================*/ { + rw_lock_free(&btr_search_latch); mem_free(btr_search_latch_temp); btr_search_latch_temp = NULL; mem_heap_free(btr_search_sys->hash_index->heap); @@ -200,12 +202,19 @@ btr_search_disable(void) mutex_enter(&btr_search_enabled_mutex); rw_lock_x_lock(&btr_search_latch); + /* Disable access to hash index, also tell ha_insert_for_fold() + stop adding new nodes to hash index, but still allow updating + existing nodes */ btr_search_enabled = FALSE; /* Clear all block->is_hashed flags and remove all entries from btr_search_sys->hash_index. */ buf_pool_drop_hash_index(); + /* hash index has been cleaned up, disallow any operation to + the hash index */ + btr_search_fully_disabled = TRUE; + /* btr_search_enabled_mutex should guarantee this. 
*/ ut_ad(!btr_search_enabled); @@ -224,6 +233,7 @@ btr_search_enable(void) rw_lock_x_lock(&btr_search_latch); btr_search_enabled = TRUE; + btr_search_fully_disabled = FALSE; rw_lock_x_unlock(&btr_search_latch); mutex_exit(&btr_search_enabled_mutex); @@ -1362,7 +1372,7 @@ btr_search_build_page_hash_index( rw_lock_x_lock(&btr_search_latch); - if (UNIV_UNLIKELY(!btr_search_enabled)) { + if (UNIV_UNLIKELY(btr_search_fully_disabled)) { goto exit_func; } diff --git a/storage/innodb_plugin/buf/buf0buddy.c b/storage/innodb_plugin/buf/buf0buddy.c index 66d802f8a36..ee5a569c3ff 100644 --- a/storage/innodb_plugin/buf/buf0buddy.c +++ b/storage/innodb_plugin/buf/buf0buddy.c @@ -442,11 +442,15 @@ buf_buddy_relocate( pool), so there is nothing wrong about this. The mach_read_from_4() calls here will only trigger bogus Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */ - bpage = buf_page_hash_get( - mach_read_from_4((const byte*) src - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID), - mach_read_from_4((const byte*) src - + FIL_PAGE_OFFSET)); + ulint space = mach_read_from_4( + (const byte*) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + ulint page_no = mach_read_from_4( + (const byte*) src + FIL_PAGE_OFFSET); + /* Suppress Valgrind warnings about conditional jump + on uninitialized value. */ + UNIV_MEM_VALID(&space, sizeof space); + UNIV_MEM_VALID(&page_no, sizeof page_no); + bpage = buf_page_hash_get(space, page_no); if (!bpage || bpage->zip.data != src) { /* The block has probably been freshly @@ -495,7 +499,12 @@ success: mutex_exit(mutex); } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) { /* This must be a buf_page_t object. */ +#if UNIV_WORD_SIZE == 4 + /* On 32-bit systems, there is no padding in + buf_page_t. On other systems, Valgrind could complain + about uninitialized pad bytes. 
*/ UNIV_MEM_ASSERT_RW(src, size); +#endif if (buf_buddy_relocate_block(src, dst)) { goto success; diff --git a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c index f299c2df969..660686bac1e 100644 --- a/storage/innodb_plugin/buf/buf0buf.c +++ b/storage/innodb_plugin/buf/buf0buf.c @@ -2236,7 +2236,7 @@ wait_until_unfixed: block->page.buf_fix_count = 1; buf_block_set_io_fix(block, BUF_IO_READ); - rw_lock_x_lock(&block->lock); + rw_lock_x_lock_func(&block->lock, 0, file, line); UNIV_MEM_INVALID(bpage, sizeof *bpage); @@ -2280,7 +2280,12 @@ wait_until_unfixed: ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); mutex_enter(&block->mutex); +#if UNIV_WORD_SIZE == 4 + /* On 32-bit systems, there is no padding in buf_page_t. On + other systems, Valgrind could complain about uninitialized pad + bytes. */ UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page); +#endif buf_block_buf_fix_inc(block, file, line); diff --git a/storage/innodb_plugin/buf/buf0flu.c b/storage/innodb_plugin/buf/buf0flu.c index f2b07492470..d8c0497fa1e 100644 --- a/storage/innodb_plugin/buf/buf0flu.c +++ b/storage/innodb_plugin/buf/buf0flu.c @@ -249,6 +249,17 @@ buf_flush_insert_into_flush_list( ut_d(block->page.in_flush_list = TRUE); UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page); +#ifdef UNIV_DEBUG_VALGRIND + { + ulint zip_size = buf_block_get_zip_size(block); + + if (UNIV_UNLIKELY(zip_size)) { + UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size); + } else { + UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE); + } + } +#endif /* UNIV_DEBUG_VALGRIND */ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(buf_flush_validate_low()); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ @@ -276,6 +287,18 @@ buf_flush_insert_sorted_into_flush_list( ut_ad(!block->page.in_flush_list); ut_d(block->page.in_flush_list = TRUE); +#ifdef UNIV_DEBUG_VALGRIND + { + ulint zip_size = buf_block_get_zip_size(block); + + if (UNIV_UNLIKELY(zip_size)) { + 
UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size); + } else { + UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE); + } + } +#endif /* UNIV_DEBUG_VALGRIND */ + prev_b = NULL; /* For the most part when this function is called the flush_rbt @@ -809,6 +832,7 @@ try_again: zip_size = buf_page_get_zip_size(bpage); if (UNIV_UNLIKELY(zip_size)) { + UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size); /* Copy the compressed page and clear the rest. */ memcpy(trx_doublewrite->write_buf + UNIV_PAGE_SIZE * trx_doublewrite->first_free, @@ -818,6 +842,8 @@ try_again: + zip_size, 0, UNIV_PAGE_SIZE - zip_size); } else { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); + UNIV_MEM_ASSERT_RW(((buf_block_t*) bpage)->frame, + UNIV_PAGE_SIZE); memcpy(trx_doublewrite->write_buf + UNIV_PAGE_SIZE * trx_doublewrite->first_free, diff --git a/storage/innodb_plugin/buf/buf0lru.c b/storage/innodb_plugin/buf/buf0lru.c index 9cfa02ba3ac..78d8d348e2a 100644 --- a/storage/innodb_plugin/buf/buf0lru.c +++ b/storage/innodb_plugin/buf/buf0lru.c @@ -1364,7 +1364,7 @@ buf_LRU_make_block_old( Try to free a block. If bpage is a descriptor of a compressed-only page, the descriptor object will be freed as well. -NOTE: If this function returns BUF_LRU_FREED, it will not temporarily +NOTE: If this function returns BUF_LRU_FREED, it will temporarily release buf_pool_mutex. Furthermore, the page frame will no longer be accessible via bpage. @@ -1393,7 +1393,12 @@ buf_LRU_free_block( ut_ad(buf_page_in_file(bpage)); ut_ad(bpage->in_LRU_list); ut_ad(!bpage->in_flush_list == !bpage->oldest_modification); +#if UNIV_WORD_SIZE == 4 + /* On 32-bit systems, there is no padding in buf_page_t. On + other systems, Valgrind could complain about uninitialized pad + bytes. 
*/ UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage); +#endif if (!buf_page_can_relocate(bpage)) { @@ -1489,8 +1494,13 @@ alloc: ut_ad(prev_b->in_LRU_list); ut_ad(buf_page_in_file(prev_b)); +#if UNIV_WORD_SIZE == 4 + /* On 32-bit systems, there is no + padding in buf_page_t. On other + systems, Valgrind could complain about + uninitialized pad bytes. */ UNIV_MEM_ASSERT_RW(prev_b, sizeof *prev_b); - +#endif UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, prev_b, b); @@ -1688,7 +1698,12 @@ buf_LRU_block_remove_hashed_page( ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE); ut_a(bpage->buf_fix_count == 0); +#if UNIV_WORD_SIZE == 4 + /* On 32-bit systems, there is no padding in + buf_page_t. On other systems, Valgrind could complain + about uninitialized pad bytes. */ UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage); +#endif buf_LRU_remove_block(bpage); diff --git a/storage/innodb_plugin/dict/dict0boot.c b/storage/innodb_plugin/dict/dict0boot.c index 45d57b8c619..e63c1dc94b9 100644 --- a/storage/innodb_plugin/dict/dict0boot.c +++ b/storage/innodb_plugin/dict/dict0boot.c @@ -62,32 +62,47 @@ dict_hdr_get( } /**********************************************************************//** -Returns a new table, index, or tree id. -@return the new id */ +Returns a new table, index, or space id. */ UNIV_INTERN -dulint +void dict_hdr_get_new_id( /*================*/ - ulint type) /*!< in: DICT_HDR_ROW_ID, ... 
*/ + dulint* table_id, /*!< out: table id (not assigned if NULL) */ + dulint* index_id, /*!< out: index id (not assigned if NULL) */ + ulint* space_id) /*!< out: space id (not assigned if NULL) */ { dict_hdr_t* dict_hdr; dulint id; mtr_t mtr; - ut_ad((type == DICT_HDR_TABLE_ID) || (type == DICT_HDR_INDEX_ID)); - mtr_start(&mtr); dict_hdr = dict_hdr_get(&mtr); - id = mtr_read_dulint(dict_hdr + type, &mtr); - id = ut_dulint_add(id, 1); + if (table_id) { + id = mtr_read_dulint(dict_hdr + DICT_HDR_TABLE_ID, &mtr); + id = ut_dulint_add(id, 1); + mlog_write_dulint(dict_hdr + DICT_HDR_TABLE_ID, id, &mtr); + *table_id = id; + } - mlog_write_dulint(dict_hdr + type, id, &mtr); + if (index_id) { + id = mtr_read_dulint(dict_hdr + DICT_HDR_INDEX_ID, &mtr); + id = ut_dulint_add(id, 1); + mlog_write_dulint(dict_hdr + DICT_HDR_INDEX_ID, id, &mtr); + *index_id = id; + } - mtr_commit(&mtr); + if (space_id) { + *space_id = mtr_read_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID, + MLOG_4BYTES, &mtr); + if (fil_assign_new_space_id(space_id)) { + mlog_write_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID, + *space_id, MLOG_4BYTES, &mtr); + } + } - return(id); + mtr_commit(&mtr); } /**********************************************************************//** @@ -151,9 +166,12 @@ dict_hdr_create( mlog_write_dulint(dict_header + DICT_HDR_INDEX_ID, ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr); - /* Obsolete, but we must initialize it to 0 anyway. */ - mlog_write_dulint(dict_header + DICT_HDR_MIX_ID, - ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr); + mlog_write_ulint(dict_header + DICT_HDR_MAX_SPACE_ID, + 0, MLOG_4BYTES, mtr); + + /* Obsolete, but we must initialize it anyway. 
*/ + mlog_write_ulint(dict_header + DICT_HDR_MIX_ID_LOW, + DICT_HDR_FIRST_ID, MLOG_4BYTES, mtr); /* Create the B-tree roots for the clustered indexes of the basic system tables */ diff --git a/storage/innodb_plugin/dict/dict0crea.c b/storage/innodb_plugin/dict/dict0crea.c index 653bff4bef6..09353c45c8c 100644 --- a/storage/innodb_plugin/dict/dict0crea.c +++ b/storage/innodb_plugin/dict/dict0crea.c @@ -239,16 +239,34 @@ dict_build_table_def_step( const char* path_or_name; ibool is_path; mtr_t mtr; + ulint space = 0; + ibool file_per_table; ut_ad(mutex_own(&(dict_sys->mutex))); table = node->table; - table->id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID); + /* Cache the global variable "srv_file_per_table" to + a local variable before using it. Please note + "srv_file_per_table" is not under dict_sys mutex + protection, and could be changed while executing + this function. So better to cache the current value + to a local variable, and all future reference to + "srv_file_per_table" should use this local variable. */ + file_per_table = srv_file_per_table; + + dict_hdr_get_new_id(&table->id, NULL, NULL); thr_get_trx(thr)->table_id = table->id; - if (srv_file_per_table) { + if (file_per_table) { + /* Get a new space id if srv_file_per_table is set */ + dict_hdr_get_new_id(NULL, NULL, &space); + + if (UNIV_UNLIKELY(space == ULINT_UNDEFINED)) { + return(DB_ERROR); + } + /* We create a new single-table tablespace for the table. We initially let it be 4 pages: - page 0 is the fsp header and an extent descriptor page, @@ -257,8 +275,6 @@ dict_build_table_def_step( - page 3 will contain the root of the clustered index of the table we create here. 
*/ - ulint space = 0; /* reset to zero for the call below */ - if (table->dir_path_of_temp_table) { /* We place tables created with CREATE TEMPORARY TABLE in the tmp dir of mysqld server */ @@ -276,7 +292,7 @@ dict_build_table_def_step( flags = table->flags & ~(~0 << DICT_TF_BITS); error = fil_create_new_single_table_tablespace( - &space, path_or_name, is_path, + space, path_or_name, is_path, flags == DICT_TF_COMPACT ? 0 : flags, FIL_IBD_FILE_INITIAL_SIZE); table->space = (unsigned int) space; @@ -561,7 +577,7 @@ dict_build_index_def_step( ut_ad((UT_LIST_GET_LEN(table->indexes) > 0) || dict_index_is_clust(index)); - index->id = dict_hdr_get_new_id(DICT_HDR_INDEX_ID); + dict_hdr_get_new_id(NULL, &index->id, NULL); /* Inherit the space id from the table; we store all indexes of a table in the same tablespace */ diff --git a/storage/innodb_plugin/dict/dict0dict.c b/storage/innodb_plugin/dict/dict0dict.c index 83438231689..fe4e058e122 100644 --- a/storage/innodb_plugin/dict/dict0dict.c +++ b/storage/innodb_plugin/dict/dict0dict.c @@ -82,7 +82,7 @@ static char dict_ibfk[] = "_ibfk_"; /** array of mutexes protecting dict_index_t::stat_n_diff_key_vals[] */ #define DICT_INDEX_STAT_MUTEX_SIZE 32 -mutex_t dict_index_stat_mutex[DICT_INDEX_STAT_MUTEX_SIZE]; +static mutex_t dict_index_stat_mutex[DICT_INDEX_STAT_MUTEX_SIZE]; /*******************************************************************//** Tries to find column names for the index and sets the col field of the @@ -570,13 +570,11 @@ dict_table_get_on_id( if (ut_dulint_cmp(table_id, DICT_FIELDS_ID) <= 0 || trx->dict_operation_lock_mode == RW_X_LATCH) { - /* It is a system table which will always exist in the table - cache: we avoid acquiring the dictionary mutex, because - if we are doing a rollback to handle an error in TABLE - CREATE, for example, we already have the mutex! 
*/ - ut_ad(mutex_own(&(dict_sys->mutex)) - || trx->dict_operation_lock_mode == RW_X_LATCH); + /* Note: An X latch implies that the transaction + already owns the dictionary mutex. */ + + ut_ad(mutex_own(&dict_sys->mutex)); return(dict_table_get_on_id_low(table_id)); } @@ -850,7 +848,8 @@ dict_table_add_to_cache( /* Add table to LRU list of tables */ UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table); - dict_sys->size += mem_heap_get_size(table->heap); + dict_sys->size += mem_heap_get_size(table->heap) + + strlen(table->name) + 1; } /**********************************************************************//** @@ -904,14 +903,21 @@ dict_table_rename_in_cache( dict_foreign_t* foreign; dict_index_t* index; ulint fold; - ulint old_size; - const char* old_name; + char old_name[MAX_TABLE_NAME_LEN + 1]; ut_ad(table); ut_ad(mutex_own(&(dict_sys->mutex))); - old_size = mem_heap_get_size(table->heap); - old_name = table->name; + /* store the old/current name to an automatic variable */ + if (strlen(table->name) + 1 <= sizeof(old_name)) { + memcpy(old_name, table->name, strlen(table->name) + 1); + } else { + ut_print_timestamp(stderr); + fprintf(stderr, "InnoDB: too long table name: '%s', " + "max length is %d\n", table->name, + MAX_TABLE_NAME_LEN); + ut_error; + } fold = ut_fold_string(new_name); @@ -957,12 +963,22 @@ dict_table_rename_in_cache( /* Remove table from the hash tables of tables */ HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash, ut_fold_string(old_name), table); - table->name = mem_heap_strdup(table->heap, new_name); + + if (strlen(new_name) > strlen(table->name)) { + /* We allocate MAX_TABLE_NAME_LEN+1 bytes here to avoid + memory fragmentation, we assume a repeated calls of + ut_realloc() with the same size do not cause fragmentation */ + ut_a(strlen(new_name) <= MAX_TABLE_NAME_LEN); + table->name = ut_realloc(table->name, MAX_TABLE_NAME_LEN + 1); + } + memcpy(table->name, new_name, strlen(new_name) + 1); /* Add table to hash table of tables */ 
HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, table); - dict_sys->size += (mem_heap_get_size(table->heap) - old_size); + + dict_sys->size += strlen(new_name) - strlen(old_name); + ut_a(dict_sys->size > 0); /* Update the table_name field in indexes */ index = dict_table_get_first_index(table); @@ -1187,7 +1203,7 @@ dict_table_remove_from_cache( /* Remove table from LRU list of tables */ UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table); - size = mem_heap_get_size(table->heap); + size = mem_heap_get_size(table->heap) + strlen(table->name) + 1; ut_ad(dict_sys->size >= size); @@ -3008,25 +3024,28 @@ static char* dict_strip_comments( /*================*/ - const char* sql_string) /*!< in: SQL string */ + const char* sql_string, /*!< in: SQL string */ + size_t sql_length) /*!< in: length of sql_string */ { char* str; const char* sptr; + const char* eptr = sql_string + sql_length; char* ptr; /* unclosed quote character (0 if none) */ char quote = 0; - str = mem_alloc(strlen(sql_string) + 1); + str = mem_alloc(sql_length + 1); sptr = sql_string; ptr = str; for (;;) { scan_more: - if (*sptr == '\0') { + if (sptr >= eptr || *sptr == '\0') { +end_of_string: *ptr = '\0'; - ut_a(ptr <= str + strlen(sql_string)); + ut_a(ptr <= str + sql_length); return(str); } @@ -3045,30 +3064,35 @@ scan_more: || (sptr[0] == '-' && sptr[1] == '-' && sptr[2] == ' ')) { for (;;) { + if (++sptr >= eptr) { + goto end_of_string; + } + /* In Unix a newline is 0x0A while in Windows it is 0x0D followed by 0x0A */ - if (*sptr == (char)0x0A - || *sptr == (char)0x0D - || *sptr == '\0') { - + switch (*sptr) { + case (char) 0X0A: + case (char) 0x0D: + case '\0': goto scan_more; } - - sptr++; } } else if (!quote && *sptr == '/' && *(sptr + 1) == '*') { + sptr += 2; for (;;) { - if (*sptr == '*' && *(sptr + 1) == '/') { - - sptr += 2; - - goto scan_more; + if (sptr >= eptr) { + goto end_of_string; } - if (*sptr == '\0') { - + switch (*sptr) { + case '\0': goto scan_more; + case '*': + 
if (sptr[1] == '/') { + sptr += 2; + goto scan_more; + } } sptr++; @@ -3749,6 +3773,7 @@ dict_create_foreign_constraints( name before it: test.table2; the default database id the database of parameter name */ + size_t sql_length, /*!< in: length of sql_string */ const char* name, /*!< in: table full name in the normalized form database_name/table_name */ @@ -3763,7 +3788,7 @@ dict_create_foreign_constraints( ut_a(trx); ut_a(trx->mysql_thd); - str = dict_strip_comments(sql_string); + str = dict_strip_comments(sql_string, sql_length); heap = mem_heap_create(10000); err = dict_create_foreign_constraints_low( @@ -3796,6 +3821,7 @@ dict_foreign_parse_drop_constraints( dict_foreign_t* foreign; ibool success; char* str; + size_t len; const char* ptr; const char* id; FILE* ef = dict_foreign_err_file; @@ -3810,7 +3836,10 @@ dict_foreign_parse_drop_constraints( *constraints_to_drop = mem_heap_alloc(heap, 1000 * sizeof(char*)); - str = dict_strip_comments(*(trx->mysql_query_str)); + ptr = innobase_get_stmt(trx->mysql_thd, &len); + + str = dict_strip_comments(ptr, len); + ptr = str; ut_ad(mutex_own(&(dict_sys->mutex))); diff --git a/storage/innodb_plugin/dict/dict0load.c b/storage/innodb_plugin/dict/dict0load.c index 377818308c5..3c495d21786 100644 --- a/storage/innodb_plugin/dict/dict0load.c +++ b/storage/innodb_plugin/dict/dict0load.c @@ -316,7 +316,7 @@ dict_check_tablespaces_and_store_max_id( dict_index_t* sys_index; btr_pcur_t pcur; const rec_t* rec; - ulint max_space_id = 0; + ulint max_space_id; mtr_t mtr; mutex_enter(&(dict_sys->mutex)); @@ -327,6 +327,11 @@ dict_check_tablespaces_and_store_max_id( sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); ut_a(!dict_table_is_comp(sys_tables)); + max_space_id = mtr_read_ulint(dict_hdr_get(&mtr) + + DICT_HDR_MAX_SPACE_ID, + MLOG_4BYTES, &mtr); + fil_set_max_space_id_if_bigger(max_space_id); + btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr); loop: @@ -973,6 +978,7 @@ err_exit: /* Try to open the 
tablespace */ if (!fil_open_single_table_tablespace( TRUE, space, + flags == DICT_TF_COMPACT ? 0 : flags & ~(~0 << DICT_TF_BITS), name)) { /* We failed to find a sensible tablespace file */ diff --git a/storage/innodb_plugin/dict/dict0mem.c b/storage/innodb_plugin/dict/dict0mem.c index 66b4b43f296..3287247029f 100644 --- a/storage/innodb_plugin/dict/dict0mem.c +++ b/storage/innodb_plugin/dict/dict0mem.c @@ -68,7 +68,8 @@ dict_mem_table_create( table->heap = heap; table->flags = (unsigned int) flags; - table->name = mem_heap_strdup(heap, name); + table->name = ut_malloc(strlen(name) + 1); + memcpy(table->name, name, strlen(name) + 1); table->space = (unsigned int) space; table->n_cols = (unsigned int) (n_cols + DATA_N_SYS_COLS); @@ -106,6 +107,7 @@ dict_mem_table_free( #ifndef UNIV_HOTBACKUP mutex_free(&(table->autoinc_mutex)); #endif /* UNIV_HOTBACKUP */ + ut_free(table->name); mem_heap_free(table->heap); } diff --git a/storage/innodb_plugin/fil/fil0fil.c b/storage/innodb_plugin/fil/fil0fil.c index 963e306c00c..796fe921a7e 100644 --- a/storage/innodb_plugin/fil/fil0fil.c +++ b/storage/innodb_plugin/fil/fil0fil.c @@ -279,6 +279,10 @@ struct fil_system_struct { request */ UT_LIST_BASE_NODE_T(fil_space_t) space_list; /*!< list of all file spaces */ + ibool space_id_reuse_warned; + /* !< TRUE if fil_space_create() + has issued a warning about + potential space_id reuse */ }; /** The tablespace memory cache. 
This variable is NULL before the module is @@ -1193,7 +1197,19 @@ try_again: space->tablespace_version = fil_system->tablespace_version; space->mark = FALSE; - if (purpose == FIL_TABLESPACE && id > fil_system->max_assigned_id) { + if (UNIV_LIKELY(purpose == FIL_TABLESPACE && !recv_recovery_on) + && UNIV_UNLIKELY(id > fil_system->max_assigned_id)) { + if (!fil_system->space_id_reuse_warned) { + fil_system->space_id_reuse_warned = TRUE; + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Warning: allocated tablespace %lu," + " old maximum was %lu\n", + (ulong) id, + (ulong) fil_system->max_assigned_id); + } + fil_system->max_assigned_id = id; } @@ -1231,19 +1247,25 @@ try_again: Assigns a new space id for a new single-table tablespace. This works simply by incrementing the global counter. If 4 billion id's is not enough, we may need to recycle id's. -@return new tablespace id; ULINT_UNDEFINED if could not assign an id */ -static -ulint -fil_assign_new_space_id(void) -/*=========================*/ +@return TRUE if assigned, FALSE if not */ +UNIV_INTERN +ibool +fil_assign_new_space_id( +/*====================*/ + ulint* space_id) /*!< in/out: space id */ { - ulint id; + ulint id; + ibool success; mutex_enter(&fil_system->mutex); - fil_system->max_assigned_id++; + id = *space_id; + + if (id < fil_system->max_assigned_id) { + id = fil_system->max_assigned_id; + } - id = fil_system->max_assigned_id; + id++; if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) { ut_print_timestamp(stderr); @@ -1259,7 +1281,11 @@ fil_assign_new_space_id(void) (ulong) SRV_LOG_SPACE_FIRST_ID); } - if (id >= SRV_LOG_SPACE_FIRST_ID) { + success = (id < SRV_LOG_SPACE_FIRST_ID); + + if (success) { + *space_id = fil_system->max_assigned_id = id; + } else { ut_print_timestamp(stderr); fprintf(stderr, "InnoDB: You have run out of single-table" @@ -1269,14 +1295,12 @@ fil_assign_new_space_id(void) " have to dump all your tables and\n" "InnoDB: recreate the whole InnoDB 
installation.\n", (ulong) id); - fil_system->max_assigned_id--; - - id = ULINT_UNDEFINED; + *space_id = ULINT_UNDEFINED; } mutex_exit(&fil_system->mutex); - return(id); + return(success); } /*******************************************************************//** @@ -1512,7 +1536,7 @@ fil_init( ut_a(hash_size > 0); ut_a(max_n_open > 0); - fil_system = mem_alloc(sizeof(fil_system_t)); + fil_system = mem_zalloc(sizeof(fil_system_t)); mutex_create(&fil_system->mutex, SYNC_ANY_LATCH); @@ -1521,16 +1545,7 @@ fil_init( UT_LIST_INIT(fil_system->LRU); - fil_system->n_open = 0; fil_system->max_n_open = max_n_open; - - fil_system->modification_counter = 0; - fil_system->max_assigned_id = 0; - - fil_system->tablespace_version = 0; - - UT_LIST_INIT(fil_system->unflushed_spaces); - UT_LIST_INIT(fil_system->space_list); } /*******************************************************************//** @@ -2115,7 +2130,7 @@ fil_op_log_parse_or_replay( fil_create_directory_for_tablename(name); if (fil_create_new_single_table_tablespace( - &space_id, name, FALSE, flags, + space_id, name, FALSE, flags, FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) { ut_error; } @@ -2562,9 +2577,7 @@ UNIV_INTERN ulint fil_create_new_single_table_tablespace( /*===================================*/ - ulint* space_id, /*!< in/out: space id; if this is != 0, - then this is an input parameter, - otherwise output */ + ulint space_id, /*!< in: space id */ const char* tablename, /*!< in: the table name in the usual databasename/tablename format of InnoDB, or a dir path to a temp @@ -2584,6 +2597,8 @@ fil_create_new_single_table_tablespace( ibool success; char* path; + ut_a(space_id > 0); + ut_a(space_id < SRV_LOG_SPACE_FIRST_ID); ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE); /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for ROW_FORMAT=COMPACT @@ -2640,38 +2655,21 @@ fil_create_new_single_table_tablespace( return(DB_ERROR); } - buf2 = ut_malloc(3 * UNIV_PAGE_SIZE); - /* Align the memory for file i/o if we might have 
O_DIRECT set */ - page = ut_align(buf2, UNIV_PAGE_SIZE); - ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE, 0); if (!ret) { - ut_free(buf2); - os_file_close(file); - os_file_delete(path); - - mem_free(path); - return(DB_OUT_OF_FILE_SPACE); - } - - if (*space_id == 0) { - *space_id = fil_assign_new_space_id(); - } - - /* printf("Creating tablespace %s id %lu\n", path, *space_id); */ - - if (*space_id == ULINT_UNDEFINED) { - ut_free(buf2); + err = DB_OUT_OF_FILE_SPACE; error_exit: os_file_close(file); error_exit2: os_file_delete(path); mem_free(path); - return(DB_ERROR); + return(err); } + /* printf("Creating tablespace %s id %lu\n", path, space_id); */ + /* We have to write the space id to the file immediately and flush the file to disk. This is because in crash recovery we must be aware what tablespaces exist and what are their space id's, so that we can apply @@ -2681,10 +2679,14 @@ error_exit2: with zeros from the call of os_file_set_size(), until a buffer pool flush would write to it. 
*/ + buf2 = ut_malloc(3 * UNIV_PAGE_SIZE); + /* Align the memory for file i/o if we might have O_DIRECT set */ + page = ut_align(buf2, UNIV_PAGE_SIZE); + memset(page, '\0', UNIV_PAGE_SIZE); - fsp_header_init_fields(page, *space_id, flags); - mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, *space_id); + fsp_header_init_fields(page, space_id, flags); + mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); if (!(flags & DICT_TF_ZSSIZE_MASK)) { buf_flush_init_for_writing(page, NULL, 0); @@ -2715,6 +2717,7 @@ error_exit2: " to tablespace ", stderr); ut_print_filename(stderr, path); putc('\n', stderr); + err = DB_ERROR; goto error_exit; } @@ -2724,22 +2727,20 @@ error_exit2: fputs("InnoDB: Error: file flush of tablespace ", stderr); ut_print_filename(stderr, path); fputs(" failed\n", stderr); + err = DB_ERROR; goto error_exit; } os_file_close(file); - if (*space_id == ULINT_UNDEFINED) { - goto error_exit2; - } - - success = fil_space_create(path, *space_id, flags, FIL_TABLESPACE); + success = fil_space_create(path, space_id, flags, FIL_TABLESPACE); if (!success) { + err = DB_ERROR; goto error_exit2; } - fil_node_create(path, size, *space_id, FALSE); + fil_node_create(path, size, space_id, FALSE); #ifndef UNIV_HOTBACKUP { @@ -2750,7 +2751,7 @@ error_exit2: fil_op_write_log(flags ? MLOG_FILE_CREATE2 : MLOG_FILE_CREATE, - *space_id, + space_id, is_temp ? MLOG_FILE_FLAG_TEMP : 0, flags, tablename, NULL, &mtr); @@ -3541,39 +3542,6 @@ next_datadir_item: return(err); } -/********************************************************************//** -If we need crash recovery, and we have called -fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(), -we can call this function to print an error message of orphaned .ibd files -for which there is not a data dictionary entry with a matching table name -and space id. 
*/ -UNIV_INTERN -void -fil_print_orphaned_tablespaces(void) -/*================================*/ -{ - fil_space_t* space; - - mutex_enter(&fil_system->mutex); - - space = UT_LIST_GET_FIRST(fil_system->space_list); - - while (space) { - if (space->purpose == FIL_TABLESPACE && space->id != 0 - && !space->mark) { - fputs("InnoDB: Warning: tablespace ", stderr); - ut_print_filename(stderr, space->name); - fprintf(stderr, " of id %lu has no matching table in\n" - "InnoDB: the InnoDB data dictionary.\n", - (ulong) space->id); - } - - space = UT_LIST_GET_NEXT(space_list, space); - } - - mutex_exit(&fil_system->mutex); -} - /*******************************************************************//** Returns TRUE if a single-table tablespace does not exist in the memory cache, or is being deleted there. diff --git a/storage/innodb_plugin/fsp/fsp0fsp.c b/storage/innodb_plugin/fsp/fsp0fsp.c index c7f1a299d8a..2bae8481d20 100644 --- a/storage/innodb_plugin/fsp/fsp0fsp.c +++ b/storage/innodb_plugin/fsp/fsp0fsp.c @@ -869,12 +869,10 @@ fsp_init_file_page_low( return; } - UNIV_MEM_INVALID(page, UNIV_PAGE_SIZE); + memset(page, 0, UNIV_PAGE_SIZE); mach_write_to_4(page + FIL_PAGE_OFFSET, buf_block_get_page_no(block)); - memset(page + FIL_PAGE_LSN, 0, 8); mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, buf_block_get_space(block)); - memset(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, 0, 8); } #ifndef UNIV_HOTBACKUP diff --git a/storage/innodb_plugin/ha/ha0ha.c b/storage/innodb_plugin/ha/ha0ha.c index 9d9d341ad39..f9e798012f8 100644 --- a/storage/innodb_plugin/ha/ha0ha.c +++ b/storage/innodb_plugin/ha/ha0ha.c @@ -31,9 +31,7 @@ Created 8/22/1994 Heikki Tuuri #ifdef UNIV_DEBUG # include "buf0buf.h" #endif /* UNIV_DEBUG */ -#ifdef UNIV_SYNC_DEBUG -# include "btr0sea.h" -#endif /* UNIV_SYNC_DEBUG */ +#include "btr0sea.h" #include "page0page.h" /*************************************************************//** @@ -127,7 +125,8 @@ ha_clear( 
/*************************************************************//** Inserts an entry into a hash table. If an entry with the same fold number is found, its node is updated to point to the new data, and no new node -is inserted. +is inserted. If btr_search_enabled is set to FALSE, we will only allow +updating existing nodes, but no new node is allowed to be added. @return TRUE if succeed, FALSE if no more memory could be allocated */ UNIV_INTERN ibool @@ -174,6 +173,7 @@ ha_insert_for_fold_func( prev_block->n_pointers--; block->n_pointers++; } + ut_ad(!btr_search_fully_disabled); # endif /* !UNIV_HOTBACKUP */ prev_node->block = block; @@ -186,6 +186,13 @@ ha_insert_for_fold_func( prev_node = prev_node->next; } + /* We are in the process of disabling hash index, do not add + new chain node */ + if (!btr_search_enabled) { + ut_ad(!btr_search_fully_disabled); + return(TRUE); + } + /* We have to allocate a new chain node */ node = mem_heap_alloc(hash_get_heap(table, fold), sizeof(ha_node_t)); diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc index 0fc6e786f4c..37ce69b9933 100644 --- a/storage/innodb_plugin/handler/ha_innodb.cc +++ b/storage/innodb_plugin/handler/ha_innodb.cc @@ -1004,6 +1004,29 @@ innobase_get_charset( return(thd_charset((THD*) mysql_thd)); } +/**********************************************************************//** +Determines the current SQL statement. 
+@return SQL statement string */ +extern "C" UNIV_INTERN +const char* +innobase_get_stmt( +/*==============*/ + void* mysql_thd, /*!< in: MySQL thread handle */ + size_t* length) /*!< out: length of the SQL statement */ +{ +#if MYSQL_VERSION_ID >= 50142 + LEX_STRING* stmt; + + stmt = thd_query_string((THD*) mysql_thd); + *length = stmt->length; + return(stmt->str); +#else + const char* stmt_str = thd_query((THD*) mysql_thd); + *length = strlen(stmt_str); + return(stmt_str); +#endif +} + #if defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN) extern MYSQL_PLUGIN_IMPORT MY_TMPDIR mysql_tmpdir_list; /*******************************************************************//** @@ -1314,7 +1337,6 @@ innobase_trx_allocate( trx = trx_allocate_for_mysql(); trx->mysql_thd = thd; - trx->mysql_query_str = thd_query(thd); innobase_trx_init(thd, trx); @@ -2248,6 +2270,7 @@ innobase_change_buffering_inited_ok: /* Get the current high water mark format. */ innobase_file_format_check = (char*) trx_sys_file_format_max_get(); + btr_search_fully_disabled = (!btr_search_enabled); DBUG_RETURN(FALSE); error: DBUG_RETURN(TRUE); @@ -2650,7 +2673,7 @@ innobase_rollback_to_savepoint( /* TODO: use provided savepoint data area to store savepoint data */ - longlong2str((ulint)savepoint, name, 36); + longlong2str((ulint)savepoint, name, 36, 1); error = (int) trx_rollback_to_savepoint_for_mysql(trx, name, &mysql_binlog_cache_pos); @@ -2681,7 +2704,7 @@ innobase_release_savepoint( /* TODO: use provided savepoint data area to store savepoint data */ - longlong2str((ulint)savepoint, name, 36); + longlong2str((ulint)savepoint, name, 36, 1); error = (int) trx_release_savepoint_for_mysql(trx, name); @@ -2728,7 +2751,7 @@ innobase_savepoint( /* TODO: use provided savepoint data area to store savepoint data */ char name[64]; - longlong2str((ulint)savepoint,name,36); + longlong2str((ulint)savepoint,name,36,1); error = (int) trx_savepoint_for_mysql(trx, name, (ib_int64_t)0); @@ -3928,6 +3951,11 @@ 
get_innobase_type_from_mysql_type( case MYSQL_TYPE_BLOB: case MYSQL_TYPE_LONG_BLOB: return(DATA_BLOB); + case MYSQL_TYPE_NULL: + /* MySQL currently accepts "NULL" datatype, but will + reject such datatype in the next release. We will cope + with it and not trigger assertion failure in 5.1 */ + break; default: ut_error; } @@ -5357,6 +5385,9 @@ ha_innobase::index_read( prebuilt->index_usable = FALSE; DBUG_RETURN(HA_ERR_CRASHED); } + if (UNIV_UNLIKELY(!prebuilt->index_usable)) { + DBUG_RETURN(HA_ERR_TABLE_DEF_CHANGED); + } /* Note that if the index for which the search template is built is not necessarily prebuilt->index, but can also be the clustered index */ @@ -5547,7 +5578,8 @@ ha_innobase::change_active_index( keynr); /* The caller seems to ignore this. Thus, we must check this again in row_search_for_mysql(). */ - DBUG_RETURN(2); + DBUG_RETURN(convert_error_code_to_mysql(DB_MISSING_HISTORY, + 0, NULL)); } ut_a(prebuilt->search_tuple != 0); @@ -5975,7 +6007,22 @@ create_table_def( field = form->field[i]; col_type = get_innobase_type_from_mysql_type(&unsigned_type, - field); + field); + + if (!col_type) { + push_warning_printf( + (THD*) trx->mysql_thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_CANT_CREATE_TABLE, + "Error creating table '%s' with " + "column '%s'. Please check its " + "column type and try to re-create " + "the table with an appropriate " + "column type.", + table->name, (char*) field->field_name); + goto err_col; + } + if (field->null_ptr) { nulls_allowed = 0; } else { @@ -6033,7 +6080,7 @@ create_table_def( if (dict_col_name_is_reserved(field->field_name)){ my_error(ER_WRONG_COLUMN_NAME, MYF(0), field->field_name); - +err_col: dict_mem_table_free(table); trx_commit_for_mysql(trx); @@ -6433,6 +6480,9 @@ ha_innobase::create( /* Cache the value of innodb_file_format, in case it is modified by another thread while the table is being created. 
*/ const ulint file_format = srv_file_format; + const char* stmt; + size_t stmt_len; + enum row_type row_type; DBUG_ENTER("ha_innobase::create"); @@ -6553,94 +6603,94 @@ ha_innobase::create( } } - if (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT) { - if (flags) { - /* KEY_BLOCK_SIZE was specified. */ - if (form->s->row_type != ROW_TYPE_COMPRESSED) { - /* ROW_FORMAT other than COMPRESSED - ignores KEY_BLOCK_SIZE. It does not - make sense to reject conflicting - KEY_BLOCK_SIZE and ROW_FORMAT, because - such combinations can be obtained - with ALTER TABLE anyway. */ - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ignoring KEY_BLOCK_SIZE=%lu" - " unless ROW_FORMAT=COMPRESSED.", - create_info->key_block_size); - flags = 0; - } - } else { - /* No KEY_BLOCK_SIZE */ - if (form->s->row_type == ROW_TYPE_COMPRESSED) { - /* ROW_FORMAT=COMPRESSED without - KEY_BLOCK_SIZE implies half the - maximum KEY_BLOCK_SIZE. */ - flags = (DICT_TF_ZSSIZE_MAX - 1) - << DICT_TF_ZSSIZE_SHIFT - | DICT_TF_COMPACT - | DICT_TF_FORMAT_ZIP - << DICT_TF_FORMAT_SHIFT; + row_type = form->s->row_type; + + if (flags) { + /* KEY_BLOCK_SIZE was specified. */ + if (!(create_info->used_fields & HA_CREATE_USED_ROW_FORMAT)) { + /* ROW_FORMAT was not specified; + default to ROW_FORMAT=COMPRESSED */ + row_type = ROW_TYPE_COMPRESSED; + } else if (row_type != ROW_TYPE_COMPRESSED) { + /* ROW_FORMAT other than COMPRESSED + ignores KEY_BLOCK_SIZE. It does not + make sense to reject conflicting + KEY_BLOCK_SIZE and ROW_FORMAT, because + such combinations can be obtained + with ALTER TABLE anyway. 
*/ + push_warning_printf( + thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ignoring KEY_BLOCK_SIZE=%lu" + " unless ROW_FORMAT=COMPRESSED.", + create_info->key_block_size); + flags = 0; + } + } else { + /* No KEY_BLOCK_SIZE */ + if (row_type == ROW_TYPE_COMPRESSED) { + /* ROW_FORMAT=COMPRESSED without + KEY_BLOCK_SIZE implies half the + maximum KEY_BLOCK_SIZE. */ + flags = (DICT_TF_ZSSIZE_MAX - 1) + << DICT_TF_ZSSIZE_SHIFT + | DICT_TF_COMPACT + | DICT_TF_FORMAT_ZIP + << DICT_TF_FORMAT_SHIFT; #if DICT_TF_ZSSIZE_MAX < 1 # error "DICT_TF_ZSSIZE_MAX < 1" #endif - } } + } - switch (form->s->row_type) { - const char* row_format_name; - case ROW_TYPE_REDUNDANT: - break; - case ROW_TYPE_COMPRESSED: - case ROW_TYPE_DYNAMIC: - row_format_name - = form->s->row_type == ROW_TYPE_COMPRESSED - ? "COMPRESSED" - : "DYNAMIC"; - - if (!srv_file_per_table) { - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s" - " requires innodb_file_per_table.", - row_format_name); - } else if (file_format < DICT_TF_FORMAT_ZIP) { - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s" - " requires innodb_file_format >" - " Antelope.", - row_format_name); - } else { - flags |= DICT_TF_COMPACT - | (DICT_TF_FORMAT_ZIP - << DICT_TF_FORMAT_SHIFT); - break; - } + switch (row_type) { + const char* row_format_name; + case ROW_TYPE_REDUNDANT: + break; + case ROW_TYPE_COMPRESSED: + case ROW_TYPE_DYNAMIC: + row_format_name + = row_type == ROW_TYPE_COMPRESSED + ? 
"COMPRESSED" + : "DYNAMIC"; - /* fall through */ - case ROW_TYPE_NOT_USED: - case ROW_TYPE_FIXED: - default: - push_warning(thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: assuming ROW_FORMAT=COMPACT."); - case ROW_TYPE_DEFAULT: - case ROW_TYPE_COMPACT: - flags = DICT_TF_COMPACT; + if (!srv_file_per_table) { + push_warning_printf( + thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ROW_FORMAT=%s" + " requires innodb_file_per_table.", + row_format_name); + } else if (file_format < DICT_TF_FORMAT_ZIP) { + push_warning_printf( + thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ROW_FORMAT=%s" + " requires innodb_file_format >" + " Antelope.", + row_format_name); + } else { + flags |= DICT_TF_COMPACT + | (DICT_TF_FORMAT_ZIP + << DICT_TF_FORMAT_SHIFT); break; } - } else if (!flags) { - /* No KEY_BLOCK_SIZE or ROW_FORMAT specified: - use ROW_FORMAT=COMPACT by default. */ + + /* fall through */ + case ROW_TYPE_NOT_USED: + case ROW_TYPE_FIXED: + default: + push_warning(thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: assuming ROW_FORMAT=COMPACT."); + case ROW_TYPE_DEFAULT: + case ROW_TYPE_COMPACT: flags = DICT_TF_COMPACT; + break; } /* Look for a primary key */ @@ -6649,7 +6699,7 @@ ha_innobase::create( (int) form->s->primary_key : -1); - /* Our function row_get_mysql_key_number_for_index assumes + /* Our function innobase_get_mysql_key_number_for_index assumes the primary key is always number 0, if it exists */ ut_a(primary_key_no == -1 || primary_key_no == 0); @@ -6709,9 +6759,11 @@ ha_innobase::create( } } - if (*trx->mysql_query_str) { - error = row_table_add_foreign_constraints(trx, - *trx->mysql_query_str, norm_name, + stmt = innobase_get_stmt(thd, &stmt_len); + + if (stmt) { + error = row_table_add_foreign_constraints( + trx, stmt, stmt_len, norm_name, create_info->options & HA_LEX_CREATE_TMP_TABLE); error = convert_error_code_to_mysql(error, flags, 
NULL); @@ -6996,7 +7048,6 @@ innobase_drop_database( /* In the Windows plugin, thd = current_thd is always NULL */ trx = trx_allocate_for_mysql(); trx->mysql_thd = NULL; - trx->mysql_query_str = NULL; #else trx = innobase_trx_allocate(thd); #endif @@ -7196,6 +7247,10 @@ ha_innobase::records_in_range( n_rows = HA_POS_ERROR; goto func_exit; } + if (UNIV_UNLIKELY(!row_merge_is_index_usable(prebuilt->trx, index))) { + n_rows = HA_ERR_TABLE_DEF_CHANGED; + goto func_exit; + } heap = mem_heap_create(2 * (key->key_parts * sizeof(dfield_t) + sizeof(dtuple_t))); @@ -7365,6 +7420,86 @@ ha_innobase::read_time( } /*********************************************************************//** +Calculates the key number used inside MySQL for an Innobase index. We will +first check the "index translation table" for a match of the index to get +the index number. If there does not exist an "index translation table", +or not able to find the index in the translation table, then we will fall back +to the traditional way of looping through dict_index_t list to find a +match. In this case, we have to take into account if we generated a +default clustered index for the table +@return the key number used inside MySQL */ +static +unsigned int +innobase_get_mysql_key_number_for_index( +/*====================================*/ + INNOBASE_SHARE* share, /*!< in: share structure for index + translation table. */ + const TABLE* table, /*!< in: table in MySQL data + dictionary */ + dict_table_t* ib_table,/*!< in: table in Innodb data + dictionary */ + const dict_index_t* index) /*!< in: index */ +{ + const dict_index_t* ind; + unsigned int i; + + ut_ad(index); + ut_ad(ib_table); + ut_ad(table); + ut_ad(share); + + /* If index does not belong to the table of share structure. 
Search + index->table instead */ + if (index->table != ib_table) { + i = 0; + ind = dict_table_get_first_index(index->table); + + while (index != ind) { + ind = dict_table_get_next_index(ind); + i++; + } + + if (row_table_got_default_clust_index(index->table)) { + ut_a(i > 0); + i--; + } + + return(i); + } + + /* If index translation table exists, we will first check + the index through index translation table for a match. */ + if (share->idx_trans_tbl.index_mapping) { + for (i = 0; i < share->idx_trans_tbl.index_count; i++) { + if (share->idx_trans_tbl.index_mapping[i] == index) { + return(i); + } + } + + /* Print an error message if we cannot find the index + ** in the "index translation table". */ + sql_print_error("Cannot find index %s in InnoDB index " + "translation table.", index->name); + } + + /* If we do not have an "index translation table", or not able + to find the index in the translation table, we'll directly find + matching index in the dict_index_t list */ + for (i = 0; i < table->s->keys; i++) { + ind = dict_table_get_index_on_name( + ib_table, table->key_info[i].name); + + if (index == ind) { + return(i); + } + } + + sql_print_error("Cannot find matching index number for index %s " + "in InnoDB index list.", index->name); + + return(0); +} +/*********************************************************************//** Returns statistics information of the table to the MySQL interpreter, in various fields of the handle object. */ UNIV_INTERN @@ -7633,8 +7768,8 @@ ha_innobase::info( err_index = trx_get_error_info(prebuilt->trx); if (err_index) { - errkey = (unsigned int) - row_get_mysql_key_number_for_index(err_index); + errkey = innobase_get_mysql_key_number_for_index( + share, table, ib_table, err_index); } else { errkey = (unsigned int) prebuilt->trx->error_key_num; } @@ -10346,7 +10481,35 @@ innodb_old_blocks_pct_update( } /*************************************************************//** -Check if it is a valid value of innodb_change_buffering. 
This function is +Find the corresponding ibuf_use_t value that indexes into +innobase_change_buffering_values[] array for the input +change buffering option name. +@return corresponding IBUF_USE_* value for the input variable +name, or IBUF_USE_COUNT if not able to find a match */ +static +ibuf_use_t +innodb_find_change_buffering_value( +/*===============================*/ + const char* input_name) /*!< in: input change buffering + option name */ +{ + ulint use; + + for (use = 0; use < UT_ARR_SIZE(innobase_change_buffering_values); + use++) { + /* found a match */ + if (!innobase_strcasecmp( + input_name, innobase_change_buffering_values[use])) { + return((ibuf_use_t)use); + } + } + + /* Did not find any match */ + return(IBUF_USE_COUNT); +} + +/*************************************************************//** +Check if it is a valid value of innodb_change_buffering. This function is registered as a callback with MySQL. @return 0 for valid innodb_change_buffering */ static @@ -10370,19 +10533,22 @@ innodb_change_buffering_validate( change_buffering_input = value->val_str(value, buff, &len); if (change_buffering_input != NULL) { - ulint use; + ibuf_use_t use; - for (use = 0; use < UT_ARR_SIZE(innobase_change_buffering_values); - use++) { - if (!innobase_strcasecmp( - change_buffering_input, - innobase_change_buffering_values[use])) { - *(ibuf_use_t*) save = (ibuf_use_t) use; - return(0); - } + use = innodb_find_change_buffering_value( + change_buffering_input); + + if (use != IBUF_USE_COUNT) { + /* Find a matching change_buffering option value. 
*/ + *static_cast<const char**>(save) = + innobase_change_buffering_values[use]; + + return(0); } } + /* No corresponding change buffering option for user supplied + "change_buffering_input" */ return(1); } @@ -10393,21 +10559,27 @@ static void innodb_change_buffering_update( /*===========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr, /*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to + system variable */ + void* var_ptr,/*!< out: where the + formal string goes */ + const void* save) /*!< in: immediate result + from check function */ { + ibuf_use_t use; + ut_a(var_ptr != NULL); ut_a(save != NULL); - ut_a((*(ibuf_use_t*) save) < IBUF_USE_COUNT); - ibuf_use = *(const ibuf_use_t*) save; + use = innodb_find_change_buffering_value( + *static_cast<const char*const*>(save)); + + ut_a(use < IBUF_USE_COUNT); - *(const char**) var_ptr = innobase_change_buffering_values[ibuf_use]; + ibuf_use = use; + *static_cast<const char**>(var_ptr) = + *static_cast<const char*const*>(save); } static int show_innodb_vars(THD *thd, SHOW_VAR *var, char *buff) @@ -10735,7 +10907,7 @@ static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering, "Buffer changes to reduce random access: " "OFF, ON, none, inserts.", innodb_change_buffering_validate, - innodb_change_buffering_update, NULL); + innodb_change_buffering_update, "inserts"); static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold, PLUGIN_VAR_RQCMDARG, diff --git a/storage/innodb_plugin/handler/ha_innodb.h b/storage/innodb_plugin/handler/ha_innodb.h index 8a3e1ccff82..9789e4ba639 100644 --- a/storage/innodb_plugin/handler/ha_innodb.h +++ b/storage/innodb_plugin/handler/ha_innodb.h @@ -231,7 +231,11 @@ the definitions are bracketed with #ifdef 
INNODB_COMPATIBILITY_HOOKS */ extern "C" { struct charset_info_st *thd_charset(MYSQL_THD thd); +#if MYSQL_VERSION_ID >= 50142 +LEX_STRING *thd_query_string(MYSQL_THD thd); +#else char **thd_query(MYSQL_THD thd); +#endif /** Get the file name of the MySQL binlog. * @return the name of the binlog file diff --git a/storage/innodb_plugin/handler/handler0alter.cc b/storage/innodb_plugin/handler/handler0alter.cc index e474c318c58..e936bfafa0e 100644 --- a/storage/innodb_plugin/handler/handler0alter.cc +++ b/storage/innodb_plugin/handler/handler0alter.cc @@ -894,6 +894,8 @@ error: prebuilt->trx->error_info = NULL; /* fall through */ default: + trx->error_state = DB_SUCCESS; + if (new_primary) { if (indexed_table != innodb_table) { row_merge_drop_table(trx, indexed_table); diff --git a/storage/innodb_plugin/include/btr0cur.h b/storage/innodb_plugin/include/btr0cur.h index 716f15c4267..7dc2eb63cf5 100644 --- a/storage/innodb_plugin/include/btr0cur.h +++ b/storage/innodb_plugin/include/btr0cur.h @@ -570,7 +570,7 @@ btr_copy_externally_stored_field_prefix( ulint local_len);/*!< in: length of data, in bytes */ /*******************************************************************//** Copies an externally stored field of a record to mem heap. -@return the field copied to heap */ +@return the field copied to heap, or NULL if the field is incomplete */ UNIV_INTERN byte* btr_rec_copy_externally_stored_field( diff --git a/storage/innodb_plugin/include/btr0sea.h b/storage/innodb_plugin/include/btr0sea.h index f98ba386f9c..20a2be7f877 100644 --- a/storage/innodb_plugin/include/btr0sea.h +++ b/storage/innodb_plugin/include/btr0sea.h @@ -190,7 +190,13 @@ btr_search_validate(void); /** Flag: has the search system been enabled? Protected by btr_search_latch and btr_search_enabled_mutex. 
*/ -extern char btr_search_enabled; +extern char btr_search_enabled; + +/** Flag: whether the search system has completed its disabling process, +It is set to TRUE right after buf_pool_drop_hash_index() in +btr_search_disable(), indicating hash index entries are cleaned up. +Protected by btr_search_latch and btr_search_enabled_mutex. */ +extern ibool btr_search_fully_disabled; /** The search info struct in an index */ struct btr_search_struct{ diff --git a/storage/innodb_plugin/include/buf0buf.ic b/storage/innodb_plugin/include/buf0buf.ic index 378c3590181..23db684806c 100644 --- a/storage/innodb_plugin/include/buf0buf.ic +++ b/storage/innodb_plugin/include/buf0buf.ic @@ -931,7 +931,12 @@ buf_page_hash_get( ut_a(buf_page_in_file(bpage)); ut_ad(bpage->in_page_hash); ut_ad(!bpage->in_zip_hash); +#if UNIV_WORD_SIZE == 4 + /* On 32-bit systems, there is no padding in + buf_page_t. On other systems, Valgrind could complain + about uninitialized pad bytes. */ UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage); +#endif } return(bpage); diff --git a/storage/innodb_plugin/include/buf0lru.h b/storage/innodb_plugin/include/buf0lru.h index 009430af35b..5a9cfd059f3 100644 --- a/storage/innodb_plugin/include/buf0lru.h +++ b/storage/innodb_plugin/include/buf0lru.h @@ -96,7 +96,7 @@ buf_LRU_insert_zip_clean( Try to free a block. If bpage is a descriptor of a compressed-only page, the descriptor object will be freed as well. -NOTE: If this function returns BUF_LRU_FREED, it will not temporarily +NOTE: If this function returns BUF_LRU_FREED, it will temporarily release buf_pool_mutex. Furthermore, the page frame will no longer be accessible via bpage. 
diff --git a/storage/innodb_plugin/include/db0err.h b/storage/innodb_plugin/include/db0err.h index 747e9b5364e..c841c2b4afe 100644 --- a/storage/innodb_plugin/include/db0err.h +++ b/storage/innodb_plugin/include/db0err.h @@ -28,6 +28,8 @@ Created 5/24/1996 Heikki Tuuri enum db_err { + DB_SUCCESS_LOCKED_REC = 9, /*!< like DB_SUCCESS, but a new + explicit record lock was created */ DB_SUCCESS = 10, /* The following are error codes */ diff --git a/storage/innodb_plugin/include/dict0boot.h b/storage/innodb_plugin/include/dict0boot.h index 1a13bd1503a..148b5cbe250 100644 --- a/storage/innodb_plugin/include/dict0boot.h +++ b/storage/innodb_plugin/include/dict0boot.h @@ -46,13 +46,14 @@ dict_hdr_get( /*=========*/ mtr_t* mtr); /*!< in: mtr */ /**********************************************************************//** -Returns a new row, table, index, or tree id. -@return the new id */ +Returns a new table, index, or space id. */ UNIV_INTERN -dulint +void dict_hdr_get_new_id( /*================*/ - ulint type); /*!< in: DICT_HDR_ROW_ID, ... */ + dulint* table_id, /*!< out: table id (not assigned if NULL) */ + dulint* index_id, /*!< out: index id (not assigned if NULL) */ + ulint* space_id); /*!< out: space id (not assigned if NULL) */ /**********************************************************************//** Returns a new row id. @return the new id */ @@ -119,7 +120,8 @@ dict_create(void); #define DICT_HDR_ROW_ID 0 /* The latest assigned row id */ #define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */ #define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */ -#define DICT_HDR_MIX_ID 24 /* Obsolete, always 0. 
*/ +#define DICT_HDR_MAX_SPACE_ID 24 /* The latest assigned space id, or 0*/ +#define DICT_HDR_MIX_ID_LOW 28 /* Obsolete,always DICT_HDR_FIRST_ID */ #define DICT_HDR_TABLES 32 /* Root of the table index tree */ #define DICT_HDR_TABLE_IDS 36 /* Root of the table index tree */ #define DICT_HDR_COLUMNS 40 /* Root of the column index tree */ diff --git a/storage/innodb_plugin/include/dict0dict.h b/storage/innodb_plugin/include/dict0dict.h index 79dcbb30de2..3a1bee4cd89 100644 --- a/storage/innodb_plugin/include/dict0dict.h +++ b/storage/innodb_plugin/include/dict0dict.h @@ -352,6 +352,7 @@ dict_create_foreign_constraints( name before it: test.table2; the default database id the database of parameter name */ + size_t sql_length, /*!< in: length of sql_string */ const char* name, /*!< in: table full name in the normalized form database_name/table_name */ diff --git a/storage/innodb_plugin/include/dict0mem.h b/storage/innodb_plugin/include/dict0mem.h index 9996fb59a75..2fce1e00927 100644 --- a/storage/innodb_plugin/include/dict0mem.h +++ b/storage/innodb_plugin/include/dict0mem.h @@ -382,7 +382,7 @@ initialized to 0, NULL or FALSE in dict_mem_table_create(). */ struct dict_table_struct{ dulint id; /*!< id of the table */ mem_heap_t* heap; /*!< memory heap */ - const char* name; /*!< table name */ + char* name; /*!< table name */ const char* dir_path_of_temp_table;/*!< NULL or the directory path where a TEMPORARY table that was explicitly created by a user should be placed if diff --git a/storage/innodb_plugin/include/fil0fil.h b/storage/innodb_plugin/include/fil0fil.h index de8ef9e9687..c894875b352 100644 --- a/storage/innodb_plugin/include/fil0fil.h +++ b/storage/innodb_plugin/include/fil0fil.h @@ -225,6 +225,16 @@ fil_space_create( 0 for uncompressed tablespaces */ ulint purpose);/*!< in: FIL_TABLESPACE, or FIL_LOG if log */ /*******************************************************************//** +Assigns a new space id for a new single-table tablespace. 
This works simply by +incrementing the global counter. If 4 billion id's is not enough, we may need +to recycle id's. +@return TRUE if assigned, FALSE if not */ +UNIV_INTERN +ibool +fil_assign_new_space_id( +/*====================*/ + ulint* space_id); /*!< in/out: space id */ +/*******************************************************************//** Returns the size of the space in pages. The tablespace must be cached in the memory cache. @return space size, 0 if space not found */ @@ -427,9 +437,7 @@ UNIV_INTERN ulint fil_create_new_single_table_tablespace( /*===================================*/ - ulint* space_id, /*!< in/out: space id; if this is != 0, - then this is an input parameter, - otherwise output */ + ulint space_id, /*!< in: space id */ const char* tablename, /*!< in: the table name in the usual databasename/tablename format of InnoDB, or a dir path to a temp @@ -498,16 +506,6 @@ UNIV_INTERN ulint fil_load_single_table_tablespaces(void); /*===================================*/ -/********************************************************************//** -If we need crash recovery, and we have called -fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(), -we can call this function to print an error message of orphaned .ibd files -for which there is not a data dictionary entry with a matching table name -and space id. */ -UNIV_INTERN -void -fil_print_orphaned_tablespaces(void); -/*================================*/ /*******************************************************************//** Returns TRUE if a single-table tablespace does not exist in the memory cache, or is being deleted there. 
diff --git a/storage/innodb_plugin/include/ha_prototypes.h b/storage/innodb_plugin/include/ha_prototypes.h index b737a00b3dc..e897a233a6a 100644 --- a/storage/innodb_plugin/include/ha_prototypes.h +++ b/storage/innodb_plugin/include/ha_prototypes.h @@ -215,11 +215,21 @@ innobase_casedn_str( /**********************************************************************//** Determines the connection character set. @return connection character set */ +UNIV_INTERN struct charset_info_st* innobase_get_charset( /*=================*/ void* mysql_thd); /*!< in: MySQL thread handle */ - +/**********************************************************************//** +Determines the current SQL statement. +@return SQL statement string */ +UNIV_INTERN +const char* +innobase_get_stmt( +/*==============*/ + void* mysql_thd, /*!< in: MySQL thread handle */ + size_t* length) /*!< out: length of the SQL statement */ + __attribute__((nonnull)); /******************************************************************//** This function is used to find the storage length in bytes of the first n characters for prefix indexes using a multibyte character set. The function diff --git a/storage/innodb_plugin/include/lock0lock.h b/storage/innodb_plugin/include/lock0lock.h index 7d76cbe3c75..b3e1e5c4537 100644 --- a/storage/innodb_plugin/include/lock0lock.h +++ b/storage/innodb_plugin/include/lock0lock.h @@ -340,11 +340,12 @@ lock_sec_rec_modify_check_and_lock( que_thr_t* thr, /*!< in: query thread */ mtr_t* mtr); /*!< in/out: mini-transaction */ /*********************************************************************//** -Like the counterpart for a clustered index below, but now we read a +Like lock_clust_rec_read_check_and_lock(), but reads a secondary index record. 
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_sec_rec_read_check_and_lock( /*=============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -371,9 +372,10 @@ if the query thread should anyway be suspended for some reason; if not, then puts the transaction and the query thread to the lock wait state and inserts a waiting request for a record lock to the lock queue. Sets the requested mode lock on the record. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_clust_rec_read_check_and_lock( /*===============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG diff --git a/storage/innodb_plugin/include/log0log.ic b/storage/innodb_plugin/include/log0log.ic index 139f4041a36..1ce00fd7313 100644 --- a/storage/innodb_plugin/include/log0log.ic +++ b/storage/innodb_plugin/include/log0log.ic @@ -433,7 +433,10 @@ void log_free_check(void) /*================*/ { - /* ut_ad(sync_thread_levels_empty()); */ + +#ifdef UNIV_SYNC_DEBUG + ut_ad(sync_thread_levels_empty_gen(TRUE)); +#endif /* UNIV_SYNC_DEBUG */ if (log_sys->check_flush_or_checkpoint) { diff --git a/storage/innodb_plugin/include/mach0data.ic b/storage/innodb_plugin/include/mach0data.ic index ef20356bd31..96d2417ac81 100644 --- a/storage/innodb_plugin/include/mach0data.ic +++ b/storage/innodb_plugin/include/mach0data.ic @@ -36,7 +36,7 @@ mach_write_to_1( ulint n) /*!< in: ulint integer to be stored, >= 0, < 256 */ { ut_ad(b); - ut_ad(n <= 0xFFUL); + ut_ad((n | 0xFFUL) <= 0xFFUL); b[0] = (byte)n; } @@ -65,7 +65,7 @@ mach_write_to_2( ulint n) /*!< in: ulint integer to be stored */ { ut_ad(b); - ut_ad(n <= 0xFFFFUL); + ut_ad((n | 0xFFFFUL) <= 0xFFFFUL); b[0] = (byte)(n >> 8); 
b[1] = (byte)(n); @@ -81,10 +81,7 @@ mach_read_from_2( /*=============*/ const byte* b) /*!< in: pointer to 2 bytes */ { - ut_ad(b); - return( ((ulint)(b[0]) << 8) - + (ulint)(b[1]) - ); + return(((ulint)(b[0]) << 8) | (ulint)(b[1])); } /********************************************************//** @@ -129,7 +126,7 @@ mach_write_to_3( ulint n) /*!< in: ulint integer to be stored */ { ut_ad(b); - ut_ad(n <= 0xFFFFFFUL); + ut_ad((n | 0xFFFFFFUL) <= 0xFFFFFFUL); b[0] = (byte)(n >> 16); b[1] = (byte)(n >> 8); @@ -148,8 +145,8 @@ mach_read_from_3( { ut_ad(b); return( ((ulint)(b[0]) << 16) - + ((ulint)(b[1]) << 8) - + (ulint)(b[2]) + | ((ulint)(b[1]) << 8) + | (ulint)(b[2]) ); } @@ -183,9 +180,9 @@ mach_read_from_4( { ut_ad(b); return( ((ulint)(b[0]) << 24) - + ((ulint)(b[1]) << 16) - + ((ulint)(b[2]) << 8) - + (ulint)(b[3]) + | ((ulint)(b[1]) << 16) + | ((ulint)(b[2]) << 8) + | (ulint)(b[3]) ); } @@ -721,7 +718,7 @@ mach_read_from_2_little_endian( /*===========================*/ const byte* buf) /*!< in: from where to read */ { - return((ulint)(*buf) + ((ulint)(*(buf + 1))) * 256); + return((ulint)(buf[0]) | ((ulint)(buf[1]) << 8)); } /*********************************************************//** diff --git a/storage/innodb_plugin/include/mem0pool.h b/storage/innodb_plugin/include/mem0pool.h index 5e93bf88a47..fa8be296ec9 100644 --- a/storage/innodb_plugin/include/mem0pool.h +++ b/storage/innodb_plugin/include/mem0pool.h @@ -100,18 +100,6 @@ mem_pool_get_reserved( /*==================*/ mem_pool_t* pool); /*!< in: memory pool */ /********************************************************************//** -Reserves the mem pool mutex. */ -UNIV_INTERN -void -mem_pool_mutex_enter(void); -/*======================*/ -/********************************************************************//** -Releases the mem pool mutex. 
*/ -UNIV_INTERN -void -mem_pool_mutex_exit(void); -/*=====================*/ -/********************************************************************//** Validates a memory pool. @return TRUE if ok */ UNIV_INTERN diff --git a/storage/innodb_plugin/include/rem0cmp.h b/storage/innodb_plugin/include/rem0cmp.h index 072f74267ea..2f751a38864 100644 --- a/storage/innodb_plugin/include/rem0cmp.h +++ b/storage/innodb_plugin/include/rem0cmp.h @@ -148,7 +148,9 @@ cmp_rec_rec_simple( const rec_t* rec2, /*!< in: physical record */ const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */ - const dict_index_t* index); /*!< in: data dictionary index */ + const dict_index_t* index, /*!< in: data dictionary index */ + ibool* null_eq);/*!< out: set to TRUE if + found matching null values */ /*************************************************************//** This function is used to compare two physical records. Only the common first fields are compared, and if an externally stored field is diff --git a/storage/innodb_plugin/include/row0mysql.h b/storage/innodb_plugin/include/row0mysql.h index d2a8734c61f..b69e657361b 100644 --- a/storage/innodb_plugin/include/row0mysql.h +++ b/storage/innodb_plugin/include/row0mysql.h @@ -253,15 +253,6 @@ row_table_got_default_clust_index( /*==============================*/ const dict_table_t* table); /*!< in: table */ /*********************************************************************//** -Calculates the key number used inside MySQL for an Innobase index. We have -to take into account if we generated a default clustered index for the table -@return the key number used inside MySQL */ -UNIV_INTERN -ulint -row_get_mysql_key_number_for_index( -/*===============================*/ - const dict_index_t* index); /*!< in: index */ -/*********************************************************************//** Does an update or delete of a row for MySQL. 
@return error code or DB_SUCCESS */ UNIV_INTERN @@ -273,27 +264,26 @@ row_update_for_mysql( row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL handle */ /*********************************************************************//** -This can only be used when srv_locks_unsafe_for_binlog is TRUE or -session is using a READ COMMITTED isolation level. Before -calling this function we must use trx_reset_new_rec_lock_info() and -trx_register_new_rec_lock() to store the information which new record locks -really were set. This function removes a newly set lock under prebuilt->pcur, -and also under prebuilt->clust_pcur. Currently, this is only used and tested -in the case of an UPDATE or a DELETE statement, where the row lock is of the -LOCK_X type. -Thus, this implements a 'mini-rollback' that releases the latest record -locks we set. -@return error code or DB_SUCCESS */ +This can only be used when srv_locks_unsafe_for_binlog is TRUE or this +session is using a READ COMMITTED or READ UNCOMMITTED isolation level. +Before calling this function row_search_for_mysql() must have +initialized prebuilt->new_rec_locks to store the information which new +record locks really were set. This function removes a newly set +clustered index record lock under prebuilt->pcur or +prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that +releases the latest clustered index record lock we set. +@return error code or DB_SUCCESS */ UNIV_INTERN int row_unlock_for_mysql( /*=================*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in MySQL + row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL handle */ - ibool has_latches_on_recs);/*!< TRUE if called so that we have - the latches on the records under pcur - and clust_pcur, and we do not need to - reposition the cursors. */ + ibool has_latches_on_recs);/*!< in: TRUE if called + so that we have the latches on + the records under pcur and + clust_pcur, and we do not need + to reposition the cursors. 
*/ /*********************************************************************//** Creates an query graph node of 'update' type to be used in the MySQL interface. @@ -403,6 +393,7 @@ row_table_add_foreign_constraints( FOREIGN KEY (a, b) REFERENCES table2(c, d), table2 can be written also with the database name before it: test.table2 */ + size_t sql_length, /*!< in: length of sql_string */ const char* name, /*!< in: table full name in the normalized form database_name/table_name */ @@ -631,7 +622,11 @@ struct row_prebuilt_struct { the secondary index, then this is set to TRUE */ unsigned templ_contains_blob:1;/*!< TRUE if the template contains - BLOB column(s) */ + a column with DATA_BLOB == + get_innobase_type_from_mysql_type(); + not to be confused with InnoDB + externally stored columns + (VARCHAR can be off-page too) */ mysql_row_templ_t* mysql_template;/*!< template used to transform rows fast between MySQL and Innobase formats; memory for this template @@ -710,18 +705,17 @@ struct row_prebuilt_struct { ulint new_rec_locks; /*!< normally 0; if srv_locks_unsafe_for_binlog is TRUE or session is using READ - COMMITTED isolation level, in a - cursor search, if we set a new - record lock on an index, this is - incremented; this is used in - releasing the locks under the - cursors if we are performing an - UPDATE and we determine after - retrieving the row that it does - not need to be locked; thus, - these can be used to implement a - 'mini-rollback' that releases - the latest record locks */ + COMMITTED or READ UNCOMMITTED + isolation level, set in + row_search_for_mysql() if we set a new + record lock on the secondary + or clustered index; this is + used in row_unlock_for_mysql() + when releasing the lock under + the cursor if we determine + after retrieving the row that + it does not need to be locked + ('mini-rollback') */ ulint mysql_prefix_len;/*!< byte offset of the end of the last requested column */ ulint mysql_row_len; /*!< length in bytes of a row in the diff 
--git a/storage/innodb_plugin/include/sync0rw.h b/storage/innodb_plugin/include/sync0rw.h index 6f7e13220c1..175f3deb77c 100644 --- a/storage/innodb_plugin/include/sync0rw.h +++ b/storage/innodb_plugin/include/sync0rw.h @@ -555,11 +555,12 @@ struct rw_lock_struct { unsigned cline:14; /*!< Line where created */ unsigned last_s_line:14; /*!< Line number where last time s-locked */ unsigned last_x_line:14; /*!< Line number where last time x-locked */ +#ifdef UNIV_DEBUG ulint magic_n; /*!< RW_LOCK_MAGIC_N */ -}; - /** Value of rw_lock_struct::magic_n */ #define RW_LOCK_MAGIC_N 22643 +#endif /* UNIV_DEBUG */ +}; #ifdef UNIV_SYNC_DEBUG /** The structure for storing debug info of an rw-lock */ diff --git a/storage/innodb_plugin/include/sync0sync.h b/storage/innodb_plugin/include/sync0sync.h index d470b823fc3..71c9920a10b 100644 --- a/storage/innodb_plugin/include/sync0sync.h +++ b/storage/innodb_plugin/include/sync0sync.h @@ -438,7 +438,7 @@ or row lock! */ #define SYNC_FILE_FORMAT_TAG 1200 /* Used to serialize access to the file format tag */ #define SYNC_DICT_OPERATION 1001 /* table create, drop, etc. 
reserve - this in X-mode, implicit or backround + this in X-mode; implicit or backround operations purge, rollback, foreign key checks reserve this in S-mode */ #define SYNC_DICT 1000 diff --git a/storage/innodb_plugin/include/trx0trx.h b/storage/innodb_plugin/include/trx0trx.h index 6872fb463c0..abd175d365b 100644 --- a/storage/innodb_plugin/include/trx0trx.h +++ b/storage/innodb_plugin/include/trx0trx.h @@ -560,9 +560,6 @@ struct trx_struct{ /*------------------------------*/ void* mysql_thd; /*!< MySQL thread handle corresponding to this trx, or NULL */ - char** mysql_query_str;/* pointer to the field in mysqld_thd - which contains the pointer to the - current SQL query string */ const char* mysql_log_file_name; /* if MySQL binlog is used, this field contains a pointer to the latest file diff --git a/storage/innodb_plugin/include/univ.i b/storage/innodb_plugin/include/univ.i index 49717760456..991625d6a8a 100644 --- a/storage/innodb_plugin/include/univ.i +++ b/storage/innodb_plugin/include/univ.i @@ -46,7 +46,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 1 #define INNODB_VERSION_MINOR 0 -#define INNODB_VERSION_BUGFIX 8 +#define INNODB_VERSION_BUGFIX 11 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; @@ -165,9 +165,9 @@ command. Not tested on Windows. 
*/ #define UNIV_COMPILE_TEST_FUNCS */ -#ifdef HAVE_purify +#if defined(HAVE_valgrind)&& defined(HAVE_VALGRIND_MEMCHECK_H) # define UNIV_DEBUG_VALGRIND -#endif /* HAVE_purify */ +#endif /* HAVE_VALGRIND */ #if 0 #define UNIV_DEBUG_VALGRIND /* Enable extra Valgrind instrumentation */ @@ -205,10 +205,6 @@ operations (very slow); also UNIV_DEBUG must be defined */ adaptive hash index */ #define UNIV_SRV_PRINT_LATCH_WAITS /* enable diagnostic output in sync0sync.c */ -#define UNIV_BTR_AVOID_COPY /* when splitting B-tree nodes, - do not move any records when - all the records would - be moved */ #define UNIV_BTR_PRINT /* enable functions for printing B-trees */ #define UNIV_ZIP_DEBUG /* extensive consistency checks @@ -294,6 +290,12 @@ management to ensure correct alignment for doubles etc. */ /* Maximum number of parallel threads in a parallelized operation */ #define UNIV_MAX_PARALLELISM 32 +/* The maximum length of a table name. This is the MySQL limit and is +defined in mysql_com.h like NAME_CHAR_LEN*SYSTEM_CHARSET_MBMAXLEN, the +number does not include a terminating '\0'. InnoDB probably can handle +longer names internally */ +#define MAX_TABLE_NAME_LEN 192 + /* UNIVERSAL TYPE DEFINITIONS ========================== diff --git a/storage/innodb_plugin/lock/lock0lock.c b/storage/innodb_plugin/lock/lock0lock.c index 04e5fe1a65a..77d69d11a2d 100644 --- a/storage/innodb_plugin/lock/lock0lock.c +++ b/storage/innodb_plugin/lock/lock0lock.c @@ -1733,11 +1733,11 @@ lock_rec_create( Enqueues a waiting request for a lock which cannot be granted immediately. Checks for deadlocks. 
@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or -DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another -transaction was chosen as a victim, and we got the lock immediately: -no need to wait then */ +DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that +there was a deadlock, but another transaction was chosen as a victim, +and we got the lock immediately: no need to wait then */ static -ulint +enum db_err lock_rec_enqueue_waiting( /*=====================*/ ulint type_mode,/*!< in: lock mode this @@ -1809,7 +1809,7 @@ lock_rec_enqueue_waiting( if (trx->wait_lock == NULL) { - return(DB_SUCCESS); + return(DB_SUCCESS_LOCKED_REC); } trx->que_state = TRX_QUE_LOCK_WAIT; @@ -1925,6 +1925,16 @@ somebody_waits: return(lock_rec_create(type_mode, block, heap_no, index, trx)); } +/** Record locking request status */ +enum lock_rec_req_status { + /** Failed to acquire a lock */ + LOCK_REC_FAIL, + /** Succeeded in acquiring a lock (implicit or already acquired) */ + LOCK_REC_SUCCESS, + /** Explicitly created a new lock */ + LOCK_REC_SUCCESS_CREATED +}; + /*********************************************************************//** This is a fast routine for locking a record in the most common cases: there are no explicit locks on the page, or there is just one lock, owned @@ -1932,9 +1942,9 @@ by this transaction, and of the right type_mode. This is a low-level function which does NOT look at implicit locks! Checks lock compatibility within explicit locks. This function sets a normal next-key lock, or in the case of a page supremum record, a gap type lock. 
-@return TRUE if locking succeeded */ +@return whether the locking succeeded */ UNIV_INLINE -ibool +enum lock_rec_req_status lock_rec_lock_fast( /*===============*/ ibool impl, /*!< in: if TRUE, no lock is set @@ -1973,19 +1983,19 @@ lock_rec_lock_fast( lock_rec_create(mode, block, heap_no, index, trx); } - return(TRUE); + return(LOCK_REC_SUCCESS_CREATED); } if (lock_rec_get_next_on_page(lock)) { - return(FALSE); + return(LOCK_REC_FAIL); } if (lock->trx != trx || lock->type_mode != (mode | LOCK_REC) || lock_rec_get_n_bits(lock) <= heap_no) { - return(FALSE); + return(LOCK_REC_FAIL); } if (!impl) { @@ -1994,10 +2004,11 @@ lock_rec_lock_fast( if (!lock_rec_get_nth_bit(lock, heap_no)) { lock_rec_set_nth_bit(lock, heap_no); + return(LOCK_REC_SUCCESS_CREATED); } } - return(TRUE); + return(LOCK_REC_SUCCESS); } /*********************************************************************//** @@ -2005,9 +2016,10 @@ This is the general, and slower, routine for locking a record. This is a low-level function which does NOT look at implicit locks! Checks lock compatibility within explicit locks. This function sets a normal next-key lock, or in the case of a page supremum record, a gap type lock. 
-@return DB_SUCCESS, DB_LOCK_WAIT, or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ static -ulint +enum db_err lock_rec_lock_slow( /*===============*/ ibool impl, /*!< in: if TRUE, no lock is set @@ -2024,7 +2036,6 @@ lock_rec_lock_slow( que_thr_t* thr) /*!< in: query thread */ { trx_t* trx; - ulint err; ut_ad(mutex_own(&kernel_mutex)); ut_ad((LOCK_MODE_MASK & mode) != LOCK_S @@ -2043,27 +2054,23 @@ lock_rec_lock_slow( /* The trx already has a strong enough lock on rec: do nothing */ - err = DB_SUCCESS; } else if (lock_rec_other_has_conflicting(mode, block, heap_no, trx)) { /* If another transaction has a non-gap conflicting request in the queue, as this transaction does not have a lock strong enough already granted on the record, we have to wait. */ - err = lock_rec_enqueue_waiting(mode, block, heap_no, - index, thr); - } else { - if (!impl) { - /* Set the requested lock on the record */ - - lock_rec_add_to_queue(LOCK_REC | mode, block, - heap_no, index, trx); - } + return(lock_rec_enqueue_waiting(mode, block, heap_no, + index, thr)); + } else if (!impl) { + /* Set the requested lock on the record */ - err = DB_SUCCESS; + lock_rec_add_to_queue(LOCK_REC | mode, block, + heap_no, index, trx); + return(DB_SUCCESS_LOCKED_REC); } - return(err); + return(DB_SUCCESS); } /*********************************************************************//** @@ -2072,9 +2079,10 @@ possible, enqueues a waiting lock request. This is a low-level function which does NOT look at implicit locks! Checks lock compatibility within explicit locks. This function sets a normal next-key lock, or in the case of a page supremum record, a gap type lock. 
-@return DB_SUCCESS, DB_LOCK_WAIT, or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ static -ulint +enum db_err lock_rec_lock( /*==========*/ ibool impl, /*!< in: if TRUE, no lock is set @@ -2090,8 +2098,6 @@ lock_rec_lock( dict_index_t* index, /*!< in: index of record */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; - ut_ad(mutex_own(&kernel_mutex)); ut_ad((LOCK_MODE_MASK & mode) != LOCK_S || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); @@ -2103,18 +2109,20 @@ lock_rec_lock( || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP || mode - (LOCK_MODE_MASK & mode) == 0); - if (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) { - - /* We try a simplified and faster subroutine for the most - common cases */ - - err = DB_SUCCESS; - } else { - err = lock_rec_lock_slow(impl, mode, block, - heap_no, index, thr); + /* We try a simplified and faster subroutine for the most + common cases */ + switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) { + case LOCK_REC_SUCCESS: + return(DB_SUCCESS); + case LOCK_REC_SUCCESS_CREATED: + return(DB_SUCCESS_LOCKED_REC); + case LOCK_REC_FAIL: + return(lock_rec_lock_slow(impl, mode, block, + heap_no, index, thr)); } - return(err); + ut_error; + return(DB_ERROR); } /*********************************************************************//** @@ -3935,8 +3943,8 @@ lock_rec_unlock( const rec_t* rec, /*!< in: record */ enum lock_mode lock_mode)/*!< in: LOCK_S or LOCK_X */ { + lock_t* first_lock; lock_t* lock; - lock_t* release_lock = NULL; ulint heap_no; ut_ad(trx && rec); @@ -3946,48 +3954,40 @@ lock_rec_unlock( mutex_enter(&kernel_mutex); - lock = lock_rec_get_first(block, heap_no); + first_lock = lock_rec_get_first(block, heap_no); /* Find the last lock with the same lock_mode and transaction from the record. 
*/ - while (lock != NULL) { + for (lock = first_lock; lock != NULL; + lock = lock_rec_get_next(heap_no, lock)) { if (lock->trx == trx && lock_get_mode(lock) == lock_mode) { - release_lock = lock; ut_a(!lock_get_wait(lock)); + lock_rec_reset_nth_bit(lock, heap_no); + goto released; } - - lock = lock_rec_get_next(heap_no, lock); } - /* If a record lock is found, release the record lock */ - - if (UNIV_LIKELY(release_lock != NULL)) { - lock_rec_reset_nth_bit(release_lock, heap_no); - } else { - mutex_exit(&kernel_mutex); - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: unlock row could not" - " find a %lu mode lock on the record\n", - (ulong) lock_mode); + mutex_exit(&kernel_mutex); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: unlock row could not" + " find a %lu mode lock on the record\n", + (ulong) lock_mode); - return; - } + return; +released: /* Check if we can now grant waiting lock requests */ - lock = lock_rec_get_first(block, heap_no); - - while (lock != NULL) { + for (lock = first_lock; lock != NULL; + lock = lock_rec_get_next(heap_no, lock)) { if (lock_get_wait(lock) && !lock_rec_has_to_wait_in_queue(lock)) { /* Grant the lock */ lock_grant(lock); } - - lock = lock_rec_get_next(heap_no, lock); } mutex_exit(&kernel_mutex); @@ -5080,7 +5080,14 @@ lock_rec_insert_check_and_lock( lock_mutex_exit_kernel(); - if ((err == DB_SUCCESS) && !dict_index_is_clust(index)) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + /* fall through */ + case DB_SUCCESS: + if (dict_index_is_clust(index)) { + break; + } /* Update the page max trx id field */ page_update_max_trx_id(block, buf_block_get_page_zip(block), @@ -5203,6 +5210,10 @@ lock_clust_rec_modify_check_and_lock( ut_ad(lock_rec_queue_validate(block, rec, index, offsets)); + if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) { + err = DB_SUCCESS; + } + return(err); } @@ -5269,22 +5280,27 @@ lock_sec_rec_modify_check_and_lock( } #endif /* UNIV_DEBUG */ - if (err == 
DB_SUCCESS) { + if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) { /* Update the page max trx id field */ + /* It might not be necessary to do this if + err == DB_SUCCESS (no new lock created), + but it should not cost too much performance. */ page_update_max_trx_id(block, buf_block_get_page_zip(block), thr_get_trx(thr)->id, mtr); + err = DB_SUCCESS; } return(err); } /*********************************************************************//** -Like the counterpart for a clustered index below, but now we read a +Like lock_clust_rec_read_check_and_lock(), but reads a secondary index record. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_sec_rec_read_check_and_lock( /*=============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -5305,8 +5321,8 @@ lock_sec_rec_read_check_and_lock( LOCK_REC_NOT_GAP */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; - ulint heap_no; + enum db_err err; + ulint heap_no; ut_ad(!dict_index_is_clust(index)); ut_ad(block->frame == page_align(rec)); @@ -5357,9 +5373,10 @@ if the query thread should anyway be suspended for some reason; if not, then puts the transaction and the query thread to the lock wait state and inserts a waiting request for a record lock to the lock queue. Sets the requested mode lock on the record. 
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_clust_rec_read_check_and_lock( /*===============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -5380,8 +5397,8 @@ lock_clust_rec_read_check_and_lock( LOCK_REC_NOT_GAP */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; - ulint heap_no; + enum db_err err; + ulint heap_no; ut_ad(dict_index_is_clust(index)); ut_ad(block->frame == page_align(rec)); @@ -5452,17 +5469,22 @@ lock_clust_rec_read_check_and_lock_alt( mem_heap_t* tmp_heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; - ulint ret; + ulint err; rec_offs_init(offsets_); offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &tmp_heap); - ret = lock_clust_rec_read_check_and_lock(flags, block, rec, index, + err = lock_clust_rec_read_check_and_lock(flags, block, rec, index, offsets, mode, gap_mode, thr); if (tmp_heap) { mem_heap_free(tmp_heap); } - return(ret); + + if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) { + err = DB_SUCCESS; + } + + return(err); } /*******************************************************************//** diff --git a/storage/innodb_plugin/mem/mem0mem.c b/storage/innodb_plugin/mem/mem0mem.c index c0ce8a3e1ac..1dd4db30841 100644 --- a/storage/innodb_plugin/mem/mem0mem.c +++ b/storage/innodb_plugin/mem/mem0mem.c @@ -367,7 +367,7 @@ mem_heap_create_block( block->line = line; #ifdef MEM_PERIODIC_CHECK - mem_pool_mutex_enter(); + mutex_enter(&(mem_comm_pool->mutex)); if (!mem_block_list_inited) { mem_block_list_inited = TRUE; @@ -376,7 +376,7 @@ mem_heap_create_block( UT_LIST_ADD_LAST(mem_block_list, mem_block_list, block); - mem_pool_mutex_exit(); + mutex_exit(&(mem_comm_pool->mutex)); #endif mem_block_set_len(block, len); mem_block_set_type(block, type); @@ -479,11 +479,11 @@ mem_heap_block_free( UT_LIST_REMOVE(list, 
heap->base, block); #ifdef MEM_PERIODIC_CHECK - mem_pool_mutex_enter(); + mutex_enter(&(mem_comm_pool->mutex)); UT_LIST_REMOVE(mem_block_list, mem_block_list, block); - mem_pool_mutex_exit(); + mutex_exit(&(mem_comm_pool->mutex)); #endif ut_ad(heap->total_size >= block->len); @@ -556,7 +556,7 @@ mem_validate_all_blocks(void) { mem_block_t* block; - mem_pool_mutex_enter(); + mutex_enter(&(mem_comm_pool->mutex)); block = UT_LIST_GET_FIRST(mem_block_list); @@ -568,6 +568,6 @@ mem_validate_all_blocks(void) block = UT_LIST_GET_NEXT(mem_block_list, block); } - mem_pool_mutex_exit(); + mutex_exit(&(mem_comm_pool->mutex)); } #endif diff --git a/storage/innodb_plugin/mem/mem0pool.c b/storage/innodb_plugin/mem/mem0pool.c index c4f8af607e0..3291453eeb5 100644 --- a/storage/innodb_plugin/mem/mem0pool.c +++ b/storage/innodb_plugin/mem/mem0pool.c @@ -34,6 +34,7 @@ Created 5/12/1997 Heikki Tuuri #include "ut0lst.h" #include "ut0byte.h" #include "mem0mem.h" +#include "srv0start.h" /* We would like to use also the buffer frames to allocate memory. This would be desirable, because then the memory consumption of the database @@ -121,23 +122,33 @@ mysql@lists.mysql.com */ UNIV_INTERN ulint mem_n_threads_inside = 0; /********************************************************************//** -Reserves the mem pool mutex. */ -UNIV_INTERN +Reserves the mem pool mutex if we are not in server shutdown. Use +this function only in memory free functions, since only memory +free functions are used during server shutdown. */ +UNIV_INLINE void -mem_pool_mutex_enter(void) -/*======================*/ +mem_pool_mutex_enter( +/*=================*/ + mem_pool_t* pool) /*!< in: memory pool */ { - mutex_enter(&(mem_comm_pool->mutex)); + if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) { + mutex_enter(&(pool->mutex)); + } } /********************************************************************//** -Releases the mem pool mutex. */ -UNIV_INTERN +Releases the mem pool mutex if we are not in server shutdown. 
As +its corresponding mem_pool_mutex_enter() function, use it only +in memory free functions */ +UNIV_INLINE void -mem_pool_mutex_exit(void) -/*=====================*/ +mem_pool_mutex_exit( +/*================*/ + mem_pool_t* pool) /*!< in: memory pool */ { - mutex_exit(&(mem_comm_pool->mutex)); + if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) { + mutex_exit(&(pool->mutex)); + } } /********************************************************************//** @@ -567,7 +578,7 @@ mem_area_free( n = ut_2_log(size); - mutex_enter(&(pool->mutex)); + mem_pool_mutex_enter(pool); mem_n_threads_inside++; ut_a(mem_n_threads_inside == 1); @@ -595,7 +606,7 @@ mem_area_free( pool->reserved += ut_2_exp(n); mem_n_threads_inside--; - mutex_exit(&(pool->mutex)); + mem_pool_mutex_exit(pool); mem_area_free(new_ptr, pool); @@ -611,7 +622,7 @@ mem_area_free( } mem_n_threads_inside--; - mutex_exit(&(pool->mutex)); + mem_pool_mutex_exit(pool); ut_ad(mem_pool_validate(pool)); } @@ -630,7 +641,7 @@ mem_pool_validate( ulint free; ulint i; - mutex_enter(&(pool->mutex)); + mem_pool_mutex_enter(pool); free = 0; @@ -658,7 +669,7 @@ mem_pool_validate( ut_a(free + pool->reserved == pool->size); - mutex_exit(&(pool->mutex)); + mem_pool_mutex_exit(pool); return(TRUE); } diff --git a/storage/innodb_plugin/os/os0file.c b/storage/innodb_plugin/os/os0file.c index b244e3974b3..9f937b9def2 100644 --- a/storage/innodb_plugin/os/os0file.c +++ b/storage/innodb_plugin/os/os0file.c @@ -1339,7 +1339,11 @@ try_again: /* When srv_file_per_table is on, file creation failure may not be critical to the whole instance. Do not crash the server in - case of unknown errors. */ + case of unknown errors. + Please note "srv_file_per_table" is a global variable with + no explicit synchronization protection. It could be + changed during this execution path. 
It might not have the + same value as the one when building the table definition */ if (srv_file_per_table) { retry = os_file_handle_error_no_exit(name, create_mode == OS_FILE_CREATE ? @@ -1426,7 +1430,11 @@ try_again: /* When srv_file_per_table is on, file creation failure may not be critical to the whole instance. Do not crash the server in - case of unknown errors. */ + case of unknown errors. + Please note "srv_file_per_table" is a global variable with + no explicit synchronization protection. It could be + changed during this execution path. It might not have the + same value as the one when building the table definition */ if (srv_file_per_table) { retry = os_file_handle_error_no_exit(name, create_mode == OS_FILE_CREATE ? diff --git a/storage/innodb_plugin/page/page0zip.c b/storage/innodb_plugin/page/page0zip.c index aa5e39ff04a..d3b1edefc6b 100644 --- a/storage/innodb_plugin/page/page0zip.c +++ b/storage/innodb_plugin/page/page0zip.c @@ -571,7 +571,7 @@ page_zip_dir_encode( /* Traverse the list of stored records in the collation order, starting from the first user record. */ - rec = page + PAGE_NEW_INFIMUM, TRUE; + rec = page + PAGE_NEW_INFIMUM; i = 0; @@ -1464,6 +1464,7 @@ page_zip_fields_free( dict_table_t* table = index->table; mem_heap_free(index->heap); mutex_free(&(table->autoinc_mutex)); + ut_free(table->name); mem_heap_free(table->heap); } } @@ -3117,8 +3118,13 @@ page_zip_validate_low( temp_page_zip in a debugger when running valgrind --db-attach. */ VALGRIND_GET_VBITS(page, temp_page, UNIV_PAGE_SIZE); UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE); +# if UNIV_WORD_SIZE == 4 VALGRIND_GET_VBITS(page_zip, &temp_page_zip, sizeof temp_page_zip); + /* On 32-bit systems, there is no padding in page_zip_des_t. + On other systems, Valgrind could complain about uninitialized + pad bytes. 
*/ UNIV_MEM_ASSERT_RW(page_zip, sizeof *page_zip); +# endif VALGRIND_GET_VBITS(page_zip->data, temp_page, page_zip_get_size(page_zip)); UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); diff --git a/storage/innodb_plugin/plug.in.disabled b/storage/innodb_plugin/plug.in index e638332d74a..38e14d9d2fd 100644 --- a/storage/innodb_plugin/plug.in.disabled +++ b/storage/innodb_plugin/plug.in @@ -15,7 +15,7 @@ # MYSQL_STORAGE_ENGINE(innodb_plugin,, [InnoDB Storage Engine], - [Transactional Tables using InnoDB], [max,max-no-ndb]) + [Transactional Tables using InnoDB], []) MYSQL_PLUGIN_DIRECTORY(innodb_plugin, [storage/innodb_plugin]) MYSQL_PLUGIN_DYNAMIC(innodb_plugin, [ha_innodb_plugin.la]) MYSQL_PLUGIN_ACTIONS(innodb_plugin, [ diff --git a/storage/innodb_plugin/rem/rem0cmp.c b/storage/innodb_plugin/rem/rem0cmp.c index e6dab0bc66b..35b67992558 100644 --- a/storage/innodb_plugin/rem/rem0cmp.c +++ b/storage/innodb_plugin/rem/rem0cmp.c @@ -706,7 +706,9 @@ cmp_rec_rec_simple( const rec_t* rec2, /*!< in: physical record */ const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) 
*/ - const dict_index_t* index) /*!< in: data dictionary index */ + const dict_index_t* index, /*!< in: data dictionary index */ + ibool* null_eq)/*!< out: set to TRUE if + found matching null values */ { ulint rec1_f_len; /*!< length of current field in rec1 */ const byte* rec1_b_ptr; /*!< pointer to the current byte @@ -753,6 +755,9 @@ cmp_rec_rec_simple( || rec2_f_len == UNIV_SQL_NULL) { if (rec1_f_len == rec2_f_len) { + if (null_eq) { + *null_eq = TRUE; + } goto next_field; diff --git a/storage/innodb_plugin/row/row0ins.c b/storage/innodb_plugin/row/row0ins.c index 230dc45dadc..a193bf21f7c 100644 --- a/storage/innodb_plugin/row/row0ins.c +++ b/storage/innodb_plugin/row/row0ins.c @@ -51,6 +51,15 @@ Created 4/20/1996 Heikki Tuuri #define ROW_INS_PREV 1 #define ROW_INS_NEXT 2 +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchronization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ /*********************************************************************//** Creates an insert node struct. @@ -1121,9 +1130,9 @@ nonstandard_exit_func: /*********************************************************************//** Sets a shared lock on a record. Used in locking possible duplicate key records and also in checking foreign key constraints.
-@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ static -ulint +enum db_err row_ins_set_shared_rec_lock( /*========================*/ ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or @@ -1134,7 +1143,7 @@ row_ins_set_shared_rec_lock( const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; + enum db_err err; ut_ad(rec_offs_validate(rec, index, offsets)); @@ -1152,9 +1161,9 @@ row_ins_set_shared_rec_lock( /*********************************************************************//** Sets a exclusive lock on a record. Used in locking possible duplicate key records -@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ static -ulint +enum db_err row_ins_set_exclusive_rec_lock( /*===========================*/ ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or @@ -1165,7 +1174,7 @@ row_ins_set_exclusive_rec_lock( const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; + enum db_err err; ut_ad(rec_offs_validate(rec, index, offsets)); @@ -1205,7 +1214,6 @@ row_ins_check_foreign_constraint( dict_index_t* check_index; ulint n_fields_cmp; btr_pcur_t pcur; - ibool moved; int cmp; ulint err; ulint i; @@ -1336,13 +1344,13 @@ run_again: /* Scan index records and check if there is a matching record */ - for (;;) { + do { const rec_t* rec = btr_pcur_get_rec(&pcur); const buf_block_t* block = btr_pcur_get_block(&pcur); if (page_rec_is_infimum(rec)) { - goto next_rec; + continue; } offsets = rec_get_offsets(rec, check_index, @@ -1353,12 +1361,13 @@ run_again: err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - - break; + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + continue; + default: + goto end_scan; } - - goto next_rec; } cmp = cmp_dtuple_rec(entry, rec, offsets); @@ -1369,9 +1378,12 @@ 
run_again: err = row_ins_set_shared_rec_lock( LOCK_ORDINARY, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: break; + default: + goto end_scan; } } else { /* Found a matching record. Lock only @@ -1382,15 +1394,18 @@ run_again: LOCK_REC_NOT_GAP, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: break; + default: + goto end_scan; } if (check_ref) { err = DB_SUCCESS; - break; + goto end_scan; } else if (foreign->type != 0) { /* There is an ON UPDATE or ON DELETE condition: check them in a separate @@ -1416,7 +1431,7 @@ run_again: err = DB_FOREIGN_DUPLICATE_KEY; } - break; + goto end_scan; } /* row_ins_foreign_check_on_constraint @@ -1429,49 +1444,41 @@ run_again: thr, foreign, rec, entry); err = DB_ROW_IS_REFERENCED; - break; + goto end_scan; } } - } + } else { + ut_a(cmp < 0); - if (cmp < 0) { err = row_ins_set_shared_rec_lock( LOCK_GAP, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - break; - } - - if (check_ref) { - err = DB_NO_REFERENCED_ROW; - row_ins_foreign_report_add_err( - trx, foreign, rec, entry); - } else { - err = DB_SUCCESS; + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + if (check_ref) { + err = DB_NO_REFERENCED_ROW; + row_ins_foreign_report_add_err( + trx, foreign, rec, entry); + } else { + err = DB_SUCCESS; + } } - break; + goto end_scan; } + } while (btr_pcur_move_to_next(&pcur, &mtr)); - ut_a(cmp == 0); -next_rec: - moved = btr_pcur_move_to_next(&pcur, &mtr); - - if (!moved) { - if (check_ref) { - rec = btr_pcur_get_rec(&pcur); - row_ins_foreign_report_add_err( - trx, foreign, rec, entry); - err = DB_NO_REFERENCED_ROW; - } else { - err = DB_SUCCESS; - } - - break; - } + if (check_ref) { + row_ins_foreign_report_add_err( + trx, foreign, btr_pcur_get_rec(&pcur), entry); + err = DB_NO_REFERENCED_ROW; + } else { + err = DB_SUCCESS; } 
+end_scan: btr_pcur_close(&pcur); mtr_commit(&mtr); @@ -1719,9 +1726,13 @@ row_ins_scan_sec_index_for_duplicate( rec, index, offsets, thr); } - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: break; + default: + goto end_scan; } if (page_rec_is_supremum(rec)) { @@ -1738,17 +1749,15 @@ row_ins_scan_sec_index_for_duplicate( thr_get_trx(thr)->error_info = index; - break; + goto end_scan; } + } else { + ut_a(cmp < 0); + goto end_scan; } - - if (cmp < 0) { - break; - } - - ut_a(cmp == 0); } while (btr_pcur_move_to_next(&pcur, &mtr)); +end_scan: if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } @@ -1837,7 +1846,11 @@ row_ins_duplicate_error_in_clust( cursor->index, offsets, thr); } - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto func_exit; } @@ -1877,7 +1890,11 @@ row_ins_duplicate_error_in_clust( rec, cursor->index, offsets, thr); } - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto func_exit; } diff --git a/storage/innodb_plugin/row/row0merge.c b/storage/innodb_plugin/row/row0merge.c index d61d626f92e..56a68b58225 100644 --- a/storage/innodb_plugin/row/row0merge.c +++ b/storage/innodb_plugin/row/row0merge.c @@ -717,14 +717,16 @@ row_merge_read( } /********************************************************************//** -Read a merge block from the file system. +Write a merge block to the file system. 
@return TRUE if request was successful, FALSE if fail */ static ibool row_merge_write( /*============*/ int fd, /*!< in: file descriptor */ - ulint offset, /*!< in: offset where to write */ + ulint offset, /*!< in: offset where to write, + in number of row_merge_block_t + elements */ const void* buf) /*!< in: data */ { ib_uint64_t ofs = ((ib_uint64_t) offset) @@ -1075,11 +1077,14 @@ row_merge_cmp( record to be compared */ const ulint* offsets1, /*!< in: first record offsets */ const ulint* offsets2, /*!< in: second record offsets */ - const dict_index_t* index) /*!< in: index */ + const dict_index_t* index, /*!< in: index */ + ibool* null_eq) /*!< out: set to TRUE if + found matching null values */ { int cmp; - cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index); + cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index, + null_eq); #ifdef UNIV_DEBUG if (row_merge_print_cmp) { @@ -1445,11 +1450,13 @@ corrupt: } while (mrec0 && mrec1) { + ibool null_eq = FALSE; switch (row_merge_cmp(mrec0, mrec1, - offsets0, offsets1, index)) { + offsets0, offsets1, index, + &null_eq)) { case 0: if (UNIV_UNLIKELY - (dict_index_is_unique(index))) { + (dict_index_is_unique(index) && !null_eq)) { innobase_rec_to_mysql(table, mrec0, index, offsets0); mem_heap_free(heap); @@ -1571,22 +1578,28 @@ row_merge( const dict_index_t* index, /*!< in: index being created */ merge_file_t* file, /*!< in/out: file containing index entries */ - ulint* half, /*!< in/out: half the file */ row_merge_block_t* block, /*!< in/out: 3 buffers */ int* tmpfd, /*!< in/out: temporary file handle */ - TABLE* table) /*!< in/out: MySQL table, for + TABLE* table, /*!< in/out: MySQL table, for reporting erroneous key value if applicable */ + ulint* num_run,/*!< in/out: Number of runs remaining + to be merged */ + ulint* run_offset) /*!< in/out: Array contains the + first offset number for each merge + run */ { ulint foffs0; /*!< first input offset */ ulint foffs1; /*!< second input offset */ ulint
error; /*!< error code */ merge_file_t of; /*!< output file */ - const ulint ihalf = *half; + const ulint ihalf = run_offset[*num_run / 2]; /*!< half the input file */ - ulint ohalf; /*!< half the output file */ + ulint n_run = 0; + /*!< num of runs generated from this merge */ UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]); + ut_ad(ihalf < file->offset); of.fd = *tmpfd; @@ -1594,17 +1607,20 @@ row_merge( of.n_rec = 0; /* Merge blocks to the output file. */ - ohalf = 0; foffs0 = 0; foffs1 = ihalf; + UNIV_MEM_INVALID(run_offset, *num_run * sizeof *run_offset); + for (; foffs0 < ihalf && foffs1 < file->offset; foffs0++, foffs1++) { - ulint ahalf; /*!< arithmetic half the input file */ if (UNIV_UNLIKELY(trx_is_interrupted(trx))) { return(DB_INTERRUPTED); } + /* Remember the offset number for this run */ + run_offset[n_run++] = of.offset; + error = row_merge_blocks(index, file, block, &foffs0, &foffs1, &of, table); @@ -1612,21 +1628,6 @@ row_merge( return(error); } - /* Record the offset of the output file when - approximately half the output has been generated. In - this way, the next invocation of row_merge() will - spend most of the time in this loop. The initial - estimate is ohalf==0. */ - ahalf = file->offset / 2; - ut_ad(ohalf <= of.offset); - - /* Improve the estimate until reaching half the input - file size, or we can not get any closer to it. All - comparands should be non-negative when !(ohalf < ahalf) - because ohalf <= of.offset. */ - if (ohalf < ahalf || of.offset - ahalf < ohalf - ahalf) { - ohalf = of.offset; - } } /* Copy the last blocks, if there are any. 
*/ @@ -1636,6 +1637,9 @@ row_merge( return(DB_INTERRUPTED); } + /* Remember the offset number for this run */ + run_offset[n_run++] = of.offset; + if (!row_merge_blocks_copy(index, file, block, &foffs0, &of)) { return(DB_CORRUPTION); } @@ -1648,6 +1652,9 @@ row_merge( return(DB_INTERRUPTED); } + /* Remember the offset number for this run */ + run_offset[n_run++] = of.offset; + if (!row_merge_blocks_copy(index, file, block, &foffs1, &of)) { return(DB_CORRUPTION); } @@ -1659,10 +1666,23 @@ row_merge( return(DB_CORRUPTION); } + ut_ad(n_run <= *num_run); + + *num_run = n_run; + + /* Each run can contain one or more offsets. As merge goes on, + the number of runs (to merge) will reduce until we have one + single run. So the number of runs will always be smaller than + the number of offsets in file */ + ut_ad((*num_run) <= file->offset); + + /* The number of offsets in output file is always equal or + smaller than input file */ + ut_ad(of.offset <= file->offset); + /* Swap file descriptors for the next pass. */ *tmpfd = file->fd; *file = of; - *half = ohalf; UNIV_MEM_INVALID(block[0], 3 * sizeof block[0]); @@ -1687,27 +1707,44 @@ row_merge_sort( if applicable */ { ulint half = file->offset / 2; + ulint num_runs; + ulint* run_offset; + ulint error = DB_SUCCESS; + + /* Record the number of merge runs we need to perform */ + num_runs = file->offset; + + /* If num_runs are less than 1, nothing to merge */ + if (num_runs <= 1) { + return(error); + } + + /* "run_offset" records each run's first offset number */ + run_offset = (ulint*) mem_alloc(file->offset * sizeof(ulint)); + + /* This tells row_merge() where to start for the first round + of merge. */ + run_offset[half] = half; /* The file should always contain at least one byte (the end of file marker). Thus, it must be at least one block. 
*/ ut_ad(file->offset > 0); + /* Merge the runs until we have one big run */ do { - ulint error; + error = row_merge(trx, index, file, block, tmpfd, + table, &num_runs, run_offset); - error = row_merge(trx, index, file, &half, - block, tmpfd, table); + UNIV_MEM_ASSERT_RW(run_offset, num_runs * sizeof *run_offset); if (error != DB_SUCCESS) { - return(error); + break; } + } while (num_runs > 1); - /* half > 0 should hold except when the file consists - of one block. No need to merge further then. */ - ut_ad(half > 0 || file->offset == 1); - } while (half < file->offset && half > 0); + mem_free(run_offset); - return(DB_SUCCESS); + return(error); } /*************************************************************//** @@ -1743,6 +1780,11 @@ row_merge_copy_blobs( (below). */ data = btr_rec_copy_externally_stored_field( mrec, offsets, zip_size, i, &len, heap); + /* Because we have locked the table, any records + written by incomplete transactions must have been + rolled back already. There must not be any incomplete + BLOB columns. 
*/ + ut_a(data); dfield_set_data(field, data, len); } @@ -2087,13 +2129,16 @@ row_merge_drop_temp_indexes(void) btr_pcur_store_position(&pcur, &mtr); btr_pcur_commit_specify_mtr(&pcur, &mtr); - table = dict_load_table_on_id(table_id); + table = dict_table_get_on_id_low(table_id); if (table) { dict_index_t* index; + dict_index_t* next_index; for (index = dict_table_get_first_index(table); - index; index = dict_table_get_next_index(index)) { + index; index = next_index) { + + next_index = dict_table_get_next_index(index); if (*index->name == TEMP_INDEX_PREFIX) { row_merge_drop_index(index, table, trx); @@ -2296,7 +2341,7 @@ row_merge_rename_tables( { ulint err = DB_ERROR; pars_info_t* info; - const char* old_name= old_table->name; + char old_name[MAX_TABLE_NAME_LEN + 1]; ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); ut_ad(old_table != new_table); @@ -2304,6 +2349,17 @@ row_merge_rename_tables( ut_a(trx->dict_operation_lock_mode == RW_X_LATCH); + /* store the old/current name to an automatic variable */ + if (strlen(old_table->name) + 1 <= sizeof(old_name)) { + memcpy(old_name, old_table->name, strlen(old_table->name) + 1); + } else { + ut_print_timestamp(stderr); + fprintf(stderr, "InnoDB: too long table name: '%s', " + "max length is %d\n", old_table->name, + MAX_TABLE_NAME_LEN); + ut_error; + } + trx->op_info = "renaming tables"; /* We use the private SQL parser of Innobase to generate the query diff --git a/storage/innodb_plugin/row/row0mysql.c b/storage/innodb_plugin/row/row0mysql.c index 24abf8067f2..feeb7fc80b7 100644 --- a/storage/innodb_plugin/row/row0mysql.c +++ b/storage/innodb_plugin/row/row0mysql.c @@ -522,6 +522,7 @@ handle_new_error: case DB_CANNOT_ADD_CONSTRAINT: case DB_TOO_MANY_CONCURRENT_TRXS: case DB_OUT_OF_FILE_SPACE: + case DB_INTERRUPTED: if (savept) { /* Roll back the latest, possibly incomplete insertion or update */ @@ -624,6 +625,8 @@ row_create_prebuilt( prebuilt->select_lock_type = LOCK_NONE; prebuilt->stored_select_lock_type = 
99999999; + UNIV_MEM_INVALID(&prebuilt->stored_select_lock_type, + sizeof prebuilt->stored_select_lock_type); prebuilt->search_tuple = dtuple_create( heap, 2 * dict_table_get_n_cols(table)); @@ -1427,27 +1430,26 @@ run_again: } /*********************************************************************//** -This can only be used when srv_locks_unsafe_for_binlog is TRUE or -this session is using a READ COMMITTED isolation level. Before -calling this function we must use trx_reset_new_rec_lock_info() and -trx_register_new_rec_lock() to store the information which new record locks -really were set. This function removes a newly set lock under prebuilt->pcur, -and also under prebuilt->clust_pcur. Currently, this is only used and tested -in the case of an UPDATE or a DELETE statement, where the row lock is of the -LOCK_X type. -Thus, this implements a 'mini-rollback' that releases the latest record -locks we set. -@return error code or DB_SUCCESS */ +This can only be used when srv_locks_unsafe_for_binlog is TRUE or this +session is using a READ COMMITTED or READ UNCOMMITTED isolation level. +Before calling this function row_search_for_mysql() must have +initialized prebuilt->new_rec_locks to store the information which new +record locks really were set. This function removes a newly set +clustered index record lock under prebuilt->pcur or +prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that +releases the latest clustered index record lock we set. +@return error code or DB_SUCCESS */ UNIV_INTERN int row_unlock_for_mysql( /*=================*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in MySQL + row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL handle */ - ibool has_latches_on_recs)/*!< TRUE if called so that we have - the latches on the records under pcur - and clust_pcur, and we do not need to - reposition the cursors. 
*/ + ibool has_latches_on_recs)/*!< in: TRUE if called so + that we have the latches on + the records under pcur and + clust_pcur, and we do not need + to reposition the cursors. */ { btr_pcur_t* pcur = prebuilt->pcur; btr_pcur_t* clust_pcur = prebuilt->clust_pcur; @@ -1645,37 +1647,6 @@ row_table_got_default_clust_index( } /*********************************************************************//** -Calculates the key number used inside MySQL for an Innobase index. We have -to take into account if we generated a default clustered index for the table -@return the key number used inside MySQL */ -UNIV_INTERN -ulint -row_get_mysql_key_number_for_index( -/*===============================*/ - const dict_index_t* index) /*!< in: index */ -{ - const dict_index_t* ind; - ulint i; - - ut_a(index); - - i = 0; - ind = dict_table_get_first_index(index->table); - - while (index != ind) { - ind = dict_table_get_next_index(ind); - i++; - } - - if (row_table_got_default_clust_index(index->table)) { - ut_a(i > 0); - i--; - } - - return(i); -} - -/*********************************************************************//** Locks the data dictionary in shared mode from modifications, for performing foreign key check, rollback, or other operation invisible to MySQL. 
*/ UNIV_INTERN @@ -2059,6 +2030,7 @@ row_table_add_foreign_constraints( FOREIGN KEY (a, b) REFERENCES table2(c, d), table2 can be written also with the database name before it: test.table2 */ + size_t sql_length, /*!< in: length of sql_string */ const char* name, /*!< in: table full name in the normalized form database_name/table_name */ @@ -2080,8 +2052,8 @@ row_table_add_foreign_constraints( trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - err = dict_create_foreign_constraints(trx, sql_string, name, - reject_fks); + err = dict_create_foreign_constraints(trx, sql_string, sql_length, + name, reject_fks); if (err == DB_SUCCESS) { /* Check that also referencing constraints are ok */ err = dict_load_foreigns(name, TRUE); @@ -2425,7 +2397,7 @@ row_discard_tablespace_for_mysql( goto funct_exit; } - new_id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID); + dict_hdr_get_new_id(&new_id, NULL, NULL); /* Remove all locks except the table-level S and X locks. */ lock_remove_all_on_table(table, FALSE); @@ -2787,10 +2759,11 @@ row_truncate_table_for_mysql( dict_index_t* index; - space = 0; + dict_hdr_get_new_id(NULL, NULL, &space); - if (fil_create_new_single_table_tablespace( - &space, table->name, FALSE, flags, + if (space == ULINT_UNDEFINED + || fil_create_new_single_table_tablespace( + space, table->name, FALSE, flags, FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) { ut_print_timestamp(stderr); fprintf(stderr, @@ -2895,7 +2868,7 @@ next_rec: mem_heap_free(heap); - new_id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID); + dict_hdr_get_new_id(&new_id, NULL, NULL); info = pars_info_create(); diff --git a/storage/innodb_plugin/row/row0purge.c b/storage/innodb_plugin/row/row0purge.c index 500ebe571ab..835af990672 100644 --- a/storage/innodb_plugin/row/row0purge.c +++ b/storage/innodb_plugin/row/row0purge.c @@ -44,6 +44,16 @@ Created 3/14/1997 Heikki Tuuri #include "row0mysql.h" #include "log0log.h" +/************************************************************************* +IMPORTANT NOTE: Any 
operation that generates redo MUST check that there +is enough space in the redo log before that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchronization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /********************************************************************//** Creates a purge node to a query graph. @return own: purge node */ @@ -126,6 +136,7 @@ row_purge_remove_clust_if_poss_low( pcur = &(node->pcur); btr_cur = btr_pcur_get_btr_cur(pcur); + log_free_check(); mtr_start(&mtr); success = row_purge_reposition_pcur(mode, node, &mtr); diff --git a/storage/innodb_plugin/row/row0row.c b/storage/innodb_plugin/row/row0row.c index cb7dfa2b7c9..8e806a14a98 100644 --- a/storage/innodb_plugin/row/row0row.c +++ b/storage/innodb_plugin/row/row0row.c @@ -294,7 +294,13 @@ row_build( ut_ad(dtuple_check_typed(row)); - if (j) { + if (!ext) { + /* REDUNDANT and COMPACT formats store a local + 768-byte prefix of each externally stored + column. No cache is needed. */ + ut_ad(dict_table_get_format(index->table) + < DICT_TF_FORMAT_ZIP); + } else if (j) { *ext = row_ext_create(j, ext_cols, row, dict_table_zip_size(index->table), heap); diff --git a/storage/innodb_plugin/row/row0sel.c b/storage/innodb_plugin/row/row0sel.c index d0702a0cd2f..76c144e5a8c 100644 --- a/storage/innodb_plugin/row/row0sel.c +++ b/storage/innodb_plugin/row/row0sel.c @@ -416,7 +416,7 @@ row_sel_fetch_columns( field_no))) { /* Copy an externally stored field to the - temporary heap */ + temporary heap, if possible. */ heap = mem_heap_create(1); @@ -425,6 +425,17 @@ row_sel_fetch_columns( dict_table_zip_size(index->table), field_no, &len, heap); + /* data == NULL means that the + externally stored field was not + written yet.
This record + should only be seen by + recv_recovery_rollback_active() or any + TRX_ISO_READ_UNCOMMITTED + transactions. The InnoDB SQL parser + (the sole caller of this function) + does not implement READ UNCOMMITTED, + and it is not involved during rollback. */ + ut_a(data); ut_a(len != UNIV_SQL_NULL); needs_copy = TRUE; @@ -863,8 +874,14 @@ row_sel_get_clust_rec( clust_rec, index, offsets, node->row_lock_mode, lock_type, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS: + case DB_SUCCESS_LOCKED_REC: + /* Declare the variable uninitialized in Valgrind. + It should be set to DB_SUCCESS at func_exit. */ + UNIV_MEM_INVALID(&err, sizeof err); + break; + default: goto err_exit; } } else { @@ -920,6 +937,7 @@ row_sel_get_clust_rec( when plan->clust_pcur was positioned. The latch will not be released until mtr_commit(mtr). */ + ut_ad(!rec_get_deleted_flag(clust_rec, rec_offs_comp(offsets))); row_sel_fetch_columns(index, clust_rec, offsets, UT_LIST_GET_FIRST(plan->columns)); *out_rec = clust_rec; @@ -934,9 +952,9 @@ err_exit: /*********************************************************************//** Sets a lock on a record. 
-@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ UNIV_INLINE -ulint +enum db_err sel_set_rec_lock( /*=============*/ const buf_block_t* block, /*!< in: buffer block of rec */ @@ -948,8 +966,8 @@ sel_set_rec_lock( LOC_REC_NOT_GAP */ que_thr_t* thr) /*!< in: query thread */ { - trx_t* trx; - ulint err; + trx_t* trx; + enum db_err err; trx = thr_get_trx(thr); @@ -1482,11 +1500,15 @@ rec_loop: node->row_lock_mode, lock_type, thr); - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: /* Note that in this case we will store in pcur the PREDECESSOR of the record we are waiting the lock for */ - goto lock_wait_or_error; } } @@ -1538,8 +1560,12 @@ skip_lock: rec, index, offsets, node->row_lock_mode, lock_type, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -1614,6 +1640,13 @@ skip_lock: } if (old_vers == NULL) { + /* The record does not exist + in our read view. Skip it, but + first attempt to determine + whether the index segment we + are searching through has been + exhausted. */ + offsets = rec_get_offsets( rec, index, offsets, ULINT_UNDEFINED, &heap); @@ -2498,6 +2531,7 @@ row_sel_field_store_in_mysql_format( byte* pad_ptr; ut_ad(len != UNIV_SQL_NULL); + UNIV_MEM_ASSERT_RW(data, len); switch (templ->type) { case DATA_INT: @@ -2632,9 +2666,8 @@ Convert a row in the Innobase format to a row in the MySQL format. Note that the template in prebuilt may advise us to copy only a few columns to mysql_rec, other columns are left blank. All columns may not be needed in the query. 
-@return TRUE if success, FALSE if could not allocate memory for a BLOB -(though we may also assert in that case) */ -static +@return TRUE on success, FALSE if not all columns could be retrieved */ +static __attribute__((warn_unused_result)) ibool row_sel_store_mysql_rec( /*====================*/ @@ -2657,12 +2690,19 @@ row_sel_store_mysql_rec( ut_ad(prebuilt->mysql_template); ut_ad(prebuilt->default_rec); ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) { mem_heap_free(prebuilt->blob_heap); prebuilt->blob_heap = NULL; } + /* init null bytes with default values as they might be + left uninitialized in some cases and these uninited bytes + might be copied into mysql record buffer that leads to + valgrind warnings */ + memcpy(mysql_rec, prebuilt->default_rec, prebuilt->null_bitmap_len); + for (i = 0; i < prebuilt->n_template; i++) { templ = prebuilt->mysql_template + i; @@ -2698,6 +2738,21 @@ row_sel_store_mysql_rec( dict_table_zip_size(prebuilt->table), templ->rec_field_no, &len, heap); + if (UNIV_UNLIKELY(!data)) { + /* The externally stored field + was not written yet. This + record should only be seen by + recv_recovery_rollback_active() + or any TRX_ISO_READ_UNCOMMITTED + transactions. */ + + if (extern_field_heap) { + mem_heap_free(extern_field_heap); + } + + return(FALSE); + } + ut_a(len != UNIV_SQL_NULL); } else { /* Field is stored in the row. */ @@ -2746,6 +2801,9 @@ row_sel_store_mysql_rec( /* MySQL assumes that the field for an SQL NULL value is set to the default value. */ + UNIV_MEM_ASSERT_RW(prebuilt->default_rec + + templ->mysql_col_offset, + templ->mysql_col_len); mysql_rec[templ->mysql_null_byte_offset] |= (byte) templ->mysql_null_bit_mask; memcpy(mysql_rec + templ->mysql_col_offset, @@ -2797,9 +2855,9 @@ row_sel_build_prev_vers_for_mysql( Retrieves the clustered index record corresponding to a record in a non-clustered index. 
Does the necessary locking. Used in the MySQL interface. -@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ static -ulint +enum db_err row_sel_get_clust_rec_for_mysql( /*============================*/ row_prebuilt_t* prebuilt,/*!< in: prebuilt struct in the handle */ @@ -2826,7 +2884,7 @@ row_sel_get_clust_rec_for_mysql( dict_index_t* clust_index; const rec_t* clust_rec; rec_t* old_vers; - ulint err; + enum db_err err; trx_t* trx; *out_rec = NULL; @@ -2885,6 +2943,7 @@ row_sel_get_clust_rec_for_mysql( clust_rec = NULL; + err = DB_SUCCESS; goto func_exit; } @@ -2900,8 +2959,11 @@ row_sel_get_clust_rec_for_mysql( 0, btr_pcur_get_block(prebuilt->clust_pcur), clust_rec, clust_index, *offsets, prebuilt->select_lock_type, LOCK_REC_NOT_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS: + case DB_SUCCESS_LOCKED_REC: + break; + default: goto err_exit; } } else { @@ -2961,6 +3023,8 @@ row_sel_get_clust_rec_for_mysql( rec, sec_index, clust_rec, clust_index)); #endif } + + err = DB_SUCCESS; } func_exit: @@ -2973,7 +3037,6 @@ func_exit: btr_pcur_store_position(prebuilt->clust_pcur, mtr); } - err = DB_SUCCESS; err_exit: return(err); } @@ -3070,6 +3133,11 @@ row_sel_pop_cached_row_for_mysql( for (i = 0; i < prebuilt->n_template; i++) { templ = prebuilt->mysql_template + i; +#if 0 /* Some of the cached_rec may legitimately be uninitialized. */ + UNIV_MEM_ASSERT_RW(cached_rec + + templ->mysql_col_offset, + templ->mysql_col_len); +#endif ut_memcpy(buf + templ->mysql_col_offset, cached_rec + templ->mysql_col_offset, templ->mysql_col_len); @@ -3084,6 +3152,11 @@ row_sel_pop_cached_row_for_mysql( } } else { +#if 0 /* Some of the cached_rec may legitimately be uninitialized. 
*/ + UNIV_MEM_ASSERT_RW(prebuilt->fetch_cache + [prebuilt->fetch_cache_first], + prebuilt->mysql_prefix_len); +#endif ut_memcpy(buf, prebuilt->fetch_cache[prebuilt->fetch_cache_first], prebuilt->mysql_prefix_len); @@ -3097,9 +3170,10 @@ row_sel_pop_cached_row_for_mysql( } /********************************************************************//** -Pushes a row for MySQL to the fetch cache. */ -UNIV_INLINE -void +Pushes a row for MySQL to the fetch cache. +@return TRUE on success, FALSE if the record contains incomplete BLOBs */ +UNIV_INLINE __attribute__((warn_unused_result)) +ibool row_sel_push_cache_row_for_mysql( /*=============================*/ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */ @@ -3134,15 +3208,18 @@ row_sel_push_cache_row_for_mysql( } ut_ad(prebuilt->fetch_cache_first == 0); + UNIV_MEM_INVALID(prebuilt->fetch_cache[prebuilt->n_fetch_cached], + prebuilt->mysql_row_len); if (UNIV_UNLIKELY(!row_sel_store_mysql_rec( prebuilt->fetch_cache[ prebuilt->n_fetch_cached], prebuilt, rec, offsets))) { - ut_error; + return(FALSE); } prebuilt->n_fetch_cached++; + return(TRUE); } /*********************************************************************//** @@ -3537,11 +3614,21 @@ row_search_for_mysql( if (!row_sel_store_mysql_rec(buf, prebuilt, rec, offsets)) { - err = DB_TOO_BIG_RECORD; - - /* We let the main loop to do the - error handling */ - goto shortcut_fails_too_big_rec; + /* Only fresh inserts may contain + incomplete externally stored + columns. Pretend that such + records do not exist. Such + records may only be accessed + at the READ UNCOMMITTED + isolation level or when + rolling back a recovered + transaction. Rollback happens + at a lower level, not here. */ + ut_a(trx->isolation_level + == TRX_ISO_READ_UNCOMMITTED); + + /* Proceed as in case SEL_RETRY. 
*/ + break; } mtr_commit(&mtr); @@ -3581,7 +3668,7 @@ release_search_latch_if_needed: default: ut_ad(0); } -shortcut_fails_too_big_rec: + mtr_commit(&mtr); mtr_start(&mtr); } @@ -3595,6 +3682,13 @@ shortcut_fails_too_big_rec: trx->has_search_latch = FALSE; } + ut_ad(prebuilt->sql_stat_start || trx->conc_state == TRX_ACTIVE); + ut_ad(trx->conc_state == TRX_NOT_STARTED + || trx->conc_state == TRX_ACTIVE); + ut_ad(prebuilt->sql_stat_start + || prebuilt->select_lock_type != LOCK_NONE + || trx->read_view); + trx_start_if_not_started(trx); if (trx->isolation_level <= TRX_ISO_READ_COMMITTED @@ -3679,8 +3773,12 @@ shortcut_fails_too_big_rec: prebuilt->select_lock_type, LOCK_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -3778,8 +3876,12 @@ rec_loop: prebuilt->select_lock_type, LOCK_ORDINARY, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -3909,8 +4011,11 @@ wrong_offs: prebuilt->select_lock_type, LOCK_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -3945,8 +4050,11 @@ wrong_offs: prebuilt->select_lock_type, LOCK_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -4016,15 +4124,21 @@ no_gap_lock: switch (err) { const rec_t* old_vers; - case DB_SUCCESS: + case DB_SUCCESS_LOCKED_REC: if (srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) { + || trx->isolation_level + <= TRX_ISO_READ_COMMITTED) { /* Note that a record of prebuilt->index was locked. 
*/ prebuilt->new_rec_locks = 1; } + err = DB_SUCCESS; + case DB_SUCCESS: break; case DB_LOCK_WAIT: + /* Never unlock rows that were part of a conflict. */ + prebuilt->new_rec_locks = 0; + if (UNIV_LIKELY(prebuilt->row_read_type != ROW_READ_TRY_SEMI_CONSISTENT) || unique_search @@ -4054,7 +4168,6 @@ no_gap_lock: if (UNIV_LIKELY(trx->wait_lock != NULL)) { lock_cancel_waiting_and_release( trx->wait_lock); - prebuilt->new_rec_locks = 0; } else { mutex_exit(&kernel_mutex); @@ -4066,9 +4179,6 @@ no_gap_lock: ULINT_UNDEFINED, &heap); err = DB_SUCCESS; - /* Note that a record of - prebuilt->index was locked. */ - prebuilt->new_rec_locks = 1; break; } mutex_exit(&kernel_mutex); @@ -4205,27 +4315,30 @@ requires_clust_rec: err = row_sel_get_clust_rec_for_mysql(prebuilt, index, rec, thr, &clust_rec, &offsets, &heap, &mtr); - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS: + if (clust_rec == NULL) { + /* The record did not exist in the read view */ + ut_ad(prebuilt->select_lock_type == LOCK_NONE); + goto next_rec; + } + break; + case DB_SUCCESS_LOCKED_REC: + ut_a(clust_rec != NULL); + if (srv_locks_unsafe_for_binlog + || trx->isolation_level + <= TRX_ISO_READ_COMMITTED) { + /* Note that the clustered index record + was locked. */ + prebuilt->new_rec_locks = 2; + } + err = DB_SUCCESS; + break; + default: goto lock_wait_or_error; } - if (clust_rec == NULL) { - /* The record did not exist in the read view */ - ut_ad(prebuilt->select_lock_type == LOCK_NONE); - - goto next_rec; - } - - if ((srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE) { - /* Note that both the secondary index record - and the clustered index record were locked. 
*/ - ut_ad(prebuilt->new_rec_locks == 1); - prebuilt->new_rec_locks = 2; - } - if (UNIV_UNLIKELY(rec_get_deleted_flag(clust_rec, comp))) { /* The record is delete marked: we can skip it */ @@ -4290,9 +4403,18 @@ requires_clust_rec: not cache rows because there the cursor is a scrollable cursor. */ - row_sel_push_cache_row_for_mysql(prebuilt, result_rec, - offsets); - if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) { + if (!row_sel_push_cache_row_for_mysql(prebuilt, result_rec, + offsets)) { + /* Only fresh inserts may contain incomplete + externally stored columns. Pretend that such + records do not exist. Such records may only be + accessed at the READ UNCOMMITTED isolation + level or when rolling back a recovered + transaction. Rollback happens at a lower + level, not here. */ + ut_a(trx->isolation_level == TRX_ISO_READ_UNCOMMITTED); + } else if (prebuilt->n_fetch_cached + == MYSQL_FETCH_CACHE_SIZE) { goto got_row; } @@ -4308,9 +4430,17 @@ requires_clust_rec: } else { if (!row_sel_store_mysql_rec(buf, prebuilt, result_rec, offsets)) { - err = DB_TOO_BIG_RECORD; - - goto lock_wait_or_error; + /* Only fresh inserts may contain + incomplete externally stored + columns. Pretend that such records do + not exist. Such records may only be + accessed at the READ UNCOMMITTED + isolation level or when rolling back a + recovered transaction. Rollback + happens at a lower level, not here. 
*/ + ut_a(trx->isolation_level + == TRX_ISO_READ_UNCOMMITTED); + goto next_rec; } } diff --git a/storage/innodb_plugin/row/row0uins.c b/storage/innodb_plugin/row/row0uins.c index 9f9c814f1a5..930a5cf13b6 100644 --- a/storage/innodb_plugin/row/row0uins.c +++ b/storage/innodb_plugin/row/row0uins.c @@ -46,6 +46,16 @@ Created 2/25/1997 Heikki Tuuri #include "ibuf0ibuf.h" #include "log0log.h" +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /***************************************************************//** Removes a clustered index record. The pcur in node was positioned on the record, now it is detached. @@ -152,7 +162,6 @@ row_undo_ins_remove_sec_low( ulint err; mtr_t mtr; - log_free_check(); mtr_start(&mtr); found = row_search_index_entry(index, entry, mode, &pcur, &mtr); @@ -335,6 +344,7 @@ row_undo_ins( transactions. */ ut_a(trx_is_recv(node->trx)); } else { + log_free_check(); err = row_undo_ins_remove_sec(node->index, entry); if (err != DB_SUCCESS) { @@ -346,5 +356,6 @@ row_undo_ins( node->index = dict_table_get_next_index(node->index); } + log_free_check(); return(row_undo_ins_remove_clust_rec(node)); } diff --git a/storage/innodb_plugin/row/row0umod.c b/storage/innodb_plugin/row/row0umod.c index e7245dbee41..8464b0f95cc 100644 --- a/storage/innodb_plugin/row/row0umod.c +++ b/storage/innodb_plugin/row/row0umod.c @@ -58,12 +58,22 @@ delete marked clustered index record was delete unmarked and possibly also some of its fields were changed. 
Now, it is possible that the delete marked version has become obsolete at the time the undo is started. */ +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /***********************************************************//** Checks if also the previous version of the clustered index record was modified or inserted by the same transaction, and its undo number is such that it should be undone in the same rollback. @return TRUE if also previous modify or insert of this row should be undone */ -UNIV_INLINE +static ibool row_undo_mod_undo_also_prev_vers( /*=============================*/ @@ -231,6 +241,8 @@ row_undo_mod_clust( ut_ad(node && thr); + log_free_check(); + /* Check if also the previous version of the clustered index record should be undone in this same rollback operation */ @@ -657,24 +669,55 @@ row_undo_mod_upd_exist_sec( /* Build the newest version of the index entry */ entry = row_build_index_entry(node->row, node->ext, index, heap); - ut_a(entry); - /* NOTE that if we updated the fields of a - delete-marked secondary index record so that - alphabetically they stayed the same, e.g., - 'abc' -> 'aBc', we cannot return to the original - values because we do not know them. But this should - not cause problems because in row0sel.c, in queries - we always retrieve the clustered index record or an - earlier version of it, if the secondary index record - through which we do the search is delete-marked. 
*/ - - err = row_undo_mod_del_mark_or_remove_sec(node, thr, - index, - entry); - if (err != DB_SUCCESS) { - mem_heap_free(heap); - - return(err); + if (UNIV_UNLIKELY(!entry)) { + /* The server must have crashed in + row_upd_clust_rec_by_insert(), in + row_ins_index_entry_low() before + btr_store_big_rec_extern_fields() + has written the externally stored columns + (BLOBs) of the new clustered index entry. */ + + /* The table must be in DYNAMIC or COMPRESSED + format. REDUNDANT and COMPACT formats + store a local 768-byte prefix of each + externally stored column. */ + ut_a(dict_table_get_format(index->table) + >= DICT_TF_FORMAT_ZIP); + + /* This is only legitimate when + rolling back an incomplete transaction + after crash recovery. */ + ut_a(thr_get_trx(thr)->is_recovered); + + /* The server must have crashed before + completing the insert of the new + clustered index entry and before + inserting to the secondary indexes. + Because node->row was not yet written + to this index, we can ignore it. But + we must restore node->undo_row. */ + } else { + /* NOTE that if we updated the fields of a + delete-marked secondary index record so that + alphabetically they stayed the same, e.g., + 'abc' -> 'aBc', we cannot return to the + original values because we do not know them. + But this should not cause problems because + in row0sel.c, in queries we always retrieve + the clustered index record or an earlier + version of it, if the secondary index record + through which we do the search is + delete-marked. */ + + err = row_undo_mod_del_mark_or_remove_sec( + node, thr, index, entry); + if (err != DB_SUCCESS) { + mem_heap_free(heap); + + return(err); + } + + mem_heap_empty(heap); } /* We may have to update the delete mark in the @@ -683,7 +726,6 @@ row_undo_mod_upd_exist_sec( the secondary index record if we updated its fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'. 
*/ - mem_heap_empty(heap); entry = row_build_index_entry(node->undo_row, node->undo_ext, index, heap); diff --git a/storage/innodb_plugin/row/row0undo.c b/storage/innodb_plugin/row/row0undo.c index 3d739c9689a..fd28a4f6520 100644 --- a/storage/innodb_plugin/row/row0undo.c +++ b/storage/innodb_plugin/row/row0undo.c @@ -199,8 +199,24 @@ row_undo_search_clust_to_pcur( ret = FALSE; } else { + row_ext_t** ext; + + if (dict_table_get_format(node->table) >= DICT_TF_FORMAT_ZIP) { + /* In DYNAMIC or COMPRESSED format, there is + no prefix of externally stored columns in the + clustered index record. Build a cache of + column prefixes. */ + ext = &node->ext; + } else { + /* REDUNDANT and COMPACT formats store a local + 768-byte prefix of each externally stored + column. No cache is needed. */ + ext = NULL; + node->ext = NULL; + } + node->row = row_build(ROW_COPY_DATA, clust_index, rec, - offsets, NULL, &node->ext, node->heap); + offsets, NULL, ext, node->heap); if (node->update) { node->undo_row = dtuple_copy(node->row, node->heap); row_upd_replace(node->undo_row, &node->undo_ext, @@ -297,7 +313,7 @@ row_undo( if (locked_data_dict) { - row_mysql_lock_data_dictionary(trx); + row_mysql_freeze_data_dictionary(trx); } if (node->state == UNDO_NODE_INSERT) { @@ -312,7 +328,7 @@ row_undo( if (locked_data_dict) { - row_mysql_unlock_data_dictionary(trx); + row_mysql_unfreeze_data_dictionary(trx); } /* Do some cleanup */ diff --git a/storage/innodb_plugin/row/row0upd.c b/storage/innodb_plugin/row/row0upd.c index 95d1d00aeef..397b117c067 100644 --- a/storage/innodb_plugin/row/row0upd.c +++ b/storage/innodb_plugin/row/row0upd.c @@ -92,6 +92,16 @@ the x-latch freed? The most efficient way for performing a searched delete is obviously to keep the x-latch for several steps of query graph execution. 
*/ +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /***********************************************************//** Checks if an update vector changes some of the first ordering fields of an index record. This is only used in foreign key checks and we can assume @@ -1388,6 +1398,7 @@ row_upd_store_row( dict_index_t* clust_index; rec_t* rec; mem_heap_t* heap = NULL; + row_ext_t** ext; ulint offsets_[REC_OFFS_NORMAL_SIZE]; const ulint* offsets; rec_offs_init(offsets_); @@ -1404,8 +1415,22 @@ row_upd_store_row( offsets = rec_get_offsets(rec, clust_index, offsets_, ULINT_UNDEFINED, &heap); + + if (dict_table_get_format(node->table) >= DICT_TF_FORMAT_ZIP) { + /* In DYNAMIC or COMPRESSED format, there is no prefix + of externally stored columns in the clustered index + record. Build a cache of column prefixes. */ + ext = &node->ext; + } else { + /* REDUNDANT and COMPACT formats store a local + 768-byte prefix of each externally stored column. + No cache is needed. 
*/ + ext = NULL; + node->ext = NULL; + } + node->row = row_build(ROW_COPY_DATA, clust_index, rec, offsets, - NULL, &node->ext, node->heap); + NULL, ext, node->heap); if (node->is_delete) { node->upd_row = NULL; node->upd_ext = NULL; @@ -1453,7 +1478,6 @@ row_upd_sec_index_entry( entry = row_build_index_entry(node->row, node->ext, index, heap); ut_a(entry); - log_free_check(); mtr_start(&mtr); found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur, @@ -1529,7 +1553,7 @@ Updates the secondary index record if it is changed in the row update or deletes it if this is a delete. @return DB_SUCCESS if operation successfully completed, else error code or DB_LOCK_WAIT */ -UNIV_INLINE +static ulint row_upd_sec_step( /*=============*/ @@ -2015,6 +2039,7 @@ row_upd( if (node->state == UPD_NODE_UPDATE_CLUSTERED || node->state == UPD_NODE_INSERT_CLUSTERED) { + log_free_check(); err = row_upd_clust_step(node, thr); if (err != DB_SUCCESS) { @@ -2029,6 +2054,8 @@ row_upd( } while (node->index != NULL) { + + log_free_check(); err = row_upd_sec_step(node, thr); if (err != DB_SUCCESS) { diff --git a/storage/innodb_plugin/setup.sh b/storage/innodb_plugin/setup.sh index 23fe729a406..b5d8299d411 100755 --- a/storage/innodb_plugin/setup.sh +++ b/storage/innodb_plugin/setup.sh @@ -21,7 +21,7 @@ set -eu -TARGETDIR=../storage/innobase +TARGETDIR=../storage/innodb_plugin # link the build scripts BUILDSCRIPTS="compile-innodb compile-innodb-debug" diff --git a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c index 63c355cea32..f7e7e351bdc 100644 --- a/storage/innodb_plugin/srv/srv0srv.c +++ b/storage/innodb_plugin/srv/srv0srv.c @@ -1609,12 +1609,16 @@ srv_suspend_mysql_thread( innodb_lock_wait_timeout, because trx->mysql_thd == NULL. 
*/ lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd); - if (trx_is_interrupted(trx) - || (lock_wait_timeout < 100000000 - && wait_time > (double) lock_wait_timeout)) { + if (lock_wait_timeout < 100000000 + && wait_time > (double) lock_wait_timeout) { trx->error_state = DB_LOCK_WAIT_TIMEOUT; } + + if (trx_is_interrupted(trx)) { + + trx->error_state = DB_INTERRUPTED; + } } /********************************************************************//** diff --git a/storage/innodb_plugin/srv/srv0start.c b/storage/innodb_plugin/srv/srv0start.c index e517b9a86b0..ba9fc831b39 100644 --- a/storage/innodb_plugin/srv/srv0start.c +++ b/storage/innodb_plugin/srv/srv0start.c @@ -2018,9 +2018,13 @@ innobase_shutdown_for_mysql(void) pars_lexer_close(); log_mem_free(); buf_pool_free(); - ut_free_all_mem(); mem_close(); + /* ut_free_all_mem() frees all allocated memory not freed yet + in shutdown, and it will also free the ut_list_mutex, so it + should be the last one for all operation */ + ut_free_all_mem(); + if (os_thread_count != 0 || os_event_count != 0 || os_mutex_count != 0 diff --git a/storage/innodb_plugin/sync/sync0arr.c b/storage/innodb_plugin/sync/sync0arr.c index ed9e25bf2f2..3c825e2202b 100644 --- a/storage/innodb_plugin/sync/sync0arr.c +++ b/storage/innodb_plugin/sync/sync0arr.c @@ -498,7 +498,9 @@ sync_array_cell_print( || type == RW_LOCK_WAIT_EX || type == RW_LOCK_SHARED) { - fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file); + fputs(type == RW_LOCK_EX ? "X-lock on" + : type == RW_LOCK_WAIT_EX ? 
"X-lock (wait_ex) on" + : "S-lock on", file); rwlock = cell->old_wait_rw_lock; diff --git a/storage/innodb_plugin/sync/sync0rw.c b/storage/innodb_plugin/sync/sync0rw.c index d231b6acdf7..52eaa5d0f43 100644 --- a/storage/innodb_plugin/sync/sync0rw.c +++ b/storage/innodb_plugin/sync/sync0rw.c @@ -267,7 +267,7 @@ rw_lock_create_func( lock->level = level; #endif /* UNIV_SYNC_DEBUG */ - lock->magic_n = RW_LOCK_MAGIC_N; + ut_d(lock->magic_n = RW_LOCK_MAGIC_N); lock->cfile_name = cfile_name; lock->cline = (unsigned int) cline; @@ -282,10 +282,8 @@ rw_lock_create_func( mutex_enter(&rw_lock_list_mutex); - if (UT_LIST_GET_LEN(rw_lock_list) > 0) { - ut_a(UT_LIST_GET_FIRST(rw_lock_list)->magic_n - == RW_LOCK_MAGIC_N); - } + ut_ad(UT_LIST_GET_FIRST(rw_lock_list) == NULL + || UT_LIST_GET_FIRST(rw_lock_list)->magic_n == RW_LOCK_MAGIC_N); UT_LIST_ADD_FIRST(list, rw_lock_list, lock); @@ -305,8 +303,6 @@ rw_lock_free( ut_ad(rw_lock_validate(lock)); ut_a(lock->lock_word == X_LOCK_DECR); - lock->magic_n = 0; - #ifndef INNODB_RW_LOCKS_USE_ATOMICS mutex_free(rw_lock_get_mutex(lock)); #endif /* INNODB_RW_LOCKS_USE_ATOMICS */ @@ -316,16 +312,16 @@ rw_lock_free( os_event_free(lock->wait_ex_event); - if (UT_LIST_GET_PREV(list, lock)) { - ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N); - } - if (UT_LIST_GET_NEXT(list, lock)) { - ut_a(UT_LIST_GET_NEXT(list, lock)->magic_n == RW_LOCK_MAGIC_N); - } + ut_ad(UT_LIST_GET_PREV(list, lock) == NULL + || UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N); + ut_ad(UT_LIST_GET_NEXT(list, lock) == NULL + || UT_LIST_GET_NEXT(list, lock)->magic_n == RW_LOCK_MAGIC_N); UT_LIST_REMOVE(list, rw_lock_list, lock); mutex_exit(&rw_lock_list_mutex); + + ut_d(lock->magic_n = 0); } #ifdef UNIV_DEBUG @@ -344,7 +340,7 @@ rw_lock_validate( ulint waiters = rw_lock_get_waiters(lock); lint lock_word = lock->lock_word; - ut_a(lock->magic_n == RW_LOCK_MAGIC_N); + ut_ad(lock->magic_n == RW_LOCK_MAGIC_N); ut_a(waiters == 0 || waiters == 1); 
ut_a(lock_word > -X_LOCK_DECR ||(-lock_word) % X_LOCK_DECR == 0); diff --git a/storage/innodb_plugin/trx/trx0i_s.c b/storage/innodb_plugin/trx/trx0i_s.c index c160eb2942a..5bc8302d0c0 100644 --- a/storage/innodb_plugin/trx/trx0i_s.c +++ b/storage/innodb_plugin/trx/trx0i_s.c @@ -429,6 +429,9 @@ fill_trx_row( which to copy volatile strings */ { + const char* stmt; + size_t stmt_len; + row->trx_id = trx_get_id(trx); row->trx_started = (ib_time_t) trx->start_time; row->trx_state = trx_get_que_state_str(trx); @@ -449,37 +452,32 @@ fill_trx_row( row->trx_weight = (ullint) ut_conv_dulint_to_longlong(TRX_WEIGHT(trx)); - if (trx->mysql_thd != NULL) { - row->trx_mysql_thread_id - = thd_get_thread_id(trx->mysql_thd); - } else { + if (trx->mysql_thd == NULL) { /* For internal transactions e.g., purge and transactions being recovered at startup there is no associated MySQL thread data structure. */ row->trx_mysql_thread_id = 0; + row->trx_query = NULL; + return(TRUE); } - if (trx->mysql_query_str != NULL && *trx->mysql_query_str != NULL) { + row->trx_mysql_thread_id = thd_get_thread_id(trx->mysql_thd); + stmt = innobase_get_stmt(trx->mysql_thd, &stmt_len); - if (strlen(*trx->mysql_query_str) - > TRX_I_S_TRX_QUERY_MAX_LEN) { + if (stmt != NULL) { - char query[TRX_I_S_TRX_QUERY_MAX_LEN + 1]; + char query[TRX_I_S_TRX_QUERY_MAX_LEN + 1]; - memcpy(query, *trx->mysql_query_str, - TRX_I_S_TRX_QUERY_MAX_LEN); - query[TRX_I_S_TRX_QUERY_MAX_LEN] = '\0'; + if (stmt_len > TRX_I_S_TRX_QUERY_MAX_LEN) { + stmt_len = TRX_I_S_TRX_QUERY_MAX_LEN; + } - row->trx_query = ha_storage_put_memlim( - cache->storage, query, - TRX_I_S_TRX_QUERY_MAX_LEN + 1, - MAX_ALLOWED_FOR_STORAGE(cache)); - } else { + memcpy(query, stmt, stmt_len); + query[stmt_len] = '\0'; - row->trx_query = ha_storage_put_str_memlim( - cache->storage, *trx->mysql_query_str, - MAX_ALLOWED_FOR_STORAGE(cache)); - } + row->trx_query = ha_storage_put_memlim( + cache->storage, stmt, stmt_len + 1, + MAX_ALLOWED_FOR_STORAGE(cache)); if 
(row->trx_query == NULL) { diff --git a/storage/innodb_plugin/trx/trx0trx.c b/storage/innodb_plugin/trx/trx0trx.c index 6ef7e62e6ae..9722bb59a5e 100644 --- a/storage/innodb_plugin/trx/trx0trx.c +++ b/storage/innodb_plugin/trx/trx0trx.c @@ -119,7 +119,6 @@ trx_create( trx->table_id = ut_dulint_zero; trx->mysql_thd = NULL; - trx->mysql_query_str = NULL; trx->active_trans = 0; trx->duplicates = 0; @@ -940,7 +939,6 @@ trx_commit_off_kernel( trx->rseg = NULL; trx->undo_no = ut_dulint_zero; trx->last_sql_stat_start.least_undo_no = ut_dulint_zero; - trx->mysql_query_str = NULL; ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0); ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0); diff --git a/storage/maria/CMakeLists.txt b/storage/maria/CMakeLists.txt index 9e8424ba97f..850c2dd40cd 100644 --- a/storage/maria/CMakeLists.txt +++ b/storage/maria/CMakeLists.txt @@ -23,7 +23,7 @@ INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib ${CMAKE_SOURCE_DIR}/sql ${CMAKE_SOURCE_DIR}/regex ${CMAKE_SOURCE_DIR}/extra/yassl/include) -SET(MARIA_SOURCES ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c +SET(ARIA_SOURCES ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c ma_rnext.c ma_rnext_same.c ma_search.c ma_page.c ma_key_recover.c ma_key.c ma_locking.c ma_state.c @@ -43,52 +43,55 @@ SET(MARIA_SOURCES ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c ha_maria.cc trnman.c lockman.c tablockman.c ma_rt_index.c ma_rt_key.c ma_rt_mbr.c ma_rt_split.c ma_sp_key.c ma_control_file.c ma_loghandler.c - ma_pagecache.c ma_pagecaches.c + ma_pagecache.c ma_pagecaches.c compat_aliases.cc compat_aliases.h ma_checkpoint.c ma_recovery.c ma_commit.c ma_pagecrc.c ha_maria.h maria_def.h ma_recovery_util.c ma_servicethread.c ) -MYSQL_STORAGE_ENGINE(MARIA) +MYSQL_STORAGE_ENGINE(ARIA) IF(NOT SOURCE_SUBLIBS) - ADD_DEPENDENCIES(maria GenError) + ADD_DEPENDENCIES(aria GenError) -ADD_EXECUTABLE(maria_ftdump maria_ftdump.c) -TARGET_LINK_LIBRARIES(maria_ftdump maria myisam mysys dbug strings zlib wsock32) 
+ADD_EXECUTABLE(aria_ftdump maria_ftdump.c) +TARGET_LINK_LIBRARIES(aria_ftdump aria myisam mysys dbug strings zlib wsock32) -ADD_EXECUTABLE(maria_chk maria_chk.c) -TARGET_LINK_LIBRARIES(maria_chk maria myisam mysys dbug strings zlib wsock32) +ADD_EXECUTABLE(aria_chk maria_chk.c) +TARGET_LINK_LIBRARIES(aria_chk aria myisam mysys dbug strings zlib wsock32) -ADD_EXECUTABLE(maria_read_log maria_read_log.c) -TARGET_LINK_LIBRARIES(maria_read_log maria myisam mysys dbug strings zlib wsock32) +ADD_EXECUTABLE(aria_read_log maria_read_log.c) +TARGET_LINK_LIBRARIES(aria_read_log aria myisam mysys dbug strings zlib wsock32) -ADD_EXECUTABLE(maria_pack maria_pack.c) -TARGET_LINK_LIBRARIES(maria_pack maria myisam mysys dbug strings zlib wsock32) +ADD_EXECUTABLE(aria_pack maria_pack.c) +TARGET_LINK_LIBRARIES(aria_pack aria myisam mysys dbug strings zlib wsock32) -ADD_EXECUTABLE(maria_dump_log ma_loghandler.c unittest/ma_loghandler_examples.c) -TARGET_LINK_LIBRARIES(maria_dump_log maria myisam mysys dbug strings zlib wsock32) -SET_TARGET_PROPERTIES(maria_dump_log PROPERTIES COMPILE_FLAGS "-DMARIA_DUMP_LOG") +ADD_EXECUTABLE(aria_dump_log ma_loghandler.c unittest/ma_loghandler_examples.c) +TARGET_LINK_LIBRARIES(aria_dump_log aria myisam mysys dbug strings zlib wsock32) +SET_TARGET_PROPERTIES(aria_dump_log PROPERTIES COMPILE_FLAGS "-DMARIA_DUMP_LOG") ADD_EXECUTABLE(ma_test1 ma_test1.c) -TARGET_LINK_LIBRARIES(ma_test1 maria myisam mysys dbug strings zlib wsock32) +TARGET_LINK_LIBRARIES(ma_test1 aria myisam mysys dbug strings zlib wsock32) ADD_EXECUTABLE(ma_test2 ma_test2.c) -TARGET_LINK_LIBRARIES(ma_test2 maria myisam mysys dbug strings zlib wsock32) +TARGET_LINK_LIBRARIES(ma_test2 aria myisam mysys dbug strings zlib wsock32) ADD_EXECUTABLE(ma_test3 ma_test3.c) -TARGET_LINK_LIBRARIES(ma_test3 maria myisam mysys dbug strings zlib wsock32) +TARGET_LINK_LIBRARIES(ma_test3 aria myisam mysys dbug strings zlib wsock32) ADD_EXECUTABLE(ma_rt_test ma_rt_test.c) -TARGET_LINK_LIBRARIES(ma_rt_test 
maria myisam mysys dbug strings zlib wsock32) +TARGET_LINK_LIBRARIES(ma_rt_test aria myisam mysys dbug strings zlib wsock32) ADD_EXECUTABLE(ma_sp_test ma_sp_test.c) -TARGET_LINK_LIBRARIES(ma_sp_test maria myisam mysys dbug strings zlib wsock32) +TARGET_LINK_LIBRARIES(ma_sp_test aria myisam mysys dbug strings zlib wsock32) IF(EMBED_MANIFESTS) - MYSQL_EMBED_MANIFEST("maria_ftdump" "asInvoker") - MYSQL_EMBED_MANIFEST("maria_chk" "asInvoker") - MYSQL_EMBED_MANIFEST("maria_read_log" "asInvoker") - MYSQL_EMBED_MANIFEST("maria_pack" "asInvoker") + MYSQL_EMBED_MANIFEST("aria_ftdump" "asInvoker") + MYSQL_EMBED_MANIFEST("aria_chk" "asInvoker") + MYSQL_EMBED_MANIFEST("aria_read_log" "asInvoker") + MYSQL_EMBED_MANIFEST("aria_pack" "asInvoker") ENDIF(EMBED_MANIFESTS) +INSTALL(TARGETS aria_ftdump aria_chk aria_read_log aria_pack aria_dump_log + DESTINATION bin COMPONENT runtime) + ENDIF(NOT SOURCE_SUBLIBS) diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am index fe674fb2837..a83063a0226 100644 --- a/storage/maria/Makefile.am +++ b/storage/maria/Makefile.am @@ -27,44 +27,47 @@ LDADD = DEFS = @DEFS@ -# "." is needed first because tests in unittest need libmaria +# "." is needed first because tests in unittest need libaria SUBDIRS = . unittest EXTRA_DIST = ma_test_all.sh ma_test_all.res ma_test_big.sh \ ma_ft_stem.c CMakeLists.txt plug.in ma_test_recovery pkgdata_DATA = -pkglib_LIBRARIES = libmaria.a -bin_PROGRAMS = maria_chk maria_pack maria_ftdump maria_read_log \ - maria_dump_log -maria_chk_DEPENDENCIES= $(LIBRARIES) +pkglib_LIBRARIES = libaria.a +bin_PROGRAMS = aria_chk aria_pack aria_ftdump aria_read_log \ + aria_dump_log +aria_chk_DEPENDENCIES= $(LIBRARIES) # Only reason to link with libmyisam.a here is that it's where some fulltext -# pieces are (but soon we'll remove fulltext dependencies from Maria). +# pieces are (but soon we'll remove fulltext dependencies from Aria). # For now, it imposes that storage/myisam be built before storage/maria. 
-maria_chk_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ +aria_chk_SOURCES= maria_chk.c +aria_chk_LDADD= @CLIENT_EXTRA_LDFLAGS@ libaria.a \ $(top_builddir)/storage/myisam/libmyisam.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ -maria_pack_DEPENDENCIES=$(LIBRARIES) -maria_pack_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ +aria_pack_SOURCES= maria_pack.c +aria_pack_DEPENDENCIES=$(LIBRARIES) +aria_pack_LDADD= @CLIENT_EXTRA_LDFLAGS@ libaria.a \ $(top_builddir)/storage/myisam/libmyisam.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ -maria_read_log_DEPENDENCIES=$(LIBRARIES) -maria_read_log_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ +aria_read_log_SOURCES= maria_read_log.c +aria_read_log_DEPENDENCIES=$(LIBRARIES) +aria_read_log_LDADD= @CLIENT_EXTRA_LDFLAGS@ libaria.a \ $(top_builddir)/storage/myisam/libmyisam.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ -maria_dump_log_DEPENDENCIES=$(LIBRARIES) ma_loghandler.c -maria_dump_log_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ +aria_dump_log_DEPENDENCIES=$(LIBRARIES) ma_loghandler.c +aria_dump_log_LDADD= @CLIENT_EXTRA_LDFLAGS@ libaria.a \ $(top_builddir)/storage/myisam/libmyisam.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ -maria_dump_log_SOURCES= ma_loghandler.c unittest/ma_loghandler_examples.c -maria_dump_log_CPPFLAGS= -DMARIA_DUMP_LOG +aria_dump_log_SOURCES= ma_loghandler.c unittest/ma_loghandler_examples.c +aria_dump_log_CPPFLAGS= -DMARIA_DUMP_LOG noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h \ ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h \ @@ -74,46 +77,47 @@ noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h \ 
ma_checkpoint.h ma_recovery.h ma_commit.h ma_state.h \ trnman_public.h ma_check_standalone.h \ ma_key_recover.h ma_recovery_util.h \ - ma_servicethread.h + ma_servicethread.h compat_aliases.h ma_test1_DEPENDENCIES= $(LIBRARIES) -ma_test1_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ +ma_test1_LDADD= @CLIENT_EXTRA_LDFLAGS@ libaria.a \ $(top_builddir)/storage/myisam/libmyisam.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ ma_test2_DEPENDENCIES= $(LIBRARIES) -ma_test2_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ +ma_test2_LDADD= @CLIENT_EXTRA_LDFLAGS@ libaria.a \ $(top_builddir)/storage/myisam/libmyisam.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ ma_test3_DEPENDENCIES= $(LIBRARIES) -ma_test3_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ +ma_test3_LDADD= @CLIENT_EXTRA_LDFLAGS@ libaria.a \ $(top_builddir)/storage/myisam/libmyisam.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ #ma_ft_test1_DEPENDENCIES= $(LIBRARIES) #ma_ft_eval_DEPENDENCIES= $(LIBRARIES) -maria_ftdump_DEPENDENCIES= $(LIBRARIES) -maria_ftdump_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ +aria_ftdump_SOURCES= maria_ftdump.c +aria_ftdump_DEPENDENCIES= $(LIBRARIES) +aria_ftdump_LDADD= @CLIENT_EXTRA_LDFLAGS@ libaria.a \ $(top_builddir)/storage/myisam/libmyisam.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ ma_rt_test_DEPENDENCIES= $(LIBRARIES) -ma_rt_test_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ +ma_rt_test_LDADD= @CLIENT_EXTRA_LDFLAGS@ libaria.a \ $(top_builddir)/storage/myisam/libmyisam.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ ma_sp_test_DEPENDENCIES= $(LIBRARIES) -ma_sp_test_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ 
+ma_sp_test_LDADD= @CLIENT_EXTRA_LDFLAGS@ libaria.a \ $(top_builddir)/storage/myisam/libmyisam.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ -libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \ +libaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \ ma_rnext.c ma_rnext_same.c \ ma_search.c ma_page.c ma_key_recover.c ma_key.c \ ma_locking.c ma_state.c \ @@ -136,8 +140,8 @@ libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \ ma_pagecache.c ma_pagecaches.c \ ma_checkpoint.c ma_recovery.c ma_commit.c \ ma_pagecrc.c ma_recovery_util.c \ - ha_maria.cc ma_servicethread.c -CLEANFILES = test?.MA? FT?.MA? isam.log ma_test_all ma_rt_test.MA? sp_test.MA? maria_log_control maria_log.0000* + ha_maria.cc compat_aliases.cc ma_servicethread.c +CLEANFILES = test?.MA? FT?.MA? isam.log ma_test_all ma_rt_test.MA? sp_test.MA? aria_log_control aria_log.0000* SUFFIXES = .sh diff --git a/storage/maria/compat_aliases.cc b/storage/maria/compat_aliases.cc new file mode 100644 index 00000000000..2d3c67d69a7 --- /dev/null +++ b/storage/maria/compat_aliases.cc @@ -0,0 +1,245 @@ +/* Copyright (C) 2010 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + compatibility aliases for system and static variables +*/ +#include <my_global.h> +#include <maria.h> +#include <mysql/plugin.h> +#include "ma_loghandler.h" +#include "compat_aliases.h" + +ulong block_size_alias; +static MYSQL_SYSVAR_ULONG(block_size, block_size_alias, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Deprecated, use --aria-block-size instead", 0, 0, + MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH, + MARIA_MAX_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH); + +ulong checkpoint_interval_alias; +static MYSQL_SYSVAR_ULONG(checkpoint_interval, checkpoint_interval_alias, + PLUGIN_VAR_RQCMDARG, + "Deprecated, use --aria-checkpoint-interval instead", + NULL, NULL, 30, 0, UINT_MAX, 1); + +ulong force_start_after_recovery_failures_alias; +static MYSQL_SYSVAR_ULONG(force_start_after_recovery_failures, force_start_after_recovery_failures_alias, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Deprecated, use --aria-force-start-after-recovery-failures instead", + NULL, NULL, 0, 0, UINT_MAX8, 1); + +my_bool page_checksum_alias; +static MYSQL_SYSVAR_BOOL(page_checksum, page_checksum_alias, 0, + "Deprecated, use --aria-page-checksum instead", 0, 0, 1); + +char *log_dir_path_alias; +static MYSQL_SYSVAR_STR(log_dir_path, log_dir_path_alias, + PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Deprecated, use --aria-log-dir-path instead", + NULL, NULL, mysql_real_data_home); + +ulong log_file_size_alias; +static MYSQL_SYSVAR_ULONG(log_file_size, log_file_size_alias, + PLUGIN_VAR_RQCMDARG, + "Deprecated, use --aria-log-file-size instead", + NULL, NULL, TRANSLOG_FILE_SIZE, + TRANSLOG_MIN_FILE_SIZE, 0xffffffffL, TRANSLOG_PAGE_SIZE); + +ulong group_commit_alias; +static MYSQL_SYSVAR_ENUM(group_commit, group_commit_alias, + 
PLUGIN_VAR_RQCMDARG, + "Deprecated, use --aria-group-commit instead", + NULL, NULL, + TRANSLOG_GCOMMIT_NONE, &maria_group_commit_typelib); + +ulong group_commit_interval_alias; +static MYSQL_SYSVAR_ULONG(group_commit_interval, group_commit_interval_alias, + PLUGIN_VAR_RQCMDARG, + "Deprecated, use --aria-group-commit-interval instead", + NULL, NULL, 0, 0, UINT_MAX, 1); + +ulong log_purge_type_alias; +static MYSQL_SYSVAR_ENUM(log_purge_type, log_purge_type_alias, + PLUGIN_VAR_RQCMDARG, + "Deprecated, use --aria-log-purge-type instead", + NULL, NULL, TRANSLOG_PURGE_IMMIDIATE, + &maria_translog_purge_type_typelib); + +ulonglong max_sort_file_size_alias; +static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size, max_sort_file_size_alias, + PLUGIN_VAR_RQCMDARG, + "Deprecated, use --aria-max-temp-length instead", + 0, 0, MAX_FILE_SIZE, 0, MAX_FILE_SIZE, 1024*1024); + +ulong pagecache_age_threshold_alias; +static MYSQL_SYSVAR_ULONG(pagecache_age_threshold, pagecache_age_threshold_alias, + PLUGIN_VAR_RQCMDARG, + "Deprecated, use --aria-pagecache-age-threshold instead", + 0, 0, 300, 100, ~0L, 100); + +ulonglong pagecache_buffer_size_alias; +static MYSQL_SYSVAR_ULONGLONG(pagecache_buffer_size, pagecache_buffer_size_alias, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Deprecated, use --aria-pagecache-buffer-size instead", + 0, 0, KEY_CACHE_SIZE, MALLOC_OVERHEAD, ~0UL, IO_SIZE); + +ulong pagecache_division_limit_alias; +static MYSQL_SYSVAR_ULONG(pagecache_division_limit, pagecache_division_limit_alias, + PLUGIN_VAR_RQCMDARG, + "Deprecated, use --aria-pagecache-division-limit instead", + 0, 0, 100, 1, 100, 1); + +ulong recover_alias; +static MYSQL_SYSVAR_ENUM(recover, recover_alias, PLUGIN_VAR_OPCMDARG, + "Deprecated, use --aria-recover instead", + NULL, NULL, HA_RECOVER_DEFAULT, &maria_recover_typelib); + +ulong repair_threads_alias; +static MYSQL_THDVAR_ULONG(repair_threads, PLUGIN_VAR_RQCMDARG, + "Deprecated, use --aria-repair-threads instead", + 0, 0, 1, 1, ~0L, 1); + +ulong 
sort_buffer_size_alias; +static MYSQL_THDVAR_ULONG(sort_buffer_size, PLUGIN_VAR_RQCMDARG, + "Deprecated, use --aria-sort-buffer-size instead", + 0, 0, 128L*1024L*1024L, 4, ~0L, 1); + +ulong stats_method_alias; +static MYSQL_THDVAR_ENUM(stats_method, PLUGIN_VAR_RQCMDARG, + "Deprecated, use --aria-stats-method instead", + 0, 0, 0, &maria_stats_method_typelib); + +ulong sync_log_dir_alias; +static MYSQL_SYSVAR_ENUM(sync_log_dir, sync_log_dir_alias, + PLUGIN_VAR_RQCMDARG, + "Deprecated, use --aria-sync-log-dir instead", + NULL, NULL, TRANSLOG_SYNC_DIR_NEWFILE, + &maria_sync_log_dir_typelib); + +my_bool used_for_temp_tables_alias= 1; +static MYSQL_SYSVAR_BOOL(used_for_temp_tables, + used_for_temp_tables_alias, PLUGIN_VAR_READONLY | PLUGIN_VAR_NOCMDOPT, + NULL, 0, 0, 1); + +static struct st_mysql_show_var status_variables_aliases[]= { + {"Maria", (char*) &status_variables, SHOW_ARRAY}, + {NullS, NullS, SHOW_LONG} +}; + +/* + There is one problem with aliases for command-line options. + Plugin initialization works like this + + for all plugins: + prepare command-line options + initialize command-line option variables to the default values + parse command line, assign values as necessary + + for all plugins: + call the plugin initialization function + + it means, we cannot have maria* and aria* command-line options to use + the same underlying variables - because after assigning maria* values, + MySQL will put there default values again preparing for parsing aria* + values. So, maria* values will be lost. + + So, we create separate set of variables for maria* options, + and take both values into account in ha_maria_init(). + + When the command line was parsed, we patch maria* options + to use the same variables as aria* options so that + set @@maria_some_var would have the same value as @@aria_some_var + without forcing us to copy the values around all the time. 
+*/ + +static struct st_mysql_sys_var* system_variables_aliases[]= { + MYSQL_SYSVAR(block_size), + MYSQL_SYSVAR(checkpoint_interval), + MYSQL_SYSVAR(force_start_after_recovery_failures), + MYSQL_SYSVAR(group_commit), + MYSQL_SYSVAR(group_commit_interval), + MYSQL_SYSVAR(log_dir_path), + MYSQL_SYSVAR(log_file_size), + MYSQL_SYSVAR(log_purge_type), + MYSQL_SYSVAR(max_sort_file_size), + MYSQL_SYSVAR(page_checksum), + MYSQL_SYSVAR(pagecache_age_threshold), + MYSQL_SYSVAR(pagecache_buffer_size), + MYSQL_SYSVAR(pagecache_division_limit), + MYSQL_SYSVAR(recover), + MYSQL_SYSVAR(repair_threads), + MYSQL_SYSVAR(sort_buffer_size), + MYSQL_SYSVAR(stats_method), + MYSQL_SYSVAR(sync_log_dir), + MYSQL_SYSVAR(used_for_temp_tables), + NULL +}; + +#define COPY_SYSVAR(name) \ + memcpy(&MYSQL_SYSVAR_NAME(name), system_variables[i++], \ + sizeof(MYSQL_SYSVAR_NAME(name))); \ + if (name ## _alias != MYSQL_SYSVAR_NAME(name).def_val && \ + *MYSQL_SYSVAR_NAME(name).value == MYSQL_SYSVAR_NAME(name).def_val) \ + *MYSQL_SYSVAR_NAME(name).value= name ## _alias; + +#define COPY_THDVAR(name) \ + name ## _alias= THDVAR(0, name); \ + memcpy(&MYSQL_SYSVAR_NAME(name), system_variables[i++], \ + sizeof(MYSQL_SYSVAR_NAME(name))); \ + if (name ## _alias != MYSQL_SYSVAR_NAME(name).def_val && \ + THDVAR(0, name) == MYSQL_SYSVAR_NAME(name).def_val) \ + THDVAR(0, name)= name ## _alias; + +void copy_variable_aliases() +{ + int i= 0; + COPY_SYSVAR(block_size); + COPY_SYSVAR(checkpoint_interval); + COPY_SYSVAR(force_start_after_recovery_failures); + COPY_SYSVAR(group_commit); + COPY_SYSVAR(group_commit_interval); + COPY_SYSVAR(log_dir_path); + COPY_SYSVAR(log_file_size); + COPY_SYSVAR(log_purge_type); + COPY_SYSVAR(max_sort_file_size); + COPY_SYSVAR(page_checksum); + COPY_SYSVAR(pagecache_age_threshold); + COPY_SYSVAR(pagecache_buffer_size); + COPY_SYSVAR(pagecache_division_limit); + COPY_SYSVAR(recover); + COPY_THDVAR(repair_threads); + COPY_THDVAR(sort_buffer_size); + COPY_THDVAR(stats_method); + 
COPY_SYSVAR(sync_log_dir); + COPY_SYSVAR(used_for_temp_tables); +} + +struct st_maria_plugin compat_aliases= { + MYSQL_DAEMON_PLUGIN, + &maria_storage_engine, + "Maria", + "Monty Program Ab", + "Compatibility aliases for the Aria engine", + PLUGIN_LICENSE_GPL, + NULL, + NULL, + 0x0105, + status_variables_aliases, + system_variables_aliases, + "1.5", + MariaDB_PLUGIN_MATURITY_GAMMA +}; + diff --git a/storage/maria/compat_aliases.h b/storage/maria/compat_aliases.h new file mode 100644 index 00000000000..46a4da74eec --- /dev/null +++ b/storage/maria/compat_aliases.h @@ -0,0 +1,27 @@ +/* Copyright (C) 2010 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +extern struct st_maria_plugin compat_aliases; +extern char mysql_real_data_home[FN_REFLEN]; +extern TYPELIB maria_recover_typelib; +extern TYPELIB maria_stats_method_typelib; +extern TYPELIB maria_translog_purge_type_typelib; +extern TYPELIB maria_sync_log_dir_typelib; +extern TYPELIB maria_group_commit_typelib; +extern struct st_mysql_storage_engine maria_storage_engine; +extern my_bool use_maria_for_temp_tables; +extern struct st_mysql_sys_var* system_variables[]; +extern st_mysql_show_var status_variables[]; +void copy_variable_aliases(); diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 6375f01237e..f31c77834ff 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -29,6 +29,7 @@ #include "ha_maria.h" #include "trnman_public.h" #include "trnman.h" +#include "compat_aliases.h" C_MODE_START #include "maria_def.h" @@ -144,7 +145,7 @@ static void update_log_file_size(MYSQL_THD thd, static MYSQL_SYSVAR_ULONG(block_size, maria_block_size, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Block size to be used for MARIA index pages.", 0, 0, + "Block size to be used for Aria index pages.", 0, 0, MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH, MARIA_MAX_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH); @@ -184,7 +185,7 @@ static MYSQL_SYSVAR_ULONG(log_file_size, log_file_size, static MYSQL_SYSVAR_ENUM(group_commit, maria_group_commit, PLUGIN_VAR_RQCMDARG, - "Specifies maria group commit mode. " + "Specifies Aria group commit mode. 
" "Possible values are \"none\" (no group commit), " "\"hard\" (with waiting to actual commit), " "\"soft\" (no wait for commit (DANGEROUS!!!))", @@ -197,12 +198,12 @@ static MYSQL_SYSVAR_ULONG(group_commit_interval, maria_group_commit_interval, " 0 stands for no waiting" " for other threads to come and do a commit in \"hard\" mode and no" " sync()/commit at all in \"soft\" mode. Option has only an effect" - " if maria_group_commit is used", + " if aria_group_commit is used", NULL, update_maria_group_commit_interval, 0, 0, UINT_MAX, 1); static MYSQL_SYSVAR_ENUM(log_purge_type, log_purge_type, PLUGIN_VAR_RQCMDARG, - "Specifies how maria transactional log will be purged. " + "Specifies how Aria transactional log will be purged. " "Possible values of name are \"immediate\", \"external\" " "and \"at_flush\"", NULL, NULL, TRANSLOG_PURGE_IMMIDIATE, @@ -212,7 +213,7 @@ static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size, maria_max_temp_length, PLUGIN_VAR_RQCMDARG, "Don't use the fast sort index method to created index if the " "temporary file would get bigger than this.", - 0, 0, MAX_FILE_SIZE, 0, MAX_FILE_SIZE, 1024*1024); + 0, 0, MAX_FILE_SIZE & ~(1*MB-1), 0, MAX_FILE_SIZE, 1*MB); static MYSQL_SYSVAR_ULONG(pagecache_age_threshold, pagecache_age_threshold, PLUGIN_VAR_RQCMDARG, @@ -224,10 +225,10 @@ static MYSQL_SYSVAR_ULONG(pagecache_age_threshold, static MYSQL_SYSVAR_ULONGLONG(pagecache_buffer_size, pagecache_buffer_size, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "The size of the buffer used for index blocks for Maria tables. " + "The size of the buffer used for index blocks for Aria tables. 
" "Increase this to get better index handling (for all reads and " "multiple writes) to as much as you can afford.", 0, 0, - KEY_CACHE_SIZE, MALLOC_OVERHEAD, ~(ulong) 0, IO_SIZE); + KEY_CACHE_SIZE, 0, ~(ulong) 0, 1); static MYSQL_SYSVAR_ULONG(pagecache_division_limit, pagecache_division_limit, PLUGIN_VAR_RQCMDARG, @@ -238,20 +239,20 @@ static MYSQL_SYSVAR_ENUM(recover, maria_recover_options, PLUGIN_VAR_OPCMDARG, "Specifies how corrupted tables should be automatically repaired." " Possible values are \"NORMAL\" (the default), \"BACKUP\", \"FORCE\"," " \"QUICK\", or \"OFF\" which is like not using the option.", - NULL, NULL, HA_RECOVER_NONE, &maria_recover_typelib); + NULL, NULL, HA_RECOVER_DEFAULT, &maria_recover_typelib); static MYSQL_THDVAR_ULONG(repair_threads, PLUGIN_VAR_RQCMDARG, - "Number of threads to use when repairing maria tables. The value of 1 " + "Number of threads to use when repairing Aria tables. The value of 1 " "disables parallel repair.", 0, 0, 1, 1, ~0L, 1); static MYSQL_THDVAR_ULONG(sort_buffer_size, PLUGIN_VAR_RQCMDARG, "The buffer that is allocated when sorting the index when doing a " "REPAIR or when creating indexes with CREATE INDEX or ALTER TABLE.", - 0, 0, 8192*1024, 4, ~0L, 1); + 0, 0, 128L*1024L*1024L, 4, ~0L, 1); static MYSQL_THDVAR_ENUM(stats_method, PLUGIN_VAR_RQCMDARG, - "Specifies how maria index statistics collection code should treat " + "Specifies how Aria index statistics collection code should treat " "NULLs. 
Possible values are \"nulls_unequal\", \"nulls_equal\", " "and \"nulls_ignored\".", 0, 0, 0, &maria_stats_method_typelib); @@ -262,14 +263,15 @@ static MYSQL_SYSVAR_ENUM(sync_log_dir, sync_log_dir, PLUGIN_VAR_RQCMDARG, &maria_sync_log_dir_typelib); #ifdef USE_MARIA_FOR_TMP_TABLES -static my_bool use_maria_for_temp_tables= 1; +#define USE_MARIA_FOR_TMP_TABLES_VAL 1 #else -static my_bool use_maria_for_temp_tables= 0; +#define USE_MARIA_FOR_TMP_TABLES_VAL 0 #endif +my_bool use_maria_for_temp_tables= USE_MARIA_FOR_TMP_TABLES_VAL; static MYSQL_SYSVAR_BOOL(used_for_temp_tables, use_maria_for_temp_tables, PLUGIN_VAR_READONLY | PLUGIN_VAR_NOCMDOPT, - "Whether temporary tables should be MyISAM or Maria", 0, 0, + "Whether temporary tables should be MyISAM or Aria", 0, 0, 1); /***************************************************************************** @@ -468,7 +470,7 @@ static int table2maria(TABLE *table_arg, data_file_type row_type, recinfo_pos= recinfo; create_info->null_bytes= table_arg->s->null_bytes; - while (recpos < (uint) share->reclength) + while (recpos < (uint) share->stored_rec_length) { Field **field, *found= 0; minpos= share->reclength; @@ -743,8 +745,60 @@ void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...) 
DBUG_VOID_RETURN; } +/* + Create a transaction object + + SYNOPSIS + info Maria handler + + RETURN + 0 ok + # Error number (HA_ERR_OUT_OF_MEM) +*/ + +static int maria_create_trn_for_mysql(MARIA_HA *info) +{ + THD *thd= (THD*) info->external_ptr; + TRN *trn= THD_TRN; + DBUG_ENTER("maria_create_trn_for_mysql"); + + if (!trn) /* no transaction yet - open it now */ + { + trn= trnman_new_trn(& thd->transaction.wt); + if (unlikely(!trn)) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + THD_TRN= trn; + if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) + trans_register_ha(thd, TRUE, maria_hton); + } + _ma_set_trn_for_table(info, trn); + if (!trnman_increment_locked_tables(trn)) + { + trans_register_ha(thd, FALSE, maria_hton); + trnman_new_statement(trn); + } +#ifdef EXTRA_DEBUG + if (info->lock_type == F_WRLCK && + ! (trnman_get_flags(trn) & TRN_STATE_INFO_LOGGED)) + { + trnman_set_flags(trn, trnman_get_flags(trn) | TRN_STATE_INFO_LOGGED | + TRN_STATE_TABLES_CAN_CHANGE); + (void) translog_log_debug_info(trn, LOGREC_DEBUG_INFO_QUERY, + (uchar*) thd->query(), + thd->query_length()); + } + else + { + DBUG_PRINT("info", ("lock_type: %d trnman_flags: %u", + info->lock_type, trnman_get_flags(trn))); + } + +#endif + DBUG_RETURN(0); } +} /* extern "C" */ + /** Transactional table doing bulk insert with one single UNDO (UNDO_BULK_INSERT) and with repair. 
@@ -777,7 +831,11 @@ handler *ha_maria::clone(MEM_ROOT *mem_root) { ha_maria *new_handler= static_cast <ha_maria *>(handler::clone(mem_root)); if (new_handler) + { new_handler->file->state= file->state; + /* maria_create_trn_for_mysql() is never called for clone() tables */ + new_handler->file->trn= file->trn; + } return new_handler; } @@ -1031,6 +1089,7 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt) HA_CHECK param; MARIA_SHARE *share= file->s; const char *old_proc_info= thd_proc_info(thd, "Checking table"); + TRN *old_trn= file->trn; maria_chk_init(¶m); param.thd= thd; @@ -1047,7 +1106,8 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt) if (!maria_is_crashed(file) && (((param.testflag & T_CHECK_ONLY_CHANGED) && !(share->state.changed & (STATE_CHANGED | STATE_CRASHED | - STATE_CRASHED_ON_REPAIR)) && + STATE_CRASHED_ON_REPAIR | + STATE_IN_REPAIR)) && share->state.open_count == 0) || ((param.testflag & T_FAST) && (share->state.open_count == (uint) (share->global_changed ? 
1 : @@ -1084,14 +1144,15 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt) if (!error) { if ((share->state.changed & (STATE_CHANGED | - STATE_CRASHED_ON_REPAIR | + STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR | STATE_CRASHED | STATE_NOT_ANALYZED)) || (param.testflag & T_STATISTICS) || maria_is_crashed(file)) { file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; pthread_mutex_lock(&share->intern_lock); - share->state.changed &= ~(STATE_CHANGED | STATE_CRASHED | - STATE_CRASHED_ON_REPAIR); + DBUG_PRINT("info", ("Reseting crashed state")); + share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR); if (!(table->db_stat & HA_READ_ONLY)) error= maria_update_state_info(¶m, file, UPDATE_TIME | UPDATE_OPEN_COUNT | @@ -1107,6 +1168,8 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt) file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; } + /* Reset trn, that may have been set by repair */ + _ma_set_trn_for_table(file, old_trn); thd_proc_info(thd, old_proc_info); return error ? 
HA_ADMIN_CORRUPT : HA_ADMIN_OK; } @@ -1330,11 +1393,13 @@ int ha_maria::zerofill(THD * thd, HA_CHECK_OPT *check_opt) { int error; HA_CHECK param; + TRN *old_trn; MARIA_SHARE *share= file->s; if (!file) return HA_ADMIN_INTERNAL_ERROR; + old_trn= file->trn; maria_chk_init(¶m); param.thd= thd; param.op_name= "zerofill"; @@ -1342,6 +1407,9 @@ int ha_maria::zerofill(THD * thd, HA_CHECK_OPT *check_opt) param.sort_buffer_length= THDVAR(thd, sort_buffer_size); error=maria_zerofill(¶m, file, share->open_file_name.str); + /* Reset trn, that may have been set by repair */ + _ma_set_trn_for_table(file, old_trn); + if (!error) { pthread_mutex_lock(&share->intern_lock); @@ -1355,6 +1423,7 @@ int ha_maria::optimize(THD * thd, HA_CHECK_OPT *check_opt) { int error; HA_CHECK param; + if (!file) return HA_ADMIN_INTERNAL_ERROR; @@ -1371,6 +1440,7 @@ int ha_maria::optimize(THD * thd, HA_CHECK_OPT *check_opt) param.testflag &= ~T_REP_BY_SORT; error= repair(thd, ¶m, 1); } + return error; } @@ -1384,6 +1454,7 @@ int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize) char fixed_name[FN_REFLEN]; MARIA_SHARE *share= file->s; ha_rows rows= file->state->records; + TRN *old_trn= file->trn; DBUG_ENTER("ha_maria::repair"); /* @@ -1397,7 +1468,7 @@ int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize) if (file->dfile.file == -1) { sql_print_information("Retrying repair of: '%s' failed. 
" - "Please try REPAIR EXTENDED or maria_chk", + "Please try REPAIR EXTENDED or aria_chk", table->s->path.str); DBUG_RETURN(HA_ADMIN_FAILED); } @@ -1505,8 +1576,9 @@ int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize) { if ((share->state.changed & STATE_CHANGED) || maria_is_crashed(file)) { - share->state.changed &= ~(STATE_CHANGED | STATE_CRASHED | - STATE_CRASHED_ON_REPAIR); + DBUG_PRINT("info", ("Reseting crashed state")); + share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR); file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; } /* @@ -1544,6 +1616,9 @@ int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize) thd_proc_info(thd, old_proc_info); if (!thd->locked_tables) maria_lock_database(file, F_UNLCK); + + /* Reset trn, that may have been set by repair */ + _ma_set_trn_for_table(file, old_trn); error= error ? HA_ADMIN_FAILED : (optimize_done ? (write_log_record_for_repair(param, file) ? HA_ADMIN_FAILED : @@ -1782,7 +1857,7 @@ int ha_maria::enable_indexes(uint mode) "retrying", my_errno, param.db_name, param.table_name); /* This should never fail normally */ - DBUG_ASSERT(0); + DBUG_ASSERT(thd->killed != 0); /* Repairing by sort failed. Now try standard repair method. 
*/ param.testflag &= ~T_REP_BY_SORT; error= (repair(thd, ¶m, 0) != HA_ADMIN_OK); @@ -1936,14 +2011,14 @@ void ha_maria::start_bulk_insert(ha_rows rows) != 0 Error */ -int ha_maria::end_bulk_insert(bool table_will_be_deleted) +int ha_maria::end_bulk_insert() { int err; DBUG_ENTER("ha_maria::end_bulk_insert"); - maria_end_bulk_insert(file, table_will_be_deleted); + maria_end_bulk_insert(file); if ((err= maria_extra(file, HA_EXTRA_NO_CACHE, 0))) goto end; - if (can_enable_indexes && !table_will_be_deleted) + if (can_enable_indexes && !file->s->deleting) err= enable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE); end: if (bulk_insert_single_undo != BULK_INSERT_NONE) @@ -1971,14 +2046,16 @@ bool ha_maria::check_and_repair(THD *thd) check_opt.init(); - if (file->s->state.changed & STATE_MOVED) + error= 1; + if ((file->s->state.changed & + (STATE_CRASHED | STATE_CRASHED_ON_REPAIR | STATE_MOVED)) == + STATE_MOVED) { - sql_print_information("Zerofilling table: '%s'", table->s->path.str); + sql_print_information("Zerofilling moved table: '%s'", + table->s->path.str); if (!(error= zerofill(thd, &check_opt))) DBUG_RETURN(0); } - else - error= 1; /* if we got this far - the table is crashed. @@ -2315,6 +2392,8 @@ int ha_maria::info(uint flag, my_bool lock_table_share) int ha_maria::extra(enum ha_extra_function operation) { + int tmp; + TRN *old_trn= file->trn; if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_KEYREAD) return 0; #ifdef NOT_USED @@ -2340,7 +2419,9 @@ int ha_maria::extra(enum ha_extra_function operation) TRN *trn= THD_TRN; _ma_set_trn_for_table(file, trn); } - return maria_extra(file, operation, 0); + tmp= maria_extra(file, operation, 0); + file->trn= old_trn; // Reset trn if was used + return tmp; } int ha_maria::reset(void) @@ -2349,6 +2430,12 @@ int ha_maria::reset(void) pushed_idx_cond_keyno= MAX_KEY; ma_set_index_cond_func(file, NULL, 0); ds_mrr.dsmrr_close(); + if (file->trn) + { + /* Next statement is a new statement. 
Ensure it's logged */ + trnman_set_flags(file->trn, + trnman_get_flags(file->trn) & ~TRN_STATE_INFO_LOGGED); + } return maria_reset(file); } @@ -2389,9 +2476,18 @@ int ha_maria::delete_table(const char *name) return maria_delete_table(name); } + +/* This is mainly for temporary tables, so no logging necessary */ + +void ha_maria::drop_table(const char *name) +{ + (void) close(); + (void) maria_delete_table(name); +} + + int ha_maria::external_lock(THD *thd, int lock_type) { - TRN *trn= THD_TRN; DBUG_ENTER("ha_maria::external_lock"); /* We don't test now_transactional because it may vary between lock/unlock @@ -2411,22 +2507,7 @@ int ha_maria::external_lock(THD *thd, int lock_type) /* Transactional table */ if (lock_type != F_UNLCK) { - /* Start of new statement */ - if (!trn) /* no transaction yet - open it now */ - { - trn= trnman_new_trn(& thd->transaction.wt); - if (unlikely(!trn)) - DBUG_RETURN(HA_ERR_OUT_OF_MEM); - THD_TRN= trn; - if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) - trans_register_ha(thd, TRUE, maria_hton); - } - _ma_set_trn_for_table(file, trn); - if (!trnman_increment_locked_tables(trn)) - { - trans_register_ha(thd, FALSE, maria_hton); - trnman_new_statement(trn); - } + file->external_ptr= thd; // For maria_register_trn() if (!file->s->lock_key_trees) // If we don't use versioning { @@ -2445,6 +2526,13 @@ int ha_maria::external_lock(THD *thd, int lock_type) *file->state= file->s->state.state; } + if (file->trn) + { + /* This can only happen with tables created with clone() */ + DBUG_ASSERT(cloned); + trnman_increment_locked_tables(file->trn); + } + if (!thd->transaction.on) { /* @@ -2459,20 +2547,10 @@ int ha_maria::external_lock(THD *thd, int lock_type) DBUG_PRINT("info", ("Disabling logging for table")); _ma_tmp_disable_logging_for_table(file, TRUE); } -#ifdef EXTRA_DEBUG - if (lock_type == F_WRLCK && - ! 
(trnman_get_flags(trn) & TRN_STATE_INFO_LOGGED)) - { - trnman_set_flags(trn, trnman_get_flags(trn) | TRN_STATE_INFO_LOGGED | - TRN_STATE_TABLES_CAN_CHANGE); - (void) translog_log_debug_info(trn, LOGREC_DEBUG_INFO_QUERY, - (uchar*) thd->query(), - thd->query_length()); - } -#endif } else { + TRN *trn= THD_TRN; /* End of transaction */ /* @@ -2497,6 +2575,8 @@ int ha_maria::external_lock(THD *thd, int lock_type) file->state= &file->s->state.state; if (trn) { + DBUG_PRINT("info", + ("locked_tables: %u", trnman_has_locked_tables(trn))); if (trnman_has_locked_tables(trn) && !trnman_decrement_locked_tables(trn)) { @@ -3163,7 +3243,7 @@ static my_bool translog_callback_delete_all(const char *directory, /** - Helper function for option maria-force-start-after-recovery-failures. + Helper function for option aria-force-start-after-recovery-failures. Deletes logs if too many failures. Otherwise, increments the counter of failures in the control file. Notice how this has to be called _before_ translog_init() (if log is @@ -3179,9 +3259,9 @@ static int mark_recovery_start(const char* log_dir) DBUG_ENTER("mark_recovery_start"); if (unlikely(maria_recover_options == HA_RECOVER_NONE)) ma_message_no_user(ME_JUST_WARNING, "Please consider using option" - " --maria-recover[=...] to automatically check and" + " --aria-recover[=...] to automatically check and" " repair tables when logs are removed by option" - " --maria-force-start-after-recovery-failures=#"); + " --aria-force-start-after-recovery-failures=#"); if (recovery_failures >= force_start_after_recovery_failures) { /* @@ -3207,9 +3287,9 @@ static int mark_recovery_start(const char* log_dir) /** - Helper function for option maria-force-start-after-recovery-failures. + Helper function for option aria-force-start-after-recovery-failures. Records in the control file that recovery was a success, so that it's not - counted for maria-force-start-after-recovery-failures. + counted for aria-force-start-after-recovery-failures. 
*/ static int mark_recovery_success(void) @@ -3236,6 +3316,7 @@ bool ha_maria::is_changed() const static int ha_maria_init(void *p) { int res; + copy_variable_aliases(); const char *log_dir= maria_data_root; maria_hton= (handlerton *)p; maria_hton->state= SHOW_OPTION_YES; @@ -3250,7 +3331,7 @@ static int ha_maria_init(void *p) maria_hton->flags= HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES; bzero(maria_log_pagecache, sizeof(*maria_log_pagecache)); maria_tmpdir= &mysql_tmpdir_list; /* For REDO */ - res= maria_init() || ma_control_file_open(TRUE, TRUE) || + res= maria_upgrade() || maria_init() || ma_control_file_open(TRUE, TRUE) || ((force_start_after_recovery_failures != 0) && mark_recovery_start(log_dir)) || !init_pagecache(maria_pagecache, @@ -3263,9 +3344,11 @@ static int ha_maria_init(void *p) MYSQL_VERSION_ID, server_id, maria_log_pagecache, TRANSLOG_DEFAULT_FLAGS, 0) || maria_recovery_from_log() || - ((force_start_after_recovery_failures != 0) && mark_recovery_success()) || + ((force_start_after_recovery_failures != 0 || + maria_recovery_changed_data) && mark_recovery_success()) || ma_checkpoint_init(checkpoint_interval); maria_multi_threaded= maria_in_ha_maria= TRUE; + maria_create_trn_hook= maria_create_trn_for_mysql; #if defined(HAVE_REALPATH) && !defined(HAVE_valgrind) && !defined(HAVE_BROKEN_REALPATH) /* We can only test for sub paths if my_symlink.c is using realpath */ @@ -3350,7 +3433,7 @@ my_bool ha_maria::register_query_cache_table(THD *thd, char *table_name, } #endif -static struct st_mysql_sys_var* system_variables[]= { +struct st_mysql_sys_var* system_variables[]= { MYSQL_SYSVAR(block_size), MYSQL_SYSVAR(checkpoint_interval), MYSQL_SYSVAR(force_start_after_recovery_failures), @@ -3486,15 +3569,20 @@ static void update_log_file_size(MYSQL_THD thd, } -static SHOW_VAR status_variables[]= { - {"Maria_pagecache_blocks_not_flushed", (char*) &maria_pagecache_var.global_blocks_changed, SHOW_LONG_NOFLUSH}, - {"Maria_pagecache_blocks_unused", (char*) 
&maria_pagecache_var.blocks_unused, SHOW_LONG_NOFLUSH}, - {"Maria_pagecache_blocks_used", (char*) &maria_pagecache_var.blocks_used, SHOW_LONG_NOFLUSH}, - {"Maria_pagecache_read_requests", (char*) &maria_pagecache_var.global_cache_r_requests, SHOW_LONGLONG}, - {"Maria_pagecache_reads", (char*) &maria_pagecache_var.global_cache_read, SHOW_LONGLONG}, - {"Maria_pagecache_write_requests", (char*) &maria_pagecache_var.global_cache_w_requests, SHOW_LONGLONG}, - {"Maria_pagecache_writes", (char*) &maria_pagecache_var.global_cache_write, SHOW_LONGLONG}, - {"Maria_transaction_log_syncs", (char*) &translog_syncs, SHOW_LONGLONG}, +SHOW_VAR status_variables[]= { + {"pagecache_blocks_not_flushed", (char*) &maria_pagecache_var.global_blocks_changed, SHOW_LONG_NOFLUSH}, + {"pagecache_blocks_unused", (char*) &maria_pagecache_var.blocks_unused, SHOW_LONG_NOFLUSH}, + {"pagecache_blocks_used", (char*) &maria_pagecache_var.blocks_used, SHOW_LONG_NOFLUSH}, + {"pagecache_read_requests", (char*) &maria_pagecache_var.global_cache_r_requests, SHOW_LONGLONG}, + {"pagecache_reads", (char*) &maria_pagecache_var.global_cache_read, SHOW_LONGLONG}, + {"pagecache_write_requests", (char*) &maria_pagecache_var.global_cache_w_requests, SHOW_LONGLONG}, + {"pagecache_writes", (char*) &maria_pagecache_var.global_cache_write, SHOW_LONGLONG}, + {"transaction_log_syncs", (char*) &translog_syncs, SHOW_LONGLONG}, + {NullS, NullS, SHOW_LONG} +}; + +static struct st_mysql_show_var aria_status_variables[]= { + {"Aria", (char*) &status_variables, SHOW_ARRAY}, {NullS, NullS, SHOW_LONG} }; @@ -3559,36 +3647,21 @@ Item *ha_maria::idx_cond_push(uint keyno_arg, Item* idx_cond_arg) struct st_mysql_storage_engine maria_storage_engine= { MYSQL_HANDLERTON_INTERFACE_VERSION }; -mysql_declare_plugin(maria) +maria_declare_plugin(aria) +compat_aliases, { MYSQL_STORAGE_ENGINE_PLUGIN, &maria_storage_engine, - "MARIA", + "Aria", "Monty Program Ab", "Crash-safe tables with MyISAM heritage", PLUGIN_LICENSE_GPL, - ha_maria_init, 
/* Plugin Init */ - NULL, /* Plugin Deinit */ - 0x0105, /* 1.5 */ - status_variables, /* status variables */ - system_variables, /* system variables */ - NULL -} -mysql_declare_plugin_end; -maria_declare_plugin(maria) -{ - MYSQL_STORAGE_ENGINE_PLUGIN, - &maria_storage_engine, - "MARIA", - "MySQL AB", - "Crash-safe tables with MyISAM heritage", - PLUGIN_LICENSE_GPL, - ha_maria_init, /* Plugin Init */ - NULL, /* Plugin Deinit */ - 0x0105, /* 1.5 */ - status_variables, /* status variables */ - system_variables, /* system variables */ - "1.5", /* string version */ - MariaDB_PLUGIN_MATURITY_GAMMA /* maturity */ + ha_maria_init, /* Plugin Init */ + NULL, /* Plugin Deinit */ + 0x0105, /* 1.5 */ + aria_status_variables, /* status variables */ + system_variables, /* system variables */ + "1.5", /* string version */ + MariaDB_PLUGIN_MATURITY_GAMMA /* maturity */ } maria_declare_plugin_end; diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h index 6b9ea150ef1..605ad1d3a20 100644 --- a/storage/maria/ha_maria.h +++ b/storage/maria/ha_maria.h @@ -59,7 +59,7 @@ public: ~ha_maria() {} handler *clone(MEM_ROOT *mem_root); const char *table_type() const - { return "MARIA"; } + { return "Aria"; } const char *index_type(uint key_number); const char **bas_ext() const; ulonglong table_flags() const @@ -131,7 +131,7 @@ public: int enable_indexes(uint mode); int indexes_are_disabled(void); void start_bulk_insert(ha_rows rows); - int end_bulk_insert(bool abort); + int end_bulk_insert(); ha_rows records_in_range(uint inx, key_range * min_key, key_range * max_key); void update_create_info(HA_CREATE_INFO * create_info); int create(const char *name, TABLE * form, HA_CREATE_INFO * create_info); @@ -143,19 +143,21 @@ public: ulonglong *nb_reserved_values); int rename_table(const char *from, const char *to); int delete_table(const char *name); + void drop_table(const char *name); int check(THD * thd, HA_CHECK_OPT * check_opt); int analyze(THD * thd, HA_CHECK_OPT * check_opt); int 
repair(THD * thd, HA_CHECK_OPT * check_opt); bool check_and_repair(THD * thd); bool is_crashed() const; bool is_changed() const; - bool auto_repair() const { return 1; } + bool auto_repair() const { return maria_recover_options != HA_RECOVER_NONE; } int optimize(THD * thd, HA_CHECK_OPT * check_opt); int restore(THD * thd, HA_CHECK_OPT * check_opt); int backup(THD * thd, HA_CHECK_OPT * check_opt); int assign_to_keycache(THD * thd, HA_CHECK_OPT * check_opt); int preload_keys(THD * thd, HA_CHECK_OPT * check_opt); bool check_if_incompatible_data(HA_CREATE_INFO * info, uint table_changes); + bool check_if_supported_virtual_columns(void) { return TRUE;} #ifdef HAVE_REPLICATION int dump(THD * thd, int fd); int net_read_dump(NET * net); diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c index 882c6e4cd06..4f37d7b9a1f 100644 --- a/storage/maria/ma_bitmap.c +++ b/storage/maria/ma_bitmap.c @@ -147,6 +147,12 @@ static inline my_bool write_changed_bitmap(MARIA_SHARE *share, DBUG_ASSERT(bitmap->file.write_callback != 0); DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable)); + /* + Mark that a bitmap page has been written to page cache and we have + to flush it during checkpoint. 
+ */ + bitmap->changed_not_flushed= 1; + if ((bitmap->non_flushable == 0) #ifdef WRONG_BITMAP_FLUSH || 1 @@ -347,7 +353,7 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE *share) MARIA_FILE_BITMAP *bitmap= &share->bitmap; DBUG_ENTER("_ma_bitmap_flush_all"); pthread_mutex_lock(&bitmap->bitmap_lock); - if (bitmap->changed) + if (bitmap->changed || bitmap->changed_not_flushed) { bitmap->flush_all_requested= TRUE; #ifndef WRONG_BITMAP_FLUSH @@ -365,8 +371,8 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE *share) */ if (bitmap->changed) { - res= write_changed_bitmap(share, bitmap); bitmap->changed= FALSE; + res= write_changed_bitmap(share, bitmap); } /* We do NOT use FLUSH_KEEP_LAZY because we must be sure that bitmap @@ -384,6 +390,7 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE *share) &bitmap->pages_covered) & PCFLUSH_PINNED_AND_ERROR) res= TRUE; + bitmap->changed_not_flushed= FALSE; bitmap->flush_all_requested= FALSE; /* Some well-behaved threads may be waiting for flush_all_requested to @@ -1875,6 +1882,7 @@ static my_bool set_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, uint offset_page, offset, tmp, org_tmp; uchar *data; DBUG_ENTER("set_page_bits"); + DBUG_ASSERT(fill_pattern <= 7); bitmap_page= page - page % bitmap->pages_covered; if (bitmap_page != bitmap->page && @@ -2063,6 +2071,13 @@ my_bool _ma_bitmap_set_full_page_bits(MARIA_HA *info, safe_mutex_assert_owner(&info->s->bitmap.bitmap_lock); bitmap_page= page - page % bitmap->pages_covered; + if (page == bitmap_page || + page + page_count >= bitmap_page + bitmap->pages_covered) + { + DBUG_ASSERT(0); /* Wrong in data */ + DBUG_RETURN(1); + } + if (bitmap_page != bitmap->page && _ma_change_bitmap_page(info, bitmap, bitmap_page)) DBUG_RETURN(1); @@ -2142,12 +2157,12 @@ void _ma_bitmap_flushable(MARIA_HA *info, int non_flushable_inc) DBUG_VOID_RETURN; bitmap= &share->bitmap; + pthread_mutex_lock(&bitmap->bitmap_lock); + if (non_flushable_inc == -1) { - pthread_mutex_lock(&bitmap->bitmap_lock); DBUG_ASSERT((int) 
bitmap->non_flushable > 0); DBUG_ASSERT(info->non_flushable_state == 1); - info->non_flushable_state= 0; if (--bitmap->non_flushable == 0) { /* @@ -2164,11 +2179,11 @@ void _ma_bitmap_flushable(MARIA_HA *info, int non_flushable_inc) } DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable)); pthread_mutex_unlock(&bitmap->bitmap_lock); + info->non_flushable_state= 0; DBUG_VOID_RETURN; } DBUG_ASSERT(non_flushable_inc == 1); DBUG_ASSERT(info->non_flushable_state == 0); - pthread_mutex_lock(&bitmap->bitmap_lock); while (unlikely(bitmap->flush_all_requested)) { /* @@ -2186,9 +2201,9 @@ void _ma_bitmap_flushable(MARIA_HA *info, int non_flushable_inc) pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock); } bitmap->non_flushable++; - info->non_flushable_state= 1; DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable)); pthread_mutex_unlock(&bitmap->bitmap_lock); + info->non_flushable_state= 1; DBUG_VOID_RETURN; } @@ -2217,6 +2232,8 @@ void _ma_bitmap_flushable(MARIA_HA *info, int non_flushable_inc) Note that we may have 'filler blocks' that are used to split a block in half; These can be recognized by that they have page_count == 0. + This code also reverse the effect of ma_bitmap_flushable(.., 1); + RETURN 0 ok 1 error (Couldn't write or read bitmap page) @@ -2287,9 +2304,16 @@ my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks) The page has all bits set; The following test is an optimization to not set the bits to the same value as before. 
*/ - if (bits != current_bitmap_value && - set_page_bits(info, bitmap, block->page, bits)) - goto err; + if (bits != current_bitmap_value) + { + if (set_page_bits(info, bitmap, block->page, bits)) + goto err; + } + else + { + DBUG_ASSERT(current_bitmap_value == + _ma_bitmap_get_page_bits(info, bitmap, block->page)); + } } else if (!(block->used & BLOCKUSED_USED) && _ma_bitmap_reset_full_page_bits(info, bitmap, @@ -2393,6 +2417,8 @@ my_bool _ma_bitmap_set(MARIA_HA *info, pgcache_page_no_t page, my_bool head, uint bits; my_bool res; DBUG_ENTER("_ma_bitmap_set"); + DBUG_PRINT("enter", ("page: %lu head: %d empty_space: %u", + (ulong) page, head, empty_space)); pthread_mutex_lock(&info->s->bitmap.bitmap_lock); bits= (head ? diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index 89701913c9a..92ec916a9d1 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -334,12 +334,13 @@ typedef struct st_maria_extent_cursor } \ } while (0) + static my_bool delete_tails(MARIA_HA *info, MARIA_RECORD_POS *tails); static my_bool delete_head_or_tail(MARIA_HA *info, pgcache_page_no_t page, uint record_number, my_bool head, my_bool from_update); #ifndef DBUG_OFF -static void _ma_print_directory(uchar *buff, uint block_size); +static void _ma_print_directory(FILE *file, uchar *buff, uint block_size); #endif static uchar *store_page_range(uchar *to, MARIA_BITMAP_BLOCK *block, uint block_size, ulong length, @@ -615,7 +616,7 @@ static inline uint end_of_previous_entry(uchar *dir, uchar *end) #ifndef DBUG_OFF -static void _ma_print_directory(uchar *buff, uint block_size) +static void _ma_print_directory(FILE *file, uchar *buff, uint block_size) { uint max_entry= (uint) ((uchar *) buff)[DIR_COUNT_OFFSET], row= 0; uint end_of_prev_row= PAGE_HEADER_SIZE; @@ -624,40 +625,46 @@ static void _ma_print_directory(uchar *buff, uint block_size) dir= dir_entry_pos(buff, block_size, max_entry-1); end= dir_entry_pos(buff, block_size, 0); - DBUG_LOCK_FILE; - 
fprintf(DBUG_FILE,"Directory dump (pos:length):\n"); + DBUG_LOCK_FILE; /* If using DBUG_FILE */ + fprintf(file,"Directory dump (pos:length):\n"); for (row= 1; dir <= end ; end-= DIR_ENTRY_SIZE, row++) { uint offset= uint2korr(end); uint length= uint2korr(end+2); - fprintf(DBUG_FILE, " %4u:%4u", offset, offset ? length : 0); + fprintf(file, " %4u:%4u", offset, offset ? length : 0); if (!(row % (80/12))) - fputc('\n', DBUG_FILE); + fputc('\n', file); if (offset) { DBUG_ASSERT(offset >= end_of_prev_row); end_of_prev_row= offset + length; } } - fputc('\n', DBUG_FILE); - fflush(DBUG_FILE); + fputc('\n', file); + fflush(file); DBUG_UNLOCK_FILE; } -static void check_directory(uchar *buff, uint block_size, uint min_row_length) +static void check_directory(uchar *buff, uint block_size, uint min_row_length, + uint real_empty_size) { uchar *dir, *end; uint max_entry= (uint) buff[DIR_COUNT_OFFSET]; uint start_of_dir, deleted; - uchar free_entry, prev_free_entry; uint end_of_prev_row= PAGE_HEADER_SIZE; + uint empty_size_on_page; + uint empty_size; + uchar free_entry, prev_free_entry; dir= dir_entry_pos(buff, block_size, max_entry-1); start_of_dir= (uint) (dir - buff); end= dir_entry_pos(buff, block_size, 0); - deleted= 0; + deleted= empty_size= 0; + + empty_size_on_page= (real_empty_size != (uint) -1 ? 
real_empty_size : + uint2korr(buff + EMPTY_SPACE_OFFSET)); /* Ensure that all rows are in increasing order and no overlaps */ for (; dir <= end ; end-= DIR_ENTRY_SIZE) @@ -668,12 +675,15 @@ static void check_directory(uchar *buff, uint block_size, uint min_row_length) { DBUG_ASSERT(offset >= end_of_prev_row); DBUG_ASSERT(!length || length >= min_row_length); + empty_size+= offset - end_of_prev_row; end_of_prev_row= offset + length; } else deleted++; } + empty_size+= start_of_dir - end_of_prev_row; DBUG_ASSERT(end_of_prev_row <= start_of_dir); + DBUG_ASSERT(empty_size == empty_size_on_page); /* check free links */ free_entry= buff[DIR_FREE_OFFSET]; @@ -690,7 +700,7 @@ static void check_directory(uchar *buff, uint block_size, uint min_row_length) DBUG_ASSERT(deleted == 0); } #else -#define check_directory(A,B,C) +#define check_directory(A,B,C,D) #endif /* DBUG_OFF */ @@ -698,7 +708,8 @@ static void check_directory(uchar *buff, uint block_size, uint min_row_length) @brief Calculate if there is enough entries on the page */ -my_bool enough_free_entries(uchar *buff, uint block_size, uint wanted_entries) +static my_bool enough_free_entries(uchar *buff, uint block_size, + uint wanted_entries) { uint entries= (uint) buff[DIR_COUNT_OFFSET]; uint needed_free_entries, free_entry; @@ -723,6 +734,33 @@ my_bool enough_free_entries(uchar *buff, uint block_size, uint wanted_entries) /** + @brief Check if there is room for more rows on page + + @fn enough_free_entries_on_page + + @return 0 Directory is full + @return 1 There is room for more entries on the page +*/ + +my_bool enough_free_entries_on_page(MARIA_SHARE *share, + uchar *page_buff) +{ + enum en_page_type page_type; + page_type= (enum en_page_type) (page_buff[PAGE_TYPE_OFFSET] & + ~(uchar) PAGE_CAN_BE_COMPACTED); + + if (page_type == HEAD_PAGE) + { + uint row_count= (uint) page_buff[DIR_COUNT_OFFSET]; + return !(row_count == MAX_ROWS_PER_PAGE && + page_buff[DIR_FREE_OFFSET] == END_OF_DIR_FREE_LIST); + } + return 
enough_free_entries(page_buff, share->block_size, + 1 + share->base.blobs); +} + + +/** @brief Extend a record area to fit a given size block @fn extend_area_on_page() @@ -764,20 +802,27 @@ static my_bool extend_area_on_page(MARIA_HA *info, uint *empty_space, uint *ret_offset, uint *ret_length) { - uint rec_offset, length; + uint rec_offset, length, org_rec_length; uint max_entry= (uint) buff[DIR_COUNT_OFFSET]; DBUG_ENTER("extend_area_on_page"); + /* + We can't check for min length here as we may have called + extend_directory() to create a new (empty) entry just before + */ + check_directory(buff, block_size, 0, *empty_space); + rec_offset= uint2korr(dir); if (rec_offset) { /* Extending old row; Mark current space as 'free' */ - length= uint2korr(dir + 2); + length= org_rec_length= uint2korr(dir + 2); DBUG_PRINT("info", ("rec_offset: %u length: %u request_length: %u " "empty_space: %u", - rec_offset, length, request_length, *empty_space)); + rec_offset, org_rec_length, request_length, + *empty_space)); - *empty_space+= length; + *empty_space+= org_rec_length; } else { @@ -847,6 +892,7 @@ static my_bool extend_area_on_page(MARIA_HA *info, "length: %u request_length: %u", length, request_length)); my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */ + DBUG_ASSERT(0); /* For debugging */ DBUG_RETURN(1); /* Error in block */ } *empty_space= length; /* All space is here */ @@ -857,7 +903,9 @@ static my_bool extend_area_on_page(MARIA_HA *info, int2store(dir + 2, length); *ret_offset= rec_offset; *ret_length= length; - check_directory(buff, block_size, info ? info->s->base.min_block_length : 0); + + check_directory(buff, block_size, info ? info->s->base.min_block_length : 0, + *empty_space - length); DBUG_RETURN(0); } @@ -1066,7 +1114,7 @@ static uchar *find_free_position(MARIA_HA *info, *res_length= length; check_directory(buff, block_size, - info ? info->s->base.min_block_length : 0); + info ? 
info->s->base.min_block_length : 0, (uint) -1); DBUG_RETURN(dir); } /* No free places in dir; create a new one */ @@ -1087,7 +1135,8 @@ static uchar *find_free_position(MARIA_HA *info, *res_rownr= max_entry; *res_length= length; - check_directory(buff, block_size, info ? info->s->base.min_block_length : 0); + check_directory(buff, block_size, info ? info->s->base.min_block_length : 0, + *empty_space); DBUG_RETURN(dir); } @@ -1167,7 +1216,8 @@ static my_bool extend_directory(MARIA_HA *info, uchar *buff, uint block_size, } check_directory(buff, block_size, - info ? min(info->s->base.min_block_length, length) : 0); + info ? min(info->s->base.min_block_length, length) : 0, + *empty_space); DBUG_RETURN(0); } @@ -1377,7 +1427,8 @@ void _ma_compact_block_page(uchar *buff, uint block_size, uint rownr, uint freed_size= 0; uchar *dir, *end; DBUG_ENTER("_ma_compact_block_page"); - DBUG_PRINT("enter", ("rownr: %u", rownr)); + DBUG_PRINT("enter", ("rownr: %u min_read_from: %lu", rownr, + (ulong) min_read_from)); DBUG_ASSERT(max_entry > 0 && max_entry < (block_size - PAGE_HEADER_SIZE - PAGE_SUFFIX_SIZE) / DIR_ENTRY_SIZE); @@ -1467,6 +1518,8 @@ void _ma_compact_block_page(uchar *buff, uint block_size, uint rownr, { /* Move all entries after rownr to end of page */ uint rownr_length; + + DBUG_ASSERT(extend_block); /* Should always be true */ next_free_pos= end_of_found_block= page_pos= block_size - DIR_ENTRY_SIZE * max_entry - PAGE_SUFFIX_SIZE; diff= 0; @@ -1538,13 +1591,13 @@ void _ma_compact_block_page(uchar *buff, uint block_size, uint rownr, int2store(dir, offset + diff); /* correct current pos */ next_free_pos= offset; } - if (page_pos != end_of_found_block) { uint length= (end_of_found_block - next_free_pos); memmove(buff + page_pos - length, buff + next_free_pos, length); next_free_pos= page_pos- length; } + /* Extend rownr block to cover hole */ rownr_length= next_free_pos - start_of_found_block; int2store(dir+2, rownr_length); @@ -1567,8 +1620,9 @@ void 
_ma_compact_block_page(uchar *buff, uint block_size, uint rownr, } buff[PAGE_TYPE_OFFSET]&= ~(uchar) PAGE_CAN_BE_COMPACTED; } - check_directory(buff, block_size, min_row_length); - DBUG_EXECUTE("directory", _ma_print_directory(buff, block_size);); + check_directory(buff, block_size, min_row_length, + extend_block ? 0 : (uint) -1); + DBUG_EXECUTE("directory", _ma_print_directory(DBUG_FILE, buff, block_size);); DBUG_VOID_RETURN; } @@ -1662,7 +1716,7 @@ static my_bool get_head_or_tail_page(MARIA_HA *info, MARIA_PINNED_PAGE page_link; MARIA_SHARE *share= info->s; DBUG_ENTER("get_head_or_tail_page"); - DBUG_PRINT("enter", ("length: %u", length)); + DBUG_PRINT("enter", ("page_type: %u length: %u", page_type, length)); block_size= share->block_size; if (block->org_bitmap_value == 0) /* Empty block */ @@ -1797,16 +1851,11 @@ static my_bool get_rowpos_in_head_or_tail_page(MARIA_HA *info, goto err; } + /* + The following dir entry is unused in case of insert / update but + not in case of undo_update / undo_delete + */ dir= dir_entry_pos(buff, block_size, rownr); -#ifdef SANITY_CHECKS - /* Tail's should always be unused */ - if (page_type == TAIL_PAGE && max_entry > rownr && - (dir[0] != 0 || dir[1] != 0)) - { - DBUG_ASSERT(0); - goto err; - } -#endif if (extend_area_on_page(page_type == HEAD_PAGE ? info : 0, buff, dir, rownr, block_size, length, @@ -1941,7 +1990,8 @@ static my_bool write_tail(MARIA_HA *info, block->empty_space= (enough_free_entries(row_pos.buff, share->block_size, 1 + share->base.blobs) ? 
empty_space : 0); - block->used= BLOCKUSED_USED | BLOCKUSED_TAIL; + /* Keep BLOCKUSED_USE_ORG_BITMAP */ + block->used|= BLOCKUSED_USED | BLOCKUSED_TAIL; /* Increase data file size, if extended */ position= (my_off_t) block->page * block_size; @@ -2183,6 +2233,8 @@ static void store_extent_info(uchar *to, MARIA_BITMAP_BLOCK *block, *end_block; uint copy_length; my_bool first_found= 0; + DBUG_ENTER("store_extent_info"); + DBUG_PRINT("enter", ("count: %u", count)); for (block= first_block, end_block= first_block+count ; block < end_block; block++) @@ -2202,6 +2254,7 @@ static void store_extent_info(uchar *to, page_count|= START_EXTENT_BIT; } pagerange_store(to + PAGE_STORE_SIZE, page_count); + DBUG_DUMP("extent", to, ROW_EXTENT_SIZE); to+= ROW_EXTENT_SIZE; if (!first_found) { @@ -2216,6 +2269,7 @@ static void store_extent_info(uchar *to, data. */ bzero(to, (size_t) (row_extents_second_part + copy_length - to)); + DBUG_VOID_RETURN; } @@ -2234,7 +2288,8 @@ static void store_extent_info(uchar *to, @return @retval 0 ok - @retval 1 Error (out of memory or disk error changing bitmap) + @retval 1 Error (out of memory or disk error changing bitmap) or + wrong information in extent information */ static my_bool extent_to_bitmap_blocks(MARIA_HA *info, @@ -2245,7 +2300,7 @@ static my_bool extent_to_bitmap_blocks(MARIA_HA *info, { MARIA_BITMAP_BLOCK *block, *start_block; MARIA_SHARE *share= info->s; - uint i; + uint i, tail_page; DBUG_ENTER("extent_to_bitmap_blocks"); if (allocate_dynamic(&info->bitmap_blocks, extent_count + 2)) @@ -2271,13 +2326,36 @@ static my_bool extent_to_bitmap_blocks(MARIA_HA *info, page_count&= ~START_EXTENT_BIT; start_block->sub_blocks= (uint) (block - start_block); start_block= block; - } block->page= page_korr(extent_info); block->page_count= page_count; block->sub_blocks= 0; + if (block->page_count == 0) + { + /* Extend allocated but not used by write_block_record() */ + DBUG_ASSERT(block->page == 0); + /* This is the last block */ + blocks->count= i; 
+ break; + } + if ((tail_page= page_count & TAIL_BIT)) + page_count= 1; - if (page_count & TAIL_BIT) + /* Check if wrong data */ + if (block->page == 0 || page_count == 0 || + (block->page + page_count) * share->block_size > + share->state.state.data_file_length) + { + DBUG_PRINT("error", ("page: %lu page_count: %u tail: %u length: %ld data_length: %ld", + (ulong) block->page, + (block->page_count & ~TAIL_BIT), + (uint) test(block->page_count & TAIL_BIT), + (ulong) ((block->page + (page_count & ~TAIL_BIT)) * + share->block_size), + (ulong) share->state.state.data_file_length)); + DBUG_RETURN(1); + } + if (tail_page) { block->org_bitmap_value= _ma_bitmap_get_page_bits(info, &share->bitmap, block->page); @@ -2289,7 +2367,7 @@ static my_bool extent_to_bitmap_blocks(MARIA_HA *info, my_bool res; pthread_mutex_lock(&share->bitmap.bitmap_lock); res= _ma_bitmap_set_full_page_bits(info, &share->bitmap, - block->page, block->page_count); + block->page, page_count); pthread_mutex_unlock(&share->bitmap.bitmap_lock); if (res) DBUG_RETURN(1); @@ -2711,9 +2789,16 @@ static my_bool write_block_record(MARIA_HA *info, sizeof(char*)); memcpy(data, tmp_pos, *blob_lengths); data+= *blob_lengths; - /* Skip over tail page that was to be used to store blob */ - block++; - bitmap_blocks->tail_page_skipped= 1; + /* + The following is not true when we want to insert data into original + place. In this case we don't have any extra blocks allocated + */ + if (likely(undo_lsn == LSN_ERROR)) + { + /* Skip over tail page that was prepared for storing blob */ + block++; + bitmap_blocks->tail_page_skipped= 1; + } } if (head_block->sub_blocks > 1) { @@ -2726,7 +2811,9 @@ static my_bool write_block_record(MARIA_HA *info, /* Update page directory */ head_length= (uint) (data - row_pos->data); - DBUG_PRINT("info", ("Used head length on page: %u", head_length)); + DBUG_PRINT("info", ("Used head length on page: %u header_length: %u", + head_length, + (uint) (flag & ROW_FLAG_TRANSID ? 
TRANSID_SIZE : 0))); DBUG_ASSERT(data <= end_of_data); if (head_length < share->base.min_block_length) { @@ -2736,6 +2823,7 @@ static my_bool write_block_record(MARIA_HA *info, data+= diff_length; head_length= share->base.min_block_length; } + DBUG_ASSERT(undo_lsn == LSN_ERROR || head_length == row_pos->length); int2store(row_pos->dir + 2, head_length); /* update empty space at start of block */ row_pos->empty_space-= head_length; @@ -2747,7 +2835,8 @@ static my_bool write_block_record(MARIA_HA *info, head_block->empty_space= 0; /* Page is full */ head_block->used|= BLOCKUSED_USED; - check_directory(page_buff, share->block_size, share->base.min_block_length); + check_directory(page_buff, share->block_size, share->base.min_block_length, + (uint) -1); /* Now we have to write tail pages, as we need to store the position @@ -2798,11 +2887,13 @@ static my_bool write_block_record(MARIA_HA *info, { /* Set only a bit, to not cause bitmap code to believe a block is full - when there is still a lot of entries in it + when there is still a lot of entries in it. 
*/ block->used|= BLOCKUSED_USED; } } + DBUG_ASSERT((undo_lsn == LSN_ERROR || + block == bitmap_blocks->block + bitmap_blocks->count)); column= save_column; block= save_block; blob_lengths= save_blob_lengths; @@ -3196,9 +3287,10 @@ static my_bool write_block_record(MARIA_HA *info, else { uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + - PAGE_STORE_SIZE + DIRPOS_STORE_SIZE + + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE + 2 + HA_CHECKSUM_STORE_SIZE + 2 + PAGERANGE_STORE_SIZE + ROW_EXTENT_SIZE]; + uchar *log_pos; ha_checksum checksum_delta; /* LOGREC_UNDO_ROW_INSERT & LOGREC_UNDO_ROW_UPDATE share same header */ @@ -3208,18 +3300,17 @@ static my_bool write_block_record(MARIA_HA *info, dirpos_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE, row_pos->rownr); - - log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= - (LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + - DIRPOS_STORE_SIZE); + log_pos= (log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE + + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE); store_checksum_in_rec(share, checksum_delta, row->checksum - old_record_checksum, - log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE + - PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, - log_array[TRANSLOG_INTERNAL_PARTS + 0].length); + log_pos, log_pos); compile_time_assert(sizeof(ha_checksum) == HA_CHECKSUM_STORE_SIZE); + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - + log_data); + if (!old_record) { /* Store undo_lsn in case we are aborting the insert */ @@ -3238,16 +3329,18 @@ static my_bool write_block_record(MARIA_HA *info, else { /* Write UNDO log record for the UPDATE */ - uchar *log_pos= (log_data + - log_array[TRANSLOG_INTERNAL_PARTS + 0].length); size_t row_length, extents_length; - uint row_parts_count; + uint row_parts_count, cur_head_length; /* Write head length and extents of the original row so that we - during UNDO can put it back in the 
original position + during UNDO can put it back in the original position. + We don't store size for TRANSID, as we don't write this during + UNDO. */ - int2store(log_pos, info->cur_row.head_length); + cur_head_length= (info->cur_row.head_length - + info->cur_row.header_length); + int2store(log_pos, cur_head_length); pagerange_store(log_pos + 2, info->cur_row.extents_count); log_pos+= 2 + PAGERANGE_STORE_SIZE; log_array[TRANSLOG_INTERNAL_PARTS + 0].length+= (2 + @@ -3407,13 +3500,14 @@ static my_bool allocate_and_write_block_record(MARIA_HA *info, DBUG_ASSERT(row->checksum == (info->s->calc_checksum)(info, record)); } } + DBUG_PRINT("info", ("rowid: %lu (%lu:%u) length: %u", (ulong) row->lastpos, + (ulong) ma_recordpos_to_page(row->lastpos), + ma_recordpos_to_dir_entry(row->lastpos), + row_pos.length)); if (write_block_record(info, (uchar*) 0, record, row, blocks, blocks->block->org_bitmap_value != 0, &row_pos, undo_lsn, 0)) - goto err; /* Error reading bitmap */ - DBUG_PRINT("exit", ("rowid: %lu (%lu:%u)", (ulong) row->lastpos, - (ulong) ma_recordpos_to_page(row->lastpos), - ma_recordpos_to_dir_entry(row->lastpos))); + goto err; /* Now let checkpoint happen but don't commit */ DBUG_EXECUTE_IF("maria_over_alloc_bitmap", sleep(1000);); DBUG_RETURN(0); @@ -3559,6 +3653,7 @@ static my_bool _ma_update_block_record2(MARIA_HA *info, MARIA_PINNED_PAGE page_link; uint rownr, org_empty_size, head_length; uint block_size= info->s->block_size; + uint errpos= 0; uchar *dir; pgcache_page_no_t page; struct st_row_pos_info row_pos; @@ -3597,11 +3692,21 @@ static my_bool _ma_update_block_record2(MARIA_HA *info, rownr= ma_recordpos_to_dir_entry(record_pos); dir= dir_entry_pos(buff, block_size, rownr); - if ((org_empty_size + cur_row->head_length) >= new_row->total_length) + /* + We can't use cur_row->head_length as the block may have been compacted + since we read it. 
+ */ + head_length= uint2korr(dir + 2); + + if ((org_empty_size + head_length) >= new_row->total_length) { uint rec_offset, length; MARIA_BITMAP_BLOCK block; + DBUG_PRINT("info", ("org_empty_size: %u org_length: %u new_length: %lu", + org_empty_size, head_length, + new_row->total_length)); + /* We can fit the new row in the same page as the original head part of the row @@ -3611,7 +3716,10 @@ static my_bool _ma_update_block_record2(MARIA_HA *info, if (extend_area_on_page(info, buff, dir, rownr, block_size, new_row->total_length, &org_empty_size, &rec_offset, &length)) + { + errpos= 1; goto err; + } row_pos.buff= buff; row_pos.rownr= rownr; @@ -3628,9 +3736,15 @@ static my_bool _ma_update_block_record2(MARIA_HA *info, if (*cur_row->tail_positions && delete_tails(info, cur_row->tail_positions)) + { + errpos= 2; goto err; + } if (cur_row->extents_count && free_full_pages(info, cur_row)) + { + errpos= 3; goto err; + } res= write_block_record(info, oldrec, record, new_row, blocks, 1, &row_pos, undo_lsn, old_checksum); /* We can't update or delete this without re-reading it again */ @@ -3640,14 +3754,23 @@ static my_bool _ma_update_block_record2(MARIA_HA *info, /* Delete old row */ if (*cur_row->tail_positions && delete_tails(info, cur_row->tail_positions)) + { + errpos= 4; goto err; + } if (cur_row->extents_count && free_full_pages(info, cur_row)) + { + errpos= 5; goto err; + } head_length= uint2korr(dir + 2); if (_ma_bitmap_find_new_place(info, new_row, page, head_length + org_empty_size, blocks)) + { + errpos= 6; goto err; + } /* Allocate all size in block for record @@ -3674,10 +3797,14 @@ static my_bool _ma_update_block_record2(MARIA_HA *info, row_pos.length= head_length; if ((res= write_block_record(info, oldrec, record, new_row, blocks, 1, &row_pos, undo_lsn, old_checksum))) + { + errpos= 7; goto err; + } DBUG_RETURN(0); err: + DBUG_PRINT("error", ("errpos: %d", errpos)); if (info->non_flushable_state) _ma_bitmap_flushable(info, -1); 
_ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE); @@ -3695,6 +3822,8 @@ err: This is the main reason we don't make a lot of subfunctions that are common between _ma_update_block_record2() and this function. + + Note: If something goes wrong we mark the file crashed */ static my_bool _ma_update_at_original_place(MARIA_HA *info, @@ -3750,6 +3879,10 @@ static my_bool _ma_update_at_original_place(MARIA_HA *info, if ((org_empty_size + cur_row->head_length) < length_on_head_page) { + DBUG_PRINT("error", + ("org_empty_size: %u head_length: %u length_on_page: %u", + org_empty_size, (uint) cur_row->head_length, + length_on_head_page)); my_errno= HA_ERR_WRONG_IN_RECORD; goto err; } @@ -3769,7 +3902,6 @@ static my_bool _ma_update_at_original_place(MARIA_HA *info, row_pos.empty_space= empty_size; row_pos.dir= dir; row_pos.data= buff + rec_offset; - row_pos.length= length_on_head_page; /* Delete old row */ if (*cur_row->tail_positions && @@ -3799,12 +3931,17 @@ static my_bool _ma_update_at_original_place(MARIA_HA *info, max(new_row->total_length, share->base.min_block_length) <= length_on_head_page); + /* Store same amount of data on head page as on original page */ + row_pos.length= (length_on_head_page - + (extent_count + 1 - blocks->count) * ROW_EXTENT_SIZE); + set_if_bigger(row_pos.length, share->base.min_block_length); if ((res= write_block_record(info, oldrec, record, new_row, blocks, 1, &row_pos, undo_lsn, old_checksum))) goto err; DBUG_RETURN(0); err: + _ma_mark_file_crashed(share); if (info->non_flushable_state) _ma_bitmap_flushable(info, -1); _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE); @@ -3858,7 +3995,7 @@ static int delete_dir_entry(uchar *buff, uint block_size, uint record_number, } #endif - check_directory(buff, block_size, 0); + check_directory(buff, block_size, 0, (uint) -1); empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); dir= dir_entry_pos(buff, block_size, record_number); length= uint2korr(dir + 2); @@ -3933,7 +4070,7 @@ static 
int delete_dir_entry(uchar *buff, uint block_size, uint record_number, *empty_space_res= empty_space; - check_directory(buff, block_size, 0); + check_directory(buff, block_size, 0, empty_space); DBUG_RETURN(0); } @@ -4135,7 +4272,8 @@ my_bool _ma_delete_block_record(MARIA_HA *info, const uchar *record) log_pos= log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE; dirpos_store(log_pos, record_number); log_pos+= DIRPOS_STORE_SIZE; - int2store(log_pos, info->cur_row.head_length); + int2store(log_pos, info->cur_row.head_length - + info->cur_row.header_length); log_pos+= 2; pagerange_store(log_pos, info->cur_row.extents_count); log_pos+= PAGERANGE_STORE_SIZE; @@ -4404,13 +4542,14 @@ crashed: 1 error */ -static my_bool read_long_data(MARIA_HA *info, uchar *to, ulong length, +static my_bool read_long_data2(MARIA_HA *info, uchar *to, ulong length, MARIA_EXTENT_CURSOR *extent, uchar **data, uchar **end_of_data) { - DBUG_ENTER("read_long_data"); + uint left_length= (uint) (*end_of_data - *data); + DBUG_ENTER("read_long_data2"); DBUG_PRINT("enter", ("length: %lu left_length: %u", - length, (uint) (*end_of_data - *data))); + length, left_length)); DBUG_ASSERT(*data <= *end_of_data); /* @@ -4422,14 +4561,15 @@ static my_bool read_long_data(MARIA_HA *info, uchar *to, ulong length, This may change in the future, which is why we have the loop written the way it's written. 
*/ - if (extent->first_extent && length > (ulong) (*end_of_data - *data)) + if (extent->first_extent && length > left_length) + { *end_of_data= *data; + left_length= 0; + } for(;;) { - uint left_length; - left_length= (uint) (*end_of_data - *data); - if (likely(left_length >= length)) + if (unlikely(left_length >= length)) { memcpy(to, *data, length); (*data)+= length; @@ -4441,10 +4581,25 @@ static my_bool read_long_data(MARIA_HA *info, uchar *to, ulong length, length-= left_length; if (!(*data= read_next_extent(info, extent, end_of_data))) break; + left_length= (uint) (*end_of_data - *data); } DBUG_RETURN(1); } +static inline my_bool read_long_data(MARIA_HA *info, uchar *to, ulong length, + MARIA_EXTENT_CURSOR *extent, + uchar **data, uchar **end_of_data) +{ + uint left_length= (uint) (*end_of_data - *data); + if (likely(left_length >= length)) + { + memcpy(to, *data, length); + (*data)+= length; + return 0; + } + return read_long_data2(info, to, length, extent, data, end_of_data); +} + /* Read a record from page (helper function for _ma_read_block_record()) @@ -4496,6 +4651,8 @@ int _ma_read_block_record2(MARIA_HA *info, uchar *record, cur_row->head_length= (uint) (end_of_data - data); cur_row->full_page_count= cur_row->tail_count= 0; cur_row->blob_length= 0; + /* Number of bytes in header that we don't need to write during undo */ + cur_row->header_length= total_header_size[(flag & PRECALC_HEADER_BITMASK)]-1; if (flag & ROW_FLAG_TRANSID) { @@ -4507,7 +4664,7 @@ int _ma_read_block_record2(MARIA_HA *info, uchar *record, } /* Skip trans header (for now, until we have MVCC csupport) */ - data+= total_header_size[(flag & PRECALC_HEADER_BITMASK)]; + data+= cur_row->header_length + 1 ; if (flag & ROW_FLAG_NULLS_EXTENDED) cur_null_bytes+= data[-1]; @@ -4903,7 +5060,10 @@ int _ma_read_block_record(MARIA_HA *info, uchar *record, uint offset; uint block_size= share->block_size; DBUG_ENTER("_ma_read_block_record"); - DBUG_PRINT("enter", ("rowid: %lu", (long) record_pos)); 
+ DBUG_PRINT("enter", ("rowid: %lu page: %lu rownr: %u", + (ulong) record_pos, + (ulong) ma_recordpos_to_page(record_pos), + ma_recordpos_to_dir_entry(record_pos))); offset= ma_recordpos_to_dir_entry(record_pos); @@ -6078,7 +6238,10 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, /* Skip errors when reading outside of file and uninitialized pages */ if (!new_page || (my_errno != HA_ERR_FILE_TOO_SHORT && my_errno != HA_ERR_WRONG_CRC)) + { + DBUG_PRINT("error", ("Error %d when reading page", (int) my_errno)); goto err; + } /* Create new page */ buff= pagecache_block_link_to_buffer(page_link.link); buff[PAGE_TYPE_OFFSET]= UNALLOCATED_PAGE; @@ -6087,6 +6250,9 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, { /* Fix bitmap, just in case */ empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + if (!enough_free_entries_on_page(share, buff)) + empty_space= 0; /* Page is full */ + if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) goto err; pagecache_unlock_by_link(share->pagecache, page_link.link, @@ -6103,7 +6269,13 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, changed to new type. */ if (!new_page) + { + DBUG_PRINT("error", + ("Found page of wrong type: %u, should have been %u", + (uint) (buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK), + page_type)); goto crashed_file; + } make_empty_page(info, buff, page_type, 0); empty_space= block_size - PAGE_HEADER_SIZE - PAGE_SUFFIX_SIZE; (void) extend_directory(page_type == HEAD_PAGE ? 
info: 0, buff, @@ -6159,6 +6331,8 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, result= my_errno; /* Fix bitmap */ + if (!enough_free_entries_on_page(share, buff)) + empty_space= 0; /* Page is full */ if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) goto err; @@ -6246,6 +6420,8 @@ uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn, if ((uint) (buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == page_type) { empty_space= uint2korr(buff+EMPTY_SPACE_OFFSET); + if (!enough_free_entries_on_page(share, buff)) + empty_space= 0; /* Page is full */ if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) goto err; @@ -6270,6 +6446,8 @@ uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn, push_dynamic(&info->pinned_pages, (void*) &page_link); result= 0; + if (!enough_free_entries_on_page(share, buff)) + empty_space= 0; /* Page is full */ /* This will work even if the page was marked as UNALLOCATED_PAGE */ if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) result= my_errno; @@ -6642,7 +6820,7 @@ my_bool _ma_apply_undo_row_insert(MARIA_HA *info, LSN undo_lsn, pgcache_page_no_t page; uint rownr; uchar *buff; - my_bool res= 1; + my_bool res; MARIA_PINNED_PAGE page_link; MARIA_SHARE *share= info->s; ha_checksum checksum; @@ -6688,11 +6866,16 @@ my_bool _ma_apply_undo_row_insert(MARIA_HA *info, LSN undo_lsn, goto err; res= 0; -err: +end: if (info->non_flushable_state) _ma_bitmap_flushable(info, -1); _ma_unpin_all_pages_and_finalize_row(info, lsn); DBUG_RETURN(res); + +err: + res= 1; + _ma_mark_file_crashed(share); + goto end; } @@ -6915,6 +7098,10 @@ my_bool _ma_apply_undo_row_delete(MARIA_HA *info, LSN undo_lsn, { DBUG_ASSERT(row.checksum == (share->calc_checksum)(info, record)); } + /* Store same amount of data on head page as on original page */ + row_pos.length= (length_on_head_page - + (extent_count + 1 - blocks->count) * ROW_EXTENT_SIZE); + set_if_bigger(row_pos.length, 
share->base.min_block_length); if (write_block_record(info, (uchar*) 0, record, &row, blocks, blocks->block->org_bitmap_value != 0, &row_pos, undo_lsn, 0)) @@ -6924,6 +7111,7 @@ my_bool _ma_apply_undo_row_delete(MARIA_HA *info, LSN undo_lsn, DBUG_RETURN(0); err: + _ma_mark_file_crashed(share); if (info->non_flushable_state) _ma_bitmap_flushable(info, -1); _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE); @@ -6954,7 +7142,7 @@ my_bool _ma_apply_undo_row_update(MARIA_HA *info, LSN undo_lsn, pgcache_page_no_t page; ha_checksum checksum_delta; uint rownr, field_length_header, extent_count, length_on_head_page; - int error= 1; + int error; DBUG_ENTER("_ma_apply_undo_row_update"); LINT_INIT(checksum_delta); @@ -6962,6 +7150,7 @@ my_bool _ma_apply_undo_row_update(MARIA_HA *info, LSN undo_lsn, header+= PAGE_STORE_SIZE; rownr= dirpos_korr(header); header+= DIRPOS_STORE_SIZE; + record_pos= ma_recordpos(page, rownr); DBUG_PRINT("enter", ("rowid: %lu page: %lu rownr: %u", (ulong) record_pos, (ulong) page, rownr)); @@ -7091,9 +7280,14 @@ my_bool _ma_apply_undo_row_update(MARIA_HA *info, LSN undo_lsn, goto err; error= 0; -err: +end: my_free(current_record, MYF(0)); DBUG_RETURN(error); + +err: + error= 1; + _ma_mark_file_crashed(share); + goto end; } @@ -7173,3 +7367,25 @@ void maria_ignore_trids(MARIA_HA *info) info->trn->min_read_from= ~(TrID) 0; } } + + +#ifndef DBUG_OFF + +/* The following functions are useful to call from debugger */ + +void _ma_print_block_info(uchar *buff) +{ + LSN lsn= lsn_korr(buff); + + printf("LSN: %lu,0x%lx type: %u dir_entries: %u dir_free: %u empty_space: %u\n", + LSN_IN_PARTS(lsn), + (uint)buff[PAGE_TYPE_OFFSET], + (uint)buff[DIR_COUNT_OFFSET], + (uint)buff[DIR_FREE_OFFSET], + (uint) uint2korr(buff + EMPTY_SPACE_OFFSET)); + printf("Start of directory: %lu\n", + maria_block_size - PAGE_SUFFIX_SIZE - + (uint) buff[DIR_COUNT_OFFSET] * DIR_ENTRY_SIZE); + _ma_print_directory(stdout, buff, maria_block_size); +} +#endif diff --git 
a/storage/maria/ma_blockrec.h b/storage/maria/ma_blockrec.h index cb682eef701..c39b0af73ad 100644 --- a/storage/maria/ma_blockrec.h +++ b/storage/maria/ma_blockrec.h @@ -176,6 +176,7 @@ my_bool _ma_compare_block_record(register MARIA_HA *info, void _ma_compact_block_page(uchar *buff, uint block_size, uint rownr, my_bool extend_block, TrID min_read_from, uint min_row_length); +my_bool enough_free_entries_on_page(MARIA_SHARE *share, uchar *page_buff); TRANSLOG_ADDRESS maria_page_get_lsn(uchar *page, pgcache_page_no_t page_no, uchar* data_ptr); @@ -279,7 +280,8 @@ my_bool write_hook_for_file_id(enum translog_record_type type, my_bool write_hook_for_commit(enum translog_record_type type, TRN *trn, MARIA_HA *tbl_info, LSN *lsn, void *hook_arg); -void _ma_block_get_status(void* param, my_bool concurrent_insert); +void _ma_block_get_status(void *param, my_bool concurrent_insert); +void _ma_block_get_status_no_versioning(void *param, my_bool concurrent_ins); void _ma_block_update_status(void *param); void _ma_block_restore_status(void *param); my_bool _ma_block_check_status(void *param); diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index 4f93bf812a3..307befab5c7 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -136,11 +136,13 @@ void maria_chk_init_for_check(HA_CHECK *param, MARIA_HA *info) Set up transaction handler so that we can see all rows. 
When rows is read we will check the found id against param->max_tried */ - if (!ma_control_file_inited()) - param->max_trid= 0; /* Give warning for first trid found */ - else - param->max_trid= max_trid_in_system(); - + if (param->max_trid == 0) + { + if (!ma_control_file_inited()) + param->max_trid= 0; /* Give warning for first trid found */ + else + param->max_trid= max_trid_in_system(); + } maria_ignore_trids(info); } @@ -154,6 +156,9 @@ int maria_chk_status(HA_CHECK *param, MARIA_HA *info) if (maria_is_crashed_on_repair(info)) _ma_check_print_warning(param, "Table is marked as crashed and last repair failed"); + else if (maria_in_repair(info)) + _ma_check_print_warning(param, + "Last repair was aborted before finishing"); else if (maria_is_crashed(info)) _ma_check_print_warning(param, "Table is marked as crashed"); @@ -864,7 +869,7 @@ static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, llstr(anc_page->pos, llbuff)); } - if (anc_page->size > (uint) keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE) + if (anc_page->size > share->max_index_block_size) { _ma_check_print_error(param, "Page at %s has impossible (too big) pagelength", @@ -1755,7 +1760,7 @@ static my_bool check_head_page(HA_CHECK *param, MARIA_HA *info, uchar *record, _ma_check_print_error(param, "Page %9s: Row: %3d has an extent with " "wrong information in bitmap: " - "Page %9s Page_type: %d Bitmap: %d", + "Page: %9s Page_type: %d Bitmap: %d", llstr(page, llbuff), row, llstr(extent_page, llbuff2), page_type, bitmap_pattern); @@ -1926,8 +1931,8 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, else _ma_check_print_error(param, "Page %9s: Wrong data in bitmap. 
Page_type: " - "%d empty_space: %u Bitmap-bits: %d", - llstr(page, llbuff), page_type, + "%d full: %d empty_space: %u Bitmap-bits: %d", + llstr(page, llbuff), page_type, full_dir, empty_space, bitmap_pattern); if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) goto err; @@ -2242,7 +2247,7 @@ static my_bool protect_against_repair_crash(MARIA_HA *info, if ((param->testflag & T_NO_CREATE_RENAME_LSN) == 0) { /* this can be true only for a transactional table */ - maria_mark_crashed_on_repair(info); + maria_mark_in_repair(info); if (_ma_state_info_write(share, MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET | MA_STATE_INFO_WRITE_LOCK)) @@ -2267,10 +2272,14 @@ static int initialize_variables_for_repair(HA_CHECK *param, MARIA_SORT_INFO *sort_info, MARIA_SORT_PARAM *sort_param, MARIA_HA *info, - my_bool rep_quick) + my_bool rep_quick, + MARIA_SHARE *org_share) { MARIA_SHARE *share= info->s; + /* Ro allow us to restore state and check how state changed */ + memcpy(org_share, share, sizeof(*share)); + /* Repair code relies on share->state.state so we have to update it here */ if (share->lock.update_status) (*share->lock.update_status)(info); @@ -2318,11 +2327,13 @@ static int initialize_variables_for_repair(HA_CHECK *param, } /* Set up transaction handler so that we can see all rows */ - if (!ma_control_file_inited()) - param->max_trid= 0; /* Give warning for first trid found */ - else - param->max_trid= max_trid_in_system(); - + if (param->max_trid == 0) + { + if (!ma_control_file_inited()) + param->max_trid= 0; /* Give warning for first trid found */ + else + param->max_trid= max_trid_in_system(); + } maria_ignore_trids(info); /* Don't write transid's during repair */ maria_versioning(info, 0); @@ -2330,6 +2341,23 @@ static int initialize_variables_for_repair(HA_CHECK *param, } +/* + During initialize_variables_for_repair and related functions we set some + variables to values that makes sence during repair. 
+ This function restores these values to their original values so that we can + use the handler in MariaDB without having to close and open the table. +*/ + +static void restore_table_state_after_repair(MARIA_HA *info, + MARIA_SHARE *org_share) +{ + maria_versioning(info, info->s->have_versioning); + info->s->lock_key_trees= org_share->lock_key_trees; +} + + + + /** @brief Drop all indexes @@ -2478,11 +2506,11 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, char llbuff[22],llbuff2[22]; MARIA_SORT_INFO sort_info; MARIA_SORT_PARAM sort_param; - my_bool block_record, scan_inited= 0, - reenable_logging= share->now_transactional; + my_bool block_record, scan_inited= 0, reenable_logging= 0; enum data_file_type org_data_file_type= share->data_file_type; myf sync_dir= ((share->now_transactional && !share->temporary) ? MY_SYNC_DIR : 0); + MARIA_SHARE backup_share; DBUG_ENTER("maria_repair"); got_error= 1; @@ -2490,15 +2518,15 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, start_records= share->state.state.records; if (!(param->testflag & T_SILENT)) { - printf("- recovering (with keycache) MARIA-table '%s'\n",name); + printf("- recovering (with keycache) Aria-table '%s'\n",name); printf("Data records: %s\n", llstr(start_records, llbuff)); } if (initialize_variables_for_repair(param, &sort_info, &sort_param, info, - rep_quick)) + rep_quick, &backup_share)) goto err; - if (reenable_logging) + if ((reenable_logging= share->now_transactional)) _ma_tmp_disable_logging_for_table(info, 0); sort_param.current_filepos= sort_param.filepos= new_header_length= @@ -2777,6 +2805,7 @@ err: /* If caller had disabled logging it's not up to us to re-enable it */ if (reenable_logging) _ma_reenable_logging_for_table(info, FALSE); + restore_table_state_after_repair(info, &backup_share); my_free(sort_param.rec_buff, MYF(MY_ALLOW_ZERO_PTR)); my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR)); @@ -2974,7 +3003,7 @@ int maria_sort_index(HA_CHECK *param, register MARIA_HA 
*info, char *name) DBUG_RETURN(0); if (!(param->testflag & T_SILENT)) - printf("- Sorting index for MARIA-table '%s'\n",name); + printf("- Sorting index for Aria-table '%s'\n",name); if (protect_against_repair_crash(info, param, FALSE)) DBUG_RETURN(1); @@ -3102,13 +3131,15 @@ static int sort_one_index(HA_CHECK *param, MARIA_HA *info, new_page_pos=param->new_file_pos; param->new_file_pos+=keyinfo->block_length; key.keyinfo= keyinfo; - key.data= info->lastkey_buff; - if (!(buff= (uchar*) my_alloca((uint) keyinfo->block_length))) + if (!(buff= (uchar*) my_alloca((uint) keyinfo->block_length + + keyinfo->maxlength))) { _ma_check_print_error(param,"Not enough memory for key block"); DBUG_RETURN(-1); } + key.data= buff + keyinfo->block_length; + if (_ma_fetch_keypage(&page, info, keyinfo, pagepos, PAGECACHE_LOCK_LEFT_UNLOCKED, DFLT_INIT_HITS, buff, 0)) @@ -3204,7 +3235,7 @@ static my_bool maria_zerofill_index(HA_CHECK *param, MARIA_HA *info, DBUG_ENTER("maria_zerofill_index"); if (!(param->testflag & T_SILENT)) - printf("- Zerofilling index for MARIA-table '%s'\n",name); + printf("- Zerofilling index for Aria-table '%s'\n",name); /* Go through the index file */ for (pos= share->base.keystart, page= (ulonglong) (pos / block_size); @@ -3296,7 +3327,7 @@ static my_bool maria_zerofill_data(HA_CHECK *param, MARIA_HA *info, DBUG_RETURN(0); if (!(param->testflag & T_SILENT)) - printf("- Zerofilling data for MARIA-table '%s'\n",name); + printf("- Zerofilling data for Aria-table '%s'\n",name); /* Go through the record file */ for (page= 1, pos= block_size; @@ -3345,7 +3376,7 @@ static my_bool maria_zerofill_data(HA_CHECK *param, MARIA_HA *info, case TAIL_PAGE: { uint max_entry= (uint) buff[DIR_COUNT_OFFSET]; - uint offset, dir_start; + uint offset, dir_start, empty_space; uchar *dir; if (zero_lsn) @@ -3358,9 +3389,13 @@ static my_bool maria_zerofill_data(HA_CHECK *param, MARIA_HA *info, is_head_page ? ~(TrID) 0 : 0, is_head_page ? 
share->base.min_block_length : 0); + /* compactation may have increased free space */ + empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + if (!enough_free_entries_on_page(share, buff)) + empty_space= 0; /* Page is full */ if (_ma_bitmap_set(info, page, is_head_page, - uint2korr(buff + EMPTY_SPACE_OFFSET))) + empty_space)) goto err; /* Zerofill the not used part */ @@ -3545,7 +3580,8 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, ulonglong key_map; myf sync_dir= ((share->now_transactional && !share->temporary) ? MY_SYNC_DIR : 0); - my_bool scan_inited= 0; + my_bool scan_inited= 0, reenable_logging= 0; + MARIA_SHARE backup_share; DBUG_ENTER("maria_repair_by_sort"); LINT_INIT(key_map); @@ -3554,14 +3590,17 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, start_records= share->state.state.records; if (!(param->testflag & T_SILENT)) { - printf("- recovering (with sort) MARIA-table '%s'\n",name); + printf("- recovering (with sort) Aria-table '%s'\n",name); printf("Data records: %s\n", llstr(start_records,llbuff)); } if (initialize_variables_for_repair(param, &sort_info, &sort_param, info, - rep_quick)) + rep_quick, &backup_share)) goto err; + if ((reenable_logging= share->now_transactional)) + _ma_tmp_disable_logging_for_table(info, 0); + org_header_length= share->pack.header_length; new_header_length= (param->testflag & T_UNPACK) ? 
0 : org_header_length; sort_param.filepos= new_header_length; @@ -3967,6 +4006,11 @@ err: share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_ZEROFILLED | STATE_NOT_MOVABLE); + /* If caller had disabled logging it's not up to us to re-enable it */ + if (reenable_logging) + _ma_reenable_logging_for_table(info, FALSE); + restore_table_state_after_repair(info, &backup_share); + my_free(sort_param.rec_buff, MYF(MY_ALLOW_ZERO_PTR)); my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR)); my_free(sort_info.key_block, MYF(MY_ALLOW_ZERO_PTR)); @@ -4037,10 +4081,12 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, IO_CACHE new_data_cache; /* For non-quick repair. */ IO_CACHE_SHARE io_share; MARIA_SORT_INFO sort_info; + MARIA_SHARE backup_share; ulonglong key_map; pthread_attr_t thr_attr; myf sync_dir= ((share->now_transactional && !share->temporary) ? MY_SYNC_DIR : 0); + my_bool reenable_logging= 0; DBUG_ENTER("maria_repair_parallel"); LINT_INIT(key_map); @@ -4049,14 +4095,17 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, start_records= share->state.state.records; if (!(param->testflag & T_SILENT)) { - printf("- parallel recovering (with sort) MARIA-table '%s'\n",name); + printf("- parallel recovering (with sort) Aria-table '%s'\n",name); printf("Data records: %s\n", llstr(start_records, llbuff)); } if (initialize_variables_for_repair(param, &sort_info, &tmp_sort_param, info, - rep_quick)) + rep_quick, &backup_share)) goto err; + if ((reenable_logging= share->now_transactional)) + _ma_tmp_disable_logging_for_table(info, 0); + new_header_length= ((param->testflag & T_UNPACK) ? 
0 : share->pack.header_length); @@ -4360,8 +4409,7 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, goto err; } } - share->state.state.data_file_length= share->state.state.data_file_length= - sort_param->filepos; + share->state.state.data_file_length= sort_param->filepos; /* Only whole records */ share->state.version= (ulong) time((time_t*) 0); /* @@ -4484,6 +4532,11 @@ err: pthread_cond_destroy (&sort_info.cond); pthread_mutex_destroy(&sort_info.mutex); + /* If caller had disabled logging it's not up to us to re-enable it */ + if (reenable_logging) + _ma_reenable_logging_for_table(info, FALSE); + restore_table_state_after_repair(info, &backup_share); + my_free(sort_info.ft_buf, MYF(MY_ALLOW_ZERO_PTR)); my_free(sort_info.key_block,MYF(MY_ALLOW_ZERO_PTR)); my_free(sort_param,MYF(MY_ALLOW_ZERO_PTR)); @@ -5563,7 +5616,7 @@ static int sort_insert_key(MARIA_SORT_PARAM *sort_param, a_length+=t_length; _ma_store_page_used(share, anc_buff, a_length); key_block->end_pos+=t_length; - if (a_length <= (uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE)) + if (a_length <= share->max_index_block_size) { MARIA_KEY tmp_key2; tmp_key2.data= key_block->lastkey; @@ -5633,7 +5686,7 @@ static int sort_delete_record(MARIA_SORT_PARAM *sort_param) _ma_check_print_error(param, "Recover aborted; Can't run standard recovery on " "compressed tables with errors in data-file. 
" - "Use 'maria_chk --safe-recover' to fix it"); + "Use 'aria_chk --safe-recover' to fix it"); DBUG_RETURN(1); } @@ -6075,7 +6128,7 @@ void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info, } if (!(param->testflag & T_SILENT) && !(param->testflag & T_REP)) - printf("Updating MARIA file: %s\n", param->isam_file_name); + printf("Updating Aria file: %s\n", param->isam_file_name); /* We have to use an allocated buffer instead of info->rec_buff as _ma_put_key_in_record() may use info->rec_buff @@ -6716,7 +6769,7 @@ static void _ma_check_print_not_visible_error(HA_CHECK *param, TrID used_trid) { _ma_check_print_warning(param, "Found row with transaction id %s but no " - "maria_control_file was specified. " + "aria_control_file was used or specified. " "The table may be corrupted", llstr(used_trid, buff)); } @@ -6724,7 +6777,7 @@ static void _ma_check_print_not_visible_error(HA_CHECK *param, TrID used_trid) { _ma_check_print_error(param, "Found row with transaction id %s when max " - "transaction id according to maria_control_file " + "transaction id according to aria_control_file " "is %s", llstr(used_trid, buff), llstr(param->max_trid, buff2)); diff --git a/storage/maria/ma_check_standalone.h b/storage/maria/ma_check_standalone.h index 9b30c96089f..8cda285bb99 100644 --- a/storage/maria/ma_check_standalone.h +++ b/storage/maria/ma_check_standalone.h @@ -64,7 +64,7 @@ void _ma_check_print_warning(HA_CHECK *param, const char *fmt,...) if (!param->warning_printed && !param->error_printed) { if (param->testflag & T_SILENT) - fprintf(stderr,"%s: MARIA file %s\n",my_progname_short, + fprintf(stderr,"%s: Aria file %s\n",my_progname_short, param->isam_file_name); param->out_flag|= O_DATA_LOST; } @@ -90,7 +90,7 @@ void _ma_check_print_error(HA_CHECK *param, const char *fmt,...) 
if (!param->warning_printed && !param->error_printed) { if (param->testflag & T_SILENT) - fprintf(stderr,"%s: MARIA file %s\n",my_progname_short,param->isam_file_name); + fprintf(stderr,"%s: Aria file %s\n",my_progname_short,param->isam_file_name); param->out_flag|= O_DATA_LOST; } param->error_printed|=1; diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index b75267f81f1..cf13cee9452 100644 --- a/storage/maria/ma_checkpoint.c +++ b/storage/maria/ma_checkpoint.c @@ -789,7 +789,6 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon) not seen again in the loop. */ share->in_checkpoint= MARIA_CHECKPOINT_LOOKS_AT_ME; - /** @todo avoid strlen() */ total_names_length+= share->open_file_name.length; } } diff --git a/storage/maria/ma_checkpoint.h b/storage/maria/ma_checkpoint.h index 69645c6bcda..126f8111a23 100644 --- a/storage/maria/ma_checkpoint.h +++ b/storage/maria/ma_checkpoint.h @@ -89,4 +89,4 @@ static inline LSN lsn_read_non_atomic_32(const volatile LSN *x) @param sentence text to write */ #define ma_message_no_user(level, sentence) \ - my_printf_error(HA_ERR_GENERIC, "Maria engine: %s", MYF(level), sentence) + my_printf_error(HA_ERR_GENERIC, "Aria engine: %s", MYF(level), sentence) diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c index 1e1fc34c77e..6f9018885e9 100644 --- a/storage/maria/ma_control_file.c +++ b/storage/maria/ma_control_file.c @@ -234,7 +234,7 @@ static int lock_control_file(const char *name) { if (retry == 0) my_printf_error(HA_ERR_INITIALIZATION, - "Can't lock maria control file '%s' for exclusive use, " + "Can't lock aria control file '%s' for exclusive use, " "error: %d. 
Will retry for %d seconds", 0, name, my_errno, MARIA_MAX_CONTROL_FILE_LOCK_RETRY); if (retry++ > MARIA_MAX_CONTROL_FILE_LOCK_RETRY) @@ -372,14 +372,14 @@ CONTROL_FILE_ERROR ma_control_file_open(my_bool create_if_missing, CF_MAGIC_STRING, CF_MAGIC_STRING_SIZE)) { error= CONTROL_FILE_BAD_MAGIC_STRING; - errmsg= "Missing valid id at start of file. File is not a valid maria control file"; + errmsg= "Missing valid id at start of file. File is not a valid aria control file"; goto err; } if (buffer[CF_VERSION_OFFSET] > CONTROL_FILE_VERSION) { error= CONTROL_FILE_BAD_VERSION; - sprintf(errmsg_buff, "File is from a future maria system: %d. Current version is: %d", + sprintf(errmsg_buff, "File is from a future aria system: %d. Current version is: %d", (int) buffer[CF_VERSION_OFFSET], CONTROL_FILE_VERSION); errmsg= errmsg_buff; goto err; @@ -398,15 +398,16 @@ CONTROL_FILE_ERROR ma_control_file_open(my_bool create_if_missing, } new_block_size= uint2korr(buffer + CF_BLOCKSIZE_OFFSET); - if (new_block_size != maria_block_size) + if (new_block_size != maria_block_size && maria_block_size) { error= CONTROL_FILE_WRONG_BLOCKSIZE; sprintf(errmsg_buff, - "Block size in control file (%u) is different than given maria_block_size: %u", + "Block size in control file (%u) is different than given aria_block_size: %u", new_block_size, (uint) maria_block_size); errmsg= errmsg_buff; goto err; } + maria_block_size= new_block_size; if (my_checksum(0, buffer, new_cf_create_time_size - CF_CHECKSUM_SIZE) != uint4korr(buffer + new_cf_create_time_size - CF_CHECKSUM_SIZE)) @@ -444,7 +445,7 @@ ok: err: if (print_error) my_printf_error(HA_ERR_INITIALIZATION, - "Got error '%s' when trying to use maria control file " + "Got error '%s' when trying to use aria control file " "'%s'", 0, errmsg, name); ma_control_file_end(); /* will unlock file if needed */ DBUG_RETURN(error); diff --git a/storage/maria/ma_control_file.h b/storage/maria/ma_control_file.h index 4cb5527620d..f828ae69c6d 100644 --- 
a/storage/maria/ma_control_file.h +++ b/storage/maria/ma_control_file.h @@ -21,7 +21,7 @@ #ifndef _ma_control_file_h #define _ma_control_file_h -#define CONTROL_FILE_BASE_NAME "maria_log_control" +#define CONTROL_FILE_BASE_NAME "aria_log_control" /* Major version for control file. Should only be changed when doing big changes that made the new control file incompatible with all diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index 6886dc8f291..9cf042ed21e 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -653,7 +653,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, if (info_length > 65535) { my_printf_error(HA_WRONG_CREATE_OPTION, - "Maria table '%s' has too many columns and/or " + "Aria table '%s' has too many columns and/or " "indexes and/or unique constraints.", MYF(0), name + dirname_length(name)); my_errno= HA_WRONG_CREATE_OPTION; @@ -841,7 +841,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, */ if (_ma_test_if_reopen(filename)) { - my_printf_error(0, "MARIA table '%s' is in use " + my_printf_error(0, "Aria table '%s' is in use " "(most likely by a MERGE table). 
Try FLUSH TABLES.", MYF(0), name + dirname_length(name)); my_errno= HA_ERR_TABLE_EXIST; diff --git a/storage/maria/ma_dbug.c b/storage/maria/ma_dbug.c index ea69975ad4b..0d2db0d4f4b 100644 --- a/storage/maria/ma_dbug.c +++ b/storage/maria/ma_dbug.c @@ -124,7 +124,7 @@ void _ma_print_keydata(FILE *stream, register HA_KEYSEG *keyseg, case HA_KEYTYPE_LONGLONG: { char buff[21]; - longlong2str(mi_sint8korr(key),buff,-10); + longlong10_to_str(mi_sint8korr(key),buff,-10); VOID(fprintf(stream,"%s",buff)); key=end; break; @@ -132,7 +132,7 @@ void _ma_print_keydata(FILE *stream, register HA_KEYSEG *keyseg, case HA_KEYTYPE_ULONGLONG: { char buff[21]; - longlong2str(mi_sint8korr(key),buff,10); + longlong10_to_str(mi_sint8korr(key),buff,10); VOID(fprintf(stream,"%s",buff)); key=end; break; diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c index 0e9e5caafbf..2420cac0e93 100644 --- a/storage/maria/ma_delete.c +++ b/storage/maria/ma_delete.c @@ -1,4 +1,5 @@ /* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + Copyright (C) 2009-2010 Monty Program Ab This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -180,7 +181,11 @@ my_bool _ma_ck_delete(MARIA_HA *info, MARIA_KEY *key) key->data= key_buff; } - res= _ma_ck_real_delete(info, key, &new_root); + if ((res= _ma_ck_real_delete(info, key, &new_root))) + { + /* We have to mark the table crashed before unpin_all_pages() */ + maria_mark_crashed(info); + } key->data= save_key_data; if (!res && share->now_transactional) @@ -218,7 +223,8 @@ my_bool _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEY *key, my_errno=ENOMEM; DBUG_RETURN(1); } - DBUG_PRINT("info",("root_page: %ld", (long) old_root)); + DBUG_PRINT("info",("root_page: %lu", + (ulong) (old_root / keyinfo->block_length))); if (_ma_fetch_keypage(&page, info, keyinfo, old_root, PAGECACHE_LOCK_WRITE, DFLT_INIT_HITS, root_buff, 0)) { @@ -435,7 +441,8 @@ static 
int d_search(MARIA_HA *info, MARIA_KEY *key, uint32 comp_flag, */ if (share->now_transactional && _ma_log_delete(anc_page, s_temp.key_pos, - s_temp.changed_length, s_temp.move_length)) + s_temp.changed_length, s_temp.move_length, + 0, KEY_OP_DEBUG_LOG_DEL_CHANGE_1)) DBUG_RETURN(-1); if (!nod_flag) @@ -458,7 +465,7 @@ static int d_search(MARIA_HA *info, MARIA_KEY *key, uint32 comp_flag, } if (ret_value >0) { - save_flag=1; + save_flag= 2; if (ret_value == 1) ret_value= underflow(info, keyinfo, anc_page, &leaf_page, keypos); else @@ -474,17 +481,20 @@ static int d_search(MARIA_HA *info, MARIA_KEY *key, uint32 comp_flag, ret_value= _ma_insert(info, key, anc_page, keypos, last_key.data, (MARIA_PAGE*) 0, (uchar*) 0, (my_bool) 0); + + if (_ma_write_keypage(&leaf_page, PAGECACHE_LOCK_LEFT_WRITELOCKED, + DFLT_INIT_HITS)) + ret_value= -1; } } - if (ret_value == 0 && anc_page->size > - (uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE)) + if (ret_value == 0 && anc_page->size > share->max_index_block_size) { /* parent buffer got too big ; We have to split the page */ - save_flag=1; + save_flag= 3; ret_value= _ma_split_page(info, key, anc_page, - (uint) (keyinfo->block_length - - KEYPAGE_CHECKSUM_SIZE), + share->max_index_block_size, (uchar*) 0, 0, 0, lastkey, 0) | 2; + DBUG_ASSERT(anc_page->org_size == anc_page->size); } if (save_flag && ret_value != 1) { @@ -550,7 +560,8 @@ static int del(MARIA_HA *info, MARIA_KEY *key, MARIA_KEY ret_key; MARIA_PAGE next_page; DBUG_ENTER("del"); - DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx", (long) leaf_page, + DBUG_PRINT("enter",("leaf_page: %lu keypos: 0x%lx", + (ulong) (leaf_page->pos / share->block_size), (ulong) keypos)); DBUG_DUMP("leaf_buff", leaf_page->buff, leaf_page->size); @@ -587,11 +598,10 @@ static int del(MARIA_HA *info, MARIA_KEY *key, ret_value= underflow(info, keyinfo, leaf_page, &next_page, endpos); if (ret_value == 0 && leaf_page->size > - (uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE)) + 
share->max_index_block_size) { ret_value= (_ma_split_page(info, key, leaf_page, - (uint) (keyinfo->block_length - - KEYPAGE_CHECKSUM_SIZE), + share->max_index_block_size, (uchar*) 0, 0, 0, ret_key_buff, 0) | 2); } @@ -708,8 +718,7 @@ err: @fn underflow() @param anc_buff Anchestor page data - @param leaf_page Page number of leaf page - @param leaf_buff Leaf page (page that underflowed) + @param leaf_page Leaf page (page that underflowed) @param leaf_page_link Pointer to pin information about leaf page @param keypos Position after current key in anc_buff @@ -743,7 +752,8 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo, MARIA_KEY tmp_key, anc_key, leaf_key; MARIA_PAGE next_page; DBUG_ENTER("underflow"); - DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx",(long) leaf_page->pos, + DBUG_PRINT("enter",("leaf_page: %lu keypos: 0x%lx", + (ulong) (leaf_page->pos / share->block_size), (ulong) keypos)); DBUG_DUMP("anc_buff", anc_page->buff, anc_page->size); DBUG_DUMP("leaf_buff", leaf_page->buff, leaf_page->size); @@ -841,7 +851,7 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo, anc_page->size= new_anc_length; page_store_size(share, anc_page); - if (buff_length <= (uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE)) + if (buff_length <= share->max_index_block_size) { /* All keys fitted into one page */ page_mark_changed(info, &next_page); @@ -854,10 +864,15 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo, if (share->now_transactional) { - /* Log changes to parent page */ + /* + Log changes to parent page. Note that this page may have been + temporarily bigger than block_size. + */ if (_ma_log_delete(anc_page, key_deleted.key_pos, key_deleted.changed_length, - key_deleted.move_length)) + key_deleted.move_length, + anc_length - anc_page->org_size, + KEY_OP_DEBUG_LOG_DEL_CHANGE_2)) goto err; /* Log changes to leaf page. 
Data for leaf page is in leaf_buff @@ -986,7 +1001,8 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo, */ DBUG_ASSERT(new_buff_length <= next_buff_length); if (_ma_log_prefix(&next_page, key_inserted.changed_length, - (int) (new_buff_length - next_buff_length))) + (int) (new_buff_length - next_buff_length), + KEY_OP_DEBUG_LOG_PREFIX_1)) goto err; } page_mark_changed(info, &next_page); @@ -1044,11 +1060,19 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo, /* Remember for logging how many bytes of leaf_buff that are not changed */ DBUG_ASSERT((int) key_inserted.changed_length >= key_inserted.move_length); - unchanged_leaf_length= leaf_length - (key_inserted.changed_length - - key_inserted.move_length); + unchanged_leaf_length= (leaf_length - p_length - + (key_inserted.changed_length - + key_inserted.move_length)); new_buff_length= buff_length + leaf_length - p_length + t_length; +#ifdef EXTRA_DEBUG + /* Ensure that unchanged_leaf_length is correct */ + DBUG_ASSERT(bcmp(next_page.buff + new_buff_length - unchanged_leaf_length, + leaf_buff + leaf_length - unchanged_leaf_length, + unchanged_leaf_length) == 0); +#endif + page_flag= next_page.flag | leaf_page->flag; if (anc_key.flag & (SEARCH_USER_KEY_HAS_TRANSID | SEARCH_PAGE_KEY_HAS_TRANSID)) @@ -1069,8 +1093,7 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo, anc_page->size= new_anc_length; page_store_size(share, anc_page); - if (new_buff_length <= (uint) (keyinfo->block_length - - KEYPAGE_CHECKSUM_SIZE)) + if (new_buff_length <= share->max_index_block_size) { /* All keys fitted into one page */ page_mark_changed(info, leaf_page); @@ -1079,10 +1102,14 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo, if (share->now_transactional) { - /* Log changes to parent page */ + /* + Log changes to parent page. Note that this page may have been + temporarily bigger than block_size. 
+ */ if (_ma_log_delete(anc_page, key_deleted.key_pos, - key_deleted.changed_length, key_deleted.move_length)) - + key_deleted.changed_length, key_deleted.move_length, + anc_length - anc_page->org_size, + KEY_OP_DEBUG_LOG_DEL_CHANGE_3)) goto err; /* Log changes to next page. Data for leaf page is in buff @@ -1192,8 +1219,10 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo, This contains original data with new data added first */ DBUG_ASSERT(leaf_length <= new_leaf_length); + DBUG_ASSERT(new_leaf_length >= unchanged_leaf_length); if (_ma_log_prefix(leaf_page, new_leaf_length - unchanged_leaf_length, - (int) (new_leaf_length - leaf_length))) + (int) (new_leaf_length - leaf_length), + KEY_OP_DEBUG_LOG_PREFIX_2)) goto err; /* Log changes to next page @@ -1395,7 +1424,9 @@ static uint remove_key(MARIA_KEYDEF *keyinfo, uint page_flag, uint nod_flag, ****************************************************************************/ /** - @brief log entry where some parts are deleted and some things are changed + @brief + log entry where some parts are deleted and some things are changed + and some data could be added last. @fn _ma_log_delete() @param info Maria handler @@ -1404,74 +1435,148 @@ static uint remove_key(MARIA_KEYDEF *keyinfo, uint page_flag, uint nod_flag, @param key_pos Start of change area @param changed_length How many bytes where changed at key_pos @param move_length How many bytes where deleted at key_pos + @param append_length Length of data added last + This is taken from end of ma_page->buff + This is mainly used when a key is deleted. 
The append happens + when we delete a key from a page with data > block_size kept in + memory and we have to add back the data that was stored > block_size */ my_bool _ma_log_delete(MARIA_PAGE *ma_page, const uchar *key_pos, - uint changed_length, uint move_length) + uint changed_length, uint move_length, + uint append_length __attribute__((unused)), + enum en_key_debug debug_marker __attribute__((unused))) { LSN lsn; - uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 9 + 7], *log_pos; - LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 3]; - uint translog_parts; + uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 2 + 2 + 3 + 3 + 6 + 3 + 7]; + uchar *log_pos; + LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 7]; + uint translog_parts, current_size, extra_length; uint offset= (uint) (key_pos - ma_page->buff); MARIA_HA *info= ma_page->info; MARIA_SHARE *share= info->s; my_off_t page; DBUG_ENTER("_ma_log_delete"); DBUG_PRINT("enter", ("page: %lu changed_length: %u move_length: %d", - (ulong) ma_page->pos, changed_length, move_length)); + (ulong) (ma_page->pos / share->block_size), + changed_length, move_length)); DBUG_ASSERT(share->now_transactional && move_length); DBUG_ASSERT(offset + changed_length <= ma_page->size); + DBUG_ASSERT(ma_page->org_size - move_length + append_length == ma_page->size); + DBUG_ASSERT(move_length <= ma_page->org_size - share->keypage_header); /* Store address of new root page */ page= ma_page->pos / share->block_size; page_store(log_data + FILEID_STORE_SIZE, page); log_pos= log_data+ FILEID_STORE_SIZE + PAGE_STORE_SIZE; + current_size= ma_page->org_size; + +#ifdef EXTRA_DEBUG_KEY_CHANGES + *log_pos++= KEY_OP_DEBUG; + *log_pos++= debug_marker; +#endif + + /* Store keypage_flag */ + *log_pos++= KEY_OP_SET_PAGEFLAG; + *log_pos++= ma_page->buff[KEYPAGE_TRANSFLAG_OFFSET]; + log_pos[0]= KEY_OP_OFFSET; int2store(log_pos+1, offset); - log_pos[3]= KEY_OP_SHIFT; - int2store(log_pos+4, -(int) move_length); - log_pos+= 6; - translog_parts= 1; + 
log_pos+= 3; + translog_parts= TRANSLOG_INTERNAL_PARTS + 1; + extra_length= 0; + if (changed_length) { + if (offset + changed_length >= share->max_index_block_size) + { + changed_length= share->max_index_block_size - offset; + move_length= 0; /* Nothing to move */ + current_size= share->max_index_block_size; + } + log_pos[0]= KEY_OP_CHANGE; int2store(log_pos+1, changed_length); log_pos+= 3; - translog_parts= 2; - log_array[TRANSLOG_INTERNAL_PARTS + 1].str= ma_page->buff + offset; - log_array[TRANSLOG_INTERNAL_PARTS + 1].length= changed_length; + log_array[translog_parts].str= ma_page->buff + offset; + log_array[translog_parts].length= changed_length; + translog_parts++; + + /* We only have to move things after offset+changed_length */ + offset+= changed_length; } -#ifdef EXTRA_DEBUG_KEY_CHANGES + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - log_data); + + if (move_length) { - int page_length= ma_page->size; - ha_checksum crc; - crc= my_checksum(0, ma_page->buff + LSN_STORE_SIZE, - page_length - LSN_STORE_SIZE); - log_pos[0]= KEY_OP_CHECK; - int2store(log_pos+1, page_length); - int4store(log_pos+3, crc); - - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= log_pos; - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7; - changed_length+= 7; + uint log_length; + if (offset + move_length < share->max_index_block_size) + { + /* + Move down things that is on page. + page_offset in apply_redo_inxed() will be at original offset + + changed_length. 
+ */ + log_pos[0]= KEY_OP_SHIFT; + int2store(log_pos+1, - (int) move_length); + log_length= 3; + current_size-= move_length; + } + else + { + /* Delete to end of page */ + uint tmp= current_size - offset; + current_size= offset; + log_pos[0]= KEY_OP_DEL_SUFFIX; + int2store(log_pos+1, tmp); + log_length= 3; + } + log_array[translog_parts].str= log_pos; + log_array[translog_parts].length= log_length; translog_parts++; + log_pos+= log_length; + extra_length+= log_length; } -#endif - log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - log_data); + if (current_size != ma_page->size && + current_size != share->max_index_block_size) + { + /* Append data that didn't fit on the page before */ + uint length= (min(ma_page->size, share->max_index_block_size) - + current_size); + uchar *data= ma_page->buff + current_size; + + DBUG_ASSERT(length <= append_length); + + log_pos[0]= KEY_OP_ADD_SUFFIX; + int2store(log_pos+1, length); + log_array[translog_parts].str= log_pos; + log_array[translog_parts].length= 3; + log_array[translog_parts + 1].str= data; + log_array[translog_parts + 1].length= length; + log_pos+= 3; + translog_parts+= 2; + current_size+= length; + extra_length+= 3 + length; + } + + _ma_log_key_changes(ma_page, + log_array + translog_parts, + log_pos, &extra_length, &translog_parts); + /* Remember new page length for future log entires for same page */ + ma_page->org_size= current_size; if (translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, (translog_size_t) - log_array[TRANSLOG_INTERNAL_PARTS + 0].length + - changed_length, - TRANSLOG_INTERNAL_PARTS + translog_parts, + log_array[TRANSLOG_INTERNAL_PARTS].length + + changed_length + extra_length, translog_parts, log_array, log_data, NULL)) DBUG_RETURN(1); + DBUG_RETURN(0); } diff --git a/storage/maria/ma_init.c b/storage/maria/ma_init.c index 552b0767bec..902f06d93e5 100644 --- a/storage/maria/ma_init.c +++ b/storage/maria/ma_init.c @@ 
-40,6 +40,11 @@ void history_state_free(MARIA_STATE_HISTORY_CLOSED *closed_history) } +static int dummy_maria_create_trn_hook(MARIA_HA *info __attribute__((unused))) +{ + return 0; +} + /* Initialize maria @@ -64,6 +69,7 @@ int maria_init(void) pthread_mutex_init(&THR_LOCK_maria,MY_MUTEX_INIT_SLOW); _ma_init_block_record_data(); trnman_end_trans_hook= _ma_trnman_end_trans_hook; + maria_create_trn_hook= dummy_maria_create_trn_hook; my_handler_error_register(); } hash_init(&maria_stored_state, &my_charset_bin, 32, @@ -107,3 +113,72 @@ void maria_end(void) hash_free(&maria_stored_state); } } + +/** + Upgrade from older Aria versions: + + - In MariaDB 5.1, the name of the control file and log files had the + 'maria' prefix, now they have the 'aria' prefix. + + @return: 0 ok + 1 error + +*/ + +my_bool maria_upgrade() +{ + char name[FN_REFLEN], new_name[FN_REFLEN]; + DBUG_ENTER("maria_upgrade"); + + fn_format(name, "maria_log_control", maria_data_root, "", MYF(MY_WME)); + + if (!my_access(name,F_OK)) + { + /* + Old style control file found; Rename the control file and the log files. + We start by renaming all log files, so that if we get a crash + we will continue from where we left. 
+ */ + uint i; + MY_DIR *dir= my_dir(maria_data_root, MYF(MY_WME)); + if (!dir) + DBUG_RETURN(1); + + my_message(HA_ERR_INITIALIZATION, + "Found old style Maria log files; " + "Converting them to Aria names", + MYF(ME_JUST_INFO)); + + for (i= 0; i < dir->number_off_files; i++) + { + const char *file= dir->dir_entry[i].name; + if (strncmp(file, "maria_log.", 10) == 0 && + file[10] >= '0' && file[10] <= '9' && + file[11] >= '0' && file[11] <= '9' && + file[12] >= '0' && file[12] <= '9' && + file[13] >= '0' && file[13] <= '9' && + file[14] >= '0' && file[14] <= '9' && + file[15] >= '0' && file[15] <= '9' && + file[16] >= '0' && file[16] <= '9' && + file[17] >= '0' && file[17] <= '9' && + file[18] == '\0') + { + /* Remove the 'm' in 'maria' */ + char old_logname[FN_REFLEN], new_logname[FN_REFLEN]; + fn_format(old_logname, file, maria_data_root, "", MYF(0)); + fn_format(new_logname, file+1, maria_data_root, "", MYF(0)); + if (my_rename(old_logname, new_logname, MYF(MY_WME))) + { + my_dirend(dir); + DBUG_RETURN(1); + } + } + } + my_dirend(dir); + + fn_format(new_name, CONTROL_FILE_BASE_NAME, maria_data_root, "", MYF(0)); + if (my_rename(name, new_name, MYF(MY_WME))) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} diff --git a/storage/maria/ma_key_recover.c b/storage/maria/ma_key_recover.c index 38317cd61db..1a61731e817 100644 --- a/storage/maria/ma_key_recover.c +++ b/storage/maria/ma_key_recover.c @@ -64,8 +64,9 @@ void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn) builds. 
*/ #ifdef EXTRA_DEBUG - DBUG_ASSERT(!pinned_page->changed || - undo_lsn != LSN_IMPOSSIBLE || !info->s->now_transactional); + DBUG_ASSERT((!pinned_page->changed || + undo_lsn != LSN_IMPOSSIBLE || !info->s->now_transactional) || + (info->s->state.changed & STATE_CRASHED)); #endif pagecache_unlock_by_link(info->s->pagecache, pinned_page->link, pinned_page->unlock, PAGECACHE_UNPIN, @@ -311,24 +312,33 @@ my_bool write_hook_for_undo_key_delete(enum translog_record_type type, */ my_bool _ma_log_prefix(MARIA_PAGE *ma_page, uint changed_length, - int move_length) + int move_length, + enum en_key_debug debug_marker __attribute__((unused))) { uint translog_parts; LSN lsn; - uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 7 + 7 + 2], *log_pos; + uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 7 + 7 + 2 + 2]; + uchar *log_pos; uchar *buff= ma_page->buff; - LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 3]; + LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 4]; pgcache_page_no_t page; MARIA_HA *info= ma_page->info; DBUG_ENTER("_ma_log_prefix"); DBUG_PRINT("enter", ("page: %lu changed_length: %u move_length: %d", (ulong) ma_page->pos, changed_length, move_length)); + DBUG_ASSERT(ma_page->size == ma_page->org_size + move_length); + page= ma_page->pos / info->s->block_size; log_pos= log_data + FILEID_STORE_SIZE; page_store(log_pos, page); log_pos+= PAGE_STORE_SIZE; +#ifdef EXTRA_DEBUG_KEY_CHANGES + (*log_pos++)= KEY_OP_DEBUG; + (*log_pos++)= debug_marker; +#endif + /* Store keypage_flag */ *log_pos++= KEY_OP_SET_PAGEFLAG; *log_pos++= buff[KEYPAGE_TRANSFLAG_OFFSET]; @@ -372,21 +382,11 @@ my_bool _ma_log_prefix(MARIA_PAGE *ma_page, uint changed_length, translog_parts= 2; } -#ifdef EXTRA_DEBUG_KEY_CHANGES - { - int page_length= ma_page->size; - ha_checksum crc; - crc= my_checksum(0, buff + LSN_STORE_SIZE, page_length - LSN_STORE_SIZE); - log_pos[0]= KEY_OP_CHECK; - int2store(log_pos+1, page_length); - int4store(log_pos+3, crc); - - log_array[TRANSLOG_INTERNAL_PARTS + 
translog_parts].str= log_pos; - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7; - changed_length+= 7; - translog_parts++; - } -#endif + _ma_log_key_changes(ma_page, log_array + TRANSLOG_INTERNAL_PARTS + + translog_parts, log_pos, &changed_length, + &translog_parts); + /* Remember new page length for future log entires for same page */ + ma_page->org_size= ma_page->size; DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, @@ -406,7 +406,7 @@ my_bool _ma_log_prefix(MARIA_PAGE *ma_page, uint changed_length, my_bool _ma_log_suffix(MARIA_PAGE *ma_page, uint org_length, uint new_length) { LSN lsn; - LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 3]; + LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 4]; uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 10 + 7 + 2], *log_pos; uchar *buff= ma_page->buff; int diff; @@ -416,6 +416,8 @@ my_bool _ma_log_suffix(MARIA_PAGE *ma_page, uint org_length, uint new_length) DBUG_ENTER("_ma_log_suffix"); DBUG_PRINT("enter", ("page: %lu org_length: %u new_length: %u", (ulong) ma_page->pos, org_length, new_length)); + DBUG_ASSERT(ma_page->size == new_length); + DBUG_ASSERT(ma_page->org_size == org_length); page= ma_page->pos / info->s->block_size; @@ -450,20 +452,11 @@ my_bool _ma_log_suffix(MARIA_PAGE *ma_page, uint org_length, uint new_length) log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - log_data); -#ifdef EXTRA_DEBUG_KEY_CHANGES - { - ha_checksum crc; - crc= my_checksum(0, buff + LSN_STORE_SIZE, new_length - LSN_STORE_SIZE); - log_pos[0]= KEY_OP_CHECK; - int2store(log_pos+1, new_length); - int4store(log_pos+3, crc); - - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= log_pos; - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7; - extra_length+= 7; - translog_parts++; - } -#endif + _ma_log_key_changes(ma_page, + log_array + TRANSLOG_INTERNAL_PARTS + translog_parts, + log_pos, &extra_length, &translog_parts); + /* Remember new page length for future log 
entires for same page */ + ma_page->org_size= ma_page->size; DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, @@ -480,35 +473,45 @@ my_bool _ma_log_suffix(MARIA_PAGE *ma_page, uint org_length, uint new_length) @param ma_page Changed page @param org_page_length Length of data in page before key was added + Final length in ma_page->size @note If handle_overflow is set, then we have to protect against logging changes that is outside of the page. This may happen during underflow() handling where the buffer in memory temporary contains more data than block_size + + ma_page may be a page that was previously logged and cuted down + becasue it's too big. (org_page_length > ma_page->org_size) */ my_bool _ma_log_add(MARIA_PAGE *ma_page, - uint org_page_length, uchar *key_pos, - uint changed_length, int move_length, + uint org_page_length __attribute__ ((unused)), + uchar *key_pos, uint changed_length, int move_length, my_bool handle_overflow __attribute__ ((unused))) { LSN lsn; - uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 3 + 3 + 3 + 3 + 7 + 2]; + uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 2 + 3 + 3 + 3 + 3 + 7 + + 3 + 2]; uchar *log_pos; uchar *buff= ma_page->buff; - LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 3]; + LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 6]; MARIA_HA *info= ma_page->info; uint offset= (uint) (key_pos - buff); - uint page_length= info->s->block_size - KEYPAGE_CHECKSUM_SIZE; - uint translog_parts; + uint max_page_size= info->s->max_index_block_size; + uint translog_parts, current_size; pgcache_page_no_t page_pos; DBUG_ENTER("_ma_log_add"); DBUG_PRINT("enter", ("page: %lu org_page_length: %u changed_length: %u " "move_length: %d", - (ulong) ma_page->pos, org_page_length, changed_length, + (ulong) (ma_page->pos / info->s->block_size), + org_page_length, changed_length, move_length)); DBUG_ASSERT(info->s->now_transactional); + DBUG_ASSERT(move_length <= (int) changed_length); + 
DBUG_ASSERT(ma_page->org_size == min(org_page_length, max_page_size)); + DBUG_ASSERT(ma_page->size == org_page_length + move_length); + DBUG_ASSERT(offset < max_page_size); /* Write REDO entry that contains the logical operations we need @@ -517,37 +520,54 @@ my_bool _ma_log_add(MARIA_PAGE *ma_page, log_pos= log_data + FILEID_STORE_SIZE; page_pos= ma_page->pos / info->s->block_size; page_store(log_pos, page_pos); + current_size= ma_page->org_size; log_pos+= PAGE_STORE_SIZE; +#ifdef EXTRA_DEBUG_KEY_CHANGES + *log_pos++= KEY_OP_DEBUG; + *log_pos++= KEY_OP_DEBUG_LOG_ADD; +#endif + /* Store keypage_flag */ *log_pos++= KEY_OP_SET_PAGEFLAG; *log_pos++= buff[KEYPAGE_TRANSFLAG_OFFSET]; - if (org_page_length + move_length > page_length) + /* + Don't overwrite page boundary + It's ok to cut this as we will append the data at end of page + in the next log entry + */ + if (offset + changed_length > max_page_size) + { + DBUG_ASSERT(handle_overflow); + changed_length= max_page_size - offset; /* Update to end of page */ + move_length= 0; /* Nothing to move */ + /* Extend the page to max length on recovery */ + *log_pos++= KEY_OP_MAX_PAGELENGTH; + current_size= max_page_size; + } + + /* Check if adding the key made the page overflow */ + if (current_size + move_length > max_page_size) { /* - Overflow. Cut either key or data from page end so that key fits - The code that splits the too big page will ignore logging any - data over org_page_length + Adding the key caused an overflow. Cut away the part of the + page that doesn't fit. 
*/ + uint diff; DBUG_ASSERT(handle_overflow); - if (offset + changed_length > page_length) - { - changed_length= page_length - offset; - move_length= 0; - } - else - { - uint diff= org_page_length + move_length - page_length; - log_pos[0]= KEY_OP_DEL_SUFFIX; - int2store(log_pos+1, diff); - log_pos+= 3; - org_page_length= page_length - move_length; - } + diff= current_size + move_length - max_page_size; + log_pos[0]= KEY_OP_DEL_SUFFIX; + int2store(log_pos+1, diff); + log_pos+= 3; + current_size= max_page_size - move_length; } - if (offset == org_page_length) + if (offset == current_size) + { log_pos[0]= KEY_OP_ADD_SUFFIX; + current_size+= changed_length; + } else { log_pos[0]= KEY_OP_OFFSET; @@ -558,51 +578,104 @@ my_bool _ma_log_add(MARIA_PAGE *ma_page, log_pos[0]= KEY_OP_SHIFT; int2store(log_pos+1, move_length); log_pos+= 3; + current_size+= move_length; } log_pos[0]= KEY_OP_CHANGE; } int2store(log_pos+1, changed_length); log_pos+= 3; - translog_parts= 2; log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - log_data); log_array[TRANSLOG_INTERNAL_PARTS + 1].str= key_pos; log_array[TRANSLOG_INTERNAL_PARTS + 1].length= changed_length; + translog_parts= TRANSLOG_INTERNAL_PARTS + 2; -#ifdef EXTRA_DEBUG_KEY_CHANGES + /* + If page was originally > block_size before operation and now all data + fits, append the end data that was not part of the previous logged + page to it. 
+ */ + DBUG_ASSERT(current_size <= max_page_size && current_size <= ma_page->size); + if (current_size != ma_page->size && current_size != max_page_size) { - MARIA_SHARE *share= info->s; - ha_checksum crc; - uint save_page_length= ma_page->size; - uint new_length= org_page_length + move_length; - _ma_store_page_used(share, buff, new_length); - crc= my_checksum(0, buff + LSN_STORE_SIZE, new_length - LSN_STORE_SIZE); - log_pos[0]= KEY_OP_CHECK; - int2store(log_pos+1, new_length); - int4store(log_pos+3, crc); - - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= log_pos; - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7; - changed_length+= 7; - translog_parts++; - _ma_store_page_used(share, buff, save_page_length); + uint length= min(ma_page->size, max_page_size) - current_size; + uchar *data= ma_page->buff + current_size; + + log_pos[0]= KEY_OP_ADD_SUFFIX; + int2store(log_pos+1, length); + log_array[translog_parts].str= log_pos; + log_array[translog_parts].length= 3; + log_array[translog_parts+1].str= data; + log_array[translog_parts+1].length= length; + log_pos+= 3; + translog_parts+= 2; + current_size+= length; + changed_length+= length + 3; } -#endif + + _ma_log_key_changes(ma_page, log_array + translog_parts, + log_pos, &changed_length, &translog_parts); + /* + Remember new page length for future log entries for same page + Note that this can be different from ma_page->size in case of page + overflow! 
+ */ + ma_page->org_size= current_size; + DBUG_ASSERT(ma_page->org_size == min(ma_page->size, max_page_size)); if (translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, (translog_size_t) log_array[TRANSLOG_INTERNAL_PARTS + 0].length + - changed_length, - TRANSLOG_INTERNAL_PARTS + translog_parts, + changed_length, translog_parts, log_array, log_data, NULL)) DBUG_RETURN(-1); DBUG_RETURN(0); } +#ifdef EXTRA_DEBUG_KEY_CHANGES + +/* Log checksum and optionally key page to log */ + +void _ma_log_key_changes(MARIA_PAGE *ma_page, LEX_CUSTRING *log_array, + uchar *log_pos, uint *changed_length, + uint *translog_parts) +{ + MARIA_SHARE *share= ma_page->info->s; + int page_length= min(ma_page->size, share->max_index_block_size); + uint org_length; + ha_checksum crc; + + DBUG_ASSERT(ma_page->flag == (uint) ma_page->buff[KEYPAGE_TRANSFLAG_OFFSET]); + + /* We have to change length as the page may have been shortened */ + org_length= _ma_get_page_used(share, ma_page->buff); + _ma_store_page_used(share, ma_page->buff, page_length); + crc= my_checksum(0, ma_page->buff + LSN_STORE_SIZE, + page_length - LSN_STORE_SIZE); + _ma_store_page_used(share, ma_page->buff, org_length); + + log_pos[0]= KEY_OP_CHECK; + int2store(log_pos+1, page_length); + int4store(log_pos+3, crc); + + log_array[0].str= log_pos; + log_array[0].length= 7; + (*changed_length)+= 7; + (*translog_parts)++; +#ifdef EXTRA_STORE_FULL_PAGE_IN_KEY_CHANGES + log_array[1].str= ma_page->buff; + log_array[1].length= page_length; + (*changed_length)+= page_length; + (*translog_parts)++; +#endif /* EXTRA_STORE_FULL_PAGE_IN_KEY_CHANGES */ +} + +#endif /* EXTRA_DEBUG_KEY_CHANGES */ + /**************************************************************************** Redo of key pages ****************************************************************************/ @@ -698,7 +771,7 @@ uint _ma_apply_redo_index_new_page(MARIA_HA *info, LSN lsn, bzero(buff, LSN_STORE_SIZE); memcpy(buff + LSN_STORE_SIZE, header, length); bzero(buff + 
LSN_STORE_SIZE + length, - share->block_size - LSN_STORE_SIZE - KEYPAGE_CHECKSUM_SIZE - length); + share->max_index_block_size - LSN_STORE_SIZE - length); bfill(buff + share->block_size - KEYPAGE_CHECKSUM_SIZE, KEYPAGE_CHECKSUM_SIZE, (uchar) 255); @@ -829,9 +902,13 @@ err: KEY_OP_ADD_SUFFIX 2 length, data Add data to end of page KEY_OP_DEL_SUFFIX 2 length Reduce page length with this Sets position to start of page - KEY_OP_CHECK 6 page_length[2},CRC Used only when debugging + KEY_OP_CHECK 6 page_length[2],CRC Used only when debugging + This may be followed by page_length + of data (until end of log record) KEY_OP_COMPACT_PAGE 6 transid KEY_OP_SET_PAGEFLAG 1 flag for page + KEY_OP_MAX_PAGELENGTH 0 Set page to max length + KEY_OP_DEBUG 1 Info where logging was done @return Operation status @retval 0 OK @@ -850,6 +927,7 @@ uint _ma_apply_redo_index(MARIA_HA *info, const uchar *header_end= header + head_length; uint page_offset= 0, org_page_length; uint nod_flag, page_length, keypage_header, keynr; + uint max_page_size= share->max_index_block_size; int result; MARIA_PAGE page; DBUG_ENTER("_ma_apply_redo_index"); @@ -898,12 +976,15 @@ uint _ma_apply_redo_index(MARIA_HA *info, header+= 2; DBUG_PRINT("redo", ("key_op_shift: %d", length)); DBUG_ASSERT(page_offset != 0 && page_offset <= page_length && - page_length + length < share->block_size); + page_length + length <= max_page_size); if (length < 0) + { + DBUG_ASSERT(page_offset - length <= page_length); bmove(buff + page_offset, buff + page_offset - length, page_length - page_offset + length); - else + } + else if (page_length != page_offset) bmove_upp(buff + page_length + length, buff + page_length, page_length - page_offset); page_length+= length; @@ -916,6 +997,7 @@ uint _ma_apply_redo_index(MARIA_HA *info, DBUG_ASSERT(page_offset != 0 && page_offset + length <= page_length); memcpy(buff + page_offset, header + 2 , length); + page_offset+= length; /* Put offset after changed length */ header+= 2 + length; break; } @@ 
-927,7 +1009,7 @@ uint _ma_apply_redo_index(MARIA_HA *info, insert_length, changed_length)); DBUG_ASSERT(insert_length <= changed_length && - page_length + changed_length <= share->block_size); + page_length + changed_length <= max_page_size); bmove_upp(buff + page_length + insert_length, buff + page_length, page_length - keypage_header); @@ -953,8 +1035,8 @@ uint _ma_apply_redo_index(MARIA_HA *info, case KEY_OP_ADD_SUFFIX: /* 6 */ { uint insert_length= uint2korr(header); - DBUG_PRINT("redo", ("key_op_add_prefix: %u", insert_length)); - DBUG_ASSERT(page_length + insert_length <= share->block_size); + DBUG_PRINT("redo", ("key_op_add_suffix: %u", insert_length)); + DBUG_ASSERT(page_length + insert_length <= max_page_size); memcpy(buff + page_length, header+2, insert_length); page_length+= insert_length; @@ -982,15 +1064,32 @@ uint _ma_apply_redo_index(MARIA_HA *info, if (crc != (uint32) my_checksum(0, buff + LSN_STORE_SIZE, page_length - LSN_STORE_SIZE)) { - DBUG_PRINT("error", ("page_length %u",page_length)); - DBUG_DUMP("KEY_OP_CHECK bad page", buff, share->block_size); - DBUG_ASSERT("crc" == "failure in REDO_INDEX"); + DBUG_DUMP("KEY_OP_CHECK bad page", buff, page_length); + if (header + 6 + page_length <= header_end) + { + DBUG_DUMP("KEY_OP_CHECK org page", header + 6, page_length); + } + DBUG_ASSERT("crc failure in REDO_INDEX" == 0); } #endif DBUG_PRINT("redo", ("key_op_check")); - header+= 6; + /* + This is the last entry in the block and it can contain page_length + data or not + */ + DBUG_ASSERT(header + 6 == header_end || + header + 6 + page_length == header_end); + header= header_end; break; } + case KEY_OP_DEBUG: + DBUG_PRINT("redo", ("Debug: %u", (uint) header[0])); + header++; + break; + case KEY_OP_MAX_PAGELENGTH: + DBUG_PRINT("redo", ("key_op_max_page_length")); + page_length= max_page_size; + break; case KEY_OP_MULTI_COPY: /* 9 */ { /* @@ -1011,7 +1110,7 @@ uint _ma_apply_redo_index(MARIA_HA *info, log_memcpy_length= uint2korr(header); header+= 2; 
log_memcpy_end= header + log_memcpy_length; - DBUG_ASSERT(full_length < share->block_size); + DBUG_ASSERT(full_length <= max_page_size); while (header < log_memcpy_end) { uint to, from; @@ -1020,7 +1119,7 @@ uint _ma_apply_redo_index(MARIA_HA *info, from= uint2korr(header); header+= 2; /* "from" is a place in the existing page */ - DBUG_ASSERT(max(from, to) < share->block_size); + DBUG_ASSERT(max(from, to) < max_page_size); memcpy(buff + to, buff + from, full_length); } break; diff --git a/storage/maria/ma_key_recover.h b/storage/maria/ma_key_recover.h index b580433c99a..3fdb045ee40 100644 --- a/storage/maria/ma_key_recover.h +++ b/storage/maria/ma_key_recover.h @@ -64,16 +64,26 @@ extern my_bool write_hook_for_undo_key_delete(enum translog_record_type type, TRN *trn, MARIA_HA *tbl_info, LSN *lsn, void *hook_arg); -my_bool _ma_log_prefix(MARIA_PAGE *page, uint changed_length, int move_length); +my_bool _ma_log_prefix(MARIA_PAGE *page, uint changed_length, int move_length, + enum en_key_debug debug_marker); my_bool _ma_log_suffix(MARIA_PAGE *page, uint org_length, uint new_length); my_bool _ma_log_add(MARIA_PAGE *page, uint buff_length, uchar *key_pos, uint changed_length, int move_length, my_bool handle_overflow); my_bool _ma_log_delete(MARIA_PAGE *page, const uchar *key_pos, - uint changed_length, uint move_length); -my_bool _ma_log_change(MARIA_PAGE *page, const uchar *key_pos, uint length); + uint changed_length, uint move_length, + uint append_length, enum en_key_debug debug_marker); +my_bool _ma_log_change(MARIA_PAGE *page, const uchar *key_pos, uint length, + enum en_key_debug debug_marker); my_bool _ma_log_new(MARIA_PAGE *page, my_bool root_page); +#ifdef EXTRA_DEBUG_KEY_CHANGES +void _ma_log_key_changes(MARIA_PAGE *ma_page, LEX_CUSTRING *log_array, + uchar *log_pos, uint *changed_length, + uint *translog_parts); +#else +#define _ma_log_key_changes(A,B,C,D,E) +#endif uint _ma_apply_redo_index_new_page(MARIA_HA *info, LSN lsn, const uchar *header, uint 
length); diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index 482a4e9071f..dc99554a08d 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -909,7 +909,7 @@ char *translog_filename_by_fileno(uint32 file_no, char *path) DBUG_ASSERT(file_no <= 0xfffffff); /* log_descriptor.directory is already formated */ - end= strxmov(path, log_descriptor.directory, "maria_log.0000000", NullS); + end= strxmov(path, log_descriptor.directory, "aria_log.0000000", NullS); length= (uint) (int10_to_str(file_no, buff, 10) - buff); strmov(end - length +1, buff); @@ -1219,7 +1219,7 @@ my_bool translog_read_file_header(LOGHANDLER_FILE_INFO *desc, File file) DBUG_RETURN(1); } translog_interpret_file_header(desc, page_buff); - DBUG_PRINT("info", ("timestamp: %llu maria ver: %lu mysql ver: %lu " + DBUG_PRINT("info", ("timestamp: %llu aria ver: %lu mysql ver: %lu " "server id %lu page size %lu file number %lu " "max lsn: (%lu,0x%lx)", (ulonglong) desc->timestamp, @@ -3470,7 +3470,7 @@ static my_bool translog_truncate_log(TRANSLOG_ADDRESS addr) /** Applies function 'callback' to all files (in a directory) which - name looks like a log's name (maria_log.[0-9]{7}). + name looks like a log's name (aria_log.[0-9]{7}). If 'callback' returns TRUE this interrupts the walk and returns TRUE. Otherwise FALSE is returned after processing all log files. 
It cannot just use log_descriptor.directory because that may not yet have @@ -3496,7 +3496,7 @@ my_bool translog_walk_filenames(const char *directory, for (i= 0; i < dirp->number_off_files; i++) { char *file= dirp->dir_entry[i].name; - if (strncmp(file, "maria_log.", 10) == 0 && + if (strncmp(file, "aria_log.", 10) == 0 && file[10] >= '0' && file[10] <= '9' && file[11] >= '0' && file[11] <= '9' && file[12] >= '0' && file[12] <= '9' && @@ -3727,7 +3727,7 @@ my_bool translog_init_with_table(const char *directory, my_bool pageok; DBUG_PRINT("info", ("log found...")); /* - TODO: scan directory for maria_log.XXXXXXXX files and find + TODO: scan directory for aria_log.XXXXXXXX files and find highest XXXXXXXX & set logs_found TODO: check that last checkpoint within present log addresses space @@ -8073,7 +8073,7 @@ retest: } else { - soft_sync_max= lsn; + soft_sync_max= LSN_FILE_NO(lsn); soft_need_sync= 1; } @@ -8864,13 +8864,13 @@ void translog_soft_sync_end(void) #ifdef MARIA_DUMP_LOG #include <my_getopt.h> extern void translog_example_table_init(); -static const char *load_default_groups[]= { "maria_dump_log",0 }; +static const char *load_default_groups[]= { "aria_dump_log",0 }; static void get_options(int *argc,char * * *argv); #ifndef DBUG_OFF #if defined(__WIN__) -const char *default_dbug_option= "d:t:i:O,\\maria_dump_log.trace"; +const char *default_dbug_option= "d:t:i:O,\\aria_dump_log.trace"; #else -const char *default_dbug_option= "d:t:i:o,/tmp/maria_dump_log.trace"; +const char *default_dbug_option= "d:t:i:o,/tmp/aria_dump_log.trace"; #endif #endif static ulonglong opt_offset; @@ -8927,7 +8927,7 @@ static void usage(void) puts("This software comes with ABSOLUTELY NO WARRANTY. 
This is free software,"); puts("and you are welcome to modify and redistribute it under the GPL license\n"); - puts("Dump content of maria log pages."); + puts("Dump content of aria log pages."); VOID(printf("\nUsage: %s -f file OPTIONS\n", my_progname_short)); my_print_help(my_long_options); print_defaults("my", load_default_groups); @@ -8984,7 +8984,7 @@ static void dump_header_page(uchar *buff) translog_interpret_file_header(&desc, buff); printf(" This can be header page:\n" " Timestamp: %s\n" - " Maria log version: %lu\n" + " Aria log version: %lu\n" " Server version: %lu\n" " Server id %lu\n" " Page size %lu\n", diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h index 224d93fb24b..7291c9811d7 100644 --- a/storage/maria/ma_loghandler.h +++ b/storage/maria/ma_loghandler.h @@ -16,12 +16,14 @@ #ifndef _ma_loghandler_h #define _ma_loghandler_h +#define MB (1024UL*1024) + /* transaction log default cache size (TODO: make it global variable) */ -#define TRANSLOG_PAGECACHE_SIZE (1024U*1024*2) +#define TRANSLOG_PAGECACHE_SIZE (2*MB) /* transaction log default file size */ -#define TRANSLOG_FILE_SIZE (1024U*1024*1024) +#define TRANSLOG_FILE_SIZE (1024U*MB) /* minimum possible transaction log size */ -#define TRANSLOG_MIN_FILE_SIZE (1024U*1024*8) +#define TRANSLOG_MIN_FILE_SIZE (8*MB) /* transaction log default flags (TODO: make it global variable) */ #define TRANSLOG_DEFAULT_FLAGS 0 @@ -165,7 +167,31 @@ enum en_key_op KEY_OP_CHECK, /* For debugging; CRC of used part of page */ KEY_OP_MULTI_COPY, /* List of memcpy()s with fixed-len sources in page */ KEY_OP_SET_PAGEFLAG, /* Set pageflag from next byte */ - KEY_OP_COMPACT_PAGE /* Compact key page */ + KEY_OP_COMPACT_PAGE, /* Compact key page */ + KEY_OP_MAX_PAGELENGTH, /* Set page to max page length */ + KEY_OP_DEBUG /* Entry for storing what triggered redo_index */ +}; + +enum en_key_debug +{ + KEY_OP_DEBUG_RTREE_COMBINE, /* 0 */ + KEY_OP_DEBUG_RTREE_SPLIT, /* 1 */ + KEY_OP_DEBUG_RTREE_SET_KEY, /* 
2 */ + KEY_OP_DEBUG_FATHER_CHANGED_1, /* 3 */ + KEY_OP_DEBUG_FATHER_CHANGED_2, /* 4 */ + KEY_OP_DEBUG_LOG_SPLIT, /* 5 */ + KEY_OP_DEBUG_LOG_ADD, /* 6 */ + KEY_OP_DEBUG_LOG_PREFIX_1, /* 7 */ + KEY_OP_DEBUG_LOG_PREFIX_2, /* 8 */ + KEY_OP_DEBUG_LOG_PREFIX_3, /* 9 */ + KEY_OP_DEBUG_LOG_PREFIX_4, /* 10 */ + KEY_OP_DEBUG_LOG_PREFIX_5, /* 11 */ + KEY_OP_DEBUG_LOG_DEL_CHANGE_1, /* 12 */ + KEY_OP_DEBUG_LOG_DEL_CHANGE_2, /* 13 */ + KEY_OP_DEBUG_LOG_DEL_CHANGE_3, /* 14 */ + KEY_OP_DEBUG_LOG_DEL_CHANGE_RT, /* 15 */ + KEY_OP_DEBUG_LOG_DEL_PREFIX, /* 16 */ + KEY_OP_DEBUG_LOG_MIDDLE /* 17 */ }; diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index c86c439a702..30d099d939a 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -209,6 +209,7 @@ static MARIA_HA *maria_clone_internal(MARIA_SHARE *share, const char *name, DBUG_RETURN(m_info); err: + DBUG_PRINT("error", ("error: %d", my_errno)); save_errno=my_errno ? my_errno : HA_ERR_END_OF_FILE; if ((save_errno == HA_ERR_CRASHED) || (save_errno == HA_ERR_CRASHED_ON_USAGE) || @@ -433,8 +434,14 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) share->base.born_transactional && ((!(open_flags & HA_OPEN_IGNORE_MOVED_STATE) && memcmp(share->base.uuid, maria_uuid, MY_UUID_SIZE)) || - share->state.create_trid > trnman_get_max_trid())) + (share->state.create_trid > trnman_get_max_trid() && + !maria_in_recovery))) { + DBUG_PRINT("warning", ("table is moved from another system. 
uuid_diff: %d create_trid: %lu max_trid: %lu", + memcmp(share->base.uuid, maria_uuid, + MY_UUID_SIZE) != 0, + (ulong) share->state.create_trid, + (ulong) trnman_get_max_trid())); if (open_flags & HA_OPEN_FOR_REPAIR) share->state.changed|= STATE_MOVED; else @@ -549,6 +556,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) strmov(share->open_file_name.str, name); share->block_size= share->base.block_size; /* Convenience */ + share->max_index_block_size= share->block_size - KEYPAGE_CHECKSUM_SIZE; { HA_KEYSEG *pos=share->keyparts; uint32 ftkey_nr= 1; @@ -892,6 +900,11 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) share->lock_restore_status= _ma_restore_status; } } + else if (share->now_transactional) + { + DBUG_ASSERT(share->data_file_type == BLOCK_RECORD); + share->lock.get_status= _ma_block_get_status_no_versioning; + } } #endif /* @@ -913,10 +926,15 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) if (!(m_info= maria_clone_internal(share, name, mode, data_file))) goto err; + if (maria_is_crashed(m_info)) + DBUG_PRINT("warning", ("table is crashed: changed: %u", + share->state.changed)); + pthread_mutex_unlock(&THR_LOCK_maria); DBUG_RETURN(m_info); err: + DBUG_PRINT("error", ("error: %d errpos: %d", my_errno, errpos)); save_errno=my_errno ? my_errno : HA_ERR_END_OF_FILE; if ((save_errno == HA_ERR_CRASHED) || (save_errno == HA_ERR_CRASHED_ON_USAGE) || diff --git a/storage/maria/ma_page.c b/storage/maria/ma_page.c index acbee2a6f07..a4423133270 100644 --- a/storage/maria/ma_page.c +++ b/storage/maria/ma_page.c @@ -59,6 +59,7 @@ void _ma_page_setup(MARIA_PAGE *page, MARIA_HA *info, page->buff= buff; page->pos= pos; page->size= _ma_get_page_used(share, buff); + page->org_size= page->size; page->flag= _ma_get_keypage_flag(share, buff); page->node= ((page->flag & KEYPAGE_FLAG_ISNOD) ? 
share->base.key_reflength : 0); @@ -68,7 +69,7 @@ void _ma_page_setup(MARIA_PAGE *page, MARIA_HA *info, void page_cleanup(MARIA_SHARE *share, MARIA_PAGE *page) { uint length= page->size; - DBUG_ASSERT(length <= block_size - KEYPAGE_CHECKSUM_SIZE); + DBUG_ASSERT(length <= share->max_index_block_size); bzero(page->buff + length, share->block_size - length); } #endif @@ -103,7 +104,7 @@ my_bool _ma_fetch_keypage(MARIA_PAGE *page, MARIA_HA *info, MARIA_SHARE *share= info->s; uint block_size= share->block_size; DBUG_ENTER("_ma_fetch_keypage"); - DBUG_PRINT("enter",("pos: %ld", (long) pos)); + DBUG_PRINT("enter",("page: %lu", (ulong) (pos / block_size))); tmp= pagecache_read(share->pagecache, &share->kfile, (pgcache_page_no_t) (pos / block_size), level, buff, @@ -142,6 +143,7 @@ my_bool _ma_fetch_keypage(MARIA_PAGE *page, MARIA_HA *info, page->buff= tmp; page->pos= pos; page->size= _ma_get_page_used(share, tmp); + page->org_size= page->size; /* For debugging */ page->flag= _ma_get_keypage_flag(share, tmp); page->node= ((page->flag & KEYPAGE_FLAG_ISNOD) ? 
share->base.key_reflength : 0); @@ -149,7 +151,7 @@ my_bool _ma_fetch_keypage(MARIA_PAGE *page, MARIA_HA *info, #ifdef EXTRA_DEBUG { uint page_size= page->size; - if (page_size < 4 || page_size > block_size || + if (page_size < 4 || page_size > share->max_index_block_size || _ma_get_keynr(share, tmp) != keyinfo->key_nr) { DBUG_PRINT("error",("page %lu had wrong page length: %u keynr: %u", @@ -159,7 +161,7 @@ my_bool _ma_fetch_keypage(MARIA_PAGE *page, MARIA_HA *info, info->last_keypage = HA_OFFSET_ERROR; maria_print_error(share, HA_ERR_CRASHED); my_errno= HA_ERR_CRASHED; - tmp= 0; + DBUG_RETURN(1); } } #endif @@ -179,6 +181,13 @@ my_bool _ma_write_keypage(MARIA_PAGE *page, enum pagecache_page_lock lock, MARIA_PINNED_PAGE page_link; DBUG_ENTER("_ma_write_keypage"); + /* + The following ensures that for transactional tables we have logged + all changes that changes the page size (as the logging code sets + page->org_size) + */ + DBUG_ASSERT(!share->now_transactional || page->size == page->org_size); + #ifdef EXTRA_DEBUG /* Safety check */ { uint page_length, nod_flag; @@ -193,7 +202,7 @@ my_bool _ma_write_keypage(MARIA_PAGE *page, enum pagecache_page_lock lock, (page->pos & (maria_block_size-1))) { DBUG_PRINT("error",("Trying to write inside key status region: " - "key_start: %lu length: %lu page: %lu", + "key_start: %lu length: %lu page_pos: %lu", (long) share->base.keystart, (long) share->state.state.key_file_length, (long) page->pos)); @@ -201,7 +210,7 @@ my_bool _ma_write_keypage(MARIA_PAGE *page, enum pagecache_page_lock lock, DBUG_ASSERT(0); DBUG_RETURN(1); } - DBUG_PRINT("page",("write page at: %lu",(long) page->pos)); + DBUG_PRINT("page",("write page at: %lu",(ulong) (page->pos / block_size))); DBUG_DUMP("buff", buff, page_length); DBUG_ASSERT(page_length >= share->keypage_header + nod_flag + page->keyinfo->minlength || maria_in_recovery); @@ -274,7 +283,7 @@ int _ma_dispose(register MARIA_HA *info, my_off_t pos, my_bool page_not_read) enum 
pagecache_page_lock lock_method; enum pagecache_page_pin pin_method; DBUG_ENTER("_ma_dispose"); - DBUG_PRINT("enter",("pos: %ld", (long) pos)); + DBUG_PRINT("enter",("page: %lu", (ulong) (pos / block_size))); DBUG_ASSERT(pos % block_size == 0); (void) _ma_lock_key_del(info, 0); @@ -423,8 +432,7 @@ my_off_t _ma_new(register MARIA_HA *info, int level, share->key_del_current= mi_sizekorr(buff+share->keypage_header); #ifndef DBUG_OFF key_del_current= share->key_del_current; - DBUG_ASSERT(key_del_current != share->state.key_del && - (key_del_current != 0) && + DBUG_ASSERT((key_del_current != 0) && ((key_del_current == HA_OFFSET_ERROR) || (key_del_current <= (share->state.state.key_file_length - block_size)))); @@ -453,32 +461,48 @@ my_off_t _ma_new(register MARIA_HA *info, int level, Log compactation of a index page */ -static my_bool _ma_log_compact_keypage(MARIA_HA *info, my_off_t page, +static my_bool _ma_log_compact_keypage(MARIA_PAGE *ma_page, TrID min_read_from) { LSN lsn; - uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 1 + TRANSID_SIZE]; + uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 1 + 7 + TRANSID_SIZE]; + uchar *log_pos; LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; + MARIA_HA *info= ma_page->info; MARIA_SHARE *share= info->s; + uint translog_parts, extra_length; + my_off_t page= ma_page->pos; DBUG_ENTER("_ma_log_compact_keypage"); - DBUG_PRINT("enter", ("page: %lu", (ulong) page)); + DBUG_PRINT("enter", ("page: %lu", (ulong) (page / share->block_size))); /* Store address of new root page */ page/= share->block_size; page_store(log_data + FILEID_STORE_SIZE, page); - log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE]= KEY_OP_COMPACT_PAGE; - transid_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE +1, - min_read_from); + log_pos= log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE; + + log_pos[0]= KEY_OP_COMPACT_PAGE; + transid_store(log_pos + 1, min_read_from); + log_pos+= 1 + TRANSID_SIZE; log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; - 
log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - + log_data); + translog_parts= 1; + extra_length= 0; + + _ma_log_key_changes(ma_page, + log_array + TRANSLOG_INTERNAL_PARTS + translog_parts, + log_pos, &extra_length, &translog_parts); + /* Remember new page length for future log entires for same page */ + ma_page->org_size= ma_page->size; if (translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, - (translog_size_t) sizeof(log_data), - TRANSLOG_INTERNAL_PARTS + 1, log_array, - log_data, NULL)) + log_array[TRANSLOG_INTERNAL_PARTS + + 0].length + extra_length, + TRANSLOG_INTERNAL_PARTS + translog_parts, + log_array, log_data, NULL)) DBUG_RETURN(1); DBUG_RETURN(0); } @@ -526,7 +550,7 @@ my_bool _ma_compact_keypage(MARIA_PAGE *ma_page, TrID min_read_from) { if (!(page= (*ma_page->keyinfo->skip_key)(&key, 0, 0, page))) { - DBUG_PRINT("error",("Couldn't find last key: page: 0x%lx", + DBUG_PRINT("error",("Couldn't find last key: page_pos: 0x%lx", (long) page)); maria_print_error(share, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; @@ -588,7 +612,7 @@ my_bool _ma_compact_keypage(MARIA_PAGE *ma_page, TrID min_read_from) if (share->now_transactional) { - if (_ma_log_compact_keypage(info, ma_page->pos, min_read_from)) + if (_ma_log_compact_keypage(ma_page, min_read_from)) DBUG_RETURN(1); } DBUG_RETURN(0); diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c index dd676028f05..441310a60ea 100644 --- a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -4187,7 +4187,16 @@ static int flush_cached_blocks(PAGECACHE *pagecache, { PAGECACHE_BLOCK_LINK *block= *cache; - if (block->pins) + /* + In the case of non_transactional tables we want to flush also + block pinned with reads. This is becasue we may have other + threads reading the block during flush, as non transactional + tables can have many readers while the one writer is doing the + flush. 
+ We don't want to do flush pinned blocks during checkpoint. + We detect the checkpoint case by checking if type is LAZY. + */ + if ((type == FLUSH_KEEP_LAZY && block->pins) || block->wlocks) { KEYCACHE_DBUG_PRINT("flush_cached_blocks", ("block: %u (0x%lx) pinned", @@ -4204,13 +4213,9 @@ static int flush_cached_blocks(PAGECACHE *pagecache, *first_errno= HA_ERR_INTERNAL_ERROR; continue; } - /* if the block is not pinned then it is not write locked */ - DBUG_ASSERT(block->wlocks == 0); - DBUG_ASSERT(block->pins == 0); if (make_lock_and_pin(pagecache, block, - PAGECACHE_LOCK_WRITE, PAGECACHE_PIN, FALSE)) + PAGECACHE_LOCK_READ, PAGECACHE_PIN, FALSE)) DBUG_ASSERT(0); - DBUG_ASSERT(block->pins == 1); KEYCACHE_DBUG_PRINT("flush_cached_blocks", ("block: %u (0x%lx) to be flushed", @@ -4222,7 +4227,6 @@ static int flush_cached_blocks(PAGECACHE *pagecache, DBUG_PRINT("info", ("block: %u (0x%lx) pins: %u", PCBLOCK_NUMBER(pagecache, block), (ulong)block, block->pins)); - DBUG_ASSERT(block->pins == 1); /** @todo IO If page is contiguous with next page to flush, group flushes in one single my_pwrite(). 
@@ -4241,7 +4245,7 @@ static int flush_cached_blocks(PAGECACHE *pagecache, pagecache_pthread_mutex_lock(&pagecache->cache_lock); if (make_lock_and_pin(pagecache, block, - PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_LOCK_READ_UNLOCK, PAGECACHE_UNPIN, FALSE)) DBUG_ASSERT(0); diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index 7b3065b0208..7a7286e26f9 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -1,4 +1,5 @@ /* Copyright (C) 2006, 2007 MySQL AB + Copyright (C) 2010 Monty Program Ab This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -56,6 +57,7 @@ static ulong skipped_undo_phase; static ulonglong now; /**< for tracking execution time of phases */ static int (*save_error_handler_hook)(uint, const char *,myf); static uint recovery_warnings; /**< count of warnings */ +static uint recovery_found_crashed_tables; #define prototype_redo_exec_hook(R) \ static int exec_REDO_LOGREC_ ## R(const TRANSLOG_HEADER_BUFFER *rec) @@ -107,7 +109,8 @@ prototype_undo_exec_hook(UNDO_KEY_DELETE); prototype_undo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT); prototype_undo_exec_hook(UNDO_BULK_INSERT); -static int run_redo_phase(LSN lsn, enum maria_apply_log_way apply); +static int run_redo_phase(LSN lsn, LSN end_lsn, + enum maria_apply_log_way apply); static uint end_of_redo_phase(my_bool prepare_for_undo_phase); static int run_undo_phase(uint uncommitted); static void display_record_position(const LOG_DESC *log_desc, @@ -208,18 +211,18 @@ int maria_recovery_from_log(void) maria_in_recovery= TRUE; #ifdef EXTRA_DEBUG - fn_format(name_buff, "maria_recovery.trace", maria_data_root, "", MYF(0)); + fn_format(name_buff, "aria_recovery.trace", maria_data_root, "", MYF(0)); trace_file= my_fopen(name_buff, O_WRONLY|O_APPEND|O_CREAT, MYF(MY_WME)); #else trace_file= NULL; /* no trace file for being fast */ #endif - tprint(trace_file, "TRACE of the last MARIA recovery 
from mysqld\n"); + tprint(trace_file, "TRACE of the last Aria recovery from mysqld\n"); DBUG_ASSERT(maria_pagecache->inited); - res= maria_apply_log(LSN_IMPOSSIBLE, MARIA_LOG_APPLY, trace_file, - TRUE, TRUE, TRUE, &warnings_count); + res= maria_apply_log(LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, MARIA_LOG_APPLY, + trace_file, TRUE, TRUE, TRUE, &warnings_count); if (!res) { - if (warnings_count == 0) + if (warnings_count == 0 && recovery_found_crashed_tables == 0) tprint(trace_file, "SUCCESS\n"); else tprint(trace_file, "DOUBTFUL (%u warnings, check previous output)\n", @@ -237,6 +240,7 @@ int maria_recovery_from_log(void) @param from_lsn LSN from which log reading/applying should start; LSN_IMPOSSIBLE means "use last checkpoint" + @param end_lsn Apply until this. LSN_IMPOSSIBLE means until end. @param apply how log records should be applied or not @param trace_file trace file where progress/debug messages will go @param skip_DDLs_arg Should DDL records (CREATE/RENAME/DROP/REPAIR) @@ -253,7 +257,8 @@ int maria_recovery_from_log(void) @retval !=0 Error */ -int maria_apply_log(LSN from_lsn, enum maria_apply_log_way apply, +int maria_apply_log(LSN from_lsn, LSN end_lsn, + enum maria_apply_log_way apply, FILE *trace_file, my_bool should_run_undo_phase, my_bool skip_DDLs_arg, my_bool take_checkpoints, uint *warnings_count) @@ -261,13 +266,16 @@ int maria_apply_log(LSN from_lsn, enum maria_apply_log_way apply, int error= 0; uint uncommitted_trans; ulonglong old_now; + my_bool abort_message_printed= 0; DBUG_ENTER("maria_apply_log"); DBUG_ASSERT(apply == MARIA_LOG_APPLY || !should_run_undo_phase); DBUG_ASSERT(!maria_multi_threaded); - recovery_warnings= 0; + recovery_warnings= recovery_found_crashed_tables= 0; + maria_recovery_changed_data= 0; /* checkpoints can happen only if TRNs have been built */ DBUG_ASSERT(should_run_undo_phase || !take_checkpoints); + DBUG_ASSERT(end_lsn == LSN_IMPOSSIBLE || should_run_undo_phase == 0); all_active_trans= (struct st_trn_for_recovery *) 
my_malloc((SHORT_TRID_MAX + 1) * sizeof(struct st_trn_for_recovery), MYF(MY_ZEROFILL)); @@ -313,7 +321,7 @@ int maria_apply_log(LSN from_lsn, enum maria_apply_log_way apply, now= my_getsystime(); in_redo_phase= TRUE; trnman_init(max_trid_in_control_file); - if (run_redo_phase(from_lsn, apply)) + if (run_redo_phase(from_lsn, end_lsn, apply)) { ma_message_no_user(0, "Redo phase failed"); trnman_destroy(); @@ -321,6 +329,17 @@ int maria_apply_log(LSN from_lsn, enum maria_apply_log_way apply, } trnman_destroy(); + if (end_lsn != LSN_IMPOSSIBLE) + { + abort_message_printed= 1; + if (!trace_file) + fputc('\n', stderr); + my_message(HA_ERR_INITIALIZATION, + "Maria recovery aborted as end_lsn/end of file was reached", + MYF(0)); + goto err2; + } + if ((uncommitted_trans= end_of_redo_phase(should_run_undo_phase)) == (uint)-1) { @@ -437,10 +456,15 @@ int maria_apply_log(LSN from_lsn, enum maria_apply_log_way apply, goto end; err: - error= 1; tprint(tracef, "\nRecovery of tables with transaction logs FAILED\n"); +err2: if (trns_created) delete_all_transactions(); + error= 1; + if (close_all_tables()) + { + ma_message_no_user(0, "closing of tables failed"); + } end: error_handler_hook= save_error_handler_hook; hash_free(&all_dirty_pages); @@ -455,7 +479,7 @@ end: log_record_buffer.str= NULL; log_record_buffer.length= 0; ma_checkpoint_end(); - *warnings_count= recovery_warnings; + *warnings_count= recovery_warnings + recovery_found_crashed_tables; if (recovery_message_printed != REC_MSG_NONE) { if (procent_printed) @@ -465,17 +489,33 @@ end: fflush(stderr); } if (!error) + { ma_message_no_user(ME_JUST_INFO, "recovery done"); + maria_recovery_changed_data= 1; + } } - if (error) + else if (!error && max_trid_in_control_file != max_long_trid) + { + /* + maria_end() will set max trid in log file so that one can run + maria_chk on the tables + */ + maria_recovery_changed_data= 1; + } + + if (error && !abort_message_printed) + { + if (!trace_file) + fputc('\n', stderr); 
my_message(HA_ERR_INITIALIZATION, - "Maria recovery failed. Please run maria_chk -r on all maria " - "tables and delete all maria_log.######## files", MYF(0)); + "Aria recovery failed. Please run aria_chk -r on all Aria " + "tables and delete all aria_log.######## files", MYF(0)); + } procent_printed= 0; /* We don't cleanly close tables if we hit some error (may corrupt them by flushing some wrong blocks made from wrong REDOs). It also leaves their - open_count>0, which ensures that --maria-recover, if used, will try to + open_count>0, which ensures that --aria-recover, if used, will try to repair them. */ DBUG_RETURN(error); @@ -511,9 +551,14 @@ static int display_and_apply_record(const LOG_DESC *log_desc, if (log_desc->record_execute_in_redo_phase == NULL) { /* die on all not-yet-handled records :) */ - DBUG_ASSERT("one more hook" == "to write"); + DBUG_ASSERT("one more hook to write" == 0); return 1; } + if (rec->type == LOGREC_DEBUG_INFO) + { + /* Query already printed by display_record_position() */ + return 0; + } if ((error= (*log_desc->record_execute_in_redo_phase)(rec))) eprint(tracef, "Got error %d when executing record %s", my_errno, log_desc->name); @@ -604,6 +649,20 @@ prototype_redo_exec_hook(INCOMPLETE_LOG) /* no such table, don't need to warn */ return 0; } + + if (maria_is_crashed(info)) + return 0; + + if (info->s->state.is_of_horizon > rec->lsn) + { + /* + This table was repaired at a time after this log entry. + We can assume that all rows was inserted sucessfully and we don't + have to warn about that the inserted data was not logged + */ + return 0; + } + /* Example of what can go wrong when replaying DDLs: CREATE TABLE t (logged); INSERT INTO t VALUES(1) (logged); @@ -618,21 +677,65 @@ prototype_redo_exec_hook(INCOMPLETE_LOG) failure in _ma_apply_redo_insert_row_head_or_tail(): new data page is created whereas rownr is not 0). 
So when the server disables logging for ALTER TABLE or CREATE SELECT, it - logs LOGREC_INCOMPLETE_LOG to warn maria_read_log and then the user. + logs LOGREC_INCOMPLETE_LOG to warn aria_read_log and then the user. Another issue is that replaying of DDLs is not correct enough to work if there was a crash during a DDL (see comment in execution of REDO_RENAME_TABLE ). */ - tprint(tracef, "***WARNING: MySQL server currently logs no records" - " about insertion of data by ALTER TABLE and CREATE SELECT," - " as they are not necessary for recovery;" - " present applying of log records may well not work.***\n"); + + eprint(tracef, "***WARNING: Aria engine currently logs no records " + "about insertion of data by ALTER TABLE and CREATE SELECT, " + "as they are not necessary for recovery; " + "present applying of log records to table '%s' may well not work." + "***", info->s->index_file_name.str); + + /* Prevent using the table for anything else than undo repair */ + _ma_mark_file_crashed(info->s); recovery_warnings++; return 0; } +static my_bool create_database_if_not_exists(const char *name) +{ + char dirname[FN_REFLEN]; + size_t length; + MY_STAT stat_info; + DBUG_ENTER("create_database_if_not_exists"); + + dirname_part(dirname, name, &length); + if (!length) + { + /* Skip files without directores */ + DBUG_RETURN(0); + } + /* + Safety; Don't create files with hard path; + Should never happen with MariaDB + If hard path, then error will be detected when trying to create index file + */ + if (test_if_hard_path(dirname)) + DBUG_RETURN(0); + + if (my_stat(dirname,&stat_info,MYF(0))) + DBUG_RETURN(0); + + + tprint(tracef, "Creating not existing database '%s'\n", dirname); + if (my_mkdir(dirname, 0777, MYF(MY_WME))) + { + eprint(tracef, "***WARNING: Can't create not existing database '%s'", + dirname); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + + + + prototype_redo_exec_hook(REDO_CREATE_TABLE) { File dfile= -1, kfile= -1; @@ -644,11 +747,12 @@ 
prototype_redo_exec_hook(REDO_CREATE_TABLE) int error= 1, create_mode= O_RDWR | O_TRUNC, i; MARIA_HA *info= NULL; uint kfile_size_before_extension, keystart; + DBUG_ENTER("exec_REDO_LOGREC_REDO_CREATE_TABLE"); if (skip_DDLs) { tprint(tracef, "we skip DDLs\n"); - return 0; + DBUG_RETURN(0); } enlarge_buffer(rec); if (log_record_buffer.str == NULL || @@ -715,9 +819,12 @@ prototype_redo_exec_hook(REDO_CREATE_TABLE) maria_close(info); info= NULL; } - else /* one or two files absent, or header corrupted... */ - tprint(tracef, "Table '%s' can't be opened, probably does not exist\n", - name); + else + { + /* one or two files absent, or header corrupted... */ + tprint(tracef, "Table '%s' can't be opened (Error: %d)\n", + name, my_errno); + } /* if does not exist, or is older, overwrite it */ ptr= name + strlen(name) + 1; if ((flags= ptr[0] ? HA_DONT_TOUCH_DATA : 0)) @@ -748,6 +855,8 @@ prototype_redo_exec_hook(REDO_CREATE_TABLE) name); goto end; } + if (create_database_if_not_exists(name)) + goto end; fn_format(filename, name, "", MARIA_NAME_IEXT, (MY_UNPACK_FILENAME | (flags & HA_DONT_TOUCH_DATA) ? MY_RETURN_REAL_PATH : 0) | @@ -801,7 +910,7 @@ end: error|= my_close(kfile, MYF(MY_WME)); if (info != NULL) error|= maria_close(info); - return error; + DBUG_RETURN(error); } @@ -810,10 +919,12 @@ prototype_redo_exec_hook(REDO_RENAME_TABLE) char *old_name, *new_name; int error= 1; MARIA_HA *info= NULL; + DBUG_ENTER("exec_REDO_LOGREC_REDO_RENAME_TABLE"); + if (skip_DDLs) { tprint(tracef, "we skip DDLs\n"); - return 0; + DBUG_RETURN(0); } enlarge_buffer(rec); if (log_record_buffer.str == NULL || @@ -830,7 +941,7 @@ prototype_redo_exec_hook(REDO_RENAME_TABLE) new_name); /* Here is why we skip CREATE/DROP/RENAME when doing a recovery from - ha_maria (whereas we do when called from maria_read_log). Consider: + ha_maria (whereas we do when called from aria_read_log). 
Consider: CREATE TABLE t; RENAME TABLE t to u; DROP TABLE u; @@ -850,8 +961,8 @@ prototype_redo_exec_hook(REDO_RENAME_TABLE) crash. We however sync files and directories at each file rename. The SQL layer is anyway not crash-safe for DDLs (except the repartioning-related ones). - We replay DDLs in maria_read_log to be able to recreate tables from - scratch. It means that "maria_read_log -a" should not be used on a + We replay DDLs in aria_read_log to be able to recreate tables from + scratch. It means that "aria_read_log -a" should not be used on a database which just crashed during a DDL. And also ALTER TABLE does not log insertions of records into the temporary table, so replaying may fail (grep for INCOMPLETE_LOG in files). @@ -988,7 +1099,7 @@ end: tprint(tracef, "\n"); if (info != NULL) error|= maria_close(info); - return error; + DBUG_RETURN(error); } @@ -1015,7 +1126,11 @@ prototype_redo_exec_hook(REDO_REPAIR_TABLE) } if ((info= get_MARIA_HA_from_REDO_record(rec)) == NULL) DBUG_RETURN(0); - + if (maria_is_crashed(info)) + { + tprint(tracef, "we skip repairing crashed table\n"); + DBUG_RETURN(0); + } /* Otherwise, the mapping is newer than the table, and our record is newer than the mapping, so we can repair. 
@@ -1026,6 +1141,7 @@ prototype_redo_exec_hook(REDO_REPAIR_TABLE) param.isam_file_name= name= info->s->open_file_name.str; param.testflag= uint8korr(rec->header + FILEID_STORE_SIZE); param.tmpdir= maria_tmpdir; + param.max_trid= max_long_trid; DBUG_ASSERT(maria_tmpdir); info->s->state.key_map= uint8korr(rec->header + FILEID_STORE_SIZE + 8); @@ -1185,6 +1301,7 @@ static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id) MARIA_HA *info; MARIA_SHARE *share; my_off_t dfile_len, kfile_len; + DBUG_ENTER("new_table"); checkpoint_useful= TRUE; if ((name == NULL) || (name[0] == 0)) @@ -1195,6 +1312,7 @@ static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id) */ tprint(tracef, ", record is corrupted"); info= NULL; + recovery_warnings++; goto end; } tprint(tracef, "Table '%s', id %u", name, sid); @@ -1204,6 +1322,8 @@ static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id) tprint(tracef, ", is absent (must have been dropped later?)" " or its header is so corrupted that we cannot open it;" " we skip it"); + if (my_errno != ENOENT) + recovery_found_crashed_tables++; error= 0; goto end; } @@ -1218,6 +1338,12 @@ static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id) */ if (close_one_table(share->open_file_name.str, lsn_of_file_id)) goto end; + /* + We should not try to get length of data/index files as the files + are not on disk yet. + */ + _ma_tmp_disable_logging_for_table(info, FALSE); + goto set_lsn_of_file_id; } if (!share->base.born_transactional) { @@ -1227,6 +1353,7 @@ static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id) */ tprint(tracef, ", is not transactional. 
Ignoring open request"); error= -1; + recovery_warnings++; goto end; } if (cmp_translog_addr(lsn_of_file_id, share->state.create_rename_lsn) <= 0) @@ -1235,6 +1362,7 @@ static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id) " LOGREC_FILE_ID's LSN (%lu,0x%lx), ignoring open request", LSN_IN_PARTS(share->state.create_rename_lsn), LSN_IN_PARTS(lsn_of_file_id)); + recovery_warnings++; error= -1; goto end; /* @@ -1245,7 +1373,8 @@ static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id) if (maria_is_crashed(info)) { eprint(tracef, "Table '%s' is crashed, skipping it. Please repair it with" - " maria_chk -r", share->open_file_name.str); + " aria_chk -r", share->open_file_name.str); + recovery_found_crashed_tables++; error= -1; /* not fatal, try with other tables */ goto end; /* @@ -1264,6 +1393,7 @@ static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id) (kfile_len == MY_FILEPOS_ERROR)) { tprint(tracef, ", length unknown\n"); + recovery_warnings++; goto end; } if (share->state.state.data_file_length != dfile_len) @@ -1282,6 +1412,8 @@ static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id) /* Recovery will fix this, no error */ ALERT_USER(); } + +set_lsn_of_file_id: /* This LSN serves in this situation; assume log is: FILE_ID(6->"t2") REDO_INSERT(6) FILE_ID(6->"t1") CHECKPOINT(6->"t1") @@ -1309,7 +1441,7 @@ end: if (error == -1) error= 0; } - return error; + DBUG_RETURN(error); } /* @@ -1322,7 +1454,8 @@ prototype_redo_exec_hook(REDO_INSERT_ROW_HEAD) int error= 1; uchar *buff= NULL; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) + { /* Table was skipped at open time (because later dropped/renamed, not @@ -1388,7 +1521,7 @@ prototype_redo_exec_hook(REDO_INSERT_ROW_TAIL) int error= 1; uchar *buff; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; enlarge_buffer(rec); 
if (log_record_buffer.str == NULL || @@ -1429,7 +1562,7 @@ prototype_redo_exec_hook(REDO_INSERT_ROW_BLOBS) pgcache_page_no_t first_page, last_page; char llbuf1[22], llbuf2[22]; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; enlarge_buffer(rec); if (log_record_buffer.str == NULL || @@ -1463,7 +1596,7 @@ prototype_redo_exec_hook(REDO_PURGE_ROW_HEAD) { int error= 1; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; if (_ma_apply_redo_purge_row_head_or_tail(info, current_group_end_lsn, HEAD_PAGE, @@ -1479,7 +1612,7 @@ prototype_redo_exec_hook(REDO_PURGE_ROW_TAIL) { int error= 1; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; if (_ma_apply_redo_purge_row_head_or_tail(info, current_group_end_lsn, TAIL_PAGE, @@ -1496,7 +1629,7 @@ prototype_redo_exec_hook(REDO_FREE_BLOCKS) int error= 1; uchar *buff; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; enlarge_buffer(rec); @@ -1523,7 +1656,7 @@ prototype_redo_exec_hook(REDO_FREE_HEAD_OR_TAIL) { int error= 1; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; if (_ma_apply_redo_free_head_or_tail(info, current_group_end_lsn, @@ -1555,7 +1688,7 @@ prototype_redo_exec_hook(REDO_INDEX) { int error= 1; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; enlarge_buffer(rec); @@ -1581,7 +1714,7 @@ prototype_redo_exec_hook(REDO_INDEX_NEW_PAGE) { int error= 1; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; enlarge_buffer(rec); @@ -1608,7 +1741,7 @@ prototype_redo_exec_hook(REDO_INDEX_FREE_PAGE) 
{ int error= 1; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; if (_ma_apply_redo_index_free_page(info, current_group_end_lsn, @@ -1624,7 +1757,7 @@ prototype_redo_exec_hook(REDO_BITMAP_NEW_PAGE) { int error= 1; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; enlarge_buffer(rec); @@ -1901,7 +2034,7 @@ prototype_redo_exec_hook(IMPORTED_TABLE) return 1; } name= (char *)log_record_buffer.str; - tprint(tracef, "Table '%s' was imported (auto-zerofilled) in this Maria instance\n", name); + tprint(tracef, "Table '%s' was imported (auto-zerofilled) in this Aria instance\n", name); return 0; } @@ -2070,7 +2203,7 @@ prototype_undo_exec_hook(UNDO_ROW_INSERT) MARIA_SHARE *share; const uchar *record_ptr; - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) { /* Unlike for REDOs, if the table was skipped it is abnormal; we have a @@ -2126,7 +2259,7 @@ prototype_undo_exec_hook(UNDO_ROW_DELETE) LSN previous_undo_lsn= lsn_korr(rec->header); MARIA_SHARE *share; - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) { skip_undo_record(previous_undo_lsn, trn); return 0; @@ -2165,7 +2298,7 @@ prototype_undo_exec_hook(UNDO_ROW_UPDATE) LSN previous_undo_lsn= lsn_korr(rec->header); MARIA_SHARE *share; - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) { skip_undo_record(previous_undo_lsn, trn); return 0; @@ -2204,7 +2337,7 @@ prototype_undo_exec_hook(UNDO_KEY_INSERT) LSN previous_undo_lsn= lsn_korr(rec->header); MARIA_SHARE *share; - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) { skip_undo_record(previous_undo_lsn, trn); return 0; @@ -2245,7 +2378,7 @@ prototype_undo_exec_hook(UNDO_KEY_DELETE) LSN previous_undo_lsn= lsn_korr(rec->header); MARIA_SHARE *share; - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) { skip_undo_record(previous_undo_lsn, trn); return 
0; @@ -2286,7 +2419,7 @@ prototype_undo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT) LSN previous_undo_lsn= lsn_korr(rec->header); MARIA_SHARE *share; - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) { skip_undo_record(previous_undo_lsn, trn); return 0; @@ -2327,6 +2460,7 @@ prototype_undo_exec_hook(UNDO_BULK_INSERT) LSN previous_undo_lsn= lsn_korr(rec->header); MARIA_SHARE *share; + /* Here we don't check for crashed as we can undo the bulk insert */ if (info == NULL) { skip_undo_record(previous_undo_lsn, trn); @@ -2347,12 +2481,13 @@ prototype_undo_exec_hook(UNDO_BULK_INSERT) } -static int run_redo_phase(LSN lsn, enum maria_apply_log_way apply) +static int run_redo_phase(LSN lsn, LSN lsn_end, enum maria_apply_log_way apply) { TRANSLOG_HEADER_BUFFER rec; struct st_translog_scanner_data scanner; int len; uint i; + DBUG_ENTER("run_redo_phase"); /* install hooks for execution */ #define install_redo_exec_hook(R) \ @@ -2417,7 +2552,7 @@ static int run_redo_phase(LSN lsn, enum maria_apply_log_way apply) { tprint(tracef, "checkpoint address refers to the log end log or " "log is empty, nothing to do.\n"); - return 0; + DBUG_RETURN(0); } len= translog_read_record_header(lsn, &rec); @@ -2425,12 +2560,12 @@ static int run_redo_phase(LSN lsn, enum maria_apply_log_way apply) if (len == RECHEADER_READ_ERROR) { eprint(tracef, "Failed to read header of the first record."); - return 1; + DBUG_RETURN(1); } if (translog_scanner_init(lsn, 1, &scanner, 1)) { tprint(tracef, "Scanner init failed\n"); - return 1; + DBUG_RETURN(1); } for (i= 1;;i++) { @@ -2475,6 +2610,17 @@ static int run_redo_phase(LSN lsn, enum maria_apply_log_way apply) tprint(tracef, "Cannot find record where it should be\n"); goto err; } + if (lsn_end != LSN_IMPOSSIBLE && rec2.lsn >= lsn_end) + { + tprint(tracef, + "lsn_end reached at (%lu,0x%lx). 
" + "Skipping rest of redo entries", + LSN_IN_PARTS(rec2.lsn)); + translog_destroy_scanner(&scanner); + translog_free_record_header(&rec); + DBUG_RETURN(0); + } + if (translog_scanner_init(rec2.lsn, 1, &scanner2, 1)) { tprint(tracef, "Scanner2 init failed\n"); @@ -2570,12 +2716,12 @@ static int run_redo_phase(LSN lsn, enum maria_apply_log_way apply) fflush(stderr); procent_printed= 1; } - return 0; + DBUG_RETURN(0); err: translog_destroy_scanner(&scanner); translog_free_record_header(&rec); - return 1; + DBUG_RETURN(1); } @@ -3114,6 +3260,8 @@ static LSN parse_checkpoint_record(LSN lsn) return LSN_ERROR; next_dirty_page_in_pool= dirty_pages_pool; minimum_rec_lsn_of_dirty_pages= LSN_MAX; + if (maria_recovery_verbose) + tprint(tracef, "Table_id Is_index Page_id Rec_lsn\n"); for (i= 0; i < nb_dirty_pages ; i++) { pgcache_page_no_t page_id; @@ -3130,6 +3278,9 @@ static LSN parse_checkpoint_record(LSN lsn) if (new_page((is_index << 16) | table_id, page_id, rec_lsn, next_dirty_page_in_pool++)) return LSN_ERROR; + if (maria_recovery_verbose) + tprint(tracef, "%8u %8u %12lu %lu,0x%lx\n", (uint) table_id, + (uint) is_index, (ulong) page_id, LSN_IN_PARTS(rec_lsn)); set_if_smaller(minimum_rec_lsn_of_dirty_pages, rec_lsn); } /* after that, there will be no insert/delete into the hash */ @@ -3228,7 +3379,12 @@ static int close_all_tables(void) state while they were used. 
As Recovery corrected them, don't alarm the user, don't ask for a table check: */ - info->s->state.open_count= 0; + if (info->s->state.open_count != 0) + { + /* let ma_close() mark the table properly closed */ + info->s->state.open_count= 1; + info->s->global_changed= 1; + } prepare_table_for_close(info, addr); error|= maria_close(info); pthread_mutex_lock(&THR_LOCK_maria); diff --git a/storage/maria/ma_recovery.h b/storage/maria/ma_recovery.h index aa8fa7ecae9..0bfcdd17d39 100644 --- a/storage/maria/ma_recovery.h +++ b/storage/maria/ma_recovery.h @@ -26,7 +26,7 @@ C_MODE_START enum maria_apply_log_way { MARIA_LOG_APPLY, MARIA_LOG_DISPLAY_HEADER, MARIA_LOG_CHECK }; int maria_recovery_from_log(void); -int maria_apply_log(LSN lsn, enum maria_apply_log_way apply, +int maria_apply_log(LSN lsn, LSN lsn_end, enum maria_apply_log_way apply, FILE *trace_file, my_bool execute_undo_phase, my_bool skip_DDLs, my_bool take_checkpoints, uint *warnings_count); diff --git a/storage/maria/ma_recovery_util.c b/storage/maria/ma_recovery_util.c index a45a990472e..19e61daf4ef 100644 --- a/storage/maria/ma_recovery_util.c +++ b/storage/maria/ma_recovery_util.c @@ -57,8 +57,16 @@ void tprint(FILE *trace_file __attribute__ ((unused)), const char *format __attribute__ ((unused)), ...) 
{ va_list args; +#ifndef DBUG_OFF + { + char buff[1024]; + va_start(args, format); + vsnprintf(buff, sizeof(buff)-1, format, args); + DBUG_PRINT("info", ("%s", buff)); + va_end(args); + } +#endif va_start(args, format); - DBUG_PRINT("info", ("%s", format)); if (trace_file != NULL) { if (procent_printed) diff --git a/storage/maria/ma_rt_index.c b/storage/maria/ma_rt_index.c index d50dd99e5cf..62474dbbad8 100644 --- a/storage/maria/ma_rt_index.c +++ b/storage/maria/ma_rt_index.c @@ -625,7 +625,8 @@ static int maria_rtree_insert_req(MARIA_HA *info, MARIA_KEY *key, { maria_rtree_combine_rect(keyinfo->seg, k, key->data, k, key_length); if (share->now_transactional && - _ma_log_change(&page, k, key_length)) + _ma_log_change(&page, k, key_length, + KEY_OP_DEBUG_RTREE_COMBINE)) goto err; page_mark_changed(info, &page); if (_ma_write_keypage(&page, PAGECACHE_LOCK_LEFT_WRITELOCKED, @@ -652,7 +653,8 @@ static int maria_rtree_insert_req(MARIA_HA *info, MARIA_KEY *key, if (maria_rtree_set_key_mbr(info, &k_key, _ma_kpos(nod_flag, k))) goto err; if (share->now_transactional && - _ma_log_change(&page, k, key_length)) + _ma_log_change(&page, k, key_length, + KEY_OP_DEBUG_RTREE_SPLIT)) goto err; /* add new key for new page */ _ma_kpointer(info, new_key_buff - nod_flag, *new_page); @@ -964,7 +966,8 @@ static int maria_rtree_delete_req(MARIA_HA *info, const MARIA_KEY *key, _ma_kpos(nod_flag, k))) goto err; if (share->now_transactional && - _ma_log_change(&page, k, key->data_length)) + _ma_log_change(&page, k, key->data_length, + KEY_OP_DEBUG_RTREE_SET_KEY)) goto err; page_mark_changed(info, &page) if (_ma_write_keypage(&page, diff --git a/storage/maria/ma_rt_key.c b/storage/maria/ma_rt_key.c index bc2c8f71f5d..2e204990a3b 100644 --- a/storage/maria/ma_rt_key.c +++ b/storage/maria/ma_rt_key.c @@ -91,7 +91,8 @@ int maria_rtree_delete_key(MARIA_PAGE *page, uchar *key, uint key_length) page->size-= key_length_with_nod_flag; page_store_size(share, page); if (share->now_transactional && - 
_ma_log_delete(page, key_start, 0, key_length_with_nod_flag)) + _ma_log_delete(page, key_start, 0, key_length_with_nod_flag, + 0, KEY_OP_DEBUG_LOG_DEL_CHANGE_RT)) return -1; return 0; } diff --git a/storage/maria/ma_rt_split.c b/storage/maria/ma_rt_split.c index 8f137c2e0cf..856edc60490 100644 --- a/storage/maria/ma_rt_split.c +++ b/storage/maria/ma_rt_split.c @@ -308,7 +308,7 @@ static my_bool _ma_log_rt_split(MARIA_PAGE *page, LSN lsn; uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 1 + 2 + 1 + 2 + 2 + 7], *log_pos; - LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 5]; + LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 6]; uint translog_parts, extra_length= 0; my_off_t page_pos; DBUG_ENTER("_ma_log_rt_split"); @@ -344,24 +344,11 @@ static my_bool _ma_log_rt_split(MARIA_PAGE *page, translog_parts+= 2; } -#ifdef EXTRA_DEBUG_KEY_CHANGES - { - int page_length= page->size; - ha_checksum crc; - uchar *check_start= log_pos; - crc= my_checksum(0, page->buff + LSN_STORE_SIZE, - page_length - LSN_STORE_SIZE); - log_pos[0]= KEY_OP_CHECK; - log_pos++; - int2store(log_pos, page_length); - log_pos+= 2; - int4store(log_pos, crc); - log_pos+= 4; - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= check_start; - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7; - translog_parts++; - } -#endif + _ma_log_key_changes(page, + log_array + TRANSLOG_INTERNAL_PARTS + translog_parts, + log_pos, &extra_length, &translog_parts); + /* Remember new page length for future log entires for same page */ + page->org_size= page->size; if (translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, diff --git a/storage/maria/ma_sort.c b/storage/maria/ma_sort.c index cb3e6bc3ee3..f7f79f90cf0 100644 --- a/storage/maria/ma_sort.c +++ b/storage/maria/ma_sort.c @@ -154,7 +154,7 @@ int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages, keys < (uint) maxbuffer) { _ma_check_print_error(info->sort_info->param, - "maria_sort_buffer_size is too small"); + 
"aria_sort_buffer_size is too small"); goto err; } } @@ -178,7 +178,7 @@ int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages, } if (memavl < MIN_SORT_MEMORY) { - _ma_check_print_error(info->sort_info->param, "Maria sort buffer" + _ma_check_print_error(info->sort_info->param, "Aria sort buffer" " too small"); /* purecov: tested */ goto err; /* purecov: tested */ } @@ -377,7 +377,7 @@ pthread_handler_t _ma_thr_find_all_keys(void *arg) keys < maxbuffer) { _ma_check_print_error(sort_param->sort_info->param, - "maria_sort_buffer_size is too small"); + "aria_sort_buffer_size is too small"); goto err; } } @@ -405,7 +405,7 @@ pthread_handler_t _ma_thr_find_all_keys(void *arg) if (memavl < MIN_SORT_MEMORY) { _ma_check_print_error(sort_param->sort_info->param, - "Maria sort buffer too small"); + "Aria sort buffer too small"); goto err; /* purecov: tested */ } diff --git a/storage/maria/ma_state.c b/storage/maria/ma_state.c index d7ddc73e2b4..c7eee7d511d 100644 --- a/storage/maria/ma_state.c +++ b/storage/maria/ma_state.c @@ -53,12 +53,16 @@ my_bool _ma_setup_live_state(MARIA_HA *info) { - TRN *trn= info->trn; + TRN *trn; MARIA_SHARE *share= info->s; MARIA_USED_TABLES *tables; MARIA_STATE_HISTORY *history; DBUG_ENTER("_ma_setup_live_state"); + if (maria_create_trn_hook(info)) + DBUG_RETURN(1); + + trn= info->trn; for (tables= (MARIA_USED_TABLES*) info->trn->used_tables; tables; tables= tables->next) @@ -69,6 +73,7 @@ my_bool _ma_setup_live_state(MARIA_HA *info) goto end; } } + /* Table was not used before, create new table state entry */ if (!(tables= (MARIA_USED_TABLES*) my_malloc(sizeof(*tables), MYF(MY_WME | MY_ZEROFILL)))) @@ -566,7 +571,8 @@ void _ma_block_get_status(void* param, my_bool concurrent_insert) { MARIA_HA *info=(MARIA_HA*) param; DBUG_ENTER("_ma_block_get_status"); - DBUG_PRINT("info", ("concurrent_insert %d", concurrent_insert)); + DBUG_PRINT("enter", ("concurrent_insert %d", concurrent_insert)); + info->row_base_length= 
info->s->base_length; info->row_flag= info->s->base.default_row_flag; if (concurrent_insert) @@ -589,6 +595,21 @@ void _ma_block_get_status(void* param, my_bool concurrent_insert) */ (void) _ma_setup_live_state(info); } + else + { + /* + Info->trn is set if this table is already handled and we are + called from maria_versioning() + */ + if (info->s->base.born_transactional && !info->trn) + { + /* + Assume for now that this doesn't fail (It can only fail in + out of memory conditions) + */ + (void) maria_create_trn_hook(info); + } + } DBUG_VOID_RETURN; } @@ -616,6 +637,30 @@ my_bool _ma_block_check_status(void *param __attribute__((unused))) } +/* Get status when transactional but not versioned */ + +void _ma_block_get_status_no_versioning(void* param, + my_bool concurrent_insert + __attribute__((unused))) +{ + MARIA_HA *info=(MARIA_HA*) param; + DBUG_ENTER("_ma_block_get_status_no_version"); + DBUG_PRINT("enter", ("concurrent_insert %d", concurrent_insert)); + DBUG_ASSERT(info->s->base.born_transactional); + + info->state->changed= 0; /* from _ma_reset_update_flag() */ + if (!info->trn) + { + /* + Assume for now that this doesn't fail (It can only fail in + out of memory conditions) + */ + (void) maria_create_trn_hook(info); + } + DBUG_VOID_RETURN; +} + + /** Enable/disable versioning */ @@ -633,6 +678,7 @@ void maria_versioning(MARIA_HA *info, my_bool versioning) info->lock.type= versioning ? 
TL_WRITE_CONCURRENT_INSERT : TL_WRITE; _ma_block_get_status((void*) info, versioning); info->lock.type= save_lock_type; + info->state= info->state_start= &info->s->state.common; } } diff --git a/storage/maria/ma_static.c b/storage/maria/ma_static.c index b4589469caf..917385f9568 100644 --- a/storage/maria/ma_static.c +++ b/storage/maria/ma_static.c @@ -37,6 +37,7 @@ my_bool maria_flush= 0, maria_single_user= 0; my_bool maria_delay_key_write= 0, maria_page_checksums= 1; my_bool maria_inited= FALSE; my_bool maria_in_ha_maria= FALSE; /* If used from ha_maria or not */ +my_bool maria_recovery_changed_data= 0, maria_recovery_verbose= 0; pthread_mutex_t THR_LOCK_maria; #if defined(THREAD) && !defined(DONT_USE_RW_LOCKS) ulong maria_concurrent_insert= 2; @@ -55,6 +56,7 @@ PAGECACHE *maria_log_pagecache= &maria_log_pagecache_var; MY_TMPDIR *maria_tmpdir; /* Tempdir for redo */ char *maria_data_root; HASH maria_stored_state; +int (*maria_create_trn_hook)(MARIA_HA *); /** @brief when transactionality does not matter we can use this transaction diff --git a/storage/maria/ma_test2.c b/storage/maria/ma_test2.c index 081cac8a2aa..9e2f32f767b 100644 --- a/storage/maria/ma_test2.c +++ b/storage/maria/ma_test2.c @@ -83,6 +83,9 @@ int main(int argc, char *argv[]) if (! 
async_io) my_disable_async_io=1; + /* If we sync or not have no affect on this test */ + my_disable_sync= 1; + maria_data_root= (char *)"."; /* Maria requires that we always have a page cache */ if (maria_init() || @@ -351,7 +354,10 @@ int main(int argc, char *argv[]) key3[atoi((char*) read_record+keyinfo[2].seg[0].start)]=0; } else + { puts("Warning: Skipping delete test because no dupplicate keys"); + break; + } } if (testflag == 3) goto end; diff --git a/storage/maria/ma_test_force_start.pl b/storage/maria/ma_test_force_start.pl index 7ab8190a738..8148b2f212b 100755 --- a/storage/maria/ma_test_force_start.pl +++ b/storage/maria/ma_test_force_start.pl @@ -6,7 +6,7 @@ use warnings; my $usage= <<EOF; This program tests that the options ---maria-force-start-after-recovery-failures --maria-recover work as +--aria-force-start-after-recovery-failures --aria-recover work as expected. It has to be run from directory mysql-test, and works with non-debug and debug binaries. @@ -43,7 +43,7 @@ my $error_log_name= "./var/log/master.err"; my @cmd_output; my $whatever; # garbage data $ENV{MTR_VERSION} = 1; # MTR2 does not have --start-and-exit -my $base_server_cmd= "perl mysql-test-run.pl --mysqld=--maria-force-start-after-recovery-failures=$force_after --suite=maria maria.maria-recover ";
+my $base_server_cmd= "perl mysql-test-run.pl --mysqld=--aria-force-start-after-recovery-failures=$force_after --suite=maria maria.maria-recover ";
if ($^O =~ /^mswin/i) { print <<EOF; @@ -101,7 +101,7 @@ open(FILE, ">", $sql_name) or die; # sort_get_next_record() whose failure itself does not cause a retry. print FILE "create table t1 (a varchar(1000)". - ($corrupt_index ? ", index(a)" : "") .") engine=maria;\n"; + ($corrupt_index ? ", index(a)" : "") .") engine=aria;\n"; print FILE <<EOF; insert into t1 values("ThursdayMorningsMarket"); # If Recovery executes REDO_INDEX_NEW_PAGE it will overwrite our @@ -109,7 +109,7 @@ insert into t1 values("ThursdayMorningsMarket"); # create_rename_lsn using OPTIMIZE TABLE. This also makes sure to put # the pages on disk, so that we can corrupt them. optimize table t1; -# mark table open, so that --maria-recover repairs it +# mark table open, so that --aria-recover repairs it insert into t1 select concat(a,'b') from t1 limit 1; EOF close FILE; @@ -123,7 +123,7 @@ kill_server(9); print "ruining " . ($corrupt_index ? "first page of keys" : "bitmap page") . - " in table to test maria-recover\n"; + " in table to test aria-recover\n"; open(FILE, "+<", "./var/master-data/test/t1.$corrupt_file") or die; $whatever= ("\xAB" x 100); sysseek (FILE, $corrupt_index ? 
8192 : (8192-100-100), 0) or die; @@ -131,7 +131,7 @@ syswrite (FILE, $whatever) or die; close FILE; print "ruining log to make recovery fail; mysqld should fail the $force_after first restarts\n"; -open(FILE, "+<", "./var/tmp/maria_log.00000001") or die; +open(FILE, "+<", "./var/tmp/aria_log.00000001") or die; $whatever= ("\xAB" x 8192); sysseek (FILE, 99, 0) or die; syswrite (FILE, $whatever) or die; @@ -148,8 +148,8 @@ for($i= 1; $i <= $force_after; $i= $i + 1) open(FILE, "<", $error_log_name) or die; @cmd_output= <FILE>; close FILE; - die unless grep(/\[ERROR\] mysqld(.exe)*: Maria engine: log initialization failed/, @cmd_output); - die unless grep(/\[ERROR\] Plugin 'MARIA' init function returned error./, @cmd_output); + die unless grep(/\[ERROR\] mysqld(.exe)*: Aria engine: log initialization failed/, @cmd_output); + die unless grep(/\[ERROR\] Plugin 'Aria' init function returned error./, @cmd_output); print "failed - ok\n"; } @@ -160,13 +160,13 @@ die if $?; open(FILE, "<", $error_log_name) or die; @cmd_output= <FILE>; close FILE; -die unless grep(/\[Warning\] mysqld(.exe)*: Maria engine: removed all logs after [\d]+ consecutive failures of recovery from logs/, @cmd_output); -die unless grep(/\[ERROR\] mysqld(.exe)*: File '.*tmp.maria_log.00000001' not found \(Errcode: 2\)/, @cmd_output); +die unless grep(/\[Warning\] mysqld(.exe)*: Aria engine: removed all logs after [\d]+ consecutive failures of recovery from logs/, @cmd_output); +die unless grep(/\[ERROR\] mysqld(.exe)*: File '.*tmp.aria_log.00000001' not found \(Errcode: 2\)/, @cmd_output); print "success - ok\n"; open(FILE, ">", $sql_name) or die; print FILE <<EOF; -set global maria_recover=normal; +set global aria_recover=normal; insert into t1 values('aaa'); EOF close FILE; diff --git a/storage/maria/ma_unique.c b/storage/maria/ma_unique.c index bae58fd70cd..a90578c2162 100644 --- a/storage/maria/ma_unique.c +++ b/storage/maria/ma_unique.c @@ -68,8 +68,7 @@ my_bool _ma_check_unique(MARIA_HA *info, 
MARIA_UNIQUEDEF *def, uchar *record, DBUG_ASSERT(info->last_key.data_length == MARIA_UNIQUE_HASH_LENGTH); if (_ma_search_next(info, &info->last_key, SEARCH_BIGGER, info->s->state.key_root[def->key]) || - bcmp((char*) info->last_key.data, (char*) key_buff, - MARIA_UNIQUE_HASH_LENGTH)) + bcmp(info->last_key.data, key_buff, MARIA_UNIQUE_HASH_LENGTH)) { info->page_changed= 1; /* Can't optimize read next */ info->cur_row.lastpos= lastpos; diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c index 3b9ca46899f..c6718299d17 100644 --- a/storage/maria/ma_write.c +++ b/storage/maria/ma_write.c @@ -613,7 +613,7 @@ static int w_search(register MARIA_HA *info, uint32 comp_flag, MARIA_KEY *key, MARIA_KEYDEF *keyinfo= key->keyinfo; MARIA_PAGE page; DBUG_ENTER("w_search"); - DBUG_PRINT("enter",("page: %ld", (long) page_pos)); + DBUG_PRINT("enter", ("page: %lu", (ulong) (page_pos/keyinfo->block_length))); if (!(temp_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ MARIA_MAX_KEY_BUFF*2))) @@ -823,9 +823,9 @@ int _ma_insert(register MARIA_HA *info, MARIA_KEY *key, Check if the new key fits totally into the the page (anc_buff is big enough to contain a full page + one key) */ - if (a_length <= (uint) keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE) + if (a_length <= share->max_index_block_size) { - if (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE - a_length < 32 && + if (share->max_index_block_size - a_length < 32 && (keyinfo->flag & HA_FULLTEXT) && key_pos == endpos && share->base.key_reflength <= share->base.rec_reflength && share->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) @@ -885,9 +885,9 @@ ChangeSet@1.2562, 2008-04-09 07:41:40+02:00, serg@janus.mylan +9 -0 } else { - if (share->now_transactional && + if (share->now_transactional && _ma_log_add(anc_page, org_anc_length, - key_pos, s_temp.changed_length, t_length, 0)) + key_pos, s_temp.changed_length, t_length, 1)) DBUG_RETURN(-1); } DBUG_RETURN(0); /* There is room on page */ @@ -1265,7 
+1265,7 @@ static int _ma_balance_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, curr_keylength); if ((right ? right_length : left_length) + curr_keylength <= - (uint) keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE) + share->max_index_block_size) { /* Enough space to hold all keys in the two buffers ; Balance bufferts */ new_left_length= share->keypage_header+nod_flag+(keys/2)*curr_keylength; @@ -1320,7 +1320,8 @@ static int _ma_balance_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, start of page */ if (_ma_log_prefix(&next_page, 0, - ((int) new_right_length - (int) right_length))) + ((int) new_right_length - (int) right_length), + KEY_OP_DEBUG_LOG_PREFIX_3)) goto err; } else @@ -1383,7 +1384,8 @@ static int _ma_balance_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, */ if (_ma_log_prefix(&next_page, (uint) (new_right_length - right_length), - (int) (new_right_length - right_length))) + (int) (new_right_length - right_length), + KEY_OP_DEBUG_LOG_PREFIX_4)) goto err; } else @@ -1416,7 +1418,8 @@ static int _ma_balance_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, /* Log changes to father (one level up) page */ if (share->now_transactional && - _ma_log_change(father_page, father_key_pos, k_length)) + _ma_log_change(father_page, father_key_pos, k_length, + KEY_OP_DEBUG_FATHER_CHANGED_1)) goto err; /* @@ -1544,7 +1547,8 @@ static int _ma_balance_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, This contains the last 'extra_buff' from 'buff' */ if (_ma_log_prefix(&extra_page, - 0, (int) (extra_buff_length - right_length))) + 0, (int) (extra_buff_length - right_length), + KEY_OP_DEBUG_LOG_PREFIX_5)) goto err; /* @@ -1583,7 +1587,8 @@ static int _ma_balance_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, /* Log changes to father (one level up) page */ if (share->now_transactional && - _ma_log_change(father_page, father_key_pos, k_length)) + _ma_log_change(father_page, father_key_pos, k_length, + KEY_OP_DEBUG_FATHER_CHANGED_2)) goto err; } @@ -1755,7 +1760,7 @@ void 
maria_flush_bulk_insert(MARIA_HA *info, uint inx) } } -void maria_end_bulk_insert(MARIA_HA *info, my_bool abort) +void maria_end_bulk_insert(MARIA_HA *info) { DBUG_ENTER("maria_end_bulk_insert"); if (info->bulk_insert) @@ -1765,7 +1770,7 @@ void maria_end_bulk_insert(MARIA_HA *info, my_bool abort) { if (is_tree_inited(&info->bulk_insert[i])) { - if (abort) + if (info->s->deleting) reset_free_element(&info->bulk_insert[i]); delete_tree(&info->bulk_insert[i]); } @@ -1889,6 +1894,9 @@ my_bool _ma_log_new(MARIA_PAGE *ma_page, my_bool root_page) log_array[TRANSLOG_INTERNAL_PARTS + 1].str= ma_page->buff + LSN_STORE_SIZE; log_array[TRANSLOG_INTERNAL_PARTS + 1].length= page_length; + /* Remember new page length for future log entires for same page */ + ma_page->org_size= ma_page->size; + if (translog_write_record(&lsn, LOGREC_REDO_INDEX_NEW_PAGE, info->trn, info, (translog_size_t) @@ -1905,57 +1913,51 @@ my_bool _ma_log_new(MARIA_PAGE *ma_page, my_bool root_page) Log when some part of the key page changes */ -my_bool _ma_log_change(MARIA_PAGE *ma_page, - const uchar *key_pos, uint length) +my_bool _ma_log_change(MARIA_PAGE *ma_page, const uchar *key_pos, uint length, + enum en_key_debug debug_marker __attribute__((unused))) { LSN lsn; - uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 6 + 7], *log_pos; - LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 3]; + uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 2 + 6 + 7], *log_pos; + LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 4]; uint offset= (uint) (key_pos - ma_page->buff), translog_parts; - uint extra_length= 0; my_off_t page; MARIA_HA *info= ma_page->info; DBUG_ENTER("_ma_log_change"); DBUG_PRINT("enter", ("page: %lu length: %u", (ulong) ma_page->pos, length)); DBUG_ASSERT(info->s->now_transactional); + DBUG_ASSERT(offset + length <= ma_page->size); + DBUG_ASSERT(ma_page->org_size == ma_page->size); /* Store address of new root page */ page= ma_page->pos / info->s->block_size; page_store(log_data + 
FILEID_STORE_SIZE, page); log_pos= log_data+ FILEID_STORE_SIZE + PAGE_STORE_SIZE; + +#ifdef EXTRA_DEBUG_KEY_CHANGES + (*log_pos++)= KEY_OP_DEBUG; + (*log_pos++)= debug_marker; +#endif + log_pos[0]= KEY_OP_OFFSET; int2store(log_pos+1, offset); log_pos[3]= KEY_OP_CHANGE; int2store(log_pos+4, length); + log_pos+= 6; log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data) - 7; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (log_pos - log_data); log_array[TRANSLOG_INTERNAL_PARTS + 1].str= key_pos; log_array[TRANSLOG_INTERNAL_PARTS + 1].length= length; translog_parts= 2; -#ifdef EXTRA_DEBUG_KEY_CHANGES - { - int page_length= ma_page->size; - ha_checksum crc; - crc= my_checksum(0, ma_page->buff + LSN_STORE_SIZE, - page_length - LSN_STORE_SIZE); - log_pos+= 6; - log_pos[0]= KEY_OP_CHECK; - int2store(log_pos+1, page_length); - int4store(log_pos+3, crc); - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= log_pos; - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7; - extra_length+= 7; - translog_parts++; - } -#endif + _ma_log_key_changes(ma_page, + log_array + TRANSLOG_INTERNAL_PARTS + translog_parts, + log_pos, &length, &translog_parts); if (translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, - (translog_size_t) (sizeof(log_data) - 7 + length + - extra_length), + (translog_size_t) (log_pos - log_data) + length, TRANSLOG_INTERNAL_PARTS + translog_parts, log_array, log_data, NULL)) DBUG_RETURN(1); @@ -1966,17 +1968,27 @@ my_bool _ma_log_change(MARIA_PAGE *ma_page, /** @brief Write log entry for page splitting + @fn _ma_log_split() + @param + ma_page Page that is changed + org_length Original length of page + new_length New length of page + key_pos Where key is inserted on page (may be 0 if no key) + key_length Number of bytes changed at key_pos + move_length Number of bytes moved at key_pos to make room for key + prefix_or_suffix KEY_OP_NONE Ignored + KEY_OP_ADD_PREFIX Add data 
to start of page + KEY_OP_ADD_SUFFIX Add data to end of page + data What data was added + data_length Number of bytes added first or last + changed_length Number of bytes changed first or last. + @note Write log entry for page that has got a key added to the page under one and only one of the following senarios: - Page is shortened from end - Data is added to end of page - Data added at front of page - - @param prefix_or_suffix KEY_OP_NONE Ignored - KEY_OP_ADD_PREFIX Add data to start of page - KEY_OP_ADD_SUFFIX Add data to end of page - */ static my_bool _ma_log_split(MARIA_PAGE *ma_page, @@ -1987,9 +1999,9 @@ static my_bool _ma_log_split(MARIA_PAGE *ma_page, uint changed_length) { LSN lsn; - uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 3+3+3+3+3+2]; + uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 2 + 2 + 3+3+3+3+3+2 +7]; uchar *log_pos; - LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 3]; + LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 6]; uint offset= (uint) (key_pos - ma_page->buff); uint translog_parts, extra_length; MARIA_HA *info= ma_page->info; @@ -1998,11 +2010,23 @@ static my_bool _ma_log_split(MARIA_PAGE *ma_page, DBUG_PRINT("enter", ("page: %lu org_length: %u new_length: %u", (ulong) ma_page->pos, org_length, new_length)); + DBUG_ASSERT(changed_length >= data_length); + DBUG_ASSERT(org_length <= info->s->max_index_block_size); + log_pos= log_data + FILEID_STORE_SIZE; page= ma_page->pos / info->s->block_size; page_store(log_pos, page); log_pos+= PAGE_STORE_SIZE; +#ifdef EXTRA_DEBUG_KEY_CHANGES + (*log_pos++)= KEY_OP_DEBUG; + (*log_pos++)= KEY_OP_DEBUG_LOG_SPLIT; +#endif + + /* Store keypage_flag */ + *log_pos++= KEY_OP_SET_PAGEFLAG; + *log_pos++= ma_page->buff[KEYPAGE_TRANSFLAG_OFFSET]; + if (new_length <= offset || !key_pos) { /* @@ -2015,6 +2039,7 @@ static my_bool _ma_log_split(MARIA_PAGE *ma_page, log_pos+= 3; translog_parts= 1; extra_length= 0; + DBUG_ASSERT(data_length == 0); } else { @@ -2026,6 +2051,11 @@ static my_bool 
_ma_log_split(MARIA_PAGE *ma_page, */ max_key_length= new_length - offset; extra_length= min(key_length, max_key_length); + if (offset + move_length > new_length) + { + /* This is true when move_length includes changes for next packed key */ + move_length= new_length - offset; + } if ((int) new_length < (int) (org_length + move_length + data_length)) { @@ -2034,10 +2064,13 @@ static my_bool _ma_log_split(MARIA_PAGE *ma_page, log_pos[0]= KEY_OP_DEL_SUFFIX; int2store(log_pos + 1, diff); log_pos+= 3; + DBUG_ASSERT(data_length == 0); /* Page is shortened */ + DBUG_ASSERT(offset <= org_length - diff); } else { DBUG_ASSERT(new_length == org_length + move_length + data_length); + DBUG_ASSERT(offset <= org_length); } log_pos[0]= KEY_OP_OFFSET; @@ -2085,6 +2118,13 @@ static my_bool _ma_log_split(MARIA_PAGE *ma_page, log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - log_data); + + _ma_log_key_changes(ma_page, + log_array + TRANSLOG_INTERNAL_PARTS + translog_parts, + log_pos, &extra_length, &translog_parts); + /* Remember new page length for future log entires for same page */ + ma_page->org_size= ma_page->size; + DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, (translog_size_t) @@ -2122,8 +2162,9 @@ static my_bool _ma_log_del_prefix(MARIA_PAGE *ma_page, int move_length) { LSN lsn; - uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 12], *log_pos; - LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 2]; + uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 2 + 2 + 12 + 7]; + uchar *log_pos; + LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 4]; uint offset= (uint) (key_pos - ma_page->buff); uint diff_length= org_length + move_length - new_length; uint translog_parts, extra_length; @@ -2134,6 +2175,7 @@ static my_bool _ma_log_del_prefix(MARIA_PAGE *ma_page, (ulong) ma_page->pos, org_length, new_length)); DBUG_ASSERT((int) diff_length > 0); + DBUG_ASSERT(ma_page->size == 
new_length); log_pos= log_data + FILEID_STORE_SIZE; page= ma_page->pos / info->s->block_size; @@ -2143,6 +2185,15 @@ static my_bool _ma_log_del_prefix(MARIA_PAGE *ma_page, translog_parts= 1; extra_length= 0; +#ifdef EXTRA_DEBUG_KEY_CHANGES + *log_pos++= KEY_OP_DEBUG; + *log_pos++= KEY_OP_DEBUG_LOG_DEL_PREFIX; +#endif + + /* Store keypage_flag */ + *log_pos++= KEY_OP_SET_PAGEFLAG; + *log_pos++= ma_page->buff[KEYPAGE_TRANSFLAG_OFFSET]; + if (offset < diff_length + info->s->keypage_header) { /* @@ -2190,6 +2241,12 @@ static my_bool _ma_log_del_prefix(MARIA_PAGE *ma_page, log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - log_data); + _ma_log_key_changes(ma_page, + log_array + TRANSLOG_INTERNAL_PARTS + translog_parts, + log_pos, &extra_length, &translog_parts); + /* Remember new page length for future log entires for same page */ + ma_page->org_size= ma_page->size; + DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, (translog_size_t) @@ -2215,9 +2272,9 @@ static my_bool _ma_log_key_middle(MARIA_PAGE *ma_page, uint key_length, int move_length) { LSN lsn; - uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 3+5+3+3+3]; + uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 2 + 2 + 3+5+3+3+3 + 7]; uchar *log_pos; - LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 4]; + LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 6]; uint key_offset; uint translog_parts, extra_length; my_off_t page; @@ -2225,6 +2282,8 @@ static my_bool _ma_log_key_middle(MARIA_PAGE *ma_page, DBUG_ENTER("_ma_log_key_middle"); DBUG_PRINT("enter", ("page: %lu", (ulong) ma_page->pos)); + DBUG_ASSERT(ma_page->size == new_length); + /* new place of key after changes */ key_pos+= data_added_first; key_offset= (uint) (key_pos - ma_page->buff); @@ -2252,6 +2311,15 @@ static my_bool _ma_log_key_middle(MARIA_PAGE *ma_page, page_store(log_pos, page); log_pos+= PAGE_STORE_SIZE; +#ifdef EXTRA_DEBUG_KEY_CHANGES + 
*log_pos++= KEY_OP_DEBUG; + *log_pos++= KEY_OP_DEBUG_LOG_MIDDLE; +#endif + + /* Store keypage_flag */ + *log_pos++= KEY_OP_SET_PAGEFLAG; + *log_pos++= ma_page->buff[KEYPAGE_TRANSFLAG_OFFSET]; + log_pos[0]= KEY_OP_DEL_SUFFIX; int2store(log_pos+1, data_deleted_last); log_pos+= 3; @@ -2300,6 +2368,12 @@ static my_bool _ma_log_key_middle(MARIA_PAGE *ma_page, key_length); } + _ma_log_key_changes(ma_page, + log_array + TRANSLOG_INTERNAL_PARTS + translog_parts, + log_pos, &extra_length, &translog_parts); + /* Remember new page length for future log entires for same page */ + ma_page->org_size= ma_page->size; + DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, (translog_size_t) @@ -2323,13 +2397,17 @@ static my_bool _ma_log_middle(MARIA_PAGE *ma_page, uint data_deleted_last) { LSN lsn; - LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2]; - uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 3 + 5], *log_pos; + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 4]; + uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 3 + 5 + 7], *log_pos; MARIA_HA *info= ma_page->info; my_off_t page; + uint translog_parts, extra_length; DBUG_ENTER("_ma_log_middle"); DBUG_PRINT("enter", ("page: %lu", (ulong) page)); + DBUG_ASSERT(ma_page->org_size + data_added_first - data_deleted_last == + ma_page->size); + page= ma_page->page / info->s->block_size; log_pos= log_data + FILEID_STORE_SIZE; @@ -2352,12 +2430,21 @@ static my_bool _ma_log_middle(MARIA_PAGE *ma_page, log_array[TRANSLOG_INTERNAL_PARTS + 1].str= ((char*) buff + info->s->keypage_header); log_array[TRANSLOG_INTERNAL_PARTS + 1].length= data_changed_first; + translog_parts= 2; + extra_length= data_changed_first; + + _ma_log_key_changes(ma_page, + log_array + TRANSLOG_INTERNAL_PARTS + translog_parts, + log_pos, &extra_length, &translog_parts); + /* Remember new page length for future log entires for same page */ + ma_page->org_size= ma_page->size; + DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX, 
info->trn, info, (translog_size_t) log_array[TRANSLOG_INTERNAL_PARTS + - 0].length + data_changed_first, - TRANSLOG_INTERNAL_PARTS + 2, + 0].length + extra_length, + TRANSLOG_INTERNAL_PARTS + translog_parts, log_array, log_data, NULL)); } #endif diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c index ab3c641b55f..4e19d5878ea 100644 --- a/storage/maria/maria_chk.c +++ b/storage/maria/maria_chk.c @@ -36,12 +36,13 @@ SET_STACK_SIZE(9000) /* Minimum stack size for program */ static uint decode_bits; static char **default_argv; -static const char *load_default_groups[]= { "maria_chk", 0 }; +static const char *load_default_groups[]= { "aria_chk", 0 }; static const char *set_collation_name, *opt_tmpdir, *opt_log_dir; static CHARSET_INFO *set_collation; static int stopwords_inited= 0; static MY_TMPDIR maria_chk_tmpdir; static my_bool opt_transaction_logging, opt_debug, opt_require_control_file; +static my_bool opt_warning_for_wrong_transid; static const char *type_names[]= { @@ -70,9 +71,17 @@ static const char *record_formats[]= "Fixed length", "Packed", "Compressed", "Block", "?" 
}; +static const char *bitmap_description[]= +{ + "Empty page", "Part filled head page","Part filled head page", + "Part filled head page", "Full head page", + "Part filled tail page","Part filled tail page", + "Full tail or blob page" +}; + static const char *maria_stats_method_str="nulls_unequal"; -static char default_open_errmsg[]= "%d when opening MARIA-table '%s'"; -static char default_close_errmsg[]= "%d when closing MARIA-table '%s'"; +static char default_open_errmsg[]= "%d when opening Aria table '%s'"; +static char default_close_errmsg[]= "%d when closing Aria table '%s'"; static void get_options(int *argc,char * * *argv); static void print_version(void); @@ -105,7 +114,9 @@ int main(int argc, char **argv) error=0; maria_init(); - if (ma_control_file_open(FALSE, opt_require_control_file) && + maria_block_size= 0; /* Use block size from control file */ + if (ma_control_file_open(FALSE, opt_require_control_file || + !(check_param.testflag & T_SILENT)) && (opt_require_control_file || (opt_transaction_logging && (check_param.testflag & T_REP_ANY)))) { @@ -170,7 +181,7 @@ end: char buff[22],buff2[22]; if (!(check_param.testflag & T_SILENT) || check_param.testflag & T_INFO) puts("\n---------"); - printf("\nTotal of all %d MARIA-files:\nData records: %9s Deleted blocks: %9s\n",check_param.total_files,llstr(check_param.total_records,buff), + printf("\nTotal of all %d Aria-files:\nData records: %9s Deleted blocks: %9s\n",check_param.total_files,llstr(check_param.total_records,buff), llstr(check_param.total_deleted,buff2)); } free_defaults(default_argv); @@ -193,7 +204,7 @@ enum options_mc { OPT_FT_MAX_WORD_LEN, OPT_FT_STOPWORD_FILE, OPT_MAX_RECORD_LENGTH, OPT_AUTO_CLOSE, OPT_STATS_METHOD, OPT_TRANSACTION_LOG, OPT_SKIP_SAFEMALLOC, OPT_ZEROFILL_KEEP_LSN, OPT_REQUIRE_CONTROL_FILE, - OPT_LOG_DIR, OPT_DATADIR + OPT_LOG_DIR, OPT_DATADIR, OPT_WARNING_FOR_WRONG_TRANSID }; static struct my_option my_long_options[] = @@ -213,7 +224,7 @@ static struct my_option 
my_long_options[] = 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"character-sets-dir", OPT_CHARSETS_DIR, "Directory where character sets are.", - (uchar**) &charsets_dir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + (char**) &charsets_dir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"check", 'c', "Check table for errors.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, @@ -233,8 +244,8 @@ static struct my_option my_long_options[] = 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"data-file-length", 'D', "Max length of data file (when recreating data-file when it's full).", - (uchar**) &check_param.max_data_file_length, - (uchar**) &check_param.max_data_file_length, + &check_param.max_data_file_length, + &check_param.max_data_file_length, 0, GET_LL, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"extend-check", 'e', "If used when checking a table, ensure that the table is 100 percent consistent, which will take a long time. If used when repairing a table, try to recover every possible row from the data file. Normally this will also find a lot of garbage rows; Don't use this option with repair if you are not totally desperate.", @@ -255,21 +266,21 @@ static struct my_option my_long_options[] = "Print statistics information about table that is checked.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"keys-used", 'k', - "Tell MARIA to update only some specific keys. # is a bit mask of which keys to use. This can be used to get faster inserts.", - (uchar**) &check_param.keys_in_use, - (uchar**) &check_param.keys_in_use, + "Tell Aria to update only some specific keys. # is a bit mask of which keys to use. 
This can be used to get faster inserts.", + &check_param.keys_in_use, + &check_param.keys_in_use, 0, GET_ULL, REQUIRED_ARG, -1, 0, 0, 0, 0, 0}, {"datadir", OPT_DATADIR, - "Path for control file (and logs if --log-dir not used).", - (uchar**) &maria_data_root, 0, 0, GET_STR, REQUIRED_ARG, + "Path for control file (and logs if --logdir not used).", + &maria_data_root, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, - {"log-dir", OPT_LOG_DIR, + {"logdir", OPT_LOG_DIR, "Path for log files.", - (uchar**) &opt_log_dir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + (char**) &opt_log_dir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"max-record-length", OPT_MAX_RECORD_LENGTH, - "Skip rows bigger than this if maria_chk can't allocate memory to hold it", - (uchar**) &check_param.max_record_length, - (uchar**) &check_param.max_record_length, + "Skip rows bigger than this if aria_chk can't allocate memory to hold it", + &check_param.max_record_length, + &check_param.max_record_length, 0, GET_ULL, REQUIRED_ARG, LONGLONG_MAX, 0, LONGLONG_MAX, 0, 0, 0}, {"medium-check", 'm', "Faster than extend-check, but only finds 99.99% of all errors. Should be good enough for most cases.", @@ -302,17 +313,15 @@ static struct my_option my_long_options[] = #endif {"set-auto-increment", 'A', "Force auto_increment to start at this or higher value. If no value is given, then sets the next auto_increment value to the highest used value for the auto key + 1.", - (uchar**) &check_param.auto_increment_value, - (uchar**) &check_param.auto_increment_value, + &check_param.auto_increment_value, + &check_param.auto_increment_value, 0, GET_ULL, OPT_ARG, 0, 0, 0, 0, 0, 0}, {"set-collation", OPT_SET_COLLATION, "Change the collation used by the index", - (uchar**) &set_collation_name, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, - {"set-variable", 'O', - "Change the value of a variable. 
Please note that this option is deprecated; you can set variables directly with --variable-name=value.", - 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + (char**) &set_collation_name, 0, 0, GET_STR, REQUIRED_ARG, + 0, 0, 0, 0, 0, 0}, {"silent", 's', - "Only print errors. One can use two -s to make maria_chk very silent.", + "Only print errors. One can use two -s to make aria_chk very silent.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, #ifndef DBUG_OFF #ifdef SAFEMALLOC @@ -326,22 +335,22 @@ static struct my_option my_long_options[] = 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"sort-records", 'R', "Sort records according to an index. This makes your data much more localized and may speed up things. (It may be VERY slow to do a sort the first time!)", - (uchar**) &check_param.opt_sort_key, - (uchar**) &check_param.opt_sort_key, + &check_param.opt_sort_key, + &check_param.opt_sort_key, 0, GET_UINT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, - {"tmpdir", 't', - "Path for temporary files.", - (uchar**) &opt_tmpdir, + {"tmpdir", 't', "Path for temporary files.", (char**) &opt_tmpdir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"transaction-log", OPT_TRANSACTION_LOG, "Log repair command to transaction log", - (uchar**) &opt_transaction_logging, (uchar**) &opt_transaction_logging, + &opt_transaction_logging, &opt_transaction_logging, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"update-state", 'U', - "Mark tables as crashed if any errors were found.", + "Mark tables as crashed if any errors were found and clean if check didn't " + "find any errors. This allows one to get rid of warnings like 'table not " + "properly closed'", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"unpack", 'u', - "Unpack file packed with mariapack.", + "Unpack file packed with aria_pack.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"verbose", 'v', "Print more information. This can be used with --description and --check. 
Use many -v for more verbosity!", @@ -350,51 +359,59 @@ static struct my_option my_long_options[] = 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"wait", 'w', "Wait if table is locked.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"warning-for-wrong-transaction-id", OPT_WARNING_FOR_WRONG_TRANSID, + "Give a warning if we find a transaction id in the table that is bigger" + "than what exists in the control file. Use --skip-... to disable warning", + &opt_warning_for_wrong_transid, &opt_warning_for_wrong_transid, + 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0}, { "page_buffer_size", OPT_PAGE_BUFFER_SIZE, "Size of page buffer. Used by --safe-repair", - (uchar**) &check_param.use_buffers, (uchar**) &check_param.use_buffers, 0, + &check_param.use_buffers, &check_param.use_buffers, 0, GET_ULONG, REQUIRED_ARG, (long) USE_BUFFER_INIT, 1024L*1024L, (long) ~0L, (long) MALLOC_OVERHEAD, (long) IO_SIZE, 0}, - { "read_buffer_size", OPT_READ_BUFFER_SIZE, "", - (uchar**) &check_param.read_buffer_length, - (uchar**) &check_param.read_buffer_length, 0, GET_ULONG, REQUIRED_ARG, + { "read_buffer_size", OPT_READ_BUFFER_SIZE, + "Read buffer size for sequential reads during scanning", + &check_param.read_buffer_length, + &check_param.read_buffer_length, 0, GET_ULONG, REQUIRED_ARG, (long) READ_BUFFER_INIT, (long) MALLOC_OVERHEAD, (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0}, - { "write_buffer_size", OPT_WRITE_BUFFER_SIZE, "", - (uchar**) &check_param.write_buffer_length, - (uchar**) &check_param.write_buffer_length, 0, GET_ULONG, REQUIRED_ARG, + { "write_buffer_size", OPT_WRITE_BUFFER_SIZE, + "Write buffer size for sequential writes during repair of fixed size or dynamic size rows", + &check_param.write_buffer_length, + &check_param.write_buffer_length, 0, GET_ULONG, REQUIRED_ARG, (long) READ_BUFFER_INIT, (long) MALLOC_OVERHEAD, (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0}, { "sort_buffer_size", OPT_SORT_BUFFER_SIZE, "Size of sort buffer. 
Used by --recover", - (uchar**) &check_param.sort_buffer_length, - (uchar**) &check_param.sort_buffer_length, 0, GET_ULONG, REQUIRED_ARG, + &check_param.sort_buffer_length, + &check_param.sort_buffer_length, 0, GET_ULONG, REQUIRED_ARG, (long) SORT_BUFFER_INIT, (long) (MIN_SORT_BUFFER + MALLOC_OVERHEAD), (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0}, - { "sort_key_blocks", OPT_SORT_KEY_BLOCKS, "", - (uchar**) &check_param.sort_key_blocks, - (uchar**) &check_param.sort_key_blocks, 0, GET_ULONG, REQUIRED_ARG, + { "sort_key_blocks", OPT_SORT_KEY_BLOCKS, + "Internal buffer for sorting keys; Don't touch :)", + &check_param.sort_key_blocks, + &check_param.sort_key_blocks, 0, GET_ULONG, REQUIRED_ARG, BUFFERS_WHEN_SORTING, 4L, 100L, 0L, 1L, 0}, - { "decode_bits", OPT_DECODE_BITS, "", (uchar**) &decode_bits, - (uchar**) &decode_bits, 0, GET_UINT, REQUIRED_ARG, 9L, 4L, 17L, 0L, 1L, 0}, - { "ft_min_word_len", OPT_FT_MIN_WORD_LEN, "", (uchar**) &ft_min_word_len, - (uchar**) &ft_min_word_len, 0, GET_ULONG, REQUIRED_ARG, 4, 1, HA_FT_MAXCHARLEN, + { "decode_bits", OPT_DECODE_BITS, "", &decode_bits, + &decode_bits, 0, GET_UINT, REQUIRED_ARG, 9L, 4L, 17L, 0L, 1L, 0}, + { "ft_min_word_len", OPT_FT_MIN_WORD_LEN, "", &ft_min_word_len, + &ft_min_word_len, 0, GET_ULONG, REQUIRED_ARG, 4, 1, HA_FT_MAXCHARLEN, 0, 1, 0}, - { "ft_max_word_len", OPT_FT_MAX_WORD_LEN, "", (uchar**) &ft_max_word_len, - (uchar**) &ft_max_word_len, 0, GET_ULONG, REQUIRED_ARG, HA_FT_MAXCHARLEN, 10, + { "ft_max_word_len", OPT_FT_MAX_WORD_LEN, "", &ft_max_word_len, + &ft_max_word_len, 0, GET_ULONG, REQUIRED_ARG, HA_FT_MAXCHARLEN, 10, HA_FT_MAXCHARLEN, 0, 1, 0}, - { "maria_ft_stopword_file", OPT_FT_STOPWORD_FILE, + { "aria_ft_stopword_file", OPT_FT_STOPWORD_FILE, "Use stopwords from this file instead of built-in list.", - (uchar**) &ft_stopword_file, (uchar**) &ft_stopword_file, 0, GET_STR, + (char**) &ft_stopword_file, (char**) &ft_stopword_file, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, { "stats_method", 
OPT_STATS_METHOD, "Specifies how index statistics collection code should treat NULLs. " "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), " "\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".", - (uchar**) &maria_stats_method_str, (uchar**) &maria_stats_method_str, 0, + (char**) &maria_stats_method_str, (char**) &maria_stats_method_str, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, { "zerofill", 'z', - "Fill empty space in data and index files with zeroes", + "Fill empty space in data and index files with zeroes,", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, { "zerofill-keep-lsn", OPT_ZEROFILL_KEEP_LSN, "Like --zerofill but does not zero out LSN of data/index pages;" @@ -419,7 +436,7 @@ static void usage(void) print_version(); puts("By Monty, for your professional use"); puts("This software comes with NO WARRANTY: see the PUBLIC for details.\n"); - puts("Description, check and repair of MARIA tables."); + puts("Description, check and repair of Aria tables."); puts("Used without options all tables on the command will be checked for errors"); printf("Usage: %s [OPTIONS] tables[.MAI]\n", my_progname_short); printf("\nGlobal options:\n"); @@ -430,10 +447,12 @@ static void usage(void) printf("\ -H, --HELP Display this help and exit.\n\ -?, --help Display this help and exit.\n\ - -O, --set-variable var=option.\n\ - Change the value of a variable. Please note that\n\ - this option is deprecated; you can set variables\n\ - directly with '--variable-name=value'.\n\ + --datadir=path Path for control file (and logs if --logdir not used)\n\ + --logdir=path Path for log files\n\ + --require-control-file Abort if we can't find/read the maria_log_control\n\ + file\n\ + -s, --silent Only print errors. One can use two -s to make\n\ + maria_chk very silent.\n\ -t, --tmpdir=path Path for temporary files. 
Multiple paths can be\n\ specified, separated by "); #if defined( __WIN__) || defined(__NETWARE__) @@ -441,12 +460,8 @@ static void usage(void) #else printf("colon (:)"); #endif - printf(", they will be used\n\ + printf(", they will be used\n\ in a round-robin fashion.\n\ - --require-control-file Abort if we can't find/read the maria_log_control\n\ - file\n\ - -s, --silent Only print errors. One can use two -s to make\n\ - maria_chk very silent.\n\ -v, --verbose Print more information. This can be used with\n\ --description and --check. Use many -v for more verbosity.\n\ -V, --version Print version and exit.\n\ @@ -455,10 +470,10 @@ static void usage(void) puts(" --start-check-pos=# Start reading file at given offset.\n"); #endif - puts("Check options (check is the default action for maria_chk):\n\ + puts("Check options (check is the default action for aria_chk):\n\ -c, --check Check table for errors.\n\ -e, --extend-check Check the table VERY throughly. Only use this in\n\ - extreme cases as maria_chk should normally be able to\n\ + extreme cases as aria_chk should normally be able to\n\ find out if the table is ok even without this switch.\n\ -F, --fast Check only tables that haven't been closed properly.\n\ -C, --check-only-changed\n\ @@ -468,10 +483,11 @@ static void usage(void) -i, --information Print statistics information about table that is checked.\n\ -m, --medium-check Faster than extend-check, but only finds 99.99% of\n\ all errors. 
Should be good enough for most cases.\n\ - -U --update-state Mark tables as crashed if you find any errors.\n\ + -U, --update-state Mark tables as crashed if you find any errors.\n\ -T, --read-only Don't mark table as checked.\n"); - puts("Recover (repair)/ options (When using '-r' or '-o'):\n\ + puts("\ +Recover (repair)/ options (When using '--recover' or '--safe-recover'):\n\ -B, --backup Make a backup of the .MAD file as 'filename-time.BAK'.\n\ --correct-checksum Correct checksum information for table.\n\ -D, --data-file-length=# Max length of data file (when recreating data\n\ @@ -480,11 +496,11 @@ static void usage(void) Normally this will also find a lot of garbage rows;\n\ Don't use this option if you are not totally desperate.\n\ -f, --force Overwrite old temporary files.\n\ - -k, --keys-used=# Tell MARIA to update only some specific keys. # is a\n\ + -k, --keys-used=# Tell Aria to update only some specific keys. # is a\n\ bit mask of which keys to use. This can be used to\n\ get faster inserts.\n\ --max-record-length=#\n\ - Skip rows bigger than this if maria_chk can't allocate\n\ + Skip rows bigger than this if aria_chk can't allocate\n\ memory to hold it.\n\ -r, --recover Can fix almost anything except unique keys that aren't\n\ unique.\n\ @@ -498,23 +514,23 @@ static void usage(void) handle a couple of cases where '-r' reports that it\n\ can't fix the data file.\n\ --transaction-log Log repair command to transaction log. 
This is needed\n\ - if one wants to use the maria_read_log to repeat the \n\ + if one wants to use the aria_read_log to repeat the \n\ repair\n\ --character-sets-dir=...\n\ Directory where character sets are.\n\ --set-collation=name\n\ Change the collation used by the index.\n\ -q, --quick Faster repair by not modifying the data file.\n\ - One can give a second '-q' to force maria_chk to\n\ + One can give a second '-q' to force aria_chk to\n\ modify the original datafile in case of duplicate keys.\n\ NOTE: Tables where the data file is currupted can't be\n\ fixed with this option.\n\ - -u, --unpack Unpack file packed with mariapack.\n\ + -u, --unpack Unpack file packed with ariapack.\n\ "); puts("Other actions:\n\ -a, --analyze Analyze distribution of keys. Will make some joins in\n\ - MySQL faster. You can check the calculated distribution\n\ + MariaDB faster. You can check the calculated distribution\n\ by using '--description --verbose table_name'.\n\ --stats_method=name Specifies how index statistics collection code should\n\ treat NULLs. Possible values of name are \"nulls_unequal\"\n\ @@ -537,6 +553,13 @@ static void usage(void) --zerofill-keep-lsn Like --zerofill but does not zero out LSN of\n\ data/index pages."); + puts("Variables:\n\ +--page_buffer_size=# Size of page buffer. Used by --safe-repair\n\ +--read_buffer_size=# Read buffer size for sequential reads during scanning\n\ +--sort_buffer_size=# Size of sort buffer. Used by --recover\n\ +--sort_key_blocks=# Internal buffer for sorting keys; Don't touch :)\n\ +--write_buffer_size=# Write buffer size for sequential writes during repair"); + print_defaults("my", load_default_groups); my_print_variables(my_long_options); } @@ -754,7 +777,7 @@ get_one_option(int optid, check_param.testflag|= T_UPDATE_STATE; break; case '#': - DBUG_SET_INITIAL(argument ? argument : "d:t:o,/tmp/maria_chk.trace"); + DBUG_SET_INITIAL(argument ? 
argument : "d:t:o,/tmp/aria_chk.trace"); opt_debug= 1; break; case OPT_SKIP_SAFEMALLOC: @@ -918,7 +941,7 @@ static int maria_chk(HA_CHECK *param, char *filename) _ma_check_print_error(param,"'%s' doesn't have a correct index definition. You need to recreate it before you can do a repair",filename); break; case HA_ERR_NOT_A_TABLE: - _ma_check_print_error(param,"'%s' is not a MARIA-table",filename); + _ma_check_print_error(param,"'%s' is not a Aria table",filename); break; case HA_ERR_CRASHED_ON_USAGE: _ma_check_print_error(param,"'%s' is marked as crashed",filename); @@ -927,10 +950,10 @@ static int maria_chk(HA_CHECK *param, char *filename) _ma_check_print_error(param,"'%s' is marked as crashed after last repair",filename); break; case HA_ERR_OLD_FILE: - _ma_check_print_error(param,"'%s' is a old type of MARIA-table", filename); + _ma_check_print_error(param,"'%s' is a old type of Aria table", filename); break; case HA_ERR_NEW_FILE: - _ma_check_print_error(param,"'%s' uses new features not supported by this version of the MARIA library", filename); + _ma_check_print_error(param,"'%s' uses new features not supported by this version of the Aria library", filename); break; case HA_ERR_END_OF_FILE: _ma_check_print_error(param,"Couldn't read complete header from '%s'", filename); @@ -946,7 +969,7 @@ static int maria_chk(HA_CHECK *param, char *filename) filename); break; default: - _ma_check_print_error(param,"%d when opening MARIA-table '%s'", + _ma_check_print_error(param,"%d when opening Aria table '%s'", my_errno,filename); break; } @@ -990,7 +1013,7 @@ static int maria_chk(HA_CHECK *param, char *filename) if ((param->testflag & (T_REP_ANY | T_SORT_RECORDS)) && ((share->state.changed & (STATE_CHANGED | STATE_CRASHED | - STATE_CRASHED_ON_REPAIR) || + STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR) || !(param->testflag & T_CHECK_ONLY_CHANGED)))) need_to_check=1; @@ -1008,15 +1031,15 @@ static int maria_chk(HA_CHECK *param, char *filename) } if ((param->testflag & 
T_CHECK_ONLY_CHANGED) && (share->state.changed & (STATE_CHANGED | STATE_CRASHED | - STATE_CRASHED_ON_REPAIR))) + STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR))) need_to_check=1; if (!need_to_check) { if (!(param->testflag & T_SILENT) || param->testflag & T_INFO) - printf("MARIA file: %s is already checked\n",filename); + printf("Aria file: %s is already checked\n",filename); if (maria_close(info)) { - _ma_check_print_error(param,"%d when closing MARIA-table '%s'", + _ma_check_print_error(param,"%d when closing Aria table '%s'", my_errno,filename); DBUG_RETURN(1); } @@ -1043,7 +1066,7 @@ static int maria_chk(HA_CHECK *param, char *filename) if (maria_recreate_table(param, &info,filename)) { VOID(fprintf(stderr, - "MARIA-table '%s' is not fixed because of errors\n", + "Aria table '%s' is not fixed because of errors\n", filename)); return(-1); } @@ -1094,7 +1117,7 @@ static int maria_chk(HA_CHECK *param, char *filename) */ maria_lock_database(info, F_EXTRA_LCK); datafile= info->dfile.file; - if (init_pagecache(maria_pagecache, param->use_buffers, 0, 0, + if (init_pagecache(maria_pagecache, (size_t) param->use_buffers, 0, 0, maria_block_size, MY_WME) == 0) { _ma_check_print_error(param, "Can't initialize page cache with %lu memory", @@ -1225,20 +1248,25 @@ static int maria_chk(HA_CHECK *param, char *filename) if (!error && (param->testflag & T_ZEROFILL)) error= maria_zerofill(param, info, filename); if (!error) + { + DBUG_PRINT("info", ("Reseting crashed state")); share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED | - STATE_CRASHED_ON_REPAIR); + STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR); + } else maria_mark_crashed(info); } else if ((param->testflag & T_CHECK) || !(param->testflag & T_AUTO_INC)) { - if (!(param->testflag & T_SILENT) || param->testflag & T_INFO) - printf("Checking MARIA file: %s\n",filename); + if (!(param->testflag & T_VERY_SILENT) || param->testflag & T_INFO) + printf("Checking Aria file: %s\n",filename); if (!(param->testflag & T_SILENT)) 
printf("Data records: %7s Deleted blocks: %7s\n", llstr(info->state->records,llbuff), llstr(info->state->del,llbuff2)); maria_chk_init_for_check(param, info); + if (opt_warning_for_wrong_transid == 0) + param->max_trid= ~ (ulonglong) 0; error= maria_chk_status(param,info); maria_intersect_keys_active(share->state.key_map, param->keys_in_use); error|= maria_chk_size(param,info); @@ -1275,11 +1303,15 @@ static int maria_chk(HA_CHECK *param, char *filename) } if (!error) { - if ((share->state.changed & STATE_CHANGED) && - (param->testflag & T_UPDATE_STATE)) + if (((share->state.changed & + (STATE_CHANGED | STATE_CRASHED | STATE_CRASHED_ON_REPAIR | + STATE_IN_REPAIR)) || + share->state.open_count != 0) + && (param->testflag & T_UPDATE_STATE)) info->update|=HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; + DBUG_PRINT("info", ("Reseting crashed state")); share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED | - STATE_CRASHED_ON_REPAIR); + STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR); } else if (!maria_is_crashed(info) && (param->testflag & T_UPDATE_STATE)) @@ -1343,7 +1375,7 @@ end2: if (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX)) { VOID(fprintf(stderr, - "MARIA-table '%s' is not fixed because of errors\n", + "Aria table '%s' is not fixed because of errors\n", filename)); if (param->testflag & T_REP_ANY) VOID(fprintf(stderr, @@ -1352,13 +1384,13 @@ end2: else if (!(param->error_printed & 2) && !(param->testflag & T_FORCE_CREATE)) VOID(fprintf(stderr, - "MARIA-table '%s' is corrupted\nFix it using switch \"-r\" or \"-o\"\n", + "Aria table '%s' is corrupted\nFix it using switch \"-r\" or \"-o\"\n", filename)); } else if (param->warning_printed && ! 
(param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX | T_FORCE_CREATE))) - VOID(fprintf(stderr, "MARIA-table '%s' is usable but should be fixed\n", + VOID(fprintf(stderr, "Aria table '%s' is usable but should be fixed\n", filename)); VOID(fflush(stderr)); DBUG_RETURN(error); @@ -1389,7 +1421,7 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, char *name) DBUG_VOID_RETURN; } - printf("MARIA file: %s\n",name); + printf("Aria file: %s\n",name); printf("Record format: %s\n", record_formats[share->data_file_type]); printf("Crashsafe: %s\n", share->base.born_transactional ? "yes" : "no"); @@ -1496,11 +1528,11 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, char *name) printf("Recordlength: %16d\n",(int) share->base.pack_reclength); if (! maria_is_all_keys_active(share->state.key_map, share->base.keys)) { - longlong2str(share->state.key_map,buff,2); + longlong2str(share->state.key_map,buff,2,1); printf("Using only keys '%s' of %d possibly keys\n", buff, share->base.keys); } - puts("\ntable description:"); + puts("\nTable description:"); printf("Key Start Len Index Type"); if (param->testflag & T_VERBOSE) printf(" Rec/key Root Blocksize"); @@ -1642,6 +1674,14 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, char *name) } VOID(putchar('\n')); } + if (share->data_file_type == BLOCK_RECORD) + { + uint i; + puts("\nBitmap Data size Description"); + for (i=0 ; i <= 7 ; i++) + printf("%u %5u %s\n", i, share->bitmap.sizes[i], + bitmap_description[i]); + } } DBUG_VOID_RETURN; } /* describe */ @@ -1711,7 +1751,7 @@ static int maria_sort_records(HA_CHECK *param, } if (!(param->testflag & T_SILENT)) { - printf("- Sorting records for MARIA-table '%s'\n",name); + printf("- Sorting records for Aria table '%s'\n",name); if (write_info) printf("Data records: %9s Deleted: %9s\n", llstr(info->state->records,llbuff), @@ -1954,7 +1994,7 @@ static my_bool write_log_record(HA_CHECK *param) { if (write_log_record_for_repair(param, 
info)) _ma_check_print_error(param, "%d when writing log record for" - " MARIA-table '%s'", my_errno, + " Aria table '%s'", my_errno, param->isam_file_name); else if (maria_close(info)) _ma_check_print_error(param, default_close_errmsg, my_errno, diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index d32dfbad06e..e58b149a051 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -39,6 +39,7 @@ #define SANITY_CHECKS 1 #ifdef EXTRA_DEBUG #define EXTRA_DEBUG_KEY_CHANGES +#define EXTRA_STORE_FULL_PAGE_IN_KEY_CHANGES #endif #define MAX_NONMAPPED_INSERTS 1000 @@ -243,7 +244,8 @@ typedef struct st_maria_file_bitmap uchar *map; pgcache_page_no_t page; /* Page number for current bitmap */ uint used_size; /* Size of bitmap head that is not 0 */ - my_bool changed; /* 1 if page needs to be flushed */ + my_bool changed; /* 1 if page needs to be written */ + my_bool changed_not_flushed; /* 1 if some bitmap is not flushed */ my_bool flush_all_requested; /**< If _ma_bitmap_flush_all waiting */ uint non_flushable; /**< 0 if bitmap and log are in sync */ PAGECACHE_FILE file; /* datafile where bitmap is stored */ @@ -361,6 +363,7 @@ typedef struct st_maria_share uint in_trans; /* Number of references by trn */ uint w_locks, r_locks, tot_locks; /* Number of read/write locks */ uint block_size; /* block_size of keyfile & data file*/ + uint max_index_block_size; /* block_size - end_of_page_info */ /* Fixed length part of a packed row in BLOCK_RECORD format */ uint base_length; myf write_flag; @@ -459,8 +462,9 @@ typedef struct st_maria_row uint *null_field_lengths; /* All null field lengths */ ulong *blob_lengths; /* Length for each blob */ ulong min_length, normal_length, char_length, varchar_length; - ulong blob_length, head_length, total_length; + ulong blob_length, total_length; size_t extents_buffer_length; /* Size of 'extents' buffer */ + uint head_length, header_length; uint field_lengths_length; /* Length of data in field_lengths */ uint 
extents_count; /* number of extents in 'extents' */ uint full_page_count, tail_count; /* For maria_chk */ @@ -483,7 +487,8 @@ typedef ICP_RESULT (*index_cond_func_t)(void *param); struct st_maria_handler { MARIA_SHARE *s; /* Shared between open:s */ - struct st_ma_transaction *trn; /* Pointer to active transaction */ + struct st_ma_transaction *trn; /* Pointer to active transaction */ + void *external_ptr; /* Pointer to THD in mysql */ MARIA_STATUS_INFO *state, state_save; MARIA_STATUS_INFO *state_start; /* State at start of transaction */ MARIA_ROW cur_row; /* The active row that we just read */ @@ -614,6 +619,7 @@ struct st_maria_handler #define STATE_NOT_ZEROFILLED 128 #define STATE_NOT_MOVABLE 256 #define STATE_MOVED 512 /* set if base->uuid != maria_uuid */ +#define STATE_IN_REPAIR 1024 /* We are running repair on table */ /* options to maria_read_cache */ @@ -669,11 +675,17 @@ struct st_maria_handler #define maria_mark_crashed_on_repair(x) do{(x)->s->state.changed|= \ STATE_CRASHED|STATE_CRASHED_ON_REPAIR; \ (x)->update|= HA_STATE_CHANGED; \ - DBUG_PRINT("error", \ - ("Marked table crashed")); \ + DBUG_PRINT("error", ("Marked table crashed on repair")); \ + }while(0) +#define maria_mark_in_repair(x) do{(x)->s->state.changed|= \ + STATE_CRASHED | STATE_IN_REPAIR; \ + (x)->update|= HA_STATE_CHANGED; \ + DBUG_PRINT("error", ("Marked table crashed for repair")); \ }while(0) #define maria_is_crashed(x) ((x)->s->state.changed & STATE_CRASHED) #define maria_is_crashed_on_repair(x) ((x)->s->state.changed & STATE_CRASHED_ON_REPAIR) +#define maria_in_repair(x) ((x)->s->state.changed & STATE_IN_REPAIR) + #ifdef EXTRA_DEBUG /** Brings additional information in certain debug builds and in standalone @@ -792,8 +804,10 @@ extern uint32 maria_read_vec[], maria_readnext_vec[]; extern uint maria_quick_table_bits; extern char *maria_data_root; extern uchar maria_zero_string[]; -extern my_bool maria_inited, maria_in_ha_maria; +extern my_bool maria_inited, maria_in_ha_maria, 
maria_recovery_changed_data; +extern my_bool maria_recovery_verbose; extern HASH maria_stored_state; +extern int (*maria_create_trn_hook)(MARIA_HA *); /* This is used by _ma_calc_xxx_key_length och _ma_store_key */ typedef struct st_maria_s_param @@ -826,6 +840,7 @@ typedef struct st_maria_page uchar *buff; /* Data for page */ my_off_t pos; /* Disk address to page */ uint size; /* Size of data on page */ + uint org_size; /* Size of page at read or after log */ uint node; /* 0 or share->base.key_reflength */ uint flag; /* Page flag */ uint link_offset; @@ -1076,7 +1091,7 @@ typedef struct st_maria_block_info #define USE_BUFFER_INIT (((1024L*1024L*128-MALLOC_OVERHEAD)/8192)*8192) #define READ_BUFFER_INIT (1024L*256L-MALLOC_OVERHEAD) -#define SORT_BUFFER_INIT (1024L*1024L*64-MALLOC_OVERHEAD) +#define SORT_BUFFER_INIT (1024L*1024L*256-MALLOC_OVERHEAD) #define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD) #define fast_ma_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _ma_writeinfo((INFO),0) @@ -1246,7 +1261,6 @@ extern my_bool maria_flush_log_for_page(uchar *page, extern my_bool maria_flush_log_for_page_none(uchar *page, pgcache_page_no_t page_no, uchar *data_ptr); -void maria_concurrent_inserts(MARIA_HA *info, my_bool concurrent_insert); extern PAGECACHE *maria_log_pagecache; extern void ma_set_index_cond_func(MARIA_HA *info, index_cond_func_t func, void *func_arg); diff --git a/storage/maria/maria_ftdump.c b/storage/maria/maria_ftdump.c index 8b545e6e9af..870d07fa96e 100644 --- a/storage/maria/maria_ftdump.c +++ b/storage/maria/maria_ftdump.c @@ -46,7 +46,7 @@ static struct my_option my_long_options[] = {"stats", 's', "Report global stats.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"verbose", 'v', "Be verbose.", - (uchar**) &verbose, (uchar**) &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + &verbose, &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; @@ -263,7 +263,7 @@ get_one_option(int optid, const 
struct my_option *opt __attribute__((unused)), static void usage() { - printf("Use: maria_ft_dump <table_name> <index_num>\n"); + printf("Use: aria_ft_dump <table_name> <index_num>\n"); my_print_help(my_long_options); my_print_variables(my_long_options); NETWARE_SET_SCREEN_MODE(1); diff --git a/storage/maria/maria_pack.c b/storage/maria/maria_pack.c index 6c38cab294e..1d2d3995bd8 100644 --- a/storage/maria/maria_pack.c +++ b/storage/maria/maria_pack.c @@ -197,7 +197,7 @@ static struct st_file_buffer file_buffer; static QUEUE queue; static HUFF_COUNTS *global_count; static char zero_string[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; -static const char *load_default_groups[]= { "mariapack",0 }; +static const char *load_default_groups[]= { "ariapack",0 }; /* The main program */ @@ -238,7 +238,7 @@ int main(int argc, char **argv) } } if (ok && isamchk_neaded && !silent) - puts("Remember to run maria_chk -rq on compressed tables"); + puts("Remember to run aria_chk -rq on compressed tables"); VOID(fflush(stdout)); VOID(fflush(stderr)); free_defaults(default_argv); @@ -259,10 +259,10 @@ static struct my_option my_long_options[] = 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, #endif {"backup", 'b', "Make a backup of the table as table_name.OLD.", - (uchar**) &backup, (uchar**) &backup, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + &backup, &backup, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"character-sets-dir", OPT_CHARSETS_DIR_MP, - "Directory where character sets are.", (uchar**) &charsets_dir, - (uchar**) &charsets_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + "Directory where character sets are.", (char**) &charsets_dir, + (char**) &charsets_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"debug", '#', "Output debug log. 
Often this is 'd:t:o,filename'.", 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, {"force", 'f', @@ -270,7 +270,7 @@ static struct my_option my_long_options[] = 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"join", 'j', "Join all given tables into 'new_table_name'. All tables MUST have identical layouts.", - (uchar**) &join_table, (uchar**) &join_table, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, + &join_table, &join_table, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"help", '?', "Display this help and exit.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, @@ -284,8 +284,8 @@ static struct my_option my_long_options[] = 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"version", 'V', "Output version information and exit.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"wait", 'w', "Wait and retry if table is in use.", (uchar**) &opt_wait, - (uchar**) &opt_wait, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"wait", 'w', "Wait and retry if table is in use.", &opt_wait, + &opt_wait, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; @@ -306,11 +306,11 @@ static void usage(void) puts("This software comes with ABSOLUTELY NO WARRANTY. 
This is free software,"); puts("and you are welcome to modify and redistribute it under the GPL license\n"); - puts("Pack a MARIA-table to take much less space."); - puts("Keys are not updated, you must run maria_chk -rq on the index (.MAI) file"); + puts("Pack a Aria-table to take much less space."); + puts("Keys are not updated, you must run aria_chk -rq on the index (.MAI) file"); puts("afterwards to update the keys."); puts("You should give the .MAI file as the filename argument."); - puts("To unpack a packed table, run maria_chk -u on the table"); + puts("To unpack a packed table, run aria_chk -u on the table"); VOID(printf("\nUsage: %s [OPTIONS] filename...\n", my_progname)); my_print_help(my_long_options); @@ -359,7 +359,7 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), silent= 0; break; case '#': - DBUG_PUSH(argument ? argument : "d:t:o,/tmp/maria_pack.trace"); + DBUG_PUSH(argument ? argument : "d:t:o,/tmp/aria_pack.trace"); break; case 'V': print_version(); @@ -2982,7 +2982,7 @@ static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg, } /* If there are no disabled indexes, keep key_file_length value from - original file so "maria_chk -rq" can use this value (this is necessary + original file so "aria_chk -rq" can use this value (this is necessary because index size cannot be easily calculated for fulltext keys) */ maria_clear_all_keys_active(share->state.key_map); diff --git a/storage/maria/maria_read_log.c b/storage/maria/maria_read_log.c index 2b2fa692f24..de45eb0bcb6 100644 --- a/storage/maria/maria_read_log.c +++ b/storage/maria/maria_read_log.c @@ -1,4 +1,5 @@ /* Copyright (C) 2007 MySQL AB + Copyright (C) 2010 Monty Program Ab This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,20 +20,20 @@ #define LOG_FLAGS 0 -static const char *load_default_groups[]= { "maria_read_log",0 }; +static const char *load_default_groups[]= { 
"aria_read_log",0 }; static void get_options(int *argc,char * * *argv); #ifndef DBUG_OFF #if defined(__WIN__) -const char *default_dbug_option= "d:t:O,\\maria_read_log.trace"; +const char *default_dbug_option= "d:t:O,\\aria_read_log.trace"; #else -const char *default_dbug_option= "d:t:o,/tmp/maria_read_log.trace"; +const char *default_dbug_option= "d:t:o,/tmp/aria_read_log.trace"; #endif #endif /* DBUG_OFF */ static my_bool opt_display_only, opt_apply, opt_apply_undo, opt_silent; static my_bool opt_check; static const char *opt_tmpdir; static ulong opt_page_buffer_size; -static ulonglong opt_start_from_lsn; +static ulonglong opt_start_from_lsn, opt_end_lsn, opt_start_from_checkpoint; static MY_TMPDIR maria_chk_tmpdir; @@ -52,9 +53,10 @@ int main(int argc, char **argv) if (maria_init()) { - fprintf(stderr, "Can't init Maria engine (%d)\n", errno); + fprintf(stderr, "Can't init Aria engine (%d)\n", errno); goto err; } + maria_block_size= 0; /* Use block size from file */ /* we don't want to create a control file, it MUST exist */ if (ma_control_file_open(FALSE, TRUE)) { @@ -67,7 +69,7 @@ int main(int argc, char **argv) goto err; } if (init_pagecache(maria_pagecache, opt_page_buffer_size, 0, 0, - TRANSLOG_PAGE_SIZE, MY_WME) == 0) + maria_block_size, MY_WME) == 0) { fprintf(stderr, "Got error in init_pagecache() (errno: %d)\n", errno); goto err; @@ -92,7 +94,6 @@ int main(int argc, char **argv) if (opt_display_only) printf("You are using --display-only, NOTHING will be written to disk\n"); - /* LSN could be also --start-from-lsn=# */ lsn= translog_first_lsn_in_log(); if (lsn == LSN_ERROR) { @@ -103,8 +104,16 @@ int main(int argc, char **argv) { fprintf(stdout, "The transaction log is empty\n"); } - fprintf(stdout, "The transaction log starts from lsn (%lu,0x%lx)\n", - LSN_IN_PARTS(lsn)); + if (opt_start_from_checkpoint && !opt_start_from_lsn && + last_checkpoint_lsn != LSN_IMPOSSIBLE) + { + lsn= LSN_IMPOSSIBLE; /* LSN set in maria_apply_log() */ + fprintf(stdout, 
"Starting from checkpoint (%lu,0x%lx)\n", + LSN_IN_PARTS(last_checkpoint_lsn)); + } + else + fprintf(stdout, "The transaction log starts from lsn (%lu,0x%lx)\n", + LSN_IN_PARTS(lsn)); if (opt_start_from_lsn) { @@ -119,8 +128,14 @@ int main(int argc, char **argv) LSN_IN_PARTS(lsn)); } - fprintf(stdout, "TRACE of the last maria_read_log\n"); - if (maria_apply_log(lsn, opt_apply ? MARIA_LOG_APPLY : + if (opt_end_lsn != LSN_IMPOSSIBLE) + { + /* We can't apply undo if we use end_lsn */ + opt_apply_undo= 0; + } + + fprintf(stdout, "TRACE of the last aria_read_log\n"); + if (maria_apply_log(lsn, opt_end_lsn, opt_apply ? MARIA_LOG_APPLY : (opt_check ? MARIA_LOG_CHECK : MARIA_LOG_DISPLAY_HEADER), opt_silent ? NULL : stdout, opt_apply_undo, FALSE, FALSE, &warnings_count)) @@ -150,6 +165,9 @@ err: #include "ma_check_standalone.h" +enum options_mc { + OPT_CHARSETS_DIR=256 +}; static struct my_option my_long_options[] = { @@ -158,6 +176,9 @@ static struct my_option my_long_options[] = " Displays a lot of information if not run with --silent", (uchar **) &opt_apply, (uchar **) &opt_apply, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"character-sets-dir", OPT_CHARSETS_DIR, + "Directory where character sets are.", + (char**) &charsets_dir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"check", 'c', "if --display-only, check if record is fully readable (for debugging)", (uchar **) &opt_check, (uchar **) &opt_check, 0, @@ -169,22 +190,31 @@ static struct my_option my_long_options[] = {"help", '?', "Display this help and exit.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"display-only", 'd', "display brief info read from records' header", - (uchar **) &opt_display_only, (uchar **) &opt_display_only, 0, GET_BOOL, + &opt_display_only, &opt_display_only, 0, GET_BOOL, NO_ARG,0, 0, 0, 0, 0, 0}, - {"maria_log_dir_path", 'l', + {"aria-log-dir-path", 'l', "Path to the directory where to store transactional log", (uchar **) &maria_data_root, (uchar **) &maria_data_root, 0, GET_STR, 
REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, - { "page_buffer_size", 'P', "", - (uchar**) &opt_page_buffer_size, (uchar**) &opt_page_buffer_size, 0, + { "page-buffer-size", 'P', "", + &opt_page_buffer_size, &opt_page_buffer_size, 0, GET_ULONG, REQUIRED_ARG, (long) USE_BUFFER_INIT, (long) USE_BUFFER_INIT, (long) ~(ulong) 0, (long) MALLOC_OVERHEAD, (long) IO_SIZE, 0}, - { "start_from_lsn", 'o', "Start reading log from this lsn", - (uchar**) &opt_start_from_lsn, (uchar**) &opt_start_from_lsn, + { "start-from-lsn", 'o', "Start reading log from this lsn", + &opt_start_from_lsn, &opt_start_from_lsn, + 0, GET_ULL, REQUIRED_ARG, 0, 0, ~(longlong) 0, 0, 0, 0 }, + {"start-from-checkpoint", 'C', "Start applying from last checkpoint", + &opt_start_from_checkpoint, &opt_start_from_checkpoint, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + { "end-lsn", 'e', "Stop applying at this lsn. If end-lsn is used, UNDO:s " + "will not be applied", &opt_end_lsn, &opt_end_lsn, 0, GET_ULL, REQUIRED_ARG, 0, 0, ~(longlong) 0, 0, 0, 0 }, {"silent", 's', "Print less information during apply/undo phase", - (uchar **) &opt_silent, (uchar **) &opt_silent, 0, + &opt_silent, &opt_silent, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"verbose", 'v', "Print more information during apply/undo phase", + &maria_recovery_verbose, &maria_recovery_verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"tmpdir", 't', "Path for temporary files. Multiple paths can be specified, " "separated by " @@ -193,7 +223,7 @@ static struct my_option my_long_options[] = #else "colon (:)" #endif - , (uchar**) &opt_tmpdir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + , (char**) &opt_tmpdir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"undo", 'u', "Apply UNDO records to tables. 
(disable with --disable-undo)", (uchar **) &opt_apply_undo, (uchar **) &opt_apply_undo, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0}, @@ -206,7 +236,7 @@ static struct my_option my_long_options[] = static void print_version(void) { - VOID(printf("%s Ver 1.2 for %s on %s\n", + VOID(printf("%s Ver 1.3 for %s on %s\n", my_progname_short, SYSTEM_TYPE, MACHINE_TYPE)); NETWARE_SET_SCREEN_MODE(1); } @@ -219,10 +249,10 @@ static void usage(void) puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,"); puts("and you are welcome to modify and redistribute it under the GPL license\n"); - puts("Display and apply log records from a MARIA transaction log"); + puts("Display and apply log records from a Aria transaction log"); puts("found in the current directory (for now)"); #ifndef IDENTICAL_PAGES_AFTER_RECOVERY - puts("\nNote: Maria is compiled without -DIDENTICAL_PAGES_AFTER_RECOVERY\n" + puts("\nNote: Aria is compiled without -DIDENTICAL_PAGES_AFTER_RECOVERY\n" "which means that the table files are not byte-to-byte identical to\n" "files created during normal execution. This should be ok, except for\n" "test scripts that tries to compare files before and after recovery."); diff --git a/storage/maria/plug.in b/storage/maria/plug.in index 686c8361a87..008d82250c8 100644 --- a/storage/maria/plug.in +++ b/storage/maria/plug.in @@ -1,21 +1,19 @@ -MYSQL_STORAGE_ENGINE(maria,, [Maria Storage Engine], +MYSQL_STORAGE_ENGINE(aria,, [Aria Storage Engine], [Crash-safe tables with MyISAM heritage], [default,max,max-no-ndb]) -MYSQL_PLUGIN_DIRECTORY(maria, [storage/maria]) -MYSQL_PLUGIN_STATIC(maria, [libmaria.a]) -# Maria will probably go first into max builds, not all builds, -# so we don't declare it mandatory. 
-MYSQL_PLUGIN_DEPENDS_ON_MYSQL_INTERNALS(maria, [ha_maria.cc]) +MYSQL_PLUGIN_DIRECTORY(aria, [storage/maria]) +MYSQL_PLUGIN_STATIC(aria, [libaria.a]) +MYSQL_PLUGIN_DEPENDS_ON_MYSQL_INTERNALS(aria, [ha_maria.cc]) -MYSQL_PLUGIN_ACTIONS(maria, [ +MYSQL_PLUGIN_ACTIONS(aria, [ # AC_CONFIG_FILES(storage/maria/unittest/Makefile) -AC_ARG_WITH(maria-tmp-tables, - AC_HELP_STRING([--with-maria-tmp-tables],[Use Maria for internal temporary tables]), - [with_maria_tmp_tables=$withval], - [with_maria_tmp_tables=yes] +AC_ARG_WITH(aria-tmp-tables, + AC_HELP_STRING([--with-aria-tmp-tables],[Use Aria for internal temporary tables]), + [with_aria_tmp_tables=$withval], + [with_aria_tmp_tables=yes] ) -if test "$with_maria_tmp_tables" = "yes" +if test "$with_aria_tmp_tables" = "yes" then - AC_DEFINE([USE_MARIA_FOR_TMP_TABLES], [1], [Maria is used for internal temporary tables]) + AC_DEFINE([USE_MARIA_FOR_TMP_TABLES], [1], [Aria is used for internal temporary tables]) fi ]) diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c index ceb8ad2ae2d..05330baed76 100644 --- a/storage/maria/trnman.c +++ b/storage/maria/trnman.c @@ -145,6 +145,7 @@ static uchar *trn_get_hash_key(const uchar *trn, size_t *len, int trnman_init(TrID initial_trid) { DBUG_ENTER("trnman_init"); + DBUG_PRINT("enter", ("initial_trid: %lu", (ulong) initial_trid)); short_trid_to_active_trn= (TRN **)my_malloc(SHORT_TRID_MAX*sizeof(TRN*), MYF(MY_WME|MY_ZEROFILL)); @@ -176,6 +177,8 @@ int trnman_init(TrID initial_trid) trnman_active_transactions= 0; trnman_committed_transactions= 0; trnman_allocated_transactions= 0; + /* This is needed for recovery and repair */ + dummy_transaction_object.min_read_from= ~(TrID) 0; pool= 0; global_trid_generator= initial_trid; @@ -361,6 +364,7 @@ TRN *trnman_new_trn(WT_THD *wt) trn->used_tables= 0; trn->locked_tables= 0; + trn->flags= 0; /* only after the following function TRN is considered initialized, diff --git a/storage/maria/unittest/Makefile.am 
b/storage/maria/unittest/Makefile.am index 28c75a06b29..b5bc8587066 100644 --- a/storage/maria/unittest/Makefile.am +++ b/storage/maria/unittest/Makefile.am @@ -20,9 +20,9 @@ INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \ EXTRA_DIST= ma_test_all-t CMakeLists.txt \ ma_test_recovery.pl ma_test_recovery.expected # Only reason to link with libmyisam.a here is that it's where some fulltext -# pieces are (but soon we'll remove fulltext dependencies from Maria). +# pieces are (but soon we'll remove fulltext dependencies from Aria). LDADD= $(top_builddir)/unittest/mytap/libmytap.a \ - $(top_builddir)/storage/maria/libmaria.a \ + $(top_builddir)/storage/maria/libaria.a \ $(top_builddir)/storage/myisam/libmyisam.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ @@ -108,8 +108,8 @@ ma_pagecache_rwconsist2_1k_t_CPPFLAGS = -DTEST_PAGE_SIZE=1024 # the generic lock manager may not be used in the end and lockman1-t crashes, # and lockman2-t takes at least quarter an hour, # so we don't build lockman-t and lockman1-t and lockman2-t -CLEANFILES = maria_log_control page_cache_test_file_1 \ - maria_log.???????? +CLEANFILES = aria_log_control page_cache_test_file_1 \ + aria_log.???????? 
# Don't update the files from bitkeeper %::SCCS/s.% diff --git a/storage/maria/unittest/ma_control_file-t.c b/storage/maria/unittest/ma_control_file-t.c index 6702e4deb2f..164ea284f31 100644 --- a/storage/maria/unittest/ma_control_file-t.c +++ b/storage/maria/unittest/ma_control_file-t.c @@ -13,7 +13,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -/* Unit test of the control file module of the Maria engine WL#3234 */ +/* Unit test of the control file module of the Aria engine WL#3234 */ /* Note that it is not possible to test the durability of the write (can't @@ -24,11 +24,11 @@ #include <my_sys.h> #include <tap.h> -#ifndef WITH_MARIA_STORAGE_ENGINE +#ifndef WITH_ARIA_STORAGE_ENGINE /* - If Maria is not compiled in, normally we don't come to building this test. + If Aria is not compiled in, normally we don't come to building this test. */ -#error "Maria engine is not compiled in, test cannot be built" +#error "Aria engine is not compiled in, test cannot be built" #endif #include "maria.h" @@ -547,7 +547,7 @@ static struct my_option my_long_options[] = static void version(void) { printf("ma_control_file_test: unit test for the control file " - "module of the Maria storage engine. Ver 1.0 \n"); + "module of the Aria storage engine. 
Ver 1.0 \n"); } static my_bool diff --git a/storage/maria/unittest/ma_maria_log_cleanup.c b/storage/maria/unittest/ma_maria_log_cleanup.c index 7c78bf3a1a4..f85c75b1a88 100644 --- a/storage/maria/unittest/ma_maria_log_cleanup.c +++ b/storage/maria/unittest/ma_maria_log_cleanup.c @@ -38,7 +38,8 @@ my_bool maria_log_remove() for (i= 0; i < dirp->number_off_files; i++) { char *file= dirp->dir_entry[i].name; - if (strncmp(file, "maria_log.", 10) == 0 && + if (strncmp(file, "aria_log.", 9) == 0 && + file[9] >= '0' && file[9] <= '9' && file[10] >= '0' && file[10] <= '9' && file[11] >= '0' && file[11] <= '9' && file[12] >= '0' && file[12] <= '9' && @@ -46,8 +47,7 @@ my_bool maria_log_remove() file[14] >= '0' && file[14] <= '9' && file[15] >= '0' && file[15] <= '9' && file[16] >= '0' && file[16] <= '9' && - file[17] >= '0' && file[17] <= '9' && - file[18] == '\0') + file[17] == '\0') { if (fn_format(file_name, file, maria_data_root, "", MYF(MY_WME)) == NullS || diff --git a/storage/maria/unittest/ma_pagecache_single.c b/storage/maria/unittest/ma_pagecache_single.c index 53c820dcd2e..32e588e165a 100644 --- a/storage/maria/unittest/ma_pagecache_single.c +++ b/storage/maria/unittest/ma_pagecache_single.c @@ -246,7 +246,7 @@ int simple_read_change_write_read_test() /* Prepare page, read page 0 (and pin) then write page 1 and page 0. - Flush the file (shold flush only page 1 and return 1 (page 0 is + Flush the file (should flush only page 1 and return 1 (page 0 is still pinned). Check file on the disk. Unpin and flush. 
@@ -284,7 +284,7 @@ int simple_pin_test() bfill(buffw + TEST_PAGE_SIZE/2, TEST_PAGE_SIZE/2, ((unsigned char) 129)); pagecache_write(&pagecache, &file1, 0, 3, buffw, PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_WRITE_TO_READ, + PAGECACHE_LOCK_LEFT_WRITELOCKED, PAGECACHE_PIN_LEFT_PINNED, PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE); @@ -304,7 +304,7 @@ int simple_pin_test() pagecache_unlock(&pagecache, &file1, 0, - PAGECACHE_LOCK_READ_UNLOCK, + PAGECACHE_LOCK_WRITE_UNLOCK, PAGECACHE_UNPIN, 0, 0, 0); if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) @@ -324,6 +324,93 @@ err: } /* + Prepare page, read page 0 (and pin) then write page 1 and page 0. + Flush the file (should flush only page 1 and return 1 (page 0 is + still pinned). + Check file on the disk. + Unpin and flush. + Check file on the disk. +*/ +int simple_pin_test2() +{ + unsigned char *buffw= malloc(TEST_PAGE_SIZE); + int res; + DBUG_ENTER("simple_pin_test2"); + /* prepare the file */ + bfill(buffw, TEST_PAGE_SIZE, '\1'); + pagecache_write(&pagecache, &file1, 0, 3, buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0, LSN_IMPOSSIBLE); + /* test */ + if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) + { + diag("Got error during flushing pagecache\n"); + exit(1); + } + pagecache_read(&pagecache, &file1, 0, 3, buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE, + 0); + pagecache_write(&pagecache, &file1, 1, 3, buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0, LSN_IMPOSSIBLE); + bfill(buffw + TEST_PAGE_SIZE/2, TEST_PAGE_SIZE/2, ((unsigned char) 129)); + pagecache_write(&pagecache, &file1, 0, 3, buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE_TO_READ, + PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_WRITE_DELAY, + 0, LSN_IMPOSSIBLE); + /* + We have to get error because one page of the file is pinned, + other page should be flushed + */ + if 
(!flush_pagecache_blocks(&pagecache, &file1, FLUSH_KEEP_LAZY)) + { + diag("Did not get error in flush_pagecache_blocks 2\n"); + res= 0; + goto err; + } + ok((res= test(test_file(file1, file1_name, TEST_PAGE_SIZE*2, TEST_PAGE_SIZE*2, + simple_pin_test_file1))), + "Simple pin page file with pin 2"); + + /* Test that a normal flush goes through */ + if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) + { + diag("Got error in flush_pagecache_blocks 3\n"); + res= 0; + goto err; + } + pagecache_unlock(&pagecache, + &file1, + 0, + PAGECACHE_LOCK_READ_UNLOCK, + PAGECACHE_UNPIN, + 0, 0, 0); + if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) + { + diag("Got error in flush_pagecache_blocks 4\n"); + res= 0; + goto err; + } + ok((res&= test(test_file(file1, file1_name, TEST_PAGE_SIZE*2, TEST_PAGE_SIZE, + simple_pin_test_file2))), + "Simple pin page result file 2"); + if (res) + reset_file(&file1, file1_name); +err: + free(buffw); + DBUG_RETURN(res); +} + +/* Checks pins without lock. 
*/ int simple_pin_no_lock_test() @@ -357,7 +444,7 @@ int simple_pin_no_lock_test() We have to get error because one page of the file is pinned, other page should be flushed */ - if (!flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) + if (!flush_pagecache_blocks(&pagecache, &file1, FLUSH_KEEP_LAZY)) { diag("Did not get error in flush_pagecache_blocks 2\n"); res= 0; @@ -392,7 +479,7 @@ int simple_pin_no_lock_test() pagecache_unlock_by_link(&pagecache, link, PAGECACHE_LOCK_WRITE_UNLOCK, PAGECACHE_PIN_LEFT_PINNED, 0, 0, 1, FALSE); - if (!flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) + if (!flush_pagecache_blocks(&pagecache, &file1, FLUSH_KEEP_LAZY)) { diag("Did not get error in flush_pagecache_blocks 3\n"); res= 0; @@ -609,6 +696,7 @@ static void *test_thread(void *arg) if (!simple_read_write_test() || !simple_read_change_write_read_test() || !simple_pin_test() || + !simple_pin_test2() || !simple_pin_no_lock_test() || !simple_delete_forget_test() || !simple_delete_flush_test()) @@ -657,8 +745,8 @@ int main(int argc __attribute__((unused)), DBUG_ENTER("main"); DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name())); - plan(16); - SKIP_BIG_TESTS(16) + plan(18); + SKIP_BIG_TESTS(18) { if ((tmp_file= my_open(file2_name, O_CREAT | O_TRUNC | O_RDWR, diff --git a/storage/maria/unittest/ma_test_all-t b/storage/maria/unittest/ma_test_all-t index 8e52f42b483..0b11daf7f98 100755 --- a/storage/maria/unittest/ma_test_all-t +++ b/storage/maria/unittest/ma_test_all-t @@ -20,7 +20,7 @@ $opt_number_of_tests= 0; $opt_run_tests= undef(); my $maria_path; # path to "storage/maria" -my $maria_exe_path; # path to executables (ma_test1, maria_chk etc) +my $maria_exe_path; # path to executables (ma_test1, aria_chk etc) my $my_progname= $0; $my_progname=~ s/.*[\/]//; my $runtime_error= 0; # Return 1 if error(s) occur during run @@ -146,7 +146,7 @@ sub run_tests # clean-up # - unlink <*.TMD maria_log*>; # Delete temporary files + unlink <*.TMD aria_log*>; # 
Delete temporary files # # Run tests @@ -280,38 +280,38 @@ sub run_check_tests for ($i= 0; defined($ma_test1_opt[$i]); $i++) { - unlink <maria_log_control maria_log.*>; + unlink <aria_log_control aria_log.*>; ok("$maria_exe_path/ma_test1$suffix $silent $ma_test1_opt[$i][0] $row_type", $verbose, $i + 1); - ok("$maria_exe_path/maria_chk$suffix $ma_test1_opt[$i][1] test1", + ok("$maria_exe_path/aria_chk$suffix $ma_test1_opt[$i][1] test1", $verbose, $i + 1); } # # These tests are outside the loops. Make sure to include them in # nr_tests manually # - ok("$maria_exe_path/maria_pack$suffix --force -s test1", $verbose, 0); - ok("$maria_exe_path/maria_chk$suffix -ess test1", $verbose, 0); + ok("$maria_exe_path/aria_pack$suffix --force -s test1", $verbose, 0); + ok("$maria_exe_path/aria_chk$suffix -ess test1", $verbose, 0); for ($i= 0; defined($ma_test2_opt[$i]); $i++) { - unlink <maria_log_control maria_log.*>; + unlink <aria_log_control aria_log.*>; ok("$maria_exe_path/ma_test2$suffix $silent $ma_test2_opt[$i][0] $row_type", $verbose, $i + 1); - ok("$maria_exe_path/maria_chk$suffix $ma_test2_opt[$i][1] test2", + ok("$maria_exe_path/aria_chk$suffix $ma_test2_opt[$i][1] test2", $verbose, $i + 1); } for ($i= 0; defined($ma_rt_test_opt[$i]); $i++) { - unlink <maria_log_control maria_log.*>; + unlink <aria_log_control aria_log.*>; ok("$maria_exe_path/ma_rt_test$suffix $silent $ma_rt_test_opt[$i][0] $row_type", $verbose, $i + 1); - ok("$maria_exe_path/maria_chk$suffix $ma_rt_test_opt[$i][1] rt_test", + ok("$maria_exe_path/aria_chk$suffix $ma_rt_test_opt[$i][1] rt_test", $verbose, $i + 1); } - unlink <maria_log_control maria_log.*>; + unlink <aria_log_control aria_log.*>; return 0; } @@ -327,34 +327,34 @@ sub run_repair_tests() my @t= ($NEW_TEST, "$maria_exe_path/ma_test1$suffix $silent --checksum $row_type", - "$maria_exe_path/maria_chk$suffix -se test1", - "$maria_exe_path/maria_chk$suffix --silent -re --transaction-log test1", - "$maria_exe_path/maria_chk$suffix -rs test1", - 
"$maria_exe_path/maria_chk$suffix -se test1", - "$maria_exe_path/maria_chk$suffix -rqs test1", - "$maria_exe_path/maria_chk$suffix -se test1", - "$maria_exe_path/maria_chk$suffix -rs --correct-checksum test1", - "$maria_exe_path/maria_chk$suffix -se test1", - "$maria_exe_path/maria_chk$suffix -rqs --correct-checksum test1", - "$maria_exe_path/maria_chk$suffix -se test1", - "$maria_exe_path/maria_chk$suffix -ros --correct-checksum test1", - "$maria_exe_path/maria_chk$suffix -se test1", - "$maria_exe_path/maria_chk$suffix -rqos --correct-checksum test1", - "$maria_exe_path/maria_chk$suffix -se test1", - "$maria_exe_path/maria_chk$suffix -sz test1", - "$maria_exe_path/maria_chk$suffix -se test1", + "$maria_exe_path/aria_chk$suffix -se test1", + "$maria_exe_path/aria_chk$suffix --silent -re --transaction-log test1", + "$maria_exe_path/aria_chk$suffix -rs test1", + "$maria_exe_path/aria_chk$suffix -se test1", + "$maria_exe_path/aria_chk$suffix -rqs test1", + "$maria_exe_path/aria_chk$suffix -se test1", + "$maria_exe_path/aria_chk$suffix -rs --correct-checksum test1", + "$maria_exe_path/aria_chk$suffix -se test1", + "$maria_exe_path/aria_chk$suffix -rqs --correct-checksum test1", + "$maria_exe_path/aria_chk$suffix -se test1", + "$maria_exe_path/aria_chk$suffix -ros --correct-checksum test1", + "$maria_exe_path/aria_chk$suffix -se test1", + "$maria_exe_path/aria_chk$suffix -rqos --correct-checksum test1", + "$maria_exe_path/aria_chk$suffix -se test1", + "$maria_exe_path/aria_chk$suffix -sz test1", + "$maria_exe_path/aria_chk$suffix -se test1", "$maria_exe_path/ma_test2$suffix $silent -c -d1 $row_type", - "$maria_exe_path/maria_chk$suffix -s --parallel-recover test2", - "$maria_exe_path/maria_chk$suffix -se test2", - "$maria_exe_path/maria_chk$suffix -s --parallel-recover --quick test2", - "$maria_exe_path/maria_chk$suffix -se test2", + "$maria_exe_path/aria_chk$suffix -s --parallel-recover test2", + "$maria_exe_path/aria_chk$suffix -se test2", + 
"$maria_exe_path/aria_chk$suffix -s --parallel-recover --quick test2", + "$maria_exe_path/aria_chk$suffix -se test2", "$maria_exe_path/ma_test2$suffix $silent -c $row_type", - "$maria_exe_path/maria_chk$suffix -se test2", - "$maria_exe_path/maria_chk$suffix -sr test2", - "$maria_exe_path/maria_chk$suffix -se test2", + "$maria_exe_path/aria_chk$suffix -se test2", + "$maria_exe_path/aria_chk$suffix -sr test2", + "$maria_exe_path/aria_chk$suffix -se test2", "$maria_exe_path/ma_test2$suffix $silent -c -t4 -b32768 $row_type", - "$maria_exe_path/maria_chk$suffix -s --zerofill test1", - "$maria_exe_path/maria_chk$suffix -se test1" + "$maria_exe_path/aria_chk$suffix -s --zerofill test1", + "$maria_exe_path/aria_chk$suffix -se test1" ); return &count_tests(\@t) if ($count); @@ -373,48 +373,48 @@ sub run_pack_tests() my @t= ($NEW_TEST, "$maria_exe_path/ma_test1$suffix $silent --checksum $row_type", - "$maria_exe_path/maria_pack$suffix --force -s test1", - "$maria_exe_path/maria_chk$suffix -ess test1", - "$maria_exe_path/maria_chk$suffix -rqs test1", - "$maria_exe_path/maria_chk$suffix -es test1", - "$maria_exe_path/maria_chk$suffix -rs test1", - "$maria_exe_path/maria_chk$suffix -es test1", - "$maria_exe_path/maria_chk$suffix -rus test1", - "$maria_exe_path/maria_chk$suffix -es test1", + "$maria_exe_path/aria_pack$suffix --force -s test1", + "$maria_exe_path/aria_chk$suffix -ess test1", + "$maria_exe_path/aria_chk$suffix -rqs test1", + "$maria_exe_path/aria_chk$suffix -es test1", + "$maria_exe_path/aria_chk$suffix -rs test1", + "$maria_exe_path/aria_chk$suffix -es test1", + "$maria_exe_path/aria_chk$suffix -rus test1", + "$maria_exe_path/aria_chk$suffix -es test1", $NEW_TEST, "$maria_exe_path/ma_test1$suffix $silent --checksum $row_type", - "$maria_exe_path/maria_pack$suffix --force -s test1", - "$maria_exe_path/maria_chk$suffix -rus --safe-recover test1", - "$maria_exe_path/maria_chk$suffix -es test1", + "$maria_exe_path/aria_pack$suffix --force -s test1", + 
"$maria_exe_path/aria_chk$suffix -rus --safe-recover test1", + "$maria_exe_path/aria_chk$suffix -es test1", $NEW_TEST, "$maria_exe_path/ma_test1$suffix $silent --checksum -S $row_type", - "$maria_exe_path/maria_chk$suffix -se test1", - "$maria_exe_path/maria_chk$suffix -ros test1", - "$maria_exe_path/maria_chk$suffix -rqs test1", - "$maria_exe_path/maria_chk$suffix -se test1", + "$maria_exe_path/aria_chk$suffix -se test1", + "$maria_exe_path/aria_chk$suffix -ros test1", + "$maria_exe_path/aria_chk$suffix -rqs test1", + "$maria_exe_path/aria_chk$suffix -se test1", $NEW_TEST, - "$maria_exe_path/maria_pack$suffix --force -s test1", - "$maria_exe_path/maria_chk$suffix -rqs test1", - "$maria_exe_path/maria_chk$suffix -es test1", - "$maria_exe_path/maria_chk$suffix -rus test1", - "$maria_exe_path/maria_chk$suffix -es test1", + "$maria_exe_path/aria_pack$suffix --force -s test1", + "$maria_exe_path/aria_chk$suffix -rqs test1", + "$maria_exe_path/aria_chk$suffix -es test1", + "$maria_exe_path/aria_chk$suffix -rus test1", + "$maria_exe_path/aria_chk$suffix -es test1", $NEW_TEST, "$maria_exe_path/ma_test2$suffix $silent -c -d1 $row_type", - "$maria_exe_path/maria_chk$suffix -s --parallel-recover test2", - "$maria_exe_path/maria_chk$suffix -se test2", - "$maria_exe_path/maria_chk$suffix -s --unpack --parallel-recover test2", - "$maria_exe_path/maria_chk$suffix -se test2", - "$maria_exe_path/maria_pack$suffix --force -s test1", - "$maria_exe_path/maria_chk$suffix -s --unpack --parallel-recover test2", - "$maria_exe_path/maria_chk$suffix -se test2", + "$maria_exe_path/aria_chk$suffix -s --parallel-recover test2", + "$maria_exe_path/aria_chk$suffix -se test2", + "$maria_exe_path/aria_chk$suffix -s --unpack --parallel-recover test2", + "$maria_exe_path/aria_chk$suffix -se test2", + "$maria_exe_path/aria_pack$suffix --force -s test1", + "$maria_exe_path/aria_chk$suffix -s --unpack --parallel-recover test2", + "$maria_exe_path/aria_chk$suffix -se test2", $NEW_TEST, 
"$maria_exe_path/ma_test1$suffix $silent -c $row_type", "cp test1.MAD test2.MAD", "cp test1.MAI test2.MAI", - "$maria_exe_path/maria_pack$suffix --force -s --join=test3 test1 test2", - "$maria_exe_path/maria_chk -s test3", - "$maria_exe_path/maria_chk -s --safe-recover test3", - "$maria_exe_path/maria_chk -s test3" + "$maria_exe_path/aria_pack$suffix --force -s --join=test3 test1 test2", + "$maria_exe_path/aria_chk -s test3", + "$maria_exe_path/aria_chk -s --safe-recover test3", + "$maria_exe_path/aria_chk -s test3" ); return &count_tests(\@t) if ($count); @@ -435,7 +435,7 @@ sub run_tests_on_warnings_and_errors ok("$maria_exe_path/ma_test2$suffix $silent -L -K -W -P -S -R1 -m500", $verbose, 0); - ok("$maria_exe_path/maria_chk$suffix -sm test2", $verbose, 0); + ok("$maria_exe_path/aria_chk$suffix -sm test2", $verbose, 0); # ma_test2$suffix $silent -L -K -R1 -m2000 ; Should give error 135\n # In the following a failure is a success and success is a failure $com= "$maria_exe_path/ma_test2$suffix $silent -L -K -R1 -m2000 "; @@ -443,15 +443,15 @@ sub run_tests_on_warnings_and_errors ok($com, $verbose, 0, 1); ok("cat ma_test2_message.txt", $verbose, 0); ok("grep \"Error: 135\" ma_test2_message.txt > /dev/null", $verbose, 0); - # maria_exe_path/maria_chk$suffix -sm test2 will warn that + # maria_exe_path/aria_chk$suffix -sm test2 will warn that # Datafile is almost full - ok("$maria_exe_path/maria_chk$suffix -sm test2 >ma_test2_message.txt 2>&1", + ok("$maria_exe_path/aria_chk$suffix -sm test2 >ma_test2_message.txt 2>&1", $verbose, 0); ok("cat ma_test2_message.txt", $verbose, 0); ok("grep \"warning: Datafile is almost full\" ma_test2_message.txt>/dev/null", $verbose, 0); unlink <ma_test2_message.txt>; - ok("$maria_exe_path/maria_chk$suffix -ssm test2", $verbose, 0); + ok("$maria_exe_path/aria_chk$suffix -ssm test2", $verbose, 0); return 0; } @@ -480,31 +480,31 @@ sub run_tests_on_clrs my @t= ($NEW_TEST, "$maria_exe_path/ma_test2$suffix -s -L -K -W -P -M -T -c -b -t2 
-A1", - "cp maria_log_control tmp", - "$maria_exe_path/maria_read_log$suffix -a -s", - "$maria_exe_path/maria_chk$suffix -s -e test2", - "cp tmp/maria_log_control .", + "cp aria_log_control tmp", + "$maria_exe_path/aria_read_log$suffix -a -s", + "$maria_exe_path/aria_chk$suffix -s -e test2", + "cp tmp/aria_log_control .", "rm test2.MA?", - "$maria_exe_path/maria_read_log$suffix -a -s", - "$maria_exe_path/maria_chk$suffix -s -e test2", + "$maria_exe_path/aria_read_log$suffix -a -s", + "$maria_exe_path/aria_chk$suffix -s -e test2", "rm test2.MA?", $NEW_TEST, "$maria_exe_path/ma_test2$suffix -s -L -K -W -P -M -T -c -b -t2 -A1", - "$maria_exe_path/maria_read_log$suffix -a -s", - "$maria_exe_path/maria_chk$suffix -s -e test2", + "$maria_exe_path/aria_read_log$suffix -a -s", + "$maria_exe_path/aria_chk$suffix -s -e test2", "rm test2.MA?", - "$maria_exe_path/maria_read_log$suffix -a -s", - "$maria_exe_path/maria_chk$suffix -e -s test2", + "$maria_exe_path/aria_read_log$suffix -a -s", + "$maria_exe_path/aria_chk$suffix -e -s test2", "rm test2.MA?", $NEW_TEST, "$maria_exe_path/ma_test2$suffix -s -L -K -W -P -M -T -c -b32768 -t4 -A1", - "$maria_exe_path/maria_read_log$suffix -a -s", - "$maria_exe_path/maria_chk$suffix -es test2", - "$maria_exe_path/maria_read_log$suffix -a -s", - "$maria_exe_path/maria_chk$suffix -es test2", + "$maria_exe_path/aria_read_log$suffix -a -s", + "$maria_exe_path/aria_chk$suffix -es test2", + "$maria_exe_path/aria_read_log$suffix -a -s", + "$maria_exe_path/aria_chk$suffix -es test2", "rm test2.MA?", - "$maria_exe_path/maria_read_log$suffix -a -s", - "$maria_exe_path/maria_chk$suffix -es test2", + "$maria_exe_path/aria_read_log$suffix -a -s", + "$maria_exe_path/aria_chk$suffix -es test2", "rm test2.MA?" 
); @@ -666,7 +666,7 @@ sub run_test_bunch { if ($clear && @$t[$i] eq $NEW_TEST) { - unlink <maria_log.* maria_log_control>; + unlink <aria_log.* aria_log_control>; } if (@$t[$i] ne $NEW_TEST) { @@ -686,7 +686,7 @@ $my_progname version $VER Description: -Run various Maria related tests. Typically used via make test as a unittest. +Run various Aria related tests. Typically used via make test as a unittest. Options --help Show this help and exit. diff --git a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c index bfbba5407c1..1644aa4885c 100644 --- a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c +++ b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c @@ -31,7 +31,7 @@ static const char *default_dbug_option; #define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512) #define LOG_FLAGS 0 -static char *first_translog_file= (char*)"maria_log.00000001"; +static char *first_translog_file= (char*)"aria_log.00000001"; static char *file1_name= (char*)"page_cache_test_file_1"; static PAGECACHE_FILE file1; diff --git a/storage/maria/unittest/ma_test_recovery.expected b/storage/maria/unittest/ma_test_recovery.expected index b95575173ee..5f7dd54e673 100644 --- a/storage/maria/unittest/ma_test_recovery.expected +++ b/storage/maria/unittest/ma_test_recovery.expected @@ -67,7 +67,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t2 -A1 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -76,7 +76,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! 
+Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -85,7 +85,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -96,7 +96,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t6 -A1 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -105,7 +105,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -114,7 +114,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -164,7 +164,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t2 -A2 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -173,7 +173,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -182,7 +182,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -193,7 +193,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t6 -A2 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -202,7 +202,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! 
+Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -211,7 +211,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -261,7 +261,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t2 -A3 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -270,7 +270,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -279,7 +279,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -290,7 +290,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t6 -A3 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -299,7 +299,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -308,7 +308,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -358,7 +358,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t2 -A4 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -367,7 +367,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! 
+Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -376,7 +376,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -387,7 +387,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -t6 -A4 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -396,7 +396,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -405,7 +405,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -455,7 +455,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -t2 -A1 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -464,7 +464,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -473,7 +473,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -484,7 +484,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -t6 -A1 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -493,7 +493,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! 
+Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -502,7 +502,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -552,7 +552,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -t2 -A2 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -561,7 +561,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -570,7 +570,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -581,7 +581,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -t6 -A2 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -590,7 +590,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -599,7 +599,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -649,7 +649,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -t2 -A3 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -658,7 +658,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! 
+Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -667,7 +667,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -678,7 +678,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -t6 -A3 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -687,7 +687,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -696,7 +696,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -746,7 +746,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -t2 -A4 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -755,7 +755,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -764,7 +764,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -775,7 +775,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -t6 -A4 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -784,7 +784,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! 
+Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -793,7 +793,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -843,7 +843,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t2 -A1 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -852,7 +852,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -861,7 +861,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -872,7 +872,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t6 -A1 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -881,7 +881,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -890,7 +890,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -940,7 +940,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t2 -A2 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -949,7 +949,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! 
+Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -958,7 +958,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -969,7 +969,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t6 -A2 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -978,7 +978,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -987,7 +987,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1037,7 +1037,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t2 -A3 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1046,7 +1046,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1055,7 +1055,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1066,7 +1066,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t6 -A3 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1075,7 +1075,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! 
+Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1084,7 +1084,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1134,7 +1134,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t2 -A4 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1143,7 +1143,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1152,7 +1152,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1163,7 +1163,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -H1 -t6 -A4 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1172,7 +1172,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1181,7 +1181,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1231,7 +1231,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -H1 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -H1 -t2 -A1 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1240,7 +1240,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! 
========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1249,7 +1249,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1260,7 +1260,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -H1 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -H1 -t6 -A1 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1269,7 +1269,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1278,7 +1278,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1328,7 +1328,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -H1 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -H1 -t2 -A2 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1337,7 +1337,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1346,7 +1346,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1357,7 +1357,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -H1 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -H1 -t6 -A2 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1366,7 +1366,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! 
========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1375,7 +1375,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1425,7 +1425,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -H1 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -H1 -t2 -A3 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1434,7 +1434,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1443,7 +1443,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1454,7 +1454,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -H1 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -H1 -t6 -A3 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1463,7 +1463,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1472,7 +1472,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1522,7 +1522,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -H1 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -H1 -t2 -A4 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1531,7 +1531,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! 
========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1540,7 +1540,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1551,7 +1551,7 @@ TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -H1 -t1 (commit at end) TEST WITH ma_test2 -s -L -K -W -P -M -T -c -b32768 -H1 -t6 -A4 (additional aborted work) Dying on request without maria_commit()/maria_close() applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1560,7 +1560,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing idempotency applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! ========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable @@ -1569,7 +1569,7 @@ Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF END======= testing applying of CLRs to recreate table applying log -Differences in maria_chk -dvv, recovery not yet perfect ! +Differences in aria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= 6c6 < Status: checked,analyzed,optimized keys,sorted index pages,zerofilled,movable diff --git a/storage/maria/unittest/ma_test_recovery.pl b/storage/maria/unittest/ma_test_recovery.pl index 58b9cc3b56b..d9be82f4e58 100755 --- a/storage/maria/unittest/ma_test_recovery.pl +++ b/storage/maria/unittest/ma_test_recovery.pl @@ -17,7 +17,7 @@ $opt_abort_on_error=0; my $silent= "-s"; my $maria_path; # path to "storage/maria" -my $maria_exe_path; # path to executables (ma_test1, maria_chk etc) +my $maria_exe_path; # path to executables (ma_test1, aria_chk etc) my $tmp= "./tmp"; my $my_progname= $0; my $suffix; @@ -74,7 +74,7 @@ sub main { mkdir $tmp; } - print "MARIA RECOVERY TESTS\n"; + print "ARIA RECOVERY TESTS\n"; # To not flood the screen, we redirect all the commands below to a text file # and just give a final error if their output is not as expected @@ -98,7 +98,7 @@ sub main foreach my $prog (@t) { - unlink <maria_log.* maria_log_control>; + unlink <aria_log.* aria_log_control>; my $prog_no_suffix= $prog; $prog_no_suffix=~ s/$suffix// if ($suffix); print MY_LOG "TEST WITH $prog_no_suffix\n"; @@ -113,11 +113,11 @@ sub main { die("can't guess table name"); } - $com= "$maria_exe_path/maria_chk$suffix -dvv $table "; + $com= "$maria_exe_path/aria_chk$suffix -dvv $table "; $com.= "| grep -v \"Creation time:\" | grep -v \"file length\" | grep -v \"LSNs:\" | grep -v \"UUID:\""; - $com.= "> $tmp/maria_chk_message.good.txt 2>&1"; + $com.= "> $tmp/aria_chk_message.good.txt 2>&1"; my_exec($com); - my $checksum= my_exec("$maria_exe_path/maria_chk$suffix -dss $table"); + my $checksum= my_exec("$maria_exe_path/aria_chk$suffix -dss $table"); move("$table.MAD", "$tmp/$table-good.MAD") || die "Can't move $table.MAD to $tmp/$table-good.MAD\n"; move("$table.MAI", "$tmp/$table-good.MAI") || @@ -181,7 +181,7 @@ sub main } $commit_run_args= $t2[$k + 1]; $abort_run_args= $t2[$k + 2]; - unlink <maria_log.* maria_log_control>; + unlink <aria_log.* 
aria_log_control>; my $prog_no_suffix= $prog; $prog_no_suffix=~ s/$suffix// if ($suffix); print MY_LOG "TEST WITH $prog_no_suffix $commit_run_args (commit at end)\n"; @@ -196,17 +196,17 @@ sub main { die("can't guess table name"); } - $com= "$maria_exe_path/maria_chk$suffix -dvv $table "; + $com= "$maria_exe_path/aria_chk$suffix -dvv $table "; $com.= "| grep -v \"Creation time:\" | grep -v \"file length\" | grep -v \"LSNs:\" | grep -v \"UUID:\" "; - $com.= "> $tmp/maria_chk_message.good.txt 2>&1"; + $com.= "> $tmp/aria_chk_message.good.txt 2>&1"; $res= my_exec($com); print MY_LOG $res; - $checksum= my_exec("$maria_exe_path/maria_chk$suffix -dss $table"); + $checksum= my_exec("$maria_exe_path/aria_chk$suffix -dss $table"); move("$table.MAD", "$tmp/$table-good.MAD") || die "Can't move $table.MAD to $tmp/$table-good.MAD\n"; move("$table.MAI", "$tmp/$table-good.MAI") || die "Can't move $table.MAI to $tmp/$table-good.MAI\n"; - unlink <maria_log.* maria_log_control>; + unlink <aria_log.* aria_log_control>; print MY_LOG "TEST WITH $prog_no_suffix $abort_run_args$test_undo[$j] (additional aborted work)\n"; $res= my_exec("$maria_exe_path/$prog $abort_run_args$test_undo[$j]"); print MY_LOG $res; @@ -216,10 +216,10 @@ sub main die "Can't copy $table.MAI to $tmp/$table-before_undo.MAI\n"; # The lines below seem unneeded, will be removed soon - # We have to copy and restore logs, as running maria_read_log will - # change the maria_control_file - # rm -f $tmp/maria_log.* $tmp/maria_log_control - # cp $maria_path/maria_log* $tmp + # We have to copy and restore logs, as running aria_read_log will + # change the aria_control_file + # rm -f $tmp/aria_log.* $tmp/aria_log_control + # cp $maria_path/aria_log* $tmp if ($test_undo[$j] != 3) { apply_log($table, "shouldchangelog"); # should undo aborted work @@ -246,13 +246,13 @@ sub main print MY_LOG $res; print MY_LOG "testing applying of CLRs to recreate table\n"; unlink <$table.MA?>; - # cp $tmp/maria_log* $maria_path #unneeded + # cp 
$tmp/aria_log* $maria_path #unneeded apply_log($table, "shouldnotchangelog"); check_table_is_same($table, $checksum); $res= physical_cmp($table, "$tmp/$table-after_undo"); print MY_LOG $res; } - unlink <$table.* $tmp/$table* $tmp/maria_chk_*.txt $tmp/maria_read_log_$table.txt>; + unlink <$table.* $tmp/$table* $tmp/aria_chk_*.txt $tmp/aria_read_log_$table.txt>; } } } @@ -263,7 +263,7 @@ sub main } close(MY_LOG); - # also note that maria_chk -dvv shows differences for ma_test2 in UNDO phase, + # also note that aria_chk -dvv shows differences for ma_test2 in UNDO phase, # this is normal: removing records does not shrink the data/key file, # does not put back the "analyzed,optimized keys"(etc) index state. `diff -b $maria_path/unittest/ma_test_recovery.expected $tmp/ma_test_recovery.output`; @@ -296,29 +296,29 @@ sub check_table_is_same print "checking if table $table has changed\n"; } - $com= "$maria_exe_path/maria_chk$suffix -dvv $table | grep -v \"Creation time:\" "; - $com.= "| grep -v \"file length\" | grep -v \"LSNs:\" | grep -v \"UUID:\" > $tmp/maria_chk_message.txt 2>&1"; + $com= "$maria_exe_path/aria_chk$suffix -dvv $table | grep -v \"Creation time:\" "; + $com.= "| grep -v \"file length\" | grep -v \"LSNs:\" | grep -v \"UUID:\" > $tmp/aria_chk_message.txt 2>&1"; $res= `$com`; print MY_LOG $res; - $res= `$maria_exe_path/maria_chk$suffix -s -e --read-only $table`; + $res= `$maria_exe_path/aria_chk$suffix -ss -e --read-only $table`; print MY_LOG $res; - $checksum2= `$maria_exe_path/maria_chk$suffix -dss $table`; + $checksum2= `$maria_exe_path/aria_chk$suffix -dss $table`; if ("$checksum" ne "$checksum2") { print MY_LOG "checksum differs for $table before and after recovery\n"; return 1; } - $com= "diff $tmp/maria_chk_message.good.txt $tmp/maria_chk_message.txt "; - $com.= "> $tmp/maria_chk_diff.txt || true"; + $com= "diff $tmp/aria_chk_message.good.txt $tmp/aria_chk_message.txt "; + $com.= "> $tmp/aria_chk_diff.txt || true"; $res= `$com`; print MY_LOG $res; - if 
(-s "$tmp/maria_chk_diff.txt") + if (-s "$tmp/aria_chk_diff.txt") { - print MY_LOG "Differences in maria_chk -dvv, recovery not yet perfect !\n"; + print MY_LOG "Differences in aria_chk -dvv, recovery not yet perfect !\n"; print MY_LOG "========DIFF START=======\n"; - open(MY_FILE, "<$tmp/maria_chk_diff.txt") || die "Can't open file maria_chk_diff.txt\n"; + open(MY_FILE, "<$tmp/aria_chk_diff.txt") || die "Can't open file aria_chk_diff.txt\n"; while (<MY_FILE>) { print MY_LOG $_; @@ -346,13 +346,13 @@ sub apply_log print MY_LOG "bad argument '$shouldchangelog'\n"; return 1; } - foreach (<maria_log.*>) + foreach (<aria_log.*>) { $log_md5.= md5_conv($_); } print MY_LOG "applying log\n"; - my_exec("$maria_exe_path/maria_read_log$suffix -a > $tmp/maria_read_log_$table.txt"); - foreach (<maria_log.*>) + my_exec("$maria_exe_path/aria_read_log$suffix -a > $tmp/aria_read_log_$table.txt"); + foreach (<aria_log.*>) { $log_md5_2.= md5_conv($_); } @@ -360,13 +360,13 @@ sub apply_log { if ("$shouldchangelog" eq "shouldnotchangelog") { - print MY_LOG "maria_read_log should not have modified the log\n"; + print MY_LOG "aria_read_log should not have modified the log\n"; return 1; } } elsif ("$shouldchangelog" eq "shouldchangelog") { - print MY_LOG "maria_read_log should have modified the log\n"; + print MY_LOG "aria_read_log should have modified the log\n"; return 1; } } @@ -415,7 +415,7 @@ sub physical_cmp # save original tables to restore them later copy("$table.MAD", "$tmp/before_zerofill$table_no.MAD") || die(); copy("$table.MAI", "$tmp/before_zerofill$table_no.MAI") || die(); - $com= "$maria_exe_path/maria_chk$suffix -s --zerofill-keep-lsn $table"; + $com= "$maria_exe_path/aria_chk$suffix -ss --zerofill-keep-lsn $table"; $res= `$com`; print MY_LOG $res; $table_no= $table_no + 1; @@ -467,7 +467,7 @@ $my_progname version $VER Description: -Run various maria recovery tests and print the results +Run various Aria recovery tests and print the results Options --help Show this help 
and exit. diff --git a/storage/myisam/CMakeLists.txt b/storage/myisam/CMakeLists.txt index 1a667e271af..7744f4337ef 100644 --- a/storage/myisam/CMakeLists.txt +++ b/storage/myisam/CMakeLists.txt @@ -70,4 +70,6 @@ IF(NOT SOURCE_SUBLIBS) MYSQL_EMBED_MANIFEST("myisampack" "asInvoker") ENDIF(EMBED_MANIFESTS) + INSTALL(TARGETS myisam_ftdump myisamchk myisamlog myisampack DESTINATION bin COMPONENT runtime) + ENDIF(NOT SOURCE_SUBLIBS) diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc index 8f30f31f0db..cb7c8d49d9f 100644 --- a/storage/myisam/ha_myisam.cc +++ b/storage/myisam/ha_myisam.cc @@ -1591,11 +1591,11 @@ void ha_myisam::start_bulk_insert(ha_rows rows) != 0 Error */ -int ha_myisam::end_bulk_insert(bool abort) +int ha_myisam::end_bulk_insert() { mi_end_bulk_insert(file); int err=mi_extra(file, HA_EXTRA_NO_CACHE, 0); - if (!err && !abort) + if (!err && !file->s->deleting) { if (can_enable_indexes) { diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h index db3c4737811..3fed26632fa 100644 --- a/storage/myisam/ha_myisam.h +++ b/storage/myisam/ha_myisam.h @@ -115,7 +115,7 @@ class ha_myisam: public handler int enable_indexes(uint mode); int indexes_are_disabled(void); void start_bulk_insert(ha_rows rows); - int end_bulk_insert(bool abort); + int end_bulk_insert(); ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key); void update_create_info(HA_CREATE_INFO *create_info); int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info); diff --git a/storage/myisam/mi_dbug.c b/storage/myisam/mi_dbug.c index 659abdce131..1a61d4823b5 100644 --- a/storage/myisam/mi_dbug.c +++ b/storage/myisam/mi_dbug.c @@ -118,7 +118,7 @@ void _mi_print_key(FILE *stream, register HA_KEYSEG *keyseg, case HA_KEYTYPE_LONGLONG: { char buff[21]; - longlong2str(mi_sint8korr(key),buff,-10); + longlong10_to_str(mi_sint8korr(key),buff,-10); VOID(fprintf(stream,"%s",buff)); key=end; break; @@ -126,7 +126,7 @@ void _mi_print_key(FILE 
*stream, register HA_KEYSEG *keyseg, case HA_KEYTYPE_ULONGLONG: { char buff[21]; - longlong2str(mi_sint8korr(key),buff,10); + longlong10_to_str(mi_sint8korr(key),buff,10); VOID(fprintf(stream,"%s",buff)); key=end; break; diff --git a/storage/myisam/mi_dynrec.c b/storage/myisam/mi_dynrec.c index adae7076858..f08e9f32a3f 100644 --- a/storage/myisam/mi_dynrec.c +++ b/storage/myisam/mi_dynrec.c @@ -66,9 +66,12 @@ static int _mi_cmp_buffer(File file, const uchar *buff, my_off_t filepos, my_bool mi_dynmap_file(MI_INFO *info, my_off_t size) { DBUG_ENTER("mi_dynmap_file"); - if (size > (my_off_t) (~((size_t) 0))) + if (size == 0 || size > (my_off_t) (~((size_t) 0))) { - DBUG_PRINT("warning", ("File is too large for mmap")); + if (size) + DBUG_PRINT("warning", ("File is too large for mmap")); + else + DBUG_PRINT("warning", ("Do not mmap zero-length")); DBUG_RETURN(1); } /* @@ -116,7 +119,7 @@ int mi_munmap_file(MI_INFO *info) { int ret; DBUG_ENTER("mi_unmap_file"); - if ((ret= my_munmap(info->s->file_map, info->s->mmaped_length))) + if ((ret= my_munmap(info->s->file_map, (size_t) info->s->mmaped_length))) DBUG_RETURN(ret); info->s->file_read= mi_nommap_pread; info->s->file_write= mi_nommap_pwrite; diff --git a/storage/myisam/mi_locking.c b/storage/myisam/mi_locking.c index 860c9c57889..741b9b01986 100644 --- a/storage/myisam/mi_locking.c +++ b/storage/myisam/mi_locking.c @@ -29,7 +29,6 @@ int mi_lock_database(MI_INFO *info, int lock_type) int error; uint count; MYISAM_SHARE *share=info->s; - uint flag; DBUG_ENTER("mi_lock_database"); DBUG_PRINT("enter",("lock_type: %d old lock %d r_locks: %u w_locks: %u " "global_changed: %d open_count: %u name: '%s'", @@ -48,7 +47,7 @@ int mi_lock_database(MI_INFO *info, int lock_type) DBUG_RETURN(0); } - flag=error=0; + error= 0; pthread_mutex_lock(&share->intern_lock); if (share->kfile >= 0) /* May only be false on windows */ { @@ -128,14 +127,12 @@ int mi_lock_database(MI_INFO *info, int lock_type) { if (share->r_locks) { /* Only read 
locks left */ - flag=1; if (my_lock(share->kfile,F_RDLCK,0L,F_TO_EOF, MYF(MY_WME | MY_SEEK_NOT_DONE)) && !error) error=my_errno; } else if (!share->w_locks) { /* No more locks */ - flag=1; if (my_lock(share->kfile,F_UNLCK,0L,F_TO_EOF, MYF(MY_WME | MY_SEEK_NOT_DONE)) && !error) error=my_errno; @@ -156,7 +153,6 @@ int mi_lock_database(MI_INFO *info, int lock_type) */ if (share->w_locks == 1) { - flag=1; if (my_lock(share->kfile,lock_type,0L,F_TO_EOF, MYF(MY_SEEK_NOT_DONE))) { @@ -171,7 +167,6 @@ int mi_lock_database(MI_INFO *info, int lock_type) } if (!share->r_locks && !share->w_locks) { - flag=1; if (my_lock(share->kfile,lock_type,0L,F_TO_EOF, info->lock_wait | MY_SEEK_NOT_DONE)) { @@ -196,7 +191,6 @@ int mi_lock_database(MI_INFO *info, int lock_type) { /* Change READONLY to RW */ if (share->r_locks == 1) { - flag=1; if (my_lock(share->kfile,lock_type,0L,F_TO_EOF, MYF(info->lock_wait | MY_SEEK_NOT_DONE))) { @@ -213,7 +207,6 @@ int mi_lock_database(MI_INFO *info, int lock_type) { if (!share->w_locks) { - flag=1; if (my_lock(share->kfile,lock_type,0L,F_TO_EOF, info->lock_wait | MY_SEEK_NOT_DONE)) { @@ -260,11 +253,6 @@ int mi_lock_database(MI_INFO *info, int lock_type) } #endif pthread_mutex_unlock(&share->intern_lock); -#if defined(FULL_LOG) || defined(_lint) - lock_type|=(int) (flag << 8); /* Set bit to set if real lock */ - myisam_log_command(MI_LOG_LOCK,info,(uchar*) &lock_type,sizeof(lock_type), - error); -#endif DBUG_RETURN(error); } /* mi_lock_database */ diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c index 7436548c7e1..e4240dec90c 100644 --- a/storage/myisam/mi_open.c +++ b/storage/myisam/mi_open.c @@ -139,8 +139,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) my_errno= HA_ERR_NOT_A_TABLE; goto err; } - if (memcmp((uchar*) share->state.header.file_version, - (uchar*) myisam_file_magic, 4)) + if (bcmp(share->state.header.file_version, myisam_file_magic, 4)) { DBUG_PRINT("error",("Wrong header in %s",name_buff)); 
DBUG_DUMP("error_dump", share->state.header.file_version, diff --git a/storage/myisam/mi_page.c b/storage/myisam/mi_page.c index 17a5820d768..82acb801c90 100644 --- a/storage/myisam/mi_page.c +++ b/storage/myisam/mi_page.c @@ -49,7 +49,7 @@ uchar *_mi_fetch_keypage(register MI_INFO *info, MI_KEYDEF *keyinfo, { DBUG_PRINT("error",("page %lu had wrong page length: %u", (ulong) page, page_size)); - DBUG_DUMP("page",tmp, keyinfo->block_length); + DBUG_DUMP("page", tmp, keyinfo->block_length); info->last_keypage = HA_OFFSET_ERROR; mi_print_error(info->s, HA_ERR_CRASHED); my_errno = HA_ERR_CRASHED; diff --git a/storage/myisam/mi_search.c b/storage/myisam/mi_search.c index 03e0c72d7e0..52e28f8a5e9 100644 --- a/storage/myisam/mi_search.c +++ b/storage/myisam/mi_search.c @@ -819,7 +819,7 @@ uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, DBUG_PRINT("error", ("Found too long null packed key: %u of %u at 0x%lx", length, keyseg->length, (long) *page_pos)); - DBUG_DUMP("key",*page_pos,16); + DBUG_DUMP("key", *page_pos, 16); mi_print_error(keyinfo->share, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; return 0; @@ -876,7 +876,7 @@ uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, { DBUG_PRINT("error",("Found too long packed key: %u of %u at 0x%lx", length, keyseg->length, (long) *page_pos)); - DBUG_DUMP("key",*page_pos,16); + DBUG_DUMP("key", *page_pos, 16); mi_print_error(keyinfo->share, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; return 0; /* Error */ @@ -948,7 +948,7 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, DBUG_PRINT("error", ("Found too long binary packed key: %u of %u at 0x%lx", length, keyinfo->maxlength, (long) *page_pos)); - DBUG_DUMP("key",*page_pos,16); + DBUG_DUMP("key", *page_pos, 16); mi_print_error(keyinfo->share, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; DBUG_RETURN(0); /* Wrong key */ diff --git a/storage/myisam/mi_test1.c b/storage/myisam/mi_test1.c index aea8d86e380..39303fb80e5 100644 --- 
a/storage/myisam/mi_test1.c +++ b/storage/myisam/mi_test1.c @@ -539,21 +539,21 @@ static struct my_option my_long_options[] = {"debug", '#', "Undocumented", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, #endif - {"delete_rows", 'd', "Undocumented", (uchar**) &remove_count, - (uchar**) &remove_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0}, + {"delete_rows", 'd', "Undocumented", &remove_count, + &remove_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0}, {"help", '?', "Display help and exit", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"insert_rows", 'i', "Undocumented", (uchar**) &insert_count, - (uchar**) &insert_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0}, + {"insert_rows", 'i', "Undocumented", &insert_count, + &insert_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0}, {"key_alpha", 'a', "Use a key of type HA_KEYTYPE_TEXT", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"key_binary_pack", 'B', "Undocumented", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"key_blob", 'b', "Undocumented", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"key_cache", 'K', "Undocumented", (uchar**) &key_cacheing, - (uchar**) &key_cacheing, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"key_length", 'k', "Undocumented", (uchar**) &key_length, (uchar**) &key_length, + {"key_cache", 'K', "Undocumented", &key_cacheing, + &key_cacheing, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key_length", 'k', "Undocumented", &key_length, &key_length, 0, GET_UINT, REQUIRED_ARG, 6, 0, 0, 0, 0, 0}, {"key_multiple", 'm', "Undocumented", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, @@ -564,21 +564,21 @@ static struct my_option my_long_options[] = {"key_varchar", 'w', "Test VARCHAR keys", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"null_fields", 'N', "Define fields with NULL", - (uchar**) &null_fields, (uchar**) &null_fields, 0, GET_BOOL, NO_ARG, + &null_fields, &null_fields, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"row_fixed_size", 'S', "Undocumented", 
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"row_pointer_size", 'R', "Undocumented", (uchar**) &rec_pointer_size, - (uchar**) &rec_pointer_size, 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"row_pointer_size", 'R', "Undocumented", &rec_pointer_size, + &rec_pointer_size, 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"silent", 's', "Undocumented", - (uchar**) &silent, (uchar**) &silent, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"skip_update", 'U', "Undocumented", (uchar**) &skip_update, - (uchar**) &skip_update, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"unique", 'C', "Undocumented", (uchar**) &opt_unique, (uchar**) &opt_unique, 0, + &silent, &silent, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"skip_update", 'U', "Undocumented", &skip_update, + &skip_update, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"unique", 'C', "Undocumented", &opt_unique, &opt_unique, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"update_rows", 'u', "Undocumented", (uchar**) &update_count, - (uchar**) &update_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0}, - {"verbose", 'v', "Be more verbose", (uchar**) &verbose, (uchar**) &verbose, 0, + {"update_rows", 'u', "Undocumented", &update_count, + &update_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0}, + {"verbose", 'v', "Be more verbose", &verbose, &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"version", 'V', "Print version number and exit", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, diff --git a/storage/myisam/mi_test2.c b/storage/myisam/mi_test2.c index 67da89e55ef..527c5e03a27 100644 --- a/storage/myisam/mi_test2.c +++ b/storage/myisam/mi_test2.c @@ -410,7 +410,7 @@ int main(int argc, char *argv[]) } ant=0; while (mi_rprev(file,read_record3,0) == 0 && - bcmp(read_record3+start,key,length) == 0) ant++; + memcmp(read_record3+start,key,length) == 0) ant++; if (ant != dupp_keys) { printf("prev: Found: %d records of %d\n",ant,dupp_keys); @@ -448,7 +448,7 @@ int main(int argc, char *argv[]) goto end; } if 
(mi_rlast(file,read_record2,0) || - bcmp(read_record2,read_record3,reclength)) + memcmp(read_record2,read_record3,reclength)) { printf("Can't find last record\n"); DBUG_DUMP("record2",(uchar*) read_record2,reclength); @@ -463,7 +463,7 @@ int main(int argc, char *argv[]) printf("prev: I found: %d records of %d\n",ant,write_count); goto end; } - if (bcmp(read_record,read_record3,reclength)) + if (memcmp(read_record,read_record3,reclength)) { printf("Can't find first record\n"); goto end; @@ -478,7 +478,7 @@ int main(int argc, char *argv[]) mi_rprev(file,read_record3,0) == 0 || mi_rnext(file,read_record3,0)) goto err; - if (bcmp(read_record,read_record3,reclength) != 0) + if (memcmp(read_record,read_record3,reclength) != 0) printf("Can't find first record\n"); if (!silent) @@ -490,7 +490,7 @@ int main(int argc, char *argv[]) mi_rnext(file,read_record3,0) == 0 || mi_rprev(file,read_record3,0)) goto err; - if (bcmp(read_record2,read_record3,reclength)) + if (memcmp(read_record2,read_record3,reclength)) printf("Can't find last record\n"); #ifdef NOT_ANYMORE if (!silent) @@ -504,7 +504,7 @@ int main(int argc, char *argv[]) bzero((char*) file->lastkey,file->s->base.max_key_length*2); if (mi_rkey(file,read_record,0,key2,(uint) i,HA_READ_PREFIX)) goto err; - if (bcmp(read_record+start,key,(uint) i)) + if (memcmp(read_record+start,key,(uint) i)) { puts("Didn't find right record"); goto end; @@ -523,7 +523,7 @@ int main(int argc, char *argv[]) opt_delete++; ant=1; while (mi_rnext(file,read_record3,0) == 0 && - bcmp(read_record3+start,key,length) == 0) ant++; + memcmp(read_record3+start,key,length) == 0) ant++; if (ant != dupp_keys-1) { printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-1); @@ -541,7 +541,7 @@ int main(int argc, char *argv[]) opt_delete++; ant=1; while (mi_rprev(file,read_record3,0) == 0 && - bcmp(read_record3+start,key,length) == 0) ant++; + memcmp(read_record3+start,key,length) == 0) ant++; if (ant != dupp_keys-2) { printf("next: I can only find: 
%d keys of %d\n",ant,dupp_keys-2); @@ -561,7 +561,7 @@ int main(int argc, char *argv[]) if (mi_rnext(file,read_record,0)) goto err; /* Skall finnas poster */ while (mi_rnext(file,read_record3,0) == 0 && - bcmp(read_record3+start,key,length) == 0) ant++; + memcmp(read_record3+start,key,length) == 0) ant++; if (ant != dupp_keys-3) { printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-3); @@ -576,7 +576,7 @@ int main(int argc, char *argv[]) opt_delete++; ant=0; while (mi_rprev(file,read_record3,0) == 0 && - bcmp(read_record3+start,key,length) == 0) ant++; + memcmp(read_record3+start,key,length) == 0) ant++; if (ant != dupp_keys-4) { printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-4); @@ -599,7 +599,7 @@ int main(int argc, char *argv[]) for (i=min(2,keys) ; i-- > 0 ;) { if (mi_rsame(file,read_record2,(int) i)) goto err; - if (bcmp(read_record,read_record2,reclength) != 0) + if (memcmp(read_record,read_record2,reclength) != 0) { printf("mi_rsame didn't find same record\n"); goto end; diff --git a/storage/myisam/mi_unique.c b/storage/myisam/mi_unique.c index 02fcd9289dd..fdba84a2e67 100644 --- a/storage/myisam/mi_unique.c +++ b/storage/myisam/mi_unique.c @@ -56,7 +56,7 @@ my_bool mi_check_unique(MI_INFO *info, MI_UNIQUEDEF *def, uchar *record, if (_mi_search_next(info,info->s->keyinfo+def->key, info->lastkey, MI_UNIQUE_HASH_LENGTH, SEARCH_BIGGER, info->s->state.key_root[def->key]) || - bcmp((char*) info->lastkey, (char*) key_buff, MI_UNIQUE_HASH_LENGTH)) + memcmp(info->lastkey, key_buff, MI_UNIQUE_HASH_LENGTH)) { info->page_changed=1; /* Can't optimize read next */ info->lastpos=lastpos; diff --git a/storage/myisam/myisamchk.c b/storage/myisam/myisamchk.c index d5d1c548717..592426a3690 100644 --- a/storage/myisam/myisamchk.c +++ b/storage/myisam/myisamchk.c @@ -168,7 +168,7 @@ static struct my_option my_long_options[] = 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"character-sets-dir", OPT_CHARSETS_DIR, "Directory where character sets are.", - 
(uchar**) &charsets_dir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + (char**) &charsets_dir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"check", 'c', "Check table for errors.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, @@ -188,8 +188,8 @@ static struct my_option my_long_options[] = 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"data-file-length", 'D', "Max length of data file (when recreating data-file when it's full).", - (uchar**) &check_param.max_data_file_length, - (uchar**) &check_param.max_data_file_length, + &check_param.max_data_file_length, + &check_param.max_data_file_length, 0, GET_LL, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"extend-check", 'e', "If used when checking a table, ensure that the table is 100 percent consistent, which will take a long time. If used when repairing a table, try to recover every possible row from the data file. Normally this will also find a lot of garbage rows; Don't use this option with repair if you are not totally desperate.", @@ -211,13 +211,13 @@ static struct my_option my_long_options[] = 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"keys-used", 'k', "Tell MyISAM to update only some specific keys. # is a bit mask of which keys to use. This can be used to get faster inserts.", - (uchar**) &check_param.keys_in_use, - (uchar**) &check_param.keys_in_use, + &check_param.keys_in_use, + &check_param.keys_in_use, 0, GET_ULL, REQUIRED_ARG, -1, 0, 0, 0, 0, 0}, {"max-record-length", OPT_MAX_RECORD_LENGTH, "Skip rows bigger than this if myisamchk can't allocate memory to hold it", - (uchar**) &check_param.max_record_length, - (uchar**) &check_param.max_record_length, + &check_param.max_record_length, + &check_param.max_record_length, 0, GET_ULL, REQUIRED_ARG, LONGLONG_MAX, 0, LONGLONG_MAX, 0, 0, 0}, {"medium-check", 'm', "Faster than extend-check, but only finds 99.99% of all errors. 
Should be good enough for most cases.", @@ -246,12 +246,13 @@ static struct my_option my_long_options[] = #endif {"set-auto-increment", 'A', "Force auto_increment to start at this or higher value. If no value is given, then sets the next auto_increment value to the highest used value for the auto key + 1.", - (uchar**) &check_param.auto_increment_value, - (uchar**) &check_param.auto_increment_value, + &check_param.auto_increment_value, + &check_param.auto_increment_value, 0, GET_ULL, OPT_ARG, 0, 0, 0, 0, 0, 0}, {"set-collation", OPT_SET_COLLATION, "Change the collation used by the index", - (uchar**) &set_collation_name, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + (char**) &set_collation_name, 0, 0, GET_STR, REQUIRED_ARG, + 0, 0, 0, 0, 0, 0}, {"set-variable", 'O', "Change the value of a variable. Please note that this option is deprecated; you can set variables directly with --variable-name=value.", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, @@ -263,12 +264,11 @@ static struct my_option my_long_options[] = 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"sort-records", 'R', "Sort records according to an index. This makes your data much more localized and may speed up things. 
(It may be VERY slow to do a sort the first time!)", - (uchar**) &check_param.opt_sort_key, - (uchar**) &check_param.opt_sort_key, + &check_param.opt_sort_key, + &check_param.opt_sort_key, 0, GET_UINT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"tmpdir", 't', - "Path for temporary files.", - (uchar**) &opt_tmpdir, + "Path for temporary files.", (char**) &opt_tmpdir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"update-state", 'U', "Mark tables as crashed if any errors were found.", @@ -286,54 +286,54 @@ static struct my_option my_long_options[] = "Wait if table is locked.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, { "key_buffer_size", OPT_KEY_BUFFER_SIZE, "", - (uchar**) &check_param.use_buffers, (uchar**) &check_param.use_buffers, 0, + &check_param.use_buffers, &check_param.use_buffers, 0, GET_ULL, REQUIRED_ARG, USE_BUFFER_INIT, MALLOC_OVERHEAD, SIZE_T_MAX, MALLOC_OVERHEAD, IO_SIZE, 0}, { "key_cache_block_size", OPT_KEY_CACHE_BLOCK_SIZE, "", - (uchar**) &opt_key_cache_block_size, - (uchar**) &opt_key_cache_block_size, 0, + &opt_key_cache_block_size, + &opt_key_cache_block_size, 0, GET_LONG, REQUIRED_ARG, MI_KEY_BLOCK_LENGTH, MI_MIN_KEY_BLOCK_LENGTH, MI_MAX_KEY_BLOCK_LENGTH, 0, MI_MIN_KEY_BLOCK_LENGTH, 0}, { "myisam_block_size", OPT_MYISAM_BLOCK_SIZE, "", - (uchar**) &opt_myisam_block_size, (uchar**) &opt_myisam_block_size, 0, + &opt_myisam_block_size, &opt_myisam_block_size, 0, GET_LONG, REQUIRED_ARG, MI_KEY_BLOCK_LENGTH, MI_MIN_KEY_BLOCK_LENGTH, MI_MAX_KEY_BLOCK_LENGTH, 0, MI_MIN_KEY_BLOCK_LENGTH, 0}, { "read_buffer_size", OPT_READ_BUFFER_SIZE, "", - (uchar**) &check_param.read_buffer_length, - (uchar**) &check_param.read_buffer_length, 0, GET_ULONG, REQUIRED_ARG, + &check_param.read_buffer_length, + &check_param.read_buffer_length, 0, GET_ULONG, REQUIRED_ARG, (long) READ_BUFFER_INIT, (long) MALLOC_OVERHEAD, INT_MAX32, (long) MALLOC_OVERHEAD, (long) 1L, 0}, { "write_buffer_size", OPT_WRITE_BUFFER_SIZE, "", - (uchar**) &check_param.write_buffer_length, - 
(uchar**) &check_param.write_buffer_length, 0, GET_ULONG, REQUIRED_ARG, + &check_param.write_buffer_length, + &check_param.write_buffer_length, 0, GET_ULONG, REQUIRED_ARG, (long) READ_BUFFER_INIT, (long) MALLOC_OVERHEAD, INT_MAX32, (long) MALLOC_OVERHEAD, (long) 1L, 0}, { "sort_buffer_size", OPT_SORT_BUFFER_SIZE, "", - (uchar**) &check_param.sort_buffer_length, - (uchar**) &check_param.sort_buffer_length, 0, GET_ULONG, REQUIRED_ARG, + &check_param.sort_buffer_length, + &check_param.sort_buffer_length, 0, GET_ULONG, REQUIRED_ARG, (long) SORT_BUFFER_INIT, (long) (MIN_SORT_BUFFER + MALLOC_OVERHEAD), ULONG_MAX, (long) MALLOC_OVERHEAD, (long) 1L, 0}, { "sort_key_blocks", OPT_SORT_KEY_BLOCKS, "", - (uchar**) &check_param.sort_key_blocks, - (uchar**) &check_param.sort_key_blocks, 0, GET_ULONG, REQUIRED_ARG, + &check_param.sort_key_blocks, + &check_param.sort_key_blocks, 0, GET_ULONG, REQUIRED_ARG, BUFFERS_WHEN_SORTING, 4L, 100L, 0L, 1L, 0}, - { "decode_bits", OPT_DECODE_BITS, "", (uchar**) &decode_bits, - (uchar**) &decode_bits, 0, GET_UINT, REQUIRED_ARG, 9L, 4L, 17L, 0L, 1L, 0}, - { "ft_min_word_len", OPT_FT_MIN_WORD_LEN, "", (uchar**) &ft_min_word_len, - (uchar**) &ft_min_word_len, 0, GET_ULONG, REQUIRED_ARG, 4, 1, HA_FT_MAXCHARLEN, + { "decode_bits", OPT_DECODE_BITS, "", &decode_bits, + &decode_bits, 0, GET_UINT, REQUIRED_ARG, 9L, 4L, 17L, 0L, 1L, 0}, + { "ft_min_word_len", OPT_FT_MIN_WORD_LEN, "", &ft_min_word_len, + &ft_min_word_len, 0, GET_ULONG, REQUIRED_ARG, 4, 1, HA_FT_MAXCHARLEN, 0, 1, 0}, - { "ft_max_word_len", OPT_FT_MAX_WORD_LEN, "", (uchar**) &ft_max_word_len, - (uchar**) &ft_max_word_len, 0, GET_ULONG, REQUIRED_ARG, HA_FT_MAXCHARLEN, 10, + { "ft_max_word_len", OPT_FT_MAX_WORD_LEN, "", &ft_max_word_len, + &ft_max_word_len, 0, GET_ULONG, REQUIRED_ARG, HA_FT_MAXCHARLEN, 10, HA_FT_MAXCHARLEN, 0, 1, 0}, { "ft_stopword_file", OPT_FT_STOPWORD_FILE, "Use stopwords from this file instead of built-in list.", - (uchar**) &ft_stopword_file, (uchar**) &ft_stopword_file, 
0, GET_STR, + (char**) &ft_stopword_file, (char**) &ft_stopword_file, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"stats_method", OPT_STATS_METHOD, "Specifies how index statistics collection code should treat NULLs. " "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), " "\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".", - (uchar**) &myisam_stats_method_str, (uchar**) &myisam_stats_method_str, 0, + (char**) &myisam_stats_method_str, (char**) &myisam_stats_method_str, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; @@ -1314,7 +1314,7 @@ static void descript(HA_CHECK *param, register MI_INFO *info, char * name) printf("Recordlength: %13d\n",(int) share->base.pack_reclength); if (! mi_is_all_keys_active(share->state.key_map, share->base.keys)) { - longlong2str(share->state.key_map,buff,2); + longlong2str(share->state.key_map,buff,2,1); printf("Using only keys '%s' of %d possibly keys\n", buff, share->base.keys); } diff --git a/storage/myisam/myisamlog.c b/storage/myisam/myisamlog.c index 8b4ccfe3256..c441acf6c3f 100644 --- a/storage/myisam/myisamlog.c +++ b/storage/myisam/myisamlog.c @@ -382,18 +382,18 @@ static int examine_log(char * file_name, char **table_names) curr_file_info->show_name); if (my_b_read(&cache,(uchar*) head,2)) goto err; + buff= 0; file_info.name=0; file_info.show_name=0; file_info.record=0; - if (read_string(&cache,(uchar**) (char*) &file_info.name, - (uint) mi_uint2korr(head))) + if (read_string(&cache, &buff, (uint) mi_uint2korr(head))) goto err; { uint i; char *pos,*to; /* Fix if old DOS files to new format */ - for (pos=file_info.name; (pos=strchr(pos,'\\')) ; pos++) + for (pos=file_info.name=(char*)buff; (pos=strchr(pos,'\\')) ; pos++) *pos= '/'; pos=file_info.name; @@ -692,7 +692,7 @@ static int read_string(IO_CACHE *file, register uchar* *to, register uint length *to= 0; DBUG_RETURN(1); } - *((char*) *to+length)= '\0'; + *((uchar*) 
*to+length)= '\0'; DBUG_RETURN (0); } /* read_string */ diff --git a/storage/myisam/myisampack.c b/storage/myisam/myisampack.c index 300bab58305..54ff8a258ad 100644 --- a/storage/myisam/myisampack.c +++ b/storage/myisam/myisampack.c @@ -257,10 +257,10 @@ static struct my_option my_long_options[] = 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, #endif {"backup", 'b', "Make a backup of the table as table_name.OLD.", - (uchar**) &backup, (uchar**) &backup, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + &backup, &backup, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"character-sets-dir", OPT_CHARSETS_DIR_MP, - "Directory where character sets are.", (uchar**) &charsets_dir, - (uchar**) &charsets_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + "Directory where character sets are.", (char**) &charsets_dir, + (char**) &charsets_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"debug", '#', "Output debug log. Often this is 'd:t:o,filename'.", 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, {"force", 'f', @@ -268,7 +268,7 @@ static struct my_option my_long_options[] = 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"join", 'j', "Join all given tables into 'new_table_name'. 
All tables MUST have identical layouts.", - (uchar**) &join_table, (uchar**) &join_table, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, + &join_table, &join_table, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"help", '?', "Display this help and exit.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, @@ -282,8 +282,8 @@ static struct my_option my_long_options[] = 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"version", 'V', "Output version information and exit.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"wait", 'w', "Wait and retry if table is in use.", (uchar**) &opt_wait, - (uchar**) &opt_wait, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"wait", 'w', "Wait and retry if table is in use.", &opt_wait, + &opt_wait, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; diff --git a/storage/myisam/rt_split.c b/storage/myisam/rt_split.c index 88cf643faf9..03d22de68fa 100644 --- a/storage/myisam/rt_split.c +++ b/storage/myisam/rt_split.c @@ -255,7 +255,6 @@ int rtree_split_page(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, uchar *key, SplitStruct *stop; double *coord_buf; double *next_coord; - double *old_coord; int n_dim; uchar *source_cur, *cur1, *cur2; uchar *new_page= info->buff; @@ -293,8 +292,6 @@ int rtree_split_page(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, uchar *key, rtree_d_mbr(keyinfo->seg, key, key_length, cur->coords); cur->key = key; - old_coord = next_coord; - if (split_rtree_node(task, max_keys + 1, mi_getint(page) + full_length + 2, full_length, rt_PAGE_MIN_SIZE(keyinfo->block_length), diff --git a/storage/myisammrg/ha_myisammrg.cc b/storage/myisammrg/ha_myisammrg.cc index f27f22761b1..be3f0ca4813 100644 --- a/storage/myisammrg/ha_myisammrg.cc +++ b/storage/myisammrg/ha_myisammrg.cc @@ -295,8 +295,8 @@ static int myisammrg_parent_open_callback(void *callback_param, } } - DBUG_PRINT("myrg", ("open: '%.*s'.'%.*s'", (int)(child_l->db_length), - child_l->db, (int)(child_l->table_name_length), + 
DBUG_PRINT("myrg", ("open: '%.*s'.'%.*s'", (int) child_l->db_length, + child_l->db, (int) child_l->table_name_length, child_l->table_name)); /* Convert to lowercase if required. */ @@ -1134,8 +1134,8 @@ void ha_myisammrg::update_create_info(HA_CREATE_INFO *create_info) goto err; create_info->merge_list.elements++; - (*create_info->merge_list.next) = (uchar*) ptr; - create_info->merge_list.next= (uchar**) &ptr->next_local; + (*create_info->merge_list.next) = ptr; + create_info->merge_list.next= &ptr->next_local; } *create_info->merge_list.next=0; } @@ -1157,7 +1157,7 @@ int ha_myisammrg::create(const char *name, register TABLE *form, { char buff[FN_REFLEN]; const char **table_names, **pos; - TABLE_LIST *tables= (TABLE_LIST*) create_info->merge_list.first; + TABLE_LIST *tables= create_info->merge_list.first; THD *thd= current_thd; size_t dirlgt= dirname_length(name); DBUG_ENTER("ha_myisammrg::create"); diff --git a/storage/myisammrg/myrg_open.c b/storage/myisammrg/myrg_open.c index ea306c5ba9c..17c9b4ba4d1 100644 --- a/storage/myisammrg/myrg_open.c +++ b/storage/myisammrg/myrg_open.c @@ -227,9 +227,7 @@ MYRG_INFO *myrg_parent_open(const char *parent_name, int save_errno; int insert_method; uint length; - uint dir_length; uint child_count; - size_t name_buff_length; File fd; IO_CACHE file_cache; char parent_name_buff[FN_REFLEN * 2]; @@ -300,7 +298,6 @@ MYRG_INFO *myrg_parent_open(const char *parent_name, } /* Call callback for each child. */ - dir_length= dirname_part(parent_name_buff, parent_name, &name_buff_length); my_b_seek(&file_cache, 0); while ((length= my_b_gets(&file_cache, child_name_buff, FN_REFLEN - 1))) { @@ -380,7 +377,6 @@ int myrg_attach_children(MYRG_INFO *m_info, int handle_locking, { ulonglong file_offset; MI_INFO *myisam; - int rc; int errpos; int save_errno; uint idx; @@ -399,7 +395,6 @@ int myrg_attach_children(MYRG_INFO *m_info, int handle_locking, here and in ha_myisammrg::store_lock() forces consistent data. 
*/ pthread_mutex_lock(&m_info->mutex); - rc= 1; errpos= 0; file_offset= 0; min_keys= 0; diff --git a/storage/mysql_storage_engine.cmake b/storage/mysql_storage_engine.cmake index b920f16452b..5424beca33a 100644 --- a/storage/mysql_storage_engine.cmake +++ b/storage/mysql_storage_engine.cmake @@ -38,6 +38,8 @@ IF(NOT SOURCE_SUBLIBS) IF(${engine}_LIBS) TARGET_LINK_LIBRARIES(${dyn_libname} ${${engine}_LIBS}) ENDIF(${engine}_LIBS) + # Install the plugin + INSTALL(TARGETS ${dyn_libname} DESTINATION lib/plugin COMPONENT runtime) MESSAGE("build ${engine} as DLL") ENDIF(${ENGINE_BUILD_TYPE} STREQUAL "STATIC") ENDIF(NOT SOURCE_SUBLIBS) diff --git a/storage/ndb/src/common/portlib/NdbMutex.c b/storage/ndb/src/common/portlib/NdbMutex.c index c9184e5d1f2..5595baba7c4 100644 --- a/storage/ndb/src/common/portlib/NdbMutex.c +++ b/storage/ndb/src/common/portlib/NdbMutex.c @@ -24,36 +24,31 @@ NdbMutex* NdbMutex_Create(void) { NdbMutex* pNdbMutex; int result; - DBUG_ENTER("NdbMutex_Create"); - + pNdbMutex = (NdbMutex*)NdbMem_Allocate(sizeof(NdbMutex)); - DBUG_PRINT("info",("NdbMem_Allocate 0x%lx", (long) pNdbMutex)); - + if (pNdbMutex == NULL) - DBUG_RETURN(NULL); - + return NULL; + result = pthread_mutex_init(pNdbMutex, NULL); assert(result == 0); - - DBUG_RETURN(pNdbMutex); + + return pNdbMutex; } int NdbMutex_Destroy(NdbMutex* p_mutex) { int result; - DBUG_ENTER("NdbMutex_Destroy"); if (p_mutex == NULL) - DBUG_RETURN(-1); + return -1; result = pthread_mutex_destroy(p_mutex); - DBUG_PRINT("info",("NdbMem_Free 0x%lx", (long) p_mutex)); NdbMem_Free(p_mutex); - - DBUG_RETURN(result); + return result; } diff --git a/storage/ndb/src/ndbapi/DictCache.cpp b/storage/ndb/src/ndbapi/DictCache.cpp index 04be3711847..9c66b2be9d2 100644 --- a/storage/ndb/src/ndbapi/DictCache.cpp +++ b/storage/ndb/src/ndbapi/DictCache.cpp @@ -20,8 +20,10 @@ #include <NdbCondition.h> #include <NdbSleep.h> -static NdbTableImpl f_invalid_table; -static NdbTableImpl f_altered_table; +static NdbTableImpl * 
f_invalid_table = 0; +static NdbTableImpl * f_altered_table = 0; + +static int ndb_dict_cache_count = 0; Ndb_local_table_info * Ndb_local_table_info::create(NdbTableImpl *table_impl, Uint32 sz) @@ -93,11 +95,29 @@ GlobalDictCache::GlobalDictCache(){ DBUG_ENTER("GlobalDictCache::GlobalDictCache"); m_tableHash.createHashTable(); m_waitForTableCondition = NdbCondition_Create(); + if (f_invalid_table == NULL) + f_invalid_table = new NdbTableImpl(); + if (f_altered_table == NULL) + f_altered_table = new NdbTableImpl(); + ndb_dict_cache_count++; DBUG_VOID_RETURN; } GlobalDictCache::~GlobalDictCache(){ DBUG_ENTER("GlobalDictCache::~GlobalDictCache"); + if (--ndb_dict_cache_count == 0) + { + if (f_invalid_table) + { + delete f_invalid_table; + f_invalid_table = 0; + } + if (f_altered_table) + { + delete f_altered_table; + f_altered_table = 0; + } + } NdbElement_t<Vector<TableVersion> > * curr = m_tableHash.getNext(0); while(curr != 0){ Vector<TableVersion> * vers = curr->theData; @@ -254,7 +274,7 @@ GlobalDictCache::put(const char * name, NdbTableImpl * tab) TableVersion & ver = vers->back(); if(ver.m_status != RETREIVING || !(ver.m_impl == 0 || - ver.m_impl == &f_invalid_table || ver.m_impl == &f_altered_table) || + ver.m_impl == f_invalid_table || ver.m_impl == f_altered_table) || ver.m_version != 0 || ver.m_refCount == 0){ abort(); @@ -271,7 +291,7 @@ GlobalDictCache::put(const char * name, NdbTableImpl * tab) ver.m_version = tab->m_version; ver.m_status = OK; } - else if (ver.m_impl == &f_invalid_table) + else if (ver.m_impl == f_invalid_table) { DBUG_PRINT("info", ("Table DROPPED invalid")); ver.m_impl = tab; @@ -279,7 +299,7 @@ GlobalDictCache::put(const char * name, NdbTableImpl * tab) ver.m_status = DROPPED; ver.m_impl->m_status = NdbDictionary::Object::Invalid; } - else if(ver.m_impl == &f_altered_table) + else if(ver.m_impl == f_altered_table) { DBUG_PRINT("info", ("Table DROPPED altered")); ver.m_impl = tab; @@ -440,7 +460,7 @@ 
GlobalDictCache::alter_table_rep(const char * name, if(i == sz - 1 && ver.m_status == RETREIVING) { - ver.m_impl = altered ? &f_altered_table : &f_invalid_table; + ver.m_impl = altered ? f_altered_table : f_invalid_table; DBUG_VOID_RETURN; } } diff --git a/storage/oqgraph/CMakeFiles.txt b/storage/oqgraph/CMakeLists.txt index b039c1ddb44..d81dbe323b6 100644 --- a/storage/oqgraph/CMakeFiles.txt +++ b/storage/oqgraph/CMakeLists.txt @@ -13,10 +13,25 @@ # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
-SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
+INCLUDE (CheckCXXSourceCompiles)
+CHECK_CXX_SOURCE_COMPILES(
+"#include <boost/version.hpp>
+#if BOOST_VERSION >= 104000
+#else
+#error oops
+#endif
+int main() { return 0; }" BOOST_OK)
+
+IF(BOOST_OK)
+ INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake")
+
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_OQGRAPH /EHsc")
+
+ INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/sql
+ ${CMAKE_SOURCE_DIR}/regex
+ ${CMAKE_SOURCE_DIR}/extra/yassl/include)
+
+ SET(OQGRAPH_SOURCES ha_oqgraph.cc graphcore.cc)
+ MYSQL_STORAGE_ENGINE(OQGRAPH)
+ENDIF()
-INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/sql
- ${CMAKE_SOURCE_DIR}/regex
- ${CMAKE_SOURCE_DIR}/extra/yassl/include)
-ADD_LIBRARY(oqgraph ha_oqgraph.cc)
diff --git a/storage/oqgraph/Makefile.am b/storage/oqgraph/Makefile.am index 56eea4cb87a..e99e134db02 100644 --- a/storage/oqgraph/Makefile.am +++ b/storage/oqgraph/Makefile.am @@ -47,11 +47,11 @@ BOOST_CXXFLAGS+= -funroll-loops -fno-trapping-math EXTRA_DIST = ha_oqgraph.h ha_oqgraph.cc graphcore.cc \ graphcore-graph.h graphcore-types.h graphcore.h \ - CMakeFiles.txt plug.in oqgraph_probes.d + CMakeLists.txt plug.in oqgraph_probes.d # DTRACE = @DTRACE@ # DTRACEFLAGS = @DTRACEFLAGS@ -# DTRACEFILES = .libs/liboqgraph_engine_la-ha_oqgraph.o +# DTRACEFILES = .libs/libha_oqgraph_la-ha_oqgraph.o ORIG_CXXFLAGS = @CXXFLAGS@ CXXFLAGS= @@ -71,18 +71,18 @@ else INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include -I$(top_srcdir)/regex -I$(top_srcdir)/sql -I$(srcdir) -DHAVE_OQGRAPH endif !BUILD_OQGRAPH_STANDALONE -EXTRA_LTLIBRARIES = oqgraph_engine.la +EXTRA_LTLIBRARIES = ha_oqgraph.la mysqlplugin_LTLIBRARIES = @plugin_oqgraph_shared_target@ -oqgraph_engine_la_SOURCES = ha_oqgraph.cc -oqgraph_engine_la_LIBADD = libgraphcore.la +ha_oqgraph_la_SOURCES = ha_oqgraph.cc +ha_oqgraph_la_LIBADD = libgraphcore.la # if HAVE_DTRACE -# oqgraph_engine_la_LIBADD += oqgraph_probes.o +# ha_oqgraph_la_LIBADD += oqgraph_probes.o # endif -oqgraph_engine_la_LDFLAGS = -module -rpath $(mysqlplugindir) -oqgraph_engine_la_CFLAGS = $(ORIG_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN -oqgraph_engine_la_CXXFLAGS = $(ORIG_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_oqgraph_la_LDFLAGS = -shared -module -rpath $(mysqlplugindir) +ha_oqgraph_la_CFLAGS = $(ORIG_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_oqgraph_la_CXXFLAGS = $(ORIG_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN # oqgraph_probes.h: oqgraph_probes.d # $(DTRACE) $(DTRACEFLAGS) -h -s oqgraph_probes.d diff --git a/storage/oqgraph/graphcore.cc b/storage/oqgraph/graphcore.cc index f19f99d0756..0b856ac253f 100644 --- a/storage/oqgraph/graphcore.cc +++ b/storage/oqgraph/graphcore.cc @@ -24,7 +24,7 @@ ====================================================================== */ -#include 
<strings.h> +#include <string.h> #define BOOST_ALL_NO_LIB 1 @@ -49,7 +49,7 @@ using namespace open_query; using namespace boost; -static const row empty_row = { 0 }; +static const row empty_row = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; namespace open_query { @@ -997,7 +997,7 @@ int stack_cursor::fetch_row(const row &row_info, row &result, const reference &ref) { last= ref; - if (optional<Vertex> v= last.vertex()) + if (last.vertex()) { optional<int> seq; optional<EdgeWeight> w; diff --git a/storage/oqgraph/plug.in b/storage/oqgraph/plug.in index 6331dade3dd..38c8310a915 100644 --- a/storage/oqgraph/plug.in +++ b/storage/oqgraph/plug.in @@ -1,6 +1,6 @@ MYSQL_STORAGE_ENGINE(oqgraph,,[Graph Storage Engine], [Open Query Graph Computation Engine], []) -MYSQL_PLUGIN_DYNAMIC(oqgraph, [oqgraph_engine.la]) +MYSQL_PLUGIN_DYNAMIC(oqgraph, [ha_oqgraph.la]) MYSQL_PLUGIN_DEPENDS_ON_MYSQL_INTERNALS(oqgraph, [ha_oqgraph.cc]) MYSQL_PLUGIN_ACTIONS(oqgraph,[ AC_LANG_PUSH([C++]) @@ -35,6 +35,6 @@ AC_PREPROC_IFELSE( ], [AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no]) - with_plugin_oqgraph=no]) + MYSQL_PLUGIN_WITHOUT(oqgraph)]) AC_LANG_POP() diff --git a/storage/pbxt/CMakeLists.txt b/storage/pbxt/CMakeLists.txt index a05b1f97083..6da0717043c 100644 --- a/storage/pbxt/CMakeLists.txt +++ b/storage/pbxt/CMakeLists.txt @@ -22,6 +22,8 @@ # # This file is used to make the Windows version +INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMYSQL_SERVER") SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMYSQL_SERVER") @@ -101,3 +103,4 @@ src/xt_defs.h src/xt_errno.h) MYSQL_STORAGE_ENGINE(PBXT) + diff --git a/storage/pbxt/ChangeLog b/storage/pbxt/ChangeLog index b6023d26139..f0f9864d0d5 100644 --- a/storage/pbxt/ChangeLog +++ b/storage/pbxt/ChangeLog @@ -1,7 +1,49 @@ PBXT Release Notes ================== -+------- 1.0.11 Pre-GA - 2010-05-11 +------- 1.0.11-7 Pre-GA - 2010-09-09 + +RN336: Compiled and tested with MySQL 5.1.50. 
+ +RN335: Fixed bug #523994: Deleting all records does not update table statistics. + +RN334: Made a change to reduce the time that only temporary tables exist during the ALTER TABLE and REPAIR TABLE statements. This increases the chance of recovery if a crash occurs during these operations. + +RN333: Log name of table when PBXT recovers an index on startup. If an error occurs during index recovery, the index is set to "repair pending". + +RN332: Fixed an infinite loop when a record in a row is corrupt. Added logging and set the table to "repair pending" in this case. + +RN331: Fixed bug #626890: Crash on truncate table operation. + +RN330: Added additional checks for corruption of the index free list. + +------- 1.0.11-6 Pre-GA - 2010-07-08 + +RN329: Fixed bug #601245: make fails. PBXT did not compile if the partition engine was disabled in the MySQL build. + +------- 1.0.11-5 Pre-GA - 2010-06-18 + +RN328: Fixed bug #595478: Compile fails (1.0.11-4). + +------- 1.0.11-4 Pre-GA - 2010-06-15 + +RN327: Fixed a bug that caused a crash during delete on the index. The crash occurred due to memory overwrite when a long key is promoted after a shorter key is deleted, and the difference causes a node size overflow. + +------- 1.0.11-3 Pre-GA - 2010-06-11 + +RN326: Fixed bug #587740: pbxt-1.0.11-pre2-ga first time create partition table error. This was not a new bug. The problem was that the PBXT system table's .frm files are corrupted when the first PBXT table created is a partition table. + +RN325: Fixed the "to-sweep" column output in xtstat. + +------- 1.0.11-2 Pre-GA - 2010-05-26 + +RN324: Fixed bug #584070: pbxt-1.0.11-pre-ga does not work with mysql 5.1.47. This bug fix removes a hack which was done to avoid running into the LOCK_plugin lock. + +------- 1.0.11-1 Pre-GA - 2010-05-19 + +RN323: Detect corruption of a key length in an index page. This bug fix avoids a possible crash due to index page corruption. 
+ +------- 1.0.11 Pre-GA - 2010-05-11 RN322: Creating a table the references a non-existing table can now only be done if you set: foreign_key_checks = 0. Also fixed a failure when creating tables with recursive foreign key declarations. diff --git a/storage/pbxt/src/cache_xt.cc b/storage/pbxt/src/cache_xt.cc index 85eea41dd79..24e42d9e984 100644 --- a/storage/pbxt/src/cache_xt.cc +++ b/storage/pbxt/src/cache_xt.cc @@ -668,6 +668,9 @@ xtPublic void xt_ind_init(XTThreadPtr self, size_t cache_size) block->cb_data = buffer; buffer += XT_INDEX_PAGE_SIZE; #endif +#ifdef CHECK_BLOCK_TRAILERS + XT_SET_DISK_4(block->cp_check, 0xDEADBEEF); +#endif ind_cac_globals.cg_free_list = block; block++; } @@ -684,6 +687,19 @@ xtPublic void xt_ind_init(XTThreadPtr self, size_t cache_size) cont_(a); } +#ifdef CHECK_BLOCK_TRAILERS +xtPublic void check_block_trailers() +{ + XTIndBlockPtr block; + + block = ind_cac_globals.cg_blocks; + for (u_int i=0; i<ind_cac_globals.cg_block_count; i++) { + ASSERT_NS(XT_GET_DISK_4(block->cp_check) == 0xDEADBEEF); + block++; + } +} +#endif + xtPublic void xt_ind_exit(XTThreadPtr self) { #ifdef XT_USE_MYSYS @@ -1283,7 +1299,7 @@ static XTIndBlockPtr ind_cac_fetch(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNod * Conditionally count the number of deleted entries in the index: * We do this before other threads can read the block. 
*/ - if (ind->mi_lazy_delete && read_data) + if (ind && ind->mi_lazy_delete && read_data) xt_ind_count_deleted_items(ot->ot_table, ind, block); /* Add to the hash table: */ @@ -1358,6 +1374,9 @@ xtPublic xtBool xt_ind_write(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID ad #ifdef XT_TRACK_INDEX_UPDATES ot->ot_ind_changed++; #endif +#ifdef CHECK_BLOCK_TRAILERS + check_block_trailers(); +#endif return OK; } diff --git a/storage/pbxt/src/cache_xt.h b/storage/pbxt/src/cache_xt.h index ca796ab1a74..63a5164e466 100644 --- a/storage/pbxt/src/cache_xt.h +++ b/storage/pbxt/src/cache_xt.h @@ -33,6 +33,7 @@ struct XTIdxReadBuffer; #ifdef DEBUG //#define XT_USE_CACHE_DEBUG_SIZES +//#define CHECK_BLOCK_TRAILERS #endif #ifdef XT_USE_CACHE_DEBUG_SIZES @@ -116,6 +117,9 @@ typedef struct XTIndBlock { #else xtWord1 cb_data[XT_INDEX_PAGE_SIZE]; #endif +#ifdef CHECK_BLOCK_TRAILERS + xtWord1 cp_check[4]; +#endif } XTIndBlockRec, *XTIndBlockPtr; typedef struct XTIndReference { @@ -177,6 +181,10 @@ xtBool xt_ind_copy_on_write(XTIndReferencePtr iref); XTIndHandlePtr xt_ind_get_handle(struct XTOpenTable *ot, XTIndexPtr ind, XTIndReferencePtr iref); void xt_ind_release_handle(XTIndHandlePtr handle, xtBool have_lock, XTThreadPtr thread); +#ifdef CHECK_BLOCK_TRAILERS +extern void check_block_trailers(); +#endif + #ifdef DEBUG //#define DEBUG_CHECK_IND_CACHE #endif diff --git a/storage/pbxt/src/database_xt.h b/storage/pbxt/src/database_xt.h index 1b1863d2045..7744aeeac31 100644 --- a/storage/pbxt/src/database_xt.h +++ b/storage/pbxt/src/database_xt.h @@ -117,6 +117,7 @@ typedef struct XTDatabase : public XTHeap { XTSortedListPtr db_table_by_id; XTSortedListPtr db_table_paths; /* A list of table paths used by this database. */ xtBool db_multi_path; + XTSortedListPtr db_error_list; /* A list of errors already reported. 
*/ /* The open table pool: */ XTAllTablePoolsRec db_ot_pool; diff --git a/storage/pbxt/src/datadic_xt.cc b/storage/pbxt/src/datadic_xt.cc index 075d28edabe..6a58d23d980 100644 --- a/storage/pbxt/src/datadic_xt.cc +++ b/storage/pbxt/src/datadic_xt.cc @@ -396,7 +396,7 @@ void XTToken::expectNumber(XTThreadPtr self) struct charset_info_st; class XTTokenizer { - const struct charset_info_st *tkn_charset; + MX_CONST_CHARSET_INFO *tkn_charset; char *tkn_cstring; char *tkn_curr_pos; XTToken *tkn_current; @@ -1324,7 +1324,7 @@ void XTParseTable::parseDropIndex(XTThreadPtr self) class XTCreateTable : public XTParseTable { public: bool ct_convert; - const struct charset_info_st *ct_charset; + MX_CONST_CHARSET_INFO *ct_charset; XTPathStrPtr ct_tab_path; u_int ct_contraint_no; XTDDTable *ct_curr_table; @@ -2039,11 +2039,6 @@ void XTDDTableRef::deleteAllRows(XTThreadPtr self) if (!(ot = xt_db_open_table_using_tab(tr_fkey->co_table->dt_table, self))) xt_throw(self); - /* {FREE-ROWS-BAD} */ - /* - row_count = ((xtInt8) ot->ot_table->tab_row_eof_id) - 1; - row_count -= (xtInt8) ot->ot_table->tab_row_fnum; - */ /* Check if there are any rows in the referencing table: */ if (!xt_tab_seq_init(ot)) goto failed; diff --git a/storage/pbxt/src/datalog_xt.cc b/storage/pbxt/src/datalog_xt.cc index ff58a122e10..3238f0cbd17 100644 --- a/storage/pbxt/src/datalog_xt.cc +++ b/storage/pbxt/src/datalog_xt.cc @@ -1249,7 +1249,7 @@ xtBool XTDataLogBuffer::dlb_write_thru_log(xtLogID XT_NDEBUG_UNUSED(log_id), xtL */ dlb_data_log->dlf_log_eof += size; #ifdef DEBUG - if ((ulonglong) (log_offset + size) > (ulonglong) dlb_max_write_offset) + if (log_offset + (xtLogOffset) size > (xtLogOffset) dlb_max_write_offset) dlb_max_write_offset = log_offset + size; #endif dlb_flush_required = TRUE; @@ -1291,7 +1291,7 @@ xtBool XTDataLogBuffer::dlb_append_log(xtLogID XT_NDEBUG_UNUSED(log_id), xtLogOf if (!xt_pwrite_file(dlb_data_log->dlf_log_file, log_offset, size, data, &thread->st_statistics.st_data, thread)) 
return FAILED; #ifdef DEBUG - if ((ulonglong) (log_offset + size) > (ulonglong) dlb_max_write_offset) + if (log_offset + (xtLogOffset) size > (xtLogOffset) dlb_max_write_offset) dlb_max_write_offset = log_offset + size; #endif dlb_flush_required = TRUE; @@ -1734,8 +1734,8 @@ static xtBool dl_collect_garbage(XTThreadPtr self, XTDatabaseHPtr db, XTDataLogF xtLogOffset src_log_offset; xtLogID curr_log_id; xtLogOffset curr_log_offset; - xtLogID dest_log_id= 0; - xtLogOffset dest_log_offset= 0; + xtLogID dest_log_id = 0; + xtLogOffset dest_log_offset = 0; off_t garbage_count = 0; memset(&cs, 0, sizeof(XTCompactorStateRec)); diff --git a/storage/pbxt/src/discover_xt.cc b/storage/pbxt/src/discover_xt.cc index 09a2b09015e..7f7281d8c30 100644 --- a/storage/pbxt/src/discover_xt.cc +++ b/storage/pbxt/src/discover_xt.cc @@ -1622,8 +1622,11 @@ int xt_create_table_frm(handlerton *hton, THD* thd, const char *db, const char * COLUMN_FORMAT_TYPE_FIXED, #endif NULL /*default_value*/, NULL /*on_update_value*/, &comment, NULL /*change*/, - NULL /*interval_list*/, info->field_charset, 0 /*uint_geom_type*/, - NULL /*vcol_info*/, NULL /* create options */)) + NULL /*interval_list*/, info->field_charset, 0 /*uint_geom_type*/ +#if defined(MARIADB_BASE_VERSION) && MYSQL_VERSION_ID > 50200 + , NULL /*vcol_info*/, NULL /* create options */ +#endif + )) #endif goto error; @@ -1655,8 +1658,17 @@ int xt_create_table_frm(handlerton *hton, THD* thd, const char *db, const char * if (mysql_create_table_no_lock(thd, db, name, &create_info, &table_proto, &stmt->alter_info, 1, 0)) goto error; #else +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *part_info; + + part_info = thd->work_part_info; + thd->work_part_info = NULL; +#endif if (mysql_create_table_no_lock(thd, db, name, &mylex.create_info, &mylex.alter_info, 1, 0)) goto error; +#ifdef WITH_PARTITION_STORAGE_ENGINE + thd->work_part_info = part_info; +#endif #endif noerror: diff --git a/storage/pbxt/src/ha_pbxt.cc 
b/storage/pbxt/src/ha_pbxt.cc index ba3aa756516..ef0ae582c07 100644 --- a/storage/pbxt/src/ha_pbxt.cc +++ b/storage/pbxt/src/ha_pbxt.cc @@ -1232,6 +1232,11 @@ static int pbxt_init(void *p) THD *thd = NULL; #ifndef DRIZZLED +#if MYSQL_VERSION_ID < 50147 + /* A hack which is no longer required after 5.1.46 */ + extern myxt_mutex_t LOCK_plugin; +#endif + /* {MYSQL QUIRK} * I have to release this lock for PBXT recovery to * work, because it needs to open .frm files. @@ -1248,6 +1253,9 @@ static int pbxt_init(void *p) * Only real problem, 2 threads try to load the same * plugin at the same time. */ +#if MYSQL_VERSION_ID < 50147 + myxt_mutex_unlock(&LOCK_plugin); +#endif #endif /* Can't do this here yet, because I need a THD! */ @@ -1281,6 +1289,11 @@ static int pbxt_init(void *p) if (thd) myxt_destroy_thread(thd, FALSE); +#ifndef DRIZZLED +#if MYSQL_VERSION_ID < 50147 + myxt_mutex_lock(&LOCK_plugin); +#endif +#endif } #endif } @@ -1941,8 +1954,13 @@ xtPublic int ha_pbxt::reopen() * selectity of the indices, as soon as the number of rows * exceeds 200 (see [**]) */ +#ifdef XT_ROW_COUNT_CORRECTED + /* {CORRECTED-ROW-COUNT} */ + pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 - pb_share->sh_table->tab_row_fnum) < 150; +#else /* {FREE-ROWS-BAD} */ pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150; +#endif } /* I am not doing this anymore because it was only required @@ -2289,6 +2307,36 @@ void ha_pbxt::internal_close(THD *thd, struct XTThread *self) */ if (!thd || thd_sql_command(thd) == SQLCOM_FLUSH) // FLUSH TABLES xt_sync_flush_table(self, ot); + else { + /* This change is a result of a problem mentioned by Arjen. + * REPAIR and ALTER lead to the following sequence: + * 1. tab -- copy --> tmp1 + * 2. tab -- rename --> tmp2 + * 3. tmp1 -- rename --> tab + * 4. delete tmp2 + * + * PBXT flushes a table before rename. 
+ * In the sequence above results in a table flush in step 3 which can + * take a very long time. + * + * The problem is, during this time frame we have only temp tables. + * A crash in this state leaves the database in a bad state. + * + * To reduce the time in this state, the flush needs to be done + * elsewhere. The code below causes the flish to occur after + * step 1: + */ + switch (thd_sql_command(thd)) { + case SQLCOM_REPAIR: + case SQLCOM_RENAME_TABLE: + case SQLCOM_OPTIMIZE: + case SQLCOM_ANALYZE: + case SQLCOM_ALTER_TABLE: + case SQLCOM_CREATE_INDEX: + xt_sync_flush_table(self, ot); + break; + } + } } freer_(); // xt_db_return_table_to_pool(ot); } @@ -2349,9 +2397,15 @@ int ha_pbxt::open(const char *table_path, int XT_UNUSED(mode), uint XT_UNUSED(te #else xt_tab_load_row_pointers(self, pb_open_tab); #endif + xt_ind_set_index_selectivity(pb_open_tab, self); +#ifdef XT_ROW_COUNT_CORRECTED + /* {CORRECTED-ROW-COUNT} */ + pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 - pb_share->sh_table->tab_row_fnum) < 150; +#else /* {FREE-ROWS-BAD} */ pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150; +#endif } init_auto_increment(0); @@ -3922,6 +3976,8 @@ int ha_pbxt::info(uint flag) if ((ot = pb_open_tab)) { if (flag & HA_STATUS_VARIABLE) { + register XTTableHPtr tab = ot->ot_table; + /* {FREE-ROWS-BAD} * Free row count is not reliable, so ignore it. * The problem is if tab_row_fnum > tab_row_eof_id - 1 then @@ -3948,11 +4004,26 @@ int ha_pbxt::info(uint flag) * the actual number of vectors. But it must assume that it has at * least EXTRA_RECORDS vectors. 
*/ - stats.deleted = /* ot->ot_table->tab_row_fnum */ 0; - stats.records = (ha_rows) (ot->ot_table->tab_row_eof_id - 1 /* - stats.deleted */); - stats.data_file_length = xt_rec_id_to_rec_offset(ot->ot_table, ot->ot_table->tab_rec_eof_id); - stats.index_file_length = xt_ind_node_to_offset(ot->ot_table, ot->ot_table->tab_ind_eof); - stats.delete_length = ot->ot_table->tab_rec_fnum * ot->ot_rec_size; +#ifdef XT_ROW_COUNT_CORRECTED + if (tab->tab_row_eof_id <= tab->tab_row_fnum || + (!tab->tab_row_free_id && tab->tab_row_fnum)) + xt_tab_check_free_lists(NULL, ot, false, true); + stats.records = (ha_rows) tab->tab_row_eof_id - 1; + if (stats.records >= tab->tab_row_fnum) { + stats.deleted = tab->tab_row_fnum; + stats.records -= stats.deleted; + } + else { + stats.deleted = 0; + stats.records = 2; + } +#else + stats.deleted = /* tab->tab_row_fnum */ 0; + stats.records = (ha_rows) (tab->tab_row_eof_id - 1 /* - stats.deleted */); +#endif + stats.data_file_length = xt_rec_id_to_rec_offset(tab, tab->tab_rec_eof_id); + stats.index_file_length = xt_ind_node_to_offset(tab, tab->tab_ind_eof); + stats.delete_length = tab->tab_rec_fnum * ot->ot_rec_size; //check_time = info.check_time; stats.mean_rec_length = (ulong) ot->ot_rec_size; } @@ -4577,13 +4648,24 @@ xtPublic int ha_pbxt::external_lock(THD *thd, int lock_type) } if (pb_share->sh_recalc_selectivity) { +#ifdef XT_ROW_COUNT_CORRECTED + /* {CORRECTED-ROW-COUNT} */ + if ((pb_share->sh_table->tab_row_eof_id - 1 - pb_share->sh_table->tab_row_fnum) >= 200) +#else /* {FREE-ROWS-BAD} */ - if ((pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) >= 200) { + if ((pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) >= 200) +#endif + { /* [**] */ pb_share->sh_recalc_selectivity = FALSE; xt_ind_set_index_selectivity(pb_open_tab, self); +#ifdef XT_ROW_COUNT_CORRECTED + /* {CORRECTED-ROW-COUNT} */ + pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 - 
pb_share->sh_table->tab_row_fnum) < 150; +#else /* {FREE-ROWS-BAD} */ pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150; +#endif } } } @@ -4631,6 +4713,17 @@ xtPublic int ha_pbxt::external_lock(THD *thd, int lock_type) goto complete; } cont_(a); + + /* Occurs if you do: + * truncate table t1; + * truncate table t1; + */ + if (!pb_open_tab) { + if ((err = reopen())) { + pb_ex_in_use = 0; + goto complete; + } + } } else { pb_ex_in_use = 1; @@ -6069,7 +6162,7 @@ mysql_declare_plugin(pbxt) drizzle_declare_plugin_end; #else mysql_declare_plugin_end; -#ifdef MARIADB_BASE_VERSION +#if defined(MARIADB_BASE_VERSION) && MYSQL_VERSION_ID > 50200 maria_declare_plugin(pbxt) { /* PBXT */ MYSQL_STORAGE_ENGINE_PLUGIN, @@ -6083,7 +6176,7 @@ maria_declare_plugin(pbxt) 0x0001 /* 0.1 */, NULL, /* status variables */ pbxt_system_variables, /* system variables */ - "1.0.09g RC3", /* string version */ + "1.0.11-7 Pre-GA", /* string version */ MariaDB_PLUGIN_MATURITY_GAMMA /* maturity */ }, { /* PBXT_STATISTICS */ @@ -6098,7 +6191,7 @@ maria_declare_plugin(pbxt) 0x0005, NULL, /* status variables */ NULL, /* system variables */ - "1.0.09g RC3", /* string version */ + "1.0.11-7 Pre-GA", /* string version */ MariaDB_PLUGIN_MATURITY_GAMMA /* maturity */ } maria_declare_plugin_end; diff --git a/storage/pbxt/src/index_xt.cc b/storage/pbxt/src/index_xt.cc index c8995fe253c..f6c4b4d8aa3 100644 --- a/storage/pbxt/src/index_xt.cc +++ b/storage/pbxt/src/index_xt.cc @@ -272,10 +272,17 @@ static xtBool idx_new_branch(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID *a } if ((XT_NODE_ID(wrote_pos) = XT_NODE_ID(tab->tab_ind_free))) { + xtIndexNodeID next_node; + /* Use the block on the free list: */ - if (!xt_ind_read_bytes(ot, ind, wrote_pos, sizeof(XTIndFreeBlockRec), (xtWord1 *) &free_block)) + if (!xt_ind_read_bytes(ot, NULL, wrote_pos, sizeof(XTIndFreeBlockRec), (xtWord1 *) &free_block)) + goto failed; + XT_NODE_ID(next_node) = 
(xtIndexNodeID) XT_GET_DISK_8(free_block.if_next_block_8); + if (XT_NODE_ID(next_node) >= XT_NODE_ID(tab->tab_ind_eof)) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, tab->tab_name); goto failed; - XT_NODE_ID(tab->tab_ind_free) = (xtIndexNodeID) XT_GET_DISK_8(free_block.if_next_block_8); + } + XT_NODE_ID(tab->tab_ind_free) = XT_NODE_ID(next_node); xt_unlock_mutex_ns(&tab->tab_ind_lock); *address = wrote_pos; TRACK_BLOCK_ALLOC(wrote_pos); @@ -1415,30 +1422,45 @@ static xtBool idx_replace_node_key(XTOpenTablePtr ot, XTIndexPtr ind, IdxStackIt if (idx_is_item_deleted(iref.ir_branch, &item->i_pos)) iref.ir_block->cp_del_count--; } - memmove(&iref.ir_branch->tb_data[item->i_pos.i_item_offset + item_size], - &iref.ir_branch->tb_data[item->i_pos.i_item_offset + item->i_pos.i_item_size], - item->i_pos.i_total_size - item->i_pos.i_item_offset - item->i_pos.i_item_size); - memcpy(&iref.ir_branch->tb_data[item->i_pos.i_item_offset], - item_buf, item_size); - if (ind->mi_lazy_delete) { - if (idx_is_item_deleted(iref.ir_branch, &item->i_pos)) - iref.ir_block->cp_del_count++; - } - item->i_pos.i_total_size = item->i_pos.i_total_size + item_size - item->i_pos.i_item_size; - XT_SET_DISK_2(iref.ir_branch->tb_size_2, XT_MAKE_NODE_SIZE(item->i_pos.i_total_size)); - IDX_TRACE("%d-> %x\n", (int) XT_NODE_ID(current), (int) XT_GET_DISK_2(iref.ir_branch->tb_size_2)); - iref.ir_updated = TRUE; + + if (item->i_pos.i_total_size + item_size - item->i_pos.i_item_size <= XT_INDEX_PAGE_DATA_SIZE) { + /* The new item is larger than the old, this can result + * in overflow of the node! 
+ */ + memmove(&iref.ir_branch->tb_data[item->i_pos.i_item_offset + item_size], + &iref.ir_branch->tb_data[item->i_pos.i_item_offset + item->i_pos.i_item_size], + item->i_pos.i_total_size - item->i_pos.i_item_offset - item->i_pos.i_item_size); + memcpy(&iref.ir_branch->tb_data[item->i_pos.i_item_offset], + item_buf, item_size); + if (ind->mi_lazy_delete) { + if (idx_is_item_deleted(iref.ir_branch, &item->i_pos)) + iref.ir_block->cp_del_count++; + } + item->i_pos.i_total_size = item->i_pos.i_total_size + item_size - item->i_pos.i_item_size; + XT_SET_DISK_2(iref.ir_branch->tb_size_2, XT_MAKE_NODE_SIZE(item->i_pos.i_total_size)); + IDX_TRACE("%d-> %x\n", (int) XT_NODE_ID(current), (int) XT_GET_DISK_2(iref.ir_branch->tb_size_2)); + iref.ir_updated = TRUE; #ifdef DEBUG - if (ind->mi_lazy_delete) ASSERT_NS(item->i_pos.i_total_size <= XT_INDEX_PAGE_DATA_SIZE); #endif - if (item->i_pos.i_total_size <= XT_INDEX_PAGE_DATA_SIZE) return xt_ind_release(ot, ind, XT_UNLOCK_W_UPDATE, &iref); + } /* The node has overflowed!! 
*/ result.sr_item = item->i_pos; + memcpy(ot->ot_ind_wbuf.tb_data, iref.ir_branch->tb_data, item->i_pos.i_item_offset); // First part of the buffer + memcpy(&ot->ot_ind_wbuf.tb_data[item->i_pos.i_item_offset], item_buf, item_size); // The new item + memcpy(&ot->ot_ind_wbuf.tb_data[item->i_pos.i_item_offset + item_size], + &iref.ir_branch->tb_data[item->i_pos.i_item_offset + item->i_pos.i_item_size], + item->i_pos.i_total_size - item->i_pos.i_item_offset - item->i_pos.i_item_size); + item->i_pos.i_total_size += item_size - item->i_pos.i_item_size; + item->i_pos.i_item_size = item_size; + XT_SET_DISK_2(ot->ot_ind_wbuf.tb_size_2, XT_MAKE_LEAF_SIZE(item->i_pos.i_total_size)); + IDX_TRACE("%d-> %x\n", (int) XT_NODE_ID(current), (int) XT_GET_DISK_2(ot->ot_ind_wbuf.tb_size_2)); + ASSERT_NS(item->i_pos.i_total_size > XT_INDEX_PAGE_DATA_SIZE && item->i_pos.i_total_size <= XT_INDEX_PAGE_DATA_SIZE*2); + /* Adjust the stack (we want the parents of the delete node): */ for (;;) { if (idx_pop(stack) == item) @@ -1448,7 +1470,7 @@ static xtBool idx_replace_node_key(XTOpenTablePtr ot, XTIndexPtr ind, IdxStackIt /* We assume that value can be overwritten (which is the case) */ key_value.sv_flags = XT_SEARCH_WHOLE_KEY; key_value.sv_key = key_buf; - if (!idx_get_middle_branch_item(ot, ind, iref.ir_branch, &key_value, &result)) + if (!idx_get_middle_branch_item(ot, ind, &ot->ot_ind_wbuf, &key_value, &result)) goto failed_1; if (!idx_new_branch(ot, ind, &new_branch)) @@ -1456,7 +1478,6 @@ static xtBool idx_replace_node_key(XTOpenTablePtr ot, XTIndexPtr ind, IdxStackIt /* Split the node: */ new_size = result.sr_item.i_total_size - result.sr_item.i_item_offset - result.sr_item.i_item_size; - // TODO: Are 2 buffers now required? 
new_branch_ptr = (XTIdxBranchDPtr) &ot->ot_ind_wbuf.tb_data[XT_INDEX_PAGE_DATA_SIZE]; memmove(new_branch_ptr->tb_data, &iref.ir_branch->tb_data[result.sr_item.i_item_offset + result.sr_item.i_item_size], new_size); @@ -1466,10 +1487,10 @@ static xtBool idx_replace_node_key(XTOpenTablePtr ot, XTIndexPtr ind, IdxStackIt goto failed_2; /* Change the size of the old branch: */ - XT_SET_DISK_2(iref.ir_branch->tb_size_2, XT_MAKE_NODE_SIZE(result.sr_item.i_item_offset)); - IDX_TRACE("%d-> %x\n", (int) XT_NODE_ID(current), (int) XT_GET_DISK_2(iref.ir_branch->tb_size_2)); + XT_SET_DISK_2(ot->ot_ind_wbuf.tb_size_2, XT_MAKE_NODE_SIZE(result.sr_item.i_item_offset)); + IDX_TRACE("%d-> %x\n", (int) XT_NODE_ID(current), (int) XT_GET_DISK_2(ot->ot_ind_wbuf.tb_size_2)); + memcpy(iref.ir_branch, &ot->ot_ind_wbuf, offsetof(XTIdxBranchDRec, tb_data) + result.sr_item.i_item_offset); iref.ir_updated = TRUE; - xt_ind_release(ot, ind, XT_UNLOCK_W_UPDATE, &iref); /* Insert the new branch into the parent node, using the new middle key value: */ @@ -2071,6 +2092,11 @@ xtPublic xtBool xt_idx_insert(XTOpenTablePtr ot, XTIndexPtr ind, xtRowID row_id, if (!idx_new_branch(ot, ind, &new_branch)) goto failed_1; + if (XT_NODE_ID(current) == XT_NODE_ID(new_branch)) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name); + goto failed_1; + } + /* Copy and write the rest of the data to the new node: */ new_size = result.sr_item.i_total_size - result.sr_item.i_item_offset - result.sr_item.i_item_size; new_branch_ptr = (XTIdxBranchDPtr) &ot->ot_ind_wbuf.tb_data[XT_INDEX_PAGE_DATA_SIZE]; @@ -2723,6 +2749,10 @@ xtPublic xtBool xt_idx_search(XTOpenTablePtr ot, XTIndexPtr ind, register XTIdxS #endif ASSERT_NS(iref.ir_xlock == 2); ASSERT_NS(iref.ir_updated == 2); + if (ind->mi_key_corrupted) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name); + return FAILED; + } return OK; failed: @@ -2874,6 +2904,10 @@ xtPublic xtBool 
xt_idx_search_prev(XTOpenTablePtr ot, XTIndexPtr ind, register X //idx_check_index(ot, ind, TRUE); //idx_check_on_key(ot); #endif + if (ind->mi_key_corrupted) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name); + return FAILED; + } return OK; failed: @@ -2964,6 +2998,10 @@ xtPublic xtBool xt_idx_next(register XTOpenTablePtr ot, register XTIndexPtr ind, if (!(XT_NODE_ID(current) = XT_NODE_ID(ind->mi_root))) { XT_INDEX_UNLOCK(ind, ot); + if (ind->mi_key_corrupted) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name); + return FAILED; + } return OK; } @@ -3071,6 +3109,10 @@ xtPublic xtBool xt_idx_next(register XTOpenTablePtr ot, register XTIndexPtr ind, ot->ot_curr_rec_id = 0; ot->ot_curr_row_id = 0; XT_INDEX_UNLOCK(ind, ot); + if (ind->mi_key_corrupted) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name); + return FAILED; + } return OK; } @@ -3112,6 +3154,10 @@ xtPublic xtBool xt_idx_next(register XTOpenTablePtr ot, register XTIndexPtr ind, ot->ot_curr_row_id = result.sr_row_id; ot->ot_ind_state = result.sr_item; + if (ind->mi_key_corrupted) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name); + return FAILED; + } return OK; failed: @@ -3178,6 +3224,10 @@ xtPublic xtBool xt_idx_prev(register XTOpenTablePtr ot, register XTIndexPtr ind, if (!(XT_NODE_ID(current) = XT_NODE_ID(ind->mi_root))) { XT_INDEX_UNLOCK(ind, ot); + if (ind->mi_key_corrupted) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name); + return FAILED; + } return OK; } @@ -3274,6 +3324,10 @@ xtPublic xtBool xt_idx_prev(register XTOpenTablePtr ot, register XTIndexPtr ind, ot->ot_curr_row_id = 0; XT_INDEX_UNLOCK(ind, ot); + if (ind->mi_key_corrupted) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name); + return FAILED; + } return OK; unlock_check_on_key: @@ -3302,6 +3356,10 @@ xtPublic xtBool 
xt_idx_prev(register XTOpenTablePtr ot, register XTIndexPtr ind, ot->ot_curr_rec_id = result.sr_rec_id; ot->ot_curr_row_id = result.sr_row_id; ot->ot_ind_state = result.sr_item; + if (ind->mi_key_corrupted) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name); + return FAILED; + } return OK; failed: @@ -3648,7 +3706,7 @@ xtPublic void xt_check_indices(XTOpenTablePtr ot) track_block_exists(current); #endif printf("%d ", (int) XT_NODE_ID(current)); - if (!xt_ind_read_bytes(ot, *ind, current, sizeof(XTIndFreeBlockRec), (xtWord1 *) &free_block)) { + if (!xt_ind_read_bytes(ot, NULL, current, sizeof(XTIndFreeBlockRec), (xtWord1 *) &free_block)) { xt_log_and_clear_exception_ns(); break; } @@ -4141,11 +4199,18 @@ void XTIndexLogPool::ilp_init(struct XTThread *self, struct XTDatabase *db, size if (!ilp_open_log(&il, log_id, FALSE, self)) goto failed; if (il->il_tab_id && il->il_log_eof) { + char table_name[XT_IDENTIFIER_NAME_SIZE*3+3]; + if (!il->il_open_table(&ot)) goto failed; if (ot) { - if (!il->il_apply_log(ot)) - goto failed; + xt_tab_make_table_name(ot->ot_table, table_name, sizeof(table_name)); + xt_logf(XT_NT_INFO, "PBXT: Recovering index, table: %s, bytes to read: %llu\n", table_name, (u_llong) il->il_log_eof); + if (!il->il_apply_log(ot)) { + /* If recovery of an index fails, then it is corrupt! */ + xt_tab_disable_index(ot->ot_table, XT_INDEX_CORRUPTED); + xt_log_and_clear_exception_ns(); + } ot->ot_thread = self; il->il_close_table(ot); } @@ -4468,8 +4533,7 @@ xtBool XTIndexLog::il_apply_log(struct XTOpenTable *ot) /* Corrupt log?! 
*/ if (il_buffer_len < req_size) { xt_register_ixterr(XT_REG_CONTEXT, XT_ERR_INDEX_LOG_CORRUPT, xt_file_path(il_of)); - xt_log_and_clear_exception_ns(); - return OK; + return FAILED; } if (!xt_pread_file(il_of, offset, il_buffer_len, il_buffer_len, il_buffer, NULL, &ot->ot_thread->st_statistics.st_ilog, ot->ot_thread)) return FAILED; @@ -4548,8 +4612,7 @@ xtBool XTIndexLog::il_apply_log(struct XTOpenTable *ot) /* Corrupt log?! */ if (il_buffer_len < req_size) { xt_register_ixterr(XT_REG_CONTEXT, XT_ERR_INDEX_LOG_CORRUPT, xt_file_path(il_of)); - xt_log_and_clear_exception_ns(); - return OK; + return FAILED; } if (!xt_pread_file(il_of, offset, il_buffer_len, il_buffer_len, il_buffer, NULL, &ot->ot_thread->st_statistics.st_ilog, ot->ot_thread)) return FAILED; @@ -4597,8 +4660,7 @@ xtBool XTIndexLog::il_apply_log(struct XTOpenTable *ot) break; default: xt_register_ixterr(XT_REG_CONTEXT, XT_ERR_INDEX_LOG_CORRUPT, xt_file_path(il_of)); - xt_log_and_clear_exception_ns(); - return OK; + return FAILED; } } diff --git a/storage/pbxt/src/index_xt.h b/storage/pbxt/src/index_xt.h index 52f8f32dd33..a56e7b3cdb7 100644 --- a/storage/pbxt/src/index_xt.h +++ b/storage/pbxt/src/index_xt.h @@ -312,7 +312,7 @@ typedef struct XTIndex { u_int mi_flags; u_int mi_key_size; u_int mi_max_items; /* The maximum number of items that can fit in a leaf node. */ - xtBool mi_low_byte_first; + xtBool mi_key_corrupted; /* Set to TRUE if a currupted index key is detected. */ xtBool mi_fix_key; xtBool mi_lazy_delete; /* TRUE if index entries are "lazy deleted". */ u_int mi_single_type; /* Used when the index contains a single field. 
*/ diff --git a/storage/pbxt/src/myxt_xt.cc b/storage/pbxt/src/myxt_xt.cc index 51490fc00f5..410bf2d2f3c 100644 --- a/storage/pbxt/src/myxt_xt.cc +++ b/storage/pbxt/src/myxt_xt.cc @@ -1088,7 +1088,10 @@ xtPublic u_int myxt_get_key_length(XTIndexPtr ind, xtWord1 *key_buf) } end: - return (xtWord1 *) key_data - key_buf; + u_int ilen = (xtWord1 *) key_data - key_buf; + if (ilen > XT_INDEX_MAX_KEY_SIZE) + ind->mi_key_corrupted = TRUE; + return ilen; } /* Derived from ha_key_cmp */ @@ -2183,7 +2186,8 @@ static XTIndexPtr my_create_index(XTThreadPtr self, TABLE *table_arg, u_int idx, xt_spinlock_init_with_autoname(self, &ind->mi_dirty_lock); ind->mi_index_no = idx; ind->mi_flags = (index->flags & (HA_NOSAME | HA_NULL_ARE_EQUAL | HA_UNIQUE_CHECK)); - ind->mi_low_byte_first = TS(table_arg)->db_low_byte_first; + //ind->mi_low_byte_first = TS(table_arg)->db_low_byte_first; + ind->mi_key_corrupted = FALSE; ind->mi_fix_key = TRUE; ind->mi_select_total = 0; ind->mi_subset_of = 0; diff --git a/storage/pbxt/src/myxt_xt.h b/storage/pbxt/src/myxt_xt.h index 62caf1c3b61..3898c8e30c6 100644 --- a/storage/pbxt/src/myxt_xt.h +++ b/storage/pbxt/src/myxt_xt.h @@ -69,17 +69,17 @@ void myxt_free_dictionary(XTThreadPtr self, XTDictionary *dic); void myxt_move_dictionary(XTDictionaryPtr dic, XTDictionaryPtr source_dic); XTDDTable *myxt_create_table_from_table(XTThreadPtr self, STRUCT_TABLE *my_tab); -void myxt_static_convert_identifier(XTThreadPtr self, const struct charset_info_st *cs, char *from, char *to, size_t to_len); -char *myxt_convert_identifier(XTThreadPtr self, const struct charset_info_st *cs, char *from); +void myxt_static_convert_identifier(XTThreadPtr self, MX_CONST_CHARSET_INFO *cs, char *from, char *to, size_t to_len); +char *myxt_convert_identifier(XTThreadPtr self, MX_CONST_CHARSET_INFO *cs, char *from); void myxt_static_convert_table_name(XTThreadPtr self, char *from, char *to, size_t to_len); void myxt_static_convert_file_name(char *from, char *to, size_t to_len); char 
*myxt_convert_table_name(XTThreadPtr self, char *from); int myxt_strcasecmp(char * a, char *b); -int myxt_isspace(const struct charset_info_st *cs, char a); -int myxt_ispunct(const struct charset_info_st *cs, char a); -int myxt_isdigit(const struct charset_info_st *cs, char a); +int myxt_isspace(MX_CONST_CHARSET_INFO *cs, char a); +int myxt_ispunct(MX_CONST_CHARSET_INFO *cs, char a); +int myxt_isdigit(MX_CONST_CHARSET_INFO *cs, char a); -const struct charset_info_st *myxt_getcharset(bool convert); +MX_CONST_CHARSET_INFO *myxt_getcharset(bool convert); void *myxt_create_thread(); void myxt_destroy_thread(void *thread, xtBool end_threads); diff --git a/storage/pbxt/src/restart_xt.cc b/storage/pbxt/src/restart_xt.cc index b0c8f2854ae..93720f2b113 100644 --- a/storage/pbxt/src/restart_xt.cc +++ b/storage/pbxt/src/restart_xt.cc @@ -1359,6 +1359,57 @@ static xtBool xres_sync_operations(XTThreadPtr self, XTDatabaseHPtr db, XTWriter return op_synced; } +#ifdef XT_CORRECT_TABLE_FREE_COUNT +#define CORRECT_COUNT TRUE +#else +#define CORRECT_COUNT FALSE +#endif +#ifdef XT_CHECK_RECORD_FREE_COUNT +#define CHECK_RECS TRUE +#else +#define CHECK_RECS FALSE +#endif +#if defined(XT_CHECK_RECORD_FREE_COUNT) || defined(XT_CHECK_ROW_FREE_COUNT) +#define RECOVER_FREE_COUNTS +#endif + +#ifdef RECOVER_FREE_COUNTS +/* {CORRECTED-ROW-COUNT} + * This error can be repeated by crashing the server during + * high activitity, after flush table writes the table header + * + * On recovery, the free count "from the future" is used as + * the starting point for subsequent allocation and frees. + * The count is wrong after that point. + * + * The recovery of the count only works correctly if a + * checkpoint is complete successfully after that table + * header is flushed. Basically the writing of the table + * header should be synchronsized with the writing of the + * end of the checkpoint. + * + * Another solution would be to log the count, along with + * the allocate and free commannds. 
+ * + * The 3rd solution is the one used here. The count is corrected + * after recovery. + */ +static void xres_recover_table_free_counts(XTThreadPtr self, XTDatabaseHPtr db, XTWriterStatePtr ws) +{ + u_int edx; + XTTableEntryPtr te_ptr; + XTTableHPtr tab; + + xt_enum_tables_init(&edx); + while ((te_ptr = xt_enum_tables_next(self, db, &edx))) { + if ((tab = te_ptr->te_table)) { + if (xres_open_table(self, ws, te_ptr->te_tab_id)) + xt_tab_check_free_lists(self, ws->ws_ot, CHECK_RECS, CORRECT_COUNT); + } + } +} +#endif + /* * Operations from the log are applied in sequence order. * If the operations are out of sequence, they are buffered @@ -2175,6 +2226,13 @@ xtBool XTXactRestart::xres_restart(XTThreadPtr self, xtLogID *log_id, xtLogOffse /* This is true because if no transaction was placed in RAM then * the next transaction in RAM will have the next ID: */ db->db_xn_min_ram_id = db->db_xn_curr_id + 1; + +#ifdef RECOVER_FREE_COUNTS + if (xres_cp_log_id != *log_id || xres_cp_log_offset != *log_offset) { + /* Recovery took place, correct the row count! */ + xres_recover_table_free_counts(self, db, &ws); + } +#endif } failed: diff --git a/storage/pbxt/src/strutil_xt.cc b/storage/pbxt/src/strutil_xt.cc index 02132fbb06b..8183034a204 100644 --- a/storage/pbxt/src/strutil_xt.cc +++ b/storage/pbxt/src/strutil_xt.cc @@ -380,7 +380,7 @@ xtPublic void xt_int8_to_byte_size(xtInt8 value, char *string) /* Version number must also be set in configure.in! */ xtPublic c_char *xt_get_version(void) { - return "1.0.11 Pre-GA"; + return "1.0.11-7 Pre-GA"; } /* Copy and URL decode! 
*/ diff --git a/storage/pbxt/src/table_xt.cc b/storage/pbxt/src/table_xt.cc index b01f4404ce3..2d93f161ac9 100644 --- a/storage/pbxt/src/table_xt.cc +++ b/storage/pbxt/src/table_xt.cc @@ -80,6 +80,65 @@ /* * ----------------------------------------------------------------------- + * Handle Error Detected in a Table + */ + +struct XTTableError { + xtTableID ter_tab_id; + xtRecordID ter_rec_id; +}; + +static int tab_comp_tab_error(XTThreadPtr XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b) +{ + XTTableError *ter_a = ((XTTableError *) a); + XTTableError *ter_b = (XTTableError *) b; + + if (ter_a->ter_tab_id < ter_b->ter_tab_id) + return -1; + if (ter_a->ter_tab_id == ter_b->ter_tab_id) { + if (ter_a->ter_rec_id < ter_b->ter_rec_id) + return -1; + if (ter_a->ter_rec_id == ter_b->ter_rec_id) + return 0; + return 1; + } + return 1; +} + +static xtBool tab_record_corrupt(XTOpenTablePtr ot, xtRowID row_id, xtRecordID rec_id, bool not_valid, int where) +{ + XTTableHPtr tab = ot->ot_table; + XTDatabaseHPtr db = tab->tab_db; + XTTableError ter; + XTTableError *ter_ptr; + + ter.ter_tab_id = tab->tab_id; + ter.ter_rec_id = rec_id; + + xt_sl_lock_ns(db->db_error_list, ot->ot_thread); + if (!(ter_ptr = (XTTableError *) xt_sl_find(NULL, db->db_error_list, &ter))) { + xtBool ok; + char table_name[XT_IDENTIFIER_NAME_SIZE*3+3]; + + ok = xt_sl_insert(NULL, db->db_error_list, &ter, &ter); + xt_sl_unlock_ns(db->db_error_list); + if (!ok) + return FAILED; + xt_tab_set_table_repair_pending(tab); + xt_tab_make_table_name(tab, table_name, sizeof(table_name)); + xt_logf(XT_NT_ERROR, "#%d Table %s: row %llu, record %llu, is %s, REPAIR TABLE required.\n", where, + table_name, + (u_llong) row_id, + (u_llong) rec_id, + not_valid ? 
"not valid" : "free"); + } + else + xt_sl_unlock_ns(db->db_error_list); + return OK; +} + +/* + * ----------------------------------------------------------------------- * Compare paths: */ @@ -425,6 +484,7 @@ xtPublic void xt_tab_init_db(XTThreadPtr self, XTDatabaseHPtr db) db->db_tables = xt_new_hashtable(self, tab_list_comp, tab_list_hash, tab_list_free, TRUE, TRUE); db->db_table_by_id = xt_new_sortedlist(self, sizeof(XTTableEntryRec), 20, 20, tab_comp_by_id, db, tab_free_by_id, FALSE, FALSE); db->db_table_paths = xt_new_sortedlist(self, sizeof(XTTablePathPtr), 20, 20, tab_comp_path, db, tab_free_path, FALSE, FALSE); + db->db_error_list = xt_new_sortedlist(self, sizeof(XTTableError), 20, 20, tab_comp_tab_error, db, NULL, TRUE, FALSE); if (db->db_multi_path) { XTOpenFilePtr of; @@ -649,6 +709,10 @@ xtPublic void xt_tab_exit_db(XTThreadPtr self, XTDatabaseHPtr db) xt_free_sortedlist(self, db->db_table_paths); db->db_table_paths = NULL; } + if (db->db_error_list) { + xt_free_sortedlist(self, db->db_error_list); + db->db_error_list = NULL; + } } static void tab_check_table(XTThreadPtr self, XTTableHPtr XT_UNUSED(tab)) @@ -1713,6 +1777,116 @@ xtPublic void xt_drop_table(XTThreadPtr self, XTPathStrPtr tab_name, xtBool drop exit_(); } +xtPublic void xt_tab_check_free_lists(XTThreadPtr self, XTOpenTablePtr ot, bool check_recs, bool correct_count) +{ + char table_name[XT_IDENTIFIER_NAME_SIZE*3+3]; + register XTTableHPtr tab = ot->ot_table; + xtRowID prev_row_id; + xtRowID row_id; + xtRefID next_row_id; + u_llong free_count; + + xt_tab_make_table_name(tab, table_name, sizeof(table_name)); + if (check_recs) { + xtRecordID prev_rec_id; + xtRecordID rec_id; + XTTabRecExtDRec rec_buf; + + xt_lock_mutex_ns(&tab->tab_rec_lock); + /* Checking the free list: */ + prev_rec_id = 0; + free_count = 0; + rec_id = tab->tab_rec_free_id; + while (rec_id) { + if (rec_id >= tab->tab_rec_eof_id) { + xt_logf(XT_NT_ERROR, "Table %s: invalid reference on free list: %llu, ", table_name, 
(u_llong) rec_id); + if (prev_rec_id) + xt_logf(XT_NT_ERROR, "reference by: %llu\n", (u_llong) prev_rec_id); + else + xt_logf(XT_NT_ERROR, "reference by list head pointer\n"); + xt_tab_set_table_repair_pending(tab); + break; + } + if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_FIX_HEADER_SIZE, (xtWord1 *) &rec_buf)) { + if (self) + xt_throw(self); + else + xt_log_and_clear_warning(ot->ot_thread); + break; + } + if ((rec_buf.tr_rec_type_1 & XT_TAB_STATUS_MASK) != XT_TAB_STATUS_FREED) + xt_logf(XT_NT_INFO, "Table %s: record, %llu, on free list is not free\n", table_name, (u_llong) rec_id); + free_count++; + prev_rec_id = rec_id; + rec_id = XT_GET_DISK_4(rec_buf.tr_prev_rec_id_4); + } + if (free_count != tab->tab_rec_fnum) { + if (correct_count) { + tab->tab_rec_fnum = free_count; + tab->tab_head_rec_fnum = free_count; + tab->tab_flush_pending = TRUE; + xt_logf(XT_NT_INFO, "Table %s: free record count (%llu) has been set to the number of records on the list: %llu\n", table_name, (u_llong) tab->tab_rec_fnum, (u_llong) free_count); + } + else + xt_logf(XT_NT_INFO, "Table %s: free record count (%llu) differs from the number of records on the list: %llu\n", table_name, (u_llong) tab->tab_rec_fnum, (u_llong) free_count); + } + xt_unlock_mutex_ns(&tab->tab_rec_lock); + } + + /* Check the row free list: */ + xt_lock_mutex_ns(&tab->tab_row_lock); + + prev_row_id = 0; + free_count = 0; + row_id = tab->tab_row_free_id; + while (row_id) { + if (row_id >= tab->tab_row_eof_id) { + xt_logf(XT_NT_ERROR, "Table %s: invalid reference on free row: %llu, ", table_name, (u_llong) row_id); + if (prev_row_id) + xt_logf(XT_NT_ERROR, "reference by: %llu\n", (u_llong) prev_row_id); + else + xt_logf(XT_NT_ERROR, "reference by list head pointer\n"); + xt_tab_set_table_repair_pending(tab); + break; + } + if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, &next_row_id, ot->ot_thread)) { + if (self) + xt_throw(self); + else + xt_log_and_clear_warning(ot->ot_thread); + break; + } + free_count++; 
+ prev_row_id = row_id; + row_id = next_row_id; + } + if (free_count != tab->tab_row_fnum) { + if (correct_count) { + /* tab_row_fnum is the current value, and tab_head_row_fnum is the value on + * disk. tab_head_row_fnum is set by the writer as the changes are applied + * to the database. + * + * This is the value then stored in the header of the file. This value + * is in sync with other changes to the file. + * + * So the fact that I am setting both value means this will not work at + * runtime, unless all changes have been applied by the writer. + * + * The correct way to do this at run time would be to add the change to the + * transaction log, so that it is applied by the writer. + */ + tab->tab_row_fnum = free_count; + tab->tab_head_row_fnum = free_count; + tab->tab_flush_pending = TRUE; + xt_logf(XT_NT_INFO, "Table %s: free row count (%llu) has been set to the number of rows on the list: %llu\n", table_name, (u_llong) tab->tab_row_fnum, (u_llong) free_count); + } + else + xt_logf(XT_NT_INFO, "Table %s: free row count (%llu) differs from the number of rows on the list: %llu\n", table_name, (u_llong) tab->tab_row_fnum, (u_llong) free_count); + } + + xt_unlock_mutex_ns(&tab->tab_row_lock); +} + /* * Record buffer size: * ------------------- @@ -2010,7 +2184,7 @@ xtPublic void xt_check_table(XTThreadPtr self, XTOpenTablePtr ot) prec_id = rec_id; rec_id = XT_GET_DISK_4(rec_buf->tr_prev_rec_id_4); } - if (free_count2 < free_rec_count) + if (free_count2 != free_rec_count) xt_logf(XT_INFO, "Table %s: not all free blocks (%llu) on free list: %llu\n", tab->tab_name, (u_llong) free_rec_count, (u_llong) free_count2); freer_(); // xt_unlock_mutex_ns(&tab->tab_rec_lock); @@ -2042,6 +2216,29 @@ xtPublic void xt_check_table(XTThreadPtr self, XTOpenTablePtr ot) rec_id++; } + prec_id = 0; + free_count2 = 0; + row_id = tab->tab_row_free_id; + while (row_id) { + if (row_id >= tab->tab_row_eof_id) { + xt_logf(XT_INFO, "Table %s: invalid reference on free row: %llu, ", 
tab->tab_name, (u_llong) row_id); + if (prec_id) + xt_logf(XT_INFO, "reference by: %llu\n", (u_llong) prec_id); + else + xt_logf(XT_INFO, "reference by list head pointer\n"); + break; + } + if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, &ref_id, self)) { + xt_log_and_clear_exception(self); + break; + } + free_count2++; + prec_id = row_id; + row_id = ref_id; + } + if (free_count2 != tab->tab_row_fnum) + xt_logf(XT_INFO, "Table %s: free row count (%llu) differs from the number of row on the list: %llu\n", tab->tab_name, (u_llong) tab->tab_row_fnum, (u_llong) free_count2); + freer_(); // xt_unlock_mutex(&tab->tab_row_lock); #ifdef CHECK_INDEX_ON_CHECK_TABLE @@ -3117,10 +3314,18 @@ static int tab_visible(register XTOpenTablePtr ot, XTTabRecHeadDPtr rec_head, xt #endif break; case XT_XN_REREAD: + /* {RETRY-READ} + * TODO: This is not as "correct" as it could be. + * Such records should be considered to be aborted, + * and removed from the list. + */ if (invalid_rec != var_rec_id) { invalid_rec = var_rec_id; goto retry_3; } + if (!tab_record_corrupt(ot, row_id, var_rec_id, true, 1)) + goto failed; + /* Assume end of list. */ #ifdef XT_CRASH_DEBUG /* Should not happen! */ @@ -3308,6 +3513,8 @@ xtPublic int xt_tab_visible(XTOpenTablePtr ot) /* Avoid infinite loop: */ if (read_again) { /* Should not happen! */ + if (!tab_record_corrupt(ot, row_id, ot->ot_curr_rec_id, true, 2)) + return XT_ERR; #ifdef XT_CRASH_DEBUG /* Generate a core dump! */ xt_crash_me(); @@ -3364,6 +3571,8 @@ xtPublic int xt_tab_read_record(register XTOpenTablePtr ot, xtWord1 *buffer) /* Avoid infinite loop: */ if (read_again) { /* Should not happen! */ + if (!tab_record_corrupt(ot, XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_row_id_4), ot->ot_curr_rec_id, true, 3)) + return XT_ERR; #ifdef XT_CRASH_DEBUG /* Generate a core dump! 
*/ xt_crash_me(); @@ -3580,6 +3789,7 @@ xtPublic xtBool xt_tab_free_row(XTOpenTablePtr ot, XTTableHPtr tab, xtRowID row_ } tab->tab_row_free_id = row_id; tab->tab_row_fnum++; + ASSERT_NS(tab->tab_row_fnum < tab->tab_row_eof_id); xt_unlock_mutex_ns(&tab->tab_row_lock); if (!xt_xlog_modify_table(tab->tab_id, XT_LOG_ENT_ROW_FREED, op_seq, 0, row_id, sizeof(XTTabRowRefDRec), (xtWord1 *) &free_row, ot->ot_thread)) @@ -3776,7 +3986,7 @@ xtPublic int xt_tab_remove_record(XTOpenTablePtr ot, xtRecordID rec_id, xtWord1 xt_lock_mutex_ns(&tab->tab_db->db_co_ext_lock); if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_EXT_HEADER_SIZE, ot->ot_row_rbuffer)) { xt_unlock_mutex_ns(&tab->tab_db->db_co_ext_lock); - return FAILED; + return XT_ERR; } xt_unlock_mutex_ns(&tab->tab_db->db_co_ext_lock); @@ -3824,7 +4034,7 @@ xtPublic int xt_tab_remove_record(XTOpenTablePtr ot, xtRecordID rec_id, xtWord1 XT_SET_DISK_4(free_rec->rf_next_rec_id_4, prev_rec_id); if (!xt_tab_put_rec_data(ot, rec_id, sizeof(XTTabRecFreeDRec), ot->ot_row_rbuffer, &op_seq)) { xt_unlock_mutex_ns(&tab->tab_rec_lock); - return FAILED; + return XT_ERR; } tab->tab_rec_free_id = rec_id; ASSERT_NS(tab->tab_rec_free_id < tab->tab_rec_eof_id); @@ -3832,7 +4042,9 @@ xtPublic int xt_tab_remove_record(XTOpenTablePtr ot, xtRecordID rec_id, xtWord1 xt_unlock_mutex_ns(&tab->tab_rec_lock); free_rec->rf_rec_type_1 = old_rec_type; - return xt_xlog_modify_table(tab->tab_id, XT_LOG_ENT_REC_REMOVED_BI, op_seq, (xtRecordID) new_rec_type, rec_id, rec_size, ot->ot_row_rbuffer, ot->ot_thread); + if (!xt_xlog_modify_table(tab->tab_id, XT_LOG_ENT_REC_REMOVED_BI, op_seq, (xtRecordID) new_rec_type, rec_id, rec_size, ot->ot_row_rbuffer, ot->ot_thread)) + return XT_ERR; + return OK; } static xtRowID tab_new_row(XTOpenTablePtr ot, XTTableHPtr tab) @@ -3851,6 +4063,7 @@ static xtRowID tab_new_row(XTOpenTablePtr ot, XTTableHPtr tab) return 0; } tab->tab_row_free_id = next_row_id; + ASSERT_NS(tab->tab_row_fnum > 0); tab->tab_row_fnum--; } else { @@ 
-4170,9 +4383,12 @@ static xtBool tab_wait_for_rollback(XTOpenTablePtr ot, xtRowID row_id, xtRecordI return FAILED; if (XT_REC_IS_CLEAN(var_head.tr_rec_type_1)) goto locked; - if (XT_REC_IS_FREE(var_head.tr_rec_type_1)) + if (XT_REC_IS_FREE(var_head.tr_rec_type_1)) { /* Should not happen: */ + if (!tab_record_corrupt(ot, row_id, var_rec_id, false, 4)) + return FAILED; goto record_invalid; + } xn_id = XT_GET_DISK_4(var_head.tr_xact_id_4); switch (xt_xn_status(ot, xn_id, var_rec_id)) { case XT_XN_VISIBLE: @@ -4195,6 +4411,8 @@ static xtBool tab_wait_for_rollback(XTOpenTablePtr ot, xtRowID row_id, xtRecordI XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread); goto retry; case XT_XN_REREAD: + if (!tab_record_corrupt(ot, row_id, var_rec_id, true, 5)) + return FAILED; goto record_invalid; } var_rec_id = XT_GET_DISK_4(var_head.tr_prev_rec_id_4); @@ -4206,9 +4424,10 @@ static xtBool tab_wait_for_rollback(XTOpenTablePtr ot, xtRowID row_id, xtRecordI return FAILED; record_invalid: + /* {RETRY-READ} */ /* Prevent an infinite loop due to a bad record: */ if (invalid_rec != var_rec_id) { - var_rec_id = invalid_rec; + invalid_rec = var_rec_id; goto retry; } /* The record is invalid, it will be "overwritten"... */ @@ -4280,9 +4499,12 @@ xtPublic int xt_tab_maybe_committed(XTOpenTablePtr ot, xtRecordID rec_id, xtXact #ifdef TRACE_VARIATIONS_IN_DUP_CHECK t_type="Re-read"; #endif + /* {RETRY-READ} */ /* Avoid infinite loop: */ if (invalid_rec == rec_id) { /* Should not happen! */ + if (!tab_record_corrupt(ot, XT_GET_DISK_4(rec_head.tr_row_id_4), rec_id, true, 6)) + goto failed; #ifdef XT_CRASH_DEBUG /* Generate a core dump! */ xt_crash_me(); @@ -4327,7 +4549,7 @@ xtPublic int xt_tab_maybe_committed(XTOpenTablePtr ot, xtRecordID rec_id, xtXact if (XT_REC_IS_FREE(rec_head.tr_rec_type_1)) { /* Should not happen: */ if (invalid_rec != var_rec_id) { - var_rec_id = invalid_rec; + invalid_rec = var_rec_id; goto retry; } /* Assume end of list. 
*/ @@ -4364,11 +4586,14 @@ xtPublic int xt_tab_maybe_committed(XTOpenTablePtr ot, xtRecordID rec_id, xtXact } break; case XT_XN_REREAD: + /* {RETRY-READ} */ if (invalid_rec != var_rec_id) { - var_rec_id = invalid_rec; + invalid_rec = var_rec_id; goto retry; } /* Assume end of list. */ + if (!tab_record_corrupt(ot, row_id, invalid_rec, true, 7)) + goto failed; #ifdef XT_CRASH_DEBUG /* Should not happen! */ xt_crash_me(); @@ -5068,6 +5293,8 @@ xtPublic xtBool xt_tab_seq_next(XTOpenTablePtr ot, xtWord1 *buffer, xtBool *eof) ot->ot_on_page = FALSE; goto next_page; } + if (!tab_record_corrupt(ot, XT_GET_DISK_4(((XTTabRecHeadDPtr) buff_ptr)->tr_row_id_4), invalid_rec, true, 8)) + return XT_ERR; #ifdef XT_CRASH_DEBUG /* Should not happen! */ xt_crash_me(); @@ -5240,7 +5467,7 @@ static xtBool tab_exec_repair_pending(XTDatabaseHPtr db, int what, char *table_n return FALSE; } -static void tab_make_table_name(XTTableHPtr tab, char *table_name, size_t size) +xtPublic void xt_tab_make_table_name(XTTableHPtr tab, char *table_name, size_t size) { char *nptr; @@ -5316,7 +5543,7 @@ xtPublic xtBool xt_tab_is_table_repair_pending(XTTableHPtr tab) { char table_name[XT_IDENTIFIER_NAME_SIZE*3+3]; - tab_make_table_name(tab, table_name, sizeof(table_name)); + xt_tab_make_table_name(tab, table_name, sizeof(table_name)); return tab_exec_repair_pending(tab->tab_db, REP_FIND, table_name); } @@ -5326,7 +5553,7 @@ xtPublic void xt_tab_table_repaired(XTTableHPtr tab) char table_name[XT_IDENTIFIER_NAME_SIZE*3+3]; tab->tab_repair_pending = FALSE; - tab_make_table_name(tab, table_name, sizeof(table_name)); + xt_tab_make_table_name(tab, table_name, sizeof(table_name)); tab_exec_repair_pending(tab->tab_db, REP_DEL, table_name); } } @@ -5337,7 +5564,7 @@ xtPublic void xt_tab_set_table_repair_pending(XTTableHPtr tab) char table_name[XT_IDENTIFIER_NAME_SIZE*3+3]; tab->tab_repair_pending = TRUE; - tab_make_table_name(tab, table_name, sizeof(table_name)); + xt_tab_make_table_name(tab, table_name, 
sizeof(table_name)); tab_exec_repair_pending(tab->tab_db, REP_ADD, table_name); } } diff --git a/storage/pbxt/src/table_xt.h b/storage/pbxt/src/table_xt.h index 83f2168dd6e..f6c32587419 100644 --- a/storage/pbxt/src/table_xt.h +++ b/storage/pbxt/src/table_xt.h @@ -507,6 +507,7 @@ int xt_tab_compare_names(const char *n1, const char *n2); int xt_tab_compare_paths(char *n1, char *n2); void xt_tab_init_db(struct XTThread *self, struct XTDatabase *db); void xt_tab_exit_db(struct XTThread *self, struct XTDatabase *db); +void xt_tab_check_free_lists(struct XTThread *self, XTOpenTablePtr ot, bool check_recs, bool correct_count); void xt_check_tables(struct XTThread *self); char *xt_tab_file_to_name(size_t size, char *tab_name, char *file_name); @@ -572,6 +573,7 @@ xtBool xt_tab_get_rec_data(register XTOpenTablePtr ot, xtRecordID rec_id, siz void xt_tab_disable_index(XTTableHPtr tab, u_int ind_error); void xt_tab_set_index_error(XTTableHPtr tab); +void xt_tab_make_table_name(XTTableHPtr tab, char *table_name, size_t size); xtBool xt_tab_is_table_repair_pending(XTTableHPtr tab); void xt_tab_table_repaired(XTTableHPtr tab); void xt_tab_set_table_repair_pending(XTTableHPtr tab); diff --git a/storage/pbxt/src/thread_xt.cc b/storage/pbxt/src/thread_xt.cc index ac42896d22f..52c2c6c29c5 100644 --- a/storage/pbxt/src/thread_xt.cc +++ b/storage/pbxt/src/thread_xt.cc @@ -224,11 +224,16 @@ static void thr_log_va(XTThreadPtr self, c_char *func, c_char *file, u_int line, #else /* Use the buffer, unless it is too small */ va_list ap2; + int bufsize; va_copy(ap2, ap); - if (vsnprintf(buffer, DEFAULT_LOG_BUFFER_SIZE, fmt, ap) >= DEFAULT_LOG_BUFFER_SIZE) { - if (vasprintf(&log_string, fmt, ap2) == -1) + bufsize = vsnprintf(buffer, DEFAULT_LOG_BUFFER_SIZE, fmt, ap); + if (bufsize >= DEFAULT_LOG_BUFFER_SIZE) { + log_string = (char *) malloc(bufsize + 1); + if (vsnprintf(log_string, bufsize + 1, fmt, ap2) > bufsize) { + free(log_string); log_string = NULL; + } } else log_string = buffer; diff 
--git a/storage/pbxt/src/xaction_xt.cc b/storage/pbxt/src/xaction_xt.cc index 7281eafd8db..48abc5d2b66 100644 --- a/storage/pbxt/src/xaction_xt.cc +++ b/storage/pbxt/src/xaction_xt.cc @@ -1558,6 +1558,8 @@ xtPublic int xt_xn_status(XTOpenTablePtr ot, xtXactID xn_id, xtRecordID XT_UNUSE * Because we are only here because the record was valid but not * clean (you can confirm this by looking at the code that * calls this function). + * + * See {RETRY-READ} */ return XT_XN_REREAD; } @@ -1743,7 +1745,7 @@ xtPublic xtWord8 xt_xn_bytes_to_sweep(XTDatabaseHPtr db, XTThreadPtr thread) } else { xn_log_id = x_log_id; - x_log_offset = x_log_offset; + xn_log_offset = x_log_offset; } } xn_id++; diff --git a/storage/pbxt/src/xt_defs.h b/storage/pbxt/src/xt_defs.h index 98ebe0957a5..3c77415265c 100644 --- a/storage/pbxt/src/xt_defs.h +++ b/storage/pbxt/src/xt_defs.h @@ -397,6 +397,24 @@ typedef struct XTPathStr { */ #define XT_XLOG_FLUSH_FREQ 1000 +/* + * Define here if you want to check (and correct) the table free list + * counts. The free list counts are not durable, because they are not + * written to the log. + * + * The row free count is most critical because it can be used to + * estimate the number of rows in the table. 
+ */ +#define XT_CHECK_ROW_FREE_COUNT +#ifdef DEBUG +#define XT_CHECK_RECORD_FREE_COUNT +#endif +#define XT_CORRECT_TABLE_FREE_COUNT + +#if defined(XT_CHECK_ROW_FREE_COUNT) && defined(XT_CORRECT_TABLE_FREE_COUNT) +#define XT_ROW_COUNT_CORRECTED +#endif + /* ---------------------------------------------------------------------- * GLOBAL CONSTANTS */ @@ -782,7 +800,7 @@ extern xtBool pbxt_crash_debug; #define MX_ULONG_T uint32_t #define MX_ULONGLONG_T uint64_t #define MX_LONGLONG_T uint64_t -#define MX_CHARSET_INFO const struct charset_info_st +#define MX_CHARSET_INFO struct charset_info_st #define MX_CONST_CHARSET_INFO const struct charset_info_st #define MX_CONST const #define MX_BITMAP MyBitmap @@ -873,7 +891,11 @@ extern "C" void session_mark_transaction_to_rollback(Session *session, bool all) #define MX_ULONGLONG_T ulonglong #define MX_LONGLONG_T longlong #define MX_CHARSET_INFO CHARSET_INFO -#define MX_CONST_CHARSET_INFO const struct charset_info_st +#if defined(MARIADB_BASE_VERSION) && MYSQL_VERSION_ID > 50200 +#define MX_CONST_CHARSET_INFO const struct charset_info_st +#else +#define MX_CONST_CHARSET_INFO struct charset_info_st +#endif #define MX_CONST #define MX_BITMAP MY_BITMAP #define MX_BIT_SIZE() n_bits diff --git a/storage/sphinx/CMakeLists.txt b/storage/sphinx/CMakeLists.txt new file mode 100644 index 00000000000..1a46f50f4a7 --- /dev/null +++ b/storage/sphinx/CMakeLists.txt @@ -0,0 +1,12 @@ +INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake") + +ADD_DEFINITIONS(-DMYSQL_SERVER) + +INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include + ${CMAKE_SOURCE_DIR}/sql + ${CMAKE_SOURCE_DIR}/extra/yassl/include + ${CMAKE_SOURCE_DIR}/regex) + +SET(SPHINX_SOURCES ha_sphinx.cc) +SET(SPHINX_LIBS ws2_32.lib) +MYSQL_STORAGE_ENGINE(SPHINX) diff --git a/storage/sphinx/Makefile.am b/storage/sphinx/Makefile.am new file mode 100644 index 00000000000..5f58d673547 --- /dev/null +++ b/storage/sphinx/Makefile.am @@ -0,0 +1,55 @@ +# Copyright (C) 2000 MySQL AB & MySQL 
Finland AB & TCX DataKonsult AB +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#called from the top level Makefile + +MYSQLDATAdir = $(localstatedir) +MYSQLSHAREdir = $(pkgdatadir) +MYSQLBASEdir= $(prefix) +MYSQLLIBdir= $(pkglibdir) +pkgplugindir = $(pkglibdir)/plugin +INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include \ + -I$(top_srcdir)/regex \ + -I$(top_srcdir)/sql \ + -I$(srcdir) + +DEFS= @DEFS@ -D_REENTRANT -D_PTHREADS -DMYSQL_SERVER + +noinst_HEADERS = ha_sphinx.h + +EXTRA_LTLIBRARIES = ha_sphinx.la +pkgplugin_LTLIBRARIES = @plugin_sphinx_shared_target@ sphinx.la + +ha_sphinx_la_LDFLAGS = -module -rpath $(MYSQLLIBdir) \ + -L$(top_builddir)/libservices -lmysqlservices +ha_sphinx_la_CXXFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_sphinx_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_sphinx_la_SOURCES = ha_sphinx.cc + +sphinx_la_LDFLAGS = -module +sphinx_la_CXXFLAGS = $(AM_CFLAGS) +sphinx_la_CFLAGS = $(AM_CFLAGS) +sphinx_la_SOURCES = snippets_udf.cc + +EXTRA_LIBRARIES = libsphinx.a +noinst_LIBRARIES = @plugin_sphinx_static_target@ +libsphinx_a_CXXFLAGS = $(AM_CFLAGS) +libsphinx_a_CFLAGS = $(AM_CFLAGS) +libsphinx_a_SOURCES= ha_sphinx.cc + +EXTRA_DIST = CMakeLists.txt +# Don't update the files from bitkeeper +%::SCCS/s.% diff --git a/storage/sphinx/gen_data.php b/storage/sphinx/gen_data.php 
new file mode 100644 index 00000000000..dac374f095d --- /dev/null +++ b/storage/sphinx/gen_data.php @@ -0,0 +1,37 @@ +<?php + +$file_name= $argv[1]; + +//echo $file_name; + +$cont= file_get_contents($file_name); + +$words= explode(" ", $cont); + +//echo "words: ".(count($words))."\n"; + +$cw = count($words); + +echo "REPLACE INTO test.documents ( id, group_id, date_added, title, content ) VALUES\n"; + + +for ($i=1; $i<=100000; $i++) +{ + $count_words= mt_rand(10,30); + $pred = ""; + for ($j=0; $j<$count_words; $j++) + { + $pred .= chop($words[mt_rand(1, $cw-1)])." "; + } + $count_words= mt_rand(3,5); + $tit = ""; + for ($j=0; $j<$count_words; $j++) + { + $tit .= chop($words[mt_rand(1, $cw-1)])." "; + } + echo "($i,".mt_rand(1,20).",NOW(),'".addslashes($tit)."','".addslashes($pred)."'),\n"; +} + echo "(0,1,now(),'end','eND');\n"; + + +?> diff --git a/storage/sphinx/ha_sphinx.cc b/storage/sphinx/ha_sphinx.cc new file mode 100644 index 00000000000..e88464eb16c --- /dev/null +++ b/storage/sphinx/ha_sphinx.cc @@ -0,0 +1,3115 @@ +// +// $Id: ha_sphinx.cc 2058 2009-11-07 04:01:57Z shodan $ +// + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#if _MSC_VER>=1400 +#define _CRT_SECURE_NO_DEPRECATE 1 +#define _CRT_NONSTDC_NO_DEPRECATE 1 +#endif + +#include <mysql_version.h> + +#if MYSQL_VERSION_ID>50100 +#include "mysql_priv.h" +#include <mysql/plugin.h> +#else +#include "../mysql_priv.h" +#endif + +#include <mysys_err.h> +#include <my_sys.h> + +#ifndef __WIN__ + // UNIX-specific + #include <my_net.h> + #include <netdb.h> + #include <sys/un.h> + + #define RECV_FLAGS MSG_WAITALL + + #define sphSockClose(_sock) ::close(_sock) +#else + // Windows-specific + #include <io.h> + #define strcasecmp stricmp + #define snprintf _snprintf + + #define RECV_FLAGS 0 + + #define sphSockClose(_sock) ::closesocket(_sock) +#endif + +#include <ctype.h> +#include "ha_sphinx.h" + +#ifndef MSG_WAITALL +#define MSG_WAITALL 0 +#endif + +#if 
_MSC_VER>=1400 +#pragma warning(push,4) +#endif + +///////////////////////////////////////////////////////////////////////////// + +/// there might be issues with min() on different platforms (eg. Gentoo, they say) +#define Min(a,b) ((a)<(b)?(a):(b)) + +/// unaligned RAM accesses are forbidden on SPARC +#if defined(sparc) || defined(__sparc__) +#define UNALIGNED_RAM_ACCESS 0 +#else +#define UNALIGNED_RAM_ACCESS 1 +#endif + +#if MYSQL_VERSION_ID<50100 +#define thd_ha_data(X,Y) (X)->ha_data[sphinx_hton.slot] +#define ha_thd() current_thd +#endif // <50100 + +#if UNALIGNED_RAM_ACCESS + +/// pass-through wrapper +template < typename T > inline T sphUnalignedRead ( const T & tRef ) +{ + return tRef; +} + +/// pass-through wrapper +template < typename T > void sphUnalignedWrite ( void * pPtr, const T & tVal ) +{ + *(T*)pPtr = tVal; +} + +#else + +/// unaligned read wrapper for some architectures (eg. SPARC) +template < typename T > +inline T sphUnalignedRead ( const T & tRef ) +{ + T uTmp; + byte * pSrc = (byte *) &tRef; + byte * pDst = (byte *) &uTmp; + for ( int i=0; i<(int)sizeof(T); i++ ) + *pDst++ = *pSrc++; + return uTmp; +} + +/// unaligned write wrapper for some architectures (eg. SPARC) +template < typename T > +void sphUnalignedWrite ( void * pPtr, const T & tVal ) +{ + byte * pDst = (byte *) pPtr; + byte * pSrc = (byte *) &tVal; + for ( int i=0; i<(int)sizeof(T); i++ ) + *pDst++ = *pSrc++; +} + +#endif + +///////////////////////////////////////////////////////////////////////////// + +// FIXME! make this all dynamic +#define SPHINXSE_MAX_FILTERS 32 + +#define SPHINXSE_DEFAULT_HOST "127.0.0.1" +#define SPHINXSE_DEFAULT_PORT 9312 +#define SPHINXSE_DEFAULT_INDEX "*" + +#define SPHINXSE_SYSTEM_COLUMNS 3 + +#define SPHINXSE_MAX_ALLOC (16*1024*1024) +#define SPHINXSE_MAX_KEYWORDSTATS 4096 + +// FIXME! 
all the following is cut-n-paste from sphinx.h and searchd.cpp +#define SPHINX_VERSION "0.9.9" + +enum +{ + SPHINX_SEARCHD_PROTO = 1, + SEARCHD_COMMAND_SEARCH = 0, + VER_COMMAND_SEARCH = 0x116, +}; + +/// search query sorting orders +enum ESphSortOrder +{ + SPH_SORT_RELEVANCE = 0, ///< sort by document relevance desc, then by date + SPH_SORT_ATTR_DESC = 1, ///< sort by document date desc, then by relevance desc + SPH_SORT_ATTR_ASC = 2, ///< sort by document date asc, then by relevance desc + SPH_SORT_TIME_SEGMENTS = 3, ///< sort by time segments (hour/day/week/etc) desc, then by relevance desc + SPH_SORT_EXTENDED = 4, ///< sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC") + SPH_SORT_EXPR = 5, ///< sort by expression + + SPH_SORT_TOTAL +}; + +/// search query matching mode +enum ESphMatchMode +{ + SPH_MATCH_ALL = 0, ///< match all query words + SPH_MATCH_ANY, ///< match any query word + SPH_MATCH_PHRASE, ///< match this exact phrase + SPH_MATCH_BOOLEAN, ///< match this boolean query + SPH_MATCH_EXTENDED, ///< match this extended query + SPH_MATCH_FULLSCAN, ///< match all document IDs w/o fulltext query, apply filters + SPH_MATCH_EXTENDED2, ///< extended engine V2 + + SPH_MATCH_TOTAL +}; + +/// search query relevance ranking mode +enum ESphRankMode +{ + SPH_RANK_PROXIMITY_BM25 = 0, ///< default mode, phrase proximity major factor and BM25 minor one + SPH_RANK_BM25 = 1, ///< statistical mode, BM25 ranking only (faster but worse quality) + SPH_RANK_NONE = 2, ///< no ranking, all matches get a weight of 1 + SPH_RANK_WORDCOUNT = 3, ///< simple word-count weighting, rank is a weighted sum of per-field keyword occurence counts + SPH_RANK_PROXIMITY = 4, ///< phrase proximity + SPH_RANK_MATCHANY = 5, ///< emulate old match-any weighting + SPH_RANK_FIELDMASK = 6, ///< sets bits where there were matches + + SPH_RANK_TOTAL, + SPH_RANK_DEFAULT = SPH_RANK_PROXIMITY_BM25 +}; + +/// search query grouping mode +enum ESphGroupBy +{ + SPH_GROUPBY_DAY = 0, ///< 
group by day + SPH_GROUPBY_WEEK = 1, ///< group by week + SPH_GROUPBY_MONTH = 2, ///< group by month + SPH_GROUPBY_YEAR = 3, ///< group by year + SPH_GROUPBY_ATTR = 4 ///< group by attribute value +}; + +/// known attribute types +enum +{ + SPH_ATTR_NONE = 0, ///< not an attribute at all + SPH_ATTR_INTEGER = 1, ///< this attr is just an integer + SPH_ATTR_TIMESTAMP = 2, ///< this attr is a timestamp + SPH_ATTR_ORDINAL = 3, ///< this attr is an ordinal string number (integer at search time, specially handled at indexing time) + SPH_ATTR_BOOL = 4, ///< this attr is a boolean bit field + SPH_ATTR_FLOAT = 5, + SPH_ATTR_BIGINT = 6, + + SPH_ATTR_MULTI = 0x40000000UL ///< this attr has multiple values (0 or more) +}; + +/// known answers +enum +{ + SEARCHD_OK = 0, ///< general success, command-specific reply follows + SEARCHD_ERROR = 1, ///< general failure, error message follows + SEARCHD_RETRY = 2, ///< temporary failure, error message follows, client should retry later + SEARCHD_WARNING = 3 ///< general success, warning message and command-specific reply follow +}; + +////////////////////////////////////////////////////////////////////////////// + +#define SPHINX_DEBUG_OUTPUT 0 +#define SPHINX_DEBUG_CALLS 0 + +#include <stdarg.h> + +#if SPHINX_DEBUG_OUTPUT +inline void SPH_DEBUG ( const char * format, ... ) +{ + va_list ap; + va_start ( ap, format ); + fprintf ( stderr, "SphinxSE: " ); + vfprintf ( stderr, format, ap ); + fprintf ( stderr, "\n" ); + va_end ( ap ); +} +#else +inline void SPH_DEBUG ( const char *, ... 
) {} +#endif + +#if SPHINX_DEBUG_CALLS + +#define SPH_ENTER_FUNC() { SPH_DEBUG ( "enter %s", __FUNCTION__ ); } +#define SPH_ENTER_METHOD() { SPH_DEBUG ( "enter %s(this=%08x)", __FUNCTION__, this ); } +#define SPH_RET(_arg) { SPH_DEBUG ( "leave %s", __FUNCTION__ ); return _arg; } +#define SPH_VOID_RET() { SPH_DEBUG ( "leave %s", __FUNCTION__ ); return; } + +#else + +#define SPH_ENTER_FUNC() +#define SPH_ENTER_METHOD() +#define SPH_RET(_arg) { return(_arg); } +#define SPH_VOID_RET() { return; } + +#endif + + +#define SafeDelete(_arg) { if ( _arg ) delete ( _arg ); (_arg) = NULL; } +#define SafeDeleteArray(_arg) { if ( _arg ) delete [] ( _arg ); (_arg) = NULL; } + +////////////////////////////////////////////////////////////////////////////// + +/// a structure that will be shared among all open Sphinx SE handlers +struct CSphSEShare +{ + pthread_mutex_t m_tMutex; + THR_LOCK m_tLock; + + char * m_sTable; + char * m_sScheme; + char * m_sHost; ///< points into m_sScheme buffer, DO NOT FREE EXPLICITLY + char * m_sSocket; ///< points into m_sScheme buffer, DO NOT FREE EXPLICITLY + char * m_sIndex; ///< points into m_sScheme buffer, DO NOT FREE EXPLICITLY + ushort m_iPort; + uint m_iTableNameLen; + uint m_iUseCount; + CHARSET_INFO * m_pTableQueryCharset; + + int m_iTableFields; + char ** m_sTableField; + enum_field_types * m_eTableFieldType; + + CSphSEShare () + : m_sTable ( NULL ) + , m_sScheme ( NULL ) + , m_sHost ( NULL ) + , m_sSocket ( NULL ) + , m_sIndex ( NULL ) + , m_iPort ( 0 ) + , m_iTableNameLen ( 0 ) + , m_iUseCount ( 1 ) + , m_pTableQueryCharset ( NULL ) + + , m_iTableFields ( 0 ) + , m_sTableField ( NULL ) + , m_eTableFieldType ( NULL ) + { + thr_lock_init ( &m_tLock ); + pthread_mutex_init ( &m_tMutex, MY_MUTEX_INIT_FAST ); + } + + ~CSphSEShare () + { + pthread_mutex_destroy ( &m_tMutex ); + thr_lock_delete ( &m_tLock ); + + SafeDeleteArray ( m_sTable ); + SafeDeleteArray ( m_sScheme ); + ResetTable (); + } + + void ResetTable () + { + for ( int i=0; 
i<m_iTableFields; i++ ) + SafeDeleteArray ( m_sTableField[i] ); + SafeDeleteArray ( m_sTableField ); + SafeDeleteArray ( m_eTableFieldType ); + } +}; + +/// schema attribute +struct CSphSEAttr +{ + char * m_sName; ///< attribute name (received from Sphinx) + uint32 m_uType; ///< attribute type (received from Sphinx) + int m_iField; ///< field index in current table (-1 if none) + + CSphSEAttr() + : m_sName ( NULL ) + , m_uType ( SPH_ATTR_NONE ) + , m_iField ( -1 ) + {} + + ~CSphSEAttr () + { + SafeDeleteArray ( m_sName ); + } +}; + +/// word stats +struct CSphSEWordStats +{ + char * m_sWord; + int m_iDocs; + int m_iHits; + + CSphSEWordStats () + : m_sWord ( NULL ) + , m_iDocs ( 0 ) + , m_iHits ( 0 ) + {} + + ~CSphSEWordStats () + { + SafeDeleteArray ( m_sWord ); + } +}; + +/// request stats +struct CSphSEStats +{ +public: + int m_iMatchesTotal; + int m_iMatchesFound; + int m_iQueryMsec; + int m_iWords; + CSphSEWordStats * m_dWords; + bool m_bLastError; + char m_sLastMessage[1024]; + + CSphSEStats() + : m_dWords ( NULL ) + { + Reset (); + } + + void Reset () + { + m_iMatchesTotal = 0; + m_iMatchesFound = 0; + m_iQueryMsec = 0; + m_iWords = 0; + SafeDeleteArray ( m_dWords ); + m_bLastError = false; + m_sLastMessage[0] = '\0'; + } + + ~CSphSEStats() + { + Reset (); + } +}; + +/// thread local storage +struct CSphSEThreadData +{ + static const int MAX_QUERY_LEN = 262144; // 256k should be enough, right? 
+ + bool m_bStats; + CSphSEStats m_tStats; + + bool m_bQuery; + char m_sQuery[MAX_QUERY_LEN]; + + CHARSET_INFO * m_pQueryCharset; + + CSphSEThreadData () + : m_bStats ( false ) + , m_bQuery ( false ) + , m_pQueryCharset ( NULL ) + {} +}; + +/// filter types +enum ESphFilter +{ + SPH_FILTER_VALUES = 0, ///< filter by integer values set + SPH_FILTER_RANGE = 1, ///< filter by integer range + SPH_FILTER_FLOATRANGE = 2 ///< filter by float range +}; + + +/// search query filter +struct CSphSEFilter +{ +public: + ESphFilter m_eType; + char * m_sAttrName; + longlong m_uMinValue; + longlong m_uMaxValue; + float m_fMinValue; + float m_fMaxValue; + int m_iValues; + longlong * m_pValues; + int m_bExclude; + +public: + CSphSEFilter () + : m_eType ( SPH_FILTER_VALUES ) + , m_sAttrName ( NULL ) + , m_uMinValue ( 0 ) + , m_uMaxValue ( UINT_MAX ) + , m_fMinValue ( 0.0f ) + , m_fMaxValue ( 0.0f ) + , m_iValues ( 0 ) + , m_pValues ( NULL ) + , m_bExclude ( 0 ) + { + } + + ~CSphSEFilter () + { + SafeDeleteArray ( m_pValues ); + } +}; + + +/// float vs dword conversion +inline uint32 sphF2DW ( float f ) { union { float f; uint32 d; } u; u.f = f; return u.d; } + +/// dword vs float conversion +inline float sphDW2F ( uint32 d ) { union { float f; uint32 d; } u; u.d = d; return u.f; } + + +/// client-side search query +struct CSphSEQuery +{ +public: + const char * m_sHost; + int m_iPort; + +private: + char * m_sQueryBuffer; + + const char * m_sIndex; + int m_iOffset; + int m_iLimit; + + bool m_bQuery; + char * m_sQuery; + uint32 * m_pWeights; + int m_iWeights; + ESphMatchMode m_eMode; + ESphRankMode m_eRanker; + ESphSortOrder m_eSort; + char * m_sSortBy; + int m_iMaxMatches; + int m_iMaxQueryTime; + uint32 m_iMinID; + uint32 m_iMaxID; + + int m_iFilters; + CSphSEFilter m_dFilters[SPHINXSE_MAX_FILTERS]; + + ESphGroupBy m_eGroupFunc; + char * m_sGroupBy; + char * m_sGroupSortBy; + int m_iCutoff; + int m_iRetryCount; + int m_iRetryDelay; + char * m_sGroupDistinct; ///< points to query 
buffer; do NOT delete + int m_iIndexWeights; + char * m_sIndexWeight[SPHINXSE_MAX_FILTERS]; ///< points to query buffer; do NOT delete + int m_iIndexWeight[SPHINXSE_MAX_FILTERS]; + int m_iFieldWeights; + char * m_sFieldWeight[SPHINXSE_MAX_FILTERS]; ///< points to query buffer; do NOT delete + int m_iFieldWeight[SPHINXSE_MAX_FILTERS]; + + bool m_bGeoAnchor; + char * m_sGeoLatAttr; + char * m_sGeoLongAttr; + float m_fGeoLatitude; + float m_fGeoLongitude; + + char * m_sComment; + + struct Override_t + { + union Value_t + { + uint32 m_uValue; + longlong m_iValue64; + float m_fValue; + }; + char * m_sName; ///< points to query buffer + int m_iType; + Dynamic_array<ulonglong> m_dIds; + Dynamic_array<Value_t> m_dValues; + }; + Dynamic_array<Override_t *> m_dOverrides; + +public: + char m_sParseError[256]; + +public: + CSphSEQuery ( const char * sQuery, int iLength, const char * sIndex ); + ~CSphSEQuery (); + + bool Parse (); + int BuildRequest ( char ** ppBuffer ); + +protected: + char * m_pBuf; + char * m_pCur; + int m_iBufLeft; + bool m_bBufOverrun; + + template < typename T > int ParseArray ( T ** ppValues, const char * sValue ); + bool ParseField ( char * sField ); + + void SendBytes ( const void * pBytes, int iBytes ); + void SendWord ( short int v ) { v = ntohs(v); SendBytes ( &v, sizeof(short int) ); } + void SendInt ( int v ) { v = ntohl(v); SendBytes ( &v, sizeof(int) ); } + void SendDword ( uint v ) { v = ntohl(v) ;SendBytes ( &v, sizeof(uint) ); } + void SendUint64 ( ulonglong v ) { SendDword ( uint(v>>32) ); SendDword ( uint(v&0xFFFFFFFFUL) ); } + void SendString ( const char * v ) { int iLen = strlen(v); SendDword(iLen); SendBytes ( v, iLen ); } + void SendFloat ( float v ) { SendDword ( sphF2DW(v) ); } +}; + +template int CSphSEQuery::ParseArray<uint32> ( uint32 **, const char * ); +template int CSphSEQuery::ParseArray<longlong> ( longlong **, const char * ); + +////////////////////////////////////////////////////////////////////////////// + +#if 
MYSQL_VERSION_ID>50100 + +#if MYSQL_VERSION_ID<50114 +#error Sphinx SE requires MySQL 5.1.14 or higher if compiling for 5.1.x series! +#endif + +static handler * sphinx_create_handler ( handlerton * hton, TABLE_SHARE * table, MEM_ROOT * mem_root ); +static int sphinx_init_func ( void * p ); +static int sphinx_close_connection ( handlerton * hton, THD * thd ); +static int sphinx_panic ( handlerton * hton, enum ha_panic_function flag ); +static bool sphinx_show_status ( handlerton * hton, THD * thd, stat_print_fn * stat_print, enum ha_stat_type stat_type ); + +#else + +static bool sphinx_init_func_for_handlerton (); +static int sphinx_close_connection ( THD * thd ); +bool sphinx_show_status ( THD * thd ); + +#endif // >50100 + +////////////////////////////////////////////////////////////////////////////// + +static const char sphinx_hton_name[] = "SPHINX"; +static const char sphinx_hton_comment[] = "Sphinx storage engine " SPHINX_VERSION; + +#if MYSQL_VERSION_ID<50100 +handlerton sphinx_hton = +{ + #ifdef MYSQL_HANDLERTON_INTERFACE_VERSION + MYSQL_HANDLERTON_INTERFACE_VERSION, + #endif + sphinx_hton_name, + SHOW_OPTION_YES, + sphinx_hton_comment, + DB_TYPE_SPHINX_DB, + sphinx_init_func_for_handlerton, + 0, // slot + 0, // savepoint size + sphinx_close_connection, // close_connection + NULL, // savepoint + NULL, // rollback to savepoint + NULL, // release savepoint + NULL, // commit + NULL, // rollback + NULL, // prepare + NULL, // recover + NULL, // commit_by_xid + NULL, // rollback_by_xid + NULL, // create_cursor_read_view + NULL, // set_cursor_read_view + NULL, // close_cursor_read_view + HTON_CAN_RECREATE +}; +#else +static handlerton * sphinx_hton_ptr = NULL; +#endif + +////////////////////////////////////////////////////////////////////////////// + +// variables for Sphinx shared methods +pthread_mutex_t sphinx_mutex; // mutex to init the hash +static int sphinx_init = 0; // flag whether the hash was initialized +static HASH sphinx_open_tables; // hash used to 
track open tables + +////////////////////////////////////////////////////////////////////////////// +// INITIALIZATION AND SHUTDOWN +////////////////////////////////////////////////////////////////////////////// + +// hashing function +#if MYSQL_VERSION_ID>=50120 +typedef size_t GetKeyLength_t; +#else +typedef uint GetKeyLength_t; +#endif + +static byte * sphinx_get_key ( const byte * pSharePtr, GetKeyLength_t * pLength, my_bool ) +{ + CSphSEShare * pShare = (CSphSEShare *) pSharePtr; + *pLength = (size_t) pShare->m_iTableNameLen; + return (byte*) pShare->m_sTable; +} + +#if MYSQL_VERSION_ID<50100 +static int sphinx_init_func ( void * ) // to avoid unused arg warning +#else +static int sphinx_init_func ( void * p ) +#endif +{ + SPH_ENTER_FUNC(); + if ( !sphinx_init ) + { + sphinx_init = 1; + VOID ( pthread_mutex_init ( &sphinx_mutex, MY_MUTEX_INIT_FAST ) ); + hash_init ( &sphinx_open_tables, system_charset_info, 32, 0, 0, + sphinx_get_key, 0, 0 ); + + #if MYSQL_VERSION_ID > 50100 + handlerton * hton = (handlerton*) p; + hton->state = SHOW_OPTION_YES; + hton->db_type = DB_TYPE_AUTOASSIGN; + hton->create = sphinx_create_handler; + hton->close_connection = sphinx_close_connection; + hton->show_status = sphinx_show_status; + hton->panic = sphinx_panic; + hton->flags = HTON_CAN_RECREATE; + sphinx_hton_ptr = hton; + #endif + } + SPH_RET(0); +} + + +#if MYSQL_VERSION_ID<50100 +static bool sphinx_init_func_for_handlerton () +{ + return sphinx_init_func ( &sphinx_hton ); +} +#endif + + +#if MYSQL_VERSION_ID>50100 + +static int sphinx_close_connection ( handlerton * hton, THD * thd ) +{ + // deallocate common handler data + SPH_ENTER_FUNC(); + void ** tmp = thd_ha_data ( thd, hton ); + CSphSEThreadData * pTls = (CSphSEThreadData*) (*tmp); + SafeDelete ( pTls ); + *tmp = NULL; + SPH_RET(0); +} + + +static int sphinx_done_func ( void * ) +{ + SPH_ENTER_FUNC(); + + int error = 0; + if ( sphinx_init ) + { + sphinx_init = 0; + if ( sphinx_open_tables.records ) + error = 1; + 
hash_free ( &sphinx_open_tables ); + pthread_mutex_destroy ( &sphinx_mutex ); + } + + SPH_RET(0); +} + + +static int sphinx_panic ( handlerton * hton, enum ha_panic_function ) +{ + return sphinx_done_func ( hton ); +} + +#else + +static int sphinx_close_connection ( THD * thd ) +{ + // deallocate common handler data + SPH_ENTER_FUNC(); + CSphSEThreadData * pTls = (CSphSEThreadData*) thd->ha_data[sphinx_hton.slot]; + SafeDelete ( pTls ); + thd->ha_data[sphinx_hton.slot] = NULL; + SPH_RET(0); +} + +#endif // >50100 + +////////////////////////////////////////////////////////////////////////////// +// SHOW STATUS +////////////////////////////////////////////////////////////////////////////// + +#if MYSQL_VERSION_ID>50100 +static bool sphinx_show_status ( handlerton * hton, THD * thd, stat_print_fn * stat_print, + enum ha_stat_type ) +#else +bool sphinx_show_status ( THD * thd ) +#endif +{ + SPH_ENTER_FUNC(); + +#if MYSQL_VERSION_ID<50100 + Protocol * protocol = thd->protocol; + List<Item> field_list; +#endif + + char buf1[IO_SIZE]; + uint buf1len; + char buf2[IO_SIZE]; + uint buf2len= 0; + String words; + + buf1[0] = '\0'; + buf2[0] = '\0'; + +#if MYSQL_VERSION_ID>50100 + CSphSEThreadData * pTls = (CSphSEThreadData*) ( *thd_ha_data ( thd, hton ) ); +#else + if ( have_sphinx_db!=SHOW_OPTION_YES ) + { + my_message ( ER_NOT_SUPPORTED_YET, + "failed to call SHOW SPHINX STATUS: --skip-sphinx was specified", + MYF(0) ); + SPH_RET(TRUE); + } + CSphSEThreadData * pTls = (CSphSEThreadData*) thd->ha_data[sphinx_hton.slot]; +#endif + + if ( pTls && pTls->m_bStats ) + { + const CSphSEStats * pStats = &pTls->m_tStats; + buf1len = my_snprintf ( buf1, sizeof(buf1), + "total: %d, total found: %d, time: %d, words: %d", + pStats->m_iMatchesTotal, pStats->m_iMatchesFound, pStats->m_iQueryMsec, pStats->m_iWords ); + +#if MYSQL_VERSION_ID>50100 + stat_print ( thd, sphinx_hton_name, strlen(sphinx_hton_name), + STRING_WITH_LEN("stats"), buf1, buf1len ); +#else + field_list.push_back ( new 
Item_empty_string ( "Type",10 ) ); + field_list.push_back ( new Item_empty_string ( "Name",FN_REFLEN ) ); + field_list.push_back ( new Item_empty_string ( "Status",10 ) ); + if ( protocol->send_fields ( &field_list, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF ) ) + SPH_RET(TRUE); + + protocol->prepare_for_resend (); + protocol->store ( STRING_WITH_LEN("SPHINX"), system_charset_info ); + protocol->store ( STRING_WITH_LEN("stats"), system_charset_info ); + protocol->store ( buf1, buf1len, system_charset_info ); + if ( protocol->write() ) + SPH_RET(TRUE); +#endif + + if ( pStats->m_iWords ) + { + for ( int i=0; i<pStats->m_iWords; i++ ) + { + CSphSEWordStats & tWord = pStats->m_dWords[i]; + buf2len = my_snprintf ( buf2, sizeof(buf2), "%s%s:%d:%d ", + buf2, tWord.m_sWord, tWord.m_iDocs, tWord.m_iHits ); + } + + // convert it if we can + const char * sWord = buf2; + int iWord = buf2len; + + String sBuf3; + if ( pTls->m_pQueryCharset ) + { + uint iErrors; + sBuf3.copy ( buf2, buf2len, pTls->m_pQueryCharset, system_charset_info, &iErrors ); + sWord = sBuf3.c_ptr(); + iWord = sBuf3.length(); + } + +#if MYSQL_VERSION_ID>50100 + stat_print ( thd, sphinx_hton_name, strlen(sphinx_hton_name), + STRING_WITH_LEN("words"), sWord, iWord ); +#else + protocol->prepare_for_resend (); + protocol->store ( STRING_WITH_LEN("SPHINX"), system_charset_info ); + protocol->store ( STRING_WITH_LEN("words"), system_charset_info ); + protocol->store ( sWord, iWord, system_charset_info ); + if ( protocol->write() ) + SPH_RET(TRUE); +#endif + } + + // send last error or warning + if ( pStats->m_sLastMessage && pStats->m_sLastMessage[0] ) + { + const char * sMessageType = pStats->m_bLastError ? 
"error" : "warning"; + +#if MYSQL_VERSION_ID>50100 + stat_print ( thd, sphinx_hton_name, strlen(sphinx_hton_name), + sMessageType, strlen(sMessageType), pStats->m_sLastMessage, strlen(pStats->m_sLastMessage) ); +#else + protocol->prepare_for_resend (); + protocol->store ( STRING_WITH_LEN("SPHINX"), system_charset_info ); + protocol->store ( sMessageType, strlen(sMessageType), system_charset_info ); + protocol->store ( pStats->m_sLastMessage, strlen(pStats->m_sLastMessage), system_charset_info ); + if ( protocol->write() ) + SPH_RET(TRUE); +#endif + } + + } else + { + #if MYSQL_VERSION_ID < 50100 + field_list.push_back ( new Item_empty_string ( "Type", 10 ) ); + field_list.push_back ( new Item_empty_string ( "Name", FN_REFLEN ) ); + field_list.push_back ( new Item_empty_string ( "Status", 10 ) ); + if ( protocol->send_fields ( &field_list, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF ) ) + SPH_RET(TRUE); + + protocol->prepare_for_resend (); + protocol->store ( STRING_WITH_LEN("SPHINX"), system_charset_info ); + protocol->store ( STRING_WITH_LEN("stats"), system_charset_info ); + protocol->store ( STRING_WITH_LEN("no query has been executed yet"), system_charset_info ); + if ( protocol->write() ) + SPH_RET(TRUE); + #endif + } + + #if MYSQL_VERSION_ID < 50100 + send_eof(thd); + #endif + + SPH_RET(FALSE); +} + +////////////////////////////////////////////////////////////////////////////// +// HELPERS +////////////////////////////////////////////////////////////////////////////// + +static char * sphDup ( const char * sSrc, int iLen=-1 ) +{ + if ( !sSrc ) + return NULL; + + if ( iLen<0 ) + iLen = strlen(sSrc); + + char * sRes = new char [ 1+iLen ]; + memcpy ( sRes, sSrc, iLen ); + sRes[iLen] = '\0'; + return sRes; +} + + +static void sphLogError ( const char * sFmt, ... 
) +{ + // emit timestamp +#ifdef __WIN__ + SYSTEMTIME t; + GetLocalTime ( &t ); + + fprintf ( stderr, "%02d%02d%02d %2d:%02d:%02d SphinxSE: internal error: ", + (int)t.wYear % 100, (int)t.wMonth, (int)t.wDay, + (int)t.wHour, (int)t.wMinute, (int)t.wSecond ); +#else + // Unix version + time_t tStamp; + time ( &tStamp ); + + struct tm * pParsed; +#ifdef HAVE_LOCALTIME_R + struct tm tParsed; + localtime_r ( &tStamp, &tParsed ); + pParsed = &tParsed; +#else + pParsed = localtime ( &tStamp ); +#endif // HAVE_LOCALTIME_R + + fprintf ( stderr, "%02d%02d%02d %2d:%02d:%02d SphinxSE: internal error: ", + pParsed->tm_year % 100, pParsed->tm_mon + 1, pParsed->tm_mday, + pParsed->tm_hour, pParsed->tm_min, pParsed->tm_sec); +#endif // __WIN__ + + // emit message + va_list ap; + va_start ( ap, sFmt ); + vfprintf ( stderr, sFmt, ap ); + va_end ( ap ); + + // emit newline + fprintf ( stderr, "\n" ); +} + + + +// the following scheme variants are recognized +// +// sphinx://host/index +// sphinx://host:port/index +// unix://unix/domain/socket:index +// unix://unix/domain/socket +static bool ParseUrl ( CSphSEShare * share, TABLE * table, bool bCreate ) +{ + SPH_ENTER_FUNC(); + + if ( share ) + { + // check incoming stuff + if ( !table ) + { + sphLogError ( "table==NULL in ParseUrl()" ); + return false; + } + if ( !table->s ) + { + sphLogError ( "(table->s)==NULL in ParseUrl()" ); + return false; + } + + // free old stuff + share->ResetTable (); + + // fill new stuff + share->m_iTableFields = table->s->fields; + if ( share->m_iTableFields ) + { + share->m_sTableField = new char * [ share->m_iTableFields ]; + share->m_eTableFieldType = new enum_field_types [ share->m_iTableFields ]; + + for ( int i=0; i<share->m_iTableFields; i++ ) + { + share->m_sTableField[i] = sphDup ( table->field[i]->field_name ); + share->m_eTableFieldType[i] = table->field[i]->type(); + } + } + } + + char * sScheme = NULL; + char * sHost = (char*) SPHINXSE_DEFAULT_HOST; + char * sIndex = (char*) 
SPHINXSE_DEFAULT_INDEX; + int iPort = SPHINXSE_DEFAULT_PORT; + + bool bOk = true; + while ( table->s->connect_string.length!=0 ) + { + bOk = false; + sScheme = sphDup ( table->s->connect_string.str, table->s->connect_string.length ); + + sHost = strstr ( sScheme, "://" ); + if ( !sHost ) + break; + sHost[0] = '\0'; + sHost += 2; + + if ( !strcmp ( sScheme, "unix" ) ) + { + // unix-domain socket + iPort = 0; + if (!( sIndex = strrchr ( sHost, ':' ) )) + sIndex = (char*) SPHINXSE_DEFAULT_INDEX; + else + { + *sIndex++ = '\0'; + if ( !*sIndex ) + sIndex = (char*) SPHINXSE_DEFAULT_INDEX; + } + bOk = true; + break; + } + if( strcmp ( sScheme, "sphinx" )!=0 && strcmp ( sScheme, "inet" )!=0 ) + break; + + // tcp + sHost++; + char * sPort = strchr ( sHost, ':' ); + if ( sPort ) + { + *sPort++ = '\0'; + if ( *sPort ) + { + sIndex = strchr ( sPort, '/' ); + if ( sIndex ) + *sIndex++ = '\0'; + else + sIndex = (char*) SPHINXSE_DEFAULT_INDEX; + + iPort = atoi(sPort); + if ( !iPort ) + iPort = SPHINXSE_DEFAULT_PORT; + } + } else + { + sIndex = strchr ( sHost, '/' ); + if ( sIndex ) + *sIndex++ = '\0'; + else + sIndex = (char*) SPHINXSE_DEFAULT_INDEX; + } + + bOk = true; + break; + } + + if ( !bOk ) + { + my_error ( bCreate ? ER_FOREIGN_DATA_STRING_INVALID_CANT_CREATE : ER_FOREIGN_DATA_STRING_INVALID, + MYF(0), table->s->connect_string ); + } else + { + if ( share ) + { + SafeDeleteArray ( share->m_sScheme ); + share->m_sScheme = sScheme; + share->m_sHost = sHost; + share->m_sIndex = sIndex; + share->m_iPort = (ushort)iPort; + } + } + if ( !bOk && !share ) + SafeDeleteArray ( sScheme ); + + SPH_RET(bOk); +} + + +// Example of simple lock controls. The "share" it creates is structure we will +// pass to each sphinx handler. Do you have to have one of these? Well, you have +// pieces that are used for locking, and they are needed to function. 
+static CSphSEShare * get_share ( const char * table_name, TABLE * table ) +{ + SPH_ENTER_FUNC(); + pthread_mutex_lock ( &sphinx_mutex ); + + CSphSEShare * pShare = NULL; + for ( ;; ) + { + // check if we already have this share +#if MYSQL_VERSION_ID>=50120 + pShare = (CSphSEShare*) hash_search ( &sphinx_open_tables, (const uchar *) table_name, strlen(table_name) ); +#else +#ifdef __WIN__ + pShare = (CSphSEShare*) hash_search ( &sphinx_open_tables, (const byte *) table_name, strlen(table_name) ); +#else + pShare = (CSphSEShare*) hash_search ( &sphinx_open_tables, table_name, strlen(table_name) ); +#endif // win +#endif // pre-5.1.20 + + if ( pShare ) + { + pShare->m_iUseCount++; + break; + } + + // try to allocate new share + pShare = new CSphSEShare (); + if ( !pShare ) + break; + + // try to setup it + pShare->m_pTableQueryCharset = table->field[2]->charset(); + if ( !ParseUrl ( pShare, table, false ) ) + { + SafeDelete ( pShare ); + break; + } + + // try to hash it + pShare->m_iTableNameLen = strlen(table_name); + pShare->m_sTable = sphDup ( table_name ); + if ( my_hash_insert ( &sphinx_open_tables, (const byte *)pShare ) ) + { + SafeDelete ( pShare ); + break; + } + + // all seems fine + break; + } + + pthread_mutex_unlock ( &sphinx_mutex ); + SPH_RET(pShare); +} + + +// Free lock controls. We call this whenever we close a table. If the table had +// the last reference to the share then we free memory associated with it. 
+static int free_share ( CSphSEShare * pShare ) +{ + SPH_ENTER_FUNC(); + pthread_mutex_lock ( &sphinx_mutex ); + + if ( !--pShare->m_iUseCount ) + { + hash_delete ( &sphinx_open_tables, (byte *)pShare ); + SafeDelete ( pShare ); + } + + pthread_mutex_unlock ( &sphinx_mutex ); + SPH_RET(0); +} + + +#if MYSQL_VERSION_ID>50100 +static handler * sphinx_create_handler ( handlerton * hton, TABLE_SHARE * table, MEM_ROOT * mem_root ) +{ + return new ( mem_root ) ha_sphinx ( hton, table ); +} +#endif + +////////////////////////////////////////////////////////////////////////////// +// CLIENT-SIDE REQUEST STUFF +////////////////////////////////////////////////////////////////////////////// + +CSphSEQuery::CSphSEQuery ( const char * sQuery, int iLength, const char * sIndex ) + : m_sHost ( "" ) + , m_iPort ( 0 ) + , m_sIndex ( sIndex ? sIndex : (char*) "*" ) + , m_iOffset ( 0 ) + , m_iLimit ( 20 ) + , m_bQuery ( false ) + , m_sQuery ( (char*) "" ) + , m_pWeights ( NULL ) + , m_iWeights ( 0 ) + , m_eMode ( SPH_MATCH_ALL ) + , m_eRanker ( SPH_RANK_PROXIMITY_BM25 ) + , m_eSort ( SPH_SORT_RELEVANCE ) + , m_sSortBy ( (char*) "" ) + , m_iMaxMatches ( 1000 ) + , m_iMaxQueryTime ( 0 ) + , m_iMinID ( 0 ) + , m_iMaxID ( 0 ) + , m_iFilters ( 0 ) + , m_eGroupFunc ( SPH_GROUPBY_DAY ) + , m_sGroupBy ( (char*) "" ) + , m_sGroupSortBy ( (char*) "@group desc" ) + , m_iCutoff ( 0 ) + , m_iRetryCount ( 0 ) + , m_iRetryDelay ( 0 ) + , m_sGroupDistinct ( (char*) "" ) + , m_iIndexWeights ( 0 ) + , m_iFieldWeights ( 0 ) + , m_bGeoAnchor ( false ) + , m_sGeoLatAttr ( (char*) "" ) + , m_sGeoLongAttr ( (char*) "" ) + , m_fGeoLatitude ( 0.0f ) + , m_fGeoLongitude ( 0.0f ) + , m_sComment ( (char*) "" ) + + , m_pBuf ( NULL ) + , m_pCur ( NULL ) + , m_iBufLeft ( 0 ) + , m_bBufOverrun ( false ) +{ + m_sQueryBuffer = new char [ iLength+2 ]; + memcpy ( m_sQueryBuffer, sQuery, iLength ); + m_sQueryBuffer[iLength]= ';'; + m_sQueryBuffer[iLength+1]= '\0'; +} + + +CSphSEQuery::~CSphSEQuery () +{ + 
SPH_ENTER_METHOD(); + SafeDeleteArray ( m_sQueryBuffer ); + SafeDeleteArray ( m_pWeights ); + SafeDeleteArray ( m_pBuf ); + for ( int i=0; i<m_dOverrides.elements(); i++ ) + SafeDelete ( m_dOverrides.at(i) ); + SPH_VOID_RET(); +} + + +template < typename T > +int CSphSEQuery::ParseArray ( T ** ppValues, const char * sValue ) +{ + SPH_ENTER_METHOD(); + + assert ( ppValues ); + assert ( !(*ppValues) ); + + const char * pValue; + bool bPrevDigit = false; + int iValues = 0; + + // count the values + for ( pValue=sValue; *pValue; pValue++ ) + { + bool bDigit = (*pValue)>='0' && (*pValue)<='9'; + if ( bDigit && !bPrevDigit ) + iValues++; + bPrevDigit = bDigit; + } + if ( !iValues ) + SPH_RET(0); + + // extract the values + T * pValues = new T [ iValues ]; + *ppValues = pValues; + + int iIndex = 0, iSign = 1; + T uValue = 0; + + bPrevDigit = false; + for ( pValue=sValue ;; pValue++ ) + { + bool bDigit = (*pValue)>='0' && (*pValue)<='9'; + + if ( bDigit ) + { + if ( !bPrevDigit ) + uValue = 0; + uValue = uValue*10 + ( (*pValue)-'0' ); + } + else if ( bPrevDigit ) + { + assert ( iIndex<iValues ); + pValues [ iIndex++ ] = uValue * iSign; + iSign = 1; + } + else if ( *pValue=='-' ) + iSign = -1; + bPrevDigit = bDigit; + + if ( !*pValue ) + break; + } + + SPH_RET(iValues); +} + + +static char * chop ( char * s ) +{ + while ( *s && isspace(*s) ) + s++; + + char * p = s + strlen(s); + while ( p>s && isspace(p[-1]) ) + p--; + *p = '\0'; + + return s; +} + + +static bool myisattr ( char c ) +{ + return + ( c>='0' && c<='9' ) || + ( c>='a' && c<='z' ) || + ( c>='A' && c<='Z' ) || + c=='_'; +} + + +bool CSphSEQuery::ParseField ( char * sField ) +{ + SPH_ENTER_METHOD(); + + // look for option name/value separator + char * sValue = strchr ( sField, '=' ); + if ( !sValue || sValue==sField || sValue[-1]=='\\' ) + { + // by default let's assume it's just query + if ( sField[0] ) + { + if ( m_bQuery ) + { + snprintf ( m_sParseError, sizeof(m_sParseError), "search query already specified; 
'%s' is redundant", sField ); + SPH_RET(false); + } else + { + m_sQuery = sField; + m_bQuery = true; + + // unescape + char *s = sField, *d = sField; + while ( *s ) + { + if ( *s!='\\' ) *d++ = *s; + s++; + } + *d = '\0'; + } + } + SPH_RET(true); + } + + // split + *sValue++ = '\0'; + sValue = chop ( sValue ); + int iValue = atoi ( sValue ); + + // handle options + char * sName = chop ( sField ); + + if ( !strcmp ( sName, "query" ) ) m_sQuery = sValue; + else if ( !strcmp ( sName, "host" ) ) m_sHost = sValue; + else if ( !strcmp ( sName, "port" ) ) m_iPort = iValue; + else if ( !strcmp ( sName, "index" ) ) m_sIndex = sValue; + else if ( !strcmp ( sName, "offset" ) ) m_iOffset = iValue; + else if ( !strcmp ( sName, "limit" ) ) m_iLimit = iValue; + else if ( !strcmp ( sName, "weights" ) ) m_iWeights = ParseArray<uint32> ( &m_pWeights, sValue ); + else if ( !strcmp ( sName, "minid" ) ) m_iMinID = iValue; + else if ( !strcmp ( sName, "maxid" ) ) m_iMaxID = iValue; + else if ( !strcmp ( sName, "maxmatches" ) ) m_iMaxMatches = iValue; + else if ( !strcmp ( sName, "maxquerytime" ) ) m_iMaxQueryTime = iValue; + else if ( !strcmp ( sName, "groupsort" ) ) m_sGroupSortBy = sValue; + else if ( !strcmp ( sName, "distinct" ) ) m_sGroupDistinct = sValue; + else if ( !strcmp ( sName, "cutoff" ) ) m_iCutoff = iValue; + else if ( !strcmp ( sName, "comment" ) ) m_sComment = sValue; + + else if ( !strcmp ( sName, "mode" ) ) + { + + m_eMode = SPH_MATCH_ALL; + if ( !strcmp ( sValue, "any") ) m_eMode = SPH_MATCH_ANY; + else if ( !strcmp ( sValue, "phrase" ) ) m_eMode = SPH_MATCH_PHRASE; + else if ( !strcmp ( sValue, "boolean") ) m_eMode = SPH_MATCH_BOOLEAN; + else if ( !strcmp ( sValue, "ext") ) m_eMode = SPH_MATCH_EXTENDED; + else if ( !strcmp ( sValue, "extended") ) m_eMode = SPH_MATCH_EXTENDED; + else if ( !strcmp ( sValue, "ext2") ) m_eMode = SPH_MATCH_EXTENDED2; + else if ( !strcmp ( sValue, "extended2") ) m_eMode = SPH_MATCH_EXTENDED2; + else if ( !strcmp ( sValue, "all") ) m_eMode 
= SPH_MATCH_ALL; + else if ( !strcmp ( sValue, "fullscan") ) m_eMode = SPH_MATCH_FULLSCAN; + else + { + snprintf ( m_sParseError, sizeof(m_sParseError), "unknown matching mode '%s'", sValue ); + SPH_RET(false); + } + } else if ( !strcmp ( sName, "ranker" ) ) + { + + m_eRanker = SPH_RANK_PROXIMITY_BM25; + if ( !strcmp ( sValue, "proximity_bm25") ) m_eRanker = SPH_RANK_PROXIMITY_BM25; + else if ( !strcmp ( sValue, "bm25" ) ) m_eRanker = SPH_RANK_BM25; + else if ( !strcmp ( sValue, "none" ) ) m_eRanker = SPH_RANK_NONE; + else if ( !strcmp ( sValue, "wordcount" ) ) m_eRanker = SPH_RANK_WORDCOUNT; + else if ( !strcmp ( sValue, "proximity" ) ) m_eRanker = SPH_RANK_PROXIMITY; + else if ( !strcmp ( sValue, "matchany" ) ) m_eRanker = SPH_RANK_MATCHANY; + else if ( !strcmp ( sValue, "fieldmask" ) ) m_eRanker = SPH_RANK_FIELDMASK; + else + { + snprintf ( m_sParseError, sizeof(m_sParseError), "unknown ranking mode '%s'", sValue ); + SPH_RET(false); + } + } else if ( !strcmp ( sName, "sort" ) ) + { + static const struct + { + const char * m_sName; + ESphSortOrder m_eSort; + } dSortModes[] = + { + { "relevance", SPH_SORT_RELEVANCE }, + { "attr_desc:", SPH_SORT_ATTR_DESC }, + { "attr_asc:", SPH_SORT_ATTR_ASC }, + { "time_segments:", SPH_SORT_TIME_SEGMENTS }, + { "extended:", SPH_SORT_EXTENDED }, + { "expr:", SPH_SORT_EXPR } + }; + + int i; + const int nModes = sizeof(dSortModes)/sizeof(dSortModes[0]); + for ( i=0; i<nModes; i++ ) + if ( !strncmp ( sValue, dSortModes[i].m_sName, strlen(dSortModes[i].m_sName) ) ) + { + m_eSort = dSortModes[i].m_eSort; + m_sSortBy = sValue + strlen(dSortModes[i].m_sName); + break; + } + if ( i==nModes ) + { + snprintf ( m_sParseError, sizeof(m_sParseError), "unknown sorting mode '%s'", sValue ); + SPH_RET(false); + } + + } else if ( !strcmp ( sName, "groupby" ) ) + { + static const struct + { + const char * m_sName; + ESphGroupBy m_eFunc; + } dGroupModes[] = + { + { "day:", SPH_GROUPBY_DAY }, + { "week:", SPH_GROUPBY_WEEK }, + { "month:", 
SPH_GROUPBY_MONTH }, + { "year:", SPH_GROUPBY_YEAR }, + { "attr:", SPH_GROUPBY_ATTR }, + }; + + int i; + const int nModes = sizeof(dGroupModes)/sizeof(dGroupModes[0]); + for ( i=0; i<nModes; i++ ) + if ( !strncmp ( sValue, dGroupModes[i].m_sName, strlen(dGroupModes[i].m_sName) ) ) + { + m_eGroupFunc = dGroupModes[i].m_eFunc; + m_sGroupBy = sValue + strlen(dGroupModes[i].m_sName); + break; + } + if ( i==nModes ) + { + snprintf ( m_sParseError, sizeof(m_sParseError), "unknown groupby mode '%s'", sValue ); + SPH_RET(false); + } + + } else if ( m_iFilters<SPHINXSE_MAX_FILTERS && + ( !strcmp ( sName, "range" ) || !strcmp ( sName, "!range" ) || !strcmp ( sName, "floatrange" ) || !strcmp ( sName, "!floatrange" ) ) ) + { + for ( ;; ) + { + char * p = sName; + CSphSEFilter & tFilter = m_dFilters [ m_iFilters ]; + tFilter.m_bExclude = ( *p=='!' ); if ( tFilter.m_bExclude ) p++; + tFilter.m_eType = ( *p=='f' ) ? SPH_FILTER_FLOATRANGE : SPH_FILTER_RANGE; + + if (!( p = strchr ( sValue, ',' ) )) + break; + *p++ = '\0'; + + tFilter.m_sAttrName = chop ( sValue ); + sValue = p; + + if (!( p = strchr ( sValue, ',' ) )) + break; + *p++ = '\0'; + + if ( tFilter.m_eType==SPH_FILTER_RANGE ) + { + tFilter.m_uMinValue = strtoll ( sValue, NULL, 0 ); + tFilter.m_uMaxValue = strtoll ( p, NULL, 0 ); + } else + { + tFilter.m_fMinValue = (float)atof(sValue); + tFilter.m_fMaxValue = (float)atof(p); + } + + // all ok + m_iFilters++; + break; + } + + } else if ( m_iFilters<SPHINXSE_MAX_FILTERS && + ( !strcmp ( sName, "filter" ) || !strcmp ( sName, "!filter" ) ) ) + { + for ( ;; ) + { + CSphSEFilter & tFilter = m_dFilters [ m_iFilters ]; + tFilter.m_eType = SPH_FILTER_VALUES; + tFilter.m_bExclude = ( strcmp ( sName, "!filter")==0 ); + + // get the attr name + while ( (*sValue) && !myisattr(*sValue) ) + sValue++; + if ( !*sValue ) + break; + + tFilter.m_sAttrName = sValue; + while ( (*sValue) && myisattr(*sValue) ) + sValue++; + if ( !*sValue ) + break; + *sValue++ = '\0'; + + // get the values + 
tFilter.m_iValues = ParseArray<longlong> ( &tFilter.m_pValues, sValue ); + if ( !tFilter.m_iValues ) + { + assert ( !tFilter.m_pValues ); + break; + } + + // all ok + m_iFilters++; + break; + } + + } else if ( !strcmp ( sName, "indexweights" ) || !strcmp ( sName, "fieldweights" ) ) + { + bool bIndex = !strcmp ( sName, "indexweights" ); + int * pCount = bIndex ? &m_iIndexWeights : &m_iFieldWeights; + char ** pNames = bIndex ? &m_sIndexWeight[0] : &m_sFieldWeight[0]; + int * pWeights = bIndex ? &m_iIndexWeight[0] : &m_iFieldWeight[0]; + + *pCount = 0; + + char * p = sValue; + while ( *p && *pCount<SPHINXSE_MAX_FILTERS ) + { + // extract attr name + if ( !myisattr(*p) ) + { + snprintf ( m_sParseError, sizeof(m_sParseError), "%s: index name expected near '%s'", sName, p ); + SPH_RET(false); + } + + pNames[*pCount] = p; + while ( myisattr(*p) ) p++; + + if ( *p!=',' ) + { + snprintf ( m_sParseError, sizeof(m_sParseError), "%s: comma expected near '%s'", sName, p ); + SPH_RET(false); + } + *p++ = '\0'; + + // extract attr value + char * sVal = p; + while ( isdigit(*p) ) p++; + if ( p==sVal ) + { + snprintf ( m_sParseError, sizeof(m_sParseError), "%s: integer weight expected near '%s'", sName, sVal ); + SPH_RET(false); + } + pWeights[*pCount] = atoi(sVal); + (*pCount)++; + + if ( !*p ) break; + if ( *p!=',' ) + { + snprintf ( m_sParseError, sizeof(m_sParseError), "%s: comma expected near '%s'", sName, p ); + SPH_RET(false); + } + p++; + } + + } else if ( !strcmp ( sName, "geoanchor" ) ) + { + m_bGeoAnchor = false; + for ( ;; ) + { + char * sLat = sValue; + char * p = sValue; + + if (!( p = strchr ( p, ',' ) )) break; *p++ = '\0'; + char * sLong = p; + + if (!( p = strchr ( p, ',' ) )) break; *p++ = '\0'; + char * sLatVal = p; + + if (!( p = strchr ( p, ',' ) )) break; *p++ = '\0'; + char * sLongVal = p; + + m_sGeoLatAttr = chop(sLat); + m_sGeoLongAttr = chop(sLong); + m_fGeoLatitude = (float)atof(sLatVal); + m_fGeoLongitude = (float)atof(sLongVal); + m_bGeoAnchor = true; 
+ break; + } + if ( !m_bGeoAnchor ) + { + snprintf ( m_sParseError, sizeof(m_sParseError), "geoanchor: parse error, not enough comma-separated arguments" ); + SPH_RET(false); + } + } + else if ( !strcmp ( sName, "override" ) ) // name,type,id:value,id:value,... + { + char * sName = NULL; + int iType = 0; + CSphSEQuery::Override_t * pOverride = NULL; + + // get name and type + char * sRest = sValue; + for ( ;; ) + { + sName = sRest; + if ( !*sName ) + break; + + if (!( sRest = strchr ( sRest, ',' ) )) break; *sRest++ = '\0'; + char * sType = sRest; + if (!( sRest = strchr ( sRest, ',' ) )) break; + + static const struct + { + const char * m_sName; + int m_iType; + } + dAttrTypes[] = + { + { "int", SPH_ATTR_INTEGER }, + { "timestamp", SPH_ATTR_TIMESTAMP }, + { "bool", SPH_ATTR_BOOL }, + { "float", SPH_ATTR_FLOAT }, + { "bigint", SPH_ATTR_BIGINT } + }; + for ( uint i=0; i<sizeof(dAttrTypes)/sizeof(*dAttrTypes); i++ ) + if ( !strncmp( sType, dAttrTypes[i].m_sName, sRest - sType ) ) + { + iType = dAttrTypes[i].m_iType; + break; + } + break; + } + + // fail + if ( !sName || !*sName || !iType ) + { + snprintf ( m_sParseError, sizeof(m_sParseError), "override: malformed query" ); + SPH_RET(false); + } + + // grab id:value pairs + sRest++; + while ( sRest ) + { + char * sId = sRest; + if (!( sRest = strchr ( sRest, ':' ) )) break; *sRest++ = '\0'; + if (!( sRest - sId )) break; + + char * sValue = sRest; + if (( sRest = strchr ( sRest, ',' ) )) *sRest++ = '\0'; + if ( !*sValue ) + break; + + if ( !pOverride ) + { + pOverride = new CSphSEQuery::Override_t; + pOverride->m_sName = chop(sName); + pOverride->m_iType = iType; + m_dOverrides.append(pOverride); + } + + ulonglong uId = strtoull ( sId, NULL, 10 ); + CSphSEQuery::Override_t::Value_t tValue; + if ( iType == SPH_ATTR_FLOAT ) + tValue.m_fValue = (float)atof(sValue); + else if ( iType == SPH_ATTR_BIGINT ) + tValue.m_iValue64 = strtoll ( sValue, NULL, 10 ); + else + tValue.m_uValue = (uint32)strtoul ( sValue, NULL, 10 ); + 
+			pOverride->m_dIds.append ( uId );
+			pOverride->m_dValues.append ( tValue );
+		}
+
+		if ( !pOverride )
+		{
+			snprintf ( m_sParseError, sizeof(m_sParseError), "override: id:value mapping expected" );
+			SPH_RET(false);
+		}
+		SPH_RET(true);
+	}
+	else
+	{
+		snprintf ( m_sParseError, sizeof(m_sParseError), "unknown parameter '%s'", sName );
+		SPH_RET(false);
+	}
+
+	// !COMMIT handle syntax errors
+
+	SPH_RET(true);
+}
+
+
+/// Cut the query buffer into semicolon-terminated clauses (in place)
+/// and hand every clause to ParseField(); stops at the first clause that fails.
+bool CSphSEQuery::Parse ()
+{
+	SPH_ENTER_METHOD();
+	SPH_DEBUG ( "query [[ %s ]]", m_sQueryBuffer );
+
+	m_bQuery = false;
+
+	char * sClause = m_sQueryBuffer; // start of the clause being collected
+	char * pScan = m_sQueryBuffer; // where the search for the next semicolon resumes
+
+	for ( ;; )
+	{
+		pScan = strchr ( pScan, ';' );
+		if ( !pScan )
+			break;
+
+		// a backslash-escaped semicolon stays inside the clause,
+		// unless it is the very last character of the buffer
+		const bool bEscaped = ( pScan>m_sQueryBuffer && pScan[-1]=='\\' && pScan[1]!='\0' );
+		pScan++;
+		if ( bEscaped )
+			continue;
+
+		// terminate the clause where the semicolon was, and parse it
+		pScan[-1] = '\0';
+		if ( !ParseField ( sClause ) )
+			SPH_RET(false);
+		sClause = pScan;
+	}
+
+	SPH_RET(true);
+}
+
+
+/// Append raw bytes to the request buffer.
+/// When there is not enough room left, nothing is copied and the overrun flag is raised.
+void CSphSEQuery::SendBytes ( const void * pBytes, int iBytes )
+{
+	SPH_ENTER_METHOD();
+	if ( m_iBufLeft>=iBytes )
+	{
+		memcpy ( m_pCur, pBytes, iBytes );
+		m_pCur += iBytes;
+		m_iBufLeft -= iBytes;
+	} else
+	{
+		m_bBufOverrun = true;
+	}
+	SPH_VOID_RET();
+}
+
+
+/// Serialize the parsed query into a freshly allocated request buffer.
+/// On success returns the request length and stores the buffer address in *ppBuffer;
+/// the buffer stays owned by this object (m_pBuf). Returns -1 on allocation failure
+/// or when the bytes actually written do not match the precomputed length.
+int CSphSEQuery::BuildRequest ( char ** ppBuffer )
+{
+	SPH_ENTER_METHOD();
+
+	// calc request length: fixed part, weights, then every variable-length string
+	int iReqSize = 124 + 4*m_iWeights;
+	iReqSize += strlen ( m_sSortBy );
+	iReqSize += strlen ( m_sQuery );
+	iReqSize += strlen ( m_sIndex );
+	iReqSize += strlen ( m_sGroupBy );
+	iReqSize += strlen ( m_sGroupSortBy );
+	iReqSize += strlen ( m_sGroupDistinct );
+	iReqSize += strlen ( m_sComment );
+
+	for ( int iFilter=0; iFilter<m_iFilters; iFilter++ )
+	{
+		const CSphSEFilter & tFilter = m_dFilters[iFilter];
+		iReqSize += 12 + strlen ( tFilter.m_sAttrName ); // string attr-name; int type; int exclude-flag
+		if ( tFilter.m_eType==SPH_FILTER_VALUES )
+			iReqSize += 4 + 8*tFilter.m_iValues;
+		else if ( tFilter.m_eType==SPH_FILTER_RANGE )
+			iReqSize += 16;
+		else if ( tFilter.m_eType==SPH_FILTER_FLOATRANGE )
+			iReqSize += 8;
+	}
+
+	if ( m_bGeoAnchor ) // 1.14+
+		iReqSize += 16 + strlen ( m_sGeoLatAttr ) + strlen ( m_sGeoLongAttr );
+
+	for ( int iWeight=0; iWeight<m_iIndexWeights; iWeight++ ) // 1.15+
+		iReqSize += 8 + strlen(m_sIndexWeight[iWeight] );
+
+	for ( int iWeight=0; iWeight<m_iFieldWeights; iWeight++ ) // 1.18+
+		iReqSize += 8 + strlen(m_sFieldWeight[iWeight] );
+
+	// overrides: count, then name/type/count plus (id64 + value) per entry
+	iReqSize += 4;
+	for ( int iOverride=0; iOverride<m_dOverrides.elements(); iOverride++ )
+	{
+		CSphSEQuery::Override_t * pOverride = m_dOverrides.at(iOverride);
+		const uint32 uSize = pOverride->m_iType == SPH_ATTR_BIGINT ? 16 : 12; // id64 + value
+		iReqSize += strlen ( pOverride->m_sName ) + 12 + uSize*pOverride->m_dIds.elements();
+	}
+
+	// select
+	iReqSize += 4;
+
+	// drop the previous buffer (if any) and allocate the new one
+	m_iBufLeft = 0;
+	SafeDeleteArray ( m_pBuf );
+
+	m_pBuf = new char [ iReqSize ];
+	if ( !m_pBuf )
+		SPH_RET(-1);
+
+	m_pCur = m_pBuf;
+	m_iBufLeft = iReqSize;
+	m_bBufOverrun = false;
+	(*ppBuffer) = m_pBuf;
+
+	// build request: header first
+	SendWord ( SEARCHD_COMMAND_SEARCH ); // command id
+	SendWord ( VER_COMMAND_SEARCH ); // command version
+	SendInt ( iReqSize-8 ); // packet body length
+
+	SendInt ( 1 ); // number of queries
+	SendInt ( m_iOffset );
+	SendInt ( m_iLimit );
+	SendInt ( m_eMode );
+	SendInt ( m_eRanker ); // 1.16+
+	SendInt ( m_eSort );
+	SendString ( m_sSortBy ); // sort attr
+	SendString ( m_sQuery ); // query
+	SendInt ( m_iWeights );
+	for ( int iWeight=0; iWeight<m_iWeights; iWeight++ )
+		SendInt ( m_pWeights[iWeight] ); // weights
+	SendString ( m_sIndex ); // indexes
+	SendInt ( 1 ); // id64 range follows
+	SendUint64 ( m_iMinID ); // id/ts ranges
+	SendUint64 ( m_iMaxID );
+
+	// filters
+	SendInt ( m_iFilters );
+	for ( int iFilter=0; iFilter<m_iFilters; iFilter++ )
+	{
+		const CSphSEFilter & tFilter = m_dFilters[iFilter];
+		SendString ( tFilter.m_sAttrName );
+		SendInt ( tFilter.m_eType );
+
+		switch ( tFilter.m_eType )
+		{
+			case SPH_FILTER_VALUES:
+				SendInt ( tFilter.m_iValues );
+				for ( int iValue=0; iValue<tFilter.m_iValues; iValue++ )
+					SendUint64 ( tFilter.m_pValues[iValue] );
+				break;
+
+			case SPH_FILTER_RANGE:
+				SendUint64 ( tFilter.m_uMinValue );
+				SendUint64 ( tFilter.m_uMaxValue );
+				break;
+
+			case SPH_FILTER_FLOATRANGE:
+				SendFloat ( tFilter.m_fMinValue );
+				SendFloat ( tFilter.m_fMaxValue );
+				break;
+		}
+
+		SendInt ( tFilter.m_bExclude );
+	}
+
+	// grouping, limits, retries
+	SendInt ( m_eGroupFunc );
+	SendString ( m_sGroupBy );
+	SendInt ( m_iMaxMatches );
+	SendString ( m_sGroupSortBy );
+	SendInt ( m_iCutoff ); // 1.9+
+	SendInt ( m_iRetryCount ); // 1.10+
+	SendInt ( m_iRetryDelay );
+	SendString ( m_sGroupDistinct ); // 1.11+
+
+	// geo anchor
+	SendInt ( m_bGeoAnchor ); // 1.14+
+	if ( m_bGeoAnchor )
+	{
+		SendString ( m_sGeoLatAttr );
+		SendString ( m_sGeoLongAttr );
+		SendFloat ( m_fGeoLatitude );
+		SendFloat ( m_fGeoLongitude );
+	}
+
+	// per-index weights
+	SendInt ( m_iIndexWeights ); // 1.15+
+	for ( int iWeight=0; iWeight<m_iIndexWeights; iWeight++ )
+	{
+		SendString ( m_sIndexWeight[iWeight] );
+		SendInt ( m_iIndexWeight[iWeight] );
+	}
+
+	SendInt ( m_iMaxQueryTime ); // 1.17+
+
+	// per-field weights
+	SendInt ( m_iFieldWeights ); // 1.18+
+	for ( int iWeight=0; iWeight<m_iFieldWeights; iWeight++ )
+	{
+		SendString ( m_sFieldWeight[iWeight] );
+		SendInt ( m_iFieldWeight[iWeight] );
+	}
+	SendString ( m_sComment );
+
+	// overrides
+	SendInt ( m_dOverrides.elements() );
+	for ( int iOverride=0; iOverride<m_dOverrides.elements(); iOverride++ )
+	{
+		CSphSEQuery::Override_t * pOverride = m_dOverrides.at(iOverride);
+		SendString ( pOverride->m_sName );
+		SendDword ( pOverride->m_iType );
+		SendInt ( pOverride->m_dIds.elements() );
+		for ( int iEntry=0; iEntry<pOverride->m_dIds.elements(); iEntry++ )
+		{
+			SendUint64 ( pOverride->m_dIds.at(iEntry) );
+			if ( pOverride->m_iType == SPH_ATTR_FLOAT )
+				SendFloat ( pOverride->m_dValues.at(iEntry).m_fValue );
+			else if ( pOverride->m_iType == SPH_ATTR_BIGINT )
+				SendUint64 ( pOverride->m_dValues.at(iEntry).m_iValue64 );
+			else
+				SendDword ( pOverride->m_dValues.at(iEntry).m_uValue );
+		}
+	}
+
+	// select
+	SendString ( "" );
+
+	// the request is good only if the buffer was filled exactly:
+	// no overrun, nothing left, and the cursor sits right at the end
+	const bool bExactFit = !m_bBufOverrun && m_iBufLeft==0 && ( m_pCur-m_pBuf )==iReqSize;
+	SPH_RET ( bExactFit ? iReqSize : -1 );
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// SPHINX HANDLER
+//////////////////////////////////////////////////////////////////////////////
+
+/// file extensions owned by this engine; the list holds only the NullS terminator
+static const char * ha_sphinx_exts[] = { NullS };
+
+
+/// Construct a handler with no share attached, no pending response,
+/// and empty schema (fields, attrs, unbound fields map).
+#if MYSQL_VERSION_ID<50100
+ha_sphinx::ha_sphinx ( TABLE_ARG * table )
+	: handler ( &sphinx_hton, table )
+#else
+ha_sphinx::ha_sphinx ( handlerton * hton, TABLE_ARG * table )
+	: handler ( hton, table )
+#endif
+	, m_pShare ( NULL )
+	, m_iMatchesTotal ( 0 )
+	, m_iCurrentPos ( 0 )
+	, m_pCurrentKey ( NULL )
+	, m_iCurrentKeyLen ( 0 )
+	, m_pResponse ( NULL )
+	, m_pResponseEnd ( NULL )
+	, m_pCur ( NULL )
+	, m_bUnpackError ( false )
+	, m_iFields ( 0 )
+	, m_dFields ( NULL )
+	, m_iAttrs ( 0 )
+	, m_dAttrs ( NULL )
+	, m_bId64 ( 0 )
+	, m_dUnboundFields ( NULL )
+{
+	SPH_ENTER_METHOD();
+	SPH_VOID_RET();
+}
+
+
+// If frm_error() is called then the server uses this list to find out which file
+// extensions exist for the storage engine. It is also used by the default
+// rename_table and delete_table methods in handler.cc.
+const char ** ha_sphinx::bas_ext() const
+{
+	const char ** dExts = ha_sphinx_exts;
+	return dExts;
+}
+
+
+// Used for opening tables. The name will be the name of the file.
+// A table is opened when it needs to be opened. For instance
+// when a request comes in for a select on the table (tables are not
+// open and closed for each request, they are cached).
+//
+// Called from handler.cc by handler::ha_open(). The server opens all tables by
+// calling ha_open() which then calls the handler specific open().
+int ha_sphinx::open ( const char * name, int, uint )
+{
+	SPH_ENTER_METHOD();
+
+	// attach to the per-table share; without it the table can not be used
+	m_pShare = get_share ( name, table );
+	if ( m_pShare )
+	{
+		thr_lock_data_init ( &m_pShare->m_tLock, &m_tLock, NULL );
+
+		// reset this engine's slot in the opening thread
+		*thd_ha_data ( table->in_use, ht ) = NULL;
+
+		SPH_RET(0);
+	}
+
+	SPH_RET(1);
+}
+
+
+/// Open a connection to searchd and exchange protocol versions.
+/// Host and port given with the query take precedence over the table-level ones;
+/// a zero port means that the host is a UNIX socket path.
+/// Returns the connected socket, or -1 after reporting the error via my_error().
+int ha_sphinx::ConnectToSearchd ( const char * sQueryHost, int iQueryPort )
+{
+	SPH_ENTER_METHOD();
+
+	struct sockaddr_in tInet;
+#ifndef __WIN__
+	struct sockaddr_un tUnix;
+#endif
+
+	int iDomain = 0;
+	int iSockaddrSize = 0;
+	struct sockaddr * pSockaddr = NULL;
+
+	in_addr_t uAddr;
+	int iServerVersion;
+	uint uClientVersion = htonl ( SPHINX_SEARCHD_PROTO );
+
+	const char * sHost = ( sQueryHost && *sQueryHost ) ? sQueryHost : m_pShare->m_sHost;
+	ushort iPort = iQueryPort ? (ushort)iQueryPort : m_pShare->m_iPort;
+
+	if ( iPort )
+	{
+		// TCP connection
+		iDomain = AF_INET;
+		iSockaddrSize = sizeof(tInet);
+		pSockaddr = (struct sockaddr *) &tInet;
+
+		memset ( &tInet, 0, sizeof(tInet) );
+		tInet.sin_family = AF_INET;
+		tInet.sin_port = htons(iPort);
+
+		// prepare host address: dotted IP first, name lookup otherwise
+		uAddr = inet_addr(sHost);
+		if ( (int)uAddr != (int)INADDR_NONE )
+		{
+			memcpy ( &tInet.sin_addr, &uAddr, sizeof(uAddr) );
+		} else
+		{
+			int iResolveErrno;
+			struct hostent tHostStorage, *pHost;
+			char dResolveBuf [ GETHOSTBYNAME_BUFF_SIZE ];
+
+			pHost = my_gethostbyname_r ( sHost, &tHostStorage,
+				dResolveBuf, sizeof(dResolveBuf), &iResolveErrno );
+			if ( !pHost )
+			{
+				my_gethostbyname_r_free();
+
+				char sResolveError[256];
+				my_snprintf ( sResolveError, sizeof(sResolveError), "failed to resolve searchd host (name=%s)", sHost );
+
+				my_error ( ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0), sResolveError );
+				SPH_RET(-1);
+			}
+
+			memcpy ( &tInet.sin_addr, pHost->h_addr,
+				Min ( sizeof(tInet.sin_addr), (size_t)pHost->h_length ) );
+			my_gethostbyname_r_free();
+		}
+	} else
+	{
+#ifndef __WIN__
+		// UNIX socket connection; the host string is the socket path
+		iDomain = AF_UNIX;
+		iSockaddrSize = sizeof(tUnix);
+		pSockaddr = (struct sockaddr *) &tUnix;
+
+		memset ( &tUnix, 0, sizeof(tUnix) );
+		tUnix.sun_family = AF_UNIX;
+		strncpy ( tUnix.sun_path, sHost, sizeof(tUnix.sun_path)-1 );
+#else
+		my_error ( ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0), "UNIX sockets are not supported on Windows" );
+		SPH_RET(-1);
+#endif
+	}
+
+	char sError[512];
+	int iSocket = socket ( iDomain, SOCK_STREAM, 0 );
+
+	if ( iSocket<0 )
+	{
+		my_error ( ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0), "failed to create client socket" );
+		SPH_RET(-1);
+	}
+
+	// connect
+	if ( connect ( iSocket, pSockaddr, iSockaddrSize )<0 )
+	{
+		sphSockClose ( iSocket );
+		my_snprintf ( sError, sizeof(sError), "failed to connect to searchd (host=%s, errno=%d, port=%d)",
+			sHost, errno, iPort );
+		my_error ( ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0), sError );
+		SPH_RET(-1);
+	}
+
+	// handshake, part 1: searchd sends its protocol version first
+	if ( ::recv ( iSocket, (char *)&iServerVersion, sizeof(iServerVersion), 0 )!=sizeof(iServerVersion) )
+	{
+		sphSockClose ( iSocket );
+		my_snprintf ( sError, sizeof(sError), "failed to receive searchd version (host=%s, port=%d)",
+			sHost, iPort );
+		my_error ( ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0), sError );
+		SPH_RET(-1);
+	}
+
+	// handshake, part 2: reply with the client protocol version
+	if ( ::send ( iSocket, (char*)&uClientVersion, sizeof(uClientVersion), 0 )!=sizeof(uClientVersion) )
+	{
+		sphSockClose ( iSocket );
+		my_snprintf ( sError, sizeof(sError), "failed to send client version (host=%s, port=%d)",
+			sHost, iPort );
+		my_error ( ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0), sError );
+		SPH_RET(-1);
+	}
+
+	SPH_RET(iSocket);
+}
+
+
+// Closes a table. We call the free_share() function to free any resources
+// that we have allocated in the "shared" structure.
+//
+// Called from sql_base.cc, sql_select.cc, and table.cc.
+// In sql_select.cc it is only used to close up temporary tables or during
+// the process where a temporary table is converted over to being a
+// myisam table.
+// For sql_base.cc look at close_data_tables().
+int ha_sphinx::close()
+{
+	SPH_ENTER_METHOD();
+	SPH_RET ( free_share(m_pShare) );
+}
+
+
+/// writes are not supported by this engine
+int ha_sphinx::write_row ( uchar * )
+{
+	SPH_ENTER_METHOD();
+	SPH_RET ( HA_ERR_WRONG_COMMAND );
+}
+
+
+/// updates are not supported by this engine
+int ha_sphinx::update_row ( const uchar *, uchar * )
+{
+	SPH_ENTER_METHOD();
+	SPH_RET ( HA_ERR_WRONG_COMMAND );
+}
+
+
+/// deletes are not supported by this engine
+int ha_sphinx::delete_row ( const uchar * )
+{
+	SPH_ENTER_METHOD();
+	SPH_RET ( HA_ERR_WRONG_COMMAND );
+}
+
+
+// keynr is key (index) number
+// sorted is 1 if result MUST be sorted according to index
+int ha_sphinx::index_init ( uint keynr, bool )
+{
+	SPH_ENTER_METHOD();
+	active_index = keynr;
+	SPH_RET(0);
+}
+
+
+int ha_sphinx::index_end()
+{
+	SPH_ENTER_METHOD();
+	SPH_RET(0);
+}
+
+
+/// Read a network-order dword from the response and advance the cursor.
+/// On a short response, moves the cursor to the end, raises m_bUnpackError, and returns 0.
+uint32 ha_sphinx::UnpackDword ()
+{
+	if ( m_pCur+sizeof(uint32)>m_pResponseEnd )
+	{
+		m_pCur = m_pResponseEnd;
+		m_bUnpackError = true;
+		return 0;
+	}
+
+	uint32 uRes = ntohl ( sphUnalignedRead ( *(uint32*)m_pCur ) );
+	m_pCur += sizeof(uint32);
+	return uRes;
+}
+
+
+/// Read a length-prefixed string from the response.
+/// Returns a new[]-allocated, nul-terminated copy that the caller must delete[];
+/// returns NULL for an empty string, and also on a short response (m_bUnpackError is raised then).
+char * ha_sphinx::UnpackString ()
+{
+	uint32 iLen = UnpackDword ();
+	if ( !iLen )
+		return NULL;
+
+	// compare against the bytes actually left; UnpackDword() above guarantees
+	// that the cursor is within the response, and "m_pCur+iLen" could wrap on a huge length
+	if ( iLen>(size_t)( m_pResponseEnd-m_pCur ) )
+	{
+		m_pCur = m_pResponseEnd;
+		m_bUnpackError = true;
+		return NULL;
+	}
+
+	char * sRes = new char [ 1+iLen ];
+	memcpy ( sRes, m_pCur, iLen );
+	sRes[iLen] = '\0';
+	m_pCur += iLen;
+	return sRes;
+}
+
+
+/// printable replacement for NULL strings
+static inline const char * FixNull ( const char * s )
+{
+	return s ? s : "(null)";
+}
+
+
+/// Unpack the status, the schema (fields and attributes), the match count and the id64 flag
+/// from the response; bind attributes to table columns by name, and build the map of
+/// columns that no attribute is bound to. Returns false (after my_error) on any failure.
+bool ha_sphinx::UnpackSchema ()
+{
+	SPH_ENTER_METHOD();
+
+	// cleanup
+	if ( m_dFields )
+		for ( int i=0; i<(int)m_iFields; i++ )
+			SafeDeleteArray ( m_dFields[i] );
+	SafeDeleteArray ( m_dFields );
+
+	// unpack network packet
+	uint32 uStatus = UnpackDword ();
+
+	if ( uStatus!=SEARCHD_OK )
+	{
+		// the message is NULL when it is empty, or when the response is truncated
+		char * sMessage = UnpackString ();
+		const char * sSafeMessage = sMessage ? sMessage : "";
+
+		CSphSEThreadData * pTls = GetTls ();
+		if ( pTls )
+		{
+			strncpy ( pTls->m_tStats.m_sLastMessage, sSafeMessage, sizeof(pTls->m_tStats.m_sLastMessage) );
+			pTls->m_tStats.m_sLastMessage [ sizeof(pTls->m_tStats.m_sLastMessage)-1 ] = '\0'; // strncpy does not terminate on truncation
+			pTls->m_tStats.m_bLastError = ( uStatus==SEARCHD_ERROR );
+		}
+
+		if ( uStatus==SEARCHD_ERROR )
+		{
+			char sError[1024];
+			my_snprintf ( sError, sizeof(sError), "searchd error: %s", sSafeMessage );
+			my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), sError );
+			SafeDeleteArray ( sMessage );
+			SPH_RET ( false );
+		}
+
+		// not an error (a warning, most likely); the copy is not needed any more
+		SafeDeleteArray ( sMessage );
+	}
+
+	m_iFields = UnpackDword ();
+	m_dFields = new char * [ m_iFields ];
+	if ( !m_dFields )
+	{
+		my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "INTERNAL ERROR: UnpackSchema() failed (fields alloc error)" );
+		SPH_RET(false);
+	}
+
+	for ( uint32 i=0; i<m_iFields; i++ )
+		m_dFields[i] = UnpackString ();
+
+	SafeDeleteArray ( m_dAttrs );
+	m_iAttrs = UnpackDword ();
+	m_dAttrs = new CSphSEAttr [ m_iAttrs ];
+	if ( !m_dAttrs )
+	{
+		for ( int i=0; i<(int)m_iFields; i++ )
+			SafeDeleteArray ( m_dFields[i] );
+		SafeDeleteArray ( m_dFields );
+		my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "INTERNAL ERROR: UnpackSchema() failed (attrs alloc error)" );
+		SPH_RET(false);
+	}
+
+	for ( uint32 i=0; i<m_iAttrs; i++ )
+	{
+		m_dAttrs[i].m_sName = UnpackString ();
+		m_dAttrs[i].m_uType = UnpackDword ();
+		if ( m_bUnpackError ) // m_sName may be null
+			break;
+
+		m_dAttrs[i].m_iField = -1;
+
+		// an empty name unpacks as NULL without raising the error flag;
+		// such an attribute can not match any column, so leave it unbound
+		if ( !m_dAttrs[i].m_sName )
+			continue;
+
+		for ( int j=SPHINXSE_SYSTEM_COLUMNS; j<m_pShare->m_iTableFields; j++ )
+		{
+			const char * sTableField = m_pShare->m_sTableField[j];
+			const char * sAttrField = m_dAttrs[i].m_sName;
+			if ( m_dAttrs[i].m_sName[0]=='@' )
+			{
+				// "@name" attributes map to "_sph_name" columns
+				const char * sAtPrefix = "_sph_";
+				if ( strncmp ( sTableField, sAtPrefix, strlen(sAtPrefix) ) )
+					continue;
+				sTableField += strlen(sAtPrefix);
+				sAttrField++;
+			}
+
+			if ( !strcasecmp ( sAttrField, sTableField ) )
+			{
+				// we're almost good, but
+				// let's enforce that timestamp columns can only receive timestamp attributes
+				if ( m_pShare->m_eTableFieldType[j]!=MYSQL_TYPE_TIMESTAMP || m_dAttrs[i].m_uType==SPH_ATTR_TIMESTAMP )
+					m_dAttrs[i].m_iField = j;
+				break;
+			}
+		}
+	}
+
+	m_iMatchesTotal = UnpackDword ();
+
+	m_bId64 = UnpackDword ();
+	if ( m_bId64 && m_pShare->m_eTableFieldType[0] != MYSQL_TYPE_LONGLONG )
+	{
+		my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "INTERNAL ERROR: 1st column must be bigint to accept 64-bit DOCIDs" );
+		SPH_RET(false);
+	}
+
+	// network packet unpacked; build unbound fields map
+	SafeDeleteArray ( m_dUnboundFields );
+	m_dUnboundFields = new int [ m_pShare->m_iTableFields ];
+
+	for ( int i=0; i<m_pShare->m_iTableFields; i++ )
+	{
+		if ( i<SPHINXSE_SYSTEM_COLUMNS )
+			m_dUnboundFields[i] = SPH_ATTR_NONE;
+
+		else if ( m_pShare->m_eTableFieldType[i]==MYSQL_TYPE_TIMESTAMP )
+			m_dUnboundFields[i] = SPH_ATTR_TIMESTAMP;
+
+		else
+			m_dUnboundFields[i] = SPH_ATTR_INTEGER;
+	}
+
+	for ( uint32 i=0; i<m_iAttrs; i++ )
+		if ( m_dAttrs[i].m_iField>=0 )
+			m_dUnboundFields [ m_dAttrs[i].m_iField ] = SPH_ATTR_NONE;
+
+	if ( m_bUnpackError )
+		my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "INTERNAL ERROR: UnpackSchema() failed (unpack error)" );
+
+	SPH_RET(!m_bUnpackError);
+}
+
+
+/// Skip over the matches, unpack the query statistics that follow them into pStats,
+/// then put the cursor back at the first match. Returns false on any unpack failure.
+bool ha_sphinx::UnpackStats ( CSphSEStats * pStats )
+{
+	assert ( pStats );
+
+	char * pCurSave = m_pCur;
+	for ( uint iMatch=0; iMatch<m_iMatchesTotal && m_pCur<m_pResponseEnd-sizeof(uint32); iMatch++ )
+	{
+		m_pCur += m_bId64 ? 12 : 8; // skip id+weight
+		for ( uint32 iAttr=0; iAttr<m_iAttrs && m_pCur<m_pResponseEnd-sizeof(uint32); iAttr++ )
+		{
+			if ( m_dAttrs[iAttr].m_uType & SPH_ATTR_MULTI )
+			{
+				// skip MVA list
+				uint32 uCount = UnpackDword ();
+				m_pCur += uCount*4;
+			}
+			else // skip normal value
+				m_pCur += m_dAttrs[iAttr].m_uType == SPH_ATTR_BIGINT ? 8 : 4;
+		}
+	}
+
+	pStats->m_iMatchesTotal = UnpackDword ();
+	pStats->m_iMatchesFound = UnpackDword ();
+	pStats->m_iQueryMsec = UnpackDword ();
+	pStats->m_iWords = UnpackDword ();
+
+	if ( m_bUnpackError )
+		return false;
+
+	SafeDeleteArray ( pStats->m_dWords );
+	if ( pStats->m_iWords<0 || pStats->m_iWords>=SPHINXSE_MAX_KEYWORDSTATS )
+		return false;
+	pStats->m_dWords = new CSphSEWordStats [ pStats->m_iWords ];
+	if ( !pStats->m_dWords )
+		return false;
+
+	for ( int i=0; i<pStats->m_iWords; i++ )
+	{
+		CSphSEWordStats & tWord = pStats->m_dWords[i];
+		tWord.m_sWord = UnpackString ();
+		tWord.m_iDocs = UnpackDword ();
+		tWord.m_iHits = UnpackDword ();
+	}
+
+	if ( m_bUnpackError )
+		return false;
+
+	m_pCur = pCurSave;
+	return true;
+}
+
+
+/// condition pushdown implementation, to properly intercept WHERE clauses on my columns
+const COND * ha_sphinx::cond_push ( const COND * cond )
+{
+	// catch the simplest case: query_column="some text"
+	for ( ;; )
+	{
+		if ( cond->type()!=COND::FUNC_ITEM )
+			break;
+
+		Item_func * condf = (Item_func *)cond;
+		if ( condf->functype()!=Item_func::EQ_FUNC || condf->argument_count()!=2 )
+			break;
+
+		Item ** args = condf->arguments();
+		if ( args[0]->type()!=COND::FIELD_ITEM || args[1]->type()!=COND::STRING_ITEM )
+			break;
+
+		Item_field * pField = (Item_field *) args[0];
+		if ( pField->field->field_index!=2 ) // FIXME! magic key index
+			break;
+
+		// get my tls
+		CSphSEThreadData * pTls = GetTls ();
+		if ( !pTls )
+			break;
+
+		// copy the query, and let know that we intercepted this condition
+		Item_string * pString = (Item_string *) args[1];
+		pTls->m_bQuery = true;
+		strncpy ( pTls->m_sQuery, pString->str_value.c_ptr(), sizeof(pTls->m_sQuery) );
+		pTls->m_sQuery[sizeof(pTls->m_sQuery)-1] = '\0';
+		pTls->m_pQueryCharset = pString->str_value.charset();
+		return NULL;
+	}
+
+	// don't change anything
+	return cond;
+}
+
+
+/// condition popup
+void ha_sphinx::cond_pop ()
+{
+	CSphSEThreadData * pTls = GetTls ();
+	if ( pTls && pTls->m_bQuery )
+		pTls->m_bQuery = false;
+	return;
+}
+
+
+/// get TLS (maybe allocate it, too)
+CSphSEThreadData * ha_sphinx::GetTls()
+{
+	// where do we store that pointer in today's version?
+	CSphSEThreadData ** ppTls;
+	ppTls = (CSphSEThreadData**) thd_ha_data ( ha_thd(), ht );
+
+	// allocate if needed
+	if ( !*ppTls )
+		*ppTls = new CSphSEThreadData ();
+
+	// errors will be handled by caller
+	return *ppTls;
+}
+
+
+// Positions an index cursor to the index specified in the handle. Fetches the
+// row if available. If the key value is null, begin at the first key of the
+// index.
+//
+// Here this means: parse the query text (taken from the pushed-down condition, or from
+// the key), send it to searchd, receive and unpack the whole response, and return the
+// first match through get_rec(). Every failure is reported via my_error() and mapped
+// to HA_ERR_END_OF_FILE.
+int ha_sphinx::index_read ( byte * buf, const byte * key, uint key_len, enum ha_rkey_function )
+{
+	SPH_ENTER_METHOD();
+	char sError[256];
+
+	// set new data for thd->ha_data, it is used in show_status
+	CSphSEThreadData * pTls = GetTls();
+	if ( !pTls )
+	{
+		my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "INTERNAL ERROR: TLS malloc() failed" );
+		SPH_RET ( HA_ERR_END_OF_FILE );
+	}
+	pTls->m_tStats.Reset ();
+
+	// parse query
+	if ( pTls->m_bQuery )
+	{
+		// we have a query from condition pushdown
+		m_pCurrentKey = (const byte *) pTls->m_sQuery;
+		m_iCurrentKeyLen = strlen(pTls->m_sQuery);
+	} else
+	{
+		// just use the key (might be truncated)
+		m_pCurrentKey = key+HA_KEY_BLOB_LENGTH;
+		m_iCurrentKeyLen = uint2korr(key); // or maybe key_len?
+		pTls->m_pQueryCharset = m_pShare ? m_pShare->m_pTableQueryCharset : NULL;
+	}
+
+	CSphSEQuery q ( (const char*)m_pCurrentKey, m_iCurrentKeyLen, m_pShare->m_sIndex );
+	if ( !q.Parse () )
+	{
+		my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), q.m_sParseError );
+		SPH_RET ( HA_ERR_END_OF_FILE );
+	}
+
+	// do connect
+	int iSocket = ConnectToSearchd ( q.m_sHost, q.m_iPort );
+	if ( iSocket<0 )
+		SPH_RET ( HA_ERR_END_OF_FILE );
+
+	// from here on, every early return must close the socket
+
+	// my buffer
+	char * pBuffer; // will be free by CSphSEQuery dtor; do NOT free manually
+	int iReqLen = q.BuildRequest ( &pBuffer );
+
+	if ( iReqLen<=0 )
+	{
+		::closesocket ( iSocket );
+		my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "INTERNAL ERROR: q.BuildRequest() failed" );
+		SPH_RET ( HA_ERR_END_OF_FILE );
+	}
+
+	// send request; a failed send shows up as a failed header read right below
+	::send ( iSocket, pBuffer, iReqLen, 0 );
+
+	// receive reply
+	char sHeader[8];
+	int iGot = ::recv ( iSocket, sHeader, sizeof(sHeader), RECV_FLAGS );
+	if ( iGot!=sizeof(sHeader) )
+	{
+		::closesocket ( iSocket );
+		my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "failed to receive response header (searchd went away?)" );
+		SPH_RET ( HA_ERR_END_OF_FILE );
+	}
+
+	short int uRespStatus = ntohs ( sphUnalignedRead ( *(short int*)( &sHeader[0] ) ) );
+	short int uRespVersion = ntohs ( sphUnalignedRead ( *(short int*)( &sHeader[2] ) ) );
+	uint uRespLength = ntohl ( sphUnalignedRead ( *(uint *)( &sHeader[4] ) ) );
+	SPH_DEBUG ( "got response header (status=%d version=%d length=%d)",
+		uRespStatus, uRespVersion, uRespLength );
+
+	SafeDeleteArray ( m_pResponse );
+	if ( uRespLength<=SPHINXSE_MAX_ALLOC )
+		m_pResponse = new char [ uRespLength+1 ];
+
+	if ( !m_pResponse )
+	{
+		::closesocket ( iSocket );
+		my_snprintf ( sError, sizeof(sError), "bad searchd response length (length=%u)", uRespLength );
+		my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), sError );
+		SPH_RET ( HA_ERR_END_OF_FILE );
+	}
+
+	int iRecvLength = 0;
+	while ( iRecvLength<(int)uRespLength )
+	{
+		int iRecv = ::recv ( iSocket, m_pResponse+iRecvLength, uRespLength-iRecvLength, RECV_FLAGS );
+		if ( iRecv<=0 ) // negative is an error; zero means that the peer closed the connection
+			break;
+		iRecvLength += iRecv;
+	}
+
+	::closesocket ( iSocket );
+	iSocket = -1;
+
+	if ( iRecvLength!=(int)uRespLength )
+	{
+		my_snprintf ( sError, sizeof(sError), "net read error (expected=%d, got=%d)", uRespLength, iRecvLength );
+		my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), sError );
+		SPH_RET ( HA_ERR_END_OF_FILE );
+	}
+
+	// we'll have a message, at least
+	pTls->m_bStats = true;
+
+	// parse reply
+	m_iCurrentPos = 0;
+	m_pCur = m_pResponse;
+	m_pResponseEnd = m_pResponse + uRespLength;
+	m_bUnpackError = false;
+
+	if ( uRespStatus!=SEARCHD_OK )
+	{
+		char * sMessage = UnpackString ();
+		if ( !sMessage )
+		{
+			// format the text ourselves; the message is handed to my_error() as a plain string
+			my_snprintf ( sError, sizeof(sError), "no valid response from searchd (status=%d, resplen=%d)",
+				uRespStatus, uRespLength );
+			my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), sError );
+			SPH_RET ( HA_ERR_END_OF_FILE );
+		}
+
+		strncpy ( pTls->m_tStats.m_sLastMessage, sMessage, sizeof(pTls->m_tStats.m_sLastMessage) );
+		pTls->m_tStats.m_sLastMessage [ sizeof(pTls->m_tStats.m_sLastMessage)-1 ] = '\0'; // strncpy does not terminate on truncation
+		SafeDeleteArray ( sMessage );
+
+		if ( uRespStatus!=SEARCHD_WARNING )
+		{
+			my_snprintf ( sError, sizeof(sError), "searchd error: %s", pTls->m_tStats.m_sLastMessage );
+			my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), sError );
+
+			pTls->m_tStats.m_bLastError = true;
+			SPH_RET ( HA_ERR_END_OF_FILE );
+		}
+	}
+
+	if ( !UnpackSchema () )
+		SPH_RET ( HA_ERR_END_OF_FILE );
+
+	if ( !UnpackStats ( &pTls->m_tStats ) )
+	{
+		my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "INTERNAL ERROR: UnpackStats() failed" );
+		SPH_RET ( HA_ERR_END_OF_FILE );
+	}
+
+	SPH_RET ( get_rec ( buf, key, key_len ) );
+}
+
+
+// Positions an index cursor to the index specified in key. Fetches the
+// row if any. This is only used to read whole keys.
+int ha_sphinx::index_read_idx ( byte *, uint, const byte *, uint, enum ha_rkey_function )
+{
+	SPH_ENTER_METHOD();
+	SPH_RET ( HA_ERR_WRONG_COMMAND );
+}
+
+
+// Used to read forward through the index.
+int ha_sphinx::index_next ( byte * buf )
+{
+	SPH_ENTER_METHOD();
+	SPH_RET ( get_rec ( buf, m_pCurrentKey, m_iCurrentKeyLen ) );
+}
+
+
+int ha_sphinx::index_next_same ( byte * buf, const byte * key, uint keylen )
+{
+	SPH_ENTER_METHOD();
+	SPH_RET ( get_rec ( buf, key, keylen ) );
+}
+
+
+/// Unpack the next match from the response into the table's fields.
+/// Returns 0 on success. Returns HA_ERR_END_OF_FILE (and frees the response) when all
+/// matches were already returned, or when unpacking fails (reported via my_error).
+int ha_sphinx::get_rec ( byte * buf, const byte *, uint )
+{
+	SPH_ENTER_METHOD();
+
+	if ( m_iCurrentPos>=m_iMatchesTotal )
+	{
+		SafeDeleteArray ( m_pResponse );
+		SPH_RET ( HA_ERR_END_OF_FILE );
+	}
+
+	// the column map taken here must be restored on every path below,
+	// hence the single exit at the bottom of the function
+	#if MYSQL_VERSION_ID>50100
+	my_bitmap_map * org_bitmap = dbug_tmp_use_all_columns ( table, table->write_set );
+	#endif
+	Field ** field = table->field;
+	int iRes = 0;
+
+	// unpack and return the match
+	longlong uMatchID = UnpackDword ();
+	if ( m_bId64 )
+		uMatchID = ( uMatchID<<32 ) + UnpackDword();
+	uint32 uMatchWeight = UnpackDword ();
+
+	field[0]->store ( uMatchID, 1 );
+	field[1]->store ( uMatchWeight, 1 );
+	field[2]->store ( (const char*)m_pCurrentKey, m_iCurrentKeyLen, &my_charset_bin );
+
+	for ( uint32 i=0; i<m_iAttrs && !iRes; i++ )
+	{
+		longlong iValue64= 0;
+		uint32 uValue = UnpackDword ();
+		if ( m_dAttrs[i].m_uType == SPH_ATTR_BIGINT )
+			iValue64 = ( (longlong)uValue<<32 ) | UnpackDword();
+		if ( m_dAttrs[i].m_iField<0 )
+		{
+			// skip MVA
+			if ( m_dAttrs[i].m_uType & SPH_ATTR_MULTI )
+				for ( ; uValue>0 && !m_bUnpackError; uValue-- )
+					UnpackDword();
+			continue;
+		}
+
+		Field * af = field [ m_dAttrs[i].m_iField ];
+		switch ( m_dAttrs[i].m_uType )
+		{
+			case SPH_ATTR_INTEGER:
+			case SPH_ATTR_ORDINAL:
+			case SPH_ATTR_BOOL:
+				af->store ( uValue, 1 );
+				break;
+
+			case SPH_ATTR_FLOAT:
+				af->store ( sphDW2F(uValue) );
+				break;
+
+			case SPH_ATTR_TIMESTAMP:
+				if ( af->type()==MYSQL_TYPE_TIMESTAMP )
+					longstore ( af->ptr, uValue ); // because store() does not accept timestamps
+				else
+					af->store ( uValue, 1 );
+				break;
+
+			case SPH_ATTR_BIGINT:
+				af->store ( iValue64, 0 );
+				break;
+
+			case ( SPH_ATTR_MULTI | SPH_ATTR_INTEGER ):
+				if ( uValue<=0 )
+				{
+					// shortcut, empty MVA set
+					af->store ( "", 0, &my_charset_bin );
+
+				} else
+				{
+					// convert MVA set to comma-separated string
+					char sBuf[1024]; // FIXME! magic size
+					char * pCur = sBuf;
+
+					for ( ; uValue>0 && !m_bUnpackError; uValue-- )
+					{
+						uint32 uEntry = UnpackDword ();
+						if ( pCur < sBuf+sizeof(sBuf)-16 ) // 10 chars per 32bit value plus some safety bytes
+						{
+							sprintf ( pCur, "%u", uEntry );
+							while ( *pCur ) pCur++; // advance to the terminator just written
+							if ( uValue>1 )
+								*pCur++ = ','; // non-trailing commas
+						}
+					}
+
+					af->store ( sBuf, pCur-sBuf, &my_charset_bin );
+				}
+				break;
+
+			default:
+				my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "INTERNAL ERROR: unhandled attr type" );
+				iRes = HA_ERR_END_OF_FILE; // also stops the attributes loop
+				break;
+		}
+	}
+
+	if ( !iRes && m_bUnpackError )
+	{
+		my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "INTERNAL ERROR: response unpacker failed" );
+		iRes = HA_ERR_END_OF_FILE;
+	}
+
+	// zero out unmapped fields
+	for ( int i=SPHINXSE_SYSTEM_COLUMNS; i<(int)table->s->fields && !iRes; i++ )
+		if ( m_dUnboundFields[i]!=SPH_ATTR_NONE )
+			switch ( m_dUnboundFields[i] )
+			{
+				case SPH_ATTR_INTEGER: table->field[i]->store ( 0, 1 ); break;
+				case SPH_ATTR_TIMESTAMP: longstore ( table->field[i]->ptr, 0 ); break;
+				default:
+				{
+					// format the text ourselves; the message is handed to my_error() as a plain string
+					char sError[256];
+					my_snprintf ( sError, sizeof(sError),
+						"INTERNAL ERROR: unhandled unbound field type %d", m_dUnboundFields[i] );
+					my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), sError );
+					iRes = HA_ERR_END_OF_FILE; // also stops the loop
+					break;
+				}
+			}
+
+	if ( !iRes )
+	{
+		memset ( buf, 0, table->s->null_bytes );
+		m_iCurrentPos++;
+	} else
+	{
+		// the response is of no use after a failure
+		SafeDeleteArray ( m_pResponse );
+	}
+
+	#if MYSQL_VERSION_ID > 50100
+	dbug_tmp_restore_column_map(table->write_set, org_bitmap);
+	#endif
+
+	SPH_RET(iRes);
+}
+
+
+// Used to read backwards through the index.
+int ha_sphinx::index_prev ( byte * )
+{
+	SPH_ENTER_METHOD();
+	SPH_RET ( HA_ERR_WRONG_COMMAND );
+}
+
+
+// index_first() asks for the first key in the index.
+//
+// Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
+// and sql_select.cc.
+int ha_sphinx::index_first ( byte * )
+{
+	SPH_ENTER_METHOD();
+	SPH_RET ( HA_ERR_END_OF_FILE );
+}
+
+// index_last() asks for the last key in the index.
+//
+// Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
+// and sql_select.cc.
+int ha_sphinx::index_last ( byte * )
+{
+	SPH_ENTER_METHOD();
+	SPH_RET ( HA_ERR_WRONG_COMMAND );
+}
+
+
+int ha_sphinx::rnd_init ( bool )
+{
+	SPH_ENTER_METHOD();
+	SPH_RET(0);
+}
+
+
+int ha_sphinx::rnd_end()
+{
+	SPH_ENTER_METHOD();
+	SPH_RET(0);
+}
+
+
+/// table scans always report an empty table
+int ha_sphinx::rnd_next ( byte * )
+{
+	SPH_ENTER_METHOD();
+	SPH_RET ( HA_ERR_END_OF_FILE );
+}
+
+
+void ha_sphinx::position ( const byte * )
+{
+	SPH_ENTER_METHOD();
+	SPH_VOID_RET();
+}
+
+
+// This is like rnd_next, but you are given a position to use
+// to determine the row. The position will be of the type that you stored in
+// ref. You can use ha_get_ptr(pos,ref_length) to retrieve whatever key
+// or position you saved when position() was called.
+// Called from filesort.cc records.cc sql_insert.cc sql_select.cc sql_update.cc.
+int ha_sphinx::rnd_pos ( byte *, byte * )
+{
+	SPH_ENTER_METHOD();
+	SPH_RET ( HA_ERR_WRONG_COMMAND );
+}
+
+
+/// report fixed statistics: 1 record per key value, 20 records in the table
+#if MYSQL_VERSION_ID>=50030
+int ha_sphinx::info ( uint )
+#else
+void ha_sphinx::info ( uint )
+#endif
+{
+	SPH_ENTER_METHOD();
+
+	if ( table->s->keys>0 )
+		table->key_info[0].rec_per_key[0] = 1;
+
+	#if MYSQL_VERSION_ID>50100
+	stats.records = 20;
+	#else
+	records = 20;
+	#endif
+
+#if MYSQL_VERSION_ID>=50030
+	SPH_RET(0);
+#else
+	SPH_VOID_RET();
+#endif
+}
+
+
+/// forget the query intercepted by condition pushdown
+int ha_sphinx::reset ()
+{
+	SPH_ENTER_METHOD();
+	CSphSEThreadData * pTls = GetTls ();
+	if ( pTls )
+		pTls->m_bQuery = false;
+	SPH_RET(0);
+}
+
+
+int ha_sphinx::delete_all_rows()
+{
+	SPH_ENTER_METHOD();
+	SPH_RET ( HA_ERR_WRONG_COMMAND );
+}
+
+
+// First you should go read the section "locking functions for mysql" in
+// lock.cc to understand this.
+// This creates a lock on the table. If you are implementing a storage engine
+// that can handle transactions look at ha_berkeley.cc to see how you will
+// want to go about doing this. Otherwise you should consider calling flock()
+// here.
+//
+// Called from lock.cc by lock_external() and unlock_external(). Also called
+// from sql_table.cc by copy_data_between_tables().
+int ha_sphinx::external_lock ( THD *, int )
+{
+	SPH_ENTER_METHOD();
+	SPH_RET(0);
+}
+
+
+THR_LOCK_DATA ** ha_sphinx::store_lock ( THD *, THR_LOCK_DATA ** to,
+	enum thr_lock_type lock_type )
+{
+	SPH_ENTER_METHOD();
+
+	if ( lock_type!=TL_IGNORE && m_tLock.type==TL_UNLOCK )
+		m_tLock.type=lock_type;
+
+	*to++ = &m_tLock;
+	SPH_RET(to);
+}
+
+
+int ha_sphinx::delete_table ( const char * )
+{
+	SPH_ENTER_METHOD();
+	SPH_RET(0);
+}
+
+
+// Renames a table from one name to another from alter table call.
+//
+// If you do not implement this, the default rename_table() is called from
+// handler.cc and it will delete all files with the file extensions returned
+// by bas_ext().
+//
+// Called from sql_table.cc by mysql_rename_table().
+int ha_sphinx::rename_table ( const char *, const char * )
+{
+	SPH_ENTER_METHOD();
+	SPH_RET(0);
+}
+
+
+// Given a starting key, and an ending key estimate the number of rows that
+// will exist between the two. end_key may be empty which in case determine
+// if start_key matches any rows.
+//
+// Called from opt_range.cc by check_quick_keys().
+ha_rows ha_sphinx::records_in_range ( uint, key_range *, key_range * )
+{
+	SPH_ENTER_METHOD();
+	SPH_RET(3); // low number to force index usage
+}
+
+
+static inline bool IsIntegerFieldType ( enum_field_types eType )
+{
+	return eType==MYSQL_TYPE_LONG || eType==MYSQL_TYPE_LONGLONG;
+}
+
+
+// create() is called to create a database. The variable name will have the name
+// of the table. When create() is called you do not need to worry about opening
+// the table. Also, the FRM file will have already been created so adjusting
+// create_info will not do you any good. You can overwrite the frm file at this
+// point if you wish to change the table definition, but there are no methods
+// currently provided for doing that.
+//
+// Here it only validates the table definition: the connection string, the three
+// system columns, the attribute column types, and the single index on the query column.
+//
+// Called from handler.cc by ha_create_table().
+int ha_sphinx::create ( const char * name, TABLE * table, HA_CREATE_INFO * )
+{
+	SPH_ENTER_METHOD();
+	char sError[256];
+
+	if ( !ParseUrl ( NULL, table, true ) )
+		SPH_RET(-1);
+
+	// one-pass loop; every failed check fills sError and breaks out
+	for ( ;; )
+	{
+		// check system fields (count and types)
+		if ( table->s->fields<SPHINXSE_SYSTEM_COLUMNS )
+		{
+			my_snprintf ( sError, sizeof(sError), "%s: there MUST be at least %d columns",
+				name, SPHINXSE_SYSTEM_COLUMNS );
+			break;
+		}
+
+		if ( !IsIntegerFieldType ( table->field[0]->type() ) || !((Field_num *)table->field[0])->unsigned_flag )
+		{
+			my_snprintf ( sError, sizeof(sError), "%s: 1st column (docid) MUST be unsigned integer or bigint", name );
+			break;
+		}
+
+		if ( !IsIntegerFieldType ( table->field[1]->type() ) )
+		{
+			my_snprintf ( sError, sizeof(sError), "%s: 2nd column (weight) MUST be integer or bigint", name );
+			break;
+		}
+
+		enum_field_types f2 = table->field[2]->type();
+		if ( f2!=MYSQL_TYPE_VARCHAR
+			&& f2!=MYSQL_TYPE_BLOB && f2!=MYSQL_TYPE_MEDIUM_BLOB && f2!=MYSQL_TYPE_LONG_BLOB && f2!=MYSQL_TYPE_TINY_BLOB )
+		{
+			my_snprintf ( sError, sizeof(sError), "%s: 3rd column (search query) MUST be varchar or text", name );
+			break;
+		}
+
+		// check attributes
+		int i;
+		for ( i=3; i<(int)table->s->fields; i++ )
+		{
+			enum_field_types eType = table->field[i]->type();
+			if ( eType!=MYSQL_TYPE_TIMESTAMP && !IsIntegerFieldType(eType) && eType!=MYSQL_TYPE_VARCHAR && eType!=MYSQL_TYPE_FLOAT )
+			{
+				my_snprintf ( sError, sizeof(sError), "%s: %dth column (attribute %s) MUST be integer, bigint, timestamp, varchar, or float",
+					name, i+1, table->field[i]->field_name );
+				break;
+			}
+		}
+
+		// the loop above stopped early on a bad attribute column
+		if ( i!=(int)table->s->fields )
+			break;
+
+		// check index
+		if (
+			table->s->keys!=1 ||
+			table->key_info[0].key_parts!=1 ||
+			strcasecmp ( table->key_info[0].key_part[0].field->field_name, table->field[2]->field_name ) )
+		{
+			my_snprintf ( sError, sizeof(sError), "%s: there must be an index on '%s' column",
+				name, table->field[2]->field_name );
+			break;
+		}
+
+		// all good
+		sError[0] = '\0';
+		break;
+	}
+	if ( sError[0] )
+	{
+		my_error ( ER_CANT_CREATE_TABLE, MYF(0), sError, -1 );
+		SPH_RET(-1);
+	}
+
+	SPH_RET(0);
+}
+
+//// show functions
+
+#if MYSQL_VERSION_ID<50100
+#define SHOW_VAR_FUNC_BUFF_SIZE 1024
+#endif
+
+/// Status variable callback: publish the statistics of the last query made by this thread
+/// as an array of variables (total, total_found, time, word_count, error, words).
+/// The per-keyword "word:docs:hits" list is built in sBuffer, which is treated as
+/// SHOW_VAR_FUNC_BUFF_SIZE bytes long. With no statistics, the array is empty.
+static int sphinx_showfunc ( THD * thd, SHOW_VAR * out, char * sBuffer )
+{
+	CSphSEThreadData *pTls = (CSphSEThreadData *) *thd_ha_data ( thd, sphinx_hton_ptr );
+	CSphSEStats * pStats = ( pTls && pTls->m_bStats ) ? &pTls->m_tStats : 0;
+	SHOW_VAR *array = (SHOW_VAR*)thd_alloc(thd, sizeof(SHOW_VAR)*7);
+	out->type = SHOW_ARRAY;
+	out->value = (char*)array;
+	if (pStats)
+	{
+		array[0].name = "total";
+		array[0].type = SHOW_INT;
+		array[0].value = (char *) &pStats->m_iMatchesTotal;
+		array[1].name = "total_found";
+		array[1].type = SHOW_INT;
+		array[1].value = (char *) &pStats->m_iMatchesFound;
+		array[2].name = "time";
+		array[2].type = SHOW_INT;
+		array[2].value = (char *) &pStats->m_iQueryMsec;
+		array[3].name = "word_count";
+		array[3].type = SHOW_INT;
+		array[3].value = (char *) &pStats->m_iWords;
+		array[4].name = "error";
+		array[4].type = SHOW_CHAR;
+		array[4].value = (char *) &pStats->m_sLastMessage;
+		array[5].name = "words";
+		array[5].type = SHOW_CHAR;
+		array[5].value = sBuffer;
+		sBuffer[0] = 0;
+
+		if ( pStats->m_iWords )
+		{
+			uint uBuffLen = 0;
+
+			// the following is partially based on code in sphinx_show_status()
+			// append every entry at the current end of the buffer; printing the buffer
+			// into itself through "%s" would make source and destination overlap
+			for ( int i=0; i<pStats->m_iWords && uBuffLen<SHOW_VAR_FUNC_BUFF_SIZE-1; i++ )
+			{
+				CSphSEWordStats & tWord = pStats->m_dWords[i];
+				uBuffLen += (uint) my_snprintf ( sBuffer+uBuffLen, SHOW_VAR_FUNC_BUFF_SIZE-uBuffLen, "%s:%d:%d ",
+					tWord.m_sWord, tWord.m_iDocs, tWord.m_iHits );
+			}
+
+			if ( uBuffLen > 0 )
+			{
+				// trim last space
+				sBuffer [ --uBuffLen ] = 0;
+
+				if ( pTls->m_pQueryCharset )
+				{
+					// String::c_ptr() will nul-terminate the buffer.
+					//
+					// NOTE: It's not entirely clear whether this conversion is necessary at all.
+
+					String sConvert;
+					uint iErrors;
+					sConvert.copy ( sBuffer, uBuffLen, pTls->m_pQueryCharset, system_charset_info, &iErrors );
+
+					// the converted text may be longer than the original; never write past the buffer
+					uint uConvLen = sConvert.length();
+					if ( uConvLen>=SHOW_VAR_FUNC_BUFF_SIZE )
+						uConvLen = SHOW_VAR_FUNC_BUFF_SIZE-1;
+					memcpy ( sBuffer, sConvert.c_ptr(), uConvLen );
+					sBuffer[uConvLen] = 0;
+				}
+			}
+		}
+
+		array[6].name = 0; // terminate the array
+	}
+	else
+		array[0].name = 0;
+	return 0;
+}
+
+#if MYSQL_VERSION_ID>50100
+struct st_mysql_storage_engine sphinx_storage_engine =
+{
+	MYSQL_HANDLERTON_INTERFACE_VERSION
+};
+
+struct st_mysql_show_var sphinx_status_vars[] =
+{
+	{"sphinx", (char *)sphinx_showfunc, SHOW_FUNC},
+	{0, 0, (enum_mysql_show_type)0}
+};
+
+
+mysql_declare_plugin(sphinx)
+{
+	MYSQL_STORAGE_ENGINE_PLUGIN,
+	&sphinx_storage_engine,
+	sphinx_hton_name,
+	"Sphinx developers",
+	sphinx_hton_comment,
+	PLUGIN_LICENSE_GPL,
+	sphinx_init_func, // Plugin Init
+	sphinx_done_func, // Plugin Deinit
+	0x0001, // 0.1
+	sphinx_status_vars,
+	NULL,
+	NULL
+}
+mysql_declare_plugin_end;
+
+#ifdef maria_declare_plugin
+maria_declare_plugin(sphinx)
+{
+	MYSQL_STORAGE_ENGINE_PLUGIN,
+	&sphinx_storage_engine,
+	sphinx_hton_name,
+	"Sphinx developers",
+	sphinx_hton_comment,
+	PLUGIN_LICENSE_GPL,
+	sphinx_init_func, // Plugin Init
+	sphinx_done_func, // Plugin Deinit
+	0x0001, // 0.1
+	sphinx_status_vars,
+	NULL,
+	"0.1", // string version
+	MariaDB_PLUGIN_MATURITY_EXPERIMENTAL
+}
+maria_declare_plugin_end;
+#endif
+
+#endif // >50100
+
+//
+// $Id: ha_sphinx.cc 2058 2009-11-07 04:01:57Z shodan $
+//
diff --git a/storage/sphinx/ha_sphinx.h b/storage/sphinx/ha_sphinx.h
new file mode 100644
index 00000000000..3f517062cff
--- /dev/null
+++ b/storage/sphinx/ha_sphinx.h
@@ -0,0 +1,159 @@
+//
+// $Id: ha_sphinx.h 1428 2008-09-05 18:06:30Z xale $
+//
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface // gcc class implementation
+#endif
+
+
+#if MYSQL_VERSION_ID>50100
+#define TABLE_ARG st_table_share +#else +#define TABLE_ARG st_table +#endif + + +#if MYSQL_VERSION_ID>=50120 +typedef uchar byte; +#endif + + +/// forward decls +class THD; +struct CSphReqQuery; +struct CSphSEShare; +struct CSphSEAttr; +struct CSphSEStats; +struct CSphSEThreadData; + +/// Sphinx SE handler class +class ha_sphinx : public handler +{ +protected: + THR_LOCK_DATA m_tLock; ///< MySQL lock + + CSphSEShare * m_pShare; ///< shared lock info + + uint m_iMatchesTotal; + uint m_iCurrentPos; + const byte * m_pCurrentKey; + uint m_iCurrentKeyLen; + + char * m_pResponse; ///< searchd response storage + char * m_pResponseEnd; ///< searchd response storage end (points to wilderness!) + char * m_pCur; ///< current position into response + bool m_bUnpackError; ///< any errors while unpacking response + +public: +#if MYSQL_VERSION_ID<50100 + ha_sphinx ( TABLE_ARG * table_arg ); +#else + ha_sphinx ( handlerton * hton, TABLE_ARG * table_arg ); +#endif + ~ha_sphinx () {} + + const char * table_type () const { return "SPHINX"; } ///< SE name for display purposes + const char * index_type ( uint ) { return "HASH"; } ///< index type name for display purposes + const char ** bas_ext () const; ///< my file extensions + + #if MYSQL_VERSION_ID>50100 + ulonglong table_flags () const { return HA_CAN_INDEX_BLOBS; } ///< bitmap of implemented flags (see handler.h for more info) + #else + ulong table_flags () const { return HA_CAN_INDEX_BLOBS; } ///< bitmap of implemented flags (see handler.h for more info) + #endif + + ulong index_flags ( uint, uint, bool ) const { return 0; } ///< bitmap of flags that says how SE implements indexes + uint max_supported_record_length () const { return HA_MAX_REC_LENGTH; } + uint max_supported_keys () const { return 1; } + uint max_supported_key_parts () const { return 1; } + uint max_supported_key_length () const { return MAX_KEY_LENGTH; } + uint max_supported_key_part_length () const { return MAX_KEY_LENGTH; } + + #if MYSQL_VERSION_ID>50100 + 
virtual double scan_time () { return (double)( stats.records+stats.deleted )/20.0 + 10; } ///< called in test_quick_select to determine if indexes should be used + #else + virtual double scan_time () { return (double)( records+deleted )/20.0 + 10; } ///< called in test_quick_select to determine if indexes should be used + #endif + + virtual double read_time(uint index, uint ranges, ha_rows rows) + { return (double)rows/20.0 + 1; } ///< index read time estimate + +public: + int open ( const char * name, int mode, uint test_if_locked ); + int close (); + + int write_row ( uchar * buf ); + int update_row ( const uchar * old_data, uchar * new_data ); + int delete_row ( const uchar * buf ); + + int index_init ( uint keynr, bool sorted ); // 5.1.x + int index_init ( uint keynr ) { return index_init ( keynr, false ); } // 5.0.x + + int index_end (); + int index_read ( byte * buf, const byte * key, uint key_len, enum ha_rkey_function find_flag ); + int index_read_idx ( byte * buf, uint idx, const byte * key, uint key_len, enum ha_rkey_function find_flag ); + int index_next ( byte * buf ); + int index_next_same ( byte * buf, const byte * key, uint keylen ); + int index_prev ( byte * buf ); + int index_first ( byte * buf ); + int index_last ( byte * buf ); + + int get_rec ( byte * buf, const byte * key, uint keylen ); + + int rnd_init ( bool scan ); + int rnd_end (); + int rnd_next ( byte * buf ); + int rnd_pos ( byte * buf, byte * pos ); + void position ( const byte * record ); + +#if MYSQL_VERSION_ID>=50030 + int info ( uint ); +#else + void info ( uint ); +#endif + + int reset(); + int external_lock ( THD * thd, int lock_type ); + int delete_all_rows (); + ha_rows records_in_range ( uint inx, key_range * min_key, key_range * max_key ); + + int delete_table ( const char * from ); + int rename_table ( const char * from, const char * to ); + int create ( const char * name, TABLE * form, HA_CREATE_INFO * create_info ); + + THR_LOCK_DATA **store_lock ( THD * thd, THR_LOCK_DATA 
#!/bin/sh
#
# Usage: make-patch.sh <patch> <original> <new>
#
# Diffs a fixed list of server source files between the <original> and <new>
# trees and appends the results to <patch>, which is recreated from scratch.

OUT=$1
ORIG=$2
NEW=$3

# all three arguments are required
if [ -z "$OUT" ] || [ -z "$ORIG" ] || [ -z "$NEW" ]; then
	echo "$0 <patch> <original> <new>"
	exit 1
fi

# paths are relative to the root of each tree
FILES='
/config/ac-macros/ha_sphinx.m4
/configure.in
/libmysqld/Makefile.am
/sql/handler.cc
/sql/handler.h
/sql/Makefile.am
/sql/mysqld.cc
/sql/mysql_priv.h
/sql/set_var.cc
/sql/sql_lex.h
/sql/sql_parse.cc
/sql/sql_yacc.yy
/sql/structs.h
/sql/sql_show.cc
'

# start from an empty patch; bail out if the old one cannot be removed
rm -f "$OUT"
if [ -e "$OUT" ]; then
	exit 1
fi

# $FILES is intentionally unquoted so that it splits into one path per word;
# everything else is quoted so that tree and patch paths may contain spaces
for name in $FILES; do
	diff -BNru "$ORIG$name" "$NEW$name" >> "$OUT"
done
/// unaligned read wrapper for some architectures (eg. SPARC)
///
/// Builds the result one byte at a time so that no load wider than a byte is
/// issued against the (possibly misaligned) address of tRef.
template < typename T >
inline T sphUnalignedRead ( const T & tRef )
{
	T tResult;
	const unsigned char * pFrom = (const unsigned char *) &tRef;
	unsigned char * pTo = (unsigned char *) &tResult;
	for ( size_t iByte=0; iByte<sizeof(T); iByte++ )
		pTo[iByte] = pFrom[iByte];
	return tResult;
}
SPARC) +template < typename T > +void sphUnalignedWrite ( void * pPtr, const T & tVal ) +{ + byte * pDst = (byte *) pPtr; + byte * pSrc = (byte *) &tVal; + for ( int i=0; i<(int)sizeof(T); i++ ) + *pDst++ = *pSrc++; +} + +#endif + +#define SPHINXSE_MAX_ALLOC (16*1024*1024) + +#define SafeDelete(_arg) { if ( _arg ) delete ( _arg ); (_arg) = NULL; } +#define SafeDeleteArray(_arg) { if ( _arg ) delete [] ( _arg ); (_arg) = NULL; } + +#define Min(a,b) ((a)<(b)?(a):(b)) + +typedef unsigned int DWORD; + +inline DWORD sphF2DW ( float f ) { union { float f; uint32 d; } u; u.f = f; return u.d; } + +static char * sphDup ( const char * sSrc, int iLen=-1 ) +{ + if ( !sSrc ) + return NULL; + + if ( iLen<0 ) + iLen = strlen(sSrc); + + char * sRes = new char [ 1+iLen ]; + memcpy ( sRes, sSrc, iLen ); + sRes[iLen] = '\0'; + return sRes; +} + +static inline void sphShowErrno ( const char * sCall ) +{ + char sError[256]; + snprintf ( sError, sizeof(sError), "%s() failed: [%d] %s", sCall, errno, strerror(errno) ); + my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), sError ); +} + +static const bool sphReportErrors = true; + +static bool sphSend ( int iFd, const char * pBuffer, int iSize, bool bReportErrors = false ) +{ + assert ( pBuffer ); + assert ( iSize > 0 ); + + const int iResult = send ( iFd, pBuffer, iSize, 0 ); + if ( iResult != iSize ) + { + if ( bReportErrors ) sphShowErrno("send"); + return false; + } + return true; +} + +static bool sphRecv ( int iFd, char * pBuffer, int iSize, bool bReportErrors = false ) +{ + assert ( pBuffer ); + assert ( iSize > 0 ); + + while ( iSize ) + { + const int iResult = recv ( iFd, pBuffer, iSize, 0 ); + if ( iResult > 0 ) + { + iSize -= iResult; + pBuffer += iSize; + } + else if ( iResult == 0 ) + { + if ( bReportErrors ) + my_error ( ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0), "recv() failed: disconnected" ); + return false; + } + else + { + if ( bReportErrors ) sphShowErrno("recv"); + return false; + } + } + return true; +} + +enum +{ + 
SPHINX_SEARCHD_PROTO = 1, + + SEARCHD_COMMAND_SEARCH = 0, + SEARCHD_COMMAND_EXCERPT = 1, + + VER_COMMAND_SEARCH = 0x116, + VER_COMMAND_EXCERPT = 0x100, +}; + +/// known answers +enum +{ + SEARCHD_OK = 0, ///< general success, command-specific reply follows + SEARCHD_ERROR = 1, ///< general failure, error message follows + SEARCHD_RETRY = 2, ///< temporary failure, error message follows, client should retry later + SEARCHD_WARNING = 3 ///< general success, warning message and command-specific reply follow +}; + +#define SPHINXSE_DEFAULT_SCHEME "sphinx" +#define SPHINXSE_DEFAULT_HOST "127.0.0.1" +#define SPHINXSE_DEFAULT_PORT 9312 +#define SPHINXSE_DEFAULT_INDEX "*" + +class CSphBuffer +{ +private: + bool m_bOverrun; + int m_iSize; + int m_iLeft; + char * m_pBuffer; + char * m_pCurrent; + +public: + CSphBuffer ( const int iSize ) + : m_bOverrun ( false ) + , m_iSize ( iSize ) + , m_iLeft ( iSize ) + { + assert ( iSize > 0 ); + m_pBuffer = new char[iSize]; + m_pCurrent = m_pBuffer; + } + + ~CSphBuffer () + { + SafeDelete ( m_pBuffer ); + } + + const char * Ptr() const { return m_pBuffer; } + + bool Finalize() + { + return !( m_bOverrun || m_iLeft != 0 || m_pCurrent - m_pBuffer != m_iSize ); + } + + void SendBytes ( const void * pBytes, int iBytes ); + + void SendWord ( short int v ) { v = ntohs(v); SendBytes ( &v, sizeof(v) ); } + void SendInt ( int v ) { v = ntohl(v); SendBytes ( &v, sizeof(v) ); } + void SendDword ( DWORD v ) { v = ntohl(v) ;SendBytes ( &v, sizeof(v) ); } + void SendUint64 ( ulonglong v ) { SendDword ( uint(v>>32) ); SendDword ( uint(v&0xFFFFFFFFUL) ); } + void SendString ( const char * v ) { SendString ( v, strlen(v) ); } + void SendString ( const char * v, int iLen ) { SendDword(iLen); SendBytes ( v, iLen ); } + void SendFloat ( float v ) { SendDword ( sphF2DW(v) ); } +}; + +void CSphBuffer::SendBytes ( const void * pBytes, int iBytes ) +{ + if ( m_iLeft < iBytes ) + { + m_bOverrun = true; + return; + } + + memcpy ( m_pCurrent, pBytes, iBytes ); + 
+ m_pCurrent += iBytes; + m_iLeft -= iBytes; +} + +struct CSphUrl +{ + char * m_sBuffer; + char * m_sFormatted; + + char * m_sScheme; + char * m_sHost; + char * m_sIndex; + + int m_iPort; + + CSphUrl() + : m_sBuffer ( NULL ) + , m_sFormatted ( NULL ) + , m_sScheme ( (char*) SPHINXSE_DEFAULT_SCHEME ) + , m_sHost ( (char*) SPHINXSE_DEFAULT_HOST ) + , m_sIndex ( (char*) SPHINXSE_DEFAULT_INDEX ) + , m_iPort ( SPHINXSE_DEFAULT_PORT ) + {} + + ~CSphUrl() + { + SafeDeleteArray ( m_sFormatted ); + SafeDeleteArray ( m_sBuffer ); + } + + bool Parse ( const char * sUrl, int iLen ); + int Connect(); + const char * Format(); +}; + +const char * CSphUrl::Format() +{ + if ( !m_sFormatted ) + { + int iSize = 15 + strlen(m_sHost) + strlen(m_sIndex); + m_sFormatted = new char [ iSize ]; + if ( m_iPort ) + snprintf ( m_sFormatted, iSize, "inet://%s:%d/%s", m_sHost, m_iPort, m_sIndex ); + else + snprintf ( m_sFormatted, iSize, "unix://%s/%s", m_sHost, m_sIndex ); + } + return m_sFormatted; +} + +// the following scheme variants are recognized +// +// inet://host/index +// inet://host:port/index +// unix://unix/domain/socket:index +// unix://unix/domain/socket +bool CSphUrl::Parse ( const char * sUrl, int iLen ) +{ + bool bOk = true; + while ( iLen ) + { + bOk = false; + + m_sBuffer = sphDup ( sUrl, iLen ); + m_sScheme = m_sBuffer; + + m_sHost = strstr ( m_sBuffer, "://" ); + if ( !m_sHost ) + break; + m_sHost[0] = '\0'; + m_sHost += 2; + + if ( !strcmp ( m_sScheme, "unix" ) ) + { + // unix-domain socket + m_iPort = 0; + if (!( m_sIndex = strrchr ( m_sHost, ':' ) )) + m_sIndex = (char*) SPHINXSE_DEFAULT_INDEX; + else + { + *m_sIndex++ = '\0'; + if ( !*m_sIndex ) + m_sIndex = (char*) SPHINXSE_DEFAULT_INDEX; + } + bOk = true; + break; + } + if( strcmp ( m_sScheme, "sphinx" ) != 0 && strcmp ( m_sScheme, "inet" ) != 0 ) + break; + + // inet + m_sHost++; + char * sPort = strchr ( m_sHost, ':' ); + if ( sPort ) + { + *sPort++ = '\0'; + if ( *sPort ) + { + m_sIndex = strchr ( sPort, '/' ); + 
if ( m_sIndex ) + *m_sIndex++ = '\0'; + else + m_sIndex = (char*) SPHINXSE_DEFAULT_INDEX; + + m_iPort = atoi(sPort); + if ( !m_iPort ) + m_iPort = SPHINXSE_DEFAULT_PORT; + } + } else + { + m_sIndex = strchr ( m_sHost, '/' ); + if ( m_sIndex ) + *m_sIndex++ = '\0'; + else + m_sIndex = (char*) SPHINXSE_DEFAULT_INDEX; + } + + bOk = true; + break; + } + + return bOk; +} + +int CSphUrl::Connect() +{ + struct sockaddr_in sin; +#ifndef __WIN__ + struct sockaddr_un saun; +#endif + + int iDomain = 0; + int iSockaddrSize = 0; + struct sockaddr * pSockaddr = NULL; + + in_addr_t ip_addr; + + if ( m_iPort ) + { + iDomain = AF_INET; + iSockaddrSize = sizeof(sin); + pSockaddr = (struct sockaddr *) &sin; + + memset ( &sin, 0, sizeof(sin) ); + sin.sin_family = AF_INET; + sin.sin_port = htons(m_iPort); + + // resolve address + if ( (int)( ip_addr=inet_addr(m_sHost) ) != (int)INADDR_NONE ) + memcpy ( &sin.sin_addr, &ip_addr, sizeof(ip_addr) ); + else + { + int tmp_errno; + struct hostent tmp_hostent, *hp; + char buff2 [ GETHOSTBYNAME_BUFF_SIZE ]; + + hp = my_gethostbyname_r ( m_sHost, &tmp_hostent, + buff2, sizeof(buff2), &tmp_errno ); + if ( !hp ) + { + my_gethostbyname_r_free(); + + char sError[256]; + snprintf ( sError, sizeof(sError), "failed to resolve searchd host (name=%s)", m_sHost ); + + my_error ( ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0), sError ); + return -1; + } + + memcpy ( &sin.sin_addr, hp->h_addr, Min ( sizeof(sin.sin_addr), (size_t)hp->h_length ) ); + my_gethostbyname_r_free(); + } + } + else + { +#ifndef __WIN__ + iDomain = AF_UNIX; + iSockaddrSize = sizeof(saun); + pSockaddr = (struct sockaddr *) &saun; + + memset ( &saun, 0, sizeof(saun) ); + saun.sun_family = AF_UNIX; + strncpy ( saun.sun_path, m_sHost, sizeof(saun.sun_path)-1 ); +#else + my_error ( ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0), "Unix-domain sockets are not supported on Windows" ); + return -1; +#endif + } + + // connect to searchd and exchange versions + uint uServerVersion; + uint uClientVersion 
= htonl ( SPHINX_SEARCHD_PROTO ); + int iSocket = -1; + const char * pError = NULL; + do + { + iSocket = socket ( iDomain, SOCK_STREAM, 0 ); + if ( iSocket == -1 ) + { + pError = "Failed to create client socket"; + break; + } + + if ( connect ( iSocket, pSockaddr, iSockaddrSize ) == -1) + { + pError = "Failed to connect to searchd"; + break; + } + + if ( !sphRecv ( iSocket, (char *)&uServerVersion, sizeof(uServerVersion) ) ) + { + pError = "Failed to receive searchd version"; + break; + } + + if ( !sphSend ( iSocket, (char *)&uClientVersion, sizeof(uClientVersion) ) ) + { + pError = "Failed to send client version"; + break; + } + } + while(0); + + // fixme: compare versions? + + if ( pError ) + { + char sError[1024]; + snprintf ( sError, sizeof(sError), "%s [%d] %s", Format(), errno, strerror(errno) ); + my_error ( ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0), sError ); + + if ( iSocket != -1 ) + close ( iSocket ); + + return -1; + } + + return iSocket; +} + +struct CSphResponse +{ + char * m_pBuffer; + char * m_pBody; + + CSphResponse () + : m_pBuffer ( NULL ) + , m_pBody ( NULL ) + {} + + CSphResponse ( DWORD uSize ) + : m_pBody ( NULL ) + { + m_pBuffer = new char[uSize]; + } + + ~CSphResponse () + { + SafeDeleteArray ( m_pBuffer ); + } + + static CSphResponse * Read ( int iSocket, int iClientVersion ); +}; + +CSphResponse * +CSphResponse::Read ( int iSocket, int iClientVersion ) +{ + char sHeader[8]; + if ( !sphRecv ( iSocket, sHeader, sizeof(sHeader) ) ) + return NULL; + + int iStatus = ntohs ( sphUnalignedRead ( *(short int *) &sHeader[0] ) ); + int iVersion = ntohs ( sphUnalignedRead ( *(short int *) &sHeader[2] ) ); + DWORD uLength = ntohl ( sphUnalignedRead ( *(DWORD *) &sHeader[4] ) ); + + if ( iVersion < iClientVersion ) // fixme: warn + {} + + if ( uLength <= SPHINXSE_MAX_ALLOC ) + { + CSphResponse * pResponse = new CSphResponse ( uLength ); + if ( !sphRecv ( iSocket, pResponse->m_pBuffer, uLength ) ) + { + SafeDelete ( pResponse ); + return NULL; + } + + 
pResponse->m_pBody = pResponse->m_pBuffer; + if ( iStatus != SEARCHD_OK ) + { + DWORD uSize = ntohl ( *(DWORD *)pResponse->m_pBuffer ); + if ( iStatus == SEARCHD_WARNING ) + pResponse->m_pBody += uSize; // fixme: report the warning somehow + else + { + char * sMessage = sphDup ( pResponse->m_pBuffer + sizeof(DWORD), uSize ); + my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), sMessage ); + SafeDelete ( sMessage ); + SafeDelete ( pResponse ); + return NULL; + } + } + return pResponse; + } + return NULL; +} + +/// udf + +extern "C" +{ + my_bool sphinx_snippets_init ( UDF_INIT * pUDF, UDF_ARGS * pArgs, char * sMessage ); + void sphinx_snippets_deinit ( UDF_INIT * pUDF ); + char * sphinx_snippets ( UDF_INIT * pUDF, UDF_ARGS * pArgs, char * sResult, unsigned long * pLength, char * pIsNull, char * sError ); +}; + +#define MAX_MESSAGE_LENGTH 255 +#define MAX_RESULT_LENGTH 255 + +struct CSphSnippets +{ + CSphUrl m_tUrl; + CSphResponse * m_pResponse; + + int m_iBeforeMatch; + int m_iAfterMatch; + int m_iChunkSeparator; + int m_iLimit; + int m_iAround; + int m_iFlags; + + CSphSnippets() + : m_pResponse(NULL) + , m_iBeforeMatch(0) + , m_iAfterMatch(0) + , m_iChunkSeparator(0) + // defaults + , m_iLimit(256) + , m_iAround(5) + , m_iFlags(1) + { + } + + ~CSphSnippets() + { + SafeDelete ( m_pResponse ); + } +}; + +#define KEYWORD(NAME) else if ( strncmp ( NAME, pArgs->attributes[i], pArgs->attribute_lengths[i] ) == 0 ) + +#define CHECK_TYPE(TYPE) \ + if ( pArgs->arg_type[i] != TYPE ) \ + { \ + snprintf ( sMessage, MAX_MESSAGE_LENGTH, \ + "%.*s argument must be a string", \ + (int)pArgs->attribute_lengths[i], \ + pArgs->attributes[i] ); \ + bFail = true; \ + break; \ + } \ + if ( TYPE == STRING_RESULT && !pArgs->args[i] ) \ + { \ + snprintf ( sMessage, MAX_MESSAGE_LENGTH, \ + "%.*s argument must be constant (and not NULL)", \ + (int)pArgs->attribute_lengths[i], \ + pArgs->attributes[i] ); \ + bFail = true; \ + break; \ + } + +#define STRING CHECK_TYPE(STRING_RESULT) +#define 
INT CHECK_TYPE(INT_RESULT); int iValue = *(long long *)pArgs->args[i] + +my_bool sphinx_snippets_init ( UDF_INIT * pUDF, UDF_ARGS * pArgs, char * sMessage ) +{ + if ( pArgs->arg_count < 3 ) + { + strncpy ( sMessage, "insufficient arguments", MAX_MESSAGE_LENGTH ); + return 1; + } + + bool bFail = false; + CSphSnippets * pOpts = new CSphSnippets; + for ( uint i = 0; i < pArgs->arg_count; i++ ) + { + if ( i < 3 ) + { + if ( pArgs->arg_type[i] != STRING_RESULT ) + { + strncpy ( sMessage, "first three arguments must be of string type", MAX_MESSAGE_LENGTH ); + bFail = true; + break; + } + } + KEYWORD("sphinx") + { + STRING; + if ( !pOpts->m_tUrl.Parse ( pArgs->args[i], pArgs->lengths[i] ) ) + { + strncpy ( sMessage, "failed to parse connection string", MAX_MESSAGE_LENGTH ); + bFail = true; + break; + } + } + KEYWORD("before_match") { STRING; pOpts->m_iBeforeMatch = i; } + KEYWORD("after_match") { STRING; pOpts->m_iAfterMatch = i; } + KEYWORD("chunk_separator") { STRING; pOpts->m_iChunkSeparator = i; } + KEYWORD("limit") { INT; pOpts->m_iLimit = iValue; } + KEYWORD("around") { INT; pOpts->m_iAround = iValue; } + KEYWORD("exact_phrase") { INT; if ( iValue ) pOpts->m_iFlags |= 2; } + KEYWORD("single_passage") { INT; if ( iValue ) pOpts->m_iFlags |= 4; } + KEYWORD("use_boundaries") { INT; if ( iValue ) pOpts->m_iFlags |= 8; } + KEYWORD("weight_order") { INT; if ( iValue ) pOpts->m_iFlags |= 16; } + else + { + snprintf ( sMessage, MAX_MESSAGE_LENGTH, "unrecognized argument: %.*s", + (int)pArgs->attribute_lengths[i], pArgs->attributes[i] ); + bFail = true; + break; + } + } + + if ( bFail ) + { + SafeDelete ( pOpts ); + return 1; + } + pUDF->ptr = (char *)pOpts; + return 0; +} + +#undef STRING +#undef INT +#undef KEYWORD +#undef CHECK_TYPE + +#define ARG(i) pArgs->args[i], pArgs->lengths[i] +#define ARG_LEN(VAR, LEN) ( VAR ? 
pArgs->lengths[VAR] : LEN ) + +#define SEND_STRING(INDEX, DEFAULT) \ + if ( INDEX ) \ + tBuffer.SendString ( ARG(INDEX) ); \ + else \ + tBuffer.SendString ( DEFAULT, sizeof(DEFAULT) - 1 ); + + +char * sphinx_snippets ( UDF_INIT * pUDF, UDF_ARGS * pArgs, char * sResult, unsigned long * pLength, char * pIsNull, char * pError ) +{ + CSphSnippets * pOpts = (CSphSnippets *)pUDF->ptr; + assert ( pOpts ); + + if ( !pArgs->args[0] || !pArgs->args[1] || !pArgs->args[2] ) + { + *pIsNull = 1; + return sResult; + } + + const int iSize = + 8 + // header + 8 + + 4 + pArgs->lengths[1] + // index + 4 + pArgs->lengths[2] + // words + 4 + ARG_LEN ( pOpts->m_iBeforeMatch, 3 ) + + 4 + ARG_LEN ( pOpts->m_iAfterMatch, 4 ) + + 4 + ARG_LEN ( pOpts->m_iChunkSeparator, 5 ) + + 12 + + 4 + pArgs->lengths[0]; // document + + CSphBuffer tBuffer(iSize); + + tBuffer.SendWord ( SEARCHD_COMMAND_EXCERPT ); + tBuffer.SendWord ( VER_COMMAND_EXCERPT ); + tBuffer.SendDword ( iSize - 8 ); + + tBuffer.SendDword ( 0 ); + tBuffer.SendDword ( pOpts->m_iFlags ); + + tBuffer.SendString ( ARG(1) ); // index + tBuffer.SendString ( ARG(2) ); // words + + SEND_STRING ( pOpts->m_iBeforeMatch, "<b>" ); + SEND_STRING ( pOpts->m_iAfterMatch, "</b>" ); + SEND_STRING ( pOpts->m_iChunkSeparator, " ... 
" ); + + tBuffer.SendInt ( pOpts->m_iLimit ); + tBuffer.SendInt ( pOpts->m_iAround ); + + // single document + tBuffer.SendInt ( 1 ); + tBuffer.SendString ( ARG(0) ); + + int iSocket = -1; + do + { + if ( !tBuffer.Finalize() ) + { + my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "INTERNAL ERROR: failed to build request" ); + break; + } + + iSocket = pOpts->m_tUrl.Connect(); + if ( iSocket == -1 ) break; + if ( !sphSend ( iSocket, tBuffer.Ptr(), iSize, sphReportErrors ) ) break; + + CSphResponse * pResponse = CSphResponse::Read ( iSocket, 0x100 ); + if ( !pResponse ) break; + + close ( iSocket ); + pOpts->m_pResponse = pResponse; + *pLength = ntohl( *(DWORD *)pResponse->m_pBody ); + return pResponse->m_pBody + sizeof(DWORD); + } + while(0); + + if ( iSocket != -1 ) + close ( iSocket ); + + *pError = 1; + return sResult; +} + +#undef SEND_STRING +#undef ARG_LEN +#undef ARG + +void sphinx_snippets_deinit ( UDF_INIT * pUDF ) +{ + CSphSnippets * pOpts = (CSphSnippets *)pUDF->ptr; + SafeDelete ( pOpts ); +} + +// +// $Id: snippets_udf.cc 2058 2009-11-07 04:01:57Z shodan $ +// diff --git a/storage/sphinx/sphinx.5.0.22.diff b/storage/sphinx/sphinx.5.0.22.diff new file mode 100644 index 00000000000..7dd4ebf1410 --- /dev/null +++ b/storage/sphinx/sphinx.5.0.22.diff @@ -0,0 +1,284 @@ +diff -B -N -r -u mysql-5.0.22/config/ac-macros/ha_sphinx.m4 mysql-5.0.22.sx/config/ac-macros/ha_sphinx.m4 +--- mysql-5.0.22/config/ac-macros/ha_sphinx.m4 1970-01-01 01:00:00.000000000 +0100 ++++ mysql-5.0.22.sx/config/ac-macros/ha_sphinx.m4 2006-06-06 19:49:38.000000000 +0200 +@@ -0,0 +1,30 @@ ++dnl --------------------------------------------------------------------------- ++dnl Macro: MYSQL_CHECK_EXAMPLEDB ++dnl Sets HAVE_SPHINX_DB if --with-sphinx-storage-engine is used ++dnl --------------------------------------------------------------------------- ++AC_DEFUN([MYSQL_CHECK_SPHINXDB], [ ++ AC_ARG_WITH([sphinx-storage-engine], ++ [ ++ --with-sphinx-storage-engine ++ Enable the Sphinx 
Storage Engine], ++ [sphinxdb="$withval"], ++ [sphinxdb=no]) ++ AC_MSG_CHECKING([for example storage engine]) ++ ++ case "$sphinxdb" in ++ yes ) ++ AC_DEFINE([HAVE_SPHINX_DB], [1], [Builds Sphinx Engine]) ++ AC_MSG_RESULT([yes]) ++ [sphinxdb=yes] ++ ;; ++ * ) ++ AC_MSG_RESULT([no]) ++ [sphinxdb=no] ++ ;; ++ esac ++ ++]) ++dnl --------------------------------------------------------------------------- ++dnl END OF MYSQL_CHECK_EXAMPLE SECTION ++dnl --------------------------------------------------------------------------- ++ +diff -B -N -r -u mysql-5.0.22/configure.in mysql-5.0.22.sx/configure.in +--- mysql-5.0.22/configure.in 2006-05-25 10:56:45.000000000 +0200 ++++ mysql-5.0.22.sx/configure.in 2006-06-06 19:49:38.000000000 +0200 +@@ -41,6 +41,7 @@ + sinclude(config/ac-macros/ha_berkeley.m4) + sinclude(config/ac-macros/ha_blackhole.m4) + sinclude(config/ac-macros/ha_example.m4) ++sinclude(config/ac-macros/ha_sphinx.m4) + sinclude(config/ac-macros/ha_federated.m4) + sinclude(config/ac-macros/ha_innodb.m4) + sinclude(config/ac-macros/ha_ndbcluster.m4) +@@ -2450,6 +2451,7 @@ + MYSQL_CHECK_BDB + MYSQL_CHECK_INNODB + MYSQL_CHECK_EXAMPLEDB ++MYSQL_CHECK_SPHINXDB + MYSQL_CHECK_ARCHIVEDB + MYSQL_CHECK_CSVDB + MYSQL_CHECK_BLACKHOLEDB +diff -B -N -r -u mysql-5.0.22/libmysqld/Makefile.am mysql-5.0.22.sx/libmysqld/Makefile.am +--- mysql-5.0.22/libmysqld/Makefile.am 2006-05-25 10:56:55.000000000 +0200 ++++ mysql-5.0.22.sx/libmysqld/Makefile.am 2006-06-06 19:49:38.000000000 +0200 +@@ -27,7 +27,7 @@ + -DSHAREDIR="\"$(MYSQLSHAREdir)\"" + INCLUDES= @bdb_includes@ \ + -I$(top_builddir)/include -I$(top_srcdir)/include \ +- -I$(top_srcdir)/sql -I$(top_srcdir)/sql/examples \ ++ -I$(top_srcdir)/sql -I$(top_srcdir)/sql/examples -I$(top_srcdir)/sql/sphinx \ + -I$(top_srcdir)/regex \ + $(openssl_includes) $(yassl_includes) @ZLIB_INCLUDES@ + +@@ -38,6 +38,7 @@ + libmysqlsources = errmsg.c get_password.c libmysql.c client.c pack.c \ + my_time.c + sqlexamplessources = ha_example.cc ha_tina.cc 
++sqlsphinxsources = ha_sphinx.cc + + noinst_HEADERS = embedded_priv.h emb_qcache.h + +@@ -65,7 +66,7 @@ + parse_file.cc sql_view.cc sql_trigger.cc my_decimal.cc \ + ha_blackhole.cc ha_archive.cc my_user.c + +-libmysqld_int_a_SOURCES= $(libmysqld_sources) $(libmysqlsources) $(sqlsources) $(sqlexamplessources) ++libmysqld_int_a_SOURCES= $(libmysqld_sources) $(libmysqlsources) $(sqlsources) $(sqlexamplessources) $(sqlsphinxsources) + libmysqld_a_SOURCES= + + # automake misses these +@@ -133,12 +134,16 @@ + rm -f $$f; \ + @LN_CP_F@ $(top_srcdir)/sql/examples/$$f $$f; \ + done; \ ++ for f in $(sqlsphinxsources); do \ ++ rm -f $$f; \ ++ @LN_CP_F@ $(top_srcdir)/sql/sphinx/$$f $$f; \ ++ done; \ + rm -f client_settings.h; \ + @LN_CP_F@ $(top_srcdir)/libmysql/client_settings.h client_settings.h + + + clean-local: +- rm -f `echo $(sqlsources) $(libmysqlsources) $(sqlexamplessources) | sed "s;\.lo;.c;g"` \ ++ rm -f `echo $(sqlsources) $(libmysqlsources) $(sqlexamplessources) $(sqlsphinxsources) | sed "s;\.lo;.c;g"` \ + $(top_srcdir)/linked_libmysqld_sources; \ + rm -f client_settings.h + +diff -B -N -r -u mysql-5.0.22/sql/handler.cc mysql-5.0.22.sx/sql/handler.cc +--- mysql-5.0.22/sql/handler.cc 2006-05-25 10:56:42.000000000 +0200 ++++ mysql-5.0.22.sx/sql/handler.cc 2006-06-06 19:49:38.000000000 +0200 +@@ -78,6 +78,15 @@ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + HTON_NO_FLAGS }; + #endif ++#ifdef HAVE_SPHINX_DB ++#include "sphinx/ha_sphinx.h" ++extern handlerton sphinx_hton; ++#else ++handlerton sphinx_hton = { "SPHINX", SHOW_OPTION_NO, "SPHINX storage engine", ++ DB_TYPE_SPHINX_DB, NULL, 0, 0, NULL, NULL, ++ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ++ HTON_NO_FLAGS }; ++#endif + #ifdef HAVE_INNOBASE_DB + #include "ha_innodb.h" + extern handlerton innobase_hton; +@@ -147,6 +156,7 @@ + &example_hton, + &archive_hton, + &tina_hton, ++ &sphinx_hton, + &ndbcluster_hton, + &federated_hton, + &myisammrg_hton, +@@ -345,6 +355,12 
@@ + return new (alloc) ha_tina(table); + return NULL; + #endif ++#ifdef HAVE_SPHINX_DB ++ case DB_TYPE_SPHINX_DB: ++ if (have_sphinx_db == SHOW_OPTION_YES) ++ return new (alloc) ha_sphinx(table); ++ return NULL; ++#endif + #ifdef HAVE_NDBCLUSTER_DB + case DB_TYPE_NDBCLUSTER: + if (have_ndbcluster == SHOW_OPTION_YES) +diff -B -N -r -u mysql-5.0.22/sql/handler.h mysql-5.0.22.sx/sql/handler.h +--- mysql-5.0.22/sql/handler.h 2006-05-25 10:56:55.000000000 +0200 ++++ mysql-5.0.22.sx/sql/handler.h 2006-06-06 19:49:38.000000000 +0200 +@@ -183,8 +183,9 @@ + DB_TYPE_BERKELEY_DB, DB_TYPE_INNODB, + DB_TYPE_GEMINI, DB_TYPE_NDBCLUSTER, + DB_TYPE_EXAMPLE_DB, DB_TYPE_ARCHIVE_DB, DB_TYPE_CSV_DB, +- DB_TYPE_FEDERATED_DB, ++ DB_TYPE_FEDERATED_DB, + DB_TYPE_BLACKHOLE_DB, ++ DB_TYPE_SPHINX_DB, + DB_TYPE_DEFAULT // Must be last + }; + +diff -B -N -r -u mysql-5.0.22/sql/Makefile.am mysql-5.0.22.sx/sql/Makefile.am +--- mysql-5.0.22/sql/Makefile.am 2006-05-25 10:56:41.000000000 +0200 ++++ mysql-5.0.22.sx/sql/Makefile.am 2006-06-06 19:49:38.000000000 +0200 +@@ -66,6 +66,7 @@ + sql_array.h sql_cursor.h \ + examples/ha_example.h ha_archive.h \ + examples/ha_tina.h ha_blackhole.h \ ++ sphinx/ha_sphinx.h \ + ha_federated.h + mysqld_SOURCES = sql_lex.cc sql_handler.cc \ + item.cc item_sum.cc item_buff.cc item_func.cc \ +@@ -102,6 +103,7 @@ + sp_cache.cc parse_file.cc sql_trigger.cc \ + examples/ha_example.cc ha_archive.cc \ + examples/ha_tina.cc ha_blackhole.cc \ ++ sphinx/ha_sphinx.cc \ + ha_federated.cc + + gen_lex_hash_SOURCES = gen_lex_hash.cc +diff -B -N -r -u mysql-5.0.22/sql/mysqld.cc mysql-5.0.22.sx/sql/mysqld.cc +--- mysql-5.0.22/sql/mysqld.cc 2006-05-25 10:56:41.000000000 +0200 ++++ mysql-5.0.22.sx/sql/mysqld.cc 2006-06-06 19:49:38.000000000 +0200 +@@ -6420,6 +6420,11 @@ + #else + have_csv_db= SHOW_OPTION_NO; + #endif ++#ifdef HAVE_SPHINX_DB ++ have_sphinx_db= SHOW_OPTION_YES; ++#else ++ have_sphinx_db= SHOW_OPTION_NO; ++#endif + #ifdef HAVE_NDBCLUSTER_DB + 
have_ndbcluster=SHOW_OPTION_DISABLED; + #else +@@ -7457,6 +7462,7 @@ + #undef have_example_db + #undef have_archive_db + #undef have_csv_db ++#undef have_sphinx_db + #undef have_federated_db + #undef have_partition_db + #undef have_blackhole_db +@@ -7467,6 +7473,7 @@ + SHOW_COMP_OPTION have_example_db= SHOW_OPTION_NO; + SHOW_COMP_OPTION have_archive_db= SHOW_OPTION_NO; + SHOW_COMP_OPTION have_csv_db= SHOW_OPTION_NO; ++SHOW_COMP_OPTION have_sphinx_db= SHOW_OPTION_NO; + SHOW_COMP_OPTION have_federated_db= SHOW_OPTION_NO; + SHOW_COMP_OPTION have_partition_db= SHOW_OPTION_NO; + SHOW_COMP_OPTION have_blackhole_db= SHOW_OPTION_NO; +diff -B -N -r -u mysql-5.0.22/sql/mysql_priv.h mysql-5.0.22.sx/sql/mysql_priv.h +--- mysql-5.0.22/sql/mysql_priv.h 2006-05-25 10:56:43.000000000 +0200 ++++ mysql-5.0.22.sx/sql/mysql_priv.h 2006-06-06 19:49:38.000000000 +0200 +@@ -1279,6 +1279,12 @@ + #else + extern SHOW_COMP_OPTION have_csv_db; + #endif ++#ifdef HAVE_SPHINX_DB ++extern handlerton sphinx_hton; ++#define have_sphinx_db sphinx_hton.state ++#else ++extern SHOW_COMP_OPTION have_sphinx_db; ++#endif + #ifdef HAVE_FEDERATED_DB + extern handlerton federated_hton; + #define have_federated_db federated_hton.state +diff -B -N -r -u mysql-5.0.22/sql/set_var.cc mysql-5.0.22.sx/sql/set_var.cc +--- mysql-5.0.22/sql/set_var.cc 2006-05-25 10:56:41.000000000 +0200 ++++ mysql-5.0.22.sx/sql/set_var.cc 2006-06-06 19:49:38.000000000 +0200 +@@ -809,6 +809,7 @@ + {"have_compress", (char*) &have_compress, SHOW_HAVE}, + {"have_crypt", (char*) &have_crypt, SHOW_HAVE}, + {"have_csv", (char*) &have_csv_db, SHOW_HAVE}, ++ {"have_sphinx", (char*) &have_sphinx_db, SHOW_HAVE}, + {"have_example_engine", (char*) &have_example_db, SHOW_HAVE}, + {"have_federated_engine", (char*) &have_federated_db, SHOW_HAVE}, + {"have_geometry", (char*) &have_geometry, SHOW_HAVE}, +diff -B -N -r -u mysql-5.0.22/sql/sql_lex.h mysql-5.0.22.sx/sql/sql_lex.h +--- mysql-5.0.22/sql/sql_lex.h 2006-05-25 10:56:41.000000000 +0200 ++++ 
mysql-5.0.22.sx/sql/sql_lex.h 2006-06-06 19:49:38.000000000 +0200 +@@ -58,6 +58,7 @@ + SQLCOM_SHOW_DATABASES, SQLCOM_SHOW_TABLES, SQLCOM_SHOW_FIELDS, + SQLCOM_SHOW_KEYS, SQLCOM_SHOW_VARIABLES, SQLCOM_SHOW_LOGS, SQLCOM_SHOW_STATUS, + SQLCOM_SHOW_INNODB_STATUS, SQLCOM_SHOW_NDBCLUSTER_STATUS, SQLCOM_SHOW_MUTEX_STATUS, ++ SQLCOM_SHOW_SPHINX_STATUS, + SQLCOM_SHOW_PROCESSLIST, SQLCOM_SHOW_MASTER_STAT, SQLCOM_SHOW_SLAVE_STAT, + SQLCOM_SHOW_GRANTS, SQLCOM_SHOW_CREATE, SQLCOM_SHOW_CHARSETS, + SQLCOM_SHOW_COLLATIONS, SQLCOM_SHOW_CREATE_DB, SQLCOM_SHOW_TABLE_STATUS, +diff -B -N -r -u mysql-5.0.22/sql/sql_parse.cc mysql-5.0.22.sx/sql/sql_parse.cc +--- mysql-5.0.22/sql/sql_parse.cc 2006-05-25 10:56:41.000000000 +0200 ++++ mysql-5.0.22.sx/sql/sql_parse.cc 2006-06-06 19:49:38.000000000 +0200 +@@ -25,6 +25,9 @@ + #ifdef HAVE_INNOBASE_DB + #include "ha_innodb.h" + #endif ++#ifdef HAVE_SPHINX_DB ++#include "sphinx/ha_sphinx.h" ++#endif + + #ifdef HAVE_NDBCLUSTER_DB + #include "ha_ndbcluster.h" +@@ -2722,6 +2725,15 @@ + break; + } + #endif ++#ifdef HAVE_SPHINX_DB ++ case SQLCOM_SHOW_SPHINX_STATUS: ++ { ++ if (check_global_access(thd, SUPER_ACL)) ++ goto error; ++ res = sphinx_show_status(thd); ++ break; ++ } ++#endif + #ifdef HAVE_REPLICATION + case SQLCOM_LOAD_MASTER_TABLE: + { +diff -B -N -r -u mysql-5.0.22/sql/sql_yacc.yy mysql-5.0.22.sx/sql/sql_yacc.yy +--- mysql-5.0.22/sql/sql_yacc.yy 2006-05-25 10:56:43.000000000 +0200 ++++ mysql-5.0.22.sx/sql/sql_yacc.yy 2006-06-06 19:49:38.000000000 +0200 +@@ -6584,6 +6584,9 @@ + case DB_TYPE_INNODB: + Lex->sql_command = SQLCOM_SHOW_INNODB_STATUS; + break; ++ case DB_TYPE_SPHINX_DB: ++ Lex->sql_command = SQLCOM_SHOW_SPHINX_STATUS; ++ break; + default: + my_error(ER_NOT_SUPPORTED_YET, MYF(0), "STATUS"); + YYABORT; diff --git a/storage/sphinx/sphinx.5.0.27.diff b/storage/sphinx/sphinx.5.0.27.diff new file mode 100644 index 00000000000..9ff6cf4fe48 --- /dev/null +++ b/storage/sphinx/sphinx.5.0.27.diff @@ -0,0 +1,284 @@ +diff -B -N -r -u 
mysql-5.0.22/config/ac-macros/ha_sphinx.m4 mysql-5.0.22.sx/config/ac-macros/ha_sphinx.m4 +--- mysql-5.0.22/config/ac-macros/ha_sphinx.m4 1970-01-01 01:00:00.000000000 +0100 ++++ mysql-5.0.22.sx/config/ac-macros/ha_sphinx.m4 2006-06-06 19:49:38.000000000 +0200 +@@ -0,0 +1,30 @@ ++dnl --------------------------------------------------------------------------- ++dnl Macro: MYSQL_CHECK_EXAMPLEDB ++dnl Sets HAVE_SPHINX_DB if --with-sphinx-storage-engine is used ++dnl --------------------------------------------------------------------------- ++AC_DEFUN([MYSQL_CHECK_SPHINXDB], [ ++ AC_ARG_WITH([sphinx-storage-engine], ++ [ ++ --with-sphinx-storage-engine ++ Enable the Sphinx Storage Engine], ++ [sphinxdb="$withval"], ++ [sphinxdb=no]) ++ AC_MSG_CHECKING([for example storage engine]) ++ ++ case "$sphinxdb" in ++ yes ) ++ AC_DEFINE([HAVE_SPHINX_DB], [1], [Builds Sphinx Engine]) ++ AC_MSG_RESULT([yes]) ++ [sphinxdb=yes] ++ ;; ++ * ) ++ AC_MSG_RESULT([no]) ++ [sphinxdb=no] ++ ;; ++ esac ++ ++]) ++dnl --------------------------------------------------------------------------- ++dnl END OF MYSQL_CHECK_EXAMPLE SECTION ++dnl --------------------------------------------------------------------------- ++ +diff -B -N -r -u mysql-5.0.22/configure.in mysql-5.0.22.sx/configure.in +--- mysql-5.0.22/configure.in 2006-05-25 10:56:45.000000000 +0200 ++++ mysql-5.0.22.sx/configure.in 2006-06-06 19:49:38.000000000 +0200 +@@ -41,6 +41,7 @@ + sinclude(config/ac-macros/ha_berkeley.m4) + sinclude(config/ac-macros/ha_blackhole.m4) + sinclude(config/ac-macros/ha_example.m4) ++sinclude(config/ac-macros/ha_sphinx.m4) + sinclude(config/ac-macros/ha_federated.m4) + sinclude(config/ac-macros/ha_innodb.m4) + sinclude(config/ac-macros/ha_ndbcluster.m4) +@@ -2450,6 +2451,7 @@ + MYSQL_CHECK_BDB + MYSQL_CHECK_INNODB + MYSQL_CHECK_EXAMPLEDB ++MYSQL_CHECK_SPHINXDB + MYSQL_CHECK_ARCHIVEDB + MYSQL_CHECK_CSVDB + MYSQL_CHECK_BLACKHOLEDB +diff -B -N -r -u mysql-5.0.22/libmysqld/Makefile.am 
mysql-5.0.22.sx/libmysqld/Makefile.am +--- mysql-5.0.22/libmysqld/Makefile.am 2006-05-25 10:56:55.000000000 +0200 ++++ mysql-5.0.22.sx/libmysqld/Makefile.am 2006-06-06 19:49:38.000000000 +0200 +@@ -27,7 +27,7 @@ + -DSHAREDIR="\"$(MYSQLSHAREdir)\"" + INCLUDES= @bdb_includes@ \ + -I$(top_builddir)/include -I$(top_srcdir)/include \ +- -I$(top_srcdir)/sql -I$(top_srcdir)/sql/examples \ ++ -I$(top_srcdir)/sql -I$(top_srcdir)/sql/examples -I$(top_srcdir)/sql/sphinx \ + -I$(top_srcdir)/regex \ + $(openssl_includes) $(yassl_includes) @ZLIB_INCLUDES@ + +@@ -38,6 +38,7 @@ + libmysqlsources = errmsg.c get_password.c libmysql.c client.c pack.c \ + my_time.c + sqlexamplessources = ha_example.cc ha_tina.cc ++sqlsphinxsources = ha_sphinx.cc + + noinst_HEADERS = embedded_priv.h emb_qcache.h + +@@ -65,7 +66,7 @@ + parse_file.cc sql_view.cc sql_trigger.cc my_decimal.cc \ + ha_blackhole.cc ha_archive.cc my_user.c + +-libmysqld_int_a_SOURCES= $(libmysqld_sources) $(libmysqlsources) $(sqlsources) $(sqlexamplessources) ++libmysqld_int_a_SOURCES= $(libmysqld_sources) $(libmysqlsources) $(sqlsources) $(sqlexamplessources) $(sqlsphinxsources) + libmysqld_a_SOURCES= + + # automake misses these +@@ -133,12 +134,16 @@ + rm -f $$f; \ + @LN_CP_F@ $(top_srcdir)/sql/examples/$$f $$f; \ + done; \ ++ for f in $(sqlsphinxsources); do \ ++ rm -f $$f; \ ++ @LN_CP_F@ $(top_srcdir)/sql/sphinx/$$f $$f; \ ++ done; \ + rm -f client_settings.h; \ + @LN_CP_F@ $(top_srcdir)/libmysql/client_settings.h client_settings.h + + + clean-local: +- rm -f `echo $(sqlsources) $(libmysqlsources) $(sqlexamplessources) | sed "s;\.lo;.c;g"` \ ++ rm -f `echo $(sqlsources) $(libmysqlsources) $(sqlexamplessources) $(sqlsphinxsources) | sed "s;\.lo;.c;g"` \ + $(top_srcdir)/linked_libmysqld_sources; \ + rm -f client_settings.h + +diff -B -N -r -u mysql-5.0.22/sql/handler.cc mysql-5.0.22.sx/sql/handler.cc +--- mysql-5.0.22/sql/handler.cc 2006-05-25 10:56:42.000000000 +0200 ++++ mysql-5.0.22.sx/sql/handler.cc 2006-06-06 
19:49:38.000000000 +0200 +@@ -78,6 +78,15 @@ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + HTON_NO_FLAGS }; + #endif ++#ifdef HAVE_SPHINX_DB ++#include "sphinx/ha_sphinx.h" ++extern handlerton sphinx_hton; ++#else ++handlerton sphinx_hton = { "SPHINX", SHOW_OPTION_NO, "SPHINX storage engine", ++ DB_TYPE_SPHINX_DB, NULL, 0, 0, NULL, NULL, ++ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ++ HTON_NO_FLAGS }; ++#endif + #ifdef HAVE_INNOBASE_DB + #include "ha_innodb.h" + extern handlerton innobase_hton; +@@ -147,6 +156,7 @@ + &example_hton, + &archive_hton, + &tina_hton, ++ &sphinx_hton, + &ndbcluster_hton, + &federated_hton, + &myisammrg_hton, +@@ -345,6 +355,12 @@ + return new (alloc) ha_tina(table); + return NULL; + #endif ++#ifdef HAVE_SPHINX_DB ++ case DB_TYPE_SPHINX_DB: ++ if (have_sphinx_db == SHOW_OPTION_YES) ++ return new (alloc) ha_sphinx(table); ++ return NULL; ++#endif + #ifdef HAVE_NDBCLUSTER_DB + case DB_TYPE_NDBCLUSTER: + if (have_ndbcluster == SHOW_OPTION_YES) +diff -B -N -r -u mysql-5.0.22/sql/handler.h mysql-5.0.22.sx/sql/handler.h +--- mysql-5.0.22/sql/handler.h 2006-05-25 10:56:55.000000000 +0200 ++++ mysql-5.0.22.sx/sql/handler.h 2006-06-06 19:49:38.000000000 +0200 +@@ -183,8 +183,9 @@ + DB_TYPE_BERKELEY_DB, DB_TYPE_INNODB, + DB_TYPE_GEMINI, DB_TYPE_NDBCLUSTER, + DB_TYPE_EXAMPLE_DB, DB_TYPE_ARCHIVE_DB, DB_TYPE_CSV_DB, +- DB_TYPE_FEDERATED_DB, ++ DB_TYPE_FEDERATED_DB, + DB_TYPE_BLACKHOLE_DB, ++ DB_TYPE_SPHINX_DB, + DB_TYPE_DEFAULT // Must be last + }; + +diff -B -N -r -u mysql-5.0.22/sql/Makefile.am mysql-5.0.22.sx/sql/Makefile.am +--- mysql-5.0.22/sql/Makefile.am 2006-05-25 10:56:41.000000000 +0200 ++++ mysql-5.0.22.sx/sql/Makefile.am 2006-06-06 19:49:38.000000000 +0200 +@@ -66,6 +66,7 @@ + sql_array.h sql_cursor.h \ + examples/ha_example.h ha_archive.h \ + examples/ha_tina.h ha_blackhole.h \ ++ sphinx/ha_sphinx.h \ + ha_federated.h + mysqld_SOURCES = sql_lex.cc sql_handler.cc \ + item.cc item_sum.cc 
item_buff.cc item_func.cc \ +@@ -102,6 +103,7 @@ + sp_cache.cc parse_file.cc sql_trigger.cc \ + examples/ha_example.cc ha_archive.cc \ + examples/ha_tina.cc ha_blackhole.cc \ ++ sphinx/ha_sphinx.cc \ + ha_federated.cc + + gen_lex_hash_SOURCES = gen_lex_hash.cc +diff -B -N -r -u mysql-5.0.22/sql/mysqld.cc mysql-5.0.22.sx/sql/mysqld.cc +--- mysql-5.0.22/sql/mysqld.cc 2006-05-25 10:56:41.000000000 +0200 ++++ mysql-5.0.22.sx/sql/mysqld.cc 2006-06-06 19:49:38.000000000 +0200 +@@ -6420,6 +6420,11 @@ + #else + have_csv_db= SHOW_OPTION_NO; + #endif ++#ifdef HAVE_SPHINX_DB ++ have_sphinx_db= SHOW_OPTION_YES; ++#else ++ have_sphinx_db= SHOW_OPTION_NO; ++#endif + #ifdef HAVE_NDBCLUSTER_DB + have_ndbcluster=SHOW_OPTION_DISABLED; + #else +@@ -7457,6 +7462,7 @@ + #undef have_example_db + #undef have_archive_db + #undef have_csv_db ++#undef have_sphinx_db + #undef have_federated_db + #undef have_partition_db + #undef have_blackhole_db +@@ -7467,6 +7473,7 @@ + SHOW_COMP_OPTION have_example_db= SHOW_OPTION_NO; + SHOW_COMP_OPTION have_archive_db= SHOW_OPTION_NO; + SHOW_COMP_OPTION have_csv_db= SHOW_OPTION_NO; ++SHOW_COMP_OPTION have_sphinx_db= SHOW_OPTION_NO; + SHOW_COMP_OPTION have_federated_db= SHOW_OPTION_NO; + SHOW_COMP_OPTION have_partition_db= SHOW_OPTION_NO; + SHOW_COMP_OPTION have_blackhole_db= SHOW_OPTION_NO; +diff -B -N -r -u mysql-5.0.22/sql/mysql_priv.h mysql-5.0.22.sx/sql/mysql_priv.h +--- mysql-5.0.22/sql/mysql_priv.h 2006-05-25 10:56:43.000000000 +0200 ++++ mysql-5.0.22.sx/sql/mysql_priv.h 2006-06-06 19:49:38.000000000 +0200 +@@ -1279,6 +1279,12 @@ + #else + extern SHOW_COMP_OPTION have_csv_db; + #endif ++#ifdef HAVE_SPHINX_DB ++extern handlerton sphinx_hton; ++#define have_sphinx_db sphinx_hton.state ++#else ++extern SHOW_COMP_OPTION have_sphinx_db; ++#endif + #ifdef HAVE_FEDERATED_DB + extern handlerton federated_hton; + #define have_federated_db federated_hton.state +diff -B -N -r -u mysql-5.0.22/sql/set_var.cc mysql-5.0.22.sx/sql/set_var.cc +--- 
mysql-5.0.22/sql/set_var.cc 2006-05-25 10:56:41.000000000 +0200 ++++ mysql-5.0.22.sx/sql/set_var.cc 2006-06-06 19:49:38.000000000 +0200 +@@ -864,6 +864,7 @@ + {"have_compress", (char*) &have_compress, SHOW_HAVE}, + {"have_crypt", (char*) &have_crypt, SHOW_HAVE}, + {"have_csv", (char*) &have_csv_db, SHOW_HAVE}, ++ {"have_sphinx", (char*) &have_sphinx_db, SHOW_HAVE}, + {"have_dynamic_loading", (char*) &have_dlopen, SHOW_HAVE}, + {"have_example_engine", (char*) &have_example_db, SHOW_HAVE}, + {"have_federated_engine", (char*) &have_federated_db, SHOW_HAVE}, +diff -B -N -r -u mysql-5.0.22/sql/sql_lex.h mysql-5.0.22.sx/sql/sql_lex.h +--- mysql-5.0.22/sql/sql_lex.h 2006-05-25 10:56:41.000000000 +0200 ++++ mysql-5.0.22.sx/sql/sql_lex.h 2006-06-06 19:49:38.000000000 +0200 +@@ -58,6 +58,7 @@ + SQLCOM_SHOW_DATABASES, SQLCOM_SHOW_TABLES, SQLCOM_SHOW_FIELDS, + SQLCOM_SHOW_KEYS, SQLCOM_SHOW_VARIABLES, SQLCOM_SHOW_LOGS, SQLCOM_SHOW_STATUS, + SQLCOM_SHOW_INNODB_STATUS, SQLCOM_SHOW_NDBCLUSTER_STATUS, SQLCOM_SHOW_MUTEX_STATUS, ++ SQLCOM_SHOW_SPHINX_STATUS, + SQLCOM_SHOW_PROCESSLIST, SQLCOM_SHOW_MASTER_STAT, SQLCOM_SHOW_SLAVE_STAT, + SQLCOM_SHOW_GRANTS, SQLCOM_SHOW_CREATE, SQLCOM_SHOW_CHARSETS, + SQLCOM_SHOW_COLLATIONS, SQLCOM_SHOW_CREATE_DB, SQLCOM_SHOW_TABLE_STATUS, +diff -B -N -r -u mysql-5.0.22/sql/sql_parse.cc mysql-5.0.22.sx/sql/sql_parse.cc +--- mysql-5.0.22/sql/sql_parse.cc 2006-05-25 10:56:41.000000000 +0200 ++++ mysql-5.0.22.sx/sql/sql_parse.cc 2006-06-06 19:49:38.000000000 +0200 +@@ -25,6 +25,9 @@ + #ifdef HAVE_INNOBASE_DB + #include "ha_innodb.h" + #endif ++#ifdef HAVE_SPHINX_DB ++#include "sphinx/ha_sphinx.h" ++#endif + + #ifdef HAVE_NDBCLUSTER_DB + #include "ha_ndbcluster.h" +@@ -2722,6 +2725,15 @@ + break; + } + #endif ++#ifdef HAVE_SPHINX_DB ++ case SQLCOM_SHOW_SPHINX_STATUS: ++ { ++ if (check_global_access(thd, SUPER_ACL)) ++ goto error; ++ res = sphinx_show_status(thd); ++ break; ++ } ++#endif + #ifdef HAVE_REPLICATION + case SQLCOM_LOAD_MASTER_TABLE: + { +diff -B 
-N -r -u mysql-5.0.22/sql/sql_yacc.yy mysql-5.0.22.sx/sql/sql_yacc.yy +--- mysql-5.0.22/sql/sql_yacc.yy 2006-05-25 10:56:43.000000000 +0200 ++++ mysql-5.0.22.sx/sql/sql_yacc.yy 2006-06-06 19:49:38.000000000 +0200 +@@ -6584,6 +6584,9 @@ + case DB_TYPE_INNODB: + Lex->sql_command = SQLCOM_SHOW_INNODB_STATUS; + break; ++ case DB_TYPE_SPHINX_DB: ++ Lex->sql_command = SQLCOM_SHOW_SPHINX_STATUS; ++ break; + default: + my_error(ER_NOT_SUPPORTED_YET, MYF(0), "STATUS"); + YYABORT; diff --git a/storage/sphinx/sphinx.5.0.37.diff b/storage/sphinx/sphinx.5.0.37.diff new file mode 100644 index 00000000000..3f86e545b4d --- /dev/null +++ b/storage/sphinx/sphinx.5.0.37.diff @@ -0,0 +1,338 @@ +--- mysql-5.0.67/config/ac-macros/ha_sphinx.m4 1970-01-01 10:00:00.000000000 +1000 ++++ mysql-5.0.67-sphinx/config/ac-macros/ha_sphinx.m4 2009-02-14 09:15:48.000000000 +1000 +@@ -0,0 +1,30 @@ ++dnl --------------------------------------------------------------------------- ++dnl Macro: MYSQL_CHECK_EXAMPLEDB ++dnl Sets HAVE_SPHINX_DB if --with-sphinx-storage-engine is used ++dnl --------------------------------------------------------------------------- ++AC_DEFUN([MYSQL_CHECK_SPHINXDB], [ ++ AC_ARG_WITH([sphinx-storage-engine], ++ [ ++ --with-sphinx-storage-engine ++ Enable the Sphinx Storage Engine], ++ [sphinxdb="$withval"], ++ [sphinxdb=no]) ++ AC_MSG_CHECKING([for example storage engine]) ++ ++ case "$sphinxdb" in ++ yes ) ++ AC_DEFINE([HAVE_SPHINX_DB], [1], [Builds Sphinx Engine]) ++ AC_MSG_RESULT([yes]) ++ [sphinxdb=yes] ++ ;; ++ * ) ++ AC_MSG_RESULT([no]) ++ [sphinxdb=no] ++ ;; ++ esac ++ ++]) ++dnl --------------------------------------------------------------------------- ++dnl END OF MYSQL_CHECK_EXAMPLE SECTION ++dnl --------------------------------------------------------------------------- ++ +--- mysql-5.0.67/configure.in 2008-08-04 23:19:07.000000000 +1100 ++++ mysql-5.0.67-sphinx/configure.in 2009-02-14 09:15:48.000000000 +1000 +@@ -58,6 +58,7 @@ + 
sinclude(config/ac-macros/ha_berkeley.m4) + sinclude(config/ac-macros/ha_blackhole.m4) + sinclude(config/ac-macros/ha_example.m4) ++sinclude(config/ac-macros/ha_sphinx.m4) + sinclude(config/ac-macros/ha_federated.m4) + sinclude(config/ac-macros/ha_innodb.m4) + sinclude(config/ac-macros/ha_ndbcluster.m4) +@@ -2625,6 +2626,7 @@ + MYSQL_CHECK_BDB + MYSQL_CHECK_INNODB + MYSQL_CHECK_EXAMPLEDB ++MYSQL_CHECK_SPHINXDB + MYSQL_CHECK_ARCHIVEDB + MYSQL_CHECK_CSVDB + MYSQL_CHECK_BLACKHOLEDB +--- mysql-5.0.67/libmysqld/Makefile.am 2008-08-04 23:19:18.000000000 +1100 ++++ mysql-5.0.67-sphinx/libmysqld/Makefile.am 2009-02-14 09:15:48.000000000 +1000 +@@ -29,6 +29,7 @@ + -I$(top_builddir)/include -I$(top_srcdir)/include \ + -I$(top_builddir)/sql -I$(top_srcdir)/sql \ + -I$(top_srcdir)/sql/examples \ ++ -I$(top_srcdir)/sql/sphinx \ + -I$(top_srcdir)/regex \ + $(openssl_includes) @ZLIB_INCLUDES@ + +@@ -39,6 +40,7 @@ + libmysqlsources = errmsg.c get_password.c libmysql.c client.c pack.c \ + my_time.c + sqlexamplessources = ha_example.cc ha_tina.cc ++sqlsphinxsources = ha_sphinx.cc + + noinst_HEADERS = embedded_priv.h emb_qcache.h + +@@ -67,7 +69,7 @@ + parse_file.cc sql_view.cc sql_trigger.cc my_decimal.cc \ + ha_blackhole.cc ha_archive.cc my_user.c + +-libmysqld_int_a_SOURCES= $(libmysqld_sources) $(libmysqlsources) $(sqlsources) $(sqlexamplessources) ++libmysqld_int_a_SOURCES= $(libmysqld_sources) $(libmysqlsources) $(sqlsources) $(sqlexamplessources) $(sqlsphinxsources) + libmysqld_a_SOURCES= + + # automake misses these +@@ -147,12 +149,16 @@ + rm -f $$f; \ + @LN_CP_F@ $(top_srcdir)/sql/examples/$$f $$f; \ + done; \ ++ for f in $(sqlsphinxsources); do \ ++ rm -f $$f; \ ++ @LN_CP_F@ $(top_srcdir)/sql/sphinx/$$f $$f; \ ++ done; \ + rm -f client_settings.h; \ + @LN_CP_F@ $(top_srcdir)/libmysql/client_settings.h client_settings.h + + + clean-local: +- rm -f `echo $(sqlsources) $(libmysqlsources) $(sqlexamplessources) | sed "s;\.lo;.c;g"` \ ++ rm -f `echo $(sqlsources) 
$(libmysqlsources) $(sqlexamplessources) $(sqlsphinxsources) | sed "s;\.lo;.c;g"` \ + $(top_srcdir)/linked_libmysqld_sources; \ + rm -f client_settings.h + +--- mysql-5.0.67/sql/handler.cc 2008-08-04 23:20:04.000000000 +1100 ++++ mysql-5.0.67-sphinx/sql/handler.cc 2009-02-14 09:15:48.000000000 +1000 +@@ -77,6 +77,15 @@ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + HTON_NO_FLAGS }; + #endif ++#ifdef HAVE_SPHINX_DB ++#include "sphinx/ha_sphinx.h" ++extern handlerton sphinx_hton; ++#else ++handlerton sphinx_hton = { "SPHINX", SHOW_OPTION_NO, "SPHINX storage engine", ++ DB_TYPE_SPHINX_DB, NULL, 0, 0, NULL, NULL, ++ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ++ HTON_NO_FLAGS }; ++#endif + #ifdef HAVE_INNOBASE_DB + #include "ha_innodb.h" + extern handlerton innobase_hton; +@@ -141,6 +150,7 @@ + &example_hton, + &archive_hton, + &tina_hton, ++ &sphinx_hton, + &ndbcluster_hton, + &federated_hton, + &myisammrg_hton, +@@ -341,6 +351,12 @@ + return new (alloc) ha_tina(table); + return NULL; + #endif ++#ifdef HAVE_SPHINX_DB ++ case DB_TYPE_SPHINX_DB: ++ if (have_sphinx_db == SHOW_OPTION_YES) ++ return new (alloc) ha_sphinx(table); ++ return NULL; ++#endif + #ifdef HAVE_NDBCLUSTER_DB + case DB_TYPE_NDBCLUSTER: + if (have_ndbcluster == SHOW_OPTION_YES) +--- mysql-5.0.67/sql/handler.h 2008-08-04 23:20:04.000000000 +1100 ++++ mysql-5.0.67-sphinx/sql/handler.h 2009-02-14 09:15:48.000000000 +1000 +@@ -186,8 +186,9 @@ + DB_TYPE_BERKELEY_DB, DB_TYPE_INNODB, + DB_TYPE_GEMINI, DB_TYPE_NDBCLUSTER, + DB_TYPE_EXAMPLE_DB, DB_TYPE_ARCHIVE_DB, DB_TYPE_CSV_DB, +- DB_TYPE_FEDERATED_DB, ++ DB_TYPE_FEDERATED_DB, + DB_TYPE_BLACKHOLE_DB, ++ DB_TYPE_SPHINX_DB, + DB_TYPE_DEFAULT // Must be last + }; + +--- mysql-5.0.67/sql/Makefile.am 2008-08-04 23:20:02.000000000 +1100 ++++ mysql-5.0.67-sphinx/sql/Makefile.am 2009-02-14 09:23:28.000000000 +1000 +@@ -68,6 +68,7 @@ + sql_array.h sql_cursor.h \ + examples/ha_example.h ha_archive.h \ + examples/ha_tina.h 
ha_blackhole.h \ ++ sphinx/ha_sphinx.h \ + ha_federated.h + mysqld_SOURCES = sql_lex.cc sql_handler.cc \ + item.cc item_sum.cc item_buff.cc item_func.cc \ +@@ -105,6 +106,7 @@ + sp_cache.cc parse_file.cc sql_trigger.cc \ + examples/ha_example.cc ha_archive.cc \ + examples/ha_tina.cc ha_blackhole.cc \ ++ sphinx/ha_sphinx.cc \ + ha_federated.cc + + gen_lex_hash_SOURCES = gen_lex_hash.cc +@@ -174,6 +176,10 @@ + udf_example_la_SOURCES= udf_example.c + udf_example_la_LDFLAGS= -module -rpath $(pkglibdir) + ++pkglib_LTLIBRARIES = sphinx/sphinx.la ++sphinx_sphinx_la_SOURCES = sphinx/snippets_udf.cc ++sphinx_sphinx_la_LDFLAGS = -module ++ + + # Don't update the files from bitkeeper + %::SCCS/s.% +--- mysql-5.0.67/sql/mysqld.cc 2008-08-04 23:20:07.000000000 +1100 ++++ mysql-5.0.67-sphinx/sql/mysqld.cc 2009-02-14 09:15:48.000000000 +1000 +@@ -36,6 +36,10 @@ + #include <sys/prctl.h> + #endif + ++#ifdef HAVE_SPHINX_DB ++#include "sphinx/ha_sphinx.h" ++#endif ++ + #ifdef HAVE_INNOBASE_DB + #define OPT_INNODB_DEFAULT 1 + #else +@@ -6633,6 +6637,13 @@ + {"Threads_running", (char*) &thread_running, SHOW_INT_CONST}, + {"Uptime", (char*) 0, SHOW_STARTTIME}, + {"Uptime_since_flush_status",(char*) 0, SHOW_FLUSHTIME}, ++#ifdef HAVE_SPHINX_DB ++ {"sphinx_total", (char *)sphinx_showfunc_total, SHOW_SPHINX_FUNC}, ++ {"sphinx_total_found", (char *)sphinx_showfunc_total_found, SHOW_SPHINX_FUNC}, ++ {"sphinx_time", (char *)sphinx_showfunc_time, SHOW_SPHINX_FUNC}, ++ {"sphinx_word_count", (char *)sphinx_showfunc_word_count, SHOW_SPHINX_FUNC}, ++ {"sphinx_words", (char *)sphinx_showfunc_words, SHOW_SPHINX_FUNC}, ++#endif + {NullS, NullS, SHOW_LONG} + }; + +@@ -6875,6 +6886,11 @@ + #else + have_csv_db= SHOW_OPTION_NO; + #endif ++#ifdef HAVE_SPHINX_DB ++ have_sphinx_db= SHOW_OPTION_YES; ++#else ++ have_sphinx_db= SHOW_OPTION_NO; ++#endif + #ifdef HAVE_NDBCLUSTER_DB + have_ndbcluster=SHOW_OPTION_DISABLED; + #else +@@ -7983,6 +7999,7 @@ + #undef have_example_db + #undef have_archive_db + #undef 
have_csv_db ++#undef have_sphinx_db + #undef have_federated_db + #undef have_partition_db + #undef have_blackhole_db +@@ -7993,6 +8010,7 @@ + SHOW_COMP_OPTION have_example_db= SHOW_OPTION_NO; + SHOW_COMP_OPTION have_archive_db= SHOW_OPTION_NO; + SHOW_COMP_OPTION have_csv_db= SHOW_OPTION_NO; ++SHOW_COMP_OPTION have_sphinx_db= SHOW_OPTION_NO; + SHOW_COMP_OPTION have_federated_db= SHOW_OPTION_NO; + SHOW_COMP_OPTION have_partition_db= SHOW_OPTION_NO; + SHOW_COMP_OPTION have_blackhole_db= SHOW_OPTION_NO; +--- mysql-5.0.67/sql/mysql_priv.h 2008-08-04 23:20:07.000000000 +1100 ++++ mysql-5.0.67-sphinx/sql/mysql_priv.h 2009-02-14 09:15:48.000000000 +1000 +@@ -1439,6 +1439,12 @@ + #else + extern SHOW_COMP_OPTION have_csv_db; + #endif ++#ifdef HAVE_SPHINX_DB ++extern handlerton sphinx_hton; ++#define have_sphinx_db sphinx_hton.state ++#else ++extern SHOW_COMP_OPTION have_sphinx_db; ++#endif + #ifdef HAVE_FEDERATED_DB + extern handlerton federated_hton; + #define have_federated_db federated_hton.state +--- mysql-5.0.67/sql/set_var.cc 2008-08-04 23:20:08.000000000 +1100 ++++ mysql-5.0.67-sphinx/sql/set_var.cc 2009-02-14 09:15:48.000000000 +1000 +@@ -888,6 +888,7 @@ + {"have_compress", (char*) &have_compress, SHOW_HAVE}, + {"have_crypt", (char*) &have_crypt, SHOW_HAVE}, + {"have_csv", (char*) &have_csv_db, SHOW_HAVE}, ++ {"have_sphinx", (char*) &have_sphinx_db, SHOW_HAVE}, + {"have_dynamic_loading", (char*) &have_dlopen, SHOW_HAVE}, + {"have_example_engine", (char*) &have_example_db, SHOW_HAVE}, + {"have_federated_engine", (char*) &have_federated_db, SHOW_HAVE}, +--- mysql-5.0.67/sql/sql_lex.h 2008-08-04 23:20:10.000000000 +1100 ++++ mysql-5.0.67-sphinx/sql/sql_lex.h 2009-02-14 09:15:48.000000000 +1000 +@@ -57,6 +57,7 @@ + SQLCOM_SHOW_DATABASES, SQLCOM_SHOW_TABLES, SQLCOM_SHOW_FIELDS, + SQLCOM_SHOW_KEYS, SQLCOM_SHOW_VARIABLES, SQLCOM_SHOW_LOGS, SQLCOM_SHOW_STATUS, + SQLCOM_SHOW_INNODB_STATUS, SQLCOM_SHOW_NDBCLUSTER_STATUS, SQLCOM_SHOW_MUTEX_STATUS, ++ SQLCOM_SHOW_SPHINX_STATUS, 
+ SQLCOM_SHOW_PROCESSLIST, SQLCOM_SHOW_MASTER_STAT, SQLCOM_SHOW_SLAVE_STAT, + SQLCOM_SHOW_GRANTS, SQLCOM_SHOW_CREATE, SQLCOM_SHOW_CHARSETS, + SQLCOM_SHOW_COLLATIONS, SQLCOM_SHOW_CREATE_DB, SQLCOM_SHOW_TABLE_STATUS, +--- mysql-5.0.67/sql/sql_parse.cc 2008-08-04 23:20:10.000000000 +1100 ++++ mysql-5.0.67-sphinx/sql/sql_parse.cc 2009-02-14 09:15:48.000000000 +1000 +@@ -24,6 +24,9 @@ + #ifdef HAVE_INNOBASE_DB + #include "ha_innodb.h" + #endif ++#ifdef HAVE_SPHINX_DB ++#include "sphinx/ha_sphinx.h" ++#endif + + #ifdef HAVE_NDBCLUSTER_DB + #include "ha_ndbcluster.h" +@@ -3006,6 +3009,15 @@ + break; + } + #endif ++#ifdef HAVE_SPHINX_DB ++ case SQLCOM_SHOW_SPHINX_STATUS: ++ { ++ if (check_global_access(thd, SUPER_ACL)) ++ goto error; ++ res = sphinx_show_status(thd); ++ break; ++ } ++#endif + #ifdef HAVE_REPLICATION + case SQLCOM_LOAD_MASTER_TABLE: + { +--- mysql-5.0.67/sql/sql_yacc.yy 2008-08-04 23:20:12.000000000 +1100 ++++ mysql-5.0.67-sphinx/sql/sql_yacc.yy 2009-02-14 09:15:48.000000000 +1000 +@@ -7393,6 +7393,9 @@ + case DB_TYPE_INNODB: + Lex->sql_command = SQLCOM_SHOW_INNODB_STATUS; + break; ++ case DB_TYPE_SPHINX_DB: ++ Lex->sql_command = SQLCOM_SHOW_SPHINX_STATUS; ++ break; + default: + my_error(ER_NOT_SUPPORTED_YET, MYF(0), "STATUS"); + MYSQL_YYABORT; +--- mysql-5.0.67/sql/structs.h 2008-08-04 23:20:12.000000000 +1100 ++++ mysql-5.0.67-sphinx/sql/structs.h 2009-02-14 09:15:48.000000000 +1000 +@@ -188,6 +188,9 @@ + SHOW_SSL_CTX_SESS_TIMEOUTS, SHOW_SSL_CTX_SESS_CACHE_FULL, + SHOW_SSL_GET_CIPHER_LIST, + #endif /* HAVE_OPENSSL */ ++#ifdef HAVE_SPHINX_DB ++ SHOW_SPHINX_FUNC, ++#endif + SHOW_NET_COMPRESSION, + SHOW_RPL_STATUS, SHOW_SLAVE_RUNNING, SHOW_SLAVE_RETRIED_TRANS, + SHOW_KEY_CACHE_LONG, SHOW_KEY_CACHE_CONST_LONG, SHOW_KEY_CACHE_LONGLONG, +--- mysql-5.0.67/sql/sql_show.cc 2008-08-04 23:20:11.000000000 +1100 ++++ mysql-5.0.67-sphinx/sql/sql_show.cc 2009-02-14 09:15:48.000000000 +1000 +@@ -1473,6 +1473,16 @@ + value= (char*) ((sys_var*) value)->value_ptr(thd, 
value_type, + &null_lex_str); + } ++ #ifdef HAVE_SPHINX_DB ++ else if (show_type == SHOW_SPHINX_FUNC) ++ { ++ SHOW_VAR var; ++ ((int (*)(THD *, SHOW_VAR *, char *))value)(thd, &var, buff); ++ ++ value = var.value; ++ show_type = var.type; ++ } ++ #endif /* HAVE_SPHINX_DB */ + + pos= end= buff; + switch (show_type) { diff --git a/storage/xtradb/CMakeLists.txt b/storage/xtradb/CMakeLists.txt index 38a9700df20..759bb525bdf 100644 --- a/storage/xtradb/CMakeLists.txt +++ b/storage/xtradb/CMakeLists.txt @@ -31,7 +31,7 @@ ENDIF (CMAKE_SIZEOF_VOID_P MATCHES 8) ADD_DEFINITIONS(-D_WIN32 -D_LIB -DMYSQL_SERVER) -# Include directories under innobase +# Include directories under xtradb INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/xtradb/include ${CMAKE_SOURCE_DIR}/storage/xtradb/handler) @@ -49,7 +49,7 @@ IF (MSVC AND $(WIN64)) PROPERTIES COMPILE_FLAGS -Od) ENDIF (MSVC AND $(WIN64)) -SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c +SET(XTRADB_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c buf/buf0buddy.c buf/buf0buf.c buf/buf0flu.c buf/buf0lru.c buf/buf0rea.c data/data0data.c data/data0type.c dict/dict0boot.c dict/dict0crea.c dict/dict0dict.c dict/dict0load.c dict/dict0mem.c @@ -80,22 +80,11 @@ SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c trx/trx0i_s.c trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c usr/usr0sess.c - ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c + ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c ut/ut0list.c ut/ut0wqueue.c) -ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DIB_HAVE_PAUSE_INSTRUCTION) +# Windows atomics do not perform well. Disable Windows atomics by default. +# See bug#52102 for details. 
+#ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION) +ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION) -IF (MYSQL_VERSION_ID GREATER "50137") - MYSQL_STORAGE_ENGINE(INNOBASE) - # Use ha_innodb for plugin name, if plugin is built - GET_TARGET_PROPERTY(LIB_LOCATION ha_innobase LOCATION) - IF(LIB_LOCATION) - SET_TARGET_PROPERTIES(ha_innobase PROPERTIES OUTPUT_NAME ha_innodb) - ENDIF(LIB_LOCATION) -ELSE (MYSQL_VERSION_ID GREATER "50137") - IF (NOT SOURCE_SUBLIBS) - ADD_DEFINITIONS(-D_WIN32 -DMYSQL_SERVER) - ADD_LIBRARY(innobase STATIC ${INNOBASE_SOURCES}) - # Require mysqld_error.h, which is built as part of the GenError - ADD_DEPENDENCIES(innobase GenError) - ENDIF (NOT SOURCE_SUBLIBS) -ENDIF (MYSQL_VERSION_ID GREATER "50137") +MYSQL_STORAGE_ENGINE(XTRADB) diff --git a/storage/xtradb/ChangeLog b/storage/xtradb/ChangeLog index 1a6e07fd147..5ebcf1e87a2 100644 --- a/storage/xtradb/ChangeLog +++ b/storage/xtradb/ChangeLog @@ -1,3 +1,393 @@ +2010-06-24 The InnoDB Team + + * handler/ha_innodb.cc: + Fix Bug#54679 alter table causes compressed row_format to revert + to compact + +2010-06-22 The InnoDB Team + + * dict/dict0dict.c, dict/dict0mem.c, include/dict0mem.h, + include/univ.i, page/page0zip.c, row/row0merge.c: + Fix Bug#47991 InnoDB Dictionary Cache memory usage increases + indefinitely when renaming tables + +2010-06-22 The InnoDB Team + + * handler/ha_innodb.cc: + Fix Bug#54686: "field->col->mtype == type" assertion error at + row/row0sel.c + +2010-06-22 The InnoDB Team + + * handler/ha_innodb.cc, innodb_bug54044.result, innodb_bug54044.test: + Fix Bug#54044 Create temporary tables and using innodb crashes. 
+ +2010-06-21 The InnoDB Team + + * dict/dict0load.c, fil/fil0fil.c: + Fix Bug#54658: InnoDB: Warning: allocated tablespace %lu, + old maximum was 0 (introduced in Bug #53578 fix) + +2010-06-16 The InnoDB Team + + * row/row0merge.c: + Fix Bug#54330 Broken fast index creation + +2010-06-10 The InnoDB Team + + * include/log0log.ic, row/row0ins.c, row/row0purge.c, + row/row0uins.c, row/row0umod.c, row/row0upd.c: + Fix Bug#39168 ERROR: the age of the last checkpoint ... exceeds + the log group capacity + +2010-06-08 The InnoDB Team + + * dict/dict0load.c: + Fix Bug#54009 Server crashes when data is selected from non backed + up table for InnoDB plugin + +2010-06-02 The InnoDB Team + + * include/db0err.h, include/lock0lock.h, include/row0mysql.h, + lock/lock0lock.c, row/row0ins.c, row/row0mysql.c, row/row0sel.c: + Fix Bug#53674 InnoDB: Error: unlock row could not find a + 4 mode lock on the record + +2010-06-01 The InnoDB Team + + * include/sync0rw.h, sync/sync0rw.c: + Fix Bug#48197 Concurrent rw_lock_free may cause assertion failure + +2010-06-01 The InnoDB Team + + * row/row0umod.c: + Fix Bug#53812 assert row/row0umod.c line 660 in txn rollback + after crash recovery + +2010-05-25 The InnoDB Team + + * handler/ha_innodb.cc, include/row0mysql.h, row/row0mysql.c: + Fix Bug#53592: crash replacing duplicates into table after fast + alter table added unique key + +2010-05-24 The InnoDB Team + + * dict/dict0boot.c, dict/dict0crea.c, fil/fil0fil.c, + include/dict0boot.h, include/fil0fil.h, row/row0mysql.c: + Fix Bug#53578: assert on invalid page access, in fil_io() + +2010-05-14 The InnoDB Team + * mysql-test/innodb_bug48024.test, mysql-test/innodb_bug48024.result, + dict/dict0dict.c, handler/ha_innodb.cc, handler/ha_innodb.h, + include/dict0dict.h, include/ha_prototypes.h, include/row0mysql.h, + include/trx0trx.h, row/row0mysql.c, trx/trx0i_s.c, trx/trx0trx.c: + Fix Bug#48024 Innodb doesn't work with multi-statements + Fix Bug#53644 InnoDB thinks that /*/ starts and ends a 
comment + +2010-05-12 The InnoDB Team + + * handler/handler0alter.cc: + Fix Bug#53591 crash with fast alter table and text/blob prefix + primary key + +2010-05-12 The InnoDB Team + + * row/row0merge.c: + Fix Bug#53471 row_merge_drop_temp_indexes() refers freed memory, SEGVs + +2010-05-11 The InnoDB Team + + * mysql-test/innodb_bug53290.test, mysql-test/innodb_bug53290.result, + include/rem0cmp.h, rem/rem0cmp.c, row/row0merge.c: + Fix Bug#53290 wrong duplicate key error when adding a unique index + via fast alter table + +2010-05-11 The InnoDB Team + * buf/buf0lru.c, include/buf0buf.ic: + Fix Bug#53307 valgrind: warnings in main.partition_innodb_plugin + +2010-05-05 The InnoDB Team + + * row/row0merge.c: + Fix Bug#53256 in a stress test, assert dict/dict0dict.c:815 + table2 == NULL + +2010-05-05 The InnoDB Team + + * handler/ha_innodb.cc: + Fix Bug#53165 Setting innodb_change_buffering=DEFAULT produces + incorrect result + +2010-05-04 The InnoDB Team + + * fsp/fsp0fsp.c: + Fix Bug#53306 valgrind: warnings in innodb.innodb + +2010-05-03 The InnoDB Team + + * buf0buf.c: + Fix Bug#53248 compressed tables page checksum mismatch after + re-enabling innodb_checksums + +2010-04-28 The InnoDB Team + + * log/log0recv.h, log/log0recv.c: + Fix Bug#53122 InnoDB recovery uses too big a hash table for redo + log records + +2010-04-27 The InnoDB Team + + * handler/ha_innodb.cc, lock/lock0lock.c, row/row0mysql.c, + row/row0sel.c: + Fix Bug#48607 READ UNCOMMITTED uses more locks than READ COMMITTED + in InnoDB 5.1+ + +2010-04-26 The InnoDB Team + + * row/row0sel.c: + Fix Bug#52663 Lost update incrementing column value under + READ COMMITTED isolation level + +2010-04-22 The InnoDB Team + + * include/dict0boot.h, dict/dict0boot.c: + Fix a bug that prevented the crash recovery of fast CREATE INDEX + from dropping partially created indexes. 
+ +2010-04-21 The InnoDB Team + + * btr/btr0btr.c: + Fix Bug#52964 Infinite loop in btr_page_split_and_insert() + in ROW_FORMAT=COMPRESSED + +2010-04-21 The InnoDB Team + + * data/data0data.c: + Fix Bug#52745 Failing assertion: blob_no < page_zip->n_blobs + +2010-04-20 The InnoDB Team + + * dict/dict0crea.c, handler/ha_innodb.cc, include/trx0trx.h: + Fix Bug#50495 'Row size too large' for plugin, but works for + built-in InnoDB + Only check the record size at index creation time when + innodb_strict_mode is set or when ROW_FORMAT is DYNAMIC or COMPRESSED. + +2010-04-15 The InnoDB Team + + * trx/trx0rec.c: + Fix Bug#52746 InnoDB purge thread crashed with table containing + prefix indexed blobs + +2010-03-31 The InnoDB Team + + * mysql-test/innodb_bug51920.test, mysql-test/innodb_bug51920.result, + srv/srv0srv.c: + Fix Bug#51920 InnoDB connections in row lock wait ignore KILL + until lock wait timeout + +2010-03-31 The InnoDB Team + + * mysql-test/innodb_bug38231.test: + Remove non-determinism in the test case. + +2010-03-29 The InnoDB Team + + InnoDB Plugin 1.0.7 released + +2010-03-18 The InnoDB Team + + * CMakeLists.txt: + Fix Bug#52102 InnoDB Plugin shows performance drop compared to + InnoDB (Windows) + +2010-03-18 The InnoDB Team + + * buf0buf.ic: + When comparing the time of the first access to a block against + innodb_old_blocks_time, use 32-bit arithmetic. The comparison was + incorrect on 64-bit systems. + +2010-03-11 The InnoDB Team + + * buf0buf.h, buf0buf.ic: + Fix and clarify the latching of some buf_block_t members. + Note that check_index_page_at_flush is not protected by any mutex. + Note and assert that lock_hash_val is protected by the rw-latch. 
+ +2010-03-10 The InnoDB Team + + * trx/trx0sys.c: + Fix Bug#51653 outdated reference to set-variable + +2010-03-10 The InnoDB Team + + * handler/ha_innodb.cc, mysql-test/innodb_bug21704.result, + mysql-test/innodb_bug47621.result, mysql-test/innodb_bug47621.test: + Fix Bug#47621 MySQL and InnoDB data dictionaries will become out of + sync when renaming columns + +2010-03-10 The InnoDB Team + + * handler/ha_innodb.cc: + Fix Bug#51356 Many Valgrind errors in error messages + with concurrent DDL + +2010-03-10 The InnoDB Team + + * handler/ha_innodb.cc, handler/handler0alter.cc, + mysql-test/innodb_bug51378.result, mysql-test/innodb_bug51378.test: + Fix Bug#51378 Init 'ref_length' to correct value, in case an out + of bound MySQL primary_key + +2010-03-10 The InnoDB Team + + * log/log0recv.c: + Remove a bogus assertion about page numbers exceeding 0x90000000 + in the redo log. Abort when encountering a corrupted redo log + record, unless innodb_force_recovery is set. + +2010-03-09 The InnoDB Team + + * handler/ha_innodb.cc: + Make SHOW ENGINE INNODB MUTEX STATUS display SUM(os_waits) + for the buffer pool block mutexes and locks. + +2010-03-08 The InnoDB Team + + * fil/fil0fil.c: + Fix ALTER TABLE ... IMPORT TABLESPACE of compressed tables. + +2010-03-03 The InnoDB Team + + * handler/handler0alter.cc, innodb-index.result, innodb-index.test, + innodb.result, innodb.test: + Disallow a duplicate index name when creating an index. + +2010-02-11 The InnoDB Team + + * include/mem0mem.h, include/mem0mem.ic, mem/mem0mem.c: + Fix Bug#49535 Available memory check slows down crash + recovery tens of times + +2010-02-09 The InnoDB Team + + * buf/buf0buf.c: + Fix Bug#38901 InnoDB logs error repeatedly when trying to load + page into buffer pool + +2010-02-09 The InnoDB Team + + * srv/srv0srv.c: + Let the master thread sleep if the amount of work to be done is + calibrated as taking less than a second. 
+ +2010-02-04 The InnoDB Team + + * btr/btr0btr.c, btr/btr0cur.c, btr/btr0pcur.c, buf/buf0buf.c, + include/btr0btr.h, include/btr0cur.h, include/btr0pcur.h, + include/btr0pcur.ic, include/buf0buf.h, row/row0ins.c, row/row0sel.c: + Pass the file name and line number of the caller of the + b-tree cursor functions to the buffer pool requests, in order + to make the latch diagnostics more accurate. + +2010-02-03 The InnoDB Team + + * lock/lock0lock.c: + Fix Bug#49001 SHOW INNODB STATUS deadlock info incorrect + when deadlock detection aborts + +2010-02-03 The InnoDB Team + + * buf/buf0lru.c: + Fix Bug#35077 Very slow DROP TABLE (ALTER TABLE, OPTIMIZE TABLE) + on compressed tables + +2010-02-03 The InnoDB Team + + * handler/ha_innodb.cc, include/row0mysql.h, row/row0mysql.c: + Clean up CHECK TABLE error handling. + +2010-02-01 The InnoDB Team + + * handler/ha_innodb.cc, mysql-test/innodb-autoinc.test, + mysql-test/innodb-autoinc.result, + mysql-test/innodb-autoinc-44030.test, + mysql-test/innodb-autoinc-44030.result: + Fix Bug#49497 Error 1467 (ER_AUTOINC_READ_FAILED) on inserting + a negative value + +2010-01-28 The InnoDB Team + * handler/ha_innodb.h, handler/ha_innodb.cc, + handler/handler0alter.cc, + mysql-test/innodb_bug47622.test, + mysql-test/innodb_bug47622.result: + Fix Bug#47622 the new index is added before the existing ones + in MySQL, but after one in SE + +2010-01-27 The InnoDB Team + + * include/row0mysql.h, log/log0recv.c, row/row0mysql.c: + Drop temporary tables at startup. + This addresses the third aspect of + Bug#41609 Crash recovery does not work for InnoDB temporary tables. + +2010-01-21 The InnoDB Team + + * buf/buf0buf.c: + Do not merge buffered inserts to compressed pages before + the redo log has been applied in crash recovery. + +2010-01-13 The InnoDB Team + + * row/row0sel.c: + On the READ UNCOMMITTED isolation level, do not attempt to access + a clustered index record that has been marked for deletion. 
The + built-in InnoDB in MySQL 5.1 and earlier would attempt to retrieve + a previous version of the record in this case. + +2010-01-13 The InnoDB Team + + * buf/buf0buf.c: + When disabling the adaptive hash index, check the block state + before checking block->is_hashed, because the latter may be + uninitialized right after server startup. + +2010-01-12 The InnoDB Team + + * handler/ha_innodb.cc, handler/ha_innodb.h: + Fix Bug#46193 crash when accessing tables after enabling + innodb_force_recovery option + +2010-01-12 The InnoDB Team + + * row/row0mysql.c: + Fix Bug#49238 Creating/Dropping a temporary table while at 1023 + transactions will cause assert. + +2009-12-02 The InnoDB Team + + * srv/srv0start.c: + Display the zlib version number at startup. + InnoDB compressed tables use zlib, and the implementation depends + on the zlib function compressBound(), whose definition was slightly + changed in zlib version 1.2.3.1 in 2006. MySQL bundles zlib 1.2.3 + from 2005, but some installations use a more recent zlib. + +2009-11-30 The InnoDB Team + + * dict/dict0crea.c, dict/dict0mem.c, dict/dict0load.c, + dict/dict0boot.c, fil/fil0fil.c, handler/ha_innodb.cc, + include/dict0mem.h, row/row0mysql.c: + Fix the bogus warning messages for non-existing temporary + tables that were reported in + Bug#41609 Crash recovery does not work for InnoDB temporary tables. + The actual crash recovery bug was corrected on 2009-04-29. 
+ +2009-11-27 The InnoDB Team + + InnoDB Plugin 1.0.6 released + 2009-11-20 The InnoDB Team * handler/ha_innodb.cc: @@ -79,8 +469,8 @@ sync/sync0arr.c, sync/sync0sync.c, thr/thr0loc.c, trx/trx0i_s.c, trx/trx0purge.c, trx/trx0rseg.c, trx/trx0sys.c, trx/trx0undo.c, usr/usr0sess.c, ut/ut0mem.c: - Fix Bug #45992 innodb memory not freed after shutdown - Fix Bug #46656 InnoDB plugin: memory leaks (Valgrind) + Fix Bug#45992 innodb memory not freed after shutdown + Fix Bug#46656 InnoDB plugin: memory leaks (Valgrind) 2009-10-29 The InnoDB Team @@ -422,7 +812,7 @@ * dict/dict0dict.c: When an index column cannot be found in the table during index creation, display additional diagnostic before an assertion failure. - This does NOT fix Bug #44571 InnoDB Plugin crashes on ADD INDEX, + This does NOT fix Bug#44571 InnoDB Plugin crashes on ADD INDEX, but it helps understand the reason of the crash. 2009-06-17 The InnoDB Team @@ -535,6 +925,12 @@ Fix Bug#44320 InnoDB: missing DB_ROLL_PTR in Table Monitor COLUMNS output +2009-04-29 The InnoDB Team + + * fil/fil0fil.c, include/fil0fil.h, include/mtr0mtr.h, + log/log0recv.c: + Fix Bug#41609 Crash recovery does not work for InnoDB temporary tables + 2009-04-23 The InnoDB Team * row/row0mysql.c: diff --git a/storage/xtradb/Makefile.am b/storage/xtradb/Makefile.am index 53413dfaeb8..3813b6602b8 100644 --- a/storage/xtradb/Makefile.am +++ b/storage/xtradb/Makefile.am @@ -217,6 +217,7 @@ noinst_HEADERS= \ include/ut0lst.h \ include/ut0mem.h \ include/ut0mem.ic \ + include/ut0rbt.h \ include/ut0rnd.h \ include/ut0rnd.ic \ include/ut0sort.h \ @@ -228,9 +229,9 @@ noinst_HEADERS= \ handler/innodb_patch_info.h \ mem/mem0dbg.c -EXTRA_LIBRARIES= libinnobase.a -noinst_LIBRARIES= @plugin_innobase_static_target@ -libinnobase_a_SOURCES= \ +EXTRA_LIBRARIES= libxtradb.a +noinst_LIBRARIES= @plugin_xtradb_static_target@ +libxtradb_a_SOURCES= \ btr/btr0btr.c \ btr/btr0cur.c \ btr/btr0pcur.c \ @@ -319,21 +320,22 @@ libinnobase_a_SOURCES= \ ut/ut0dbg.c \ 
ut/ut0list.c \ ut/ut0mem.c \ + ut/ut0rbt.c \ ut/ut0rnd.c \ ut/ut0ut.c \ ut/ut0vec.c \ ut/ut0wqueue.c -libinnobase_a_CXXFLAGS= $(AM_CFLAGS) -libinnobase_a_CFLAGS= $(AM_CFLAGS) +libxtradb_a_CXXFLAGS= $(AM_CFLAGS) +libxtradb_a_CFLAGS= $(AM_CFLAGS) -EXTRA_LTLIBRARIES= ha_innodb.la -pkgplugin_LTLIBRARIES= @plugin_innobase_shared_target@ +EXTRA_LTLIBRARIES= ha_xtradb.la +pkgplugin_LTLIBRARIES= @plugin_xtradb_shared_target@ -ha_innodb_la_LDFLAGS= -module -rpath $(pkgplugindir) -ha_innodb_la_CXXFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) -ha_innodb_la_CFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) -ha_innodb_la_SOURCES= $(libinnobase_a_SOURCES) +ha_xtradb_la_LDFLAGS= -module -rpath $(pkgplugindir) -L$(top_builddir)/libservices -lmysqlservices +ha_xtradb_la_CXXFLAGS= -shared $(AM_CXXFLAGS) $(INNODB_DYNAMIC_CFLAGS) +ha_xtradb_la_CFLAGS= -shared $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) +ha_xtradb_la_SOURCES= $(libxtradb_a_SOURCES) EXTRA_DIST= CMakeLists.txt plug.in \ pars/make_bison.sh pars/make_flex.sh \ diff --git a/storage/xtradb/btr/btr0btr.c b/storage/xtradb/btr/btr0btr.c index 520c18553ea..ff047095aa4 100644 --- a/storage/xtradb/btr/btr0btr.c +++ b/storage/xtradb/btr/btr0btr.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -604,13 +604,15 @@ an x-latch on the tree. 
@return rec_get_offsets() of the node pointer record */ static ulint* -btr_page_get_father_node_ptr( -/*=========================*/ +btr_page_get_father_node_ptr_func( +/*==============================*/ ulint* offsets,/*!< in: work area for the return value */ mem_heap_t* heap, /*!< in: memory heap to use */ btr_cur_t* cursor, /*!< in: cursor pointing to user record, out: cursor on node pointer record, its page x-latched */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mtr */ { dtuple_t* tuple; @@ -634,7 +636,8 @@ btr_page_get_father_node_ptr( tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level); btr_cur_search_to_nth_level(index, level + 1, tuple, PAGE_CUR_LE, - BTR_CONT_MODIFY_TREE, cursor, 0, mtr); + BTR_CONT_MODIFY_TREE, cursor, 0, + file, line, mtr); node_ptr = btr_cur_get_rec(cursor); ut_ad(!page_rec_is_comp(node_ptr) @@ -682,6 +685,9 @@ btr_page_get_father_node_ptr( return(offsets); } +#define btr_page_get_father_node_ptr(of,heap,cur,mtr) \ + btr_page_get_father_node_ptr_func(of,heap,cur,__FILE__,__LINE__,mtr) + /************************************************************//** Returns the upper level node pointer to a page. It is assumed that mtr holds an x-latch on the tree. @@ -1475,11 +1481,11 @@ Calculates a split record such that the tuple will certainly fit on its half-page when the split is performed. We assume in this function only that the cursor page has at least one user record. 
@return split record, or NULL if tuple will be the first record on -upper half-page */ +the lower or upper half-page (determined by btr_page_tuple_smaller()) */ static rec_t* -btr_page_get_sure_split_rec( -/*========================*/ +btr_page_get_split_rec( +/*===================*/ btr_cur_t* cursor, /*!< in: cursor at which insert should be made */ const dtuple_t* tuple, /*!< in: tuple to insert */ ulint n_ext) /*!< in: number of externally stored columns */ @@ -1692,11 +1698,13 @@ Inserts a data tuple to a tree on a non-leaf level. It is assumed that mtr holds an x-latch on the tree. */ UNIV_INTERN void -btr_insert_on_non_leaf_level( -/*=========================*/ +btr_insert_on_non_leaf_level_func( +/*==============================*/ dict_index_t* index, /*!< in: index */ ulint level, /*!< in: level, must be > 0 */ dtuple_t* tuple, /*!< in: the record to be inserted */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mtr */ { big_rec_t* dummy_big_rec; @@ -1708,7 +1716,7 @@ btr_insert_on_non_leaf_level( btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE, BTR_CONT_MODIFY_TREE, - &cursor, 0, mtr); + &cursor, 0, file, line, mtr); err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG @@ -1854,6 +1862,37 @@ btr_attach_half_pages( } /*************************************************************//** +Determine if a tuple is smaller than any record on the page. +@return TRUE if smaller */ +static +ibool +btr_page_tuple_smaller( +/*===================*/ + btr_cur_t* cursor, /*!< in: b-tree cursor */ + const dtuple_t* tuple, /*!< in: tuple to consider */ + ulint* offsets,/*!< in/out: temporary storage */ + ulint n_uniq, /*!< in: number of unique fields + in the index page records */ + mem_heap_t** heap) /*!< in/out: heap for offsets */ +{ + buf_block_t* block; + const rec_t* first_rec; + page_cur_t pcur; + + /* Read the first user record in the page. 
*/ + block = btr_cur_get_block(cursor); + page_cur_set_before_first(block, &pcur); + page_cur_move_to_next(&pcur); + first_rec = page_cur_get_rec(&pcur); + + offsets = rec_get_offsets( + first_rec, cursor->index, offsets, + n_uniq, heap); + + return(cmp_dtuple_rec(tuple, first_rec, offsets) < 0); +} + +/*************************************************************//** Splits an index page to halves and inserts the tuple. It is assumed that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is released within this function! NOTE that the operation of this @@ -1923,12 +1962,17 @@ func_start: /* 1. Decide the split record; split_rec == NULL means that the tuple to be inserted should be the first record on the upper half-page */ + insert_left = FALSE; if (n_iterations > 0) { direction = FSP_UP; hint_page_no = page_no + 1; - split_rec = btr_page_get_sure_split_rec(cursor, tuple, n_ext); + split_rec = btr_page_get_split_rec(cursor, tuple, n_ext); + if (UNIV_UNLIKELY(split_rec == NULL)) { + insert_left = btr_page_tuple_smaller( + cursor, tuple, offsets, n_uniq, &heap); + } } else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) { direction = FSP_UP; hint_page_no = page_no + 1; @@ -1936,37 +1980,24 @@ func_start: } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) { direction = FSP_DOWN; hint_page_no = page_no - 1; + ut_ad(split_rec); } else { direction = FSP_UP; hint_page_no = page_no + 1; - if (page_get_n_recs(page) == 1) { - page_cur_t pcur; - - /* There is only one record in the index page - therefore we can't split the node in the middle - by default. We need to determine whether the - new record will be inserted to the left or right. */ - - /* Read the first (and only) record in the page. */ - page_cur_set_before_first(block, &pcur); - page_cur_move_to_next(&pcur); - first_rec = page_cur_get_rec(&pcur); + /* If there is only one record in the index page, we + can't split the node in the middle by default. 
We need + to determine whether the new record will be inserted + to the left or right. */ - offsets = rec_get_offsets( - first_rec, cursor->index, offsets, - n_uniq, &heap); - - /* If the new record is less than the existing record - the split in the middle will copy the existing - record to the new node. */ - if (cmp_dtuple_rec(tuple, first_rec, offsets) < 0) { - split_rec = page_get_middle_rec(page); - } else { - split_rec = NULL; - } - } else { + if (page_get_n_recs(page) > 1) { split_rec = page_get_middle_rec(page); + } else if (btr_page_tuple_smaller(cursor, tuple, + offsets, n_uniq, &heap)) { + split_rec = page_rec_get_next( + page_get_infimum_rec(page)); + } else { + split_rec = NULL; } } @@ -1996,11 +2027,16 @@ func_start: avoid further splits by inserting the record to an empty page. */ split_rec = NULL; - goto insert_right; + goto insert_empty; } + } else if (UNIV_UNLIKELY(insert_left)) { + ut_a(n_iterations > 0); + first_rec = page_rec_get_next(page_get_infimum_rec(page)); + move_limit = page_rec_get_next(btr_cur_get_rec(cursor)); } else { -insert_right: - insert_left = FALSE; +insert_empty: + ut_ad(!split_rec); + ut_ad(!insert_left); buf = mem_alloc(rec_get_converted_size(cursor->index, tuple, n_ext)); @@ -2024,7 +2060,11 @@ insert_right: && btr_page_insert_fits(cursor, split_rec, offsets, tuple, n_ext, heap); } else { - mem_free(buf); + if (!insert_left) { + mem_free(buf); + buf = NULL; + } + insert_will_fit = !new_page_zip && btr_page_insert_fits(cursor, NULL, NULL, tuple, n_ext, heap); diff --git a/storage/xtradb/btr/btr0cur.c b/storage/xtradb/btr/btr0cur.c index 06b54bc7120..9b87d969a64 100644 --- a/storage/xtradb/btr/btr0cur.c +++ b/storage/xtradb/btr/btr0cur.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2008, Google Inc. 
Portions of this file contain modifications contributed and copyrighted by @@ -372,6 +372,8 @@ btr_cur_search_to_nth_level( ulint has_search_latch,/*!< in: info on the latch mode the caller currently has on btr_search_latch: RW_S_LATCH, or 0 */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mtr */ { page_cur_t* page_cursor; @@ -550,7 +552,7 @@ btr_cur_search_to_nth_level( retry_page_get: block = buf_page_get_gen(space, zip_size, page_no, rw_latch, guess, buf_mode, - __FILE__, __LINE__, mtr); + file, line, mtr); if (block == NULL) { if (srv_pass_corrupt_table && buf_mode != BUF_GET_IF_IN_POOL) { page_cursor->block = 0; @@ -727,13 +729,15 @@ func_exit: Opens a cursor at either end of an index. */ UNIV_INTERN void -btr_cur_open_at_index_side( -/*=======================*/ +btr_cur_open_at_index_side_func( +/*============================*/ ibool from_left, /*!< in: TRUE if open to the low end, FALSE if to the high end */ dict_index_t* index, /*!< in: index */ ulint latch_mode, /*!< in: latch mode */ btr_cur_t* cursor, /*!< in: cursor */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mtr */ { page_cur_t* page_cursor; @@ -778,7 +782,7 @@ btr_cur_open_at_index_side( page_t* page; block = buf_page_get_gen(space, zip_size, page_no, RW_NO_LATCH, NULL, BUF_GET, - __FILE__, __LINE__, mtr); + file, line, mtr); page = buf_block_get_frame(block); if (srv_pass_corrupt_table && !page) { @@ -869,11 +873,13 @@ btr_cur_open_at_index_side( Positions a cursor at a randomly chosen position within a B-tree. */ UNIV_INTERN void -btr_cur_open_at_rnd_pos( -/*====================*/ +btr_cur_open_at_rnd_pos_func( +/*=========================*/ dict_index_t* index, /*!< in: index */ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... 
*/ btr_cur_t* cursor, /*!< in/out: B-tree cursor */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mtr */ { page_cur_t* page_cursor; @@ -908,7 +914,7 @@ btr_cur_open_at_rnd_pos( block = buf_page_get_gen(space, zip_size, page_no, RW_NO_LATCH, NULL, BUF_GET, - __FILE__, __LINE__, mtr); + file, line, mtr); page = buf_block_get_frame(block); if (srv_pass_corrupt_table && !page) { @@ -1229,7 +1235,6 @@ btr_cur_optimistic_insert( ibool inherit; ulint zip_size; ulint rec_size; - mem_heap_t* heap = NULL; ulint err; *big_rec = NULL; @@ -1315,10 +1320,6 @@ btr_cur_optimistic_insert( index, entry, big_rec_vec); } - if (heap) { - mem_heap_free(heap); - } - return(DB_TOO_BIG_RECORD); } } @@ -1341,15 +1342,11 @@ fail_err: dtuple_convert_back_big_rec(index, entry, big_rec_vec); } - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(err); } if (UNIV_UNLIKELY(max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT - || max_size < rec_size) + || max_size < rec_size) && UNIV_LIKELY(page_get_n_recs(page) > 1) && page_get_max_insert_size(page, 1) < rec_size) { @@ -1415,10 +1412,6 @@ fail_err: } } - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - #ifdef BTR_CUR_HASH_ADAPT if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) { btr_search_update_hash_node_on_insert(cursor); @@ -2143,9 +2136,8 @@ any_extern: err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info, thr, mtr, &roll_ptr); if (err != DB_SUCCESS) { -err_exit: - mem_heap_free(heap); - return(err); + + goto err_exit; } /* Ok, we may do the replacement. 
Store on the page infimum the @@ -2191,9 +2183,10 @@ err_exit: page_cur_move_to_next(page_cursor); + err = DB_SUCCESS; +err_exit: mem_heap_free(heap); - - return(DB_SUCCESS); + return(err); } /*************************************************************//** @@ -3282,7 +3275,8 @@ btr_estimate_n_rows_in_range( btr_cur_search_to_nth_level(index, 0, tuple1, mode1, BTR_SEARCH_LEAF | BTR_ESTIMATE, - &cursor, 0, &mtr); + &cursor, 0, + __FILE__, __LINE__, &mtr); } else { btr_cur_open_at_index_side(TRUE, index, BTR_SEARCH_LEAF | BTR_ESTIMATE, @@ -3299,7 +3293,8 @@ btr_estimate_n_rows_in_range( btr_cur_search_to_nth_level(index, 0, tuple2, mode2, BTR_SEARCH_LEAF | BTR_ESTIMATE, - &cursor, 0, &mtr); + &cursor, 0, + __FILE__, __LINE__, &mtr); } else { btr_cur_open_at_index_side(FALSE, index, BTR_SEARCH_LEAF | BTR_ESTIMATE, @@ -3438,7 +3433,7 @@ btr_estimate_n_pages_not_null( btr_cur_search_to_nth_level(index, 0, tuple1, PAGE_CUR_G, BTR_SEARCH_LEAF | BTR_ESTIMATE, - &cursor, 0, &mtr); + &cursor, 0, __FILE__, __LINE__, &mtr); mtr_commit(&mtr); @@ -3588,9 +3583,11 @@ btr_estimate_number_of_different_key_vals( effective_pages = btr_estimate_n_pages_not_null(index, 1 /*k*/, first_rec_path); if (!effective_pages) { + dict_index_stat_mutex_enter(index); for (j = 0; j <= n_cols; j++) { index->stat_n_diff_key_vals[j] = (ib_int64_t)index->stat_n_leaf_pages; } + dict_index_stat_mutex_exit(index); return; } else if (effective_pages > index->stat_n_leaf_pages) { effective_pages = index->stat_n_leaf_pages; @@ -3732,6 +3729,8 @@ btr_estimate_number_of_different_key_vals( also the pages used for external storage of fields (those pages are included in index->stat_n_leaf_pages) */ + dict_index_stat_mutex_enter(index); + for (j = 0; j <= n_cols; j++) { index->stat_n_diff_key_vals[j] = ((n_diff[j] @@ -3770,8 +3769,9 @@ btr_estimate_number_of_different_key_vals( } } - mem_free(n_diff); + dict_index_stat_mutex_exit(index); + mem_free(n_diff); if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } @@ 
-4259,6 +4259,8 @@ btr_store_big_rec_extern_fields( field_ref += local_len; } extern_len = big_rec_vec->fields[i].len; + UNIV_MEM_ASSERT_RW(big_rec_vec->fields[i].data, + extern_len); ut_a(extern_len > 0); @@ -4639,7 +4641,7 @@ btr_free_externally_stored_field( /* In the rollback of uncommitted transactions, we may encounter a clustered index record whose BLOBs have not been written. There is nothing to free then. */ - ut_a(rb_ctx == RB_RECOVERY); + ut_a(rb_ctx == RB_RECOVERY || rb_ctx == RB_RECOVERY_PURGE_REC); return; } @@ -4685,7 +4687,7 @@ btr_free_externally_stored_field( || (mach_read_from_1(field_ref + BTR_EXTERN_LEN) & BTR_EXTERN_OWNER_FLAG) /* Rollback and inherited field */ - || (rb_ctx != RB_NONE + || ((rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY) && (mach_read_from_1(field_ref + BTR_EXTERN_LEN) & BTR_EXTERN_INHERITED_FLAG))) { @@ -4895,6 +4897,7 @@ btr_copy_blob_prefix( mtr_commit(&mtr); if (page_no == FIL_NULL || copy_len != part_len) { + UNIV_MEM_ASSERT_RW(buf, copied_len); return(copied_len); } @@ -5078,6 +5081,7 @@ btr_copy_externally_stored_field_prefix_low( space_id, page_no, offset); inflateEnd(&d_stream); mem_heap_free(heap); + UNIV_MEM_ASSERT_RW(buf, d_stream.total_out); return(d_stream.total_out); } else { return(btr_copy_blob_prefix(buf, len, space_id, diff --git a/storage/xtradb/btr/btr0pcur.c b/storage/xtradb/btr/btr0pcur.c index 86beea5a899..537c26f6bf2 100644 --- a/storage/xtradb/btr/btr0pcur.c +++ b/storage/xtradb/btr/btr0pcur.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -211,10 +211,12 @@ record and it can be restored on a user record whose ordering fields are identical to the ones of the original user record */ UNIV_INTERN ibool -btr_pcur_restore_position( -/*======================*/ +btr_pcur_restore_position_func( +/*===========================*/ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ btr_pcur_t* cursor, /*!< in: detached persistent cursor */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mtr */ { dict_index_t* index; @@ -223,6 +225,9 @@ btr_pcur_restore_position( ulint old_mode; mem_heap_t* heap; + ut_ad(mtr); + ut_ad(mtr->state == MTR_ACTIVE); + index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor)); if (UNIV_UNLIKELY(cursor->old_stored != BTR_PCUR_OLD_STORED) @@ -263,7 +268,8 @@ btr_pcur_restore_position( if (UNIV_LIKELY(buf_page_optimistic_get( latch_mode, cursor->block_when_stored, - cursor->modify_clock, mtr))) { + cursor->modify_clock, + file, line, mtr))) { cursor->pos_state = BTR_PCUR_IS_POSITIONED; buf_block_dbg_add_level(btr_pcur_get_block(cursor), @@ -318,8 +324,8 @@ btr_pcur_restore_position( mode = PAGE_CUR_L; } - btr_pcur_open_with_no_init(index, tuple, mode, latch_mode, - cursor, 0, mtr); + btr_pcur_open_with_no_init_func(index, tuple, mode, latch_mode, + cursor, 0, file, line, mtr); /* Restore the old search mode */ cursor->search_mode = old_mode; @@ -568,8 +574,8 @@ before first in tree. The latching mode must be BTR_SEARCH_LEAF or BTR_MODIFY_LEAF. */ UNIV_INTERN void -btr_pcur_open_on_user_rec( -/*======================*/ +btr_pcur_open_on_user_rec_func( +/*===========================*/ dict_index_t* index, /*!< in: index */ const dtuple_t* tuple, /*!< in: tuple on which search done */ ulint mode, /*!< in: PAGE_CUR_L, ... 
*/ @@ -577,9 +583,12 @@ btr_pcur_open_on_user_rec( BTR_MODIFY_LEAF */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mtr */ { - btr_pcur_open(index, tuple, mode, latch_mode, cursor, mtr); + btr_pcur_open_func(index, tuple, mode, latch_mode, cursor, + file, line, mtr); if ((mode == PAGE_CUR_GE) || (mode == PAGE_CUR_G)) { diff --git a/storage/xtradb/btr/btr0sea.c b/storage/xtradb/btr/btr0sea.c index 61909903a67..36dadd47e69 100644 --- a/storage/xtradb/btr/btr0sea.c +++ b/storage/xtradb/btr/btr0sea.c @@ -182,6 +182,7 @@ void btr_search_sys_free(void) /*=====================*/ { + rw_lock_free(&btr_search_latch); mem_free(btr_search_latch_temp); btr_search_latch_temp = NULL; mem_heap_free(btr_search_sys->hash_index->heap); diff --git a/storage/xtradb/buf/buf0buddy.c b/storage/xtradb/buf/buf0buddy.c index d5e45745757..e6b80bcda55 100644 --- a/storage/xtradb/buf/buf0buddy.c +++ b/storage/xtradb/buf/buf0buddy.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2006, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -430,6 +430,8 @@ buf_buddy_relocate_block( } mutex_exit(&flush_list_mutex); + UNIV_MEM_INVALID(bpage, sizeof *bpage); + mutex_exit(&buf_pool_zip_mutex); mutex_exit(&zip_free_mutex); return(TRUE); @@ -450,6 +452,8 @@ buf_buddy_relocate( buf_page_t* bpage; const ulint size = BUF_BUDDY_LOW << i; ullint usec = ut_time_us(NULL); + ulint space; + ulint page_no; //ut_ad(buf_pool_mutex_own()); ut_ad(mutex_own(&zip_free_mutex)); @@ -488,11 +492,15 @@ buf_buddy_relocate( pool), so there is nothing wrong about this. 
The mach_read_from_4() calls here will only trigger bogus Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */ - bpage = buf_page_hash_get( - mach_read_from_4((const byte*) src - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID), - mach_read_from_4((const byte*) src - + FIL_PAGE_OFFSET)); + space = mach_read_from_4( + (const byte*) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + page_no = mach_read_from_4( + (const byte*) src + FIL_PAGE_OFFSET); + /* Suppress Valgrind warnings about conditional jump + on uninitialized value. */ + UNIV_MEM_VALID(&space, sizeof space); + UNIV_MEM_VALID(&page_no, sizeof page_no); + bpage = buf_page_hash_get(space, page_no); if (!bpage || bpage->zip.data != src) { /* The block has probably been freshly @@ -567,7 +575,12 @@ success: } } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) { /* This must be a buf_page_t object. */ +#if UNIV_WORD_SIZE == 4 + /* On 32-bit systems, there is no padding in + buf_page_t. On other systems, Valgrind could complain + about uninitialized pad bytes. */ UNIV_MEM_ASSERT_RW(src, size); +#endif mutex_exit(&zip_free_mutex); diff --git a/storage/xtradb/buf/buf0buf.c b/storage/xtradb/buf/buf0buf.c index 79a9488e339..94a67c1759c 100644 --- a/storage/xtradb/buf/buf0buf.c +++ b/storage/xtradb/buf/buf0buf.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2008, Google Inc. 
Portions of this file contain modifications contributed and copyrighted by @@ -53,6 +53,10 @@ Created 11/5/1995 Heikki Tuuri #include "page0zip.h" #include "trx0trx.h" #include "srv0start.h" +#include "que0que.h" +#include "read0read.h" +#include "row0row.h" +#include "ha_prototypes.h" /* prototypes for new functions added to ha_innodb.cc */ trx_t* innobase_get_trx(); @@ -78,9 +82,9 @@ inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx) block_hash_byte = block_hash >> 3; block_hash_offset = (byte) block_hash & 0x07; if (block_hash_byte >= DPAH_SIZE) - fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %lu !!!\n", (unsigned long) block_hash_byte, (unsigned long) block_hash_offset); + fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset); if (block_hash_offset > 7) - fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %lu !!!\n", (unsigned long) block_hash_byte, (unsigned long) block_hash_offset); + fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset); if ((trx->distinct_page_access_hash[block_hash_byte] & ((byte) 0x01 << block_hash_offset)) == 0) trx->distinct_page_access++; trx->distinct_page_access_hash[block_hash_byte] |= (byte) 0x01 << block_hash_offset; @@ -277,6 +281,8 @@ the read requests for the whole area. #ifndef UNIV_HOTBACKUP /** Value in microseconds */ static const int WAIT_FOR_READ = 5000; +/** Number of attemtps made to read in a page in the buffer pool */ +static const ulint BUF_PAGE_READ_MAX_RETRIES = 100; /** The buffer buf_pool of the database */ UNIV_INTERN buf_pool_t* buf_pool = NULL; @@ -308,14 +314,30 @@ read-ahead or flush occurs */ UNIV_INTERN ibool buf_debug_prints = FALSE; #endif /* UNIV_DEBUG */ -/** A chunk of buffers. The buffer pool is allocated in chunks. 
*/ -struct buf_chunk_struct{ - ulint mem_size; /*!< allocated size of the chunk */ - ulint size; /*!< size of frames[] and blocks[] */ - void* mem; /*!< pointer to the memory area which - was allocated for the frames */ - buf_block_t* blocks; /*!< array of buffer control blocks */ +/* Buffer pool shared memory segment information */ +typedef struct buf_shm_info_struct buf_shm_info_t; + +struct buf_shm_info_struct { + char head_str[8]; + ulint binary_id; + ibool is_new; /* during initializing */ + ibool clean; /* clean shutdowned and free */ + ibool reusable; /* reusable */ + ulint buf_pool_size; /* backup value */ + ulint page_size; /* backup value */ + ulint frame_offset; /* offset of the first frame based on chunk->mem */ + ulint zip_hash_offset; + ulint zip_hash_n; + + ulint checksum; + + buf_pool_t buf_pool_backup; + buf_chunk_t chunk_backup; + + ib_uint64_t dummy; }; + +#define BUF_SHM_INFO_HEAD "XTRA_SHM" #endif /* !UNIV_HOTBACKUP */ /********************************************************************//** @@ -762,6 +784,45 @@ buf_block_init( #endif /* UNIV_SYNC_DEBUG */ } +static +void +buf_block_reuse( +/*============*/ + buf_block_t* block, + ptrdiff_t frame_offset) +{ + /* block_init */ + block->frame = ((byte*)(block->frame) + frame_offset); + + UNIV_MEM_DESC(block->frame, UNIV_PAGE_SIZE, block); + + block->index = NULL; + +#ifdef UNIV_DEBUG + /* recreate later */ + block->page.in_page_hash = FALSE; + block->page.in_zip_hash = FALSE; +#endif /* UNIV_DEBUG */ + +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + block->n_pointers = 0; +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + + if (block->page.zip.data) + block->page.zip.data = ((byte*)(block->page.zip.data) + frame_offset); + + block->is_hashed = FALSE; + + mutex_create(&block->mutex, SYNC_BUF_BLOCK); + + rw_lock_create(&block->lock, SYNC_LEVEL_VARYING); + ut_ad(rw_lock_validate(&(block->lock))); + +#ifdef UNIV_SYNC_DEBUG + rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK); +#endif /* 
UNIV_SYNC_DEBUG */ +} + /********************************************************************//** Allocates a chunk of buffer frames. @return chunk, or NULL on failure */ @@ -774,26 +835,167 @@ buf_chunk_init( { buf_block_t* block; byte* frame; + ulint zip_hash_n = 0; + ulint zip_hash_mem_size = 0; + hash_table_t* zip_hash_tmp = NULL; ulint i; + buf_shm_info_t* shm_info = NULL; /* Round down to a multiple of page size, although it already should be. */ mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE); + + if (srv_buffer_pool_shm_key) { + /* zip_hash size */ + zip_hash_n = (mem_size / UNIV_PAGE_SIZE) * 2; + zip_hash_mem_size = ut_2pow_round(hash_create_needed(zip_hash_n) + + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); + } + /* Reserve space for the block descriptors. */ mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block) + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); + if (srv_buffer_pool_shm_key) { + mem_size += ut_2pow_round(sizeof(buf_shm_info_t) + + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); + mem_size += zip_hash_mem_size; + } chunk->mem_size = mem_size; + + if (srv_buffer_pool_shm_key) { + ulint binary_id; + ibool is_new; + + ut_a(buf_pool->n_chunks == 1); + + fprintf(stderr, + "InnoDB: Notice: innodb_buffer_pool_shm_key option is specified.\n" + "InnoDB: This option may not be safe to keep consistency of datafiles.\n" + "InnoDB: Because InnoDB cannot lock datafiles when shutdown until reusing shared memory segment.\n" + "InnoDB: You should ensure no change of InnoDB files while using innodb_buffer_pool_shm_key.\n"); + + /* FIXME: This is vague id still */ + binary_id = (ulint) ((char*)mtr_commit - (char *)btr_root_get) + + (ulint) ((char *)os_get_os_version - (char *)buf_calc_page_new_checksum) + + (ulint) ((char *)page_dir_find_owner_slot - (char *)dfield_data_is_binary_equal) + + (ulint) ((char *)que_graph_publish - (char *)dict_casedn_str) + + (ulint) ((char *)read_view_oldest_copy_or_open_new - (char *)fil_space_get_version) + + (ulint) ((char 
*)rec_get_n_extern_new - (char *)fsp_get_size_low) + + (ulint) ((char *)row_get_trx_id_offset - (char *)ha_create_func) + + (ulint) ((char *)srv_set_io_thread_op_info - (char *)thd_is_replication_slave_thread) + + (ulint) ((char *)mutex_create_func - (char *)ibuf_inside) + + (ulint) ((char *)trx_set_detailed_error - (char *)lock_check_trx_id_sanity) + + (ulint) ((char *)ut_time - (char *)mem_heap_strdup); + + chunk->mem = os_shm_alloc(&chunk->mem_size, srv_buffer_pool_shm_key, &is_new); + + if (UNIV_UNLIKELY(chunk->mem == NULL)) { + return(NULL); + } + +#ifdef UNIV_SET_MEM_TO_ZERO + if (is_new) { + memset(chunk->mem, '\0', chunk->mem_size); + } +#endif + + shm_info = chunk->mem; + + zip_hash_tmp = (hash_table_t*)((char *)chunk->mem + chunk->mem_size - zip_hash_mem_size); + + if (is_new) { + strncpy(shm_info->head_str, BUF_SHM_INFO_HEAD, 8); + shm_info->binary_id = binary_id; + shm_info->is_new = TRUE; /* changed to FALSE when the initialization is finished */ + shm_info->clean = FALSE; /* changed to TRUE when free the segment. */ + shm_info->reusable = FALSE; /* changed to TRUE when validation is finished. 
*/ + shm_info->buf_pool_size = srv_buf_pool_size; + shm_info->page_size = srv_page_size; + shm_info->zip_hash_offset = chunk->mem_size - zip_hash_mem_size; + shm_info->zip_hash_n = zip_hash_n; + } else { + ulint checksum; + + if (strncmp(shm_info->head_str, BUF_SHM_INFO_HEAD, 8)) { + fprintf(stderr, + "InnoDB: Error: The shared memory segment seems not to be for buffer pool.\n"); + return(NULL); + } + if (shm_info->binary_id != binary_id) { + fprintf(stderr, + "InnoDB: Error: The shared memory segment seems not to be for this binary.\n"); + return(NULL); + } + if (shm_info->is_new) { + fprintf(stderr, + "InnoDB: Error: The shared memory was not initialized yet.\n"); + return(NULL); + } + if (!shm_info->clean) { + fprintf(stderr, + "InnoDB: Error: The shared memory was not shut down cleanly.\n"); + return(NULL); + } + if (!shm_info->reusable) { + fprintf(stderr, + "InnoDB: Error: The shared memory has unrecoverable contents.\n"); + return(NULL); + } + if (shm_info->buf_pool_size != srv_buf_pool_size) { + fprintf(stderr, + "InnoDB: Error: srv_buf_pool_size is different (shm=%lu current=%lu).\n", + shm_info->buf_pool_size, srv_buf_pool_size); + return(NULL); + } + if (shm_info->page_size != srv_page_size) { + fprintf(stderr, + "InnoDB: Error: srv_page_size is different (shm=%lu current=%lu).\n", + shm_info->page_size, srv_page_size); + return(NULL); + } + + ut_a(shm_info->zip_hash_offset == chunk->mem_size - zip_hash_mem_size); + ut_a(shm_info->zip_hash_n == zip_hash_n); + + /* check checksum */ + checksum = ut_fold_binary((byte*)chunk->mem + sizeof(buf_shm_info_t), + chunk->mem_size - sizeof(buf_shm_info_t)); + if (shm_info->checksum != checksum) { + fprintf(stderr, + "InnoDB: Error: checksum of the shared memory is not match. " + "(stored=%lu calculated=%lu)\n", + shm_info->checksum, checksum); + return(NULL); + } + + /* flag to use the segment. */ + shm_info->clean = FALSE; /* changed to TRUE when free the segment. 
*/ + } + + /* init zip_hash contents */ + if (is_new) { + hash_create_init(zip_hash_tmp, zip_hash_n); + } else { + /* adjust offset is done later */ + hash_create_reuse(zip_hash_tmp); + } + } else { chunk->mem = os_mem_alloc_large(&chunk->mem_size); if (UNIV_UNLIKELY(chunk->mem == NULL)) { return(NULL); } + } /* Allocate the block descriptors from the start of the memory block. */ + if (srv_buffer_pool_shm_key) { + chunk->blocks = (buf_block_t*)((char*)chunk->mem + sizeof(buf_shm_info_t)); + } else { chunk->blocks = chunk->mem; + } /* Align a pointer to the first frame. Note that when os_large_page_size is smaller than UNIV_PAGE_SIZE, @@ -801,8 +1003,13 @@ buf_chunk_init( it is bigger, we may allocate more blocks than requested. */ frame = ut_align(chunk->mem, UNIV_PAGE_SIZE); + if (srv_buffer_pool_shm_key) { + /* reserve zip_hash space and always -1 for reproductibity */ + chunk->size = (chunk->mem_size - zip_hash_mem_size) / UNIV_PAGE_SIZE - 1; + } else { chunk->size = chunk->mem_size / UNIV_PAGE_SIZE - (frame != chunk->mem); + } /* Subtract the space needed for block descriptors. 
*/ { @@ -816,6 +1023,98 @@ buf_chunk_init( chunk->size = size; } + if (shm_info && !(shm_info->is_new)) { + /* convert the shared memory segment for reuse */ + ptrdiff_t phys_offset; + ptrdiff_t logi_offset; + ptrdiff_t blocks_offset; + byte* previous_frame_address; + + if (chunk->size < shm_info->chunk_backup.size) { + fprintf(stderr, + "InnoDB: Error: The buffer pool became smaller because of allocated address.\n" + "InnoDB: Retrying may avoid this situation.\n"); + shm_info->clean = TRUE; /* release the flag for retrying */ + return(NULL); + } + + chunk->size = shm_info->chunk_backup.size; + phys_offset = (char*)frame - ((char*)chunk->mem + shm_info->frame_offset); + logi_offset = (char *)frame - (char *)chunk->blocks[0].frame; + previous_frame_address = chunk->blocks[0].frame; + blocks_offset = (char *)chunk->blocks - (char *)shm_info->chunk_backup.blocks; + + if (phys_offset || logi_offset || blocks_offset) { + fprintf(stderr, + "InnoDB: Buffer pool in the shared memory segment should be converted.\n" + "InnoDB: Previous frames in address : %p\n" + "InnoDB: Previous frames were located : %p\n" + "InnoDB: Current frames should be located: %p\n" + "InnoDB: Pysical offset : %ld (%#lx)\n" + "InnoDB: Logical offset (frames) : %ld (%#lx)\n" + "InnoDB: Logical offset (blocks) : %ld (%#lx)\n", + (char *)chunk->mem + shm_info->frame_offset, + chunk->blocks[0].frame, frame, + (ulong) phys_offset, (ulong) phys_offset, (ulong) logi_offset, (ulong) logi_offset, + (ulong) blocks_offset, (ulong) blocks_offset); + } else { + fprintf(stderr, + "InnoDB: Buffer pool in the shared memory segment can be used as it is.\n"); + } + + if (phys_offset) { + fprintf(stderr, + "InnoDB: Aligning physical offset..."); + + memmove(frame, ((char*)chunk->mem + shm_info->frame_offset), + chunk->size * UNIV_PAGE_SIZE); + + fprintf(stderr, + " Done.\n"); + } + + if (logi_offset || blocks_offset) { + fprintf(stderr, + "InnoDB: Aligning logical offset..."); + + /* buf_block_t */ + block = 
chunk->blocks; + + for (i = chunk->size; i--; ) { + buf_block_reuse(block, logi_offset); + block++; + } + + /* buf_pool_t buf_pool_backup */ + UT_LIST_OFFSET(flush_list, buf_page_t, shm_info->buf_pool_backup.flush_list, + previous_frame_address, logi_offset, blocks_offset); + UT_LIST_OFFSET(free, buf_page_t, shm_info->buf_pool_backup.free, + previous_frame_address, logi_offset, blocks_offset); + UT_LIST_OFFSET(LRU, buf_page_t, shm_info->buf_pool_backup.LRU, + previous_frame_address, logi_offset, blocks_offset); + if (shm_info->buf_pool_backup.LRU_old) + shm_info->buf_pool_backup.LRU_old = + (buf_page_t*)((char*)(shm_info->buf_pool_backup.LRU_old) + + (((byte*)shm_info->buf_pool_backup.LRU_old > previous_frame_address) + ? logi_offset : blocks_offset)); + + UT_LIST_OFFSET(unzip_LRU, buf_block_t, shm_info->buf_pool_backup.unzip_LRU, + previous_frame_address, logi_offset, blocks_offset); + + UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_clean, + previous_frame_address, logi_offset, blocks_offset); + for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) { + UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_free[i], + previous_frame_address, logi_offset, blocks_offset); + } + + HASH_OFFSET(zip_hash_tmp, buf_page_t, hash, + previous_frame_address, logi_offset, blocks_offset); + + fprintf(stderr, + " Done.\n"); + } + } else { /* Init block structs and assign frames for them. Then we assign the frames to the first blocks (we already mapped the memory above). 
*/ @@ -826,7 +1125,7 @@ buf_chunk_init( buf_block_init(block, frame); -#ifdef HAVE_purify +#ifdef HAVE_valgrind /* Wipe contents of frame to eliminate a Purify warning */ memset(block->frame, '\0', UNIV_PAGE_SIZE); #endif @@ -839,6 +1138,11 @@ buf_chunk_init( block++; frame += UNIV_PAGE_SIZE; } + } + + if (shm_info) { + shm_info->frame_offset = (char*)chunk->blocks[0].frame - (char*)chunk->mem; + } return(chunk); } @@ -938,6 +1242,11 @@ buf_chunk_not_freed( ready = buf_flush_ready_for_replace(&block->page); mutex_exit(&block->mutex); + if (block->page.is_corrupt) { + /* corrupt page may remain, it can be skipped */ + break; + } + if (!ready) { return(block); @@ -1015,6 +1324,8 @@ buf_chunk_free( UNIV_MEM_UNDESC(block); } + ut_a(!srv_buffer_pool_shm_key); + os_mem_free_large(chunk->mem, chunk->mem_size); } @@ -1064,7 +1375,10 @@ buf_pool_init(void) srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE; buf_pool->page_hash = hash_create(2 * buf_pool->curr_size); + /* zip_hash is allocated to shm when srv_buffer_pool_shm_key is enabled */ + if (!srv_buffer_pool_shm_key) { buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size); + } buf_pool->last_printout_time = time(NULL); @@ -1079,6 +1393,86 @@ buf_pool_init(void) --------------------------- */ /* All fields are initialized by mem_zalloc(). */ + if (srv_buffer_pool_shm_key) { + buf_shm_info_t* shm_info; + + ut_a((char*)chunk->blocks == (char*)chunk->mem + sizeof(buf_shm_info_t)); + shm_info = chunk->mem; + + buf_pool->zip_hash = (hash_table_t*)((char*)chunk->mem + shm_info->zip_hash_offset); + + if(shm_info->is_new) { + shm_info->is_new = FALSE; /* initialization was finished */ + } else { + buf_block_t* block = chunk->blocks; + buf_page_t* b; + + /* shm_info->buf_pool_backup should be converted */ + /* at buf_chunk_init(). So copy simply. 
*/ + buf_pool->flush_list = shm_info->buf_pool_backup.flush_list; + buf_pool->freed_page_clock = shm_info->buf_pool_backup.freed_page_clock; + buf_pool->free = shm_info->buf_pool_backup.free; + buf_pool->LRU = shm_info->buf_pool_backup.LRU; + buf_pool->LRU_old = shm_info->buf_pool_backup.LRU_old; + buf_pool->LRU_old_len = shm_info->buf_pool_backup.LRU_old_len; + buf_pool->unzip_LRU = shm_info->buf_pool_backup.unzip_LRU; + buf_pool->zip_clean = shm_info->buf_pool_backup.zip_clean; + for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) { + buf_pool->zip_free[i] = shm_info->buf_pool_backup.zip_free[i]; + } + + for (i = 0; i < chunk->size; i++, block++) { + if (buf_block_get_state(block) + == BUF_BLOCK_FILE_PAGE) { + ut_d(block->page.in_page_hash = TRUE); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, + buf_page_address_fold( + block->page.space, + block->page.offset), + &block->page); + } + } + + for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b; + b = UT_LIST_GET_NEXT(zip_list, b)) { + ut_ad(!b->in_flush_list); + ut_ad(b->in_LRU_list); + + ut_d(b->in_page_hash = TRUE); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, + buf_page_address_fold(b->space, b->offset), b); + } + + for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b; + b = UT_LIST_GET_NEXT(flush_list, b)) { + ut_ad(b->in_flush_list); + ut_ad(b->in_LRU_list); + + switch (buf_page_get_state(b)) { + case BUF_BLOCK_ZIP_DIRTY: + ut_d(b->in_page_hash = TRUE); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, + buf_page_address_fold(b->space, + b->offset), b); + break; + case BUF_BLOCK_FILE_PAGE: + /* uncompressed page */ + break; + case BUF_BLOCK_ZIP_FREE: + case BUF_BLOCK_ZIP_PAGE: + case BUF_BLOCK_NOT_USED: + case BUF_BLOCK_READY_FOR_USE: + case BUF_BLOCK_MEMORY: + case BUF_BLOCK_REMOVE_HASH: + ut_error; + break; + } + } + + + } + } + mutex_exit(&LRU_list_mutex); rw_lock_x_unlock(&page_hash_latch); buf_pool_mutex_exit(); @@ -1103,6 +1497,30 @@ buf_pool_free(void) buf_chunk_t* chunk; buf_chunk_t* chunks; + if 
(srv_buffer_pool_shm_key) { + buf_shm_info_t* shm_info; + + ut_a(buf_pool->n_chunks == 1); + + chunk = buf_pool->chunks; + shm_info = chunk->mem; + ut_a((char*)chunk->blocks == (char*)chunk->mem + sizeof(buf_shm_info_t)); + + /* validation the shared memory segment doesn't have unrecoverable contents. */ + /* Currently, validation became not needed */ + shm_info->reusable = TRUE; + + memcpy(&(shm_info->buf_pool_backup), buf_pool, sizeof(buf_pool_t)); + memcpy(&(shm_info->chunk_backup), chunk, sizeof(buf_chunk_t)); + + if (srv_fast_shutdown < 2) { + shm_info->checksum = ut_fold_binary((byte*)chunk->mem + sizeof(buf_shm_info_t), + chunk->mem_size - sizeof(buf_shm_info_t)); + shm_info->clean = TRUE; + } + + os_shm_free(chunk->mem, chunk->mem_size); + } else { chunks = buf_pool->chunks; chunk = chunks + buf_pool->n_chunks; @@ -1111,10 +1529,13 @@ buf_pool_free(void) would fail at shutdown. */ os_mem_free_large(chunk->mem, chunk->mem_size); } + } mem_free(buf_pool->chunks); hash_table_free(buf_pool->page_hash); + if (!srv_buffer_pool_shm_key) { hash_table_free(buf_pool->zip_hash); + } mem_free(buf_pool); buf_pool = NULL; } @@ -1150,7 +1571,9 @@ buf_pool_drop_hash_index(void) when we have an x-latch on btr_search_latch; see the comment in buf0buf.h */ - if (!block->is_hashed) { + if (buf_block_get_state(block) + != BUF_BLOCK_FILE_PAGE + || !block->is_hashed) { continue; } @@ -1283,8 +1706,6 @@ buf_relocate( HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage); HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage); - - UNIV_MEM_INVALID(bpage, sizeof *bpage); } /********************************************************************//** @@ -1309,6 +1730,11 @@ try_again: //buf_pool_mutex_enter(); mutex_enter(&LRU_list_mutex); + if (srv_buffer_pool_shm_key) { + /* Cannot support shrink */ + goto func_done; + } + shrink_again: if (buf_pool->n_chunks <= 1) { @@ -1552,6 +1978,11 @@ void buf_pool_resize(void) /*=================*/ { + if 
(srv_buffer_pool_shm_key) { + /* Cannot support resize */ + return; + } + //buf_pool_mutex_enter(); mutex_enter(&LRU_list_mutex); @@ -1980,14 +2411,14 @@ buf_zip_decompress( buf_block_t* block, /*!< in/out: block */ ibool check) /*!< in: TRUE=verify the page checksum */ { - const byte* frame = block->page.zip.data; + const byte* frame = block->page.zip.data; + ulint stamp_checksum = mach_read_from_4( + frame + FIL_PAGE_SPACE_OR_CHKSUM); ut_ad(buf_block_get_zip_size(block)); ut_a(buf_block_get_space(block) != 0); - if (UNIV_LIKELY(check)) { - ulint stamp_checksum = mach_read_from_4( - frame + FIL_PAGE_SPACE_OR_CHKSUM); + if (UNIV_LIKELY(check && stamp_checksum != BUF_NO_CHECKSUM_MAGIC)) { ulint calc_checksum = page_zip_calc_checksum( frame, page_zip_get_size(&block->page.zip)); @@ -2196,6 +2627,7 @@ buf_page_get_gen( unsigned access_time; ulint fix_type; ibool must_read; + ulint retries = 0; mutex_t* block_mutex; trx_t* trx = NULL; ulint sec; @@ -2204,6 +2636,7 @@ buf_page_get_gen( ib_uint64_t finish_time; ut_ad(mtr); + ut_ad(mtr->state == MTR_ACTIVE); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH) || (rw_latch == RW_NO_LATCH)); @@ -2271,7 +2704,29 @@ loop2: return(NULL); } - buf_read_page(space, zip_size, offset, trx); + if (buf_read_page(space, zip_size, offset, trx)) { + retries = 0; + } else if (retries < BUF_PAGE_READ_MAX_RETRIES) { + ++retries; + } else { + fprintf(stderr, "InnoDB: Error: Unable" + " to read tablespace %lu page no" + " %lu into the buffer pool after" + " %lu attempts\n" + "InnoDB: The most probable cause" + " of this error may be that the" + " table has been corrupted.\n" + "InnoDB: You can try to fix this" + " problem by using" + " innodb_force_recovery.\n" + "InnoDB: Please see reference manual" + " for more details.\n" + "InnoDB: Aborting...\n", + space, offset, + BUF_PAGE_READ_MAX_RETRIES); + + ut_error; + } #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(++buf_dbg_counter % 37 || buf_validate()); @@ -2414,22 +2869,8 @@ 
wait_until_unfixed: ut_ad(!block->page.in_flush_list); } else { /* Relocate buf_pool->flush_list. */ - buf_page_t* b; - - b = UT_LIST_GET_PREV(flush_list, &block->page); - ut_ad(block->page.in_flush_list); - UT_LIST_REMOVE(flush_list, buf_pool->flush_list, - &block->page); - - if (b) { - UT_LIST_INSERT_AFTER( - flush_list, buf_pool->flush_list, b, - &block->page); - } else { - UT_LIST_ADD_FIRST( - flush_list, buf_pool->flush_list, - &block->page); - } + buf_flush_relocate_on_flush_list(bpage, + &block->page); } mutex_exit(&flush_list_mutex); @@ -2446,7 +2887,10 @@ wait_until_unfixed: block->page.buf_fix_count = 1; buf_block_set_io_fix(block, BUF_IO_READ); - rw_lock_x_lock(&block->lock); + rw_lock_x_lock_func(&block->lock, 0, file, line); + + UNIV_MEM_INVALID(bpage, sizeof *bpage); + mutex_exit(block_mutex); mutex_exit(&buf_pool_zip_mutex); @@ -2461,8 +2905,9 @@ wait_until_unfixed: /* Decompress the page and apply buffered operations while not holding buf_pool_mutex or block->mutex. */ success = buf_zip_decompress(block, srv_use_checksums); + ut_a(success); - if (UNIV_LIKELY(success)) { + if (UNIV_LIKELY(!recv_no_ibuf_operations)) { ibuf_merge_or_delete_for_page(block, space, offset, zip_size, TRUE); } @@ -2478,14 +2923,6 @@ wait_until_unfixed: buf_pool->n_pend_unzip--; mutex_exit(&buf_pool_mutex); rw_lock_x_unlock(&block->lock); - - if (UNIV_UNLIKELY(!success)) { - - //buf_pool_mutex_exit(); - mutex_exit(block_mutex); - return(NULL); - } - break; case BUF_BLOCK_ZIP_FREE: @@ -2500,7 +2937,12 @@ wait_until_unfixed: ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); //mutex_enter(&block->mutex); +#if UNIV_WORD_SIZE == 4 + /* On 32-bit systems, there is no padding in buf_page_t. On + other systems, Valgrind could complain about uninitialized pad + bytes. */ UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page); +#endif buf_block_buf_fix_inc(block, file, line); @@ -2603,8 +3045,8 @@ page. 
@return TRUE if success */ UNIV_INTERN ibool -buf_page_optimistic_get_func( -/*=========================*/ +buf_page_optimistic_get( +/*====================*/ ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */ buf_block_t* block, /*!< in: guessed buffer block */ ib_uint64_t modify_clock,/*!< in: modify clock value if mode is @@ -2618,7 +3060,9 @@ buf_page_optimistic_get_func( ulint fix_type; trx_t* trx = NULL; - ut_ad(mtr && block); + ut_ad(block); + ut_ad(mtr); + ut_ad(mtr->state == MTR_ACTIVE); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); mutex_enter(&block->mutex); @@ -2738,6 +3182,7 @@ buf_page_get_known_nowait( trx_t* trx = NULL; ut_ad(mtr); + ut_ad(mtr->state == MTR_ACTIVE); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); mutex_enter(&block->mutex); @@ -2846,6 +3291,9 @@ buf_page_try_get_func( ibool success; ulint fix_type; + ut_ad(mtr); + ut_ad(mtr->state == MTR_ACTIVE); + //buf_pool_mutex_enter(); rw_lock_s_lock(&page_hash_latch); block = buf_block_hash_get(space_id, page_no); @@ -3249,6 +3697,7 @@ buf_page_create( ulint time_ms = ut_time_ms(); ut_ad(mtr); + ut_ad(mtr->state == MTR_ACTIVE); ut_ad(space || !zip_size); free_block = buf_LRU_get_free_block(0); @@ -3431,7 +3880,8 @@ buf_page_io_complete( read_space_id = mach_read_from_4( frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - if (bpage->space == TRX_SYS_SPACE + if ((bpage->space == TRX_SYS_SPACE + || (srv_doublewrite_file && bpage->space == TRX_DOUBLEWRITE_SPACE)) && trx_doublewrite_page_inside(bpage->offset)) { ut_print_timestamp(stderr); @@ -3503,7 +3953,7 @@ corrupt: REFMAN "forcing-recovery.html\n" "InnoDB: about forcing recovery.\n", stderr); - if (srv_pass_corrupt_table && bpage->space > 0 + if (srv_pass_corrupt_table && !trx_sys_sys_space(bpage->space) && bpage->space < SRV_LOG_SPACE_FIRST_ID) { fprintf(stderr, "InnoDB: space %u will be treated as corrupt.\n", diff --git a/storage/xtradb/buf/buf0flu.c b/storage/xtradb/buf/buf0flu.c index 1735f6ac3cb..0a03d583549 100644 
--- a/storage/xtradb/buf/buf0flu.c +++ b/storage/xtradb/buf/buf0flu.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -88,6 +88,146 @@ buf_flush_validate_low(void); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ /********************************************************************//** +Insert a block in the flush_rbt and returns a pointer to its +predecessor or NULL if no predecessor. The ordering is maintained +on the basis of the <oldest_modification, space, offset> key. +@return pointer to the predecessor or NULL if no predecessor. */ +static +buf_page_t* +buf_flush_insert_in_flush_rbt( +/*==========================*/ + buf_page_t* bpage) /*!< in: bpage to be inserted. */ +{ + buf_page_t* prev = NULL; + const ib_rbt_node_t* c_node; + const ib_rbt_node_t* p_node; + + //ut_ad(buf_pool_mutex_own()); + ut_ad(mutex_own(&flush_list_mutex)); + + /* Insert this buffer into the rbt. */ + c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage); + ut_a(c_node != NULL); + + /* Get the predecessor. */ + p_node = rbt_prev(buf_pool->flush_rbt, c_node); + + if (p_node != NULL) { + prev = *rbt_value(buf_page_t*, p_node); + ut_a(prev != NULL); + } + + return(prev); +} + +/********************************************************************//** +Delete a bpage from the flush_rbt. */ +static +void +buf_flush_delete_from_flush_rbt( +/*============================*/ + buf_page_t* bpage) /*!< in: bpage to be removed. 
*/ +{ + + ibool ret = FALSE; + + //ut_ad(buf_pool_mutex_own()); + ut_ad(mutex_own(&flush_list_mutex)); + ret = rbt_delete(buf_pool->flush_rbt, &bpage); + ut_ad(ret); +} + +/********************************************************************//** +Compare two modified blocks in the buffer pool. The key for comparison +is: +key = <oldest_modification, space, offset> +This comparison is used to maintian ordering of blocks in the +buf_pool->flush_rbt. +Note that for the purpose of flush_rbt, we only need to order blocks +on the oldest_modification. The other two fields are used to uniquely +identify the blocks. +@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */ +static +int +buf_flush_block_cmp( +/*================*/ + const void* p1, /*!< in: block1 */ + const void* p2) /*!< in: block2 */ +{ + int ret; + const buf_page_t* b1; + const buf_page_t* b2; + + ut_ad(p1 != NULL); + ut_ad(p2 != NULL); + + b1 = *(const buf_page_t**) p1; + b2 = *(const buf_page_t**) p2; + + ut_ad(b1 != NULL); + ut_ad(b2 != NULL); + + ut_ad(b1->in_flush_list); + ut_ad(b2->in_flush_list); + + if (b2->oldest_modification + > b1->oldest_modification) { + return(1); + } + + if (b2->oldest_modification + < b1->oldest_modification) { + return(-1); + } + + /* If oldest_modification is same then decide on the space. */ + ret = (int)(b2->space - b1->space); + + /* Or else decide ordering on the offset field. */ + return(ret ? ret : (int)(b2->offset - b1->offset)); +} + +/********************************************************************//** +Initialize the red-black tree to speed up insertions into the flush_list +during recovery process. Should be called at the start of recovery +process before any page has been read/written. */ +UNIV_INTERN +void +buf_flush_init_flush_rbt(void) +/*==========================*/ +{ + //buf_pool_mutex_enter(); + mutex_enter(&flush_list_mutex); + + /* Create red black tree for speedy insertions in flush list. 
*/ + buf_pool->flush_rbt = rbt_create(sizeof(buf_page_t*), + buf_flush_block_cmp); + //buf_pool_mutex_exit(); + mutex_exit(&flush_list_mutex); +} + +/********************************************************************//** +Frees up the red-black tree. */ +UNIV_INTERN +void +buf_flush_free_flush_rbt(void) +/*==========================*/ +{ + //buf_pool_mutex_enter(); + mutex_enter(&flush_list_mutex); + +#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG + ut_a(buf_flush_validate_low()); +#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ + + rbt_free(buf_pool->flush_rbt); + buf_pool->flush_rbt = NULL; + + //buf_pool_mutex_exit(); + mutex_exit(&flush_list_mutex); +} + +/********************************************************************//** Inserts a modified block into the flush list. */ UNIV_INTERN void @@ -102,6 +242,13 @@ buf_flush_insert_into_flush_list( || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification <= block->page.oldest_modification)); + /* If we are in the recovery then we need to update the flush + red-black tree as well. 
*/ + if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) { + buf_flush_insert_sorted_into_flush_list(block); + return; + } + ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); ut_ad(block->page.in_LRU_list); ut_ad(block->page.in_page_hash); @@ -110,6 +257,17 @@ buf_flush_insert_into_flush_list( ut_d(block->page.in_flush_list = TRUE); UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page); +#ifdef UNIV_DEBUG_VALGRIND + { + ulint zip_size = buf_block_get_zip_size(block); + + if (UNIV_UNLIKELY(zip_size)) { + UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size); + } else { + UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE); + } + } +#endif /* UNIV_DEBUG_VALGRIND */ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(buf_flush_validate_low()); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ @@ -139,27 +297,40 @@ buf_flush_insert_sorted_into_flush_list( ut_ad(!block->page.in_flush_list); ut_d(block->page.in_flush_list = TRUE); - prev_b = NULL; - b = UT_LIST_GET_FIRST(buf_pool->flush_list); +#ifdef UNIV_DEBUG_VALGRIND + { + ulint zip_size = buf_block_get_zip_size(block); - if (srv_fast_recovery) { - /* speed hack */ - if (b == NULL || b->oldest_modification < block->page.oldest_modification) { - UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page); - } else { - b = UT_LIST_GET_LAST(buf_pool->flush_list); - if (b->oldest_modification < block->page.oldest_modification) { - /* align oldest_modification not to sort */ - block->page.oldest_modification = b->oldest_modification; + if (UNIV_UNLIKELY(zip_size)) { + UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size); + } else { + UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE); } - UT_LIST_ADD_LAST(flush_list, buf_pool->flush_list, &block->page); } +#endif /* UNIV_DEBUG_VALGRIND */ + + prev_b = NULL; + + /* For the most part when this function is called the flush_rbt + should not be NULL. 
In a very rare boundary case it is possible + that the flush_rbt has already been freed by the recovery thread + before the last page was hooked up in the flush_list by the + io-handler thread. In that case we'll just do a simple + linear search in the else block. */ + if (buf_pool->flush_rbt) { + + prev_b = buf_flush_insert_in_flush_rbt(&block->page); + } else { - /* normal */ - while (b && b->oldest_modification > block->page.oldest_modification) { - ut_ad(b->in_flush_list); - prev_b = b; - b = UT_LIST_GET_NEXT(flush_list, b); + + b = UT_LIST_GET_FIRST(buf_pool->flush_list); + + while (b && b->oldest_modification + > block->page.oldest_modification) { + ut_ad(b->in_flush_list); + prev_b = b; + b = UT_LIST_GET_NEXT(flush_list, b); + } } if (prev_b == NULL) { @@ -168,7 +339,6 @@ buf_flush_insert_sorted_into_flush_list( UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list, prev_b, &block->page); } - } #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(buf_flush_validate_low()); @@ -262,7 +432,6 @@ buf_flush_remove( mutex_enter(&flush_list_mutex); ut_ad(bpage->in_flush_list); - ut_d(bpage->in_flush_list = FALSE); switch (buf_page_get_state(bpage)) { case BUF_BLOCK_ZIP_PAGE: @@ -285,6 +454,15 @@ buf_flush_remove( break; } + /* If the flush_rbt is active then delete from it as well. */ + if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) { + buf_flush_delete_from_flush_rbt(bpage); + } + + /* Must be done after we have removed it from the flush_rbt + because we assert on in_flush_list in comparison function. */ + ut_d(bpage->in_flush_list = FALSE); + bpage->oldest_modification = 0; ut_d(UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list, @@ -293,6 +471,64 @@ buf_flush_remove( } /********************************************************************//** +Relocates a buffer control block on the flush_list. +Note that it is assumed that the contents of bpage has already been +copied to dpage. 
*/ +UNIV_INTERN +void +buf_flush_relocate_on_flush_list( +/*=============================*/ + buf_page_t* bpage, /*!< in/out: control block being moved */ + buf_page_t* dpage) /*!< in/out: destination block */ +{ + buf_page_t* prev; + buf_page_t* prev_b = NULL; + + //ut_ad(buf_pool_mutex_own()); + ut_ad(mutex_own(&flush_list_mutex)); + + ut_ad(mutex_own(buf_page_get_mutex(bpage))); + + ut_ad(bpage->in_flush_list); + ut_ad(dpage->in_flush_list); + + /* If recovery is active we must swap the control blocks in + the flush_rbt as well. */ + if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) { + buf_flush_delete_from_flush_rbt(bpage); + prev_b = buf_flush_insert_in_flush_rbt(dpage); + } + + /* Must be done after we have removed it from the flush_rbt + because we assert on in_flush_list in comparison function. */ + ut_d(bpage->in_flush_list = FALSE); + + prev = UT_LIST_GET_PREV(flush_list, bpage); + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage); + + if (prev) { + ut_ad(prev->in_flush_list); + UT_LIST_INSERT_AFTER( + flush_list, + buf_pool->flush_list, + prev, dpage); + } else { + UT_LIST_ADD_FIRST( + flush_list, + buf_pool->flush_list, + dpage); + } + + /* Just an extra check. Previous in flush_list + should be the same control block as in flush_rbt. */ + ut_a(!buf_pool->flush_rbt || prev_b == prev); + +#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG + ut_a(buf_flush_validate_low()); +#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ +} + +/********************************************************************//** Updates the flush system data structures when a write is completed. */ UNIV_INTERN void @@ -452,7 +688,8 @@ corrupted_page: write_buf = trx_doublewrite->write_buf; i = 0; - fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0, + fil_io(OS_FILE_WRITE, TRUE, + (srv_doublewrite_file ? 
TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE), 0, trx_doublewrite->block1, 0, len, (void*) write_buf, NULL); @@ -489,7 +726,8 @@ corrupted_page: + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE; ut_ad(i == TRX_SYS_DOUBLEWRITE_BLOCK_SIZE); - fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0, + fil_io(OS_FILE_WRITE, TRUE, + (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE), 0, trx_doublewrite->block2, 0, len, (void*) write_buf, NULL); @@ -519,7 +757,7 @@ corrupted_page: flush: /* Now flush the doublewrite buffer data to disk */ - fil_flush(TRX_SYS_SPACE); + fil_flush(srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE); /* We know that the writes have been flushed to disk now and in recovery we will find them in the doublewrite buffer @@ -615,6 +853,7 @@ try_again: zip_size = buf_page_get_zip_size(bpage); if (UNIV_UNLIKELY(zip_size)) { + UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size); /* Copy the compressed page and clear the rest. */ memcpy(trx_doublewrite->write_buf + UNIV_PAGE_SIZE * trx_doublewrite->first_free, @@ -624,6 +863,8 @@ try_again: + zip_size, 0, UNIV_PAGE_SIZE - zip_size); } else { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); + UNIV_MEM_ASSERT_RW(((buf_block_t*) bpage)->frame, + UNIV_PAGE_SIZE); memcpy(trx_doublewrite->write_buf + UNIV_PAGE_SIZE * trx_doublewrite->first_free, @@ -1318,6 +1559,7 @@ retry: } else if (!have_LRU_mutex) { /* confirm it again with LRU_mutex for exactness */ have_LRU_mutex = TRUE; + distance = 0; goto retry; } @@ -1473,24 +1715,45 @@ ibool buf_flush_validate_low(void) /*========================*/ { - buf_page_t* bpage; + buf_page_t* bpage; + const ib_rbt_node_t* rnode = NULL; UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list, ut_ad(ut_list_node_313->in_flush_list)); bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); + /* If we are in recovery mode i.e.: flush_rbt != NULL + then each block in the flush_list must also be present + in the flush_rbt. 
*/ + if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) { + rnode = rbt_first(buf_pool->flush_rbt); + } + while (bpage != NULL) { const ib_uint64_t om = bpage->oldest_modification; ut_ad(bpage->in_flush_list); //ut_a(buf_page_in_file(bpage)); /* optimistic */ ut_a(om > 0); + if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) { + ut_a(rnode); + buf_page_t* rpage = *rbt_value(buf_page_t*, + rnode); + ut_a(rpage); + ut_a(rpage == bpage); + rnode = rbt_next(buf_pool->flush_rbt, rnode); + } + bpage = UT_LIST_GET_NEXT(flush_list, bpage); ut_a(!bpage || om >= bpage->oldest_modification); } + /* By this time we must have exhausted the traversal of + flush_rbt (if active) as well. */ + ut_a(rnode == NULL); + return(TRUE); } diff --git a/storage/xtradb/buf/buf0lru.c b/storage/xtradb/buf/buf0lru.c index 58e2c23275b..14ec1720873 100644 --- a/storage/xtradb/buf/buf0lru.c +++ b/storage/xtradb/buf/buf0lru.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -371,21 +371,39 @@ scan_again: bpage = UT_LIST_GET_LAST(buf_pool->LRU); while (bpage != NULL) { - mutex_t* block_mutex = buf_page_get_mutex_enter(bpage); buf_page_t* prev_bpage; + ibool prev_bpage_buf_fix = FALSE; ut_a(buf_page_in_file(bpage)); prev_bpage = UT_LIST_GET_PREV(LRU, bpage); - if (!block_mutex) { - bpage = prev_bpage; - continue; - } + /* bpage->space and bpage->io_fix are protected by + buf_pool_mutex and block_mutex. It is safe to check + them while holding buf_pool_mutex only. */ + + if (buf_page_get_space(bpage) != id) { + /* Skip this block, as it does not belong to + the space that is being invalidated. 
*/ + } else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) { + /* We cannot remove this page during this scan + yet; maybe the system is currently reading it + in, or flushing the modifications to the file */ + + all_freed = FALSE; + } else { + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage); - if (buf_page_get_space(bpage) == id) { - if (bpage->buf_fix_count > 0 - || buf_page_get_io_fix(bpage) != BUF_IO_NONE) { + if (!block_mutex) { + /* It may be impossible case... + Something wrong, so will be scan_again */ + + all_freed = FALSE; + + goto next_page_no_mutex; + } + + if (bpage->buf_fix_count > 0) { /* We cannot remove this page during this scan yet; maybe the system is @@ -405,8 +423,40 @@ scan_again: (ulong) buf_page_get_page_no(bpage)); } #endif - if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE - && ((buf_block_t*) bpage)->is_hashed) { + if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) { + /* This is a compressed-only block + descriptor. Ensure that prev_bpage + cannot be relocated when bpage is freed. */ + if (UNIV_LIKELY(prev_bpage != NULL)) { + switch (buf_page_get_state( + prev_bpage)) { + case BUF_BLOCK_FILE_PAGE: + /* Descriptors of uncompressed + blocks will not be relocated, + because we are holding the + buf_pool_mutex. */ + break; + case BUF_BLOCK_ZIP_PAGE: + case BUF_BLOCK_ZIP_DIRTY: + /* Descriptors of compressed- + only blocks can be relocated, + unless they are buffer-fixed. + Because both bpage and + prev_bpage are protected by + buf_pool_zip_mutex, it is + not necessary to acquire + further mutexes. */ + ut_ad(&buf_pool_zip_mutex + == block_mutex); + ut_ad(mutex_own(block_mutex)); + prev_bpage_buf_fix = TRUE; + prev_bpage->buf_fix_count++; + break; + default: + ut_error; + } + } + } else if (((buf_block_t*) bpage)->is_hashed) { ulint page_no; ulint zip_size; @@ -432,7 +482,8 @@ scan_again: buf_flush_remove(bpage); } - /* Remove from the LRU list */ + /* Remove from the LRU list. 
*/ + if (buf_LRU_block_remove_hashed_page(bpage, TRUE) != BUF_BLOCK_ZIP_FREE) { buf_LRU_block_free_hashed_page((buf_block_t*) @@ -444,18 +495,27 @@ scan_again: ut_ad(block_mutex == &buf_pool_zip_mutex); ut_ad(!mutex_own(block_mutex)); - /* The compressed block descriptor - (bpage) has been deallocated and - block_mutex released. Also, - buf_buddy_free() may have relocated - prev_bpage. Rescan the LRU list. */ + if (prev_bpage_buf_fix) { + /* We temporarily buffer-fixed + prev_bpage, so that + buf_buddy_free() could not + relocate it, in case it was a + compressed-only block + descriptor. */ + + mutex_enter(block_mutex); + ut_ad(prev_bpage->buf_fix_count > 0); + prev_bpage->buf_fix_count--; + mutex_exit(block_mutex); + } - bpage = UT_LIST_GET_LAST(buf_pool->LRU); - continue; + goto next_page_no_mutex; } - } next_page: - mutex_exit(block_mutex); + mutex_exit(block_mutex); + } + +next_page_no_mutex: bpage = prev_bpage; } @@ -1395,7 +1455,7 @@ buf_LRU_make_block_old( Try to free a block. If bpage is a descriptor of a compressed-only page, the descriptor object will be freed as well. -NOTE: If this function returns BUF_LRU_FREED, it will not temporarily +NOTE: If this function returns BUF_LRU_FREED, it will temporarily release buf_pool_mutex. Furthermore, the page frame will no longer be accessible via bpage. @@ -1425,7 +1485,12 @@ buf_LRU_free_block( ut_ad(buf_page_in_file(bpage)); //ut_ad(bpage->in_LRU_list); ut_ad(!bpage->in_flush_list == !bpage->oldest_modification); +#if UNIV_WORD_SIZE == 4 + /* On 32-bit systems, there is no padding in buf_page_t. On + other systems, Valgrind could complain about uninitialized pad + bytes. */ UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage); +#endif if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) { @@ -1558,8 +1623,13 @@ not_freed: ut_ad(prev_b->in_LRU_list); ut_ad(buf_page_in_file(prev_b)); +#if UNIV_WORD_SIZE == 4 + /* On 32-bit systems, there is no + padding in buf_page_t. 
On other + systems, Valgrind could complain about + uninitialized pad bytes. */ UNIV_MEM_ASSERT_RW(prev_b, sizeof *prev_b); - +#endif UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, prev_b, b); @@ -1600,26 +1670,8 @@ not_freed: if (b->state == BUF_BLOCK_ZIP_PAGE) { buf_LRU_insert_zip_clean(b); } else { - buf_page_t* prev; - - ut_ad(b->in_flush_list); - ut_d(bpage->in_flush_list = FALSE); - - prev = UT_LIST_GET_PREV(flush_list, b); - UT_LIST_REMOVE(flush_list, buf_pool->flush_list, b); - - if (prev) { - ut_ad(prev->in_flush_list); - UT_LIST_INSERT_AFTER( - flush_list, - buf_pool->flush_list, - prev, b); - } else { - UT_LIST_ADD_FIRST( - flush_list, - buf_pool->flush_list, - b); - } + /* Relocate on buf_pool->flush_list. */ + buf_flush_relocate_on_flush_list(bpage, b); } mutex_exit(&flush_list_mutex); @@ -1792,7 +1844,12 @@ buf_LRU_block_remove_hashed_page( ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE); ut_a(bpage->buf_fix_count == 0); +#if UNIV_WORD_SIZE == 4 + /* On 32-bit systems, there is no padding in + buf_page_t. On other systems, Valgrind could complain + about uninitialized pad bytes. 
*/ UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage); +#endif buf_LRU_remove_block(bpage); @@ -2074,7 +2131,7 @@ ibool buf_LRU_file_dump(void) /*===================*/ { - os_file_t dump_file = -1; + os_file_t dump_file = (os_file_t) -1; ibool success; byte* buffer_base = NULL; byte* buffer = NULL; @@ -2164,7 +2221,7 @@ buf_LRU_file_dump(void) ret = TRUE; end: - if (dump_file != -1) + if (dump_file != (os_file_t) -1) os_file_close(dump_file); if (buffer_base) ut_free(buffer_base); @@ -2178,7 +2235,7 @@ ibool buf_LRU_file_restore(void) /*======================*/ { - os_file_t dump_file = -1; + os_file_t dump_file = (os_file_t) -1; ibool success; byte* buffer_base = NULL; byte* buffer = NULL; @@ -2269,7 +2326,7 @@ buf_LRU_file_restore(void) " (requested: %lu, read: %lu)\n", req, reads); ret = TRUE; end: - if (dump_file != -1) + if (dump_file != (os_file_t) -1) os_file_close(dump_file); if (buffer_base) ut_free(buffer_base); diff --git a/storage/xtradb/buf/buf0rea.c b/storage/xtradb/buf/buf0rea.c index e5d04df797f..59de70d9a8a 100644 --- a/storage/xtradb/buf/buf0rea.c +++ b/storage/xtradb/buf/buf0rea.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -86,7 +86,9 @@ buf_read_page_low( wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER; - if (trx_doublewrite && space == TRX_SYS_SPACE + if (trx_doublewrite + && (space == TRX_SYS_SPACE + || (srv_doublewrite_file && space == TRX_DOUBLEWRITE_SPACE)) && ( (offset >= trx_doublewrite->block1 && offset < trx_doublewrite->block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) @@ -656,6 +658,50 @@ buf_read_recv_pages( /* It is a single table tablespace and the .ibd file is missing: do nothing */ + /* the log records should be treated here same reason + for http://bugs.mysql.com/bug.php?id=43948 */ + + if (recv_recovery_is_on()) { + recv_addr_t* recv_addr; + + mutex_enter(&(recv_sys->mutex)); + + if (recv_sys->apply_log_recs == FALSE) { + mutex_exit(&(recv_sys->mutex)); + goto not_to_recover; + } + + for (i = 0; i < n_stored; i++) { + /* recv_get_fil_addr_struct() */ + recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, + hash_calc_hash(ut_fold_ulint_pair(space, page_nos[i]), + recv_sys->addr_hash)); + while (recv_addr) { + if ((recv_addr->space == space) + && (recv_addr->page_no == page_nos[i])) { + break; + } + recv_addr = HASH_GET_NEXT(addr_hash, recv_addr); + } + + if ((recv_addr == NULL) + || (recv_addr->state == RECV_BEING_PROCESSED) + || (recv_addr->state == RECV_PROCESSED)) { + continue; + } + + recv_addr->state = RECV_PROCESSED; + + ut_a(recv_sys->n_addrs); + recv_sys->n_addrs--; + } + + mutex_exit(&(recv_sys->mutex)); + + fprintf(stderr, " (cannot find space: %lu)", space); + } +not_to_recover: + return; } @@ -674,10 +720,10 @@ buf_read_recv_pages( count++; - if (count > 5000) { + if (count > 1000) { fprintf(stderr, "InnoDB: Error: InnoDB has waited for" - " 50 seconds for pending\n" + " 10 seconds for pending\n" "InnoDB: reads to the buffer pool to" " be finished.\n" "InnoDB: Number 
of pending reads %lu," diff --git a/storage/xtradb/data/data0data.c b/storage/xtradb/data/data0data.c index e3c1f1b4f23..0715b49bf9c 100644 --- a/storage/xtradb/data/data0data.c +++ b/storage/xtradb/data/data0data.c @@ -666,6 +666,21 @@ dtuple_convert_big_rec( goto skip_field; } + /* In DYNAMIC and COMPRESSED format, store + locally any non-BLOB columns whose maximum + length does not exceed 256 bytes. This is + because there is no room for the "external + storage" flag when the maximum length is 255 + bytes or less. This restriction trivially + holds in REDUNDANT and COMPACT format, because + there we always store locally columns whose + length is up to local_len == 788 bytes. + @see rec_init_offsets_comp_ordinary */ + if (ifield->col->mtype != DATA_BLOB + && ifield->col->len < 256) { + goto skip_field; + } + longest_i = i; longest = savings; diff --git a/storage/xtradb/dict/dict0boot.c b/storage/xtradb/dict/dict0boot.c index 0eb73e6c2f9..43cfced65a0 100644 --- a/storage/xtradb/dict/dict0boot.c +++ b/storage/xtradb/dict/dict0boot.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -62,32 +62,47 @@ dict_hdr_get( } /**********************************************************************//** -Returns a new table, index, or tree id. -@return the new id */ +Returns a new table, index, or space id. */ UNIV_INTERN -dulint +void dict_hdr_get_new_id( /*================*/ - ulint type) /*!< in: DICT_HDR_ROW_ID, ... 
*/ + dulint* table_id, /*!< out: table id (not assigned if NULL) */ + dulint* index_id, /*!< out: index id (not assigned if NULL) */ + ulint* space_id) /*!< out: space id (not assigned if NULL) */ { dict_hdr_t* dict_hdr; dulint id; mtr_t mtr; - ut_ad((type == DICT_HDR_TABLE_ID) || (type == DICT_HDR_INDEX_ID)); - mtr_start(&mtr); dict_hdr = dict_hdr_get(&mtr); - id = mtr_read_dulint(dict_hdr + type, &mtr); - id = ut_dulint_add(id, 1); + if (table_id) { + id = mtr_read_dulint(dict_hdr + DICT_HDR_TABLE_ID, &mtr); + id = ut_dulint_add(id, 1); + mlog_write_dulint(dict_hdr + DICT_HDR_TABLE_ID, id, &mtr); + *table_id = id; + } - mlog_write_dulint(dict_hdr + type, id, &mtr); + if (index_id) { + id = mtr_read_dulint(dict_hdr + DICT_HDR_INDEX_ID, &mtr); + id = ut_dulint_add(id, 1); + mlog_write_dulint(dict_hdr + DICT_HDR_INDEX_ID, id, &mtr); + *index_id = id; + } - mtr_commit(&mtr); + if (space_id) { + *space_id = mtr_read_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID, + MLOG_4BYTES, &mtr); + if (fil_assign_new_space_id(space_id)) { + mlog_write_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID, + *space_id, MLOG_4BYTES, &mtr); + } + } - return(id); + mtr_commit(&mtr); } /**********************************************************************//** @@ -151,9 +166,12 @@ dict_hdr_create( mlog_write_dulint(dict_header + DICT_HDR_INDEX_ID, ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr); - /* Obsolete, but we must initialize it to 0 anyway. */ - mlog_write_dulint(dict_header + DICT_HDR_MIX_ID, - ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr); + mlog_write_ulint(dict_header + DICT_HDR_MAX_SPACE_ID, + 0, MLOG_4BYTES, mtr); + + /* Obsolete, but we must initialize it anyway. 
*/ + mlog_write_ulint(dict_header + DICT_HDR_MIX_ID_LOW, + DICT_HDR_FIRST_ID, MLOG_4BYTES, mtr); /* Create the B-tree roots for the clustered indexes of the basic system tables */ @@ -245,6 +263,29 @@ dict_boot(void) /* Get the dictionary header */ dict_hdr = dict_hdr_get(&mtr); + if (ut_dulint_cmp(mtr_read_dulint(dict_hdr + DICT_HDR_XTRADB_MARK, &mtr), + DICT_HDR_XTRADB_FLAG) != 0) { + /* not extended yet by XtraDB, need to be extended */ + ulint root_page_no; + + root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, + DICT_HDR_SPACE, 0, DICT_STATS_ID, + dict_ind_redundant, &mtr); + if (root_page_no == FIL_NULL) { + fprintf(stderr, "InnoDB: Warning: failed to create SYS_STATS btr.\n"); + srv_use_sys_stats_table = FALSE; + } else { + mlog_write_ulint(dict_hdr + DICT_HDR_STATS, root_page_no, + MLOG_4BYTES, &mtr); + mlog_write_dulint(dict_hdr + DICT_HDR_XTRADB_MARK, + DICT_HDR_XTRADB_FLAG, &mtr); + } + mtr_commit(&mtr); + /* restart mtr */ + mtr_start(&mtr); + dict_hdr = dict_hdr_get(&mtr); + } + /* Because we only write new row ids to disk-based data structure (dictionary header) when it is divisible by DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover @@ -275,6 +316,9 @@ dict_boot(void) and (TYPE & DICT_TF_FORMAT_MASK) are nonzero and TYPE = table->flags */ dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4); dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0); + /* MIX_LEN may contain additional table flags when + ROW_FORMAT!=REDUNDANT. Currently, these flags include + DICT_TF2_TEMPORARY. 
*/ dict_mem_table_add_col(table, heap, "MIX_LEN", DATA_INT, 0, 4); dict_mem_table_add_col(table, heap, "CLUSTER_NAME", DATA_BINARY, 0, 0); dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4); @@ -358,7 +402,7 @@ dict_boot(void) dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4); dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_INT, 0, 4); - /* The '+ 2' below comes from the 2 system fields */ + /* The '+ 2' below comes from the fields DB_TRX_ID, DB_ROLL_PTR */ #if DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2 #error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2" #endif @@ -368,6 +412,9 @@ dict_boot(void) #if DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2 #error "DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2" #endif +#if DICT_SYS_INDEXES_NAME_FIELD != 2 + 2 +#error "DICT_SYS_INDEXES_NAME_FIELD != 2 + 2" +#endif table->id = DICT_INDEXES_ID; dict_table_add_to_cache(table, heap); @@ -400,7 +447,7 @@ dict_boot(void) table->id = DICT_FIELDS_ID; dict_table_add_to_cache(table, heap); dict_sys->sys_fields = table; - mem_heap_free(heap); + mem_heap_empty(heap); index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND", DICT_HDR_SPACE, @@ -417,6 +464,41 @@ dict_boot(void) FALSE); ut_a(error == DB_SUCCESS); + /*-------------------------*/ + table = dict_mem_table_create("SYS_STATS", DICT_HDR_SPACE, 3, 0); + table->n_mysql_handles_opened = 1; /* for pin */ + + dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "KEY_COLS", DATA_INT, 0, 4); + dict_mem_table_add_col(table, heap, "DIFF_VALS", DATA_BINARY, 0, 0); + + /* The '+ 2' below comes from the fields DB_TRX_ID, DB_ROLL_PTR */ +#if DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2 +#error "DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2" +#endif + + table->id = DICT_STATS_ID; + dict_table_add_to_cache(table, heap); + dict_sys->sys_stats = table; + mem_heap_empty(heap); + + index = dict_mem_index_create("SYS_STATS", "CLUST_IND", + DICT_HDR_SPACE, + DICT_UNIQUE | DICT_CLUSTERED, 2); + + 
dict_mem_index_add_field(index, "INDEX_ID", 0); + dict_mem_index_add_field(index, "KEY_COLS", 0); + + index->id = DICT_STATS_ID; + error = dict_index_add_to_cache(table, index, + mtr_read_ulint(dict_hdr + + DICT_HDR_STATS, + MLOG_4BYTES, &mtr), + FALSE); + ut_a(error == DB_SUCCESS); + + mem_heap_free(heap); + mtr_commit(&mtr); /*-------------------------*/ @@ -430,6 +512,7 @@ dict_boot(void) dict_load_sys_table(dict_sys->sys_columns); dict_load_sys_table(dict_sys->sys_indexes); dict_load_sys_table(dict_sys->sys_fields); + dict_load_sys_table(dict_sys->sys_stats); mutex_exit(&(dict_sys->mutex)); } diff --git a/storage/xtradb/dict/dict0crea.c b/storage/xtradb/dict/dict0crea.c index e315716551e..258bf77d1fc 100644 --- a/storage/xtradb/dict/dict0crea.c +++ b/storage/xtradb/dict/dict0crea.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -51,16 +51,18 @@ static dtuple_t* dict_create_sys_tables_tuple( /*=========================*/ - dict_table_t* table, /*!< in: table */ - mem_heap_t* heap) /*!< in: memory heap from which the memory for - the built tuple is allocated */ + const dict_table_t* table, /*!< in: table */ + mem_heap_t* heap) /*!< in: memory heap from + which the memory for the built + tuple is allocated */ { dict_table_t* sys_tables; dtuple_t* entry; dfield_t* dfield; byte* ptr; - ut_ad(table && heap); + ut_ad(table); + ut_ad(heap); sys_tables = dict_sys->sys_tables; @@ -69,18 +71,18 @@ dict_create_sys_tables_tuple( dict_table_copy_types(entry, sys_tables); /* 0: NAME -----------------------------*/ - dfield = dtuple_get_nth_field(entry, 0); + dfield = dtuple_get_nth_field(entry, 0/*NAME*/); dfield_set_data(dfield, table->name, ut_strlen(table->name)); /* 3: ID -------------------------------*/ - dfield = dtuple_get_nth_field(entry, 1); + dfield = dtuple_get_nth_field(entry, 1/*ID*/); ptr = mem_heap_alloc(heap, 8); mach_write_to_8(ptr, table->id); dfield_set_data(dfield, ptr, 8); /* 4: N_COLS ---------------------------*/ - dfield = dtuple_get_nth_field(entry, 2); + dfield = dtuple_get_nth_field(entry, 2/*N_COLS*/); #if DICT_TF_COMPACT != 1 #error @@ -91,40 +93,41 @@ dict_create_sys_tables_tuple( | ((table->flags & DICT_TF_COMPACT) << 31)); dfield_set_data(dfield, ptr, 4); /* 5: TYPE -----------------------------*/ - dfield = dtuple_get_nth_field(entry, 3); + dfield = dtuple_get_nth_field(entry, 3/*TYPE*/); ptr = mem_heap_alloc(heap, 4); - if (table->flags & ~DICT_TF_COMPACT) { + if (table->flags & (~DICT_TF_COMPACT & ~(~0 << DICT_TF_BITS))) { ut_a(table->flags & DICT_TF_COMPACT); ut_a(dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP); ut_a(((ulonglong) table->flags & DICT_TF_ZSSIZE_MASK) <= (ulonglong) (DICT_TF_ZSSIZE_MAX << 
DICT_TF_ZSSIZE_SHIFT)); - ut_a(!(table->flags & (~0 << DICT_TF_BITS))); - mach_write_to_4(ptr, table->flags); + ut_a(!(table->flags & (~0 << DICT_TF2_BITS))); + mach_write_to_4(ptr, table->flags & ~(~0 << DICT_TF_BITS)); } else { mach_write_to_4(ptr, DICT_TABLE_ORDINARY); } dfield_set_data(dfield, ptr, 4); /* 6: MIX_ID (obsolete) ---------------------------*/ - dfield = dtuple_get_nth_field(entry, 4); + dfield = dtuple_get_nth_field(entry, 4/*MIX_ID*/); ptr = mem_heap_zalloc(heap, 8); dfield_set_data(dfield, ptr, 8); - /* 7: MIX_LEN (obsolete) --------------------------*/ + /* 7: MIX_LEN (additional flags) --------------------------*/ - dfield = dtuple_get_nth_field(entry, 5); + dfield = dtuple_get_nth_field(entry, 5/*MIX_LEN*/); - ptr = mem_heap_zalloc(heap, 4); + ptr = mem_heap_alloc(heap, 4); + mach_write_to_4(ptr, table->flags >> DICT_TF2_SHIFT); dfield_set_data(dfield, ptr, 4); /* 8: CLUSTER_NAME ---------------------*/ - dfield = dtuple_get_nth_field(entry, 6); + dfield = dtuple_get_nth_field(entry, 6/*CLUSTER_NAME*/); dfield_set_null(dfield); /* not supported */ /* 9: SPACE ----------------------------*/ - dfield = dtuple_get_nth_field(entry, 7); + dfield = dtuple_get_nth_field(entry, 7/*SPACE*/); ptr = mem_heap_alloc(heap, 4); mach_write_to_4(ptr, table->space); @@ -143,19 +146,21 @@ static dtuple_t* dict_create_sys_columns_tuple( /*==========================*/ - dict_table_t* table, /*!< in: table */ - ulint i, /*!< in: column number */ - mem_heap_t* heap) /*!< in: memory heap from which the memory for - the built tuple is allocated */ + const dict_table_t* table, /*!< in: table */ + ulint i, /*!< in: column number */ + mem_heap_t* heap) /*!< in: memory heap from + which the memory for the built + tuple is allocated */ { dict_table_t* sys_columns; dtuple_t* entry; const dict_col_t* column; dfield_t* dfield; byte* ptr; - const char* col_name; + const char* col_name; - ut_ad(table && heap); + ut_ad(table); + ut_ad(heap); column = 
dict_table_get_nth_col(table, i); @@ -166,47 +171,47 @@ dict_create_sys_columns_tuple( dict_table_copy_types(entry, sys_columns); /* 0: TABLE_ID -----------------------*/ - dfield = dtuple_get_nth_field(entry, 0); + dfield = dtuple_get_nth_field(entry, 0/*TABLE_ID*/); ptr = mem_heap_alloc(heap, 8); mach_write_to_8(ptr, table->id); dfield_set_data(dfield, ptr, 8); /* 1: POS ----------------------------*/ - dfield = dtuple_get_nth_field(entry, 1); + dfield = dtuple_get_nth_field(entry, 1/*POS*/); ptr = mem_heap_alloc(heap, 4); mach_write_to_4(ptr, i); dfield_set_data(dfield, ptr, 4); /* 4: NAME ---------------------------*/ - dfield = dtuple_get_nth_field(entry, 2); + dfield = dtuple_get_nth_field(entry, 2/*NAME*/); col_name = dict_table_get_col_name(table, i); dfield_set_data(dfield, col_name, ut_strlen(col_name)); /* 5: MTYPE --------------------------*/ - dfield = dtuple_get_nth_field(entry, 3); + dfield = dtuple_get_nth_field(entry, 3/*MTYPE*/); ptr = mem_heap_alloc(heap, 4); mach_write_to_4(ptr, column->mtype); dfield_set_data(dfield, ptr, 4); /* 6: PRTYPE -------------------------*/ - dfield = dtuple_get_nth_field(entry, 4); + dfield = dtuple_get_nth_field(entry, 4/*PRTYPE*/); ptr = mem_heap_alloc(heap, 4); mach_write_to_4(ptr, column->prtype); dfield_set_data(dfield, ptr, 4); /* 7: LEN ----------------------------*/ - dfield = dtuple_get_nth_field(entry, 5); + dfield = dtuple_get_nth_field(entry, 5/*LEN*/); ptr = mem_heap_alloc(heap, 4); mach_write_to_4(ptr, column->len); dfield_set_data(dfield, ptr, 4); /* 8: PREC ---------------------------*/ - dfield = dtuple_get_nth_field(entry, 6); + dfield = dtuple_get_nth_field(entry, 6/*PREC*/); ptr = mem_heap_alloc(heap, 4); mach_write_to_4(ptr, 0/* unused */); @@ -230,19 +235,38 @@ dict_build_table_def_step( dict_table_t* table; dtuple_t* row; ulint error; + ulint flags; const char* path_or_name; ibool is_path; mtr_t mtr; + ulint space = 0; + ibool file_per_table; ut_ad(mutex_own(&(dict_sys->mutex))); table = 
node->table; - table->id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID); + /* Cache the global variable "srv_file_per_table" to + a local variable before using it. Please note + "srv_file_per_table" is not under dict_sys mutex + protection, and could be changed while executing + this function. So better to cache the current value + to a local variable, and all future reference to + "srv_file_per_table" should use this local variable. */ + file_per_table = srv_file_per_table; + + dict_hdr_get_new_id(&table->id, NULL, NULL); thr_get_trx(thr)->table_id = table->id; - if (srv_file_per_table) { + if (file_per_table) { + /* Get a new space id if srv_file_per_table is set */ + dict_hdr_get_new_id(NULL, NULL, &space); + + if (UNIV_UNLIKELY(space == ULINT_UNDEFINED)) { + return(DB_ERROR); + } + /* We create a new single-table tablespace for the table. We initially let it be 4 pages: - page 0 is the fsp header and an extent descriptor page, @@ -251,8 +275,6 @@ dict_build_table_def_step( - page 3 will contain the root of the clustered index of the table we create here. */ - ulint space = 0; /* reset to zero for the call below */ - if (table->dir_path_of_temp_table) { /* We place tables created with CREATE TEMPORARY TABLE in the tmp dir of mysqld server */ @@ -268,9 +290,10 @@ dict_build_table_def_step( ut_ad(!dict_table_zip_size(table) || dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP); + flags = table->flags & ~(~0 << DICT_TF_BITS); error = fil_create_new_single_table_tablespace( - &space, path_or_name, is_path, - table->flags == DICT_TF_COMPACT ? 0 : table->flags, + space, path_or_name, is_path, + flags == DICT_TF_COMPACT ? 
0 : flags, FIL_IBD_FILE_INITIAL_SIZE); table->space = (unsigned int) space; @@ -286,7 +309,7 @@ dict_build_table_def_step( mtr_commit(&mtr); } else { /* Create in the system tablespace: disallow new features */ - table->flags &= DICT_TF_COMPACT; + table->flags &= (~0 << DICT_TF_BITS) | DICT_TF_COMPACT; } row = dict_create_sys_tables_tuple(table, node->heap); @@ -322,9 +345,10 @@ static dtuple_t* dict_create_sys_indexes_tuple( /*==========================*/ - dict_index_t* index, /*!< in: index */ - mem_heap_t* heap) /*!< in: memory heap from which the memory for - the built tuple is allocated */ + const dict_index_t* index, /*!< in: index */ + mem_heap_t* heap) /*!< in: memory heap from + which the memory for the built + tuple is allocated */ { dict_table_t* sys_indexes; dict_table_t* table; @@ -333,7 +357,8 @@ dict_create_sys_indexes_tuple( byte* ptr; ut_ad(mutex_own(&(dict_sys->mutex))); - ut_ad(index && heap); + ut_ad(index); + ut_ad(heap); sys_indexes = dict_sys->sys_indexes; @@ -344,32 +369,32 @@ dict_create_sys_indexes_tuple( dict_table_copy_types(entry, sys_indexes); /* 0: TABLE_ID -----------------------*/ - dfield = dtuple_get_nth_field(entry, 0); + dfield = dtuple_get_nth_field(entry, 0/*TABLE_ID*/); ptr = mem_heap_alloc(heap, 8); mach_write_to_8(ptr, table->id); dfield_set_data(dfield, ptr, 8); /* 1: ID ----------------------------*/ - dfield = dtuple_get_nth_field(entry, 1); + dfield = dtuple_get_nth_field(entry, 1/*ID*/); ptr = mem_heap_alloc(heap, 8); mach_write_to_8(ptr, index->id); dfield_set_data(dfield, ptr, 8); /* 4: NAME --------------------------*/ - dfield = dtuple_get_nth_field(entry, 2); + dfield = dtuple_get_nth_field(entry, 2/*NAME*/); dfield_set_data(dfield, index->name, ut_strlen(index->name)); /* 5: N_FIELDS ----------------------*/ - dfield = dtuple_get_nth_field(entry, 3); + dfield = dtuple_get_nth_field(entry, 3/*N_FIELDS*/); ptr = mem_heap_alloc(heap, 4); mach_write_to_4(ptr, index->n_fields); dfield_set_data(dfield, ptr, 4); /* 6: 
TYPE --------------------------*/ - dfield = dtuple_get_nth_field(entry, 4); + dfield = dtuple_get_nth_field(entry, 4/*TYPE*/); ptr = mem_heap_alloc(heap, 4); mach_write_to_4(ptr, index->type); @@ -381,7 +406,7 @@ dict_create_sys_indexes_tuple( #error "DICT_SYS_INDEXES_SPACE_NO_FIELD != 7" #endif - dfield = dtuple_get_nth_field(entry, 5); + dfield = dtuple_get_nth_field(entry, 5/*SPACE*/); ptr = mem_heap_alloc(heap, 4); mach_write_to_4(ptr, index->space); @@ -393,7 +418,7 @@ dict_create_sys_indexes_tuple( #error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 8" #endif - dfield = dtuple_get_nth_field(entry, 6); + dfield = dtuple_get_nth_field(entry, 6/*PAGE_NO*/); ptr = mem_heap_alloc(heap, 4); mach_write_to_4(ptr, FIL_NULL); @@ -412,10 +437,11 @@ static dtuple_t* dict_create_sys_fields_tuple( /*=========================*/ - dict_index_t* index, /*!< in: index */ - ulint i, /*!< in: field number */ - mem_heap_t* heap) /*!< in: memory heap from which the memory for - the built tuple is allocated */ + const dict_index_t* index, /*!< in: index */ + ulint i, /*!< in: field number */ + mem_heap_t* heap) /*!< in: memory heap from + which the memory for the built + tuple is allocated */ { dict_table_t* sys_fields; dtuple_t* entry; @@ -425,7 +451,8 @@ dict_create_sys_fields_tuple( ibool index_contains_column_prefix_field = FALSE; ulint j; - ut_ad(index && heap); + ut_ad(index); + ut_ad(heap); for (j = 0; j < index->n_fields; j++) { if (dict_index_get_nth_field(index, j)->prefix_len > 0) { @@ -443,7 +470,7 @@ dict_create_sys_fields_tuple( dict_table_copy_types(entry, sys_fields); /* 0: INDEX_ID -----------------------*/ - dfield = dtuple_get_nth_field(entry, 0); + dfield = dtuple_get_nth_field(entry, 0/*INDEX_ID*/); ptr = mem_heap_alloc(heap, 8); mach_write_to_8(ptr, index->id); @@ -451,7 +478,7 @@ dict_create_sys_fields_tuple( dfield_set_data(dfield, ptr, 8); /* 1: POS + PREFIX LENGTH ----------------------------*/ - dfield = dtuple_get_nth_field(entry, 1); + dfield = 
dtuple_get_nth_field(entry, 1/*POS*/); ptr = mem_heap_alloc(heap, 4); @@ -471,7 +498,7 @@ dict_create_sys_fields_tuple( dfield_set_data(dfield, ptr, 4); /* 4: COL_NAME -------------------------*/ - dfield = dtuple_get_nth_field(entry, 2); + dfield = dtuple_get_nth_field(entry, 2/*COL_NAME*/); dfield_set_data(dfield, field->name, ut_strlen(field->name)); @@ -481,6 +508,51 @@ dict_create_sys_fields_tuple( } /*****************************************************************//** +Based on an index object, this function builds the entry to be inserted +in the SYS_STATS system table. +@return the tuple which should be inserted */ +static +dtuple_t* +dict_create_sys_stats_tuple( +/*========================*/ + const dict_index_t* index, + ulint i, + mem_heap_t* heap) +{ + dict_table_t* sys_stats; + dtuple_t* entry; + dfield_t* dfield; + byte* ptr; + + ut_ad(index); + ut_ad(heap); + + sys_stats = dict_sys->sys_stats; + + entry = dtuple_create(heap, 3 + DATA_N_SYS_COLS); + + dict_table_copy_types(entry, sys_stats); + + /* 0: INDEX_ID -----------------------*/ + dfield = dtuple_get_nth_field(entry, 0/*INDEX_ID*/); + ptr = mem_heap_alloc(heap, 8); + mach_write_to_8(ptr, index->id); + dfield_set_data(dfield, ptr, 8); + /* 1: KEY_COLS -----------------------*/ + dfield = dtuple_get_nth_field(entry, 1/*KEY_COLS*/); + ptr = mem_heap_alloc(heap, 4); + mach_write_to_4(ptr, i); + dfield_set_data(dfield, ptr, 4); + /* 4: DIFF_VALS ----------------------*/ + dfield = dtuple_get_nth_field(entry, 2/*DIFF_VALS*/); + ptr = mem_heap_alloc(heap, 8); + mach_write_to_8(ptr, ut_dulint_zero); /* initial value is 0 */ + dfield_set_data(dfield, ptr, 8); + + return(entry); +} + +/*****************************************************************//** Creates the tuple with which the index entry is searched for writing the index tree root page number, if such a tree is created. 
@return the tuple for search */ @@ -550,7 +622,7 @@ dict_build_index_def_step( ut_ad((UT_LIST_GET_LEN(table->indexes) > 0) || dict_index_is_clust(index)); - index->id = dict_hdr_get_new_id(DICT_HDR_INDEX_ID); + dict_hdr_get_new_id(NULL, &index->id, NULL); /* Inherit the space id from the table; we store all indexes of a table in the same tablespace */ @@ -590,6 +662,27 @@ dict_build_field_def_step( } /***************************************************************//** +Builds a row for storing stats to insert. +@return DB_SUCCESS */ +static +ulint +dict_build_stats_def_step( +/*======================*/ + ind_node_t* node) +{ + dict_index_t* index; + dtuple_t* row; + + index = node->index; + + row = dict_create_sys_stats_tuple(index, node->stats_no, node->heap); + + ins_node_set_new_row(node->stats_def, row); + + return(DB_SUCCESS); +} + +/***************************************************************//** Creates an index tree for the index if it is not a member of a cluster. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ static @@ -602,6 +695,7 @@ dict_create_index_tree_step( dict_table_t* sys_indexes; dict_table_t* table; dtuple_t* search_tuple; + ulint zip_size; btr_pcur_t pcur; mtr_t mtr; @@ -626,8 +720,9 @@ dict_create_index_tree_step( btr_pcur_move_to_next_user_rec(&pcur, &mtr); - node->page_no = btr_create(index->type, index->space, - dict_table_zip_size(index->table), + zip_size = dict_table_zip_size(index->table); + + node->page_no = btr_create(index->type, index->space, zip_size, index->id, index, &mtr); /* printf("Created a new index tree in space %lu root page %lu\n", index->space, index->page_no); */ @@ -911,6 +1006,49 @@ ind_create_graph_create( dict_sys->sys_fields, heap); node->field_def->common.parent = node; + if (srv_use_sys_stats_table) { + node->stats_def = ins_node_create(INS_DIRECT, + dict_sys->sys_stats, heap); + node->stats_def->common.parent = node; + } else { + node->stats_def = NULL; + } + + node->commit_node = commit_node_create(heap); + 
node->commit_node->common.parent = node; + + return(node); +} + +/*********************************************************************//** +*/ +UNIV_INTERN +ind_node_t* +ind_insert_stats_graph_create( +/*==========================*/ + dict_index_t* index, + mem_heap_t* heap) +{ + ind_node_t* node; + + node = mem_heap_alloc(heap, sizeof(ind_node_t)); + + node->common.type = QUE_NODE_INSERT_STATS; + + node->index = index; + + node->state = INDEX_BUILD_STATS_COLS; + node->page_no = FIL_NULL; + node->heap = mem_heap_create(256); + + node->ind_def = NULL; + node->field_def = NULL; + + node->stats_def = ins_node_create(INS_DIRECT, + dict_sys->sys_stats, heap); + node->stats_def->common.parent = node; + node->stats_no = 0; + node->commit_node = commit_node_create(heap); node->commit_node->common.parent = node; @@ -1061,6 +1199,7 @@ dict_create_index_step( node->state = INDEX_BUILD_FIELD_DEF; node->field_no = 0; + node->stats_no = 0; thr->run_node = node->ind_def; @@ -1092,8 +1231,11 @@ dict_create_index_step( dulint index_id = node->index->id; - err = dict_index_add_to_cache(node->table, node->index, - FIL_NULL, TRUE); + err = dict_index_add_to_cache( + node->table, node->index, FIL_NULL, + trx_is_strict(trx) + || dict_table_get_format(node->table) + >= DICT_TF_FORMAT_ZIP); node->index = dict_index_get_if_in_cache_low(index_id); ut_a(!node->index == (err != DB_SUCCESS)); @@ -1103,7 +1245,31 @@ dict_create_index_step( goto function_exit; } - node->state = INDEX_CREATE_INDEX_TREE; + if (srv_use_sys_stats_table) { + node->state = INDEX_BUILD_STATS_COLS; + } else { + node->state = INDEX_CREATE_INDEX_TREE; + } + } + + if (node->state == INDEX_BUILD_STATS_COLS) { + if (node->stats_no <= dict_index_get_n_unique(node->index)) { + + err = dict_build_stats_def_step(node); + + if (err != DB_SUCCESS) { + + goto function_exit; + } + + node->stats_no++; + + thr->run_node = node->stats_def; + + return(thr); + } else { + node->state = INDEX_CREATE_INDEX_TREE; + } } if (node->state == 
INDEX_CREATE_INDEX_TREE) { @@ -1155,6 +1321,66 @@ function_exit: } /****************************************************************//** +*/ +UNIV_INTERN +que_thr_t* +dict_insert_stats_step( +/*===================*/ + que_thr_t* thr) /*!< in: query thread */ +{ + ind_node_t* node; + ulint err = DB_ERROR; + trx_t* trx; + + ut_ad(thr); + + trx = thr_get_trx(thr); + + node = thr->run_node; + + if (thr->prev_node == que_node_get_parent(node)) { + node->state = INDEX_BUILD_STATS_COLS; + } + + if (node->state == INDEX_BUILD_STATS_COLS) { + if (node->stats_no <= dict_index_get_n_unique(node->index)) { + + err = dict_build_stats_def_step(node); + + if (err != DB_SUCCESS) { + + goto function_exit; + } + + node->stats_no++; + + thr->run_node = node->stats_def; + + return(thr); + } else { + node->state = INDEX_COMMIT_WORK; + } + } + + if (node->state == INDEX_COMMIT_WORK) { + + /* do not commit transaction here for now */ + } + +function_exit: + trx->error_state = err; + + if (err == DB_SUCCESS) { + } else { + return(NULL); + } + + thr->run_node = que_node_get_parent(node); + + return(thr); +} + +/****************************************************************//** Creates the foreign key constraints system tables inside InnoDB at database creation or database start if they are not found or are not of the right form. diff --git a/storage/xtradb/dict/dict0dict.c b/storage/xtradb/dict/dict0dict.c index 7b4174b4b15..51ee7f9246f 100644 --- a/storage/xtradb/dict/dict0dict.c +++ b/storage/xtradb/dict/dict0dict.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -81,6 +81,10 @@ UNIV_INTERN rw_lock_t dict_operation_lock; /** Identifies generated InnoDB foreign key names */ static char dict_ibfk[] = "_ibfk_"; +/** array of mutexes protecting dict_index_t::stat_n_diff_key_vals[] */ +#define DICT_INDEX_STAT_MUTEX_SIZE 32 +static mutex_t dict_index_stat_mutex[DICT_INDEX_STAT_MUTEX_SIZE]; + /*******************************************************************//** Tries to find column names for the index and sets the col field of the index. @@ -141,7 +145,7 @@ static void dict_field_print_low( /*=================*/ - dict_field_t* field); /*!< in: field */ + const dict_field_t* field); /*!< in: field */ /*********************************************************************//** Frees a foreign key struct. */ static @@ -240,6 +244,45 @@ dict_mutex_exit_for_mysql(void) mutex_exit(&(dict_sys->mutex)); } +/** Get the mutex that protects index->stat_n_diff_key_vals[] */ +#define GET_INDEX_STAT_MUTEX(index) \ + (&dict_index_stat_mutex[ut_fold_dulint(index->id) \ + % DICT_INDEX_STAT_MUTEX_SIZE]) + +/**********************************************************************//** +Lock the appropriate mutex to protect index->stat_n_diff_key_vals[]. +index->id is used to pick the right mutex and it should not change +before dict_index_stat_mutex_exit() is called on this index. */ +UNIV_INTERN +void +dict_index_stat_mutex_enter( +/*========================*/ + const dict_index_t* index) /*!< in: index */ +{ + ut_ad(index != NULL); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + ut_ad(index->cached); + ut_ad(!index->to_be_dropped); + + mutex_enter(GET_INDEX_STAT_MUTEX(index)); +} + +/**********************************************************************//** +Unlock the appropriate mutex that protects index->stat_n_diff_key_vals[]. 
*/ +UNIV_INTERN +void +dict_index_stat_mutex_exit( +/*=======================*/ + const dict_index_t* index) /*!< in: index */ +{ + ut_ad(index != NULL); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + ut_ad(index->cached); + ut_ad(!index->to_be_dropped); + + mutex_exit(GET_INDEX_STAT_MUTEX(index)); +} + /********************************************************************//** Decrements the count of open MySQL handles to a table. */ UNIV_INTERN @@ -528,13 +571,11 @@ dict_table_get_on_id( if (ut_dulint_cmp(table_id, DICT_FIELDS_ID) <= 0 || trx->dict_operation_lock_mode == RW_X_LATCH) { - /* It is a system table which will always exist in the table - cache: we avoid acquiring the dictionary mutex, because - if we are doing a rollback to handle an error in TABLE - CREATE, for example, we already have the mutex! */ - ut_ad(mutex_own(&(dict_sys->mutex)) - || trx->dict_operation_lock_mode == RW_X_LATCH); + /* Note: An X latch implies that the transaction + already owns the dictionary mutex. */ + + ut_ad(mutex_own(&dict_sys->mutex)); return(dict_table_get_on_id_low(table_id)); } @@ -608,6 +649,8 @@ void dict_init(void) /*===========*/ { + int i; + dict_sys = mem_alloc(sizeof(dict_sys_t)); mutex_create(&dict_sys->mutex, SYNC_DICT); @@ -628,6 +671,10 @@ dict_init(void) ut_a(dict_foreign_err_file); mutex_create(&dict_foreign_err_mutex, SYNC_ANY_LATCH); + + for (i = 0; i < DICT_INDEX_STAT_MUTEX_SIZE; i++) { + mutex_create(&dict_index_stat_mutex[i], SYNC_INDEX_TREE); + } } /**********************************************************************//** @@ -663,7 +710,7 @@ dict_table_get( /* If table->ibd_file_missing == TRUE, this will print an error message and return without doing anything. 
*/ - dict_update_statistics(table); + dict_update_statistics(table, FALSE); } } @@ -806,7 +853,8 @@ dict_table_add_to_cache( /* Add table to LRU list of tables */ UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table); - dict_sys->size += mem_heap_get_size(table->heap); + dict_sys->size += mem_heap_get_size(table->heap) + + strlen(table->name) + 1; } /**********************************************************************//** @@ -860,14 +908,21 @@ dict_table_rename_in_cache( dict_foreign_t* foreign; dict_index_t* index; ulint fold; - ulint old_size; - const char* old_name; + char old_name[MAX_TABLE_NAME_LEN + 1]; ut_ad(table); ut_ad(mutex_own(&(dict_sys->mutex))); - old_size = mem_heap_get_size(table->heap); - old_name = table->name; + /* store the old/current name to an automatic variable */ + if (strlen(table->name) + 1 <= sizeof(old_name)) { + memcpy(old_name, table->name, strlen(table->name) + 1); + } else { + ut_print_timestamp(stderr); + fprintf(stderr, "InnoDB: too long table name: '%s', " + "max length is %d\n", table->name, + MAX_TABLE_NAME_LEN); + ut_error; + } fold = ut_fold_string(new_name); @@ -913,12 +968,22 @@ dict_table_rename_in_cache( /* Remove table from the hash tables of tables */ HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash, ut_fold_string(old_name), table); - table->name = mem_heap_strdup(table->heap, new_name); + + if (strlen(new_name) > strlen(table->name)) { + /* We allocate MAX_TABLE_NAME_LEN+1 bytes here to avoid + memory fragmentation, we assume a repeated calls of + ut_realloc() with the same size do not cause fragmentation */ + ut_a(strlen(new_name) <= MAX_TABLE_NAME_LEN); + table->name = ut_realloc(table->name, MAX_TABLE_NAME_LEN + 1); + } + memcpy(table->name, new_name, strlen(new_name) + 1); /* Add table to hash table of tables */ HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, table); - dict_sys->size += (mem_heap_get_size(table->heap) - old_size); + + dict_sys->size += strlen(new_name) - 
strlen(old_name); + ut_a(dict_sys->size > 0); /* Update the table_name field in indexes */ index = dict_table_get_first_index(table); @@ -1143,7 +1208,7 @@ dict_table_remove_from_cache( /* Remove table from LRU list of tables */ UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table); - size = mem_heap_get_size(table->heap); + size = mem_heap_get_size(table->heap) + strlen(table->name) + 1; ut_ad(dict_sys->size >= size); @@ -1442,11 +1507,7 @@ dict_index_too_big_for_tree( goto add_field_size; } - if (srv_relax_table_creation) { - field_max_size = dict_col_get_min_size(col); - } else { field_max_size = dict_col_get_max_size(col); - } field_ext_max_size = field_max_size < 256 ? 1 : 2; if (field->prefix_len) { @@ -1527,6 +1588,7 @@ dict_index_add_to_cache( if (!dict_index_find_cols(table, index)) { + dict_mem_index_free(index); return(DB_CORRUPTION); } @@ -3030,25 +3092,28 @@ static char* dict_strip_comments( /*================*/ - const char* sql_string) /*!< in: SQL string */ + const char* sql_string, /*!< in: SQL string */ + size_t sql_length) /*!< in: length of sql_string */ { char* str; const char* sptr; + const char* eptr = sql_string + sql_length; char* ptr; /* unclosed quote character (0 if none) */ char quote = 0; - str = mem_alloc(strlen(sql_string) + 1); + str = mem_alloc(sql_length + 1); sptr = sql_string; ptr = str; for (;;) { scan_more: - if (*sptr == '\0') { + if (sptr >= eptr || *sptr == '\0') { +end_of_string: *ptr = '\0'; - ut_a(ptr <= str + strlen(sql_string)); + ut_a(ptr <= str + sql_length); return(str); } @@ -3067,30 +3132,35 @@ scan_more: || (sptr[0] == '-' && sptr[1] == '-' && sptr[2] == ' ')) { for (;;) { + if (++sptr >= eptr) { + goto end_of_string; + } + /* In Unix a newline is 0x0A while in Windows it is 0x0D followed by 0x0A */ - if (*sptr == (char)0x0A - || *sptr == (char)0x0D - || *sptr == '\0') { - + switch (*sptr) { + case (char) 0X0A: + case (char) 0x0D: + case '\0': goto scan_more; } - - sptr++; } } else if (!quote && *sptr == '/' && 
*(sptr + 1) == '*') { + sptr += 2; for (;;) { - if (*sptr == '*' && *(sptr + 1) == '/') { - - sptr += 2; - - goto scan_more; + if (sptr >= eptr) { + goto end_of_string; } - if (*sptr == '\0') { - + switch (*sptr) { + case '\0': goto scan_more; + case '*': + if (sptr[1] == '/') { + sptr += 2; + goto scan_more; + } } sptr++; @@ -3771,6 +3841,7 @@ dict_create_foreign_constraints( name before it: test.table2; the default database id the database of parameter name */ + size_t sql_length, /*!< in: length of sql_string */ const char* name, /*!< in: table full name in the normalized form database_name/table_name */ @@ -3785,7 +3856,7 @@ dict_create_foreign_constraints( ut_a(trx); ut_a(trx->mysql_thd); - str = dict_strip_comments(sql_string); + str = dict_strip_comments(sql_string, sql_length); heap = mem_heap_create(10000); err = dict_create_foreign_constraints_low( @@ -3818,6 +3889,7 @@ dict_foreign_parse_drop_constraints( dict_foreign_t* foreign; ibool success; char* str; + size_t len; const char* ptr; const char* id; FILE* ef = dict_foreign_err_file; @@ -3832,7 +3904,10 @@ dict_foreign_parse_drop_constraints( *constraints_to_drop = mem_heap_alloc(heap, 1000 * sizeof(char*)); - str = dict_strip_comments(*(trx->mysql_query_str)); + ptr = innobase_get_stmt(trx->mysql_thd, &len); + + str = dict_strip_comments(ptr, len); + ptr = str; ut_ad(mutex_own(&(dict_sys->mutex))); @@ -4173,6 +4248,259 @@ dict_index_calc_min_rec_len( } /*********************************************************************//** +functions to use SYS_STATS system table. 
*/ +static +ibool +dict_reload_statistics( +/*===================*/ + dict_table_t* table, + ulint* sum_of_index_sizes) +{ + dict_index_t* index; + ulint size; + mem_heap_t* heap; + + index = dict_table_get_first_index(table); + + if (index == NULL) { + /* Table definition is corrupt */ + + return(FALSE); + } + + heap = mem_heap_create(1000); + + while (index) { + if (table->is_corrupt) { + ut_a(srv_pass_corrupt_table); + mem_heap_free(heap); + return(FALSE); + } + + size = btr_get_size(index, BTR_TOTAL_SIZE); + + index->stat_index_size = size; + + *sum_of_index_sizes += size; + + size = btr_get_size(index, BTR_N_LEAF_PAGES); + + if (size == 0) { + /* The root node of the tree is a leaf */ + size = 1; + } + + index->stat_n_leaf_pages = size; + +/*===========================================*/ +{ + dict_table_t* sys_stats; + dict_index_t* sys_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + ulint key_cols; + ulint n_cols; + const rec_t* rec; + const byte* field; + ulint len; + ib_int64_t* stat_n_diff_key_vals_tmp; + byte* buf; + ulint i; + mtr_t mtr; + + n_cols = dict_index_get_n_unique(index); + stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + + sys_stats = dict_sys->sys_stats; + sys_index = UT_LIST_GET_FIRST(sys_stats->indexes); + ut_a(!dict_table_is_comp(sys_stats)); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + buf = mem_heap_alloc(heap, 8); + mach_write_to_8(buf, index->id); + + dfield_set_data(dfield, buf, 8); + dict_index_copy_types(tuple, sys_index, 1); + + mtr_start(&mtr); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + for (i = 0; i <= n_cols; i++) { + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur) + || ut_dulint_cmp(mach_read_from_8(rec_get_nth_field_old(rec, 0, &len)), + index->id)) { + /* not found: even 1 if not found should not be alowed */ + fprintf(stderr, "InnoDB: Warning: stats for 
%s/%s (%lu/%lu)" + " not fonund in SYS_STATS\n", + index->table_name, index->name, i, n_cols); + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + return(FALSE); + } + + if (rec_get_deleted_flag(rec, 0)) { + goto next_rec; + } + + field = rec_get_nth_field_old(rec, 1, &len); + ut_a(len == 4); + + key_cols = mach_read_from_4(field); + + ut_a(i == key_cols); + + field = rec_get_nth_field_old(rec, DICT_SYS_STATS_DIFF_VALS_FIELD, &len); + ut_a(len == 8); + + stat_n_diff_key_vals_tmp[i] = ut_conv_dulint_to_longlong(mach_read_from_8(field)); +next_rec: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + dict_index_stat_mutex_enter(index); + for (i = 0; i <= n_cols; i++) { + index->stat_n_diff_key_vals[i] = stat_n_diff_key_vals_tmp[i]; + } + dict_index_stat_mutex_exit(index); +} +/*===========================================*/ + + index = dict_table_get_next_index(index); + } + + mem_heap_free(heap); + return(TRUE); +} + +static +void +dict_store_statistics( +/*==================*/ + dict_table_t* table) +{ + dict_index_t* index; + mem_heap_t* heap; + + index = dict_table_get_first_index(table); + + ut_a(index); + + heap = mem_heap_create(1000); + + while (index) { + if (table->is_corrupt) { + ut_a(srv_pass_corrupt_table); + mem_heap_free(heap); + return; + } + +/*===========================================*/ +{ + dict_table_t* sys_stats; + dict_index_t* sys_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + ulint key_cols; + ulint n_cols; + ulint rests; + const rec_t* rec; + const byte* field; + ulint len; + ib_int64_t* stat_n_diff_key_vals_tmp; + byte* buf; + ulint i; + mtr_t mtr; + + n_cols = dict_index_get_n_unique(index); + stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + + dict_index_stat_mutex_enter(index); + for (i = 0; i <= n_cols; i++) { + stat_n_diff_key_vals_tmp[i] = index->stat_n_diff_key_vals[i]; + } + dict_index_stat_mutex_exit(index); + 
+ sys_stats = dict_sys->sys_stats; + sys_index = UT_LIST_GET_FIRST(sys_stats->indexes); + ut_a(!dict_table_is_comp(sys_stats)); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + buf = mem_heap_alloc(heap, 8); + mach_write_to_8(buf, index->id); + + dfield_set_data(dfield, buf, 8); + dict_index_copy_types(tuple, sys_index, 1); + + mtr_start(&mtr); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_MODIFY_LEAF, &pcur, &mtr); + rests = n_cols + 1; + for (i = 0; i <= n_cols; i++) { + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur) + || ut_dulint_cmp(mach_read_from_8(rec_get_nth_field_old(rec, 0, &len)), + index->id)) { + /* not found */ + btr_pcur_close(&pcur); + mtr_commit(&mtr); + break; + } + + if (rec_get_deleted_flag(rec, 0)) { + goto next_rec; + } + + field = rec_get_nth_field_old(rec, 1, &len); + ut_a(len == 4); + + key_cols = mach_read_from_4(field); + + field = rec_get_nth_field_old(rec, DICT_SYS_STATS_DIFF_VALS_FIELD, &len); + ut_a(len == 8); + + mlog_write_dulint((byte*)field, + ut_dulint_create((ulint) (stat_n_diff_key_vals_tmp[key_cols] >> 32), + (ulint) stat_n_diff_key_vals_tmp[key_cols] & 0xFFFFFFFF), + &mtr); + + rests--; + +next_rec: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + } + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + if (rests) { + fprintf(stderr, "InnoDB: Warning: failed to store %lu stats entries" + " of %s/%s to SYS_STATS system table.\n", + rests, index->table_name, index->name); + } +} +/*===========================================*/ + + index = dict_table_get_next_index(index); + } + + mem_heap_free(heap); +} + +/*********************************************************************//** Calculates new estimates for table and index statistics. The statistics are used in query optimization. 
*/ UNIV_INTERN @@ -4180,9 +4508,10 @@ void dict_update_statistics_low( /*=======================*/ dict_table_t* table, /*!< in/out: table */ - ibool has_dict_mutex __attribute__((unused))) + ibool has_dict_mutex __attribute__((unused)), /*!< in: TRUE if the caller has the dictionary mutex */ + ibool sync) /*!< in: TRUE if must update SYS_STATS */ { dict_index_t* index; ulint size; @@ -4208,6 +4537,23 @@ dict_update_statistics_low( return; } + if (srv_use_sys_stats_table && !sync) { + /* reload statistics from SYS_STATS table */ + if (dict_reload_statistics(table, &sum_of_index_sizes)) { + /* success */ +#ifdef UNIV_DEBUG + fprintf(stderr, "InnoDB: DEBUG: reload_statistics is scceeded for %s.\n", + table->name); +#endif + goto end; + } + } +#ifdef UNIV_DEBUG + fprintf(stderr, "InnoDB: DEBUG: update_statistics for %s.\n", + table->name); +#endif + sum_of_index_sizes = 0; + /* Find out the sizes of the indexes and how many different values for the key they approximately have */ @@ -4245,11 +4591,20 @@ dict_update_statistics_low( index = dict_table_get_next_index(index); } + if (srv_use_sys_stats_table) { + /* store statistics to SYS_STATS table */ + dict_store_statistics(table); + } +end: index = dict_table_get_first_index(table); + dict_index_stat_mutex_enter(index); + table->stat_n_rows = index->stat_n_diff_key_vals[ dict_index_get_n_unique(index)]; + dict_index_stat_mutex_exit(index); + table->stat_clustered_index_size = index->stat_index_size; table->stat_sum_of_other_index_sizes = sum_of_index_sizes @@ -4267,9 +4622,10 @@ UNIV_INTERN void dict_update_statistics( /*===================*/ - dict_table_t* table) /*!< in/out: table */ + dict_table_t* table, /*!< in/out: table */ + ibool sync) { - dict_update_statistics_low(table, FALSE); + dict_update_statistics_low(table, FALSE, sync); } /**********************************************************************//** @@ -4350,7 +4706,7 @@ dict_table_print_low( ut_ad(mutex_own(&(dict_sys->mutex))); if 
(srv_stats_auto_update) - dict_update_statistics_low(table, TRUE); + dict_update_statistics_low(table, TRUE, FALSE); fprintf(stderr, "--------------------------------------\n" @@ -4428,6 +4784,8 @@ dict_index_print_low( ut_ad(mutex_own(&(dict_sys->mutex))); + dict_index_stat_mutex_enter(index); + if (index->n_user_defined_cols > 0) { n_vals = index->stat_n_diff_key_vals[ index->n_user_defined_cols]; @@ -4435,6 +4793,8 @@ dict_index_print_low( n_vals = index->stat_n_diff_key_vals[1]; } + dict_index_stat_mutex_exit(index); + if (dict_index_is_clust(index)) { type_string = "clustered index"; } else if (dict_index_is_unique(index)) { @@ -4480,7 +4840,7 @@ static void dict_field_print_low( /*=================*/ - dict_field_t* field) /*!< in: field */ + const dict_field_t* field) /*!< in: field */ { ut_ad(mutex_own(&(dict_sys->mutex))); @@ -4844,8 +5204,10 @@ UNIV_INTERN void dict_table_check_for_dup_indexes( /*=============================*/ - const dict_table_t* table) /*!< in: Check for dup indexes + const dict_table_t* table, /*!< in: Check for dup indexes in this table */ + ibool tmp_ok) /*!< in: TRUE=allow temporary + index names */ { /* Check for duplicates, ignoring indexes that are marked as to be dropped */ @@ -4853,13 +5215,17 @@ dict_table_check_for_dup_indexes( const dict_index_t* index1; const dict_index_t* index2; + ut_ad(mutex_own(&dict_sys->mutex)); + /* The primary index _must_ exist */ ut_a(UT_LIST_GET_LEN(table->indexes) > 0); index1 = UT_LIST_GET_FIRST(table->indexes); - index2 = UT_LIST_GET_NEXT(indexes, index1); - while (index1 && index2) { + do { + ut_ad(tmp_ok || *index1->name != TEMP_INDEX_PREFIX); + + index2 = UT_LIST_GET_NEXT(indexes, index1); while (index2) { @@ -4871,8 +5237,7 @@ dict_table_check_for_dup_indexes( } index1 = UT_LIST_GET_NEXT(indexes, index1); - index2 = UT_LIST_GET_NEXT(indexes, index1); - } + } while (index1); } #endif /* UNIV_DEBUG */ @@ -4925,6 +5290,10 @@ dict_close(void) mem_free(dict_sys); dict_sys = NULL; + + for (i = 
0; i < DICT_INDEX_STAT_MUTEX_SIZE; i++) { + mutex_free(&dict_index_stat_mutex[i]); + } } /************************************************************************* @@ -4939,7 +5308,7 @@ dict_table_set_corrupt_by_space( dict_table_t* table; ibool found = FALSE; - ut_a(space_id != 0 && space_id < SRV_LOG_SPACE_FIRST_ID); + ut_a(!trx_sys_sys_space(space_id) && space_id < SRV_LOG_SPACE_FIRST_ID); if (need_mutex) mutex_enter(&(dict_sys->mutex)); diff --git a/storage/xtradb/dict/dict0load.c b/storage/xtradb/dict/dict0load.c index 46cce5050cd..0d8292cc2bf 100644 --- a/storage/xtradb/dict/dict0load.c +++ b/storage/xtradb/dict/dict0load.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -40,6 +40,7 @@ Created 4/24/1996 Heikki Tuuri #include "rem0cmp.h" #include "srv0start.h" #include "srv0srv.h" +#include "trx0sys.h" /****************************************************************//** Compare the name of an index column. 
@@ -222,7 +223,7 @@ loop: is no index */ if (srv_stats_auto_update && dict_table_get_first_index(table)) { - dict_update_statistics_low(table, TRUE); + dict_update_statistics_low(table, TRUE, FALSE); } dict_table_print_low(table); @@ -260,7 +261,7 @@ dict_sys_tables_get_flags( return(0); } - field = rec_get_nth_field_old(rec, 4, &len); + field = rec_get_nth_field_old(rec, 4/*N_COLS*/, &len); n_cols = mach_read_from_4(field); if (UNIV_UNLIKELY(!(n_cols & 0x80000000UL))) { @@ -316,7 +317,7 @@ dict_check_tablespaces_and_store_max_id( dict_index_t* sys_index; btr_pcur_t pcur; const rec_t* rec; - ulint max_space_id = 0; + ulint max_space_id; mtr_t mtr; mutex_enter(&(dict_sys->mutex)); @@ -327,6 +328,11 @@ dict_check_tablespaces_and_store_max_id( sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); ut_a(!dict_table_is_comp(sys_tables)); + max_space_id = mtr_read_ulint(dict_hdr_get(&mtr) + + DICT_HDR_MAX_SPACE_ID, + MLOG_4BYTES, &mtr); + fil_set_max_space_id_if_bigger(max_space_id); + btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr); loop: @@ -390,15 +396,35 @@ loop: mtr_commit(&mtr); - if (space_id != 0 && in_crash_recovery) { + if (trx_sys_sys_space(space_id)) { + /* The system tablespace always exists. */ + } else if (in_crash_recovery) { /* Check that the tablespace (the .ibd file) really - exists; print a warning to the .err log if not */ - - fil_space_for_table_exists_in_mem(space_id, name, - FALSE, TRUE, TRUE); - } + exists; print a warning to the .err log if not. + Do not print warnings for temporary tables. */ + ibool is_temp; + + field = rec_get_nth_field_old(rec, 4, &len); + if (0x80000000UL & mach_read_from_4(field)) { + /* ROW_FORMAT=COMPACT: read the is_temp + flag from SYS_TABLES.MIX_LEN. */ + field = rec_get_nth_field_old(rec, 7, &len); + is_temp = mach_read_from_4(field) + & DICT_TF2_TEMPORARY; + } else { + /* For tables created with old versions + of InnoDB, SYS_TABLES.MIX_LEN may contain + garbage. 
Such tables would always be + in ROW_FORMAT=REDUNDANT. Pretend that + all such tables are non-temporary. That is, + do not suppress error printouts about + temporary tables not being found. */ + is_temp = FALSE; + } - if (space_id != 0 && !in_crash_recovery) { + fil_space_for_table_exists_in_mem( + space_id, name, is_temp, TRUE, !is_temp); + } else { /* It is a normal database startup: create the space object and check that the .ibd file exists. */ @@ -878,7 +904,7 @@ err_exit: space = mach_read_from_4(field); /* Check if the tablespace exists and has the right name */ - if (space != 0) { + if (!trx_sys_sys_space(space)) { flags = dict_sys_tables_get_flags(rec); if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) { @@ -894,43 +920,73 @@ err_exit: (ulong) flags); goto err_exit; } + } else { + flags = 0; + } - if (fil_space_for_table_exists_in_mem(space, name, FALSE, - FALSE, FALSE)) { - /* Ok; (if we did a crash recovery then the tablespace - can already be in the memory cache) */ - } else { - /* In >= 4.1.9, InnoDB scans the data dictionary also - at a normal mysqld startup. It is an error if the - space object does not exist in memory. */ + ut_a(name_of_col_is(sys_tables, sys_index, 4, "N_COLS")); + + field = rec_get_nth_field_old(rec, 4, &len); + n_cols = mach_read_from_4(field); + + /* The high-order bit of N_COLS is the "compact format" flag. + For tables in that format, MIX_LEN may hold additional flags. 
*/ + if (n_cols & 0x80000000UL) { + ulint flags2; + + flags |= DICT_TF_COMPACT; + + ut_a(name_of_col_is(sys_tables, sys_index, 7, "MIX_LEN")); + field = rec_get_nth_field_old(rec, 7, &len); + + flags2 = mach_read_from_4(field); + if (flags2 & (~0 << (DICT_TF2_BITS - DICT_TF2_SHIFT))) { + ut_print_timestamp(stderr); + fputs(" InnoDB: Warning: table ", stderr); + ut_print_filename(stderr, name); + fprintf(stderr, "\n" + "InnoDB: in InnoDB data dictionary" + " has unknown flags %lx.\n", + (ulong) flags2); + + flags2 &= ~(~0 << (DICT_TF2_BITS - DICT_TF2_SHIFT)); + } + + flags |= flags2 << DICT_TF2_SHIFT; + } + + /* See if the tablespace is available. */ + if (trx_sys_sys_space(space)) { + /* The system tablespace is always available. */ + } else if (!fil_space_for_table_exists_in_mem( + space, name, + (flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY, + FALSE, FALSE)) { + + if ((flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY) { + /* Do not bother to retry opening temporary tables. */ + ibd_file_missing = TRUE; + } else { ut_print_timestamp(stderr); fprintf(stderr, - " InnoDB: error: space object of table %s,\n" + " InnoDB: error: space object of table"); + ut_print_filename(stderr, name); + fprintf(stderr, ",\n" "InnoDB: space id %lu did not exist in memory." " Retrying an open.\n", - name, (ulong)space); + (ulong) space); /* Try to open the tablespace */ if (!fil_open_single_table_tablespace( - TRUE, space, flags, name)) { - /* We failed to find a sensible tablespace - file */ + TRUE, space, + flags == DICT_TF_COMPACT ? 0 : + flags & ~(~0 << DICT_TF_BITS), name)) { + /* We failed to find a sensible + tablespace file */ ibd_file_missing = TRUE; } } - } else { - flags = 0; - } - - ut_a(name_of_col_is(sys_tables, sys_index, 4, "N_COLS")); - - field = rec_get_nth_field_old(rec, 4, &len); - n_cols = mach_read_from_4(field); - - /* The high-order bit of N_COLS is the "compact format" flag. 
*/ - if (n_cols & 0x80000000UL) { - flags |= DICT_TF_COMPACT; } table = dict_mem_table_create(name, space, n_cols & ~0x80000000UL, diff --git a/storage/xtradb/dict/dict0mem.c b/storage/xtradb/dict/dict0mem.c index acf550befad..f2d219bfd4f 100644 --- a/storage/xtradb/dict/dict0mem.c +++ b/storage/xtradb/dict/dict0mem.c @@ -59,7 +59,7 @@ dict_mem_table_create( mem_heap_t* heap; ut_ad(name); - ut_a(!(flags & (~0 << DICT_TF_BITS))); + ut_a(!(flags & (~0 << DICT_TF2_BITS))); heap = mem_heap_create(DICT_HEAP_SIZE); @@ -68,7 +68,8 @@ dict_mem_table_create( table->heap = heap; table->flags = (unsigned int) flags; - table->name = mem_heap_strdup(heap, name); + table->name = ut_malloc(strlen(name) + 1); + memcpy(table->name, name, strlen(name) + 1); table->space = (unsigned int) space; table->n_cols = (unsigned int) (n_cols + DATA_N_SYS_COLS); @@ -108,6 +109,7 @@ dict_mem_table_free( #ifndef UNIV_HOTBACKUP mutex_free(&(table->autoinc_mutex)); #endif /* UNIV_HOTBACKUP */ + ut_free(table->name); mem_heap_free(table->heap); } diff --git a/storage/xtradb/fil/fil0fil.c b/storage/xtradb/fil/fil0fil.c index 3e63c0795f8..0139fa0cce5 100644 --- a/storage/xtradb/fil/fil0fil.c +++ b/storage/xtradb/fil/fil0fil.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -38,12 +38,13 @@ Created 10/25/1995 Heikki Tuuri #include "mtr0mtr.h" #include "mtr0log.h" #include "dict0dict.h" +#include "page0page.h" #include "page0zip.h" #include "trx0trx.h" #include "trx0sys.h" #include "pars0pars.h" -#include "row0row.h" #include "row0mysql.h" +#include "row0row.h" #include "que0que.h" #ifndef UNIV_HOTBACKUP # include "buf0lru.h" @@ -285,6 +286,10 @@ struct fil_system_struct { request */ UT_LIST_BASE_NODE_T(fil_space_t) space_list; /*!< list of all file spaces */ + ibool space_id_reuse_warned; + /* !< TRUE if fil_space_create() + has issued a warning about + potential space_id reuse */ }; /** The tablespace memory cache. This variable is NULL before the module is @@ -675,14 +680,14 @@ fil_node_open_file( size_bytes = (((ib_uint64_t)size_high) << 32) + (ib_uint64_t)size_low; #ifdef UNIV_HOTBACKUP - if (space->id == 0) { + if (trx_sys_sys_space(space->id)) { node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE); os_file_close(node->handle); goto add_size; } #endif /* UNIV_HOTBACKUP */ ut_a(space->purpose != FIL_LOG); - ut_a(space->id != 0); + ut_a(!trx_sys_sys_space(space->id)); if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { fprintf(stderr, @@ -728,7 +733,7 @@ fil_node_open_file( } if (UNIV_UNLIKELY(space_id == ULINT_UNDEFINED - || space_id == 0)) { + || trx_sys_sys_space(space_id))) { fprintf(stderr, "InnoDB: Error: tablespace id %lu" " in file %s is not sensible\n", @@ -790,7 +795,7 @@ add_size: system->n_open++; - if (space->purpose == FIL_TABLESPACE && space->id != 0) { + if (space->purpose == FIL_TABLESPACE && !trx_sys_sys_space(space->id)) { /* Put the node to the LRU list */ UT_LIST_ADD_FIRST(LRU, system->LRU, node); } @@ -823,7 +828,7 @@ fil_node_close_file( ut_a(system->n_open > 0); system->n_open--; - if (node->space->purpose == FIL_TABLESPACE && node->space->id 
!= 0) { + if (node->space->purpose == FIL_TABLESPACE && !trx_sys_sys_space(node->space->id)) { ut_a(UT_LIST_GET_LEN(system->LRU) > 0); /* The node is in the LRU list, remove it */ @@ -909,7 +914,7 @@ fil_mutex_enter_and_prepare_for_io( retry: mutex_enter(&fil_system->mutex); - if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) { + if (trx_sys_sys_space(space_id) || space_id >= SRV_LOG_SPACE_FIRST_ID) { /* We keep log files and system tablespace files always open; this is important in preventing deadlocks in this module, as a page read completion often performs another read from the @@ -1104,10 +1109,13 @@ fil_space_create( fil_space_t* space; /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for - ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and + ROW_FORMAT=COMPACT + ((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and ROW_FORMAT=REDUNDANT (table->flags == 0). For any other - format, the tablespace flags should equal table->flags. */ + format, the tablespace flags should equal + (table->flags & ~(~0 << DICT_TF_BITS)). 
*/ ut_a(flags != DICT_TF_COMPACT); + ut_a(!(flags & (~0UL << DICT_TF_BITS))); try_again: /*printf( @@ -1136,7 +1144,7 @@ try_again: " tablespace memory cache!\n", (ulong) space->id); - if (id == 0 || purpose != FIL_TABLESPACE) { + if (trx_sys_sys_space(id) || purpose != FIL_TABLESPACE) { mutex_exit(&fil_system->mutex); @@ -1196,7 +1204,19 @@ try_again: space->tablespace_version = fil_system->tablespace_version; space->mark = FALSE; - if (purpose == FIL_TABLESPACE && id > fil_system->max_assigned_id) { + if (UNIV_LIKELY(purpose == FIL_TABLESPACE && !recv_recovery_on) + && UNIV_UNLIKELY(id > fil_system->max_assigned_id)) { + if (!fil_system->space_id_reuse_warned) { + fil_system->space_id_reuse_warned = TRUE; + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Warning: allocated tablespace %lu," + " old maximum was %lu\n", + (ulong) id, + (ulong) fil_system->max_assigned_id); + } + fil_system->max_assigned_id = id; } @@ -1236,19 +1256,25 @@ try_again: Assigns a new space id for a new single-table tablespace. This works simply by incrementing the global counter. If 4 billion id's is not enough, we may need to recycle id's. 
-@return new tablespace id; ULINT_UNDEFINED if could not assign an id */ -static -ulint -fil_assign_new_space_id(void) -/*=========================*/ +@return TRUE if assigned, FALSE if not */ +UNIV_INTERN +ibool +fil_assign_new_space_id( +/*====================*/ + ulint* space_id) /*!< in/out: space id */ { - ulint id; + ulint id; + ibool success; mutex_enter(&fil_system->mutex); - fil_system->max_assigned_id++; + id = *space_id; + + if (id < fil_system->max_assigned_id) { + id = fil_system->max_assigned_id; + } - id = fil_system->max_assigned_id; + id++; if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) { ut_print_timestamp(stderr); @@ -1264,7 +1290,11 @@ fil_assign_new_space_id(void) (ulong) SRV_LOG_SPACE_FIRST_ID); } - if (id >= SRV_LOG_SPACE_FIRST_ID) { + success = (id < SRV_LOG_SPACE_FIRST_ID); + + if (success) { + *space_id = fil_system->max_assigned_id = id; + } else { ut_print_timestamp(stderr); fprintf(stderr, "InnoDB: You have run out of single-table" @@ -1274,14 +1304,12 @@ fil_assign_new_space_id(void) " have to dump all your tables and\n" "InnoDB: recreate the whole InnoDB installation.\n", (ulong) id); - fil_system->max_assigned_id--; - - id = ULINT_UNDEFINED; + *space_id = ULINT_UNDEFINED; } mutex_exit(&fil_system->mutex); - return(id); + return(success); } /*******************************************************************//** @@ -1517,7 +1545,7 @@ fil_init( ut_a(hash_size > 0); ut_a(max_n_open > 0); - fil_system = mem_alloc(sizeof(fil_system_t)); + fil_system = mem_zalloc(sizeof(fil_system_t)); mutex_create(&fil_system->mutex, SYNC_ANY_LATCH); @@ -1526,16 +1554,9 @@ fil_init( UT_LIST_INIT(fil_system->LRU); - fil_system->n_open = 0; fil_system->max_n_open = max_n_open; - fil_system->modification_counter = 0; - fil_system->max_assigned_id = 0; - - fil_system->tablespace_version = 0; - - UT_LIST_INIT(fil_system->unflushed_spaces); - UT_LIST_INIT(fil_system->space_list); + fil_system->max_assigned_id = TRX_SYS_SPACE_MAX; } 
/*******************************************************************//** @@ -1557,7 +1578,7 @@ fil_open_log_and_system_tablespace_files(void) space = UT_LIST_GET_FIRST(fil_system->space_list); while (space != NULL) { - if (space->purpose != FIL_TABLESPACE || space->id == 0) { + if (space->purpose != FIL_TABLESPACE || trx_sys_sys_space(space->id)) { node = UT_LIST_GET_FIRST(space->chain); while (node != NULL) { @@ -2120,7 +2141,7 @@ fil_op_log_parse_or_replay( fil_create_directory_for_tablename(name); if (fil_create_new_single_table_tablespace( - &space_id, name, FALSE, flags, + space_id, name, FALSE, flags, FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) { ut_error; } @@ -2567,9 +2588,7 @@ UNIV_INTERN ulint fil_create_new_single_table_tablespace( /*===================================*/ - ulint* space_id, /*!< in/out: space id; if this is != 0, - then this is an input parameter, - otherwise output */ + ulint space_id, /*!< in: space id */ const char* tablename, /*!< in: the table name in the usual databasename/tablename format of InnoDB, or a dir path to a temp @@ -2589,12 +2608,17 @@ fil_create_new_single_table_tablespace( ibool success; char* path; + ut_a(space_id > 0); + ut_a(space_id < SRV_LOG_SPACE_FIRST_ID); ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE); /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for - ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and + ROW_FORMAT=COMPACT + ((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and ROW_FORMAT=REDUNDANT (table->flags == 0). For any other - format, the tablespace flags should equal table->flags. */ + format, the tablespace flags should equal + (table->flags & ~(~0 << DICT_TF_BITS)). 
*/ ut_a(flags != DICT_TF_COMPACT); + ut_a(!(flags & (~0UL << DICT_TF_BITS))); path = fil_make_ibd_name(tablename, is_temp); @@ -2642,38 +2666,21 @@ fil_create_new_single_table_tablespace( return(DB_ERROR); } - buf2 = ut_malloc(3 * UNIV_PAGE_SIZE); - /* Align the memory for file i/o if we might have O_DIRECT set */ - page = ut_align(buf2, UNIV_PAGE_SIZE); - ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE, 0); if (!ret) { - ut_free(buf2); - os_file_close(file); - os_file_delete(path); - - mem_free(path); - return(DB_OUT_OF_FILE_SPACE); - } - - if (*space_id == 0) { - *space_id = fil_assign_new_space_id(); - } - - /* printf("Creating tablespace %s id %lu\n", path, *space_id); */ - - if (*space_id == ULINT_UNDEFINED) { - ut_free(buf2); + err = DB_OUT_OF_FILE_SPACE; error_exit: os_file_close(file); error_exit2: os_file_delete(path); mem_free(path); - return(DB_ERROR); + return(err); } + /* printf("Creating tablespace %s id %lu\n", path, space_id); */ + /* We have to write the space id to the file immediately and flush the file to disk. This is because in crash recovery we must be aware what tablespaces exist and what are their space id's, so that we can apply @@ -2683,10 +2690,14 @@ error_exit2: with zeros from the call of os_file_set_size(), until a buffer pool flush would write to it. 
*/ + buf2 = ut_malloc(3 * UNIV_PAGE_SIZE); + /* Align the memory for file i/o if we might have O_DIRECT set */ + page = ut_align(buf2, UNIV_PAGE_SIZE); + memset(page, '\0', UNIV_PAGE_SIZE); - fsp_header_init_fields(page, *space_id, flags); - mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, *space_id); + fsp_header_init_fields(page, space_id, flags); + mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); if (!(flags & DICT_TF_ZSSIZE_MASK)) { buf_flush_init_for_writing(page, NULL, 0); @@ -2717,6 +2728,7 @@ error_exit2: " to tablespace ", stderr); ut_print_filename(stderr, path); putc('\n', stderr); + err = DB_ERROR; goto error_exit; } @@ -2726,22 +2738,20 @@ error_exit2: fputs("InnoDB: Error: file flush of tablespace ", stderr); ut_print_filename(stderr, path); fputs(" failed\n", stderr); + err = DB_ERROR; goto error_exit; } os_file_close(file); - if (*space_id == ULINT_UNDEFINED) { - goto error_exit2; - } - - success = fil_space_create(path, *space_id, flags, FIL_TABLESPACE); + success = fil_space_create(path, space_id, flags, FIL_TABLESPACE); if (!success) { + err = DB_ERROR; goto error_exit2; } - fil_node_create(path, size, *space_id, FALSE); + fil_node_create(path, size, space_id, FALSE); #ifndef UNIV_HOTBACKUP { @@ -2752,7 +2762,7 @@ error_exit2: fil_op_write_log(flags ? MLOG_FILE_CREATE2 : MLOG_FILE_CREATE, - *space_id, + space_id, is_temp ? 
MLOG_FILE_FLAG_TEMP : 0, flags, tablename, NULL, &mtr); @@ -2795,6 +2805,7 @@ fil_reset_too_high_lsns( ib_int64_t offset; ulint zip_size; ibool success; + page_zip_des_t page_zip; filepath = fil_make_ibd_name(name, FALSE); @@ -2842,6 +2853,12 @@ fil_reset_too_high_lsns( space_id = fsp_header_get_space_id(page); zip_size = fsp_header_get_zip_size(page); + page_zip_des_init(&page_zip); + page_zip_set_size(&page_zip, zip_size); + if (zip_size) { + page_zip.data = page + UNIV_PAGE_SIZE; + } + ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Flush lsn in the tablespace file %lu" @@ -2876,20 +2893,23 @@ fil_reset_too_high_lsns( /* We have to reset the lsn */ if (zip_size) { - memcpy(page + UNIV_PAGE_SIZE, page, zip_size); + memcpy(page_zip.data, page, zip_size); buf_flush_init_for_writing( - page, page + UNIV_PAGE_SIZE, - current_lsn); + page, &page_zip, current_lsn); + success = os_file_write( + filepath, file, page_zip.data, + (ulint) offset & 0xFFFFFFFFUL, + (ulint) (offset >> 32), zip_size); } else { buf_flush_init_for_writing( page, NULL, current_lsn); + success = os_file_write( + filepath, file, page, + (ulint)(offset & 0xFFFFFFFFUL), + (ulint)(offset >> 32), + UNIV_PAGE_SIZE); } - success = os_file_write(filepath, file, page, - (ulint)(offset & 0xFFFFFFFFUL), - (ulint)(offset >> 32), - zip_size - ? zip_size - : UNIV_PAGE_SIZE); + if (!success) { goto func_exit; @@ -2965,10 +2985,13 @@ fil_open_single_table_tablespace( filepath = fil_make_ibd_name(name, FALSE); /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for - ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and + ROW_FORMAT=COMPACT + ((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and ROW_FORMAT=REDUNDANT (table->flags == 0). For any other - format, the tablespace flags should equal table->flags. */ + format, the tablespace flags should equal + (table->flags & ~(~0 << DICT_TF_BITS)). 
*/ ut_a(flags != DICT_TF_COMPACT); + ut_a(!(flags & (~0UL << DICT_TF_BITS))); file = os_file_create_simple_no_error_handling( filepath, OS_FILE_OPEN, OS_FILE_READ_WRITE, &success); @@ -3018,12 +3041,13 @@ fil_open_single_table_tablespace( space_id = fsp_header_get_space_id(page); space_flags = fsp_header_get_flags(page); - if (srv_expand_import && (space_id != id || space_flags != flags)) { + if (srv_expand_import + && (space_id != id || space_flags != (flags & ~(~0 << DICT_TF_BITS)))) { dulint old_id[31]; dulint new_id[31]; ulint root_page[31]; ulint n_index; - os_file_t info_file = -1; + os_file_t info_file = (os_file_t) -1; char* info_file_path; ulint i; int len; @@ -3103,13 +3127,13 @@ fil_open_single_table_tablespace( for (i = 0; i < n_index; i++) { new_id[i] = dict_table_get_index_on_name(table, - (page + (i + 1) * 512 + 12))->id; + (char*)(page + (i + 1) * 512 + 12))->id; old_id[i] = mach_read_from_8(page + (i + 1) * 512); root_page[i] = mach_read_from_4(page + (i + 1) * 512 + 8); } skip_info: - if (info_file != -1) + if (info_file != (os_file_t) -1) os_file_close(info_file); /* @@ -3127,7 +3151,7 @@ skip_info: /* over write space id of all pages */ rec_offs_init(offsets_); - fprintf(stderr, "%s", "InnoDB: Progress in %:"); + fprintf(stderr, "InnoDB: Progress in %%:"); for (offset = 0; offset < size_bytes; offset += UNIV_PAGE_SIZE) { ulint checksum_field; @@ -3359,7 +3383,8 @@ skip_write: ut_free(buf2); - if (UNIV_UNLIKELY(space_id != id || space_flags != flags)) { + if (UNIV_UNLIKELY(space_id != id + || space_flags != (flags & ~(~0 << DICT_TF_BITS)))) { ut_print_timestamp(stderr); fputs(" InnoDB: Error: tablespace id and flags in file ", @@ -3598,7 +3623,7 @@ fil_load_single_table_tablespace( } #ifndef UNIV_HOTBACKUP - if (space_id == ULINT_UNDEFINED || space_id == 0) { + if (space_id == ULINT_UNDEFINED || trx_sys_sys_space(space_id)) { fprintf(stderr, "InnoDB: Error: tablespace id %lu in file %s" " is not sensible\n", @@ -3607,7 +3632,7 @@ 
fil_load_single_table_tablespace( goto func_exit; } #else - if (space_id == ULINT_UNDEFINED || space_id == 0) { + if (space_id == ULINT_UNDEFINED || trx_sys_sys_space(space_id)) { char* new_path; fprintf(stderr, @@ -3868,39 +3893,6 @@ next_datadir_item: return(err); } -/********************************************************************//** -If we need crash recovery, and we have called -fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(), -we can call this function to print an error message of orphaned .ibd files -for which there is not a data dictionary entry with a matching table name -and space id. */ -UNIV_INTERN -void -fil_print_orphaned_tablespaces(void) -/*================================*/ -{ - fil_space_t* space; - - mutex_enter(&fil_system->mutex); - - space = UT_LIST_GET_FIRST(fil_system->space_list); - - while (space) { - if (space->purpose == FIL_TABLESPACE && space->id != 0 - && !space->mark) { - fputs("InnoDB: Warning: tablespace ", stderr); - ut_print_filename(stderr, space->name); - fprintf(stderr, " of id %lu has no matching table in\n" - "InnoDB: the InnoDB data dictionary.\n", - (ulong) space->id); - } - - space = UT_LIST_GET_NEXT(space_list, space); - } - - mutex_exit(&fil_system->mutex); -} - /*******************************************************************//** Returns TRUE if a single-table tablespace does not exist in the memory cache, or is being deleted there. 
@@ -4461,7 +4453,7 @@ fil_node_prepare_for_io( } if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE - && space->id != 0) { + && !trx_sys_sys_space(space->id)) { /* The node is in the LRU list, remove it */ ut_a(UT_LIST_GET_LEN(system->LRU) > 0); @@ -4507,7 +4499,7 @@ fil_node_complete_io( } if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE - && node->space->id != 0) { + && !trx_sys_sys_space(node->space->id)) { /* The node must be put back to the LRU list */ UT_LIST_ADD_FIRST(LRU, system->LRU, node); } @@ -5141,7 +5133,7 @@ fil_validate(void) ut_a(fil_node->n_pending == 0); ut_a(fil_node->open); ut_a(fil_node->space->purpose == FIL_TABLESPACE); - ut_a(fil_node->space->id != 0); + ut_a(!trx_sys_sys_space(fil_node->space->id)); fil_node = UT_LIST_GET_NEXT(LRU, fil_node); } @@ -5223,8 +5215,10 @@ void fil_close(void) /*===========*/ { +#ifndef UNIV_HOTBACKUP /* The mutex should already have been freed. */ ut_ad(fil_system->mutex.magic_n == 0); +#endif /* !UNIV_HOTBACKUP */ hash_table_free(fil_system->spaces); diff --git a/storage/xtradb/fsp/fsp0fsp.c b/storage/xtradb/fsp/fsp0fsp.c index 19722623611..cd28186109f 100644 --- a/storage/xtradb/fsp/fsp0fsp.c +++ b/storage/xtradb/fsp/fsp0fsp.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -48,7 +48,7 @@ Created 11/29/1995 Heikki Tuuri # include "log0log.h" #endif /* UNIV_HOTBACKUP */ #include "dict0mem.h" - +#include "trx0sys.h" #define FSP_HEADER_OFFSET FIL_PAGE_DATA /* Offset of the space header within a file page */ @@ -392,11 +392,11 @@ UNIV_INLINE ibool xdes_get_bit( /*=========*/ - xdes_t* descr, /*!< in: descriptor */ - ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */ - ulint offset, /*!< in: page offset within extent: - 0 ... FSP_EXTENT_SIZE - 1 */ - mtr_t* mtr) /*!< in: mtr */ + const xdes_t* descr, /*!< in: descriptor */ + ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */ + ulint offset, /*!< in: page offset within extent: + 0 ... FSP_EXTENT_SIZE - 1 */ + mtr_t* mtr) /*!< in: mtr */ { ulint index; ulint byte_index; @@ -533,8 +533,8 @@ UNIV_INLINE ulint xdes_get_n_used( /*============*/ - xdes_t* descr, /*!< in: descriptor */ - mtr_t* mtr) /*!< in: mtr */ + const xdes_t* descr, /*!< in: descriptor */ + mtr_t* mtr) /*!< in: mtr */ { ulint i; ulint count = 0; @@ -557,8 +557,8 @@ UNIV_INLINE ibool xdes_is_free( /*=========*/ - xdes_t* descr, /*!< in: descriptor */ - mtr_t* mtr) /*!< in: mtr */ + const xdes_t* descr, /*!< in: descriptor */ + mtr_t* mtr) /*!< in: mtr */ { if (0 == xdes_get_n_used(descr, mtr)) { @@ -575,8 +575,8 @@ UNIV_INLINE ibool xdes_is_full( /*=========*/ - xdes_t* descr, /*!< in: descriptor */ - mtr_t* mtr) /*!< in: mtr */ + const xdes_t* descr, /*!< in: descriptor */ + mtr_t* mtr) /*!< in: mtr */ { if (FSP_EXTENT_SIZE == xdes_get_n_used(descr, mtr)) { @@ -592,7 +592,7 @@ UNIV_INLINE void xdes_set_state( /*===========*/ - xdes_t* descr, /*!< in: descriptor */ + xdes_t* descr, /*!< in/out: descriptor */ ulint state, /*!< in: state to set */ mtr_t* mtr) /*!< in: mtr handle */ { @@ -611,8 +611,8 @@ UNIV_INLINE ulint xdes_get_state( /*===========*/ - xdes_t* 
descr, /*!< in: descriptor */ - mtr_t* mtr) /*!< in: mtr handle */ + const xdes_t* descr, /*!< in: descriptor */ + mtr_t* mtr) /*!< in: mtr handle */ { ulint state; @@ -708,7 +708,7 @@ UNIV_INLINE xdes_t* xdes_get_descriptor_with_space_hdr( /*===============================*/ - fsp_header_t* sp_header,/*!< in: space header, x-latched */ + fsp_header_t* sp_header,/*!< in/out: space header, x-latched */ ulint space, /*!< in: space id */ ulint offset, /*!< in: page offset; if equal to the free limit, @@ -878,14 +878,10 @@ fsp_init_file_page_low( return; } -#ifdef UNIV_BASIC_LOG_DEBUG - memset(page, 0xff, UNIV_PAGE_SIZE); -#endif + memset(page, 0, UNIV_PAGE_SIZE); mach_write_to_4(page + FIL_PAGE_OFFSET, buf_block_get_page_no(block)); - memset(page + FIL_PAGE_LSN, 0, 8); mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, buf_block_get_space(block)); - memset(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, 0, 8); } #ifndef UNIV_HOTBACKUP @@ -1013,10 +1009,10 @@ fsp_header_init( flst_init(header + FSP_SEG_INODES_FREE, mtr); mlog_write_dulint(header + FSP_SEG_ID, ut_dulint_create(0, 1), mtr); - if (space == 0) { + if (space == TRX_SYS_SPACE || space == TRX_DOUBLEWRITE_SPACE) { fsp_fill_free_list(FALSE, space, header, mtr); btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, - 0, 0, ut_dulint_add(DICT_IBUF_ID_MIN, space), + space, 0, ut_dulint_add(DICT_IBUF_ID_MIN, space), dict_ind_redundant, mtr); } else { fsp_fill_free_list(TRUE, space, header, mtr); @@ -1351,7 +1347,7 @@ fsp_fill_free_list( descriptor page and ibuf bitmap page; then we do not allocate more extents */ ulint space, /*!< in: space */ - fsp_header_t* header, /*!< in: space header */ + fsp_header_t* header, /*!< in/out: space header */ mtr_t* mtr) /*!< in: mtr */ { ulint limit; diff --git a/storage/xtradb/ha/ha0ha.c b/storage/xtradb/ha/ha0ha.c index cb5e541b55d..9d9d341ad39 100644 --- a/storage/xtradb/ha/ha0ha.c +++ b/storage/xtradb/ha/ha0ha.c @@ -101,6 +101,8 @@ ha_clear( ulint i; ulint n; + 
ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); #ifdef UNIV_SYNC_DEBUG ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE)); #endif /* UNIV_SYNC_DEBUG */ @@ -146,7 +148,9 @@ ha_insert_for_fold_func( ha_node_t* prev_node; ulint hash; - ut_ad(table && data); + ut_ad(data); + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG ut_a(block->frame == page_align(data)); #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ @@ -237,6 +241,8 @@ ha_delete_hash_node( hash_table_t* table, /*!< in: hash table */ ha_node_t* del_node) /*!< in: node to be deleted */ { + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG # ifndef UNIV_HOTBACKUP if (table->adaptive) { @@ -267,6 +273,8 @@ ha_search_and_update_if_found_func( { ha_node_t* node; + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); ASSERT_HASH_MUTEX_OWN(table, fold); #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG ut_a(new_block->frame == page_align(new_data)); @@ -304,6 +312,8 @@ ha_remove_all_nodes_to_page( { ha_node_t* node; + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); ASSERT_HASH_MUTEX_OWN(table, fold); node = ha_chain_get_first(table, fold); @@ -353,6 +363,8 @@ ha_validate( ibool ok = TRUE; ulint i; + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); ut_a(start_index <= end_index); ut_a(start_index < hash_get_n_cells(table)); ut_a(end_index < hash_get_n_cells(table)); @@ -404,6 +416,8 @@ builds, see http://bugs.mysql.com/36941 */ #endif /* PRINT_USED_CELLS */ ulint n_bufs; + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); #ifdef PRINT_USED_CELLS for (i = 0; i < hash_get_n_cells(table); i++) { diff --git a/storage/xtradb/ha/hash0hash.c b/storage/xtradb/ha/hash0hash.c index 2800d7793f8..70516deb005 100644 --- a/storage/xtradb/ha/hash0hash.c +++ b/storage/xtradb/ha/hash0hash.c @@ -119,7 +119,7 @@ hash_create( table->heaps = NULL; #endif /* 
!UNIV_HOTBACKUP */ table->heap = NULL; - table->magic_n = HASH_TABLE_MAGIC_N; + ut_d(table->magic_n = HASH_TABLE_MAGIC_N); /* Initialize the cell array */ hash_table_clear(table); @@ -128,6 +128,70 @@ hash_create( } /*************************************************************//** +*/ +UNIV_INTERN +ulint +hash_create_needed( +/*===============*/ + ulint n) +{ + ulint prime; + ulint offset; + + prime = ut_find_prime(n); + + offset = (sizeof(hash_table_t) + 7) / 8; + offset *= 8; + + return(offset + sizeof(hash_cell_t) * prime); +} + +UNIV_INTERN +void +hash_create_init( +/*=============*/ + hash_table_t* table, + ulint n) +{ + ulint prime; + ulint offset; + + prime = ut_find_prime(n); + + offset = (sizeof(hash_table_t) + 7) / 8; + offset *= 8; + + table->array = (hash_cell_t*)(((char*)table) + offset); + table->n_cells = prime; +# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + table->adaptive = FALSE; +# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + table->n_mutexes = 0; + table->mutexes = NULL; + table->heaps = NULL; + table->heap = NULL; + ut_d(table->magic_n = HASH_TABLE_MAGIC_N); + + /* Initialize the cell array */ + hash_table_clear(table); +} + +UNIV_INTERN +void +hash_create_reuse( +/*==============*/ + hash_table_t* table) +{ + ulint offset; + + offset = (sizeof(hash_table_t) + 7) / 8; + offset *= 8; + + table->array = (hash_cell_t*)(((char*)table) + offset); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); +} + +/*************************************************************//** Frees a hash table. 
*/ UNIV_INTERN void @@ -135,6 +199,8 @@ hash_table_free( /*============*/ hash_table_t* table) /*!< in, own: hash table */ { + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); #ifndef UNIV_HOTBACKUP ut_a(table->mutexes == NULL); #endif /* !UNIV_HOTBACKUP */ @@ -160,6 +226,8 @@ hash_create_mutexes_func( { ulint i; + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); ut_a(n_mutexes > 0); ut_a(ut_is_2pow(n_mutexes)); diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 731502a3971..ef1ef280435 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -1,7 +1,8 @@ /***************************************************************************** -Copyright (c) 2000, 2009, MySQL AB & Innobase Oy. All Rights Reserved. +Copyright (c) 2000, 2010, MySQL AB & Innobase Oy. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. +Copyright (c) 2009, Percona Inc. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -9,6 +10,13 @@ briefly in the InnoDB documentation. The contributions by Google are incorporated with their permission, and subject to the conditions contained in the file COPYING.Google. +Portions of this file contain modifications contributed and copyrighted +by Percona Inc.. Those modifications are +gratefully acknowledged and are described briefly in the InnoDB +documentation. The contributions by Percona Inc. are incorporated with +their permission, and subject to the conditions contained in the file +COPYING.Percona. + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. 
@@ -22,32 +30,6 @@ this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *****************************************************************************/ -/*********************************************************************** - -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. -Copyright (c) 2009, Percona Inc. - -Portions of this file contain modifications contributed and copyrighted -by Percona Inc.. Those modifications are -gratefully acknowledged and are described briefly in the InnoDB -documentation. The contributions by Percona Inc. are incorporated with -their permission, and subject to the conditions contained in the file -COPYING.Percona. - -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -Public License for more details. 
- -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -***********************************************************************/ /* TODO list for the InnoDB handler in 5.0: - Remove the flag trx->active_trans and look at trx->conc_state @@ -141,7 +123,6 @@ static ulong commit_threads = 0; static pthread_mutex_t commit_threads_m; static pthread_cond_t commit_cond; static pthread_mutex_t commit_cond_m; -static pthread_mutex_t analyze_mutex; static bool innodb_inited = 0; C_MODE_START @@ -190,6 +171,7 @@ static char* innobase_data_file_path = NULL; static char* innobase_log_group_home_dir = NULL; static char* innobase_file_format_name = NULL; static char* innobase_change_buffering = NULL; +static char* innobase_doublewrite_file = NULL; /* Note: This variable can be set to on/off and any of the supported file formats in the configuration file, but can only be set to any @@ -217,6 +199,7 @@ static my_bool innobase_overwrite_relay_log_info = FALSE; static my_bool innobase_rollback_on_timeout = FALSE; static my_bool innobase_create_status_file = FALSE; static my_bool innobase_stats_on_metadata = TRUE; +static my_bool innobase_use_sys_stats_table = FALSE; static char* internal_innobase_data_file_path = NULL; @@ -360,9 +343,15 @@ static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG, "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.", NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0); +static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit_session, PLUGIN_VAR_RQCMDARG, + "Control innodb_flush_log_at_trx_commit for each sessions. " + "The value 0~2 are same meanings to innodb_flush_log_at_trx_commit. 
" + "The value 3 regards innodb_flush_log_at_trx_commit (default).", + NULL, NULL, 3, 0, 3, 0); + static handler *innobase_create_handler(handlerton *hton, - TABLE_SHARE *table, + TABLE_SHARE *table, MEM_ROOT *mem_root) { return new (mem_root) ha_innobase(hton, table); @@ -475,8 +464,9 @@ static int innobase_start_trx_and_assign_read_view( /*====================================*/ - handlerton* hton, /*!< in: Innodb handlerton */ - THD* thd); /*!< in: MySQL thread handle of the user for whom + /* out: 0 */ + handlerton* hton, /* in: Innodb handlerton */ + THD* thd); /* in: MySQL thread handle of the user for whom the transaction should be committed */ /****************************************************************//** Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes @@ -558,6 +548,8 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_data_written, SHOW_LONG}, {"dblwr_pages_written", (char*) &export_vars.innodb_dblwr_pages_written, SHOW_LONG}, + {"deadlocks", + (char*) &export_vars.innodb_deadlocks, SHOW_LONG}, {"dblwr_writes", (char*) &export_vars.innodb_dblwr_writes, SHOW_LONG}, {"dict_tables", @@ -737,6 +729,17 @@ thd_lock_wait_timeout( return(THDVAR((THD*) thd, lock_wait_timeout)); } +/******************************************************************//** +*/ +extern "C" UNIV_INTERN +ulong +thd_flush_log_at_trx_commit_session( +/*================================*/ + void* thd) +{ + return(THDVAR((THD*) thd, flush_log_at_trx_commit_session)); +} + /********************************************************************//** Obtain the InnoDB transaction of a MySQL thread. @return reference to transaction pointer */ @@ -1052,6 +1055,29 @@ innobase_get_charset( return(thd_charset((THD*) mysql_thd)); } +/**********************************************************************//** +Determines the current SQL statement. 
+@return SQL statement string */ +extern "C" UNIV_INTERN +const char* +innobase_get_stmt( +/*==============*/ + void* mysql_thd, /*!< in: MySQL thread handle */ + size_t* length) /*!< out: length of the SQL statement */ +{ +#if MYSQL_VERSION_ID >= 50142 + LEX_STRING* stmt; + + stmt = thd_query_string((THD*) mysql_thd); + *length = stmt->length; + return(stmt->str); +#else + const char* stmt_str = thd_query((THD*) mysql_thd); + *length = strlen(stmt_str); + return(stmt_str); +#endif +} + #if defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN) extern MYSQL_PLUGIN_IMPORT MY_TMPDIR mysql_tmpdir_list; /*******************************************************************//** @@ -1372,7 +1398,6 @@ innobase_trx_allocate( trx = trx_allocate_for_mysql(); trx->mysql_thd = thd; - trx->mysql_query_str = thd_query(thd); innobase_trx_init(thd, trx); @@ -1899,6 +1924,19 @@ trx_is_interrupted( return(trx && trx->mysql_thd && thd_killed((THD*) trx->mysql_thd)); } +/**********************************************************************//** +Determines if the currently running transaction is in strict mode. +@return TRUE if strict */ +extern "C" UNIV_INTERN +ibool +trx_is_strict( +/*==========*/ + trx_t* trx) /*!< in: transaction */ +{ + return(trx && trx->mysql_thd + && THDVAR((THD*) trx->mysql_thd, strict_mode)); +} + /**************************************************************//** Resets some fields of a prebuilt struct. The template is used in fast retrieval of just those column values MySQL needs in its processing. */ @@ -2043,12 +2081,12 @@ innobase_init( srv_page_size_shift = 0; if (innobase_page_size != (1 << 14)) { - int n_shift; + uint n_shift; fprintf(stderr, "InnoDB: Warning: innodb_page_size has been changed from default value 16384. 
(###EXPERIMENTAL### operation)\n"); for (n_shift = 12; n_shift <= UNIV_PAGE_SIZE_SHIFT_MAX; n_shift++) { - if (innobase_page_size == (1u << n_shift)) { + if (innobase_page_size == ((ulong)1 << n_shift)) { srv_page_size_shift = n_shift; srv_page_size = (1 << srv_page_size_shift); fprintf(stderr, @@ -2235,8 +2273,12 @@ mem_free_and_error: goto error; } + srv_doublewrite_file = innobase_doublewrite_file; + srv_extra_undoslots = (ibool) innobase_extra_undoslots; + srv_use_sys_stats_table = (ibool) innobase_use_sys_stats_table; + /* -------------- Log files ---------------------------*/ /* The default dir for log files is the datadir of MySQL */ @@ -2333,7 +2375,7 @@ mem_free_and_error: } sql_print_error("InnoDB: invalid value " - "innodb_file_format_check=%s", + "innodb_change_buffering=%s", innobase_change_buffering); goto mem_free_and_error; } @@ -2372,7 +2414,6 @@ innobase_change_buffering_inited_ok: srv_force_recovery = (ulint) innobase_force_recovery; - srv_fast_recovery = (ibool) innobase_fast_recovery; srv_recovery_stats = (ibool) innobase_recovery_stats; srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite; @@ -2466,11 +2507,11 @@ innobase_change_buffering_inited_ok: my_b_seek(&info_file, 0L); pos=strmov(buff, trx_sys_mysql_relay_log_name); *pos++='\n'; - pos=longlong2str(trx_sys_mysql_relay_log_pos, pos, 10); + pos=longlong10_to_str(trx_sys_mysql_relay_log_pos, pos, 10); *pos++='\n'; pos=strmov(pos, trx_sys_mysql_master_log_name); *pos++='\n'; - pos=longlong2str(trx_sys_mysql_master_log_pos, pos, 10); + pos=longlong10_to_str(trx_sys_mysql_master_log_pos, pos, 10); *pos='\n'; if (my_b_write(&info_file, (uchar*) buff, (size_t) (pos-buff)+1)) @@ -2502,7 +2543,6 @@ skip_overwrite: pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST); pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST); pthread_mutex_init(&commit_cond_m, MY_MUTEX_INIT_FAST); - pthread_mutex_init(&analyze_mutex, MY_MUTEX_INIT_FAST); pthread_cond_init(&commit_cond, NULL); 
innodb_inited= 1; #ifdef MYSQL_DYNAMIC_PLUGIN @@ -2557,7 +2597,6 @@ innobase_end( pthread_mutex_destroy(&prepare_commit_mutex); pthread_mutex_destroy(&commit_threads_m); pthread_mutex_destroy(&commit_cond_m); - pthread_mutex_destroy(&analyze_mutex); pthread_cond_destroy(&commit_cond); } @@ -2594,10 +2633,7 @@ innobase_alter_table_flags( { return(HA_ONLINE_ADD_INDEX_NO_WRITES | HA_ONLINE_DROP_INDEX_NO_WRITES - /* Current InnoDB doesn't sort unique indexes along mysqld's order - It is dangerous to use index. So it is disabled until - the bug http://bugs.mysql.com/47622 */ - /* | HA_ONLINE_ADD_UNIQUE_INDEX_NO_WRITES */ + | HA_ONLINE_ADD_UNIQUE_INDEX_NO_WRITES | HA_ONLINE_DROP_UNIQUE_INDEX_NO_WRITES | HA_ONLINE_ADD_PK_INDEX_NO_WRITES); } @@ -2943,7 +2979,7 @@ innobase_rollback_to_savepoint( /* TODO: use provided savepoint data area to store savepoint data */ - longlong2str((ulint)savepoint, name, 36); + longlong2str((ulint)savepoint, name, 36, 1); error = (int) trx_rollback_to_savepoint_for_mysql(trx, name, &mysql_binlog_cache_pos); @@ -2974,7 +3010,7 @@ innobase_release_savepoint( /* TODO: use provided savepoint data area to store savepoint data */ - longlong2str((ulint)savepoint, name, 36); + longlong2str((ulint)savepoint, name, 36, 1); error = (int) trx_release_savepoint_for_mysql(trx, name); @@ -3021,7 +3057,7 @@ innobase_savepoint( /* TODO: use provided savepoint data area to store savepoint data */ char name[64]; - longlong2str((ulint)savepoint,name,36); + longlong2str((ulint)savepoint,name,36,1); error = (int) trx_savepoint_for_mysql(trx, name, (ib_int64_t)0); @@ -3292,59 +3328,370 @@ normalize_table_name( } /********************************************************************//** +Get the upper limit of the MySQL integral and floating-point type. 
+@return maximum allowed value for the field */ +static +ulonglong +innobase_get_int_col_max_value( +/*===========================*/ + const Field* field) /*!< in: MySQL field */ +{ + ulonglong max_value = 0; + + switch(field->key_type()) { + /* TINY */ + case HA_KEYTYPE_BINARY: + max_value = 0xFFULL; + break; + case HA_KEYTYPE_INT8: + max_value = 0x7FULL; + break; + /* SHORT */ + case HA_KEYTYPE_USHORT_INT: + max_value = 0xFFFFULL; + break; + case HA_KEYTYPE_SHORT_INT: + max_value = 0x7FFFULL; + break; + /* MEDIUM */ + case HA_KEYTYPE_UINT24: + max_value = 0xFFFFFFULL; + break; + case HA_KEYTYPE_INT24: + max_value = 0x7FFFFFULL; + break; + /* LONG */ + case HA_KEYTYPE_ULONG_INT: + max_value = 0xFFFFFFFFULL; + break; + case HA_KEYTYPE_LONG_INT: + max_value = 0x7FFFFFFFULL; + break; + /* BIG */ + case HA_KEYTYPE_ULONGLONG: + max_value = 0xFFFFFFFFFFFFFFFFULL; + break; + case HA_KEYTYPE_LONGLONG: + max_value = 0x7FFFFFFFFFFFFFFFULL; + break; + case HA_KEYTYPE_FLOAT: + /* We use the maximum as per IEEE754-2008 standard, 2^24 */ + max_value = 0x1000000ULL; + break; + case HA_KEYTYPE_DOUBLE: + /* We use the maximum as per IEEE754-2008 standard, 2^53 */ + max_value = 0x20000000000000ULL; + break; + default: + ut_error; + } + + return(max_value); +} + +/*******************************************************************//** +This function checks whether the index column information +is consistent between KEY info from mysql and that from innodb index. +@return TRUE if all column types match. 
*/ +static +ibool +innobase_match_index_columns( +/*=========================*/ + const KEY* key_info, /*!< in: Index info + from mysql */ + const dict_index_t* index_info) /*!< in: Index info + from Innodb */ +{ + const KEY_PART_INFO* key_part; + const KEY_PART_INFO* key_end; + const dict_field_t* innodb_idx_fld; + const dict_field_t* innodb_idx_fld_end; + + DBUG_ENTER("innobase_match_index_columns"); + + /* Check whether user defined index column count matches */ + if (key_info->key_parts != index_info->n_user_defined_cols) { + DBUG_RETURN(FALSE); + } + + key_part = key_info->key_part; + key_end = key_part + key_info->key_parts; + innodb_idx_fld = index_info->fields; + innodb_idx_fld_end = index_info->fields + index_info->n_fields; + + /* Check each index column's datatype. We do not check + column name because there exists case that index + column name got modified in mysql but such change does not + propagate to InnoDB. + One hidden assumption here is that the index column sequences + are matched up between those in mysql and Innodb. */ + for (; key_part != key_end; ++key_part) { + ulint col_type; + ibool is_unsigned; + ulint mtype = innodb_idx_fld->col->mtype; + + /* Need to translate to InnoDB column type before + comparison. */ + col_type = get_innobase_type_from_mysql_type(&is_unsigned, + key_part->field); + + /* Ignore Innodb specific system columns. */ + while (mtype == DATA_SYS) { + innodb_idx_fld++; + + if (innodb_idx_fld >= innodb_idx_fld_end) { + DBUG_RETURN(FALSE); + } + } + + if (col_type != mtype) { + /* Column Type mismatches */ + DBUG_RETURN(FALSE); + } + + innodb_idx_fld++; + } + + DBUG_RETURN(TRUE); +} + +/*******************************************************************//** +This function builds a translation table in INNOBASE_SHARE +structure for fast index location with mysql array number from its +table->key_info structure. 
This also provides the necessary translation +between the key order in mysql key_info and Innodb ib_table->indexes if +they are not fully matched with each other. +Note we do not have any mutex protecting the translation table +building based on the assumption that there is no concurrent +index creation/drop and DMLs that requires index lookup. All table +handle will be closed before the index creation/drop. +@return TRUE if index translation table built successfully */ +static +ibool +innobase_build_index_translation( +/*=============================*/ + const TABLE* table, /*!< in: table in MySQL data + dictionary */ + dict_table_t* ib_table, /*!< in: table in Innodb data + dictionary */ + INNOBASE_SHARE* share) /*!< in/out: share structure + where index translation table + will be constructed in. */ +{ + ulint mysql_num_index; + ulint ib_num_index; + dict_index_t** index_mapping; + ibool ret = TRUE; + + DBUG_ENTER("innobase_build_index_translation"); + + mysql_num_index = table->s->keys; + ib_num_index = UT_LIST_GET_LEN(ib_table->indexes); + + index_mapping = share->idx_trans_tbl.index_mapping; + + /* If there exists inconsistency between MySQL and InnoDB dictionary + (metadata) information, the number of index defined in MySQL + could exceed that in InnoDB, do not build index translation + table in such case */ + if (UNIV_UNLIKELY(ib_num_index < mysql_num_index)) { + ret = FALSE; + goto func_exit; + } + + /* If index entry count is non-zero, nothing has + changed since last update, directly return TRUE */ + if (share->idx_trans_tbl.index_count) { + /* Index entry count should still match mysql_num_index */ + ut_a(share->idx_trans_tbl.index_count == mysql_num_index); + goto func_exit; + } + + /* The number of index increased, rebuild the mapping table */ + if (mysql_num_index > share->idx_trans_tbl.array_size) { + index_mapping = (dict_index_t**) my_realloc(index_mapping, + mysql_num_index * + sizeof(*index_mapping), + MYF(MY_ALLOW_ZERO_PTR)); + + if 
(!index_mapping) { + ret = FALSE; + goto func_exit; + } + + share->idx_trans_tbl.array_size = mysql_num_index; + } + + + /* For each index in the mysql key_info array, fetch its + corresponding InnoDB index pointer into index_mapping + array. */ + for (ulint count = 0; count < mysql_num_index; count++) { + + /* Fetch index pointers into index_mapping according to mysql + index sequence */ + index_mapping[count] = dict_table_get_index_on_name( + ib_table, table->key_info[count].name); + + if (!index_mapping[count]) { + sql_print_error("Cannot find index %s in InnoDB " + "index dictionary.", + table->key_info[count].name); + ret = FALSE; + goto func_exit; + } + + /* Double check fetched index has the same + column info as those in mysql key_info. */ + if (!innobase_match_index_columns(&table->key_info[count], + index_mapping[count])) { + sql_print_error("Found index %s whose column info " + "does not match that of MySQL.", + table->key_info[count].name); + ret = FALSE; + goto func_exit; + } + } + + /* Successfully built the translation table */ + share->idx_trans_tbl.index_count = mysql_num_index; + +func_exit: + if (!ret) { + /* Build translation table failed. */ + my_free(index_mapping, MYF(MY_ALLOW_ZERO_PTR)); + + share->idx_trans_tbl.array_size = 0; + share->idx_trans_tbl.index_count = 0; + index_mapping = NULL; + } + + share->idx_trans_tbl.index_mapping = index_mapping; + + DBUG_RETURN(ret); +} + +/*******************************************************************//** +This function uses index translation table to quickly locate the +requested index structure. +Note we do not have mutex protection for the index translatoin table +access, it is based on the assumption that there is no concurrent +translation table rebuild (fter create/drop index) and DMLs that +require index lookup. +@return dict_index_t structure for requested index. NULL if +fail to locate the index structure. 
*/ +static +dict_index_t* +innobase_index_lookup( +/*==================*/ + INNOBASE_SHARE* share, /*!< in: share structure for index + translation table. */ + uint keynr) /*!< in: index number for the requested + index */ +{ + if (!share->idx_trans_tbl.index_mapping + || keynr >= share->idx_trans_tbl.index_count) { + return(NULL); + } + + return(share->idx_trans_tbl.index_mapping[keynr]); +} + +/************************************************************************ Set the autoinc column max value. This should only be called once from -ha_innobase::open(). Therefore there's no need for a covering lock. -@return DB_SUCCESS or error code */ +ha_innobase::open(). Therefore there's no need for a covering lock. */ UNIV_INTERN -ulint +void ha_innobase::innobase_initialize_autoinc() /*======================================*/ { - dict_index_t* index; ulonglong auto_inc; - const char* col_name; - ulint error; + const Field* field = table->found_next_number_field; + + if (field != NULL) { + auto_inc = innobase_get_int_col_max_value(field); + } else { + /* We have no idea what's been passed in to us as the + autoinc column. We set it to the 0, effectively disabling + updates to the table. */ + auto_inc = 0; - col_name = table->found_next_number_field->field_name; - index = innobase_get_index(table->s->next_number_index); + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: Unable to determine the AUTOINC " + "column name\n"); + } - /* Execute SELECT MAX(col_name) FROM TABLE; */ - error = row_search_max_autoinc(index, col_name, &auto_inc); + if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { + /* If the recovery level is set so high that writes + are disabled we force the AUTOINC counter to 0 + value effectively disabling writes to the table. + Secondly, we avoid reading the table in case the read + results in failure due to a corrupted table/index. + + We will not return an error to the client, so that the + tables can be dumped with minimal hassle. 
If an error + were returned in this case, the first attempt to read + the table would fail and subsequent SELECTs would succeed. */ + auto_inc = 0; + } else if (field == NULL) { + /* This is a far more serious error, best to avoid + opening the table and return failure. */ + my_error(ER_AUTOINC_READ_FAILED, MYF(0)); + } else { + dict_index_t* index; + const char* col_name; + ulonglong read_auto_inc; + ulint err; - switch (error) { - case DB_SUCCESS: + update_thd(ha_thd()); - /* At the this stage we don't know the increment - or the offset, so use default inrement of 1. */ - ++auto_inc; - break; + ut_a(prebuilt->trx == thd_to_trx(user_thd)); - case DB_RECORD_NOT_FOUND: - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: MySQL and InnoDB data " - "dictionaries are out of sync.\n" - "InnoDB: Unable to find the AUTOINC column %s in the " - "InnoDB table %s.\n" - "InnoDB: We set the next AUTOINC column value to the " - "maximum possible value,\n" - "InnoDB: in effect disabling the AUTOINC next value " - "generation.\n" - "InnoDB: You can either set the next AUTOINC value " - "explicitly using ALTER TABLE\n" - "InnoDB: or fix the data dictionary by recreating " - "the table.\n", - col_name, index->table->name); - - auto_inc = 0xFFFFFFFFFFFFFFFFULL; - break; + col_name = field->field_name; + index = innobase_get_index(table->s->next_number_index); - default: - return(error); + /* Execute SELECT MAX(col_name) FROM TABLE; */ + err = row_search_max_autoinc(index, col_name, &read_auto_inc); + + switch (err) { + case DB_SUCCESS: + /* At the this stage we do not know the increment + or the offset, so use a default increment of 1. 
*/ + auto_inc = read_auto_inc + 1; + break; + + case DB_RECORD_NOT_FOUND: + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: MySQL and InnoDB data " + "dictionaries are out of sync.\n" + "InnoDB: Unable to find the AUTOINC column " + "%s in the InnoDB table %s.\n" + "InnoDB: We set the next AUTOINC column " + "value to 0,\n" + "InnoDB: in effect disabling the AUTOINC " + "next value generation.\n" + "InnoDB: You can either set the next " + "AUTOINC value explicitly using ALTER TABLE\n" + "InnoDB: or fix the data dictionary by " + "recreating the table.\n", + col_name, index->table->name); + + /* This will disable the AUTOINC generation. */ + auto_inc = 0; + + /* We want the open to succeed, so that the user can + take corrective action. ie. reads should succeed but + updates should fail. */ + err = DB_SUCCESS; + break; + default: + /* row_search_max_autoinc() should only return + one of DB_SUCCESS or DB_RECORD_NOT_FOUND. */ + ut_error; + } } dict_table_autoinc_initialize(prebuilt->table, auto_inc); - - return(DB_SUCCESS); } /*****************************************************************//** @@ -3497,6 +3844,11 @@ retry: primary_key = table->s->primary_key; key_used_on_scan = primary_key; + if (!innobase_build_index_translation(table, ib_table, share)) { + sql_print_error("Build InnoDB index translation table for" + " Table %s failed", name); + } + /* Allocate a buffer for a 'row reference'. A row reference is a string of bytes of length ref_length which uniquely specifies a row in our table. Note that MySQL may also compare two row @@ -3504,31 +3856,85 @@ retry: of length ref_length! */ if (!row_table_got_default_clust_index(ib_table)) { - if (primary_key >= MAX_KEY) { - sql_print_error("Table %s has a primary key in InnoDB data " - "dictionary, but not in MySQL!", name); - } - prebuilt->clust_index_was_generated = FALSE; - /* MySQL allocates the buffer for ref. 
key_info->key_length - includes space for all key columns + one byte for each column - that may be NULL. ref_length must be as exact as possible to - save space, because all row reference buffers are allocated - based on ref_length. */ + if (UNIV_UNLIKELY(primary_key >= MAX_KEY)) { + sql_print_error("Table %s has a primary key in " + "InnoDB data dictionary, but not " + "in MySQL!", name); + + /* This mismatch could cause further problems + if not attended, bring this to the user's attention + by printing a warning in addition to log a message + in the errorlog */ + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_NO_SUCH_INDEX, + "InnoDB: Table %s has a " + "primary key in InnoDB data " + "dictionary, but not in " + "MySQL!", name); + + /* If primary_key >= MAX_KEY, its (primary_key) + value could be out of bound if continue to index + into key_info[] array. Find InnoDB primary index, + and assign its key_length to ref_length. + In addition, since MySQL indexes are sorted starting + with primary index, unique index etc., initialize + ref_length to the first index key length in + case we fail to find InnoDB cluster index. + + Please note, this will not resolve the primary + index mismatch problem, other side effects are + possible if users continue to use the table. + However, we allow this table to be opened so + that user can adopt necessary measures for the + mismatch while still being accessible to the table + date. */ + ref_length = table->key_info[0].key_length; + + /* Find correspoinding cluster index + key length in MySQL's key_info[] array */ + for (ulint i = 0; i < table->s->keys; i++) { + dict_index_t* index; + index = innobase_get_index(i); + if (dict_index_is_clust(index)) { + ref_length = + table->key_info[i].key_length; + } + } + } else { + /* MySQL allocates the buffer for ref. + key_info->key_length includes space for all key + columns + one byte for each column that may be + NULL. 
ref_length must be as exact as possible to + save space, because all row reference buffers are + allocated based on ref_length. */ - ref_length = table->key_info[primary_key].key_length; + ref_length = table->key_info[primary_key].key_length; + } } else { if (primary_key != MAX_KEY) { - sql_print_error("Table %s has no primary key in InnoDB data " - "dictionary, but has one in MySQL! If you " - "created the table with a MySQL version < " - "3.23.54 and did not define a primary key, " - "but defined a unique key with all non-NULL " - "columns, then MySQL internally treats that " - "key as the primary key. You can fix this " - "error by dump + DROP + CREATE + reimport " - "of the table.", name); + sql_print_error( + "Table %s has no primary key in InnoDB data " + "dictionary, but has one in MySQL! If you " + "created the table with a MySQL version < " + "3.23.54 and did not define a primary key, " + "but defined a unique key with all non-NULL " + "columns, then MySQL internally treats that " + "key as the primary key. You can fix this " + "error by dump + DROP + CREATE + reimport " + "of the table.", name); + + /* This mismatch could cause further problems + if not attended, bring this to the user attention + by printing a warning in addition to log a message + in the errorlog */ + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_NO_SUCH_INDEX, + "InnoDB: Table %s has no " + "primary key in InnoDB data " + "dictionary, but has one in " + "MySQL!", name); } prebuilt->clust_index_was_generated = TRUE; @@ -3570,8 +3976,6 @@ retry: /* Only if the table has an AUTOINC column. */ if (prebuilt->table != NULL && table->found_next_number_field != NULL) { - ulint error; - dict_table_autoinc_lock(prebuilt->table); /* Since a table can already be "open" in InnoDB's internal @@ -3580,8 +3984,7 @@ retry: autoinc value from a previous MySQL open. 
*/ if (dict_table_autoinc_read(prebuilt->table) == 0) { - error = innobase_initialize_autoinc(); - ut_a(error == DB_SUCCESS); + innobase_initialize_autoinc(); } dict_table_autoinc_unlock(prebuilt->table); @@ -3872,6 +4275,11 @@ get_innobase_type_from_mysql_type( case MYSQL_TYPE_BLOB: case MYSQL_TYPE_LONG_BLOB: return(DATA_BLOB); + case MYSQL_TYPE_NULL: + /* MySQL currently accepts "NULL" datatype, but will + reject such datatype in the next release. We will cope + with it and not trigger assertion failure in 5.1 */ + break; default: ut_error; } @@ -4440,67 +4848,6 @@ skip_field: } /********************************************************************//** -Get the upper limit of the MySQL integral and floating-point type. */ -UNIV_INTERN -ulonglong -ha_innobase::innobase_get_int_col_max_value( -/*========================================*/ - const Field* field) -{ - ulonglong max_value = 0; - - switch(field->key_type()) { - /* TINY */ - case HA_KEYTYPE_BINARY: - max_value = 0xFFULL; - break; - case HA_KEYTYPE_INT8: - max_value = 0x7FULL; - break; - /* SHORT */ - case HA_KEYTYPE_USHORT_INT: - max_value = 0xFFFFULL; - break; - case HA_KEYTYPE_SHORT_INT: - max_value = 0x7FFFULL; - break; - /* MEDIUM */ - case HA_KEYTYPE_UINT24: - max_value = 0xFFFFFFULL; - break; - case HA_KEYTYPE_INT24: - max_value = 0x7FFFFFULL; - break; - /* LONG */ - case HA_KEYTYPE_ULONG_INT: - max_value = 0xFFFFFFFFULL; - break; - case HA_KEYTYPE_LONG_INT: - max_value = 0x7FFFFFFFULL; - break; - /* BIG */ - case HA_KEYTYPE_ULONGLONG: - max_value = 0xFFFFFFFFFFFFFFFFULL; - break; - case HA_KEYTYPE_LONGLONG: - max_value = 0x7FFFFFFFFFFFFFFFULL; - break; - case HA_KEYTYPE_FLOAT: - /* We use the maximum as per IEEE754-2008 standard, 2^24 */ - max_value = 0x1000000ULL; - break; - case HA_KEYTYPE_DOUBLE: - /* We use the maximum as per IEEE754-2008 standard, 2^53 */ - max_value = 0x20000000000000ULL; - break; - default: - ut_error; - } - - return(max_value); -} - 
-/********************************************************************//** This special handling is really to overcome the limitations of MySQL's binlogging. We need to eliminate the non-determinism that will arise in INSERT ... SELECT type of statements, since MySQL binlog only stores the @@ -4729,11 +5076,17 @@ no_commit: prebuilt->autoinc_error = DB_SUCCESS; if ((error = update_auto_increment())) { - /* We don't want to mask autoinc overflow errors. */ - if (prebuilt->autoinc_error != DB_SUCCESS) { - error = (int) prebuilt->autoinc_error; + /* Handle the case where the AUTOINC sub-system + failed during initialization. */ + if (prebuilt->autoinc_error == DB_UNSUPPORTED) { + error_result = ER_AUTOINC_READ_FAILED; + /* Set the error message to report too. */ + my_error(ER_AUTOINC_READ_FAILED, MYF(0)); + goto func_exit; + } else if (prebuilt->autoinc_error != DB_SUCCESS) { + error = (int) prebuilt->autoinc_error; goto report_error; } @@ -5218,7 +5571,7 @@ ha_innobase::unlock_row(void) case ROW_READ_WITH_LOCKS: if (!srv_locks_unsafe_for_binlog && prebuilt->trx->isolation_level - != TRX_ISO_READ_COMMITTED) { + > TRX_ISO_READ_COMMITTED) { break; } /* fall through */ @@ -5257,7 +5610,7 @@ ha_innobase::try_semi_consistent_read(bool yes) if (yes && (srv_locks_unsafe_for_binlog - || prebuilt->trx->isolation_level == TRX_ISO_READ_COMMITTED)) { + || prebuilt->trx->isolation_level <= TRX_ISO_READ_COMMITTED)) { prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; } else { prebuilt->row_read_type = ROW_READ_WITH_LOCKS; @@ -5444,6 +5797,9 @@ ha_innobase::index_read( prebuilt->index_usable = FALSE; DBUG_RETURN(HA_ERR_CRASHED); } + if (UNIV_UNLIKELY(!prebuilt->index_usable)) { + DBUG_RETURN(HA_ERR_TABLE_DEF_CHANGED); + } /* Note that if the index for which the search template is built is not necessarily prebuilt->index, but can also be the clustered index */ @@ -5563,14 +5919,30 @@ ha_innobase::innobase_get_index( DBUG_ENTER("innobase_get_index"); 
ha_statistic_increment(&SSV::ha_read_key_count); - ut_ad(user_thd == ha_thd()); - ut_a(prebuilt->trx == thd_to_trx(user_thd)); - if (keynr != MAX_KEY && table->s->keys > 0) { key = table->key_info + keynr; - index = dict_table_get_index_on_name(prebuilt->table, - key->name); + index = innobase_index_lookup(share, keynr); + + if (index) { + ut_a(ut_strcmp(index->name, key->name) == 0); + } else { + /* Can't find index with keynr in the translation + table. Only print message if the index translation + table exists */ + if (share->idx_trans_tbl.index_mapping) { + sql_print_error("InnoDB could not find " + "index %s key no %u for " + "table %s through its " + "index translation table", + key ? key->name : "NULL", + keynr, + prebuilt->table->name); + } + + index = dict_table_get_index_on_name(prebuilt->table, + key->name); + } } else { index = dict_table_get_first_index(prebuilt->table); } @@ -5636,7 +6008,7 @@ ha_innobase::change_active_index( dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields); dict_index_copy_types(prebuilt->search_tuple, prebuilt->index, - prebuilt->index->n_fields); + prebuilt->index->n_fields); /* MySQL changes the active index for a handle also during some queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX() @@ -6071,7 +6443,22 @@ create_table_def( continue; col_type = get_innobase_type_from_mysql_type(&unsigned_type, - field); + field); + + if (!col_type) { + push_warning_printf( + (THD*) trx->mysql_thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_CANT_CREATE_TABLE, + "Error creating table '%s' with " + "column '%s'. 
Please check its " + "column type and try to re-create " + "the table with an appropriate " + "column type.", + table->name, (char*) field->field_name); + goto err_col; + } + if (field->null_ptr) { nulls_allowed = 0; } else { @@ -6129,7 +6516,7 @@ create_table_def( if (dict_col_name_is_reserved(field->field_name)){ my_error(ER_WRONG_COLUMN_NAME, MYF(0), field->field_name); - +err_col: dict_mem_table_free(table); trx_commit_for_mysql(trx); @@ -6152,9 +6539,11 @@ create_table_def( if (error == DB_DUPLICATE_KEY) { char buf[100]; - innobase_convert_identifier(buf, sizeof buf, - table_name, strlen(table_name), - trx->mysql_thd, TRUE); + char* buf_end = innobase_convert_identifier( + buf, sizeof buf - 1, table_name, strlen(table_name), + trx->mysql_thd, TRUE); + + *buf_end = '\0'; my_error(ER_TABLE_EXISTS_ERROR, MYF(0), buf); } @@ -6527,6 +6916,9 @@ ha_innobase::create( /* Cache the value of innodb_file_format, in case it is modified by another thread while the table is being created. */ const ulint file_format = srv_file_format; + const char* stmt; + size_t stmt_len; + enum row_type row_type; DBUG_ENTER("ha_innobase::create"); @@ -6647,94 +7039,94 @@ ha_innobase::create( } } - if (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT) { - if (flags) { - /* KEY_BLOCK_SIZE was specified. */ - if (form->s->row_type != ROW_TYPE_COMPRESSED) { - /* ROW_FORMAT other than COMPRESSED - ignores KEY_BLOCK_SIZE. It does not - make sense to reject conflicting - KEY_BLOCK_SIZE and ROW_FORMAT, because - such combinations can be obtained - with ALTER TABLE anyway. */ - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ignoring KEY_BLOCK_SIZE=%lu" - " unless ROW_FORMAT=COMPRESSED.", - create_info->key_block_size); - flags = 0; - } - } else { - /* No KEY_BLOCK_SIZE */ - if (form->s->row_type == ROW_TYPE_COMPRESSED) { - /* ROW_FORMAT=COMPRESSED without - KEY_BLOCK_SIZE implies half the - maximum KEY_BLOCK_SIZE. 
*/ - flags = (DICT_TF_ZSSIZE_MAX - 1) - << DICT_TF_ZSSIZE_SHIFT - | DICT_TF_COMPACT - | DICT_TF_FORMAT_ZIP - << DICT_TF_FORMAT_SHIFT; + row_type = form->s->row_type; + + if (flags) { + /* KEY_BLOCK_SIZE was specified. */ + if (!(create_info->used_fields & HA_CREATE_USED_ROW_FORMAT)) { + /* ROW_FORMAT was not specified; + default to ROW_FORMAT=COMPRESSED */ + row_type = ROW_TYPE_COMPRESSED; + } else if (row_type != ROW_TYPE_COMPRESSED) { + /* ROW_FORMAT other than COMPRESSED + ignores KEY_BLOCK_SIZE. It does not + make sense to reject conflicting + KEY_BLOCK_SIZE and ROW_FORMAT, because + such combinations can be obtained + with ALTER TABLE anyway. */ + push_warning_printf( + thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ignoring KEY_BLOCK_SIZE=%lu" + " unless ROW_FORMAT=COMPRESSED.", + create_info->key_block_size); + flags = 0; + } + } else { + /* No KEY_BLOCK_SIZE */ + if (row_type == ROW_TYPE_COMPRESSED) { + /* ROW_FORMAT=COMPRESSED without + KEY_BLOCK_SIZE implies half the + maximum KEY_BLOCK_SIZE. */ + flags = (DICT_TF_ZSSIZE_MAX - 1) + << DICT_TF_ZSSIZE_SHIFT + | DICT_TF_COMPACT + | DICT_TF_FORMAT_ZIP + << DICT_TF_FORMAT_SHIFT; //#if DICT_TF_ZSSIZE_MAX < 1 //# error "DICT_TF_ZSSIZE_MAX < 1" //#endif - } } + } - switch (form->s->row_type) { - const char* row_format_name; - case ROW_TYPE_REDUNDANT: - break; - case ROW_TYPE_COMPRESSED: - case ROW_TYPE_DYNAMIC: - row_format_name - = form->s->row_type == ROW_TYPE_COMPRESSED - ? 
"COMPRESSED" - : "DYNAMIC"; - - if (!srv_file_per_table) { - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s" - " requires innodb_file_per_table.", - row_format_name); - } else if (file_format < DICT_TF_FORMAT_ZIP) { - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s" - " requires innodb_file_format >" - " Antelope.", - row_format_name); - } else { - flags |= DICT_TF_COMPACT - | (DICT_TF_FORMAT_ZIP - << DICT_TF_FORMAT_SHIFT); - break; - } + switch (row_type) { + const char* row_format_name; + case ROW_TYPE_REDUNDANT: + break; + case ROW_TYPE_COMPRESSED: + case ROW_TYPE_DYNAMIC: + row_format_name + = row_type == ROW_TYPE_COMPRESSED + ? "COMPRESSED" + : "DYNAMIC"; - /* fall through */ - case ROW_TYPE_NOT_USED: - case ROW_TYPE_FIXED: - default: - push_warning(thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: assuming ROW_FORMAT=COMPACT."); - case ROW_TYPE_DEFAULT: - case ROW_TYPE_COMPACT: - flags = DICT_TF_COMPACT; + if (!srv_file_per_table) { + push_warning_printf( + thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ROW_FORMAT=%s" + " requires innodb_file_per_table.", + row_format_name); + } else if (file_format < DICT_TF_FORMAT_ZIP) { + push_warning_printf( + thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ROW_FORMAT=%s" + " requires innodb_file_format >" + " Antelope.", + row_format_name); + } else { + flags |= DICT_TF_COMPACT + | (DICT_TF_FORMAT_ZIP + << DICT_TF_FORMAT_SHIFT); break; } - } else if (!flags) { - /* No KEY_BLOCK_SIZE or ROW_FORMAT specified: - use ROW_FORMAT=COMPACT by default. 
*/ + + /* fall through */ + case ROW_TYPE_NOT_USED: + case ROW_TYPE_FIXED: + default: + push_warning(thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: assuming ROW_FORMAT=COMPACT."); + case ROW_TYPE_DEFAULT: + case ROW_TYPE_COMPACT: flags = DICT_TF_COMPACT; + break; } /* Look for a primary key */ @@ -6743,7 +7135,7 @@ ha_innobase::create( (int) form->s->primary_key : -1); - /* Our function row_get_mysql_key_number_for_index assumes + /* Our function innobase_get_mysql_key_number_for_index assumes the primary key is always number 0, if it exists */ ut_a(primary_key_no == -1 || primary_key_no == 0); @@ -6756,6 +7148,10 @@ ha_innobase::create( goto cleanup; } + if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { + flags |= DICT_TF2_TEMPORARY << DICT_TF2_SHIFT; + } + error = create_table_def(trx, form, norm_name, create_info->options & HA_LEX_CREATE_TMP_TABLE ? name2 : NULL, flags); @@ -6799,9 +7195,11 @@ ha_innobase::create( } } - if (*trx->mysql_query_str) { - error = row_table_add_foreign_constraints(trx, - *trx->mysql_query_str, norm_name, + stmt = innobase_get_stmt(thd, &stmt_len); + + if (stmt) { + error = row_table_add_foreign_constraints( + trx, stmt, stmt_len, norm_name, create_info->options & HA_LEX_CREATE_TMP_TABLE); error = convert_error_code_to_mysql(error, flags, NULL); @@ -7094,7 +7492,6 @@ innobase_drop_database( /* In the Windows plugin, thd = current_thd is always NULL */ trx = trx_allocate_for_mysql(); trx->mysql_thd = NULL; - trx->mysql_query_str = NULL; #else trx = innobase_trx_allocate(thd); #endif @@ -7287,10 +7684,19 @@ ha_innobase::records_in_range( key = table->key_info + active_index; - index = dict_table_get_index_on_name(prebuilt->table, key->name); + index = innobase_get_index(keynr); - /* MySQL knows about this index and so we must be able to find it.*/ - ut_a(index); + /* There exists possibility of not being able to find requested + index due to inconsistency between MySQL and InoDB dictionary info. 
+ Necessary message should have been printed in innobase_get_index() */ + if (UNIV_UNLIKELY(!index)) { + n_rows = HA_POS_ERROR; + goto func_exit; + } + if (UNIV_UNLIKELY(!row_merge_is_index_usable(prebuilt->trx, index))) { + n_rows = HA_ERR_TABLE_DEF_CHANGED; + goto func_exit; + } heap = mem_heap_create(2 * (key->key_parts * sizeof(dfield_t) + sizeof(dtuple_t))); @@ -7335,6 +7741,7 @@ ha_innobase::records_in_range( mem_heap_free(heap); +func_exit: my_free(key_val_buff2, MYF(0)); prebuilt->trx->op_info = (char*)""; @@ -7469,6 +7876,86 @@ ha_innobase::is_corrupt() const } /*********************************************************************//** +Calculates the key number used inside MySQL for an Innobase index. We will +first check the "index translation table" for a match of the index to get +the index number. If there does not exist an "index translation table", +or not able to find the index in the translation table, then we will fall back +to the traditional way of looping through dict_index_t list to find a +match. In this case, we have to take into account if we generated a +default clustered index for the table +@return the key number used inside MySQL */ +static +unsigned int +innobase_get_mysql_key_number_for_index( +/*====================================*/ + INNOBASE_SHARE* share, /*!< in: share structure for index + translation table. */ + const TABLE* table, /*!< in: table in MySQL data + dictionary */ + dict_table_t* ib_table,/*!< in: table in Innodb data + dictionary */ + const dict_index_t* index) /*!< in: index */ +{ + const dict_index_t* ind; + unsigned int i; + + ut_ad(index); + ut_ad(ib_table); + ut_ad(table); + ut_ad(share); + + /* If index does not belong to the table of share structure. 
Search + index->table instead */ + if (index->table != ib_table) { + i = 0; + ind = dict_table_get_first_index(index->table); + + while (index != ind) { + ind = dict_table_get_next_index(ind); + i++; + } + + if (row_table_got_default_clust_index(index->table)) { + ut_a(i > 0); + i--; + } + + return(i); + } + + /* If index translation table exists, we will first check + the index through index translation table for a match. */ + if (share->idx_trans_tbl.index_mapping) { + for (i = 0; i < share->idx_trans_tbl.index_count; i++) { + if (share->idx_trans_tbl.index_mapping[i] == index) { + return(i); + } + } + + /* Print an error message if we cannot find the index + ** in the "index translation table". */ + sql_print_error("Cannot find index %s in InnoDB index " + "translation table.", index->name); + } + + /* If we do not have an "index translation table", or not able + to find the index in the translation table, we'll directly find + matching index in the dict_index_t list */ + for (i = 0; i < table->s->keys; i++) { + ind = dict_table_get_index_on_name( + ib_table, table->key_info[i].name); + + if (index == ind) { + return(i); + } + } + + sql_print_error("Cannot find matching index number for index %s " + "in InnoDB index list.", index->name); + + return(0); +} +/*********************************************************************//** Returns statistics information of the table to the MySQL interpreter, in various fields of the handle object. */ UNIV_INTERN @@ -7486,6 +7973,7 @@ ha_innobase::info( char path[FN_REFLEN]; os_file_stat_t stat_info; + DBUG_ENTER("info"); /* If we are forcing recovery at a high level, we will suppress @@ -7524,9 +8012,30 @@ ha_innobase::info( /* In sql_show we call with this flag: update then statistics so that they are up-to-date */ + if (srv_use_sys_stats_table + && thd_sql_command(user_thd) == SQLCOM_ANALYZE) { + /* If the indexes on the table don't have enough rows in SYS_STATS system table, */ + /* they need to be created. 
*/ + dict_index_t* index; + + prebuilt->trx->op_info = "confirming rows of SYS_STATS to store statistics"; + + ut_a(prebuilt->trx->conc_state == TRX_NOT_STARTED); + + for (index = dict_table_get_first_index(ib_table); + index != NULL; + index = dict_table_get_next_index(index)) { + row_insert_stats_for_mysql(index, prebuilt->trx); + innobase_commit_low(prebuilt->trx); + } + + ut_a(prebuilt->trx->conc_state == TRX_NOT_STARTED); + } + prebuilt->trx->op_info = "updating table statistics"; - dict_update_statistics(ib_table); + dict_update_statistics(ib_table, + (thd_sql_command(user_thd) == SQLCOM_ANALYZE)?TRUE:FALSE); prebuilt->trx->op_info = "returning various info to MySQL"; } @@ -7649,13 +8158,29 @@ ha_innobase::info( } if (flag & HA_STATUS_CONST) { - index = dict_table_get_first_index(ib_table); + /* Verify the number of index in InnoDB and MySQL + matches up. If prebuilt->clust_index_was_generated + holds, InnoDB defines GEN_CLUST_INDEX internally */ + ulint num_innodb_index = UT_LIST_GET_LEN(ib_table->indexes) + - prebuilt->clust_index_was_generated; - if (prebuilt->clust_index_was_generated) { - index = dict_table_get_next_index(index); + if (table->s->keys != num_innodb_index) { + sql_print_error("Table %s contains %lu " + "indexes inside InnoDB, which " + "is different from the number of " + "indexes %u defined in the MySQL ", + ib_table->name, num_innodb_index, + table->s->keys); } for (i = 0; i < table->s->keys; i++) { + /* We could get index quickly through internal + index mapping with the index translation table. + The identity of index (match up index name with + that of table->key_info[i]) is already verified in + innobase_get_index(). 
*/ + index = innobase_get_index(i); + if (index == NULL) { sql_print_error("Table %s contains fewer " "indexes inside InnoDB than " @@ -7684,6 +8209,8 @@ ha_innobase::info( break; } + dict_index_stat_mutex_enter(index); + if (index->stat_n_diff_key_vals[j + 1] == 0) { rec_per_key = stats.records; @@ -7692,6 +8219,8 @@ ha_innobase::info( index->stat_n_diff_key_vals[j + 1]); } + dict_index_stat_mutex_exit(index); + /* Since MySQL seems to favor table scans too much over index searches, we pretend index selectivity is 2 times better than @@ -7707,8 +8236,6 @@ ha_innobase::info( rec_per_key >= ~(ulong) 0 ? ~(ulong) 0 : (ulong) rec_per_key; } - - index = dict_table_get_next_index(index); } } @@ -7721,8 +8248,8 @@ ha_innobase::info( err_index = trx_get_error_info(prebuilt->trx); if (err_index) { - errkey = (unsigned int) - row_get_mysql_key_number_for_index(err_index); + errkey = innobase_get_mysql_key_number_for_index( + share, table, ib_table, err_index); } else { errkey = (unsigned int) prebuilt->trx->error_key_num; } @@ -7752,15 +8279,9 @@ ha_innobase::analyze( return(HA_ADMIN_CORRUPT); } - /* Serialize ANALYZE TABLE inside InnoDB, see - Bug#38996 Race condition in ANALYZE TABLE */ - pthread_mutex_lock(&analyze_mutex); - /* Simply call ::info() with all the flags */ info(HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE); - pthread_mutex_unlock(&analyze_mutex); - if (share->ib_table->is_corrupt) { return(HA_ADMIN_CORRUPT); } @@ -7794,8 +8315,13 @@ ha_innobase::check( HA_CHECK_OPT* check_opt) /*!< in: check options, currently ignored */ { - ulint ret; + dict_index_t* index; + ulint n_rows; + ulint n_rows_in_table = ULINT_UNDEFINED; + ibool is_ok = TRUE; + ulint old_isolation_level; + DBUG_ENTER("ha_innobase::check"); DBUG_ASSERT(thd == ha_thd()); ut_a(prebuilt->trx); ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N); @@ -7808,21 +8334,144 @@ ha_innobase::check( build_template(prebuilt, NULL, table, this, ROW_MYSQL_WHOLE_ROW); } - ret = 
row_check_table_for_mysql(prebuilt); + if (prebuilt->table->ibd_file_missing) { + sql_print_error("InnoDB: Error:\n" + "InnoDB: MySQL is trying to use a table handle" + " but the .ibd file for\n" + "InnoDB: table %s does not exist.\n" + "InnoDB: Have you deleted the .ibd file" + " from the database directory under\n" + "InnoDB: the MySQL datadir, or have you" + " used DISCARD TABLESPACE?\n" + "InnoDB: Please refer to\n" + "InnoDB: " REFMAN "innodb-troubleshooting.html\n" + "InnoDB: how you can resolve the problem.\n", + prebuilt->table->name); + DBUG_RETURN(HA_ADMIN_CORRUPT); + } + + prebuilt->trx->op_info = "checking table"; - if (ret != DB_INTERRUPTED && share->ib_table->is_corrupt) { - return(HA_ADMIN_CORRUPT); + old_isolation_level = prebuilt->trx->isolation_level; + + /* We must run the index record counts at an isolation level + >= READ COMMITTED, because a dirty read can see a wrong number + of records in some index; to play safe, we use always + REPEATABLE READ here */ + + prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ; + + /* Enlarge the fatal lock wait timeout during CHECK TABLE. */ + mutex_enter(&kernel_mutex); + srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */ + mutex_exit(&kernel_mutex); + + for (index = dict_table_get_first_index(prebuilt->table); + index != NULL; + index = dict_table_get_next_index(index)) { +#if 0 + fputs("Validating index ", stderr); + ut_print_name(stderr, trx, FALSE, index->name); + putc('\n', stderr); +#endif + + if (!btr_validate_index(index, prebuilt->trx)) { + is_ok = FALSE; + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_NOT_KEYFILE, + "InnoDB: The B-tree of" + " index '%-.200s' is corrupted.", + index->name); + continue; + } + + /* Instead of invoking change_active_index(), set up + a dummy template for non-locking reads, disabling + access to the clustered index. 
*/ + prebuilt->index = index; + + prebuilt->index_usable = row_merge_is_index_usable( + prebuilt->trx, prebuilt->index); + + if (UNIV_UNLIKELY(!prebuilt->index_usable)) { + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + HA_ERR_TABLE_DEF_CHANGED, + "InnoDB: Insufficient history for" + " index '%-.200s'", + index->name); + continue; + } + + prebuilt->sql_stat_start = TRUE; + prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE; + prebuilt->n_template = 0; + prebuilt->need_to_access_clustered = FALSE; + + dtuple_set_n_fields(prebuilt->search_tuple, 0); + + prebuilt->select_lock_type = LOCK_NONE; + + if (!row_check_index_for_mysql(prebuilt, index, &n_rows)) { + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_NOT_KEYFILE, + "InnoDB: The B-tree of" + " index '%-.200s' is corrupted.", + index->name); + is_ok = FALSE; + } + + if (thd_killed(user_thd)) { + break; + } + +#if 0 + fprintf(stderr, "%lu entries in index %s\n", n_rows, + index->name); +#endif + + if (index == dict_table_get_first_index(prebuilt->table)) { + n_rows_in_table = n_rows; + } else if (n_rows != n_rows_in_table) { + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_NOT_KEYFILE, + "InnoDB: Index '%-.200s'" + " contains %lu entries," + " should be %lu.", + index->name, + (ulong) n_rows, + (ulong) n_rows_in_table); + is_ok = FALSE; + } } - switch (ret) { - case DB_SUCCESS: - return(HA_ADMIN_OK); - case DB_INTERRUPTED: + /* Restore the original isolation level */ + prebuilt->trx->isolation_level = old_isolation_level; + + /* We validate also the whole adaptive hash index for all tables + at every CHECK TABLE */ + + if (!btr_search_validate()) { + push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_NOT_KEYFILE, + "InnoDB: The adaptive hash index is corrupted."); + is_ok = FALSE; + } + + /* Restore the fatal lock wait timeout after CHECK TABLE. 
*/ + mutex_enter(&kernel_mutex); + srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */ + mutex_exit(&kernel_mutex); + + prebuilt->trx->op_info = ""; + if (thd_killed(user_thd)) { my_error(ER_QUERY_INTERRUPTED, MYF(0)); - return(-1); - default: + } + + if (share->ib_table->is_corrupt) { return(HA_ADMIN_CORRUPT); } + + DBUG_RETURN(is_ok ? HA_ADMIN_OK : HA_ADMIN_CORRUPT); } /*************************************************************//** @@ -8496,7 +9145,9 @@ ha_innobase::external_lock( if (trx->n_mysql_tables_in_use == 0) { #ifdef EXTENDED_SLOWLOG - increment_thd_innodb_stats(thd, trx->io_reads, + increment_thd_innodb_stats(thd, + (unsigned long long) ut_conv_dulint_to_longlong(trx->id), + trx->io_reads, trx->io_read, trx->io_reads_wait_timer, trx->lock_que_wait_timer, @@ -8676,8 +9327,8 @@ innodb_show_status( mutex_enter(&srv_monitor_file_mutex); rewind(srv_monitor_file); - srv_printf_innodb_monitor(srv_monitor_file, - &trx_list_start, &trx_list_end); + srv_printf_innodb_monitor(srv_monitor_file, FALSE, + &trx_list_start, &trx_list_end); flen = ftell(srv_monitor_file); os_file_set_eof(srv_monitor_file); @@ -8734,19 +9385,25 @@ innodb_show_status( } /************************************************************************//** -Implements the SHOW MUTEX STATUS command. . */ +Implements the SHOW MUTEX STATUS command. +@return TRUE on failure, FALSE on success. 
*/ static bool innodb_mutex_show_status( /*=====================*/ - handlerton* hton, /*!< in: the innodb handlerton */ + handlerton* hton, /*!< in: the innodb handlerton */ THD* thd, /*!< in: the MySQL query thread of the caller */ - stat_print_fn* stat_print) + stat_print_fn* stat_print) /*!< in: function for printing + statistics */ { char buf1[IO_SIZE], buf2[IO_SIZE]; mutex_t* mutex; rw_lock_t* lock; + ulint block_mutex_oswait_count = 0; + ulint block_lock_oswait_count = 0; + mutex_t* block_mutex = NULL; + rw_lock_t* block_lock = NULL; #ifdef UNIV_DEBUG ulint rw_lock_count= 0; ulint rw_lock_count_spin_loop= 0; @@ -8761,12 +9418,16 @@ innodb_mutex_show_status( mutex_enter(&mutex_list_mutex); - mutex = UT_LIST_GET_FIRST(mutex_list); + for (mutex = UT_LIST_GET_FIRST(mutex_list); mutex != NULL; + mutex = UT_LIST_GET_NEXT(list, mutex)) { + if (mutex->count_os_wait == 0) { + continue; + } - while (mutex != NULL) { - if (mutex->count_os_wait == 0 - || buf_pool_is_block_mutex(mutex)) { - goto next_mutex; + if (buf_pool_is_block_mutex(mutex)) { + block_mutex = mutex; + block_mutex_oswait_count += mutex->count_os_wait; + continue; } #ifdef UNIV_DEBUG if (mutex->mutex_type != 1) { @@ -8793,8 +9454,7 @@ innodb_mutex_show_status( DBUG_RETURN(1); } } - } - else { + } else { rw_lock_count += mutex->count_using; rw_lock_count_spin_loop += mutex->count_spin_loop; rw_lock_count_spin_rounds += mutex->count_spin_rounds; @@ -8806,7 +9466,7 @@ innodb_mutex_show_status( buf1len= (uint) my_snprintf(buf1, sizeof(buf1), "%s", mutex->cmutex_name); buf2len= (uint) my_snprintf(buf2, sizeof(buf2), "os_waits=%lu", - mutex->count_os_wait); + (ulong) mutex->count_os_wait); if (stat_print(thd, innobase_hton_name, hton_name_len, buf1, buf1len, @@ -8815,45 +9475,81 @@ innodb_mutex_show_status( DBUG_RETURN(1); } #endif /* UNIV_DEBUG */ + } + + if (block_mutex) { + buf1len = (uint) my_snprintf(buf1, sizeof buf1, + "combined %s", + block_mutex->cmutex_name); + buf2len = (uint) my_snprintf(buf2, 
sizeof buf2, + "os_waits=%lu", + (ulong) block_mutex_oswait_count); -next_mutex: - mutex = UT_LIST_GET_NEXT(list, mutex); + if (stat_print(thd, innobase_hton_name, + hton_name_len, buf1, buf1len, + buf2, buf2len)) { + mutex_exit(&mutex_list_mutex); + DBUG_RETURN(1); + } } mutex_exit(&mutex_list_mutex); mutex_enter(&rw_lock_list_mutex); - lock = UT_LIST_GET_FIRST(rw_lock_list); - - while (lock != NULL) { - if (lock->count_os_wait - && !buf_pool_is_block_lock(lock)) { - buf1len= my_snprintf(buf1, sizeof(buf1), "%s", - lock->lock_name); - buf2len= my_snprintf(buf2, sizeof(buf2), - "os_waits=%lu", lock->count_os_wait); - - if (stat_print(thd, innobase_hton_name, - hton_name_len, buf1, buf1len, - buf2, buf2len)) { - mutex_exit(&rw_lock_list_mutex); - DBUG_RETURN(1); - } + for (lock = UT_LIST_GET_FIRST(rw_lock_list); lock != NULL; + lock = UT_LIST_GET_NEXT(list, lock)) { + if (lock->count_os_wait == 0) { + continue; + } + + if (buf_pool_is_block_lock(lock)) { + block_lock = lock; + block_lock_oswait_count += lock->count_os_wait; + continue; + } + + buf1len = my_snprintf(buf1, sizeof buf1, "%s", + lock->lock_name); + buf2len = my_snprintf(buf2, sizeof buf2, "os_waits=%lu", + (ulong) lock->count_os_wait); + + if (stat_print(thd, innobase_hton_name, + hton_name_len, buf1, buf1len, + buf2, buf2len)) { + mutex_exit(&rw_lock_list_mutex); + DBUG_RETURN(1); + } + } + + if (block_lock) { + buf1len = (uint) my_snprintf(buf1, sizeof buf1, + "combined %s", + block_lock->lock_name); + buf2len = (uint) my_snprintf(buf2, sizeof buf2, + "os_waits=%lu", + (ulong) block_lock_oswait_count); + + if (stat_print(thd, innobase_hton_name, + hton_name_len, buf1, buf1len, + buf2, buf2len)) { + mutex_exit(&rw_lock_list_mutex); + DBUG_RETURN(1); } - lock = UT_LIST_GET_NEXT(list, lock); } mutex_exit(&rw_lock_list_mutex); #ifdef UNIV_DEBUG - buf2len= my_snprintf(buf2, sizeof(buf2), - "count=%lu, spin_waits=%lu, spin_rounds=%lu, " - "os_waits=%lu, os_yields=%lu, os_wait_times=%lu", - rw_lock_count, 
rw_lock_count_spin_loop, - rw_lock_count_spin_rounds, - rw_lock_count_os_wait, rw_lock_count_os_yield, - (ulong) (rw_lock_wait_time/1000)); + buf2len = my_snprintf(buf2, sizeof buf2, + "count=%lu, spin_waits=%lu, spin_rounds=%lu, " + "os_waits=%lu, os_yields=%lu, os_wait_times=%lu", + (ulong) rw_lock_count, + (ulong) rw_lock_count_spin_loop, + (ulong) rw_lock_count_spin_rounds, + (ulong) rw_lock_count_os_wait, + (ulong) rw_lock_count_os_yield, + (ulong) (rw_lock_wait_time / 1000)); if (stat_print(thd, innobase_hton_name, hton_name_len, STRING_WITH_LEN("rw_lock_mutexes"), buf2, buf2len)) { @@ -8915,6 +9611,11 @@ static INNOBASE_SHARE* get_share(const char* table_name) innobase_open_tables, fold, share); thr_lock_init(&share->lock); + + /* Index translation table initialization */ + share->idx_trans_tbl.index_mapping = NULL; + share->idx_trans_tbl.index_count = 0; + share->idx_trans_tbl.array_size = 0; } share->use_count++; @@ -8945,6 +9646,11 @@ static void free_share(INNOBASE_SHARE* share) HASH_DELETE(INNOBASE_SHARE, table_name_hash, innobase_open_tables, fold, share); thr_lock_delete(&share->lock); + + /* Free any memory from index translation table */ + my_free(share->idx_trans_tbl.index_mapping, + MYF(MY_ALLOW_ZERO_PTR)); + my_free(share, MYF(0)); /* TODO: invoke HASH_MIGRATE if innobase_open_tables @@ -9047,7 +9753,7 @@ ha_innobase::store_lock( isolation_level = trx->isolation_level; if ((srv_locks_unsafe_for_binlog - || isolation_level == TRX_ISO_READ_COMMITTED) + || isolation_level <= TRX_ISO_READ_COMMITTED) && isolation_level != TRX_ISO_SERIALIZABLE && (lock_type == TL_READ || lock_type == TL_READ_NO_INSERT) && (sql_command == SQLCOM_INSERT_SELECT @@ -9179,7 +9885,10 @@ ha_innobase::innobase_get_autoinc( *value = dict_table_autoinc_read(prebuilt->table); /* It should have been initialized during open. 
*/ - ut_a(*value != 0); + if (*value == 0) { + prebuilt->autoinc_error = DB_UNSUPPORTED; + dict_table_autoinc_unlock(prebuilt->table); + } } return(prebuilt->autoinc_error); @@ -9795,33 +10504,60 @@ innobase_set_cursor_view( (cursor_view_t*) curview); } +/*******************************************************************//** +If col_name is not NULL, check whether the named column is being +renamed in the table. If col_name is not provided, check +whether any one of columns in the table is being renamed. +@return true if the column is being renamed */ +static +bool +check_column_being_renamed( +/*=======================*/ + const TABLE* table, /*!< in: MySQL table */ + const char* col_name) /*!< in: name of the column */ +{ + uint k; + Field* field; -/*********************************************************************** -Check whether any of the given columns is being renamed in the table. */ + for (k = 0; k < table->s->fields; k++) { + field = table->field[k]; + + if (field->flags & FIELD_IS_RENAMED) { + + /* If col_name is not provided, return + if the field is marked as being renamed. */ + if (!col_name) { + return(true); + } + + /* If col_name is provided, return only + if names match */ + if (innobase_strcasecmp(field->field_name, + col_name) == 0) { + return(true); + } + } + } + + return(false); +} + +/*******************************************************************//** +Check whether any of the given columns is being renamed in the table. 
+@return true if any of col_names is being renamed in table */ static bool column_is_being_renamed( /*====================*/ - /* out: true if any of col_names is - being renamed in table */ - TABLE* table, /* in: MySQL table */ - uint n_cols, /* in: number of columns */ - const char** col_names) /* in: names of the columns */ + TABLE* table, /*!< in: MySQL table */ + uint n_cols, /*!< in: number of columns */ + const char** col_names) /*!< in: names of the columns */ { uint j; - uint k; - Field* field; - const char* col_name; for (j = 0; j < n_cols; j++) { - col_name = col_names[j]; - for (k = 0; k < table->s->fields; k++) { - field = table->field[k]; - if ((field->flags & FIELD_IS_RENAMED) - && innobase_strcasecmp(field->field_name, - col_name) == 0) { - return(true); - } + if (check_column_being_renamed(table, col_names[j])) { + return(true); } } @@ -9912,18 +10648,13 @@ ha_innobase::check_if_incompatible_data( DBUG_RETURN(COMPATIBLE_DATA_NO); } - /* Renaming column asynchronizes dictionary between mysqld and InnoDB... - If not synchronized, treat as COMPATIBLE_DATA_NO - until the bug http://bugs.mysql.com/47621 is fixed officialily */ - { - uint i; - for (i = 0; i < table->s->fields; i++) { - if (table->field[i]->flags & FIELD_IN_ADD_INDEX - && innobase_strcasecmp(table->field[i]->field_name, - dict_table_get_col_name(prebuilt->table, i))) { - DBUG_RETURN(COMPATIBLE_DATA_NO); - } - } + /* For column rename operation, MySQL does not supply enough + information (new column name etc.) for InnoDB to make appropriate + system metadata change. To avoid system metadata inconsistency, + currently we can just request a table rebuild/copy by returning + COMPATIBLE_DATA_NO */ + if (check_column_being_renamed(table, NULL)) { + DBUG_RETURN(COMPATIBLE_DATA_NO); } /* Check if a column participating in a foreign key is being renamed. 
@@ -10295,7 +11026,35 @@ innodb_old_blocks_pct_update( } /*************************************************************//** -Check if it is a valid value of innodb_change_buffering. This function is +Find the corresponding ibuf_use_t value that indexes into +innobase_change_buffering_values[] array for the input +change buffering option name. +@return corresponding IBUF_USE_* value for the input variable +name, or IBUF_USE_COUNT if not able to find a match */ +static +ibuf_use_t +innodb_find_change_buffering_value( +/*===============================*/ + const char* input_name) /*!< in: input change buffering + option name */ +{ + ulint use; + + for (use = 0; use < UT_ARR_SIZE(innobase_change_buffering_values); + use++) { + /* found a match */ + if (!innobase_strcasecmp( + input_name, innobase_change_buffering_values[use])) { + return((ibuf_use_t)use); + } + } + + /* Did not find any match */ + return(IBUF_USE_COUNT); +} + +/*************************************************************//** +Check if it is a valid value of innodb_change_buffering. This function is registered as a callback with MySQL. @return 0 for valid innodb_change_buffering */ static @@ -10319,19 +11078,22 @@ innodb_change_buffering_validate( change_buffering_input = value->val_str(value, buff, &len); if (change_buffering_input != NULL) { - ulint use; + ibuf_use_t use; - for (use = 0; use < UT_ARR_SIZE(innobase_change_buffering_values); - use++) { - if (!innobase_strcasecmp( - change_buffering_input, - innobase_change_buffering_values[use])) { - *(ibuf_use_t*) save = (ibuf_use_t) use; - return(0); - } + use = innodb_find_change_buffering_value( + change_buffering_input); + + if (use != IBUF_USE_COUNT) { + /* Find a matching change_buffering option value. 
*/ + *static_cast<const char**>(save) = + innobase_change_buffering_values[use]; + + return(0); } } + /* No corresponding change buffering option for user supplied + "change_buffering_input" */ return(1); } @@ -10342,21 +11104,27 @@ static void innodb_change_buffering_update( /*===========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr, /*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to + system variable */ + void* var_ptr,/*!< out: where the + formal string goes */ + const void* save) /*!< in: immediate result + from check function */ { + ibuf_use_t use; + ut_a(var_ptr != NULL); ut_a(save != NULL); - ut_a((*(ibuf_use_t*) save) < IBUF_USE_COUNT); - ibuf_use = *(const ibuf_use_t*) save; + use = innodb_find_change_buffering_value( + *static_cast<const char*const*>(save)); - *(const char**) var_ptr = innobase_change_buffering_values[ibuf_use]; + ut_a(use < IBUF_USE_COUNT); + + ibuf_use = use; + *static_cast<const char**>(var_ptr) = + *static_cast<const char*const*>(save); } static int show_innodb_vars(THD *thd, SHOW_VAR *var, char *buff) @@ -10453,15 +11221,15 @@ static MYSQL_SYSVAR_BOOL(extra_undoslots, innobase_extra_undoslots, static MYSQL_SYSVAR_BOOL(fast_recovery, innobase_fast_recovery, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Enable to use speed hack of recovery avoiding flush list sorting.", - NULL, NULL, TRUE); + "obsolete option. 
affects nothing.", + NULL, NULL, FALSE); static MYSQL_SYSVAR_BOOL(recovery_stats, innobase_recovery_stats, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, "Output statistics of recovery process after it.", NULL, NULL, FALSE); -static MYSQL_SYSVAR_ULONG(use_purge_thread, srv_use_purge_thread, +static MYSQL_SYSVAR_ULINT(use_purge_thread, srv_use_purge_thread, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Number of purge devoted threads. #### over 1 is EXPERIMENTAL ####", NULL, NULL, 1, 0, 64, 0); @@ -10616,12 +11384,20 @@ static MYSQL_SYSVAR_ULONG(stats_auto_update, srv_stats_auto_update, "(except for ANALYZE TABLE command) 0:disable 1:enable", NULL, NULL, 1, 0, 1, 0); -static MYSQL_SYSVAR_ULONG(stats_update_need_lock, srv_stats_update_need_lock, +static MYSQL_SYSVAR_ULINT(stats_update_need_lock, srv_stats_update_need_lock, PLUGIN_VAR_RQCMDARG, "Enable/Disable InnoDB's update statistics which needs to lock dictionary. " "e.g. Data_free.", NULL, NULL, 1, 0, 1, 0); +static MYSQL_SYSVAR_BOOL(use_sys_stats_table, innobase_use_sys_stats_table, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Enable to use SYS_STATS system table to store statistics statically, " + "And avoids to calculate statistics at every first open of the tables. " + "This option may make the opportunities of update statistics less. " + "So you should use ANALYZE TABLE command intentionally.", + NULL, NULL, FALSE); + static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled, PLUGIN_VAR_OPCMDARG, "Enable InnoDB adaptive hash index (enabled by default). 
" @@ -10647,7 +11423,12 @@ static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment, static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.", - NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L); + NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L); + +static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, srv_buffer_pool_shm_key, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "[experimental] The key value of shared memory segment for the buffer pool. 0 means disable the feature (default).", + NULL, NULL, 0, 0, INT_MAX32, 0); static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency, PLUGIN_VAR_RQCMDARG, @@ -10747,6 +11528,11 @@ static MYSQL_SYSVAR_STR(data_file_path, innobase_data_file_path, "Path to individual files and their sizes.", NULL, NULL, NULL); +static MYSQL_SYSVAR_STR(doublewrite_file, innobase_doublewrite_file, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Path to special datafile for doublewrite buffer. (default is "": not used) ### ONLY FOR EXPERTS!!! 
###", + NULL, NULL, NULL); + static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "The AUTOINC lock modes supported by InnoDB: " @@ -10771,13 +11557,13 @@ static MYSQL_SYSVAR_BOOL(use_sys_malloc, srv_use_sys_malloc, static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering, PLUGIN_VAR_RQCMDARG, "Buffer changes to reduce random access: " - "OFF, ON, inserting, deleting, changing, or purging.", + "OFF, ON, none, inserts.", innodb_change_buffering_validate, - innodb_change_buffering_update, NULL); + innodb_change_buffering_update, "inserts"); static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold, PLUGIN_VAR_RQCMDARG, - "Number of pages that must be accessed sequentially for InnoDB to" + "Number of pages that must be accessed sequentially for InnoDB to " "trigger a readahead.", NULL, NULL, 56, 0, 64, 0); @@ -10796,7 +11582,7 @@ static MYSQL_SYSVAR_ULONG(ibuf_accel_rate, srv_ibuf_accel_rate, "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)", NULL, NULL, 100, 100, 999999999, 0); -static MYSQL_SYSVAR_ULONG(checkpoint_age_target, srv_checkpoint_age_target, +static MYSQL_SYSVAR_ULINT(checkpoint_age_target, srv_checkpoint_age_target, PLUGIN_VAR_RQCMDARG, "Control soft limit of checkpoint age. (0 : not control)", NULL, NULL, 0, 0, ~0UL, 0); @@ -10890,12 +11676,7 @@ static MYSQL_SYSVAR_ULONG(dict_size_limit, srv_dict_size_limit, "Limit the allocated memory for dictionary cache. 
(0: unlimited)", NULL, NULL, 0, 0, LONG_MAX, 0); -static MYSQL_SYSVAR_ULONG(relax_table_creation, srv_relax_table_creation, - PLUGIN_VAR_RQCMDARG, - "Relax limitation of column size at table creation as builtin InnoDB.", - NULL, NULL, 0, 0, 1, 0); - -static MYSQL_SYSVAR_ULONG(pass_corrupt_table, srv_pass_corrupt_table, +static MYSQL_SYSVAR_ULINT(pass_corrupt_table, srv_pass_corrupt_table, PLUGIN_VAR_RQCMDARG, "Pass corruptions of user tables as 'corrupt table' instead of not crashing itself, " "when used with file_per_table. " @@ -10908,11 +11689,13 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(additional_mem_pool_size), MYSQL_SYSVAR(autoextend_increment), MYSQL_SYSVAR(buffer_pool_size), + MYSQL_SYSVAR(buffer_pool_shm_key), MYSQL_SYSVAR(checksums), MYSQL_SYSVAR(fast_checksum), MYSQL_SYSVAR(commit_concurrency), MYSQL_SYSVAR(concurrency_tickets), MYSQL_SYSVAR(data_file_path), + MYSQL_SYSVAR(doublewrite_file), MYSQL_SYSVAR(data_home_dir), MYSQL_SYSVAR(doublewrite), MYSQL_SYSVAR(extra_undoslots), @@ -10951,6 +11734,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(stats_method), MYSQL_SYSVAR(stats_auto_update), MYSQL_SYSVAR(stats_update_need_lock), + MYSQL_SYSVAR(use_sys_stats_table), MYSQL_SYSVAR(stats_sample_pages), MYSQL_SYSVAR(adaptive_hash_index), MYSQL_SYSVAR(replication_delay), @@ -10974,6 +11758,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(flush_neighbor_pages), MYSQL_SYSVAR(read_ahead), MYSQL_SYSVAR(adaptive_checkpoint), + MYSQL_SYSVAR(flush_log_at_trx_commit_session), MYSQL_SYSVAR(enable_unsafe_group_commit), MYSQL_SYSVAR(expand_import), MYSQL_SYSVAR(extra_rsegments), @@ -10983,18 +11768,17 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(read_ahead_threshold), MYSQL_SYSVAR(io_capacity), MYSQL_SYSVAR(use_purge_thread), - MYSQL_SYSVAR(relax_table_creation), MYSQL_SYSVAR(pass_corrupt_table), NULL }; -mysql_declare_plugin(innobase) 
+mysql_declare_plugin(xtradb) { MYSQL_STORAGE_ENGINE_PLUGIN, &innobase_storage_engine, innobase_hton_name, - "Innobase Oy", - "Supports transactions, row-level locking, and foreign keys", + "Percona", + "Percona-XtraDB, Supports transactions, row-level locking, and foreign keys", PLUGIN_LICENSE_GPL, innobase_init, /* Plugin Init */ NULL, /* Plugin Deinit */ @@ -11019,15 +11803,16 @@ i_s_innodb_index_stats, i_s_innodb_admin_command, i_s_innodb_sys_tables, i_s_innodb_sys_indexes, +i_s_innodb_sys_stats, i_s_innodb_patches mysql_declare_plugin_end; -maria_declare_plugin(innobase) +maria_declare_plugin(xtradb) { /* InnoDB */ MYSQL_STORAGE_ENGINE_PLUGIN, &innobase_storage_engine, innobase_hton_name, - "Innobase Oy", - "Supports transactions, row-level locking, and foreign keys", + "Percona", + "XtraDB engine based on InnoDB plugin. Supports transactions, row-level locking, and foreign keys", PLUGIN_LICENSE_GPL, innobase_init, /* Plugin Init */ NULL, /* Plugin Deinit */ @@ -11053,6 +11838,7 @@ i_s_innodb_index_stats_maria, i_s_innodb_admin_command_maria, i_s_innodb_sys_tables_maria, i_s_innodb_sys_indexes_maria, +i_s_innodb_sys_stats_maria, i_s_innodb_patches_maria maria_declare_plugin_end; diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index f6f407d6c8f..50a43aaebed 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2000, 2009, MySQL AB & Innobase Oy. All Rights Reserved. +Copyright (c) 2000, 2010, MySQL AB & Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -27,16 +27,32 @@ Place, Suite 330, Boston, MA 02111-1307 USA #pragma interface /* gcc class implementation */ #endif +/* Structure defines translation table between mysql index and innodb +index structures */ +typedef struct innodb_idx_translate_struct { + ulint index_count; /*!< number of valid index entries + in the index_mapping array */ + ulint array_size; /*!< array size of index_mapping */ + dict_index_t** index_mapping; /*!< index pointer array directly + maps to index in Innodb from MySQL + array index */ +} innodb_idx_translate_t; + + /** InnoDB table share */ typedef struct st_innobase_share { - THR_LOCK lock; /*!< MySQL lock protecting - this structure */ - const char* table_name; /*!< InnoDB table name */ - uint use_count; /*!< reference count, - incremented in get_share() - and decremented in free_share() */ - void* table_name_hash;/*!< hash table chain node */ - dict_table_t* ib_table; + THR_LOCK lock; /*!< MySQL lock protecting + this structure */ + const char* table_name; /*!< InnoDB table name */ + uint use_count; /*!< reference count, + incremented in get_share() + and decremented in + free_share() */ + void* table_name_hash;/*!< hash table chain node */ + innodb_idx_translate_t idx_trans_tbl; /*!< index translation + table between MySQL and + Innodb */ + dict_table_t* ib_table; } INNOBASE_SHARE; @@ -92,9 +108,8 @@ class ha_innobase: public handler ulint innobase_reset_autoinc(ulonglong auto_inc); ulint innobase_get_autoinc(ulonglong* value); ulint innobase_update_autoinc(ulonglong auto_inc); - ulint innobase_initialize_autoinc(); + void innobase_initialize_autoinc(); dict_index_t* innobase_get_index(uint keynr); - ulonglong innobase_get_int_col_max_value(const Field* field); /* Init values for the class: */ public: @@ -235,7 +250,11 @@ the definitions are bracketed with #ifdef 
INNODB_COMPATIBILITY_HOOKS */ extern "C" { struct charset_info_st *thd_charset(MYSQL_THD thd); +#if MYSQL_VERSION_ID >= 50142 +LEX_STRING *thd_query_string(MYSQL_THD thd); +#else char **thd_query(MYSQL_THD thd); +#endif /** Get the file name of the MySQL binlog. * @return the name of the binlog file diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index e060d88b3b8..3a32ed9cf36 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -229,9 +229,11 @@ static int innobase_check_index_keys( /*======================*/ - const KEY* key_info, /*!< in: Indexes to be created */ - ulint num_of_keys) /*!< in: Number of indexes to - be created */ + const KEY* key_info, /*!< in: Indexes to be + created */ + ulint num_of_keys, /*!< in: Number of + indexes to be created */ + const dict_table_t* table) /*!< in: Existing indexes */ { ulint key_num; @@ -248,9 +250,22 @@ innobase_check_index_keys( const KEY& key2 = key_info[i]; if (0 == strcmp(key.name, key2.name)) { - sql_print_error("InnoDB: key name `%s` appears" - " twice in CREATE INDEX\n", - key.name); + my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), + key.name); + + return(ER_WRONG_NAME_FOR_INDEX); + } + } + + /* Check that the same index name does not already exist. 
*/ + + for (const dict_index_t* index + = dict_table_get_first_index(table); + index; index = dict_table_get_next_index(index)) { + + if (0 == strcmp(key.name, index->name)) { + my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), + key.name); return(ER_WRONG_NAME_FOR_INDEX); } @@ -258,7 +273,7 @@ innobase_check_index_keys( /* Check that MySQL does not try to create a column prefix index field on an inappropriate data type and - that the same colum does not appear twice in the index. */ + that the same column does not appear twice in the index. */ for (ulint i = 0; i < key.key_parts; i++) { const KEY_PART_INFO& key_part1 @@ -289,14 +304,8 @@ innobase_check_index_keys( } } - sql_print_error("InnoDB: MySQL is trying to" - " create a column prefix" - " index field on an" - " inappropriate data type." - " column `%s`," - " index `%s`.\n", - field->field_name, - key.name); + my_error(ER_WRONG_KEY_COLUMN, MYF(0), + field->field_name); return(ER_WRONG_KEY_COLUMN); } @@ -309,11 +318,8 @@ innobase_check_index_keys( continue; } - sql_print_error("InnoDB: column `%s`" - " is not allowed to occur" - " twice in index `%s`.\n", - key_part1.field->field_name, - key.name); + my_error(ER_WRONG_KEY_COLUMN, MYF(0), + key_part1.field->field_name); return(ER_WRONG_KEY_COLUMN); } } @@ -522,12 +528,14 @@ innobase_create_key_def( key_info->name, "PRIMARY"); /* If there is a UNIQUE INDEX consisting entirely of NOT NULL - columns, MySQL will treat it as a PRIMARY KEY unless the - table already has one. */ + columns and if the index does not contain column prefix(es) + (only prefix/part of the column is indexed), MySQL will treat the + index as a PRIMARY KEY unless the table already has one. 
*/ if (!new_primary && (key_info->flags & HA_NOSAME) + && (!(key_info->flags & HA_KEY_HAS_PART_KEY_SEG)) && row_table_got_default_clust_index(table)) { - uint key_part = key_info->key_parts; + uint key_part = key_info->key_parts; new_primary = TRUE; @@ -656,12 +664,18 @@ ha_innobase::add_index( innodb_table = indexed_table = dict_table_get(prebuilt->table->name, FALSE); + if (UNIV_UNLIKELY(!innodb_table)) { + error = HA_ERR_NO_SUCH_TABLE; + goto err_exit; + } + /* Check if the index name is reserved. */ if (innobase_index_name_is_reserved(trx, key_info, num_of_keys)) { error = ER_WRONG_NAME_FOR_INDEX; } else { /* Check that index keys are sensible */ - error = innobase_check_index_keys(key_info, num_of_keys); + error = innobase_check_index_keys(key_info, num_of_keys, + innodb_table); } if (UNIV_UNLIKELY(error)) { @@ -708,6 +722,8 @@ err_exit: row_mysql_lock_data_dictionary(trx); dict_locked = TRUE; + ut_d(dict_table_check_for_dup_indexes(innodb_table, FALSE)); + /* If a new primary key is defined for the table we need to drop the original table and rebuild all indexes. */ @@ -740,6 +756,8 @@ err_exit: user_thd); } + ut_d(dict_table_check_for_dup_indexes(innodb_table, + FALSE)); row_mysql_unlock_data_dictionary(trx); goto err_exit; } @@ -764,6 +782,10 @@ err_exit: ut_ad(error == DB_SUCCESS); + /* We will need to rebuild index translation table. Set + valid index entry count in the translation table to zero */ + share->idx_trans_tbl.index_count = 0; + /* Commit the data dictionary transaction in order to release the table locks on the system tables. 
This means that if MySQL crashes while creating a new primary key inside @@ -799,18 +821,6 @@ err_exit: index, num_of_idx, table); error_handling: -#ifdef UNIV_DEBUG - /* TODO: At the moment we can't handle the following statement - in our debugging code below: - - alter table t drop index b, add index (b); - - The fix will have to parse the SQL and note that the index - being added has the same name as the one being dropped and - ignore that in the dup index check.*/ - //dict_table_check_for_dup_indexes(prebuilt->table); -#endif - /* After an error, remove all those index definitions from the dictionary which were defined. */ @@ -822,6 +832,8 @@ error_handling: row_mysql_lock_data_dictionary(trx); dict_locked = TRUE; + ut_d(dict_table_check_for_dup_indexes(prebuilt->table, TRUE)); + if (!new_primary) { error = row_merge_rename_indexes(trx, indexed_table); @@ -882,6 +894,8 @@ error: prebuilt->trx->error_info = NULL; /* fall through */ default: + trx->error_state = DB_SUCCESS; + if (new_primary) { if (indexed_table != innodb_table) { row_merge_drop_table(trx, indexed_table); @@ -909,6 +923,7 @@ convert_error: } if (dict_locked) { + ut_d(dict_table_check_for_dup_indexes(innodb_table, FALSE)); row_mysql_unlock_data_dictionary(trx); } @@ -951,6 +966,7 @@ ha_innobase::prepare_drop_index( /* Test and mark all the indexes to be dropped */ row_mysql_lock_data_dictionary(trx); + ut_d(dict_table_check_for_dup_indexes(prebuilt->table, FALSE)); /* Check that none of the indexes have previously been flagged for deletion. 
*/ @@ -1116,6 +1132,7 @@ func_exit: } while (index); } + ut_d(dict_table_check_for_dup_indexes(prebuilt->table, FALSE)); row_mysql_unlock_data_dictionary(trx); DBUG_RETURN(err); @@ -1162,6 +1179,7 @@ ha_innobase::final_drop_index( prebuilt->table->flags, user_thd); row_mysql_lock_data_dictionary(trx); + ut_d(dict_table_check_for_dup_indexes(prebuilt->table, FALSE)); if (UNIV_UNLIKELY(err)) { @@ -1198,11 +1216,12 @@ ha_innobase::final_drop_index( ut_a(!index->to_be_dropped); } -#ifdef UNIV_DEBUG - dict_table_check_for_dup_indexes(prebuilt->table); -#endif + /* We will need to rebuild index translation table. Set + valid index entry count in the translation table to zero */ + share->idx_trans_tbl.index_count = 0; func_exit: + ut_d(dict_table_check_for_dup_indexes(prebuilt->table, FALSE)); trx_commit_for_mysql(trx); trx_commit_for_mysql(prebuilt->trx); row_mysql_unlock_data_dictionary(trx); diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc index c0112cbf1e3..0f656528315 100644 --- a/storage/xtradb/handler/i_s.cc +++ b/storage/xtradb/handler/i_s.cc @@ -43,20 +43,11 @@ extern "C" { #include "ha_prototypes.h" /* for innobase_convert_name() */ #include "srv0start.h" /* for srv_was_started */ #include "btr0btr.h" /* for btr_page_get_index_id */ -#include "dict0dict.h" /* for dict_index_get_if_in_cache */ #include "trx0rseg.h" /* for trx_rseg_struct */ #include "trx0sys.h" /* for trx_sys */ #include "dict0dict.h" /* for dict_sys */ #include "btr0pcur.h" #include "buf0lru.h" /* for XTRA_LRU_[DUMP/RESTORE] */ -/* from buf0buf.c */ -struct buf_chunk_struct{ - ulint mem_size; /* allocated size of the chunk */ - ulint size; /* size of frames[] and blocks[] */ - void* mem; /* pointer to the memory area which - was allocated for the frames */ - buf_block_t* blocks; /* array of buffer control blocks */ -}; } static const char plugin_author[] = "Innobase Oy"; @@ -503,27 +494,11 @@ static ST_FIELD_INFO i_s_innodb_buffer_pool_pages_fields_info[] = static 
ST_FIELD_INFO i_s_innodb_buffer_pool_pages_index_fields_info[] = { - {STRUCT_FLD(field_name, "schema_name"), - STRUCT_FLD(field_length, 64), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - {STRUCT_FLD(field_name, "table_name"), - STRUCT_FLD(field_length, 64), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - {STRUCT_FLD(field_name, "index_name"), - STRUCT_FLD(field_length, 64), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + {STRUCT_FLD(field_name, "index_id"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), STRUCT_FLD(old_name, ""), STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, @@ -724,7 +699,6 @@ i_s_innodb_buffer_pool_pages_fill( RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); buf_pool_mutex_enter(); - mutex_enter(&(dict_sys->mutex)); chunk = buf_pool->chunks; @@ -796,7 +770,6 @@ i_s_innodb_buffer_pool_pages_fill( } } - mutex_exit(&(dict_sys->mutex)); buf_pool_mutex_exit(); DBUG_RETURN(status); @@ -817,13 +790,8 @@ i_s_innodb_buffer_pool_pages_index_fill( int status = 0; ulint n_chunks, n_blocks; - dict_index_t* index; dulint index_id; - const char *p; - char db_name_raw[NAME_LEN*5+1], db_name[NAME_LEN+1]; - char table_name_raw[NAME_LEN*5+1], table_name[NAME_LEN+1]; - buf_chunk_t* chunk; DBUG_ENTER("i_s_innodb_buffer_pool_pages_index_fill"); @@ -837,7 +805,6 @@ i_s_innodb_buffer_pool_pages_index_fill( RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); buf_pool_mutex_enter(); - mutex_enter(&(dict_sys->mutex)); chunk = buf_pool->chunks; @@ -849,48 +816,28 @@ i_s_innodb_buffer_pool_pages_index_fill( if (fil_page_get_type(frame) == 
FIL_PAGE_INDEX) { index_id = btr_page_get_index_id(frame); - index = dict_index_get_if_in_cache_low(index_id); - if(index) - { - if((p = (char*) strchr(index->table_name, '/'))) - { - strncpy(db_name_raw, index->table_name, p-index->table_name); - db_name_raw[p-index->table_name] = 0; - filename_to_tablename(db_name_raw, db_name, sizeof(db_name)); - field_store_string(table->field[0], db_name); - p++; - } else { - field_store_string(table->field[0], NULL); - p = index->table_name; - } - strcpy(table_name_raw, (const char*)p); - filename_to_tablename(table_name_raw, table_name, sizeof(table_name)); - field_store_string(table->field[1], table_name); - field_store_string(table->field[2], index->name); + table->field[0]->store(ut_conv_dulint_to_longlong(index_id)); + table->field[1]->store(block->page.space); + table->field[2]->store(block->page.offset); + table->field[3]->store(page_get_n_recs(frame)); + table->field[4]->store(page_get_data_size(frame)); + table->field[5]->store(block->is_hashed); + table->field[6]->store(block->page.access_time); + table->field[7]->store(block->page.newest_modification != 0); + table->field[8]->store(block->page.oldest_modification != 0); + table->field[9]->store(block->page.old); + table->field[10]->store(0); + table->field[11]->store(block->page.buf_fix_count); + table->field[12]->store(block->page.flush_type); - table->field[3]->store(block->page.space); - table->field[4]->store(block->page.offset); - table->field[5]->store(page_get_n_recs(frame)); - table->field[6]->store(page_get_data_size(frame)); - table->field[7]->store(block->is_hashed); - table->field[8]->store(block->page.access_time); - table->field[9]->store(block->page.newest_modification != 0); - table->field[10]->store(block->page.oldest_modification != 0); - table->field[11]->store(block->page.old); - table->field[12]->store(0); - table->field[13]->store(block->page.buf_fix_count); - table->field[14]->store(block->page.flush_type); - - if 
(schema_table_store_record(thd, table)) { - status = 1; - break; - } + if (schema_table_store_record(thd, table)) { + status = 1; + break; } } } } - mutex_exit(&(dict_sys->mutex)); buf_pool_mutex_exit(); DBUG_RETURN(status); @@ -928,7 +875,6 @@ i_s_innodb_buffer_pool_pages_blob_fill( RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); buf_pool_mutex_enter(); - mutex_enter(&(dict_sys->mutex)); chunk = buf_pool->chunks; @@ -984,7 +930,6 @@ i_s_innodb_buffer_pool_pages_blob_fill( } } - mutex_exit(&(dict_sys->mutex)); buf_pool_mutex_exit(); DBUG_RETURN(status); @@ -3459,7 +3404,7 @@ i_s_innodb_table_stats_fill( field_store_string(i_s_table->field[0], buf); field_store_string(i_s_table->field[1], ptr); - i_s_table->field[2]->store(table->stat_n_rows); + i_s_table->field[2]->store(table->stat_n_rows, 1); i_s_table->field[3]->store(table->stat_clustered_index_size); i_s_table->field[4]->store(table->stat_sum_of_other_index_sizes); i_s_table->field[5]->store(table->stat_modified_counter); @@ -3538,6 +3483,9 @@ i_s_innodb_index_stats_fill( i_s_table->field[3]->store(index->n_uniq); row_per_keys[0] = '\0'; + + /* It is remained optimistic operation still for now */ + //dict_index_stat_mutex_enter(index); if (index->stat_n_diff_key_vals) { for (i = 1; i <= index->n_uniq; i++) { ib_int64_t rec_per_key; @@ -3551,6 +3499,8 @@ i_s_innodb_index_stats_fill( strncat(row_per_keys, buff, 256 - strlen(row_per_keys)); } } + //dict_index_stat_mutex_exit(index); + field_store_string(i_s_table->field[4], row_per_keys); i_s_table->field[5]->store(index->stat_index_size); @@ -3857,6 +3807,14 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_admin_command_maria = static ST_FIELD_INFO i_s_innodb_sys_tables_info[] = { + {STRUCT_FLD(field_name, "SCHEMA"), + STRUCT_FLD(field_length, NAME_LEN), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + {STRUCT_FLD(field_name, 
"NAME"), STRUCT_FLD(field_length, NAME_LEN), STRUCT_FLD(field_type, MYSQL_TYPE_STRING), @@ -3985,6 +3943,35 @@ static ST_FIELD_INFO i_s_innodb_sys_indexes_info[] = END_OF_ST_FIELD_INFO }; +static ST_FIELD_INFO i_s_innodb_sys_stats_info[] = +{ + {STRUCT_FLD(field_name, "INDEX_ID"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "KEY_COLS"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "DIFF_VALS"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + END_OF_ST_FIELD_INFO +}; + static int copy_string_field( @@ -4015,6 +4002,54 @@ copy_string_field( static int +copy_name_fields( +/*=============*/ + TABLE* table, + int table_field_1, + const rec_t* rec, + int rec_field) +{ + int status; + const byte* data; + ulint len; + + data = rec_get_nth_field_old(rec, rec_field, &len); + if (len == UNIV_SQL_NULL) { + table->field[table_field_1]->set_null(); + table->field[table_field_1 + 1]->set_null(); + status = 0; /* success */ + } else { + char buf[NAME_LEN * 2 + 2]; + char* ptr; + + if (len > NAME_LEN * 2 + 1) { + table->field[table_field_1]->set_null(); + status = field_store_string(table->field[table_field_1 + 1], + "###TOO LONG NAME###"); + goto end_func; + } + + strncpy(buf, (char*)data, len); + buf[len] = '\0'; + ptr = strchr(buf, '/'); + if (ptr) { + *ptr = '\0'; + ++ptr; + + status = field_store_string(table->field[table_field_1], 
buf); + status |= field_store_string(table->field[table_field_1 + 1], ptr); + } else { + table->field[table_field_1]->set_null(); + status = field_store_string(table->field[table_field_1 + 1], buf); + } + } + +end_func: + return status; +} + +static +int copy_int_field( /*===========*/ TABLE* table, @@ -4083,49 +4118,49 @@ copy_sys_tables_rec( /* NAME */ field = dict_index_get_nth_col_pos(index, 0); - status = copy_string_field(table, 0, rec, field); + status = copy_name_fields(table, 0, rec, field); if (status) { return status; } /* ID */ field = dict_index_get_nth_col_pos(index, 1); - status = copy_id_field(table, 1, rec, field); + status = copy_id_field(table, 2, rec, field); if (status) { return status; } /* N_COLS */ field = dict_index_get_nth_col_pos(index, 2); - status = copy_int_field(table, 2, rec, field); + status = copy_int_field(table, 3, rec, field); if (status) { return status; } /* TYPE */ field = dict_index_get_nth_col_pos(index, 3); - status = copy_int_field(table, 3, rec, field); + status = copy_int_field(table, 4, rec, field); if (status) { return status; } /* MIX_ID */ field = dict_index_get_nth_col_pos(index, 4); - status = copy_id_field(table, 4, rec, field); + status = copy_id_field(table, 5, rec, field); if (status) { return status; } /* MIX_LEN */ field = dict_index_get_nth_col_pos(index, 5); - status = copy_int_field(table, 5, rec, field); + status = copy_int_field(table, 6, rec, field); if (status) { return status; } /* CLUSTER_NAME */ field = dict_index_get_nth_col_pos(index, 6); - status = copy_string_field(table, 6, rec, field); + status = copy_string_field(table, 7, rec, field); if (status) { return status; } /* SPACE */ field = dict_index_get_nth_col_pos(index, 7); - status = copy_int_field(table, 7, rec, field); + status = copy_int_field(table, 8, rec, field); if (status) { return status; } @@ -4193,6 +4228,40 @@ copy_sys_indexes_rec( static int +copy_sys_stats_rec( +/*===============*/ + TABLE* table, + const dict_index_t* index, + 
const rec_t* rec +) +{ + int status; + int field; + + /* INDEX_ID */ + field = dict_index_get_nth_col_pos(index, 0); + status = copy_id_field(table, 0, rec, field); + if (status) { + return status; + } + /* KEY_COLS */ + field = dict_index_get_nth_col_pos(index, 1); + status = copy_int_field(table, 1, rec, field); + if (status) { + return status; + } + /* DIFF_VALS */ + field = dict_index_get_nth_col_pos(index, 2); + status = copy_id_field(table, 2, rec, field); + if (status) { + return status; + } + + return 0; +} + +static +int i_s_innodb_schema_table_fill( /*=========================*/ THD* thd, @@ -4220,6 +4289,8 @@ i_s_innodb_schema_table_fill( id = 0; } else if (innobase_strcasecmp(table_name, "innodb_sys_indexes") == 0) { id = 1; + } else if (innobase_strcasecmp(table_name, "innodb_sys_stats") == 0) { + id = 2; } else { DBUG_RETURN(1); } @@ -4233,8 +4304,10 @@ i_s_innodb_schema_table_fill( if (id == 0) { innodb_table = dict_table_get_low("SYS_TABLES"); - } else { + } else if (id == 1) { innodb_table = dict_table_get_low("SYS_INDEXES"); + } else { + innodb_table = dict_table_get_low("SYS_STATS"); } index = UT_LIST_GET_FIRST(innodb_table->indexes); @@ -4259,8 +4332,10 @@ i_s_innodb_schema_table_fill( if (id == 0) { status = copy_sys_tables_rec(table, index, rec); - } else { + } else if (id == 1) { status = copy_sys_indexes_rec(table, index, rec); + } else { + status = copy_sys_stats_rec(table, index, rec); } if (status) { btr_pcur_close(&pcur); @@ -4325,6 +4400,21 @@ i_s_innodb_sys_indexes_init( DBUG_RETURN(0); } +static +int +i_s_innodb_sys_stats_init( +/*======================*/ + void* p) +{ + DBUG_ENTER("i_s_innodb_sys_stats_init"); + ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; + + schema->fields_info = i_s_innodb_sys_stats_info; + schema->fill_table = i_s_innodb_schema_table_fill; + + DBUG_RETURN(0); +} + UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_tables = { STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), @@ -4391,3 +4481,36 @@ UNIV_INTERN 
struct st_maria_plugin i_s_innodb_sys_indexes_maria = STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_BETA) }; +UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_stats = +{ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + STRUCT_FLD(info, &i_s_info), + STRUCT_FLD(name, "INNODB_SYS_STATS"), + STRUCT_FLD(author, plugin_author), + STRUCT_FLD(descr, "InnoDB SYS_STATS table"), + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + STRUCT_FLD(init, i_s_innodb_sys_stats_init), + STRUCT_FLD(deinit, i_s_common_deinit), + STRUCT_FLD(version, 0x0100 /* 1.0 */), + STRUCT_FLD(status_vars, NULL), + STRUCT_FLD(system_vars, NULL), + STRUCT_FLD(__reserved1, NULL) +}; + +UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_stats_maria = +{ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + STRUCT_FLD(info, &i_s_info), + STRUCT_FLD(name, "INNODB_SYS_STATS"), + STRUCT_FLD(author, plugin_author), + STRUCT_FLD(descr, "InnoDB SYS_STATS table"), + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + STRUCT_FLD(init, i_s_innodb_sys_stats_init), + STRUCT_FLD(deinit, i_s_common_deinit), + STRUCT_FLD(version, 0x0100 /* 1.0 */), + STRUCT_FLD(status_vars, NULL), + STRUCT_FLD(system_vars, NULL), + STRUCT_FLD(version_info, "1.0"), + STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_BETA) +}; + diff --git a/storage/xtradb/handler/i_s.h b/storage/xtradb/handler/i_s.h index 8252c6d1fa2..7a5c3ead5ed 100644 --- a/storage/xtradb/handler/i_s.h +++ b/storage/xtradb/handler/i_s.h @@ -43,6 +43,7 @@ extern struct st_mysql_plugin i_s_innodb_index_stats; extern struct st_mysql_plugin i_s_innodb_admin_command; extern struct st_mysql_plugin i_s_innodb_sys_tables; extern struct st_mysql_plugin i_s_innodb_sys_indexes; +extern struct st_mysql_plugin i_s_innodb_sys_stats; extern struct st_maria_plugin i_s_innodb_buffer_pool_pages_maria; extern struct st_maria_plugin i_s_innodb_buffer_pool_pages_index_maria; @@ -61,5 +62,6 @@ extern struct st_maria_plugin i_s_innodb_index_stats_maria; extern struct st_maria_plugin 
i_s_innodb_admin_command_maria; extern struct st_maria_plugin i_s_innodb_sys_tables_maria; extern struct st_maria_plugin i_s_innodb_sys_indexes_maria; +extern struct st_maria_plugin i_s_innodb_sys_stats_maria; #endif /* i_s_h */ diff --git a/storage/xtradb/handler/innodb_patch_info.h b/storage/xtradb/handler/innodb_patch_info.h index 5fa50129a04..e68f12d0fec 100644 --- a/storage/xtradb/handler/innodb_patch_info.h +++ b/storage/xtradb/handler/innodb_patch_info.h @@ -41,11 +41,12 @@ struct innodb_enhancement { {"innodb_admin_command_base","XtraDB specific command interface through i_s","","http://www.percona.com/docs/wiki/percona-xtradb"}, {"innodb_show_lock_name","Show mutex/lock name instead of crated file/line","","http://www.percona.com/docs/wiki/percona-xtradb"}, {"innodb_extend_slow","Extended statistics in slow.log","It is InnoDB-part only. It needs to patch also to mysqld.","http://www.percona.com/docs/wiki/percona-xtradb"}, -{"innodb_relax_table_creation","Relax limitation of column size at table creation as builtin InnoDB.","","http://www.percona.com/docs/wiki/percona-xtradb"}, {"innodb_lru_dump_restore","Dump and restore command for content of buffer pool","","http://www.percona.com/docs/wiki/percona-xtradb"}, +{"innodb_separate_doublewrite","Add option 'innodb_doublewrite_file' to separate doublewrite dedicated tablespace","","http://www.percona.com/docs/wiki/percona-xtradb"}, {"innodb_pass_corrupt_table","Treat tables as corrupt instead of crash, when meet corrupt blocks","","http://www.percona.com/docs/wiki/percona-xtradb"}, {"innodb_fast_checksum","Using the checksum on 32bit-unit calculation","incompatible for unpatched ver.","http://www.percona.com/docs/wiki/percona-xtradb"}, {"innodb_files_extend","allow >4GB transaction log files, and can vary universal page size of datafiles","incompatible for unpatched ver.","http://www.percona.com/docs/wiki/percona-xtradb"}, {"innodb_sys_tables_sys_indexes","Expose InnoDB SYS_TABLES and SYS_INDEXES schema 
tables","","http://www.percona.com/docs/wiki/percona-xtradb"}, +{"innodb_buffer_pool_shm","Put buffer pool contents to shared memory segment and reuse it at clean restart [experimental]","","http://www.percona.com/docs/wiki/percona-xtradb"}, {NULL, NULL, NULL, NULL} }; diff --git a/storage/xtradb/ibuf/ibuf0ibuf.c b/storage/xtradb/ibuf/ibuf0ibuf.c index 94f3751dd04..e01c2d6b800 100644 --- a/storage/xtradb/ibuf/ibuf0ibuf.c +++ b/storage/xtradb/ibuf/ibuf0ibuf.c @@ -733,24 +733,41 @@ page containing the descriptor bits for the file page; the bitmap page is x-latched */ static page_t* -ibuf_bitmap_get_map_page( -/*=====================*/ - ulint space, /*!< in: space id of the file page */ - ulint page_no,/*!< in: page number of the file page */ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - mtr_t* mtr) /*!< in: mtr */ +ibuf_bitmap_get_map_page_func( +/*==========================*/ + ulint space, /*!< in: space id of the file page */ + ulint page_no,/*!< in: page number of the file page */ + ulint zip_size,/*!< in: compressed page size in bytes; + 0 for uncompressed pages */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ + mtr_t* mtr) /*!< in: mtr */ { buf_block_t* block; - block = buf_page_get(space, zip_size, - ibuf_bitmap_page_no_calc(zip_size, page_no), - RW_X_LATCH, mtr); + block = buf_page_get_gen(space, zip_size, + ibuf_bitmap_page_no_calc(zip_size, page_no), + RW_X_LATCH, NULL, BUF_GET, + file, line, mtr); buf_block_dbg_add_level(block, SYNC_IBUF_BITMAP); return(buf_block_get_frame(block)); } +/********************************************************************//** +Gets the ibuf bitmap page where the bits describing a given file page are +stored. 
+@return bitmap page where the file page is mapped, that is, the bitmap +page containing the descriptor bits for the file page; the bitmap page +is x-latched +@param space in: space id of the file page +@param page_no in: page number of the file page +@param zip_size in: compressed page size in bytes; 0 for uncompressed pages +@param mtr in: mini-transaction */ +#define ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr) \ + ibuf_bitmap_get_map_page_func(space, page_no, zip_size, \ + __FILE__, __LINE__, mtr) + /************************************************************************//** Sets the free bits of the page in the ibuf bitmap. This is done in a separate mini-transaction, hence this operation does not restrict further work to only diff --git a/storage/xtradb/include/btr0btr.h b/storage/xtradb/include/btr0btr.h index d5c8258513c..5e6a76c7d21 100644 --- a/storage/xtradb/include/btr0btr.h +++ b/storage/xtradb/include/btr0btr.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -193,6 +193,10 @@ btr_leaf_page_release( mtr_t* mtr); /*!< in: mtr */ /**************************************************************//** Gets the child node file address in a node pointer. +NOTE: the offsets array must contain all offsets for the record since +we read the last field according to offsets and assume that it contains +the child page number. In other words offsets must have been retrieved +with rec_get_offsets(n_fields=ULINT_UNDEFINED). @return child node address */ UNIV_INLINE ulint @@ -317,12 +321,16 @@ Inserts a data tuple to a tree on a non-leaf level. It is assumed that mtr holds an x-latch on the tree. 
*/ UNIV_INTERN void -btr_insert_on_non_leaf_level( -/*=========================*/ +btr_insert_on_non_leaf_level_func( +/*==============================*/ dict_index_t* index, /*!< in: index */ ulint level, /*!< in: level, must be > 0 */ dtuple_t* tuple, /*!< in: the record to be inserted */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ +# define btr_insert_on_non_leaf_level(i,l,t,m) \ + btr_insert_on_non_leaf_level_func(i,l,t,__FILE__,__LINE__,m) #endif /* !UNIV_HOTBACKUP */ /****************************************************************//** Sets a record as the predefined minimum record. */ diff --git a/storage/xtradb/include/btr0btr.ic b/storage/xtradb/include/btr0btr.ic index 4aa4a41f50d..c9c38f3c3b3 100644 --- a/storage/xtradb/include/btr0btr.ic +++ b/storage/xtradb/include/btr0btr.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -257,6 +257,10 @@ btr_page_set_prev( /**************************************************************//** Gets the child node file address in a node pointer. +NOTE: the offsets array must contain all offsets for the record since +we read the last field according to offsets and assume that it contains +the child page number. In other words offsets must have been retrieved +with rec_get_offsets(n_fields=ULINT_UNDEFINED). 
@return child node address */ UNIV_INLINE ulint diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h index 480a3877e54..716f15c4267 100644 --- a/storage/xtradb/include/btr0cur.h +++ b/storage/xtradb/include/btr0cur.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -152,29 +152,39 @@ btr_cur_search_to_nth_level( ulint has_search_latch,/*!< in: latch mode the caller currently has on btr_search_latch: RW_S_LATCH, or 0 */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ /*****************************************************************//** Opens a cursor at either end of an index. */ UNIV_INTERN void -btr_cur_open_at_index_side( -/*=======================*/ +btr_cur_open_at_index_side_func( +/*============================*/ ibool from_left, /*!< in: TRUE if open to the low end, FALSE if to the high end */ dict_index_t* index, /*!< in: index */ ulint latch_mode, /*!< in: latch mode */ btr_cur_t* cursor, /*!< in: cursor */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ +#define btr_cur_open_at_index_side(f,i,l,c,m) \ + btr_cur_open_at_index_side_func(f,i,l,c,__FILE__,__LINE__,m) /**********************************************************************//** Positions a cursor at a randomly chosen position within a B-tree. */ UNIV_INTERN void -btr_cur_open_at_rnd_pos( -/*====================*/ +btr_cur_open_at_rnd_pos_func( +/*=========================*/ dict_index_t* index, /*!< in: index */ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... 
*/ btr_cur_t* cursor, /*!< in/out: B-tree cursor */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ +#define btr_cur_open_at_rnd_pos(i,l,c,m) \ + btr_cur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m) /*************************************************************//** Tries to perform an insert to a page in an index tree, next to cursor. It is assumed that mtr holds an x-latch on the page. The operation does diff --git a/storage/xtradb/include/btr0pcur.h b/storage/xtradb/include/btr0pcur.h index 12b1375d8b7..2334a266280 100644 --- a/storage/xtradb/include/btr0pcur.h +++ b/storage/xtradb/include/btr0pcur.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -82,8 +82,8 @@ Initializes and opens a persistent cursor to an index tree. It should be closed with btr_pcur_close. */ UNIV_INLINE void -btr_pcur_open( -/*==========*/ +btr_pcur_open_func( +/*===============*/ dict_index_t* index, /*!< in: index */ const dtuple_t* tuple, /*!< in: tuple on which search done */ ulint mode, /*!< in: PAGE_CUR_L, ...; @@ -94,14 +94,18 @@ btr_pcur_open( record! */ ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ +#define btr_pcur_open(i,t,md,l,c,m) \ + btr_pcur_open_func(i,t,md,l,c,__FILE__,__LINE__,m) /**************************************************************//** Opens an persistent cursor to an index tree without initializing the cursor. 
*/ UNIV_INLINE void -btr_pcur_open_with_no_init( -/*=======================*/ +btr_pcur_open_with_no_init_func( +/*============================*/ dict_index_t* index, /*!< in: index */ const dtuple_t* tuple, /*!< in: tuple on which search done */ ulint mode, /*!< in: PAGE_CUR_L, ...; @@ -119,7 +123,12 @@ btr_pcur_open_with_no_init( ulint has_search_latch,/*!< in: latch mode the caller currently has on btr_search_latch: RW_S_LATCH, or 0 */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ +#define btr_pcur_open_with_no_init(ix,t,md,l,cur,has,m) \ + btr_pcur_open_with_no_init_func(ix,t,md,l,cur,has,__FILE__,__LINE__,m) + /*****************************************************************//** Opens a persistent cursor at either end of an index. */ UNIV_INLINE @@ -160,8 +169,8 @@ before first in tree. The latching mode must be BTR_SEARCH_LEAF or BTR_MODIFY_LEAF. */ UNIV_INTERN void -btr_pcur_open_on_user_rec( -/*======================*/ +btr_pcur_open_on_user_rec_func( +/*===========================*/ dict_index_t* index, /*!< in: index */ const dtuple_t* tuple, /*!< in: tuple on which search done */ ulint mode, /*!< in: PAGE_CUR_L, ... */ @@ -169,17 +178,25 @@ btr_pcur_open_on_user_rec( BTR_MODIFY_LEAF */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ +#define btr_pcur_open_on_user_rec(i,t,md,l,c,m) \ + btr_pcur_open_on_user_rec_func(i,t,md,l,c,__FILE__,__LINE__,m) /**********************************************************************//** Positions a cursor at a randomly chosen position within a B-tree. */ UNIV_INLINE void -btr_pcur_open_at_rnd_pos( -/*=====================*/ +btr_pcur_open_at_rnd_pos_func( +/*==========================*/ dict_index_t* index, /*!< in: index */ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... 
*/ btr_pcur_t* cursor, /*!< in/out: B-tree pcur */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ +#define btr_pcur_open_at_rnd_pos(i,l,c,m) \ + btr_pcur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m) /**************************************************************//** Frees the possible old_rec_buf buffer of a persistent cursor and sets the latch mode of the persistent cursor to BTR_NO_LATCHES. */ @@ -218,11 +235,15 @@ record and it can be restored on a user record whose ordering fields are identical to the ones of the original user record */ UNIV_INTERN ibool -btr_pcur_restore_position( -/*======================*/ +btr_pcur_restore_position_func( +/*===========================*/ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ btr_pcur_t* cursor, /*!< in: detached persistent cursor */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ +#define btr_pcur_restore_position(l,cur,mtr) \ + btr_pcur_restore_position_func(l,cur,__FILE__,__LINE__,mtr) /**************************************************************//** If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY, releases the page latch and bufferfix reserved by the cursor. @@ -260,20 +281,13 @@ btr_pcur_get_mtr( /*=============*/ btr_pcur_t* cursor); /*!< in: persistent cursor */ /**************************************************************//** -Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES, +Commits the mtr and sets the pcur latch mode to BTR_NO_LATCHES, that is, the cursor becomes detached. If there have been modifications to the page where pcur is positioned, this can be used instead of btr_pcur_release_leaf. Function btr_pcur_store_position should be used before calling this, if restoration of cursor is wanted later. 
*/ UNIV_INLINE void -btr_pcur_commit( -/*============*/ - btr_pcur_t* pcur); /*!< in: persistent cursor */ -/**************************************************************//** -Differs from btr_pcur_commit in that we can specify the mtr to commit. */ -UNIV_INLINE -void btr_pcur_commit_specify_mtr( /*========================*/ btr_pcur_t* pcur, /*!< in: persistent cursor */ diff --git a/storage/xtradb/include/btr0pcur.ic b/storage/xtradb/include/btr0pcur.ic index 0ca7223f861..0c38797e6c5 100644 --- a/storage/xtradb/include/btr0pcur.ic +++ b/storage/xtradb/include/btr0pcur.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -395,30 +395,13 @@ btr_pcur_move_to_next( } /**************************************************************//** -Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES, +Commits the mtr and sets the pcur latch mode to BTR_NO_LATCHES, that is, the cursor becomes detached. If there have been modifications to the page where pcur is positioned, this can be used instead of btr_pcur_release_leaf. Function btr_pcur_store_position should be used before calling this, if restoration of cursor is wanted later. */ UNIV_INLINE void -btr_pcur_commit( -/*============*/ - btr_pcur_t* pcur) /*!< in: persistent cursor */ -{ - ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED); - - pcur->latch_mode = BTR_NO_LATCHES; - - mtr_commit(pcur->mtr); - - pcur->pos_state = BTR_PCUR_WAS_POSITIONED; -} - -/**************************************************************//** -Differs from btr_pcur_commit in that we can specify the mtr to commit. 
*/ -UNIV_INLINE -void btr_pcur_commit_specify_mtr( /*========================*/ btr_pcur_t* pcur, /*!< in: persistent cursor */ @@ -483,8 +466,8 @@ Initializes and opens a persistent cursor to an index tree. It should be closed with btr_pcur_close. */ UNIV_INLINE void -btr_pcur_open( -/*==========*/ +btr_pcur_open_func( +/*===============*/ dict_index_t* index, /*!< in: index */ const dtuple_t* tuple, /*!< in: tuple on which search done */ ulint mode, /*!< in: PAGE_CUR_L, ...; @@ -495,6 +478,8 @@ btr_pcur_open( record! */ ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mtr */ { btr_cur_t* btr_cursor; @@ -511,7 +496,7 @@ btr_pcur_open( btr_cursor = btr_pcur_get_btr_cur(cursor); btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode, - btr_cursor, 0, mtr); + btr_cursor, 0, file, line, mtr); cursor->pos_state = BTR_PCUR_IS_POSITIONED; cursor->trx_if_known = NULL; @@ -522,8 +507,8 @@ Opens an persistent cursor to an index tree without initializing the cursor. 
*/ UNIV_INLINE void -btr_pcur_open_with_no_init( -/*=======================*/ +btr_pcur_open_with_no_init_func( +/*============================*/ dict_index_t* index, /*!< in: index */ const dtuple_t* tuple, /*!< in: tuple on which search done */ ulint mode, /*!< in: PAGE_CUR_L, ...; @@ -541,6 +526,8 @@ btr_pcur_open_with_no_init( ulint has_search_latch,/*!< in: latch mode the caller currently has on btr_search_latch: RW_S_LATCH, or 0 */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mtr */ { btr_cur_t* btr_cursor; @@ -553,7 +540,8 @@ btr_pcur_open_with_no_init( btr_cursor = btr_pcur_get_btr_cur(cursor); btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode, - btr_cursor, has_search_latch, mtr); + btr_cursor, has_search_latch, + file, line, mtr); cursor->pos_state = BTR_PCUR_IS_POSITIONED; cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; @@ -600,11 +588,13 @@ btr_pcur_open_at_index_side( Positions a cursor at a randomly chosen position within a B-tree. */ UNIV_INLINE void -btr_pcur_open_at_rnd_pos( -/*=====================*/ +btr_pcur_open_at_rnd_pos_func( +/*==========================*/ dict_index_t* index, /*!< in: index */ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... 
*/ btr_pcur_t* cursor, /*!< in/out: B-tree pcur */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mtr */ { /* Initialize the cursor */ @@ -614,8 +604,9 @@ btr_pcur_open_at_rnd_pos( btr_pcur_init(cursor); - btr_cur_open_at_rnd_pos(index, latch_mode, - btr_pcur_get_btr_cur(cursor), mtr); + btr_cur_open_at_rnd_pos_func(index, latch_mode, + btr_pcur_get_btr_cur(cursor), + file, line, mtr); cursor->pos_state = BTR_PCUR_IS_POSITIONED; cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index c95fcc00dd3..a7854e3038d 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -33,8 +33,10 @@ Created 11/5/1995 Heikki Tuuri #include "hash0hash.h" #include "ut0byte.h" #include "page0types.h" +#include "ut0rbt.h" #ifndef UNIV_HOTBACKUP #include "os0proc.h" +#include "srv0srv.h" /** @name Modes for buf_page_get_gen */ /* @{ */ @@ -202,20 +204,14 @@ with care. */ #define buf_page_get_with_no_latch(SP, ZS, OF, MTR) buf_page_get_gen(\ SP, ZS, OF, RW_NO_LATCH, NULL,\ BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR) -/**************************************************************//** -NOTE! The following macros should be used instead of -buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and -RW_X_LATCH are allowed as LA! 
*/ -#define buf_page_optimistic_get(LA, BL, MC, MTR) \ - buf_page_optimistic_get_func(LA, BL, MC, __FILE__, __LINE__, MTR) /********************************************************************//** This is the general function used to get optimistic access to a database page. @return TRUE if success */ UNIV_INTERN ibool -buf_page_optimistic_get_func( -/*=========================*/ +buf_page_optimistic_get( +/*====================*/ ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */ buf_block_t* block, /*!< in: guessed block */ ib_uint64_t modify_clock,/*!< in: modify clock value if mode is @@ -1205,15 +1201,21 @@ struct buf_block_struct{ rw_lock_t lock; /*!< read-write lock of the buffer frame */ unsigned lock_hash_val:32;/*!< hashed value of the page address - in the record lock hash table */ - unsigned check_index_page_at_flush:1; + in the record lock hash table; + protected by buf_block_t::lock + (or buf_block_t::mutex, buf_pool_mutex + in buf_page_get_gen(), + buf_page_init_for_read() + and buf_page_create()) */ + ibool check_index_page_at_flush; /*!< TRUE if we know that this is an index page, and want the database to check its consistency before flush; note that there may be pages in the buffer pool which are index pages, but this flag is not set because - we do not keep track of all pages */ + we do not keep track of all pages; + NOT protected by any mutex */ /* @} */ /** @name Optimistic search field */ /* @{ */ @@ -1300,11 +1302,23 @@ struct buf_block_struct{ /**********************************************************************//** Compute the hash fold value for blocks in buf_pool->zip_hash. 
*/ /* @{ */ -#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE) +/* the fold should be relative when srv_buffer_pool_shm_key is enabled */ +#define BUF_POOL_ZIP_FOLD_PTR(ptr) (!srv_buffer_pool_shm_key\ + ?((ulint) (ptr) / UNIV_PAGE_SIZE)\ + :((ulint) ((char*)ptr - (char*)(buf_pool->chunks->blocks->frame)) / UNIV_PAGE_SIZE)) #define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame) #define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b)) /* @} */ +/** A chunk of buffers. The buffer pool is allocated in chunks. */ +struct buf_chunk_struct{ + ulint mem_size; /*!< allocated size of the chunk */ + ulint size; /*!< size of frames[] and blocks[] */ + void* mem; /*!< pointer to the memory area which + was allocated for the frames */ + buf_block_t* blocks; /*!< array of buffer control blocks */ +}; + /** @brief The buffer pool statistics structure. */ struct buf_pool_stat_struct{ ulint n_page_gets; /*!< number of page gets performed; @@ -1379,6 +1393,19 @@ struct buf_pool_struct{ /*!< this is in the set state when there is no flush batch of the given type running */ + ib_rbt_t* flush_rbt; /* !< a red-black tree is used + exclusively during recovery to + speed up insertions in the + flush_list. This tree contains + blocks in order of + oldest_modification LSN and is + kept in sync with the + flush_list. + Each member of the tree MUST + also be on the flush_list. + This tree is relevant only in + recovery and is set to NULL + once the recovery is over. */ ulint freed_page_clock;/*!< a sequence number used to count the number of buffer blocks removed from the end of diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic index c9809fbdcd8..93cc68e7fc9 100644 --- a/storage/xtradb/include/buf0buf.ic +++ b/storage/xtradb/include/buf0buf.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. 
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -81,7 +81,7 @@ buf_page_peek_if_too_old( unsigned access_time = buf_page_is_accessed(bpage); if (access_time > 0 - && (ut_time_ms() - access_time) + && ((ib_uint32_t) (ut_time_ms() - access_time)) >= buf_LRU_old_threshold_ms) { return(TRUE); } @@ -743,6 +743,12 @@ buf_block_get_lock_hash_val( /*========================*/ const buf_block_t* block) /*!< in: block */ { + ut_ad(block); + ut_ad(buf_page_in_file(&block->page)); +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_EXCLUSIVE) + || rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_SHARED)); +#endif /* UNIV_SYNC_DEBUG */ return(block->lock_hash_val); } @@ -967,7 +973,12 @@ buf_page_hash_get( ut_a(buf_page_in_file(bpage)); ut_ad(bpage->in_page_hash); ut_ad(!bpage->in_zip_hash); +#if UNIV_WORD_SIZE == 4 + /* On 32-bit systems, there is no padding in + buf_page_t. On other systems, Valgrind could complain + about uninitialized pad bytes. */ UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage); +#endif } return(bpage); diff --git a/storage/xtradb/include/buf0flu.h b/storage/xtradb/include/buf0flu.h index cac4bf9fe4b..2f7108fda1b 100644 --- a/storage/xtradb/include/buf0flu.h +++ b/storage/xtradb/include/buf0flu.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -40,6 +40,16 @@ buf_flush_remove( /*=============*/ buf_page_t* bpage); /*!< in: pointer to the block in question */ /********************************************************************//** +Relocates a buffer control block on the flush_list. 
+Note that it is assumed that the contents of bpage has already been +copied to dpage. */ +UNIV_INTERN +void +buf_flush_relocate_on_flush_list( +/*=============================*/ + buf_page_t* bpage, /*!< in/out: control block being moved */ + buf_page_t* dpage); /*!< in/out: destination block */ +/********************************************************************//** Updates the flush system data structures when a write is completed. */ UNIV_INTERN void @@ -140,8 +150,8 @@ how much redo the workload is generating and at what rate. */ struct buf_flush_stat_struct { - ib_uint64_t redo; /**< amount of redo generated. */ - ulint n_flushed; /**< number of pages flushed. */ + ib_uint64_t redo; /*!< amount of redo generated. */ + ulint n_flushed; /*!< number of pages flushed. */ }; /** Statistics for selecting flush rate of dirty pages. */ @@ -176,6 +186,22 @@ buf_flush_validate(void); /*====================*/ #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ +/******************************************************************//** +Initialize the red-black tree to speed up insertions into the flush_list +during recovery process. Should be called at the start of recovery +process before any page has been read/written. */ +UNIV_INTERN +void +buf_flush_init_flush_rbt(void); +/*==========================*/ + +/******************************************************************//** +Frees up the red-black tree. */ +UNIV_INTERN +void +buf_flush_free_flush_rbt(void); +/*==========================*/ + /** When buf_flush_free_margin is called, it tries to make this many blocks available to replacement in the free list and at the end of the LRU list (to make sure that a read-ahead batch can be read efficiently in a single diff --git a/storage/xtradb/include/buf0lru.h b/storage/xtradb/include/buf0lru.h index 0a7d01c95cf..d3b59e8b579 100644 --- a/storage/xtradb/include/buf0lru.h +++ b/storage/xtradb/include/buf0lru.h @@ -96,7 +96,7 @@ buf_LRU_insert_zip_clean( Try to free a block. 
If bpage is a descriptor of a compressed-only page, the descriptor object will be freed as well. -NOTE: If this function returns BUF_LRU_FREED, it will not temporarily +NOTE: If this function returns BUF_LRU_FREED, it will temporarily release buf_pool_mutex. Furthermore, the page frame will no longer be accessible via bpage. diff --git a/storage/xtradb/include/buf0rea.h b/storage/xtradb/include/buf0rea.h index 71f62ff7b49..56d3d24a3b7 100644 --- a/storage/xtradb/include/buf0rea.h +++ b/storage/xtradb/include/buf0rea.h @@ -158,8 +158,7 @@ buf_read_recv_pages( /** The size in pages of the area which the read-ahead algorithms read if invoked */ -#define BUF_READ_AHEAD_AREA \ - ut_min(64, ut_2_power_up(buf_pool->curr_size / 32)) +#define BUF_READ_AHEAD_AREA 64 /** @name Modes used in read-ahead @{ */ /** read only pages belonging to the insert buffer tree */ diff --git a/storage/xtradb/include/data0type.ic b/storage/xtradb/include/data0type.ic index 240b4288f39..2bf67a941bd 100644 --- a/storage/xtradb/include/data0type.ic +++ b/storage/xtradb/include/data0type.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -285,6 +285,10 @@ dtype_new_store_for_order_and_null_size( #endif ulint len; + ut_ad(type); + ut_ad(type->mtype >= DATA_VARCHAR); + ut_ad(type->mtype <= DATA_MYSQL); + buf[0] = (byte)(type->mtype & 0xFFUL); if (type->prtype & DATA_BINARY_TYPE) { diff --git a/storage/xtradb/include/db0err.h b/storage/xtradb/include/db0err.h index 747e9b5364e..c841c2b4afe 100644 --- a/storage/xtradb/include/db0err.h +++ b/storage/xtradb/include/db0err.h @@ -28,6 +28,8 @@ Created 5/24/1996 Heikki Tuuri enum db_err { + DB_SUCCESS_LOCKED_REC = 9, /*!< like DB_SUCCESS, but a new + explicit record lock was created */ DB_SUCCESS = 10, /* The following are error codes */ diff --git a/storage/xtradb/include/dict0boot.h b/storage/xtradb/include/dict0boot.h index 51d37ee98d1..9239e031a7f 100644 --- a/storage/xtradb/include/dict0boot.h +++ b/storage/xtradb/include/dict0boot.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -46,13 +46,14 @@ dict_hdr_get( /*=========*/ mtr_t* mtr); /*!< in: mtr */ /**********************************************************************//** -Returns a new row, table, index, or tree id. -@return the new id */ +Returns a new table, index, or space id. */ UNIV_INTERN -dulint +void dict_hdr_get_new_id( /*================*/ - ulint type); /*!< in: DICT_HDR_ROW_ID, ... 
*/ + dulint* table_id, /*!< out: table id (not assigned if NULL) */ + dulint* index_id, /*!< out: index id (not assigned if NULL) */ + ulint* space_id); /*!< out: space id (not assigned if NULL) */ /**********************************************************************//** Returns a new row id. @return the new id */ @@ -100,6 +101,7 @@ dict_create(void); #define DICT_COLUMNS_ID ut_dulint_create(0, 2) #define DICT_INDEXES_ID ut_dulint_create(0, 3) #define DICT_FIELDS_ID ut_dulint_create(0, 4) +#define DICT_STATS_ID ut_dulint_create(0, 6) /* The following is a secondary index on SYS_TABLES */ #define DICT_TABLE_IDS_ID ut_dulint_create(0, 5) @@ -119,17 +121,21 @@ dict_create(void); #define DICT_HDR_ROW_ID 0 /* The latest assigned row id */ #define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */ #define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */ -#define DICT_HDR_MIX_ID 24 /* Obsolete, always 0. */ +#define DICT_HDR_MAX_SPACE_ID 24 /* The latest assigned space id, or 0*/ +#define DICT_HDR_MIX_ID_LOW 28 /* Obsolete,always DICT_HDR_FIRST_ID */ #define DICT_HDR_TABLES 32 /* Root of the table index tree */ #define DICT_HDR_TABLE_IDS 36 /* Root of the table index tree */ #define DICT_HDR_COLUMNS 40 /* Root of the column index tree */ #define DICT_HDR_INDEXES 44 /* Root of the index index tree */ #define DICT_HDR_FIELDS 48 /* Root of the index field index tree */ +#define DICT_HDR_STATS 52 /* Root of the stats tree */ #define DICT_HDR_FSEG_HEADER 56 /* Segment header for the tablespace segment into which the dictionary header is created */ + +#define DICT_HDR_XTRADB_MARK 256 /* Flag to distinguish expansion of XtraDB */ /*-------------------------------------------------------------*/ /* The field number of the page number field in the sys_indexes table @@ -137,12 +143,17 @@ clustered index */ #define DICT_SYS_INDEXES_PAGE_NO_FIELD 8 #define DICT_SYS_INDEXES_SPACE_NO_FIELD 7 #define DICT_SYS_INDEXES_TYPE_FIELD 6 +#define DICT_SYS_INDEXES_NAME_FIELD 4 + 
+#define DICT_SYS_STATS_DIFF_VALS_FIELD 4 /* When a row id which is zero modulo this number (which must be a power of two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is updated */ #define DICT_HDR_ROW_ID_WRITE_MARGIN 256 +#define DICT_HDR_XTRADB_FLAG ut_dulint_create(0x58545241UL,0x44425F31UL) /* "XTRADB_1" */ + #ifndef UNIV_NONINL #include "dict0boot.ic" #endif diff --git a/storage/xtradb/include/dict0crea.h b/storage/xtradb/include/dict0crea.h index cce1246b789..0249091a195 100644 --- a/storage/xtradb/include/dict0crea.h +++ b/storage/xtradb/include/dict0crea.h @@ -53,6 +53,14 @@ ind_create_graph_create( dict_index_t* index, /*!< in: index to create, built as a memory data structure */ mem_heap_t* heap); /*!< in: heap where created */ +/*********************************************************************//** +*/ +UNIV_INTERN +ind_node_t* +ind_insert_stats_graph_create( +/*==========================*/ + dict_index_t* index, + mem_heap_t* heap); /***********************************************************//** Creates a table. This is a high-level function used in SQL execution graphs. @return query thread to run next or NULL */ @@ -62,6 +70,13 @@ dict_create_table_step( /*===================*/ que_thr_t* thr); /*!< in: query thread */ /***********************************************************//** +*/ +UNIV_INTERN +que_thr_t* +dict_insert_stats_step( +/*===================*/ + que_thr_t* thr); +/***********************************************************//** Creates an index. This is a high-level function used in SQL execution graphs. 
@return query thread to run next or NULL */ @@ -170,6 +185,7 @@ struct ind_node_struct{ ins_node_t* field_def; /* child node which does the inserts of the field definitions; the row to be inserted is built by the parent node */ + ins_node_t* stats_def; commit_node_t* commit_node; /* child node which performs a commit after a successful index creation */ @@ -180,6 +196,7 @@ struct ind_node_struct{ dict_table_t* table; /*!< table which owns the index */ dtuple_t* ind_row;/* index definition row built */ ulint field_no;/* next field definition to insert */ + ulint stats_no; mem_heap_t* heap; /*!< memory heap used as auxiliary storage */ }; @@ -189,6 +206,7 @@ struct ind_node_struct{ #define INDEX_CREATE_INDEX_TREE 3 #define INDEX_COMMIT_WORK 4 #define INDEX_ADD_TO_CACHE 5 +#define INDEX_BUILD_STATS_COLS 6 #ifndef UNIV_NONINL #include "dict0crea.ic" diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index 8272bfe2422..3c5e620d3c1 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -352,6 +352,7 @@ dict_create_foreign_constraints( name before it: test.table2; the default database id the database of parameter name */ + size_t sql_length, /*!< in: length of sql_string */ const char* name, /*!< in: table full name in the normalized form database_name/table_name */ @@ -928,9 +929,10 @@ UNIV_INTERN void dict_table_check_for_dup_indexes( /*=============================*/ - const dict_table_t* table); /*!< in: Check for dup indexes + const dict_table_t* table, /*!< in: Check for dup indexes in this table */ - + ibool tmp_ok);/*!< in: TRUE=allow temporary + index names */ #endif /* UNIV_DEBUG */ /**********************************************************************//** Builds a node pointer out of a physical record and a page number. 
@@ -1038,8 +1040,9 @@ void dict_update_statistics_low( /*=======================*/ dict_table_t* table, /*!< in/out: table */ - ibool has_dict_mutex);/*!< in: TRUE if the caller has the + ibool has_dict_mutex, /*!< in: TRUE if the caller has the dictionary mutex */ + ibool sync); /*********************************************************************//** Calculates new estimates for table and index statistics. The statistics are used in query optimization. */ @@ -1047,7 +1050,8 @@ UNIV_INTERN void dict_update_statistics( /*===================*/ - dict_table_t* table); /*!< in/out: table */ + dict_table_t* table, /*!< in/out: table */ + ibool sync); /********************************************************************//** Reserves the dictionary system mutex for MySQL. */ UNIV_INTERN @@ -1060,6 +1064,22 @@ UNIV_INTERN void dict_mutex_exit_for_mysql(void); /*===========================*/ +/**********************************************************************//** +Lock the appropriate mutex to protect index->stat_n_diff_key_vals[]. +index->id is used to pick the right mutex and it should not change +before dict_index_stat_mutex_exit() is called on this index. */ +UNIV_INTERN +void +dict_index_stat_mutex_enter( +/*========================*/ + const dict_index_t* index); /*!< in: index */ +/**********************************************************************//** +Unlock the appropriate mutex that protects index->stat_n_diff_key_vals[]. */ +UNIV_INTERN +void +dict_index_stat_mutex_exit( +/*=======================*/ + const dict_index_t* index); /*!< in: index */ /********************************************************************//** Checks if the database name in two table names is the same. 
@return TRUE if same db name */ @@ -1142,6 +1162,7 @@ struct dict_sys_struct{ dict_table_t* sys_columns; /*!< SYS_COLUMNS table */ dict_table_t* sys_indexes; /*!< SYS_INDEXES table */ dict_table_t* sys_fields; /*!< SYS_FIELDS table */ + dict_table_t* sys_stats; /*!< SYS_STATS table */ }; #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h index 603ecfda3f9..37c5a4a24fc 100644 --- a/storage/xtradb/include/dict0mem.h +++ b/storage/xtradb/include/dict0mem.h @@ -80,21 +80,39 @@ combination of types */ /** File format */ /* @{ */ #define DICT_TF_FORMAT_SHIFT 5 /* file format */ -#define DICT_TF_FORMAT_MASK (127 << DICT_TF_FORMAT_SHIFT) +#define DICT_TF_FORMAT_MASK \ +((~(~0 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT))) << DICT_TF_FORMAT_SHIFT) #define DICT_TF_FORMAT_51 0 /*!< InnoDB/MySQL up to 5.1 */ #define DICT_TF_FORMAT_ZIP 1 /*!< InnoDB plugin for 5.1: compressed tables, new BLOB treatment */ /** Maximum supported file format */ #define DICT_TF_FORMAT_MAX DICT_TF_FORMAT_ZIP - +/* @} */ #define DICT_TF_BITS 6 /*!< number of flag bits */ #if (1 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT)) <= DICT_TF_FORMAT_MAX # error "DICT_TF_BITS is insufficient for DICT_TF_FORMAT_MAX" #endif /* @} */ + +/** @brief Additional table flags. + +These flags will be stored in SYS_TABLES.MIX_LEN. All unused flags +will be written as 0. The column may contain garbage for tables +created with old versions of InnoDB that only implemented +ROW_FORMAT=REDUNDANT. */ +/* @{ */ +#define DICT_TF2_SHIFT DICT_TF_BITS + /*!< Shift value for + table->flags. */ +#define DICT_TF2_TEMPORARY 1 /*!< TRUE for tables from + CREATE TEMPORARY TABLE. */ +#define DICT_TF2_BITS (DICT_TF2_SHIFT + 1) + /*!< Total number of bits + in table->flags. */ /* @} */ + /**********************************************************************//** Creates a table memory object. 
@return own: table object */ @@ -364,7 +382,7 @@ initialized to 0, NULL or FALSE in dict_mem_table_create(). */ struct dict_table_struct{ dulint id; /*!< id of the table */ mem_heap_t* heap; /*!< memory heap */ - const char* name; /*!< table name */ + char* name; /*!< table name */ const char* dir_path_of_temp_table;/*!< NULL or the directory path where a TEMPORARY table that was explicitly created by a user should be placed if @@ -374,7 +392,7 @@ struct dict_table_struct{ unsigned space:32; /*!< space where the clustered index of the table is placed */ - unsigned flags:DICT_TF_BITS;/*!< DICT_TF_COMPACT, ... */ + unsigned flags:DICT_TF2_BITS;/*!< DICT_TF_COMPACT, ... */ unsigned ibd_file_missing:1; /*!< TRUE if this is in a single-table tablespace and the .ibd file is missing; then diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index bacaafa1c72..07c80ef8609 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -110,9 +110,10 @@ extern fil_addr_t fil_addr_null; contents of this field is valid for all uncompressed pages. 
*/ #define FIL_PAGE_FILE_FLUSH_LSN 26 /*!< this is only defined for the - first page in a data file: the file - has been flushed to disk at least up - to this lsn */ + first page in a system tablespace + data file (ibdata*, not *.ibd): + the file has been flushed to disk + at least up to this lsn */ #define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this contains the space id of the page */ #define FIL_PAGE_DATA 38 /*!< start of the data on the page */ @@ -225,6 +226,16 @@ fil_space_create( 0 for uncompressed tablespaces */ ulint purpose);/*!< in: FIL_TABLESPACE, or FIL_LOG if log */ /*******************************************************************//** +Assigns a new space id for a new single-table tablespace. This works simply by +incrementing the global counter. If 4 billion ids are not enough, we may need +to recycle ids. +@return TRUE if assigned, FALSE if not */ +UNIV_INTERN +ibool +fil_assign_new_space_id( +/*====================*/ + ulint* space_id); /*!< in/out: space id */ +/*******************************************************************//** Returns the size of the space in pages. The tablespace must be cached in the memory cache. 
@return space size, 0 if space not found */ @@ -427,9 +438,7 @@ UNIV_INTERN ulint fil_create_new_single_table_tablespace( /*===================================*/ - ulint* space_id, /*!< in/out: space id; if this is != 0, - then this is an input parameter, - otherwise output */ + ulint space_id, /*!< in: space id */ const char* tablename, /*!< in: the table name in the usual databasename/tablename format of InnoDB, or a dir path to a temp @@ -498,16 +507,6 @@ UNIV_INTERN ulint fil_load_single_table_tablespaces(void); /*===================================*/ -/********************************************************************//** -If we need crash recovery, and we have called -fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(), -we can call this function to print an error message of orphaned .ibd files -for which there is not a data dictionary entry with a matching table name -and space id. */ -UNIV_INTERN -void -fil_print_orphaned_tablespaces(void); -/*================================*/ /*******************************************************************//** Returns TRUE if a single-table tablespace does not exist in the memory cache, or is being deleted there. diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h index b737a00b3dc..445d94eeabb 100644 --- a/storage/xtradb/include/ha_prototypes.h +++ b/storage/xtradb/include/ha_prototypes.h @@ -215,11 +215,21 @@ innobase_casedn_str( /**********************************************************************//** Determines the connection character set. @return connection character set */ +UNIV_INTERN struct charset_info_st* innobase_get_charset( /*=================*/ void* mysql_thd); /*!< in: MySQL thread handle */ - +/**********************************************************************//** +Determines the current SQL statement. 
+@return SQL statement string */ +UNIV_INTERN +const char* +innobase_get_stmt( +/*==============*/ + void* mysql_thd, /*!< in: MySQL thread handle */ + size_t* length) /*!< out: length of the SQL statement */ + __attribute__((nonnull)); /******************************************************************//** This function is used to find the storage length in bytes of the first n characters for prefix indexes using a multibyte character set. The function @@ -258,4 +268,12 @@ thd_lock_wait_timeout( void* thd); /*!< in: thread handle (THD*), or NULL to query the global innodb_lock_wait_timeout */ +/******************************************************************//** +*/ + +ulong +thd_flush_log_at_trx_commit_session( +/*================================*/ + void* thd); + #endif diff --git a/storage/xtradb/include/hash0hash.h b/storage/xtradb/include/hash0hash.h index 977cb829f35..9dc20cc057f 100644 --- a/storage/xtradb/include/hash0hash.h +++ b/storage/xtradb/include/hash0hash.h @@ -49,6 +49,28 @@ hash_table_t* hash_create( /*========*/ ulint n); /*!< in: number of array cells */ + +/*************************************************************//** +*/ +UNIV_INTERN +ulint +hash_create_needed( +/*===============*/ + ulint n); + +UNIV_INTERN +void +hash_create_init( +/*=============*/ + hash_table_t* table, + ulint n); + +UNIV_INTERN +void +hash_create_reuse( +/*==============*/ + hash_table_t* table); + #ifndef UNIV_HOTBACKUP /*************************************************************//** Creates a mutex array to protect a hash table. 
*/ @@ -328,6 +350,33 @@ do {\ }\ } while (0) +/********************************************************************//** +Align nodes with moving location.*/ +#define HASH_OFFSET(TABLE, NODE_TYPE, PTR_NAME, FADDR, FOFFSET, BOFFSET) \ +do {\ + ulint i2222;\ + ulint cell_count2222;\ +\ + cell_count2222 = hash_get_n_cells(TABLE);\ +\ + for (i2222 = 0; i2222 < cell_count2222; i2222++) {\ + NODE_TYPE* node2222;\ +\ + if ((TABLE)->array[i2222].node) \ + (TABLE)->array[i2222].node = (void*)((char*)(TABLE)->array[i2222].node \ + + (((TABLE)->array[i2222].node > (void*)FADDR)?FOFFSET:BOFFSET));\ + node2222 = HASH_GET_FIRST((TABLE), i2222);\ +\ + while (node2222) {\ + if (node2222->PTR_NAME) \ + node2222->PTR_NAME = (void*)((char*)node2222->PTR_NAME \ + + ((((void*)node2222->PTR_NAME) > (void*)FADDR)?FOFFSET:BOFFSET));\ +\ + node2222 = node2222->PTR_NAME;\ + }\ + }\ +} while (0) + /************************************************************//** Gets the mutex index for a fold value in a hash table. 
@return mutex number */ @@ -434,11 +483,12 @@ struct hash_table_struct { these heaps */ #endif /* !UNIV_HOTBACKUP */ mem_heap_t* heap; +#ifdef UNIV_DEBUG ulint magic_n; +# define HASH_TABLE_MAGIC_N 76561114 +#endif /* UNIV_DEBUG */ }; -#define HASH_TABLE_MAGIC_N 76561114 - #ifndef UNIV_NONINL #include "hash0hash.ic" #endif diff --git a/storage/xtradb/include/hash0hash.ic b/storage/xtradb/include/hash0hash.ic index 19da2d50701..0b437894e2e 100644 --- a/storage/xtradb/include/hash0hash.ic +++ b/storage/xtradb/include/hash0hash.ic @@ -35,6 +35,8 @@ hash_get_nth_cell( hash_table_t* table, /*!< in: hash table */ ulint n) /*!< in: cell index */ { + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); ut_ad(n < table->n_cells); return(table->array + n); @@ -48,6 +50,8 @@ hash_table_clear( /*=============*/ hash_table_t* table) /*!< in/out: hash table */ { + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); memset(table->array, 0x0, table->n_cells * sizeof(*table->array)); } @@ -61,6 +65,8 @@ hash_get_n_cells( /*=============*/ hash_table_t* table) /*!< in: table */ { + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); return(table->n_cells); } @@ -74,6 +80,8 @@ hash_calc_hash( ulint fold, /*!< in: folded value */ hash_table_t* table) /*!< in: hash table */ { + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); return(ut_hash_ulint(fold, table->n_cells)); } @@ -88,6 +96,8 @@ hash_get_mutex_no( hash_table_t* table, /*!< in: hash table */ ulint fold) /*!< in: fold */ { + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); ut_ad(ut_is_2pow(table->n_mutexes)); return(ut_2pow_remainder(hash_calc_hash(fold, table), table->n_mutexes)); @@ -103,6 +113,8 @@ hash_get_nth_heap( hash_table_t* table, /*!< in: hash table */ ulint i) /*!< in: index of the heap */ { + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); ut_ad(i < table->n_mutexes); return(table->heaps[i]); @@ -120,6 +132,9 @@ hash_get_heap( { ulint i; + 
ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); + if (table->heap) { return(table->heap); } @@ -139,6 +154,8 @@ hash_get_nth_mutex( hash_table_t* table, /*!< in: hash table */ ulint i) /*!< in: index of the mutex */ { + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); ut_ad(i < table->n_mutexes); return(table->mutexes + i); @@ -156,6 +173,9 @@ hash_get_mutex( { ulint i; + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); + i = hash_get_mutex_no(table, fold); return(hash_get_nth_mutex(table, i)); diff --git a/storage/xtradb/include/lock0lock.h b/storage/xtradb/include/lock0lock.h index 82e4c9bd976..73f885ecf04 100644 --- a/storage/xtradb/include/lock0lock.h +++ b/storage/xtradb/include/lock0lock.h @@ -43,6 +43,7 @@ extern ibool lock_print_waits; #endif /* UNIV_DEBUG */ /* Buffer for storing information about the most recent deadlock error */ extern FILE* lock_latest_err_file; +extern ulint srv_n_lock_deadlock_count; /*********************************************************************//** Gets the size of a lock struct. @@ -340,11 +341,12 @@ lock_sec_rec_modify_check_and_lock( que_thr_t* thr, /*!< in: query thread */ mtr_t* mtr); /*!< in/out: mini-transaction */ /*********************************************************************//** -Like the counterpart for a clustered index below, but now we read a +Like lock_clust_rec_read_check_and_lock(), but reads a secondary index record. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_sec_rec_read_check_and_lock( /*=============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -371,9 +373,10 @@ if the query thread should anyway be suspended for some reason; if not, then puts the transaction and the query thread to the lock wait state and inserts a waiting request for a record lock to the lock queue. 
Sets the requested mode lock on the record. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_clust_rec_read_check_and_lock( /*===============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -613,13 +616,16 @@ lock_rec_print( FILE* file, /*!< in: file where to print */ const lock_t* lock); /*!< in: record type lock */ /*********************************************************************//** -Prints info of locks for all transactions. */ +Prints info of locks for all transactions. +@return FALSE if not able to obtain kernel mutex +and exits without printing info */ UNIV_INTERN -void +ibool lock_print_info_summary( /*====================*/ - FILE* file); /*!< in: file where to print */ -/*********************************************************************//** + FILE* file, /*!< in: file where to print */ + ibool nowait);/*!< in: whether to wait for the kernel mutex */ +/************************************************************************* Prints info of locks for each transaction. */ UNIV_INTERN void diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h index 135aeb69e2d..8fce4ef96bc 100644 --- a/storage/xtradb/include/log0log.h +++ b/storage/xtradb/include/log0log.h @@ -1,23 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ -/***************************************************************************** - -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2009, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -825,7 +808,17 @@ struct log_struct{ written to some log group; for this to be advanced, it is enough that the write i/o has been completed for all - log groups */ + log groups. + Note that since InnoDB currently + has only one log group, + this value is redundant. Also it + is possible that this value + falls behind the + flushed_to_disk_lsn transiently. + It is appropriate to use either + flushed_to_disk_lsn or + write_lsn, which are always + up-to-date and accurate. */ ib_uint64_t write_lsn; /*!< end lsn for the current running write */ ulint write_end_offset;/*!< the data in buffer has diff --git a/storage/xtradb/include/log0log.ic b/storage/xtradb/include/log0log.ic index 36d151a3064..1ce00fd7313 100644 --- a/storage/xtradb/include/log0log.ic +++ b/storage/xtradb/include/log0log.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -314,12 +314,15 @@ log_reserve_and_write_fast( ulint data_len; #ifdef UNIV_LOG_LSN_DEBUG /* length of the LSN pseudo-record */ - ulint lsn_len = 1 - + mach_get_compressed_size(log_sys->lsn >> 32) - + mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL); + ulint lsn_len; #endif /* UNIV_LOG_LSN_DEBUG */ mutex_enter(&log_sys->mutex); +#ifdef UNIV_LOG_LSN_DEBUG + lsn_len = 1 + + mach_get_compressed_size(log_sys->lsn >> 32) + + mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL); +#endif /* UNIV_LOG_LSN_DEBUG */ data_len = len #ifdef UNIV_LOG_LSN_DEBUG @@ -430,7 +433,10 @@ void log_free_check(void) /*================*/ { - /* ut_ad(sync_thread_levels_empty()); */ + +#ifdef UNIV_SYNC_DEBUG + ut_ad(sync_thread_levels_empty_gen(TRUE)); +#endif /* UNIV_SYNC_DEBUG */ if (log_sys->check_flush_or_checkpoint) { diff --git a/storage/xtradb/include/log0recv.h b/storage/xtradb/include/log0recv.h index ac6b19a3f6a..15065267250 100644 --- a/storage/xtradb/include/log0recv.h +++ b/storage/xtradb/include/log0recv.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -176,6 +176,12 @@ UNIV_INTERN void recv_recovery_from_checkpoint_finish(void); /*======================================*/ +/********************************************************//** +Initiates the rollback of active transactions. 
*/ +UNIV_INTERN +void +recv_recovery_rollback_active(void); +/*===============================*/ /*******************************************************//** Scans log from a buffer and stores new log data to the parsing buffer. Parses and hashes the log records if new data found. Unless @@ -258,12 +264,14 @@ void recv_sys_init( /*==========*/ ulint available_memory); /*!< in: available memory in bytes */ +#ifndef UNIV_HOTBACKUP /********************************************************//** Reset the state of the recovery system variables. */ UNIV_INTERN void recv_sys_var_init(void); /*===================*/ +#endif /* !UNIV_HOTBACKUP */ /*******************************************************************//** Empties the hash table of stored log records, applying them to appropriate pages. */ @@ -360,8 +368,8 @@ typedef struct recv_addr_struct recv_addr_t; struct recv_addr_struct{ enum recv_addr_state state; /*!< recovery state of the page */ - ulint space; /*!< space id */ - ulint page_no;/*!< page number */ + unsigned space:32;/*!< space id */ + unsigned page_no:32;/*!< page number */ UT_LIST_BASE_NODE_T(recv_t) rec_list;/*!< list of log records for this page */ hash_node_t addr_hash;/*!< hash node in the hash bucket chain */ diff --git a/storage/xtradb/include/mach0data.ic b/storage/xtradb/include/mach0data.ic index ef20356bd31..96d2417ac81 100644 --- a/storage/xtradb/include/mach0data.ic +++ b/storage/xtradb/include/mach0data.ic @@ -36,7 +36,7 @@ mach_write_to_1( ulint n) /*!< in: ulint integer to be stored, >= 0, < 256 */ { ut_ad(b); - ut_ad(n <= 0xFFUL); + ut_ad((n | 0xFFUL) <= 0xFFUL); b[0] = (byte)n; } @@ -65,7 +65,7 @@ mach_write_to_2( ulint n) /*!< in: ulint integer to be stored */ { ut_ad(b); - ut_ad(n <= 0xFFFFUL); + ut_ad((n | 0xFFFFUL) <= 0xFFFFUL); b[0] = (byte)(n >> 8); b[1] = (byte)(n); @@ -81,10 +81,7 @@ mach_read_from_2( /*=============*/ const byte* b) /*!< in: pointer to 2 bytes */ { - ut_ad(b); - return( ((ulint)(b[0]) << 8) - + (ulint)(b[1]) - 
); + return(((ulint)(b[0]) << 8) | (ulint)(b[1])); } /********************************************************//** @@ -129,7 +126,7 @@ mach_write_to_3( ulint n) /*!< in: ulint integer to be stored */ { ut_ad(b); - ut_ad(n <= 0xFFFFFFUL); + ut_ad((n | 0xFFFFFFUL) <= 0xFFFFFFUL); b[0] = (byte)(n >> 16); b[1] = (byte)(n >> 8); @@ -148,8 +145,8 @@ mach_read_from_3( { ut_ad(b); return( ((ulint)(b[0]) << 16) - + ((ulint)(b[1]) << 8) - + (ulint)(b[2]) + | ((ulint)(b[1]) << 8) + | (ulint)(b[2]) ); } @@ -183,9 +180,9 @@ mach_read_from_4( { ut_ad(b); return( ((ulint)(b[0]) << 24) - + ((ulint)(b[1]) << 16) - + ((ulint)(b[2]) << 8) - + (ulint)(b[3]) + | ((ulint)(b[1]) << 16) + | ((ulint)(b[2]) << 8) + | (ulint)(b[3]) ); } @@ -721,7 +718,7 @@ mach_read_from_2_little_endian( /*===========================*/ const byte* buf) /*!< in: from where to read */ { - return((ulint)(*buf) + ((ulint)(*(buf + 1))) * 256); + return((ulint)(buf[0]) | ((ulint)(buf[1]) << 8)); } /*********************************************************//** diff --git a/storage/xtradb/include/mem0dbg.h b/storage/xtradb/include/mem0dbg.h index a064af5c678..d81e1418b2b 100644 --- a/storage/xtradb/include/mem0dbg.h +++ b/storage/xtradb/include/mem0dbg.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -28,6 +28,13 @@ Created 6/9/1994 Heikki Tuuri check fields whose sizes are given below */ #ifdef UNIV_MEM_DEBUG +# ifndef UNIV_HOTBACKUP +/* The mutex which protects in the debug version the hash table +containing the list of live memory heaps, and also the global +variables in mem0dbg.c. 
*/ +extern mutex_t mem_hash_mutex; +# endif /* !UNIV_HOTBACKUP */ + #define MEM_FIELD_HEADER_SIZE ut_calc_align(2 * sizeof(ulint),\ UNIV_MEM_ALIGNMENT) #define MEM_FIELD_TRAILER_SIZE sizeof(ulint) diff --git a/storage/xtradb/include/mem0dbg.ic b/storage/xtradb/include/mem0dbg.ic index cb9245411dc..b0c8178a623 100644 --- a/storage/xtradb/include/mem0dbg.ic +++ b/storage/xtradb/include/mem0dbg.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -25,9 +25,6 @@ Created 6/8/1994 Heikki Tuuri *************************************************************************/ #ifdef UNIV_MEM_DEBUG -# ifndef UNIV_HOTBACKUP -extern mutex_t mem_hash_mutex; -# endif /* !UNIV_HOTBACKUP */ extern ulint mem_current_allocated_memory; /******************************************************************//** diff --git a/storage/xtradb/include/mem0mem.h b/storage/xtradb/include/mem0mem.h index 98f8748e529..ee28cf7b225 100644 --- a/storage/xtradb/include/mem0mem.h +++ b/storage/xtradb/include/mem0mem.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -359,6 +359,9 @@ struct mem_block_info_struct { to the heap is also the first block in this list, though it also contains the base node of the list. */ ulint len; /*!< physical length of this block in bytes */ + ulint total_size; /* physical length in bytes of all blocks + in the heap. 
This is defined only in the base + node and is set to ULINT_UNDEFINED in others. */ ulint type; /*!< type of heap: MEM_HEAP_DYNAMIC, or MEM_HEAP_BUF possibly ORed to MEM_HEAP_BTR_SEARCH */ ulint free; /*!< offset in bytes of the first free position for diff --git a/storage/xtradb/include/mem0mem.ic b/storage/xtradb/include/mem0mem.ic index e7080d8c508..cbce2edc661 100644 --- a/storage/xtradb/include/mem0mem.ic +++ b/storage/xtradb/include/mem0mem.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -579,18 +579,12 @@ mem_heap_get_size( /*==============*/ mem_heap_t* heap) /*!< in: heap */ { - mem_block_t* block; ulint size = 0; ut_ad(mem_heap_check(heap)); - block = heap; - - while (block != NULL) { + size = heap->total_size; - size += mem_block_get_len(block); - block = UT_LIST_GET_NEXT(list, block); - } #ifndef UNIV_HOTBACKUP if (heap->free_block) { size += UNIV_PAGE_SIZE; diff --git a/storage/xtradb/include/mtr0log.ic b/storage/xtradb/include/mtr0log.ic index 5c24c38b337..63af02ba409 100644 --- a/storage/xtradb/include/mtr0log.ic +++ b/storage/xtradb/include/mtr0log.ic @@ -27,8 +27,8 @@ Created 12/7/1995 Heikki Tuuri #include "ut0lst.h" #include "buf0buf.h" #include "fsp0types.h" +#include "srv0srv.h" #include "trx0sys.h" - /********************************************************//** Opens a buffer to mlog. It must be closed with mlog_close. 
@return buffer, NULL if log mode MTR_LOG_NONE */ @@ -201,8 +201,9 @@ mlog_write_initial_log_record_fast( the doublewrite buffer is located in pages FSP_EXTENT_SIZE, ..., 3 * FSP_EXTENT_SIZE - 1 in the system tablespace */ - if (space == TRX_SYS_SPACE - && offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) { + if ((space == TRX_SYS_SPACE + || (srv_doublewrite_file && space == TRX_DOUBLEWRITE_SPACE)) + && offset >= (ulint)FSP_EXTENT_SIZE && offset < 3 * (ulint)FSP_EXTENT_SIZE) { if (trx_doublewrite_buf_is_being_created) { /* Do nothing: we only come to this branch in an InnoDB database creation. We do not redo log diff --git a/storage/xtradb/include/mtr0mtr.ic b/storage/xtradb/include/mtr0mtr.ic index 310c7c4117f..18f8e87b3cf 100644 --- a/storage/xtradb/include/mtr0mtr.ic +++ b/storage/xtradb/include/mtr0mtr.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -70,6 +70,7 @@ mtr_memo_push( ut_ad(type <= MTR_MEMO_X_LOCK); ut_ad(mtr); ut_ad(mtr->magic_n == MTR_MAGIC_N); + ut_ad(mtr->state == MTR_ACTIVE); memo = &(mtr->memo); @@ -92,6 +93,7 @@ mtr_set_savepoint( ut_ad(mtr); ut_ad(mtr->magic_n == MTR_MAGIC_N); + ut_ad(mtr->state == MTR_ACTIVE); memo = &(mtr->memo); @@ -149,6 +151,7 @@ mtr_memo_contains( ut_ad(mtr); ut_ad(mtr->magic_n == MTR_MAGIC_N); + ut_ad(mtr->state == MTR_ACTIVE || mtr->state == MTR_COMMITTING); memo = &(mtr->memo); diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index 3eff5216867..eeab8a2b5d9 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -1,23 +1,6 @@ -/***************************************************************************** - -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ /*********************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2009, Percona Inc. 
Portions of this file contain modifications contributed and copyrighted diff --git a/storage/xtradb/include/os0proc.h b/storage/xtradb/include/os0proc.h index fd46bd7db87..582cef6f803 100644 --- a/storage/xtradb/include/os0proc.h +++ b/storage/xtradb/include/os0proc.h @@ -32,6 +32,11 @@ Created 9/30/1995 Heikki Tuuri #ifdef UNIV_LINUX #include <sys/ipc.h> #include <sys/shm.h> +#else +# if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H +#include <sys/ipc.h> +#include <sys/shm.h> +# endif #endif typedef void* os_process_t; @@ -70,6 +75,29 @@ os_mem_free_large( ulint size); /*!< in: size returned by os_mem_alloc_large() */ + +/****************************************************************//** +Allocates or attaches and reuses shared memory segment. +The content is not cleared automatically. +@return allocated memory */ +UNIV_INTERN +void* +os_shm_alloc( +/*=========*/ + ulint* n, /*!< in/out: number of bytes */ + uint key, + ibool* is_new); + +/****************************************************************//** +Detach shared memory segment. */ +UNIV_INTERN +void +os_shm_free( +/*========*/ + void *ptr, /*!< in: pointer returned by + os_shm_alloc() */ + ulint size); /*!< in: size returned by + os_shm_alloc() */ #ifndef UNIV_NONINL #include "os0proc.ic" #endif diff --git a/storage/xtradb/include/page0page.h b/storage/xtradb/include/page0page.h index e182c8f58be..5b2bcf7c054 100644 --- a/storage/xtradb/include/page0page.h +++ b/storage/xtradb/include/page0page.h @@ -500,7 +500,7 @@ ibool page_is_leaf( /*=========*/ const page_t* page) /*!< in: page */ - __attribute__((nonnull, pure)); + __attribute__((pure)); /************************************************************//** Gets the pointer to the next record on the page. 
@return pointer to next record */ diff --git a/storage/xtradb/include/page0page.ic b/storage/xtradb/include/page0page.ic index 9655e6c7e27..dab9dc742e4 100644 --- a/storage/xtradb/include/page0page.ic +++ b/storage/xtradb/include/page0page.ic @@ -275,6 +275,9 @@ page_is_leaf( /*=========*/ const page_t* page) /*!< in: page */ { + if (!page) { + return(FALSE); + } return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_LEVEL))); } diff --git a/storage/xtradb/include/page0zip.h b/storage/xtradb/include/page0zip.h index 574809e5227..4d37302ed20 100644 --- a/storage/xtradb/include/page0zip.h +++ b/storage/xtradb/include/page0zip.h @@ -114,7 +114,7 @@ page_zip_compress( const page_t* page, /*!< in: uncompressed page */ dict_index_t* index, /*!< in: index of the B-tree node */ mtr_t* mtr) /*!< in: mini-transaction, or NULL */ - __attribute__((nonnull(1,2,3))); + __attribute__((nonnull(1,3))); /**********************************************************************//** Decompress a page. This function should tolerate errors on the compressed diff --git a/storage/xtradb/include/que0que.h b/storage/xtradb/include/que0que.h index 420f34550e2..09a671f49b1 100644 --- a/storage/xtradb/include/que0que.h +++ b/storage/xtradb/include/que0que.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -30,6 +30,7 @@ Created 5/27/1996 Heikki Tuuri #include "data0data.h" #include "dict0types.h" #include "trx0trx.h" +#include "trx0roll.h" #include "srv0srv.h" #include "usr0types.h" #include "que0types.h" @@ -215,6 +216,16 @@ trx_t* thr_get_trx( /*========*/ que_thr_t* thr); /*!< in: query thread */ +/*******************************************************************//** +Determines if this thread is rolling back an incomplete transaction +in crash recovery. +@return TRUE if thr is rolling back an incomplete transaction in crash +recovery */ +UNIV_INLINE +ibool +thr_is_recv( +/*========*/ + const que_thr_t* thr); /*!< in: query thread */ /***********************************************************************//** Gets the type of a graph node. */ UNIV_INLINE @@ -481,6 +492,8 @@ struct que_fork_struct{ #define QUE_NODE_CALL 31 #define QUE_NODE_EXIT 32 +#define QUE_NODE_INSERT_STATS 34 + /* Query thread states */ #define QUE_THR_RUNNING 1 #define QUE_THR_PROCEDURE_WAIT 2 diff --git a/storage/xtradb/include/que0que.ic b/storage/xtradb/include/que0que.ic index a1c0dc1e77a..bd936670e1e 100644 --- a/storage/xtradb/include/que0que.ic +++ b/storage/xtradb/include/que0que.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -38,6 +38,20 @@ thr_get_trx( return(thr->graph->trx); } +/*******************************************************************//** +Determines if this thread is rolling back an incomplete transaction +in crash recovery. 
+@return TRUE if thr is rolling back an incomplete transaction in crash +recovery */ +UNIV_INLINE +ibool +thr_is_recv( +/*========*/ + const que_thr_t* thr) /*!< in: query thread */ +{ + return(trx_is_recv(thr->graph->trx)); +} + /***********************************************************************//** Gets the first thr in a fork. */ UNIV_INLINE diff --git a/storage/xtradb/include/rem0cmp.h b/storage/xtradb/include/rem0cmp.h index 421308af49b..fcea62ad486 100644 --- a/storage/xtradb/include/rem0cmp.h +++ b/storage/xtradb/include/rem0cmp.h @@ -148,7 +148,9 @@ cmp_rec_rec_simple( const rec_t* rec2, /*!< in: physical record */ const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */ - const dict_index_t* index); /*!< in: data dictionary index */ + const dict_index_t* index, /*!< in: data dictionary index */ + ibool* null_eq);/*!< out: set to TRUE if + found matching null values */ /*************************************************************//** This function is used to compare two physical records. 
Only the common first fields are compared, and if an externally stored field is diff --git a/storage/xtradb/include/rem0rec.ic b/storage/xtradb/include/rem0rec.ic index 8e5bd9a7fcd..fa96c97f95e 100644 --- a/storage/xtradb/include/rem0rec.ic +++ b/storage/xtradb/include/rem0rec.ic @@ -268,7 +268,7 @@ rec_get_next_ptr_const( return(NULL); } - if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) { + if (UNIV_LIKELY(comp != 0)) { #if UNIV_PAGE_SIZE <= 32768 /* Note that for 64 KiB pages, field_value can 'wrap around' and the debug assertion is not valid */ @@ -336,7 +336,7 @@ rec_get_next_offs( field_value = mach_read_from_2(rec - REC_NEXT); - if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) { + if (UNIV_LIKELY(comp != 0)) { #if UNIV_PAGE_SIZE <= 32768 /* Note that for 64 KiB pages, field_value can 'wrap around' and the debug assertion is not valid */ @@ -647,7 +647,7 @@ rec_get_info_and_status_bits( & (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT) # error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap" #endif - if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) { + if (UNIV_LIKELY(comp != 0)) { bits = rec_get_info_bits(rec, TRUE) | rec_get_status(rec); } else { bits = rec_get_info_bits(rec, FALSE); @@ -683,7 +683,7 @@ rec_get_deleted_flag( const rec_t* rec, /*!< in: physical record */ ulint comp) /*!< in: nonzero=compact page format */ { - if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) { + if (UNIV_LIKELY(comp != 0)) { return(UNIV_UNLIKELY( rec_get_bit_field_1(rec, REC_NEW_INFO_BITS, REC_INFO_DELETED_FLAG, diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h index 261ab239cd8..8c5b5b7e0da 100644 --- a/storage/xtradb/include/row0mysql.h +++ b/storage/xtradb/include/row0mysql.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2000, 2010, Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -253,15 +253,6 @@ row_table_got_default_clust_index( /*==============================*/ const dict_table_t* table); /*!< in: table */ /*********************************************************************//** -Calculates the key number used inside MySQL for an Innobase index. We have -to take into account if we generated a default clustered index for the table -@return the key number used inside MySQL */ -UNIV_INTERN -ulint -row_get_mysql_key_number_for_index( -/*===============================*/ - const dict_index_t* index); /*!< in: index */ -/*********************************************************************//** Does an update or delete of a row for MySQL. @return error code or DB_SUCCESS */ UNIV_INTERN @@ -273,27 +264,26 @@ row_update_for_mysql( row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL handle */ /*********************************************************************//** -This can only be used when srv_locks_unsafe_for_binlog is TRUE or -session is using a READ COMMITTED isolation level. Before -calling this function we must use trx_reset_new_rec_lock_info() and -trx_register_new_rec_lock() to store the information which new record locks -really were set. This function removes a newly set lock under prebuilt->pcur, -and also under prebuilt->clust_pcur. Currently, this is only used and tested -in the case of an UPDATE or a DELETE statement, where the row lock is of the -LOCK_X type. -Thus, this implements a 'mini-rollback' that releases the latest record -locks we set. -@return error code or DB_SUCCESS */ +This can only be used when srv_locks_unsafe_for_binlog is TRUE or this +session is using a READ COMMITTED or READ UNCOMMITTED isolation level. 
+Before calling this function row_search_for_mysql() must have +initialized prebuilt->new_rec_locks to store the information which new +record locks really were set. This function removes a newly set +clustered index record lock under prebuilt->pcur or +prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that +releases the latest clustered index record lock we set. +@return error code or DB_SUCCESS */ UNIV_INTERN int row_unlock_for_mysql( /*=================*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in MySQL + row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL handle */ - ibool has_latches_on_recs);/*!< TRUE if called so that we have - the latches on the records under pcur - and clust_pcur, and we do not need to - reposition the cursors. */ + ibool has_latches_on_recs);/*!< in: TRUE if called + so that we have the latches on + the records under pcur and + clust_pcur, and we do not need + to reposition the cursors. */ /*********************************************************************//** Creates an query graph node of 'update' type to be used in the MySQL interface. @@ -386,6 +376,14 @@ row_create_index_for_mysql( then checked for not being too large. */ /*********************************************************************//** +*/ +UNIV_INTERN +int +row_insert_stats_for_mysql( +/*=======================*/ + dict_index_t* index, + trx_t* trx); +/*********************************************************************//** Scans a table create SQL string and adds to the data dictionary the foreign key constraints declared in the string. This function should be called after the indexes for a table have been created. 
@@ -403,6 +401,7 @@ row_table_add_foreign_constraints( FOREIGN KEY (a, b) REFERENCES table2(c, d), table2 can be written also with the database name before it: test.table2 */ + size_t sql_length, /*!< in: length of sql_string */ const char* name, /*!< in: table full name in the normalized form database_name/table_name */ @@ -451,6 +450,12 @@ row_drop_table_for_mysql( const char* name, /*!< in: table name */ trx_t* trx, /*!< in: transaction handle */ ibool drop_db);/*!< in: TRUE=dropping whole database */ +/*********************************************************************//** +Drop all temporary tables during crash recovery. */ +UNIV_INTERN +void +row_mysql_drop_temp_tables(void); +/*============================*/ /*********************************************************************//** Discards the tablespace of a table which stored in an .ibd file. Discarding @@ -494,14 +499,19 @@ row_rename_table_for_mysql( trx_t* trx, /*!< in: transaction handle */ ibool commit); /*!< in: if TRUE then commit trx */ /*********************************************************************//** -Checks a table for corruption. -@return DB_ERROR or DB_SUCCESS */ +Checks that the index contains entries in an ascending order, unique +constraint is not broken, and calculates the number of index entries +in the read view of the current transaction. +@return DB_SUCCESS if ok */ UNIV_INTERN ulint -row_check_table_for_mysql( +row_check_index_for_mysql( /*======================*/ - row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL - handle */ + row_prebuilt_t* prebuilt, /*!< in: prebuilt struct + in MySQL handle */ + const dict_index_t* index, /*!< in: index */ + ulint* n_rows); /*!< out: number of entries + seen in the consistent read */ /*********************************************************************//** Determines if a table is a magic monitor table. 
@@ -701,18 +711,17 @@ struct row_prebuilt_struct { ulint new_rec_locks; /*!< normally 0; if srv_locks_unsafe_for_binlog is TRUE or session is using READ - COMMITTED isolation level, in a - cursor search, if we set a new - record lock on an index, this is - incremented; this is used in - releasing the locks under the - cursors if we are performing an - UPDATE and we determine after - retrieving the row that it does - not need to be locked; thus, - these can be used to implement a - 'mini-rollback' that releases - the latest record locks */ + COMMITTED or READ UNCOMMITTED + isolation level, set in + row_search_for_mysql() if we set a new + record lock on the secondary + or clustered index; this is + used in row_unlock_for_mysql() + when releasing the lock under + the cursor if we determine + after retrieving the row that + it does not need to be locked + ('mini-rollback') */ ulint mysql_prefix_len;/*!< byte offset of the end of the last requested column */ ulint mysql_row_len; /*!< length in bytes of a row in the diff --git a/storage/xtradb/include/row0sel.h b/storage/xtradb/include/row0sel.h index 01a5afaa23e..8544b9d08ba 100644 --- a/storage/xtradb/include/row0sel.h +++ b/storage/xtradb/include/row0sel.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -105,17 +105,6 @@ row_fetch_print( /*============*/ void* row, /*!< in: sel_node_t* */ void* user_arg); /*!< in: not used */ -/****************************************************************//** -Callback function for fetch that stores an unsigned 4 byte integer to the -location pointed. The column's type must be DATA_INT, DATA_UNSIGNED, length -= 4. 
-@return always returns NULL */ -UNIV_INTERN -void* -row_fetch_store_uint4( -/*==================*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg); /*!< in: data pointer */ /***********************************************************//** Prints a row in a select result. @return query thread to run next or NULL */ diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index 99ad3ad03d0..0904a5da1eb 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -1,7 +1,8 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2008, 2009, Google Inc. +Copyright (c) 2009, Percona Inc. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -9,6 +10,13 @@ briefly in the InnoDB documentation. The contributions by Google are incorporated with their permission, and subject to the conditions contained in the file COPYING.Google. +Portions of this file contain modifications contributed and copyrighted +by Percona Inc.. Those modifications are +gratefully acknowledged and are described briefly in the InnoDB +documentation. The contributions by Percona Inc. are incorporated with +their permission, and subject to the conditions contained in the file +COPYING.Percona. + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. 
@@ -22,32 +30,6 @@ this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *****************************************************************************/ -/*********************************************************************** - -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. -Copyright (c) 2009, Percona Inc. - -Portions of this file contain modifications contributed and copyrighted -by Percona Inc.. Those modifications are -gratefully acknowledged and are described briefly in the InnoDB -documentation. The contributions by Percona Inc. are incorporated with -their permission, and subject to the conditions contained in the file -COPYING.Percona. - -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -Public License for more details. 
- -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -***********************************************************************/ /**************************************************//** @file include/srv0srv.h @@ -133,9 +115,10 @@ extern char** srv_data_file_names; extern ulint* srv_data_file_sizes; extern ulint* srv_data_file_is_raw_partition; +extern char* srv_doublewrite_file; + extern ibool srv_extra_undoslots; -extern ibool srv_fast_recovery; extern ibool srv_recovery_stats; extern ulint srv_use_purge_thread; @@ -173,6 +156,8 @@ extern ulint srv_buf_pool_curr_size; /*!< current size in bytes */ extern ulint srv_mem_pool_size; extern ulint srv_lock_table_size; +extern uint srv_buffer_pool_shm_key; + extern ibool srv_thread_concurrency_timer_based; extern ulint srv_n_file_io_threads; @@ -224,6 +209,7 @@ extern ulong srv_stats_method; #define SRV_STATS_METHOD_IGNORE_NULLS 2 extern ulong srv_stats_auto_update; extern ulint srv_stats_update_need_lock; +extern ibool srv_use_sys_stats_table; extern ibool srv_use_doublewrite_buf; extern ibool srv_use_checksums; @@ -247,7 +233,6 @@ extern ulong srv_read_ahead; extern ulong srv_adaptive_checkpoint; extern ulong srv_expand_import; -extern ulint srv_relax_table_creation; extern ulint srv_pass_corrupt_table; extern ulong srv_extra_rsegments; @@ -265,7 +250,8 @@ extern ibool srv_print_innodb_tablespace_monitor; extern ibool srv_print_verbose_log; extern ibool srv_print_innodb_table_monitor; -extern ibool srv_lock_timeout_and_monitor_active; +extern ibool srv_lock_timeout_active; +extern ibool srv_monitor_active; extern ibool srv_error_monitor_active; extern ulong srv_n_spin_wait_rounds; @@ -384,8 +370,9 @@ enum { when writing data files, but do flush after writing to log files */ SRV_UNIX_NOSYNC, /*!< do not flush after writing */ - SRV_UNIX_O_DIRECT /*!< invoke 
os_file_set_nocache() on + SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on data files */ + SRV_UNIX_ALL_O_DIRECT /* new method for examination: logfile also open O_DIRECT */ }; /** Alternatives for file i/o in Windows */ @@ -595,15 +582,23 @@ srv_release_mysql_thread_if_suspended( MySQL OS thread */ /*********************************************************************//** A thread which wakes up threads whose lock wait may have lasted too long. -This also prints the info output by various InnoDB monitors. @return a dummy parameter */ UNIV_INTERN os_thread_ret_t -srv_lock_timeout_and_monitor_thread( -/*================================*/ +srv_lock_timeout_thread( +/*====================*/ void* arg); /*!< in: a dummy parameter required by os_thread_create */ /*********************************************************************//** +A thread which prints the info output by various InnoDB monitors. +@return a dummy parameter */ +UNIV_INTERN +os_thread_ret_t +srv_monitor_thread( +/*===============*/ + void* arg); /*!< in: a dummy parameter required by + os_thread_create */ +/************************************************************************* A thread which prints warnings about semaphore waits which have lasted too long. These can be used to track bugs which cause hangs. @return a dummy parameter */ @@ -614,12 +609,15 @@ srv_error_monitor_thread( void* arg); /*!< in: a dummy parameter required by os_thread_create */ /******************************************************************//** -Outputs to a file the output of the InnoDB Monitor. */ +Outputs to a file the output of the InnoDB Monitor. 
+@return FALSE if not all information printed +due to failure to obtain necessary mutex */ UNIV_INTERN -void +ibool srv_printf_innodb_monitor( /*======================*/ FILE* file, /*!< in: output stream */ + ibool nowait, /*!< in: whether to wait for kernel mutex */ ulint* trx_start, /*!< out: file position of the start of the list of active transactions */ ulint* trx_end); /*!< out: file position of the end of @@ -664,6 +662,7 @@ struct export_var_struct{ ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */ ulint innodb_buffer_pool_read_ahead; /*!< srv_read_ahead */ ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/ + ulint innodb_deadlocks; /* ??? */ ulint innodb_dblwr_pages_written; /*!< srv_dblwr_pages_written */ ulint innodb_dblwr_writes; /*!< srv_dblwr_writes */ ibool innodb_have_atomic_builtins; /*!< HAVE_ATOMIC_BUILTINS */ diff --git a/storage/xtradb/include/sync0rw.h b/storage/xtradb/include/sync0rw.h index 85fa014d77a..4edf93f4042 100644 --- a/storage/xtradb/include/sync0rw.h +++ b/storage/xtradb/include/sync0rw.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -429,8 +429,9 @@ ibool rw_lock_own( /*========*/ rw_lock_t* lock, /*!< in: rw-lock */ - ulint lock_type); /*!< in: lock type: RW_LOCK_SHARED, + ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED, RW_LOCK_EX */ + __attribute__((warn_unused_result)); #endif /* UNIV_SYNC_DEBUG */ /******************************************************************//** Checks if somebody has locked the rw-lock in the specified mode. 
*/ @@ -555,11 +556,12 @@ struct rw_lock_struct { //unsigned cline:14; /*!< Line where created */ unsigned last_s_line:14; /*!< Line number where last time s-locked */ unsigned last_x_line:14; /*!< Line number where last time x-locked */ +#ifdef UNIV_DEBUG ulint magic_n; /*!< RW_LOCK_MAGIC_N */ -}; - /** Value of rw_lock_struct::magic_n */ #define RW_LOCK_MAGIC_N 22643 +#endif /* UNIV_DEBUG */ +}; #ifdef UNIV_SYNC_DEBUG /** The structure for storing debug info of an rw-lock */ diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h index c653e44b5bd..a500cf1da45 100644 --- a/storage/xtradb/include/sync0sync.h +++ b/storage/xtradb/include/sync0sync.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -206,7 +206,8 @@ UNIV_INTERN ibool mutex_own( /*======*/ - const mutex_t* mutex); /*!< in: mutex */ + const mutex_t* mutex) /*!< in: mutex */ + __attribute__((warn_unused_result)); #endif /* UNIV_DEBUG */ #ifdef UNIV_SYNC_DEBUG /******************************************************************//** @@ -238,16 +239,27 @@ ibool sync_thread_levels_empty(void); /*==========================*/ /******************************************************************//** -Checks that the level array for the current thread is empty. -@return TRUE if empty except the exceptions specified below */ +Checks if the level array for the current thread contains a +mutex or rw-latch at the specified level. 
+@return a matching latch, or NULL if not found */ UNIV_INTERN -ibool -sync_thread_levels_empty_gen( -/*=========================*/ +void* +sync_thread_levels_contains( +/*========================*/ + ulint level); /*!< in: latching order level + (SYNC_DICT, ...)*/ +/******************************************************************//** +Checks if the level array for the current thread is empty. +@return a latch, or NULL if empty except the exceptions specified below */ +UNIV_INTERN +void* +sync_thread_levels_nonempty_gen( +/*============================*/ ibool dict_mutex_allowed); /*!< in: TRUE if dictionary mutex is allowed to be owned by the thread, also purge_is_running mutex is allowed */ +#define sync_thread_levels_empty_gen(d) (!sync_thread_levels_nonempty_gen(d)) /******************************************************************//** Gets the debug information for a reserved mutex. */ UNIV_INTERN @@ -426,7 +438,7 @@ or row lock! */ #define SYNC_FILE_FORMAT_TAG 1200 /* Used to serialize access to the file format tag */ #define SYNC_DICT_OPERATION 1001 /* table create, drop, etc. reserve - this in X-mode, implicit or backround + this in X-mode; implicit or backround operations purge, rollback, foreign key checks reserve this in S-mode */ #define SYNC_DICT 1000 diff --git a/storage/xtradb/include/trx0rseg.h b/storage/xtradb/include/trx0rseg.h index 0d7dc60329f..303188f09f2 100644 --- a/storage/xtradb/include/trx0rseg.h +++ b/storage/xtradb/include/trx0rseg.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software diff --git a/storage/xtradb/include/trx0sys.h b/storage/xtradb/include/trx0sys.h index 13d93d5a77a..9ef9485b611 100644 --- a/storage/xtradb/include/trx0sys.h +++ b/storage/xtradb/include/trx0sys.h @@ -124,6 +124,22 @@ trx_sys_hdr_page( /*=============*/ ulint space, /*!< in: space */ ulint page_no);/*!< in: page number */ +/***************************************************************//** +Checks if a space is the system tablespaces. +@return TRUE if system tablespace */ +UNIV_INLINE +ibool +trx_sys_sys_space( +/*==============*/ + ulint space); /*!< in: space */ +/***************************************************************//** +Checks if a space is the doublewrite tablespace. +@return TRUE if doublewrite tablespace */ +UNIV_INLINE +ibool +trx_sys_doublewrite_space( +/*======================*/ + ulint space); /*!< in: space */ /*****************************************************************//** Creates and initializes the central memory structures for the transaction system. This is called when the database is started. */ @@ -137,6 +153,13 @@ UNIV_INTERN void trx_sys_create(void); /*================*/ +/*****************************************************************//** +Creates and initializes the dummy transaction system page for tablespace. 
*/ +UNIV_INTERN +void +trx_sys_dummy_create( +/*=================*/ + ulint space); /********************************************************************* Create extra rollback segments when create_new_db */ UNIV_INTERN @@ -303,6 +326,7 @@ UNIV_INTERN void trx_sys_update_mysql_binlog_offset( /*===============================*/ + trx_sysf_t* sys_header, const char* file_name_in,/*!< in: MySQL log file name */ ib_int64_t offset, /*!< in: position in that log file */ ulint field, /*!< in: offset of the MySQL log info field in @@ -343,12 +367,14 @@ UNIV_INTERN void trx_sys_file_format_tag_init(void); /*==============================*/ +#ifndef UNIV_HOTBACKUP /*****************************************************************//** Shutdown/Close the transaction system. */ UNIV_INTERN void trx_sys_close(void); /*===============*/ +#endif /* !UNIV_HOTBACKUP */ /*****************************************************************//** Get the name representation of the file format from its id. @return pointer to the name */ @@ -444,6 +470,8 @@ trx_sys_file_format_id_to_name( /* Space id and page no where the trx system file copy resides */ #define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */ +#define TRX_DOUBLEWRITE_SPACE 1 /* the doublewrite buffer tablespace if used */ +#define TRX_SYS_SPACE_MAX 9 /* reserved max space id for system tablespaces */ #include "fsp0fsp.h" #define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO @@ -507,7 +535,6 @@ this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */ within that file */ #define TRX_SYS_MYSQL_LOG_NAME 12 /*!< MySQL log file name */ -#ifndef UNIV_HOTBACKUP /** Doublewrite buffer */ /* @{ */ /** The offset of the doublewrite buffer header on the trx system header page */ @@ -559,6 +586,7 @@ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_NO. 
*/ #define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE /* @} */ +#ifndef UNIV_HOTBACKUP /** File format tag */ /* @{ */ /** The offset of the file format tag on the trx system header page diff --git a/storage/xtradb/include/trx0sys.ic b/storage/xtradb/include/trx0sys.ic index 820d31d0692..c7b09d4aec2 100644 --- a/storage/xtradb/include/trx0sys.ic +++ b/storage/xtradb/include/trx0sys.ic @@ -71,6 +71,40 @@ trx_sys_hdr_page( } /***************************************************************//** +Checks if a space is the system tablespaces. +@return TRUE if system tablespace */ +UNIV_INLINE +ibool +trx_sys_sys_space( +/*==============*/ + ulint space) /*!< in: space */ +{ + if (srv_doublewrite_file) { + /* several spaces are reserved */ + return((ibool)(space <= TRX_SYS_SPACE_MAX)); + } else { + return((ibool)(space == TRX_SYS_SPACE)); + } +} + +/***************************************************************//** +Checks if a space is the doublewrite tablespace. +@return TRUE if doublewrite tablespace */ +UNIV_INLINE +ibool +trx_sys_doublewrite_space( +/*======================*/ + ulint space) /*!< in: space */ +{ + if (srv_doublewrite_file) { + /* doublewrite buffer is separated */ + return((ibool)(space == TRX_DOUBLEWRITE_SPACE)); + } else { + return((ibool)(space == TRX_SYS_SPACE)); + } +} + +/***************************************************************//** Gets the pointer in the nth slot of the rseg array. @return pointer to rseg object, NULL if slot not in use */ UNIV_INLINE diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h index 3b845e498d0..4c0ce392bcd 100644 --- a/storage/xtradb/include/trx0trx.h +++ b/storage/xtradb/include/trx0trx.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -338,9 +338,7 @@ trx_commit_step( /**********************************************************************//** Prints info about a transaction to the given file. The caller must own the -kernel mutex and must have called -innobase_mysql_prepare_print_arbitrary_thd(), unless he knows that MySQL -or InnoDB cannot meanwhile change the info printed here. */ +kernel mutex. */ UNIV_INTERN void trx_print( @@ -351,7 +349,7 @@ trx_print( use the default max length */ /** Type of data dictionary operation */ -enum trx_dict_op { +typedef enum trx_dict_op { /** The transaction is not modifying the data dictionary. */ TRX_DICT_OP_NONE = 0, /** The transaction is creating a table or an index, or @@ -363,7 +361,7 @@ enum trx_dict_op { existing table. In crash recovery, the data dictionary must be locked, but the table must not be dropped. */ TRX_DICT_OP_INDEX = 2 -}; +} trx_dict_op_t; /**********************************************************************//** Determine if a transaction is a dictionary operation. @@ -393,6 +391,14 @@ ibool trx_is_interrupted( /*===============*/ trx_t* trx); /*!< in: transaction */ +/**********************************************************************//** +Determines if the currently running transaction is in strict mode. +@return TRUE if strict */ +UNIV_INTERN +ibool +trx_is_strict( +/*==========*/ + trx_t* trx); /*!< in: transaction */ #else /* !UNIV_HOTBACKUP */ #define trx_is_interrupted(trx) FALSE #endif /* !UNIV_HOTBACKUP */ @@ -465,69 +471,80 @@ rolling back after a database recovery */ struct trx_struct{ ulint magic_n; - /* All the next fields are protected by the kernel mutex, except the - undo logs which are protected by undo_mutex */ + + /* These fields are not protected by any mutex. 
*/ const char* op_info; /*!< English text describing the current operation, or an empty string */ - unsigned is_purge:1; /*!< 0=user transaction, 1=purge */ - unsigned is_recovered:1; /*!< 0=normal transaction, - 1=recovered, must be rolled back */ - unsigned conc_state:2; /*!< state of the trx from the point + ulint conc_state; /*!< state of the trx from the point of view of concurrency control: TRX_ACTIVE, TRX_COMMITTED_IN_MEMORY, ... */ - unsigned que_state:2; /*!< valid when conc_state == TRX_ACTIVE: - TRX_QUE_RUNNING, TRX_QUE_LOCK_WAIT, - ... */ - unsigned isolation_level:2;/* TRX_ISO_REPEATABLE_READ, ... */ - unsigned check_foreigns:1;/* normally TRUE, but if the user + ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */ + ulint check_foreigns; /* normally TRUE, but if the user wants to suppress foreign key checks, (in table imports, for example) we set this FALSE */ - unsigned check_unique_secondary:1; + ulint check_unique_secondary; /* normally TRUE, but if the user wants to speed up inserts by suppressing unique key checks for secondary indexes when we decide if we can use the insert buffer for them, we set this FALSE */ - unsigned support_xa:1; /*!< normally we do the XA two-phase + ulint support_xa; /*!< normally we do the XA two-phase commit steps, but by setting this to FALSE, one can save CPU time and about 150 bytes in the undo log size as then we skip XA steps */ - unsigned flush_log_later:1;/* In 2PC, we hold the + ulint flush_log_at_trx_commit_session; + ulint flush_log_later;/* In 2PC, we hold the prepare_commit mutex across both phases. In that case, we defer flush of the logs to disk until after we release the mutex. 
*/ - unsigned must_flush_log_later:1;/* this flag is set to TRUE in + ulint must_flush_log_later;/* this flag is set to TRUE in trx_commit_off_kernel() if flush_log_later was TRUE, and there were modifications by the transaction; in that case we must flush the log in trx_commit_complete_for_mysql() */ - unsigned dict_operation:2;/**< @see enum trx_dict_op */ - unsigned duplicates:2; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */ - unsigned active_trans:2; /*!< 1 - if a transaction in MySQL + ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */ + ulint active_trans; /*!< 1 - if a transaction in MySQL is active. 2 - if prepare_commit_mutex was taken */ - unsigned has_search_latch:1; + ulint has_search_latch; /* TRUE if this trx has latched the search system latch in S-mode */ - unsigned declared_to_be_inside_innodb:1; + ulint deadlock_mark; /*!< a mark field used in deadlock + checking algorithm. */ + trx_dict_op_t dict_operation; /**< @see enum trx_dict_op */ + + /* Fields protected by the srv_conc_mutex. */ + ulint declared_to_be_inside_innodb; /* this is TRUE if we have declared this transaction in srv_conc_enter_innodb to be inside the InnoDB engine */ - unsigned handling_signals:1;/* this is TRUE as long as the trx - is handling signals */ - unsigned dict_operation_lock_mode:2; - /* 0, RW_S_LATCH, or RW_X_LATCH: + + /* Fields protected by dict_operation_lock. The very latch + it is used to track. */ + ulint dict_operation_lock_mode; + /*!< 0, RW_S_LATCH, or RW_X_LATCH: the latch mode trx currently holds on dict_operation_lock */ + + /* All the next fields are protected by the kernel mutex, except the + undo logs which are protected by undo_mutex */ + ulint is_purge; /*!< 0=user transaction, 1=purge */ + ulint is_recovered; /*!< 0=normal transaction, + 1=recovered, must be rolled back */ + ulint que_state; /*!< valid when conc_state + == TRX_ACTIVE: TRX_QUE_RUNNING, + TRX_QUE_LOCK_WAIT, ... 
*/ + ulint handling_signals;/* this is TRUE as long as the trx + is handling signals */ time_t start_time; /*!< time the trx object was created or the state last time became TRX_ACTIVE */ @@ -544,9 +561,6 @@ struct trx_struct{ /*------------------------------*/ void* mysql_thd; /*!< MySQL thread handle corresponding to this trx, or NULL */ - char** mysql_query_str;/* pointer to the field in mysqld_thd - which contains the pointer to the - current SQL query string */ const char* mysql_log_file_name; /* if MySQL binlog is used, this field contains a pointer to the latest file @@ -657,11 +671,6 @@ struct trx_struct{ wait_thrs; /*!< query threads belonging to this trx that are in the QUE_THR_LOCK_WAIT state */ - ulint deadlock_mark; /*!< a mark field used in deadlock - checking algorithm. This must be - in its own machine word, because - it can be changed by other - threads while holding kernel_mutex. */ /*------------------------------*/ mem_heap_t* lock_heap; /*!< memory heap for the locks of the transaction */ diff --git a/storage/xtradb/include/trx0types.h b/storage/xtradb/include/trx0types.h index 24cf57d53d5..40a7256cbfd 100644 --- a/storage/xtradb/include/trx0types.h +++ b/storage/xtradb/include/trx0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -70,6 +70,13 @@ typedef struct trx_named_savept_struct trx_named_savept_t; enum trx_rb_ctx { RB_NONE = 0, /*!< no rollback */ RB_NORMAL, /*!< normal rollback */ + RB_RECOVERY_PURGE_REC, + /*!< rolling back an incomplete transaction, + in crash recovery, rolling back an + INSERT that was performed by updating a + delete-marked record; if the delete-marked record + no longer exists in an active read view, it will + be purged */ RB_RECOVERY /*!< rolling back an incomplete transaction, in crash recovery */ }; diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 0d9417fab38..71476443964 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2009, Sun Microsystems, Inc. @@ -46,8 +46,8 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 1 #define INNODB_VERSION_MINOR 0 -#define INNODB_VERSION_BUGFIX 6 -#define PERCONA_INNODB_VERSION 10 +#define INNODB_VERSION_BUGFIX 10 +#define PERCONA_INNODB_VERSION 12.0 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; @@ -118,7 +118,7 @@ if we are compiling on Windows. */ /* Include <sys/stat.h> to get S_I... macros defined for os0file.c */ # include <sys/stat.h> -# if !defined(__NETWARE__) && !defined(__WIN__) +# if !defined(__NETWARE__) && !defined(__WIN__) # include <sys/mman.h> /* mmap() for os0proc.c */ # endif @@ -168,6 +168,9 @@ command. Not tested on Windows. 
*/ #define UNIV_COMPILE_TEST_FUNCS */ +#if defined(HAVE_valgrind)&& defined(HAVE_VALGRIND_MEMCHECK_H) +# define UNIV_DEBUG_VALGRIND +#endif #if 0 #define UNIV_DEBUG_VALGRIND /* Enable extra Valgrind instrumentation */ @@ -232,11 +235,6 @@ by one. */ /* the above option prevents forcing of log to disk at a buffer page write: it should be tested with this option off; also some ibuf tests are suppressed */ -/* -#define UNIV_BASIC_LOG_DEBUG -*/ - /* the above option enables basic recovery debugging: - new allocated file pages are reset */ /* Linkage specifier for non-static InnoDB symbols (variables and functions) that are only referenced from within InnoDB, not from MySQL */ @@ -299,6 +297,12 @@ management to ensure correct alignment for doubles etc. */ /* Maximum number of parallel threads in a parallelized operation */ #define UNIV_MAX_PARALLELISM 32 +/* The maximum length of a table name. This is the MySQL limit and is +defined in mysql_com.h like NAME_CHAR_LEN*SYSTEM_CHARSET_MBMAXLEN, the +number does not include a terminating '\0'. InnoDB probably can handle +longer names internally */ +#define MAX_TABLE_NAME_LEN 192 + /* UNIVERSAL TYPE DEFINITIONS ========================== @@ -326,10 +330,12 @@ macro ULINTPF. */ typedef unsigned __int64 ulint; #define ULINTPF "%I64u" typedef __int64 lint; +#define MYSQL_SYSVAR_ULINT MYSQL_SYSVAR_ULONGLONG #else typedef unsigned long int ulint; #define ULINTPF "%lu" typedef long int lint; +#define MYSQL_SYSVAR_ULINT MYSQL_SYSVAR_ULONG #endif #ifdef __WIN__ diff --git a/storage/xtradb/include/ut0lst.h b/storage/xtradb/include/ut0lst.h index 7b15c052978..4305f6571b5 100644 --- a/storage/xtradb/include/ut0lst.h +++ b/storage/xtradb/include/ut0lst.h @@ -257,5 +257,48 @@ do { \ ut_a(ut_list_node_313 == NULL); \ } while (0) +/********************************************************************//** +Align nodes with moving location. 
+@param NAME the name of the list +@param TYPE node type +@param BASE base node (not a pointer to it) +@param OFFSET offset moved */ +#define UT_LIST_OFFSET(NAME, TYPE, BASE, FADDR, FOFFSET, BOFFSET) \ +do { \ + ulint ut_list_i_313; \ + TYPE* ut_list_node_313; \ + \ + if ((BASE).start) \ + (BASE).start = (void*)((char*)((BASE).start) \ + + (((void*)((BASE).start) > (void*)FADDR)?FOFFSET:BOFFSET));\ + if ((BASE).end) \ + (BASE).end = (void*)((char*)((BASE).end) \ + + (((void*)((BASE).end) > (void*)FADDR)?FOFFSET:BOFFSET));\ + \ + ut_list_node_313 = (BASE).start; \ + \ + for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \ + ut_a(ut_list_node_313); \ + if ((ut_list_node_313->NAME).prev) \ + (ut_list_node_313->NAME).prev = (void*)((char*)((ut_list_node_313->NAME).prev) \ + + (((void*)((ut_list_node_313->NAME).prev) > (void*)FADDR)?FOFFSET:BOFFSET));\ + if ((ut_list_node_313->NAME).next) \ + (ut_list_node_313->NAME).next = (void *)((char*)((ut_list_node_313->NAME).next) \ + + (((void*)((ut_list_node_313->NAME).next)> (void*)FADDR)?FOFFSET:BOFFSET));\ + ut_list_node_313 = (ut_list_node_313->NAME).next; \ + } \ + \ + ut_a(ut_list_node_313 == NULL); \ + \ + ut_list_node_313 = (BASE).end; \ + \ + for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \ + ut_a(ut_list_node_313); \ + ut_list_node_313 = (ut_list_node_313->NAME).prev; \ + } \ + \ + ut_a(ut_list_node_313 == NULL); \ +} while (0) + #endif diff --git a/storage/xtradb/include/ut0rbt.h b/storage/xtradb/include/ut0rbt.h new file mode 100644 index 00000000000..6fd050acfe7 --- /dev/null +++ b/storage/xtradb/include/ut0rbt.h @@ -0,0 +1,309 @@ +/***************************************************************************** +Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/*******************************************************************//** +@file include/ut0rbt.h +Red-Black tree implementation. + +Created 2007-03-20 Sunny Bains +************************************************************************/ + +#ifndef INNOBASE_UT0RBT_H +#define INNOBASE_UT0RBT_H + +#if !defined(IB_RBT_TESTING) +#include "univ.i" +#include "ut0mem.h" +#else +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#define ut_malloc malloc +#define ut_free free +#define ulint unsigned long +#define ut_a(c) assert(c) +#define ut_error assert(0) +#define ibool unsigned int +#define TRUE 1 +#define FALSE 0 +#endif + +/* Red black tree typedefs */ +typedef struct ib_rbt_struct ib_rbt_t; +typedef struct ib_rbt_node_struct ib_rbt_node_t; +/* FIXME: Iterator is a better name than _bound_ */ +typedef struct ib_rbt_bound_struct ib_rbt_bound_t; +typedef void (*ib_rbt_print_node)(const ib_rbt_node_t* node); +typedef int (*ib_rbt_compare)(const void* p1, const void* p2); + +/* Red black tree color types */ +enum ib_rbt_color_enum { + IB_RBT_RED, + IB_RBT_BLACK +}; + +typedef enum ib_rbt_color_enum ib_rbt_color_t; + +/* Red black tree node */ +struct ib_rbt_node_struct { + ib_rbt_color_t color; /* color of this node */ + + ib_rbt_node_t* left; /* points left child */ + ib_rbt_node_t* right; /* points right child */ + ib_rbt_node_t* parent; /* points parent node */ + + char value[1]; /* Data value */ +}; + +/* Red black 
tree instance.*/ +struct ib_rbt_struct { + ib_rbt_node_t* nil; /* Black colored node that is + used as a sentinel. This is + pre-allocated too.*/ + + ib_rbt_node_t* root; /* Root of the tree, this is + pre-allocated and the first + data node is the left child.*/ + + ulint n_nodes; /* Total number of data nodes */ + + ib_rbt_compare compare; /* Fn. to use for comparison */ + ulint sizeof_value; /* Sizeof the item in bytes */ +}; + +/* The result of searching for a key in the tree, this is useful for +a speedy lookup and insert if key doesn't exist.*/ +struct ib_rbt_bound_struct { + const ib_rbt_node_t* + last; /* Last node visited */ + + int result; /* Result of comparing with + the last non-nil node that + was visited */ +}; + +/* Size in elements (t is an rb tree instance) */ +#define rbt_size(t) (t->n_nodes) + +/* Check whether the rb tree is empty (t is an rb tree instance) */ +#define rbt_empty(t) (rbt_size(t) == 0) + +/* Get data value (t is the data type, n is an rb tree node instance) */ +#define rbt_value(t, n) ((t*) &n->value[0]) + +/* Compare a key with the node value (t is tree, k is key, n is node)*/ +#define rbt_compare(t, k, n) (t->compare(k, n->value)) + +/****************************************************************//** +Free an instance of a red black tree */ +UNIV_INTERN +void +rbt_free( +/*=====*/ + ib_rbt_t* tree); /*!< in: rb tree to free */ +/****************************************************************//** +Create an instance of a red black tree +@return rb tree instance */ +UNIV_INTERN +ib_rbt_t* +rbt_create( +/*=======*/ + size_t sizeof_value, /*!< in: size in bytes */ + ib_rbt_compare compare); /*!< in: comparator */ +/****************************************************************//** +Delete a node from the red black tree, identified by key. 
+@return TRUE if success FALSE if not found */ +UNIV_INTERN +ibool +rbt_delete( +/*=======*/ + ib_rbt_t* tree, /*!< in: rb tree */ + const void* key); /*!< in: key to delete */ +/****************************************************************//** +Remove a node from the rb tree, the node is not free'd, that is the +callers responsibility. +@return the deleted node with the const. */ +UNIV_INTERN +ib_rbt_node_t* +rbt_remove_node( +/*============*/ + ib_rbt_t* tree, /*!< in: rb tree */ + const ib_rbt_node_t* + node); /*!< in: node to delete, this + is a fudge and declared const + because the caller has access + only to const nodes.*/ +/****************************************************************//** +Find a matching node in the rb tree. +@return node if found else return NULL */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_lookup( +/*=======*/ + const ib_rbt_t* tree, /*!< in: rb tree to search */ + const void* key); /*!< in: key to lookup */ +/****************************************************************//** +Generic insert of a value in the rb tree. +@return inserted node */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_insert( +/*=======*/ + ib_rbt_t* tree, /*!< in: rb tree */ + const void* key, /*!< in: key for ordering */ + const void* value); /*!< in: data that will be + copied to the node.*/ +/****************************************************************//** +Add a new node to the tree, useful for data that is pre-sorted. 
+@return appended node */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_add_node( +/*=========*/ + ib_rbt_t* tree, /*!< in: rb tree */ + ib_rbt_bound_t* parent, /*!< in: parent */ + const void* value); /*!< in: this value is copied + to the node */ +/****************************************************************//** +Return the left most data node in the tree +@return left most node */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_first( +/*======*/ + const ib_rbt_t* tree); /*!< in: rb tree */ +/****************************************************************//** +Return the right most data node in the tree +@return right most node */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_last( +/*=====*/ + const ib_rbt_t* tree); /*!< in: rb tree */ +/****************************************************************//** +Return the next node from current. +@return successor node to current that is passed in. */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_next( +/*=====*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + const ib_rbt_node_t* /*!< in: current node */ + current); +/****************************************************************//** +Return the prev node from current. +@return precedessor node to current that is passed in */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_prev( +/*=====*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + const ib_rbt_node_t* /*!< in: current node */ + current); +/****************************************************************//** +Find the node that has the lowest key that is >= key. +@return node that satisfies the lower bound constraint or NULL */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_lower_bound( +/*============*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + const void* key); /*!< in: key to search */ +/****************************************************************//** +Find the node that has the greatest key that is <= key. 
+@return node that satisifies the upper bound constraint or NULL */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_upper_bound( +/*============*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + const void* key); /*!< in: key to search */ +/****************************************************************//** +Search for the key, a node will be retuned in parent.last, whether it +was found or not. If not found then parent.last will contain the +parent node for the possibly new key otherwise the matching node. +@return result of last comparison */ +UNIV_INTERN +int +rbt_search( +/*=======*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + ib_rbt_bound_t* parent, /*!< in: search bounds */ + const void* key); /*!< in: key to search */ +/****************************************************************//** +Search for the key, a node will be retuned in parent.last, whether it +was found or not. If not found then parent.last will contain the +parent node for the possibly new key otherwise the matching node. +@return result of last comparison */ +UNIV_INTERN +int +rbt_search_cmp( +/*===========*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + ib_rbt_bound_t* parent, /*!< in: search bounds */ + const void* key, /*!< in: key to search */ + ib_rbt_compare compare); /*!< in: comparator */ +/****************************************************************//** +Clear the tree, deletes (and free's) all the nodes. */ +UNIV_INTERN +void +rbt_clear( +/*======*/ + ib_rbt_t* tree); /*!< in: rb tree */ +/****************************************************************//** +Merge the node from dst into src. Return the number of nodes merged. +@return no. of recs merged */ +UNIV_INTERN +ulint +rbt_merge_uniq( +/*===========*/ + ib_rbt_t* dst, /*!< in: dst rb tree */ + const ib_rbt_t* src); /*!< in: src rb tree */ +/****************************************************************//** +Merge the node from dst into src. Return the number of nodes merged. 
+Delete the nodes from src after copying node to dst. As a side effect +the duplicates will be left untouched in the src, since we don't support +duplicates (yet). NOTE: src and dst must be similar, the function doesn't +check for this condition (yet). +@return no. of recs merged */ +UNIV_INTERN +ulint +rbt_merge_uniq_destructive( +/*=======================*/ + ib_rbt_t* dst, /*!< in: dst rb tree */ + ib_rbt_t* src); /*!< in: src rb tree */ +/****************************************************************//** +Verify the integrity of the RB tree. For debugging. 0 failure else height +of tree (in count of black nodes). +@return TRUE if OK FALSE if tree invalid. */ +UNIV_INTERN +ibool +rbt_validate( +/*=========*/ + const ib_rbt_t* tree); /*!< in: tree to validate */ +/****************************************************************//** +Iterate over the tree in depth first order. */ +UNIV_INTERN +void +rbt_print( +/*======*/ + const ib_rbt_t* tree, /*!< in: tree to traverse */ + ib_rbt_print_node print); /*!< in: print function */ + +#endif /* INNOBASE_UT0RBT_H */ diff --git a/storage/xtradb/include/ut0rnd.ic b/storage/xtradb/include/ut0rnd.ic index 372b5b6d5b7..c2043660efd 100644 --- a/storage/xtradb/include/ut0rnd.ic +++ b/storage/xtradb/include/ut0rnd.ic @@ -152,6 +152,7 @@ ut_hash_ulint( ulint key, /*!< in: value to be hashed */ ulint table_size) /*!< in: hash table size */ { + ut_ad(table_size); key = key ^ UT_HASH_RANDOM_MASK2; return(key % table_size); diff --git a/storage/xtradb/lock/lock0lock.c b/storage/xtradb/lock/lock0lock.c index 59394f13766..7ec4a53e0ea 100644 --- a/storage/xtradb/lock/lock0lock.c +++ b/storage/xtradb/lock/lock0lock.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -376,6 +376,7 @@ UNIV_INTERN FILE* lock_latest_err_file; /* Flags for recursive deadlock search */ #define LOCK_VICTIM_IS_START 1 #define LOCK_VICTIM_IS_OTHER 2 +#define LOCK_EXCEED_MAX_DEPTH 3 /********************************************************************//** Checks if a lock request results in a deadlock. @@ -394,24 +395,25 @@ Looks recursively for a deadlock. deadlock and we chose 'start' as the victim, LOCK_VICTIM_IS_OTHER if a deadlock was found and we chose some other trx as a victim: we must do the search again in this last case because there may be another -deadlock! */ +deadlock! +LOCK_EXCEED_MAX_DEPTH if the lock search exceeds max steps or max depth. */ static ulint lock_deadlock_recursive( /*====================*/ trx_t* start, /*!< in: recursion starting point */ trx_t* trx, /*!< in: a transaction waiting for a lock */ - lock_t* wait_lock, /*!< in: the lock trx is waiting to be granted */ + lock_t* wait_lock, /*!< in: lock that is waiting to be granted */ ulint* cost, /*!< in/out: number of calculation steps thus far: if this exceeds LOCK_MAX_N_STEPS_... - we return LOCK_VICTIM_IS_START */ + we return LOCK_EXCEED_MAX_DEPTH */ ulint depth); /*!< in: recursion depth: if this exceeds LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we - return LOCK_VICTIM_IS_START */ + return LOCK_EXCEED_MAX_DEPTH */ /*********************************************************************//** Gets the nth bit of a record lock. -@return TRUE if bit set */ +@return TRUE if bit set also if i == ULINT_UNDEFINED return FALSE*/ UNIV_INLINE ibool lock_rec_get_nth_bit( @@ -1222,7 +1224,7 @@ lock_rec_get_first_on_page( /*********************************************************************//** Gets the next explicit lock request on a record. 
-@return next lock, NULL if none exists */ +@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */ UNIV_INLINE lock_t* lock_rec_get_next( @@ -1731,11 +1733,11 @@ lock_rec_create( Enqueues a waiting request for a lock which cannot be granted immediately. Checks for deadlocks. @return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or -DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another -transaction was chosen as a victim, and we got the lock immediately: -no need to wait then */ +DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that +there was a deadlock, but another transaction was chosen as a victim, +and we got the lock immediately: no need to wait then */ static -ulint +enum db_err lock_rec_enqueue_waiting( /*=====================*/ ulint type_mode,/*!< in: lock mode this @@ -1809,7 +1811,7 @@ lock_rec_enqueue_waiting( if (trx->wait_lock == NULL) { - return(DB_SUCCESS); + return(DB_SUCCESS_LOCKED_REC); } trx->que_state = TRX_QUE_LOCK_WAIT; @@ -1929,6 +1931,16 @@ somebody_waits: return(lock_rec_create(type_mode, block, heap_no, index, trx)); } +/** Record locking request status */ +enum lock_rec_req_status { + /** Failed to acquire a lock */ + LOCK_REC_FAIL, + /** Succeeded in acquiring a lock (implicit or already acquired) */ + LOCK_REC_SUCCESS, + /** Explicitly created a new lock */ + LOCK_REC_SUCCESS_CREATED +}; + /*********************************************************************//** This is a fast routine for locking a record in the most common cases: there are no explicit locks on the page, or there is just one lock, owned @@ -1936,9 +1948,9 @@ by this transaction, and of the right type_mode. This is a low-level function which does NOT look at implicit locks! Checks lock compatibility within explicit locks. This function sets a normal next-key lock, or in the case of a page supremum record, a gap type lock. 
-@return TRUE if locking succeeded */ +@return whether the locking succeeded */ UNIV_INLINE -ibool +enum lock_rec_req_status lock_rec_lock_fast( /*===============*/ ibool impl, /*!< in: if TRUE, no lock is set @@ -1977,19 +1989,19 @@ lock_rec_lock_fast( lock_rec_create(mode, block, heap_no, index, trx); } - return(TRUE); + return(LOCK_REC_SUCCESS_CREATED); } if (lock_rec_get_next_on_page(lock)) { - return(FALSE); + return(LOCK_REC_FAIL); } if (lock->trx != trx || lock->type_mode != (mode | LOCK_REC) || lock_rec_get_n_bits(lock) <= heap_no) { - return(FALSE); + return(LOCK_REC_FAIL); } if (!impl) { @@ -1998,10 +2010,11 @@ lock_rec_lock_fast( if (!lock_rec_get_nth_bit(lock, heap_no)) { lock_rec_set_nth_bit(lock, heap_no); + return(LOCK_REC_SUCCESS_CREATED); } } - return(TRUE); + return(LOCK_REC_SUCCESS); } /*********************************************************************//** @@ -2009,9 +2022,10 @@ This is the general, and slower, routine for locking a record. This is a low-level function which does NOT look at implicit locks! Checks lock compatibility within explicit locks. This function sets a normal next-key lock, or in the case of a page supremum record, a gap type lock. 
-@return DB_SUCCESS, DB_LOCK_WAIT, or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ static -ulint +enum db_err lock_rec_lock_slow( /*===============*/ ibool impl, /*!< in: if TRUE, no lock is set @@ -2028,7 +2042,6 @@ lock_rec_lock_slow( que_thr_t* thr) /*!< in: query thread */ { trx_t* trx; - ulint err; ut_ad(mutex_own(&kernel_mutex)); ut_ad((LOCK_MODE_MASK & mode) != LOCK_S @@ -2047,27 +2060,23 @@ lock_rec_lock_slow( /* The trx already has a strong enough lock on rec: do nothing */ - err = DB_SUCCESS; } else if (lock_rec_other_has_conflicting(mode, block, heap_no, trx)) { /* If another transaction has a non-gap conflicting request in the queue, as this transaction does not have a lock strong enough already granted on the record, we have to wait. */ - err = lock_rec_enqueue_waiting(mode, block, heap_no, - index, thr); - } else { - if (!impl) { - /* Set the requested lock on the record */ - - lock_rec_add_to_queue(LOCK_REC | mode, block, - heap_no, index, trx); - } + return(lock_rec_enqueue_waiting(mode, block, heap_no, + index, thr)); + } else if (!impl) { + /* Set the requested lock on the record */ - err = DB_SUCCESS; + lock_rec_add_to_queue(LOCK_REC | mode, block, + heap_no, index, trx); + return(DB_SUCCESS_LOCKED_REC); } - return(err); + return(DB_SUCCESS); } /*********************************************************************//** @@ -2076,9 +2085,10 @@ possible, enqueues a waiting lock request. This is a low-level function which does NOT look at implicit locks! Checks lock compatibility within explicit locks. This function sets a normal next-key lock, or in the case of a page supremum record, a gap type lock. 
-@return DB_SUCCESS, DB_LOCK_WAIT, or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ static -ulint +enum db_err lock_rec_lock( /*==========*/ ibool impl, /*!< in: if TRUE, no lock is set @@ -2094,8 +2104,6 @@ lock_rec_lock( dict_index_t* index, /*!< in: index of record */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; - ut_ad(mutex_own(&kernel_mutex)); ut_ad((LOCK_MODE_MASK & mode) != LOCK_S || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); @@ -2107,18 +2115,20 @@ lock_rec_lock( || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP || mode - (LOCK_MODE_MASK & mode) == 0); - if (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) { - - /* We try a simplified and faster subroutine for the most - common cases */ - - err = DB_SUCCESS; - } else { - err = lock_rec_lock_slow(impl, mode, block, - heap_no, index, thr); + /* We try a simplified and faster subroutine for the most + common cases */ + switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) { + case LOCK_REC_SUCCESS: + return(DB_SUCCESS); + case LOCK_REC_SUCCESS_CREATED: + return(DB_SUCCESS_LOCKED_REC); + case LOCK_REC_FAIL: + return(lock_rec_lock_slow(impl, mode, block, + heap_no, index, thr)); } - return(err); + ut_error; + return(DB_ERROR); } /*********************************************************************//** @@ -2404,7 +2414,7 @@ lock_rec_inherit_to_gap( if (!lock_rec_get_insert_intention(lock) && !((srv_locks_unsafe_for_binlog || lock->trx->isolation_level - == TRX_ISO_READ_COMMITTED) + <= TRX_ISO_READ_COMMITTED) && lock_get_mode(lock) == LOCK_X)) { lock_rec_add_to_queue(LOCK_REC | LOCK_GAP @@ -3267,8 +3277,6 @@ lock_deadlock_occurs( lock_t* lock, /*!< in: lock the transaction is requesting */ trx_t* trx) /*!< in: transaction */ { - dict_table_t* table; - dict_index_t* index; trx_t* mark_trx; ulint ret; ulint cost = 0; @@ -3290,31 +3298,51 @@ retry: ret = lock_deadlock_recursive(trx, trx, lock, 
&cost, 0); - if (ret == LOCK_VICTIM_IS_OTHER) { + switch (ret) { + case LOCK_VICTIM_IS_OTHER: /* We chose some other trx as a victim: retry if there still is a deadlock */ - goto retry; - } - if (UNIV_UNLIKELY(ret == LOCK_VICTIM_IS_START)) { - if (lock_get_type_low(lock) & LOCK_TABLE) { - table = lock->un_member.tab_lock.table; - index = NULL; + case LOCK_EXCEED_MAX_DEPTH: + /* If the lock search exceeds the max step + or the max depth, the current trx will be + the victim. Print its information. */ + rewind(lock_latest_err_file); + ut_print_timestamp(lock_latest_err_file); + + fputs("TOO DEEP OR LONG SEARCH IN THE LOCK TABLE" + " WAITS-FOR GRAPH, WE WILL ROLL BACK" + " FOLLOWING TRANSACTION \n", + lock_latest_err_file); + + fputs("\n*** TRANSACTION:\n", lock_latest_err_file); + trx_print(lock_latest_err_file, trx, 3000); + + fputs("*** WAITING FOR THIS LOCK TO BE GRANTED:\n", + lock_latest_err_file); + + if (lock_get_type(lock) == LOCK_REC) { + lock_rec_print(lock_latest_err_file, lock); } else { - index = lock->index; - table = index->table; + lock_table_print(lock_latest_err_file, lock); } + break; - lock_deadlock_found = TRUE; - + case LOCK_VICTIM_IS_START: + srv_n_lock_deadlock_count++; fputs("*** WE ROLL BACK TRANSACTION (2)\n", lock_latest_err_file); + break; - return(TRUE); + default: + /* No deadlock detected*/ + return(FALSE); } - return(FALSE); + lock_deadlock_found = TRUE; + + return(TRUE); } /********************************************************************//** @@ -3323,25 +3351,26 @@ Looks recursively for a deadlock. deadlock and we chose 'start' as the victim, LOCK_VICTIM_IS_OTHER if a deadlock was found and we chose some other trx as a victim: we must do the search again in this last case because there may be another -deadlock! */ +deadlock! +LOCK_EXCEED_MAX_DEPTH if the lock search exceeds max steps or max depth. 
*/ static ulint lock_deadlock_recursive( /*====================*/ trx_t* start, /*!< in: recursion starting point */ trx_t* trx, /*!< in: a transaction waiting for a lock */ - lock_t* wait_lock, /*!< in: the lock trx is waiting to be granted */ + lock_t* wait_lock, /*!< in: lock that is waiting to be granted */ ulint* cost, /*!< in/out: number of calculation steps thus far: if this exceeds LOCK_MAX_N_STEPS_... - we return LOCK_VICTIM_IS_START */ + we return LOCK_EXCEED_MAX_DEPTH */ ulint depth) /*!< in: recursion depth: if this exceeds LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we - return LOCK_VICTIM_IS_START */ + return LOCK_EXCEED_MAX_DEPTH */ { + ulint ret; lock_t* lock; - ulint bit_no = ULINT_UNDEFINED; trx_t* lock_trx; - ulint ret; + ulint heap_no = ULINT_UNDEFINED; ut_a(trx); ut_a(start); @@ -3357,27 +3386,44 @@ lock_deadlock_recursive( *cost = *cost + 1; - lock = wait_lock; - if (lock_get_type_low(wait_lock) == LOCK_REC) { + ulint space; + ulint page_no; - bit_no = lock_rec_find_set_bit(wait_lock); + heap_no = lock_rec_find_set_bit(wait_lock); + ut_a(heap_no != ULINT_UNDEFINED); + + space = wait_lock->un_member.rec_lock.space; + page_no = wait_lock->un_member.rec_lock.page_no; + + lock = lock_rec_get_first_on_page_addr(space, page_no); + + /* Position the iterator on the first matching record lock. */ + while (lock != NULL + && lock != wait_lock + && !lock_rec_get_nth_bit(lock, heap_no)) { + + lock = lock_rec_get_next_on_page(lock); + } - ut_a(bit_no != ULINT_UNDEFINED); + if (lock == wait_lock) { + lock = NULL; + } + + ut_ad(lock == NULL || lock_rec_get_nth_bit(lock, heap_no)); + + } else { + lock = wait_lock; } /* Look at the locks ahead of wait_lock in the lock queue */ for (;;) { - if (lock_get_type_low(lock) & LOCK_TABLE) { - - lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, - lock); - } else { - ut_ad(lock_get_type_low(lock) == LOCK_REC); - ut_a(bit_no != ULINT_UNDEFINED); + /* Get previous table lock. 
*/ + if (heap_no == ULINT_UNDEFINED) { - lock = (lock_t*) lock_rec_get_prev(lock, bit_no); + lock = UT_LIST_GET_PREV( + un_member.tab_lock.locks, lock); } if (lock == NULL) { @@ -3395,7 +3441,7 @@ lock_deadlock_recursive( lock_trx = lock->trx; - if (lock_trx == start || too_far) { + if (lock_trx == start) { /* We came back to the recursion starting point: a deadlock detected; or we have @@ -3442,19 +3488,10 @@ lock_deadlock_recursive( } #ifdef UNIV_DEBUG if (lock_print_waits) { - fputs("Deadlock detected" - " or too long search\n", + fputs("Deadlock detected\n", stderr); } #endif /* UNIV_DEBUG */ - if (too_far) { - - fputs("TOO DEEP OR LONG SEARCH" - " IN THE LOCK TABLE" - " WAITS-FOR GRAPH\n", ef); - - return(LOCK_VICTIM_IS_START); - } if (trx_weight_cmp(wait_lock->trx, start) >= 0) { @@ -3490,6 +3527,21 @@ lock_deadlock_recursive( return(LOCK_VICTIM_IS_OTHER); } + if (too_far) { + +#ifdef UNIV_DEBUG + if (lock_print_waits) { + fputs("Deadlock search exceeds" + " max steps or depth.\n", + stderr); + } +#endif /* UNIV_DEBUG */ + /* The information about transaction/lock + to be rolled back is available in the top + level. Do not print anything here. */ + return(LOCK_EXCEED_MAX_DEPTH); + } + if (lock_trx->que_state == TRX_QUE_LOCK_WAIT) { /* Another trx ahead has requested lock in an @@ -3499,12 +3551,28 @@ lock_deadlock_recursive( ret = lock_deadlock_recursive( start, lock_trx, lock_trx->wait_lock, cost, depth + 1); + if (ret != 0) { return(ret); } } } + /* Get the next record lock to check. 
*/ + if (heap_no != ULINT_UNDEFINED) { + + ut_a(lock != NULL); + + do { + lock = lock_rec_get_next_on_page(lock); + } while (lock != NULL + && lock != wait_lock + && !lock_rec_get_nth_bit(lock, heap_no)); + + if (lock == wait_lock) { + lock = NULL; + } + } }/* end of the 'for (;;)'-loop */ } @@ -3706,9 +3774,10 @@ lock_table_enqueue_waiting( /*********************************************************************//** Checks if other transactions have an incompatible mode lock request in -the lock queue. */ +the lock queue. +@return lock or NULL */ UNIV_INLINE -ibool +lock_t* lock_table_other_has_incompatible( /*==============================*/ trx_t* trx, /*!< in: transaction, or NULL if all @@ -3730,13 +3799,13 @@ lock_table_other_has_incompatible( && (!lock_mode_compatible(lock_get_mode(lock), mode)) && (wait || !(lock_get_wait(lock)))) { - return(TRUE); + return(lock); } lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock); } - return(FALSE); + return(NULL); } /*********************************************************************//** @@ -3887,8 +3956,8 @@ lock_rec_unlock( const rec_t* rec, /*!< in: record */ enum lock_mode lock_mode)/*!< in: LOCK_S or LOCK_X */ { + lock_t* first_lock; lock_t* lock; - lock_t* release_lock = NULL; ulint heap_no; ut_ad(trx && rec); @@ -3898,48 +3967,40 @@ lock_rec_unlock( mutex_enter(&kernel_mutex); - lock = lock_rec_get_first(block, heap_no); + first_lock = lock_rec_get_first(block, heap_no); /* Find the last lock with the same lock_mode and transaction from the record. 
*/ - while (lock != NULL) { + for (lock = first_lock; lock != NULL; + lock = lock_rec_get_next(heap_no, lock)) { if (lock->trx == trx && lock_get_mode(lock) == lock_mode) { - release_lock = lock; ut_a(!lock_get_wait(lock)); + lock_rec_reset_nth_bit(lock, heap_no); + goto released; } - - lock = lock_rec_get_next(heap_no, lock); } - /* If a record lock is found, release the record lock */ - - if (UNIV_LIKELY(release_lock != NULL)) { - lock_rec_reset_nth_bit(release_lock, heap_no); - } else { - mutex_exit(&kernel_mutex); - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: unlock row could not" - " find a %lu mode lock on the record\n", - (ulong) lock_mode); + mutex_exit(&kernel_mutex); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: unlock row could not" + " find a %lu mode lock on the record\n", + (ulong) lock_mode); - return; - } + return; +released: /* Check if we can now grant waiting lock requests */ - lock = lock_rec_get_first(block, heap_no); - - while (lock != NULL) { + for (lock = first_lock; lock != NULL; + lock = lock_rec_get_next(heap_no, lock)) { if (lock_get_wait(lock) && !lock_rec_has_to_wait_in_queue(lock)) { /* Grant the lock */ lock_grant(lock); } - - lock = lock_rec_get_next(heap_no, lock); } mutex_exit(&kernel_mutex); @@ -4260,31 +4321,34 @@ lock_rec_print( putc('\n', file); if ( srv_show_verbose_locks ) { - block = buf_page_try_get(space, page_no, &mtr); + block = buf_page_try_get(space, page_no, &mtr); + + for (i = 0; i < lock_rec_get_n_bits(lock); ++i) { + + if (!lock_rec_get_nth_bit(lock, i)) { + continue; + } + + fprintf(file, "Record lock, heap no %lu", (ulong) i); + if (block) { - for (i = 0; i < lock_rec_get_n_bits(lock); i++) { + const rec_t* rec; - if (lock_rec_get_nth_bit(lock, i)) { + rec = page_find_rec_with_heap_no( + buf_block_get_frame(block), i); - const rec_t* rec - = page_find_rec_with_heap_no( - buf_block_get_frame(block), i); - offsets = rec_get_offsets( - rec, lock->index, offsets, - 
ULINT_UNDEFINED, &heap); + offsets = rec_get_offsets( + rec, lock->index, offsets, + ULINT_UNDEFINED, &heap); - fprintf(file, "Record lock, heap no %lu ", - (ulong) i); - rec_print_new(file, rec, offsets); - putc('\n', file); - } - } - } else { - for (i = 0; i < lock_rec_get_n_bits(lock); i++) { - fprintf(file, "Record lock, heap no %lu\n", (ulong) i); - } + putc(' ', file); + rec_print_new(file, rec, offsets); } + + putc('\n', file); } + } + mtr_commit(&mtr); if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); @@ -4329,14 +4393,26 @@ lock_get_n_rec_locks(void) #endif /* PRINT_NUM_OF_LOCK_STRUCTS */ /*********************************************************************//** -Prints info of locks for all transactions. */ +Prints info of locks for all transactions. +@return FALSE if not able to obtain kernel mutex +and exits without printing info */ UNIV_INTERN -void +ibool lock_print_info_summary( /*====================*/ - FILE* file) /*!< in: file where to print */ + FILE* file, /*!< in: file where to print */ + ibool nowait) /*!< in: whether to wait for the kernel mutex */ { - lock_mutex_enter_kernel(); + /* if nowait is FALSE, wait on the kernel mutex, + otherwise return immediately if fail to obtain the + mutex. 
*/ + if (!nowait) { + lock_mutex_enter_kernel(); + } else if (mutex_enter_nowait(&kernel_mutex)) { + fputs("FAIL TO OBTAIN KERNEL MUTEX, " + "SKIP LOCK INFO PRINTING\n", file); + return(FALSE); + } if (lock_deadlock_found) { fputs("------------------------\n" @@ -4368,6 +4444,7 @@ lock_print_info_summary( "Total number of lock structs in row lock hash table %lu\n", (ulong) lock_get_n_rec_locks()); #endif /* PRINT_NUM_OF_LOCK_STRUCTS */ + return(TRUE); } /*********************************************************************//** @@ -4648,6 +4725,7 @@ lock_rec_queue_validate( ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block, heap_no, impl_trx)); } +#if 0 } else { /* The kernel mutex may get released temporarily in the @@ -4658,6 +4736,27 @@ lock_rec_queue_validate( (fil_space_t::latch), the following check WILL break latching order and may cause a deadlock of threads. */ + /* NOTE: This is a bogus check that would fail in the + following case: Our transaction is updating a + row. After it has updated the clustered index record, + it goes to a secondary index record and finds someone + else holding an explicit S- or X-lock on that + secondary index record, presumably from a locking + read. Our transaction cannot update the secondary + index immediately, but places a waiting X-lock request + on the secondary index record. There is nothing + illegal in this. The assertion is simply too strong. */ + + /* From the locking point of view, each secondary + index is a separate table. A lock that is held on + secondary index rec does not give any rights to modify + or read the clustered index rec. Therefore, we can + think of the sec index as a separate 'table' from the + clust index 'table'. Conversely, a transaction that + has acquired a lock on and modified a clustered index + record may need to wait for a lock on the + corresponding record in a secondary index. 
*/ + impl_trx = lock_sec_rec_some_has_impl_off_kernel( rec, index, offsets); @@ -4668,6 +4767,7 @@ lock_rec_queue_validate( ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block, heap_no, impl_trx)); } +#endif } lock = lock_rec_get_first(block, heap_no); @@ -4765,6 +4865,13 @@ loop: || lock->trx->conc_state == TRX_PREPARED || lock->trx->conc_state == TRX_COMMITTED_IN_MEMORY); +# ifdef UNIV_SYNC_DEBUG + /* Only validate the record queues when this thread is not + holding a space->latch. Deadlocks are possible due to + latching order violation when UNIV_DEBUG is defined while + UNIV_SYNC_DEBUG is not. */ + if (!sync_thread_levels_contains(SYNC_FSP)) +# endif /* UNIV_SYNC_DEBUG */ for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) { if (i == 1 || lock_rec_get_nth_bit(lock, i)) { @@ -4930,7 +5037,7 @@ lock_rec_insert_check_and_lock( } trx = thr_get_trx(thr); - next_rec = page_rec_get_next((rec_t*) rec); + next_rec = page_rec_get_next_const(rec); next_rec_heap_no = page_rec_get_heap_no(next_rec); lock_mutex_enter_kernel(); @@ -4988,7 +5095,14 @@ lock_rec_insert_check_and_lock( lock_mutex_exit_kernel(); - if ((err == DB_SUCCESS) && !dict_index_is_clust(index)) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + /* fall through */ + case DB_SUCCESS: + if (dict_index_is_clust(index)) { + break; + } /* Update the page max trx id field */ page_update_max_trx_id(block, buf_block_get_page_zip(block), @@ -5111,6 +5225,10 @@ lock_clust_rec_modify_check_and_lock( ut_ad(lock_rec_queue_validate(block, rec, index, offsets)); + if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) { + err = DB_SUCCESS; + } + return(err); } @@ -5177,22 +5295,27 @@ lock_sec_rec_modify_check_and_lock( } #endif /* UNIV_DEBUG */ - if (err == DB_SUCCESS) { + if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) { /* Update the page max trx id field */ + /* It might not be necessary to do this if + err == DB_SUCCESS (no new lock created), + but it should not cost too much performance. 
*/ page_update_max_trx_id(block, buf_block_get_page_zip(block), thr_get_trx(thr)->id, mtr); + err = DB_SUCCESS; } return(err); } /*********************************************************************//** -Like the counterpart for a clustered index below, but now we read a +Like lock_clust_rec_read_check_and_lock(), but reads a secondary index record. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_sec_rec_read_check_and_lock( /*=============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -5213,8 +5336,8 @@ lock_sec_rec_read_check_and_lock( LOCK_REC_NOT_GAP */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; - ulint heap_no; + enum db_err err; + ulint heap_no; ut_ad(!dict_index_is_clust(index)); ut_ad(block->frame == page_align(rec)); @@ -5265,9 +5388,10 @@ if the query thread should anyway be suspended for some reason; if not, then puts the transaction and the query thread to the lock wait state and inserts a waiting request for a record lock to the lock queue. Sets the requested mode lock on the record. 
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_clust_rec_read_check_and_lock( /*===============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -5288,8 +5412,8 @@ lock_clust_rec_read_check_and_lock( LOCK_REC_NOT_GAP */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; - ulint heap_no; + enum db_err err; + ulint heap_no; ut_ad(dict_index_is_clust(index)); ut_ad(block->frame == page_align(rec)); @@ -5360,17 +5484,22 @@ lock_clust_rec_read_check_and_lock_alt( mem_heap_t* tmp_heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; - ulint ret; + ulint err; rec_offs_init(offsets_); offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &tmp_heap); - ret = lock_clust_rec_read_check_and_lock(flags, block, rec, index, + err = lock_clust_rec_read_check_and_lock(flags, block, rec, index, offsets, mode, gap_mode, thr); if (tmp_heap) { mem_heap_free(tmp_heap); } - return(ret); + + if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) { + err = DB_SUCCESS; + } + + return(err); } /*******************************************************************//** diff --git a/storage/xtradb/log/log0log.c b/storage/xtradb/log/log0log.c index f2487407071..fade31037b5 100644 --- a/storage/xtradb/log/log0log.c +++ b/storage/xtradb/log/log0log.c @@ -1,23 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ -/***************************************************************************** - -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2009, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -1128,6 +1111,7 @@ log_io_complete( group = (log_group_t*)((ulint)group - 1); if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC + && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { fil_flush(group->space_id); @@ -1149,6 +1133,7 @@ log_io_complete( logs and cannot end up here! */ if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC + && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT && srv_unix_file_flush_method != SRV_UNIX_NOSYNC && srv_flush_log_at_trx_commit != 2) { @@ -1529,7 +1514,8 @@ loop: mutex_exit(&(log_sys->mutex)); - if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) { + if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC + || srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) { /* O_DSYNC means the OS did not buffer the log file at all: so we have also flushed to disk what we have written */ @@ -2045,7 +2031,7 @@ log_checkpoint( return(TRUE); } - ut_ad(log_sys->written_to_all_lsn >= oldest_lsn); + ut_ad(log_sys->flushed_to_disk_lsn >= oldest_lsn); if (log_sys->n_pending_checkpoint_writes > 0) { /* A checkpoint write is running */ @@ -3127,7 +3113,7 @@ loop: if (srv_fast_shutdown < 2 && (srv_error_monitor_active - || srv_lock_timeout_and_monitor_active)) { + || srv_lock_timeout_active || srv_monitor_active)) { mutex_exit(&kernel_mutex); diff --git 
a/storage/xtradb/log/log0recv.c b/storage/xtradb/log/log0recv.c index 25eee07067c..bbb634addb0 100644 --- a/storage/xtradb/log/log0recv.c +++ b/storage/xtradb/log/log0recv.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -138,7 +138,9 @@ UNIV_INTERN ulint recv_max_parsed_page_no; /** This many frames must be left free in the buffer pool when we scan the log and store the scanned log records in the buffer pool: we will use these free frames to read in pages when we start applying the -log records to the database. */ +log records to the database. +This is the default value. If the actual size of the buffer pool is +larger than 10 MB we'll set this value to 512. */ UNIV_INTERN ulint recv_n_pool_free_frames; /** The maximum lsn we see for a page during the recovery process. If this @@ -242,6 +244,7 @@ recv_sys_mem_free(void) } } +#ifndef UNIV_HOTBACKUP /************************************************************ Reset the state of the recovery system variables. */ UNIV_INTERN @@ -251,7 +254,7 @@ recv_sys_var_init(void) { recv_lsn_checks_on = FALSE; - recv_n_pool_free_frames = 1024; + recv_n_pool_free_frames = 256; recv_recovery_on = FALSE; @@ -277,10 +280,11 @@ recv_sys_var_init(void) recv_max_parsed_page_no = 0; - recv_n_pool_free_frames = 1024; + recv_n_pool_free_frames = 256; recv_max_page_lsn = 0; } +#endif /* !UNIV_HOTBACKUP */ /************************************************************ Inits the recovery system for a recovery operation. */ @@ -295,20 +299,37 @@ recv_sys_init( return; } + /* Initialize red-black tree for fast insertions into the + flush_list during recovery process. 
+ As this initialization is done while holding the buffer pool + mutex we perform it before acquiring recv_sys->mutex. */ +#ifndef UNIV_HOTBACKUP + buf_flush_init_flush_rbt(); + mutex_enter(&(recv_sys->mutex)); -#ifndef UNIV_HOTBACKUP recv_sys->heap = mem_heap_create_in_buffer(256); #else /* !UNIV_HOTBACKUP */ recv_sys->heap = mem_heap_create(256); recv_is_from_backup = TRUE; #endif /* !UNIV_HOTBACKUP */ + /* Set appropriate value of recv_n_pool_free_frames. */ + if (buf_pool_get_curr_size() >= (10 * 1024 * 1024)) { + /* Buffer pool of size greater than 10 MB. */ + recv_n_pool_free_frames = 512; + } + + if (buf_pool_get_curr_size() >= (32 * 1024 * 1024)) { + /* Buffer pool of size greater than 32 MB. */ + recv_n_pool_free_frames = 1024; + } + recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE); recv_sys->len = 0; recv_sys->recovered_offset = 0; - recv_sys->addr_hash = hash_create(available_memory / 64); + recv_sys->addr_hash = hash_create(available_memory / 512); recv_sys->n_addrs = 0; recv_sys->apply_log_recs = FALSE; @@ -348,7 +369,7 @@ recv_sys_empty_hash(void) hash_table_free(recv_sys->addr_hash); mem_heap_empty(recv_sys->heap); - recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 256); + recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 512); } #ifndef UNIV_HOTBACKUP @@ -373,6 +394,9 @@ recv_sys_debug_free(void) recv_sys->last_block_buf_start = NULL; mutex_exit(&(recv_sys->mutex)); + + /* Free up the flush_rbt. 
*/ + buf_flush_free_flush_rbt(); } # endif /* UNIV_LOG_DEBUG */ @@ -2117,15 +2141,6 @@ recv_parse_log_rec( } #endif /* UNIV_LOG_LSN_DEBUG */ - /* Check that page_no is sensible */ - - if (UNIV_UNLIKELY(*page_no > 0x8FFFFFFFUL)) { - - recv_sys->found_corrupt_log = TRUE; - - return(0); - } - new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr, NULL, NULL); if (UNIV_UNLIKELY(new_ptr == NULL)) { @@ -2234,6 +2249,14 @@ recv_report_corrupt_log( putc('\n', stderr); } +#ifndef UNIV_HOTBACKUP + if (!srv_force_recovery) { + fputs("InnoDB: Set innodb_force_recovery" + " to ignore this error.\n", stderr); + ut_error; + } +#endif /* !UNIV_HOTBACKUP */ + fputs("InnoDB: WARNING: the log file may have been corrupt and it\n" "InnoDB: is possible that the log scan did not proceed\n" "InnoDB: far enough in recovery! Please run CHECK TABLE\n" @@ -2623,7 +2646,7 @@ recv_scan_log_recs( ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0); ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0); - ut_ad(len > 0); + ut_ad(len >= OS_FILE_LOG_BLOCK_SIZE); ut_a(store_to_hash <= TRUE); finished = FALSE; @@ -2748,6 +2771,16 @@ recv_scan_log_recs( recv_sys->found_corrupt_log = TRUE; +#ifndef UNIV_HOTBACKUP + if (!srv_force_recovery) { + fputs("InnoDB: Set" + " innodb_force_recovery" + " to ignore this error.\n", + stderr); + ut_error; + } +#endif /* !UNIV_HOTBACKUP */ + } else if (!recv_sys->found_corrupt_log) { more_data = recv_sys_add_to_parsing_buf( log_block, scanned_lsn); @@ -2932,9 +2965,12 @@ recv_recovery_from_checkpoint_start_func( ib_uint64_t contiguous_lsn; ib_uint64_t archived_lsn; byte* buf; - byte log_hdr_buf[LOG_FILE_HDR_SIZE]; + byte* log_hdr_buf; + byte log_hdr_buf_base[LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE]; ulint err; + log_hdr_buf = ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE); + #ifdef UNIV_LOG_ARCHIVE ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX); /** TRUE when recovering from a checkpoint */ @@ -3277,8 +3313,6 @@ void 
recv_recovery_from_checkpoint_finish(void) /*======================================*/ { - int i; - /* Apply the hashed log records to the respective file pages */ if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { @@ -3403,9 +3437,16 @@ recv_recovery_from_checkpoint_finish(void) The data dictionary latch should guarantee that there is at most one data dictionary transaction active at a time. */ trx_rollback_or_clean_recovered(FALSE); +} - /* Drop partially created indexes. */ - row_merge_drop_temp_indexes(); +/********************************************************//** +Initiates the rollback of active transactions. */ +UNIV_INTERN +void +recv_recovery_rollback_active(void) +/*===============================*/ +{ + int i; #ifdef UNIV_SYNC_DEBUG /* Wait for a while so that created threads have time to suspend @@ -3415,6 +3456,11 @@ recv_recovery_from_checkpoint_finish(void) /* Switch latching order checks on in sync0sync.c */ sync_order_checks_on = TRUE; #endif + /* Drop partially created indexes. */ + row_merge_drop_temp_indexes(); + /* Drop temporary tables. */ + row_mysql_drop_temp_tables(); + if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) { /* Rollback the uncommitted transactions which have no user session */ diff --git a/storage/xtradb/mem/mem0dbg.c b/storage/xtradb/mem/mem0dbg.c index 01eda20ec45..1cd2ff15bab 100644 --- a/storage/xtradb/mem/mem0dbg.c +++ b/storage/xtradb/mem/mem0dbg.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -180,6 +180,10 @@ mem_close(void) { mem_pool_free(mem_comm_pool); mem_comm_pool = NULL; +#ifdef UNIV_MEM_DEBUG + mutex_free(&mem_hash_mutex); + mem_hash_initialized = FALSE; +#endif /* UNIV_MEM_DEBUG */ } #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/mem/mem0mem.c b/storage/xtradb/mem/mem0mem.c index ccb2fd8a7b4..c0ce8a3e1ac 100644 --- a/storage/xtradb/mem/mem0mem.c +++ b/storage/xtradb/mem/mem0mem.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -383,6 +383,20 @@ mem_heap_create_block( mem_block_set_free(block, MEM_BLOCK_HEADER_SIZE); mem_block_set_start(block, MEM_BLOCK_HEADER_SIZE); + if (UNIV_UNLIKELY(heap == NULL)) { + /* This is the first block of the heap. The field + total_size should be initialized here */ + block->total_size = len; + } else { + /* Not the first allocation for the heap. This block's + total_length field should be set to undefined. 
*/ + ut_d(block->total_size = ULINT_UNDEFINED); + UNIV_MEM_INVALID(&block->total_size, + sizeof block->total_size); + + heap->total_size += len; + } + ut_ad((ulint)MEM_BLOCK_HEADER_SIZE < len); return(block); @@ -471,6 +485,10 @@ mem_heap_block_free( mem_pool_mutex_exit(); #endif + + ut_ad(heap->total_size >= block->len); + heap->total_size -= block->len; + type = heap->type; len = block->len; block->magic_n = MEM_FREED_BLOCK_MAGIC_N; diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c index d3b3edea29f..48d796c38e1 100644 --- a/storage/xtradb/os/os0file.c +++ b/storage/xtradb/os/os0file.c @@ -1,23 +1,6 @@ -/***************************************************************************** - -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ /*********************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2009, Percona Inc. Portions of this file contain modifications contributed and copyrighted @@ -231,7 +214,7 @@ static os_aio_array_t* os_aio_sync_array = NULL; /*!< Synchronous I/O */ /* Per thread buffer used for merged IO requests. 
Used by os_aio_simulated_handle so that a buffer doesn't have to be allocated for each request. */ -static char* os_aio_thread_buffer[SRV_MAX_N_IO_THREADS]; +static byte* os_aio_thread_buffer[SRV_MAX_N_IO_THREADS]; static ulint os_aio_thread_buffer_size[SRV_MAX_N_IO_THREADS]; /** Number of asynchronous I/O segments. Set by os_aio_init(). */ @@ -846,7 +829,15 @@ next_file: #ifdef HAVE_READDIR_R ret = readdir_r(dir, (struct dirent*)dirent_buf, &ent); - if (ret != 0) { + if (ret != 0 +#ifdef UNIV_AIX + /* On AIX, only if we got non-NULL 'ent' (result) value and + a non-zero 'ret' (return) value, it indicates a failed + readdir_r() call. An NULL 'ent' with an non-zero 'ret' + would indicate the "end of the directory" is reached. */ + && ent != NULL +#endif + ) { fprintf(stderr, "InnoDB: cannot read directory %s, error %lu\n", dirname, (ulong)ret); @@ -1388,7 +1379,11 @@ try_again: /* When srv_file_per_table is on, file creation failure may not be critical to the whole instance. Do not crash the server in - case of unknown errors. */ + case of unknown errors. + Please note "srv_file_per_table" is a global variable with + no explicit synchronization protection. It could be + changed during this execution path. It might not have the + same value as the one when building the table definition */ if (srv_file_per_table) { retry = os_file_handle_error_no_exit(name, create_mode == OS_FILE_CREATE ? @@ -1475,7 +1470,11 @@ try_again: /* When srv_file_per_table is on, file creation failure may not be critical to the whole instance. Do not crash the server in - case of unknown errors. */ + case of unknown errors. + Please note "srv_file_per_table" is a global variable with + no explicit synchronization protection. It could be + changed during this execution path. It might not have the + same value as the one when building the table definition */ if (srv_file_per_table) { retry = os_file_handle_error_no_exit(name, create_mode == OS_FILE_CREATE ? 
@@ -1503,6 +1502,11 @@ try_again: os_file_set_nocache(file, name, mode_str); } + /* ALL_O_DIRECT: O_DIRECT also for transaction log file */ + if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) { + os_file_set_nocache(file, name, mode_str); + } + #ifdef USE_FILE_LOCK if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) { @@ -4030,6 +4034,9 @@ os_aio_simulated_handle( ulint i; time_t now; + /* Fix compiler warning */ + *consecutive_ios = NULL; + segment = os_aio_get_array_and_local_segment(&array, global_segment); restart: diff --git a/storage/xtradb/os/os0proc.c b/storage/xtradb/os/os0proc.c index 48922886f23..c101db3d179 100644 --- a/storage/xtradb/os/os0proc.c +++ b/storage/xtradb/os/os0proc.c @@ -229,3 +229,175 @@ os_mem_free_large( } #endif } + +/****************************************************************//** +Allocates or attaches and reuses shared memory segment. +The content is not cleared automatically. +@return allocated memory */ +UNIV_INTERN +void* +os_shm_alloc( +/*=========*/ + ulint* n, /*!< in/out: number of bytes */ + uint key, + ibool* is_new) +{ + void* ptr; +#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H + ulint size; + int shmid; +#endif + + *is_new = FALSE; +#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H + fprintf(stderr, + "InnoDB: The shared memory key %#x (%d) is specified.\n", + key, key); +# if defined HAVE_LARGE_PAGES && defined UNIV_LINUX + if (!os_use_large_pages || !os_large_page_size) { + goto skip; + } + + /* Align block size to os_large_page_size */ + ut_ad(ut_is_2pow(os_large_page_size)); + size = ut_2pow_round(*n + (os_large_page_size - 1), + os_large_page_size); + + shmid = shmget((key_t)key, (size_t)size, + IPC_CREAT | IPC_EXCL | SHM_HUGETLB | SHM_R | SHM_W); + if (shmid < 0) { + if (errno == EEXIST) { + fprintf(stderr, + "InnoDB: HugeTLB: The shared memory segment seems to exist already.\n"); + shmid = shmget((key_t)key, (size_t)size, + SHM_HUGETLB | SHM_R | SHM_W); + if (shmid < 0) { + fprintf(stderr, + 
"InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes.(reuse) errno %d\n", + size, errno); + goto skip; + } else { + fprintf(stderr, + "InnoDB: HugeTLB: The existent shared memory segment is used.\n"); + } + } else { + fprintf(stderr, + "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes.(new) errno %d\n", + size, errno); + goto skip; + } + } else { + *is_new = TRUE; + fprintf(stderr, + "InnoDB: HugeTLB: The new shared memory segment is created.\n"); + } + + ptr = shmat(shmid, NULL, 0); + if (ptr == (void *)-1) { + fprintf(stderr, + "InnoDB: HugeTLB: Warning: Failed to attach shared memory segment, errno %d\n", + errno); + ptr = NULL; + } + + if (ptr) { + *n = size; + os_fast_mutex_lock(&ut_list_mutex); + ut_total_allocated_memory += size; + os_fast_mutex_unlock(&ut_list_mutex); + UNIV_MEM_ALLOC(ptr, size); + return(ptr); + } +skip: + *is_new = FALSE; +# endif /* HAVE_LARGE_PAGES && defined UNIV_LINUX */ +# ifdef HAVE_GETPAGESIZE + size = getpagesize(); +# else + size = UNIV_PAGE_SIZE; +# endif + /* Align block size to system page size */ + ut_ad(ut_is_2pow(size)); + size = *n = ut_2pow_round(*n + (size - 1), size); + + shmid = shmget((key_t)key, (size_t)size, + IPC_CREAT | IPC_EXCL | SHM_R | SHM_W); + if (shmid < 0) { + if (errno == EEXIST) { + fprintf(stderr, + "InnoDB: The shared memory segment seems to exist already.\n"); + shmid = shmget((key_t)key, (size_t)size, + SHM_R | SHM_W); + if (shmid < 0) { + fprintf(stderr, + "InnoDB: Warning: Failed to allocate %lu bytes.(reuse) errno %d\n", + size, errno); + ptr = NULL; + goto end; + } else { + fprintf(stderr, + "InnoDB: The existent shared memory segment is used.\n"); + } + } else { + fprintf(stderr, + "InnoDB: Warning: Failed to allocate %lu bytes.(new) errno %d\n", + size, errno); + ptr = NULL; + goto end; + } + } else { + *is_new = TRUE; + fprintf(stderr, + "InnoDB: The new shared memory segment is created.\n"); + } + + ptr = shmat(shmid, NULL, 0); + if (ptr == (void *)-1) { + fprintf(stderr, + "InnoDB: 
Warning: Failed to attach shared memory segment, errno %d\n", + errno); + ptr = NULL; + } + + if (ptr) { + *n = size; + os_fast_mutex_lock(&ut_list_mutex); + ut_total_allocated_memory += size; + os_fast_mutex_unlock(&ut_list_mutex); + UNIV_MEM_ALLOC(ptr, size); + } +end: +#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ + fprintf(stderr, "InnoDB: shared memory segment is not supported.\n"); + ptr = NULL; +#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ + return(ptr); +} + +/****************************************************************//** +Detach shared memory segment. */ +UNIV_INTERN +void +os_shm_free( +/*========*/ + void *ptr, /*!< in: pointer returned by + os_shm_alloc() */ + ulint size) /*!< in: size returned by + os_shm_alloc() */ +{ + os_fast_mutex_lock(&ut_list_mutex); + ut_a(ut_total_allocated_memory >= size); + os_fast_mutex_unlock(&ut_list_mutex); + +#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H + if (!shmdt(ptr)) { + os_fast_mutex_lock(&ut_list_mutex); + ut_a(ut_total_allocated_memory >= size); + ut_total_allocated_memory -= size; + os_fast_mutex_unlock(&ut_list_mutex); + UNIV_MEM_FREE(ptr, size); + } +#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ + fprintf(stderr, "InnoDB: shared memory segment is not supported.\n"); +#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ +} diff --git a/storage/xtradb/page/page0page.c b/storage/xtradb/page/page0page.c index ab2ba60570e..10008f9ac25 100644 --- a/storage/xtradb/page/page0page.c +++ b/storage/xtradb/page/page0page.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -658,6 +658,14 @@ page_copy_rec_list_end( index, mtr); } + /* Update PAGE_MAX_TRX_ID on the uncompressed page. 
+ Modifications will be redo logged and copied to the compressed + page in page_zip_compress() or page_zip_reorganize() below. */ + if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) { + page_update_max_trx_id(new_block, NULL, + page_get_max_trx_id(page), mtr); + } + if (UNIV_LIKELY_NULL(new_page_zip)) { mtr_set_log_mode(mtr, log_mode); @@ -696,15 +704,10 @@ page_copy_rec_list_end( } } - /* Update the lock table, MAX_TRX_ID, and possible hash index */ + /* Update the lock table and possible hash index */ lock_move_rec_list_end(new_block, block, rec); - if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) { - page_update_max_trx_id(new_block, new_page_zip, - page_get_max_trx_id(page), mtr); - } - btr_search_move_or_delete_hash_entries(new_block, block, index); return(ret); @@ -772,6 +775,16 @@ page_copy_rec_list_start( mem_heap_free(heap); } + /* Update PAGE_MAX_TRX_ID on the uncompressed page. + Modifications will be redo logged and copied to the compressed + page in page_zip_compress() or page_zip_reorganize() below. 
*/ + if (dict_index_is_sec_or_ibuf(index) + && page_is_leaf(page_align(rec))) { + page_update_max_trx_id(new_block, NULL, + page_get_max_trx_id(page_align(rec)), + mtr); + } + if (UNIV_LIKELY_NULL(new_page_zip)) { mtr_set_log_mode(mtr, log_mode); @@ -809,14 +822,7 @@ page_copy_rec_list_start( } } - /* Update MAX_TRX_ID, the lock table, and possible hash index */ - - if (dict_index_is_sec_or_ibuf(index) - && page_is_leaf(page_align(rec))) { - page_update_max_trx_id(new_block, new_page_zip, - page_get_max_trx_id(page_align(rec)), - mtr); - } + /* Update the lock table and possible hash index */ lock_move_rec_list_start(new_block, block, rec, ret); @@ -2408,8 +2414,13 @@ page_validate( } offs = page_offset(rec_get_start(rec, offsets)); + i = rec_offs_size(offsets); + if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) { + fputs("InnoDB: record offset out of bounds\n", stderr); + goto func_exit; + } - for (i = rec_offs_size(offsets); i--; ) { + while (i--) { if (UNIV_UNLIKELY(buf[offs + i])) { /* No other record may overlap this */ @@ -2517,8 +2528,13 @@ n_owned_zero: count++; offs = page_offset(rec_get_start(rec, offsets)); + i = rec_offs_size(offsets); + if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) { + fputs("InnoDB: record offset out of bounds\n", stderr); + goto func_exit; + } - for (i = rec_offs_size(offsets); i--; ) { + while (i--) { if (UNIV_UNLIKELY(buf[offs + i])) { fputs("InnoDB: Record overlaps another" diff --git a/storage/xtradb/page/page0zip.c b/storage/xtradb/page/page0zip.c index c5c373781a2..a94d2d54417 100644 --- a/storage/xtradb/page/page0zip.c +++ b/storage/xtradb/page/page0zip.c @@ -571,7 +571,7 @@ page_zip_dir_encode( /* Traverse the list of stored records in the collation order, starting from the first user record. 
*/ - rec = page + PAGE_NEW_INFIMUM, TRUE; + rec = page + PAGE_NEW_INFIMUM; i = 0; @@ -1153,6 +1153,10 @@ page_zip_compress( FILE* logfile = NULL; #endif + if (!page) { + return(FALSE); + } + ut_a(page_is_comp(page)); ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX); ut_ad(page_simple_validate_new((page_t*) page)); @@ -1464,6 +1468,7 @@ page_zip_fields_free( dict_table_t* table = index->table; mem_heap_free(index->heap); mutex_free(&(table->autoinc_mutex)); + ut_free(table->name); mem_heap_free(table->heap); } } @@ -3117,8 +3122,13 @@ page_zip_validate_low( temp_page_zip in a debugger when running valgrind --db-attach. */ VALGRIND_GET_VBITS(page, temp_page, UNIV_PAGE_SIZE); UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE); +# if UNIV_WORD_SIZE == 4 VALGRIND_GET_VBITS(page_zip, &temp_page_zip, sizeof temp_page_zip); + /* On 32-bit systems, there is no padding in page_zip_des_t. + On other systems, Valgrind could complain about uninitialized + pad bytes. */ UNIV_MEM_ASSERT_RW(page_zip, sizeof *page_zip); +# endif VALGRIND_GET_VBITS(page_zip->data, temp_page, page_zip_get_size(page_zip)); UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); diff --git a/storage/xtradb/plug.in b/storage/xtradb/plug.in index 2d7bd51ec17..37c895fb520 100644 --- a/storage/xtradb/plug.in +++ b/storage/xtradb/plug.in @@ -1,5 +1,5 @@ # -# Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. +# Copyright (c) 2006, 2010, Innobase Oy. All Rights Reserved. 
# # This program is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software @@ -14,12 +14,13 @@ # Place, Suite 330, Boston, MA 02111-1307 USA # -MYSQL_STORAGE_ENGINE(innobase, innodb, [InnoDB Storage Engine], - [Transactional Tables using InnoDB], [max,max-no-ndb]) -MYSQL_PLUGIN_DIRECTORY(innobase, [storage/xtradb]) -MYSQL_PLUGIN_STATIC(innobase, [libinnobase.a]) -MYSQL_PLUGIN_DYNAMIC(innobase, [ha_innodb.la]) -MYSQL_PLUGIN_ACTIONS(innobase, [ +MYSQL_STORAGE_ENGINE(xtradb, xtradb, [XtraDB Storage Engine], + [XtraDB - a drop-in replacement for InnoDB], [max,max-no-ndb]) +MYSQL_PLUGIN_DIRECTORY(xtradb, [storage/xtradb]) +MYSQL_PLUGIN_STATIC(xtradb, [libxtradb.a]) +MYSQL_PLUGIN_DYNAMIC(xtradb, [ha_xtradb.la]) +MYSQL_PLUGIN_ACTIONS(xtradb, [ + with_plugin_innobase=$with_plugin_xtradb # for legacy code in configure.in AC_CHECK_LIB(rt, aio_read, [innodb_system_libs="-lrt"]) AC_SUBST(innodb_system_libs) AC_CHECK_HEADERS(aio.h sched.h) diff --git a/storage/xtradb/que/que0que.c b/storage/xtradb/que/que0que.c index 2fe046fa9b8..5c85a04d139 100644 --- a/storage/xtradb/que/que0que.c +++ b/storage/xtradb/que/que0que.c @@ -622,11 +622,21 @@ que_graph_free_recursive( que_graph_free_recursive(cre_ind->ind_def); que_graph_free_recursive(cre_ind->field_def); + if (srv_use_sys_stats_table) + que_graph_free_recursive(cre_ind->stats_def); que_graph_free_recursive(cre_ind->commit_node); mem_heap_free(cre_ind->heap); break; + case QUE_NODE_INSERT_STATS: + cre_ind = node; + + que_graph_free_recursive(cre_ind->stats_def); + que_graph_free_recursive(cre_ind->commit_node); + + mem_heap_free(cre_ind->heap); + break; case QUE_NODE_PROC: que_graph_free_stat_list(((proc_node_t*)node)->stat_list); @@ -1139,6 +1149,8 @@ que_node_print_info( str = "CREATE TABLE"; } else if (type == QUE_NODE_CREATE_INDEX) { str = "CREATE INDEX"; + } else if (type == QUE_NODE_INSERT_STATS) { + str = "INSERT TO SYS_STATS"; } else if 
(type == QUE_NODE_FOR) { str = "FOR LOOP"; } else if (type == QUE_NODE_RETURN) { @@ -1256,6 +1268,8 @@ que_thr_step( thr = dict_create_table_step(thr); } else if (type == QUE_NODE_CREATE_INDEX) { thr = dict_create_index_step(thr); + } else if (type == QUE_NODE_INSERT_STATS) { + thr = dict_insert_stats_step(thr); } else if (type == QUE_NODE_ROW_PRINTF) { thr = row_printf_step(thr); } else { diff --git a/storage/xtradb/rem/rem0cmp.c b/storage/xtradb/rem/rem0cmp.c index 45230f1d7b1..8ee434f85da 100644 --- a/storage/xtradb/rem/rem0cmp.c +++ b/storage/xtradb/rem/rem0cmp.c @@ -706,7 +706,9 @@ cmp_rec_rec_simple( const rec_t* rec2, /*!< in: physical record */ const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */ - const dict_index_t* index) /*!< in: data dictionary index */ + const dict_index_t* index, /*!< in: data dictionary index */ + ibool* null_eq)/*!< out: set to TRUE if + found matching null values */ { ulint rec1_f_len; /*!< length of current field in rec1 */ const byte* rec1_b_ptr; /*!< pointer to the current byte @@ -753,6 +755,9 @@ cmp_rec_rec_simple( || rec2_f_len == UNIV_SQL_NULL) { if (rec1_f_len == rec2_f_len) { + if (null_eq) { + *null_eq = TRUE; + } goto next_field; diff --git a/storage/xtradb/rem/rem0rec.c b/storage/xtradb/rem/rem0rec.c index 1c8b3fd8c1e..37ba8ca2ffe 100644 --- a/storage/xtradb/rem/rem0rec.c +++ b/storage/xtradb/rem/rem0rec.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -212,6 +212,13 @@ rec_get_n_extern_new( const dict_col_t* col = dict_field_get_col(field); len = *lens--; + /* If the maximum length of the field is up + to 255 bytes, the actual length is always + stored in one byte. If the maximum length is + more than 255 bytes, the actual length is + stored in one byte for 0..127. The length + will be encoded in two bytes when it is 128 or + more, or when the field is stored externally. */ if (UNIV_UNLIKELY(col->len > 255) || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) { if (len & 0x80) { @@ -294,6 +301,13 @@ rec_init_offsets_comp_ordinary( const dict_col_t* col = dict_field_get_col(field); len = *lens--; + /* If the maximum length of the field is up + to 255 bytes, the actual length is always + stored in one byte. If the maximum length is + more than 255 bytes, the actual length is + stored in one byte for 0..127. The length + will be encoded in two bytes when it is 128 or + more, or when the field is stored externally. */ if (UNIV_UNLIKELY(col->len > 255) || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) { @@ -425,6 +439,15 @@ rec_init_offsets( const dict_col_t* col = dict_field_get_col(field); len = *lens--; + /* If the maximum length of the field + is up to 255 bytes, the actual length + is always stored in one byte. If the + maximum length is more than 255 bytes, + the actual length is stored in one + byte for 0..127. The length will be + encoded in two bytes when it is 128 or + more, or when the field is stored + externally. */ if (UNIV_UNLIKELY(col->len > 255) || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) { @@ -647,6 +670,13 @@ rec_get_offsets_reverse( const dict_col_t* col = dict_field_get_col(field); len = *lens++; + /* If the maximum length of the field is up + to 255 bytes, the actual length is always + stored in one byte. 
If the maximum length is + more than 255 bytes, the actual length is + stored in one byte for 0..127. The length + will be encoded in two bytes when it is 128 or + more, or when the field is stored externally. */ if (UNIV_UNLIKELY(col->len > 255) || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) { if (len & 0x80) { @@ -695,19 +725,9 @@ rec_get_nth_field_offs_old( ulint os; ulint next_os; - ut_ad(rec && len); - ut_ad(n < rec_get_n_fields_old(rec)); - - if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) { - fprintf(stderr, "Error: trying to access field %lu in rec\n", - (ulong) n); - ut_error; - } - - if (UNIV_UNLIKELY(rec == NULL)) { - fputs("Error: rec is NULL pointer\n", stderr); - ut_error; - } + ut_ad(len); + ut_a(rec); + ut_a(n < rec_get_n_fields_old(rec)); if (rec_get_1byte_offs_flag(rec)) { os = rec_1_get_field_start_offs(rec, n); @@ -791,12 +811,20 @@ rec_get_converted_size_comp_prefix( ut_ad(len <= col->len || col->mtype == DATA_BLOB); + /* If the maximum length of a variable-length field + is up to 255 bytes, the actual length is always stored + in one byte. If the maximum length is more than 255 + bytes, the actual length is stored in one byte for + 0..127. The length will be encoded in two bytes when + it is 128 or more, or when the field is stored externally. 
*/ + if (field->fixed_len) { ut_ad(len == field->fixed_len); /* dict_index_add_col() should guarantee this */ ut_ad(!field->prefix_len || field->fixed_len == field->prefix_len); } else if (dfield_is_ext(&fields[i])) { + ut_ad(col->len >= 256 || col->mtype == DATA_BLOB); extra_size += 2; } else if (len < 128 || (col->len < 256 && col->mtype != DATA_BLOB)) { @@ -1096,6 +1124,8 @@ rec_convert_dtuple_to_rec_comp( /* Store the data and the offsets */ for (i = 0, field = fields; i < n_fields; i++, field++) { + const dict_field_t* ifield; + type = dfield_get_type(field); len = dfield_get_len(field); @@ -1130,12 +1160,20 @@ rec_convert_dtuple_to_rec_comp( /* only nullable fields can be null */ ut_ad(!dfield_is_null(field)); - fixed_len = dict_index_get_nth_field(index, i)->fixed_len; - + ifield = dict_index_get_nth_field(index, i); + fixed_len = ifield->fixed_len; + /* If the maximum length of a variable-length field + is up to 255 bytes, the actual length is always stored + in one byte. If the maximum length is more than 255 + bytes, the actual length is stored in one byte for + 0..127. The length will be encoded in two bytes when + it is 128 or more, or when the field is stored externally. 
*/ if (fixed_len) { ut_ad(len == fixed_len); ut_ad(!dfield_is_ext(field)); } else if (dfield_is_ext(field)) { + ut_ad(ifield->col->len >= 256 + || ifield->col->mtype == DATA_BLOB); ut_ad(len <= REC_MAX_INDEX_COL_LEN + BTR_EXTERN_FIELD_REF_SIZE); *lens-- = (byte) (len >> 8) | 0xc0; @@ -1225,11 +1263,20 @@ rec_convert_dtuple_to_rec( mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; const ulint* offsets; + ulint i; rec_offs_init(offsets_); offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, &heap); ut_ad(rec_validate(rec, offsets)); + ut_ad(dtuple_get_n_fields(dtuple) + == rec_offs_n_fields(offsets)); + + for (i = 0; i < rec_offs_n_fields(offsets); i++) { + ut_ad(!dfield_is_ext(dtuple_get_nth_field(dtuple, i)) + == !rec_offs_nth_extern(offsets, i)); + } + if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } @@ -1412,6 +1459,13 @@ rec_copy_prefix_to_buf( prefix_len += field->fixed_len; } else { ulint len = *lens--; + /* If the maximum length of the column is up + to 255 bytes, the actual length is always + stored in one byte. If the maximum length is + more than 255 bytes, the actual length is + stored in one byte for 0..127. The length + will be encoded in two bytes when it is 128 or + more, or when the column is stored externally. */ if (col->len > 255 || col->mtype == DATA_BLOB) { if (len & 0x80) { /* 1exxxxxx */ diff --git a/storage/xtradb/row/row0ins.c b/storage/xtradb/row/row0ins.c index 4b104ced649..d4925e46f97 100644 --- a/storage/xtradb/row/row0ins.c +++ b/storage/xtradb/row/row0ins.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -51,6 +51,15 @@ Created 4/20/1996 Heikki Tuuri #define ROW_INS_PREV 1 #define ROW_INS_NEXT 2 +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ /*********************************************************************//** Creates an insert node struct. @@ -1121,9 +1130,9 @@ nonstandard_exit_func: /*********************************************************************//** Sets a shared lock on a record. Used in locking possible duplicate key records and also in checking foreign key constraints. -@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ static -ulint +enum db_err row_ins_set_shared_rec_lock( /*========================*/ ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or @@ -1134,7 +1143,7 @@ row_ins_set_shared_rec_lock( const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; + enum db_err err; ut_ad(rec_offs_validate(rec, index, offsets)); @@ -1152,9 +1161,9 @@ row_ins_set_shared_rec_lock( /*********************************************************************//** Sets a exclusive lock on a record. 
Used in locking possible duplicate key records -@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ static -ulint +enum db_err row_ins_set_exclusive_rec_lock( /*===========================*/ ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or @@ -1165,7 +1174,7 @@ row_ins_set_exclusive_rec_lock( const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; + enum db_err err; ut_ad(rec_offs_validate(rec, index, offsets)); @@ -1205,7 +1214,6 @@ row_ins_check_foreign_constraint( dict_index_t* check_index; ulint n_fields_cmp; btr_pcur_t pcur; - ibool moved; int cmp; ulint err; ulint i; @@ -1336,7 +1344,7 @@ run_again: /* Scan index records and check if there is a matching record */ - for (;;) { + do { const rec_t* rec = btr_pcur_get_rec(&pcur); const buf_block_t* block = btr_pcur_get_block(&pcur); @@ -1348,7 +1356,7 @@ run_again: if (page_rec_is_infimum(rec)) { - goto next_rec; + continue; } offsets = rec_get_offsets(rec, check_index, @@ -1359,12 +1367,13 @@ run_again: err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - - break; + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + continue; + default: + goto end_scan; } - - goto next_rec; } cmp = cmp_dtuple_rec(entry, rec, offsets); @@ -1375,9 +1384,12 @@ run_again: err = row_ins_set_shared_rec_lock( LOCK_ORDINARY, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: break; + default: + goto end_scan; } } else { /* Found a matching record. 
Lock only @@ -1388,15 +1400,18 @@ run_again: LOCK_REC_NOT_GAP, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: break; + default: + goto end_scan; } if (check_ref) { err = DB_SUCCESS; - break; + goto end_scan; } else if (foreign->type != 0) { /* There is an ON UPDATE or ON DELETE condition: check them in a separate @@ -1422,7 +1437,7 @@ run_again: err = DB_FOREIGN_DUPLICATE_KEY; } - break; + goto end_scan; } /* row_ins_foreign_check_on_constraint @@ -1435,49 +1450,41 @@ run_again: thr, foreign, rec, entry); err = DB_ROW_IS_REFERENCED; - break; + goto end_scan; } } - } + } else { + ut_a(cmp < 0); - if (cmp < 0) { err = row_ins_set_shared_rec_lock( LOCK_GAP, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - break; - } - - if (check_ref) { - err = DB_NO_REFERENCED_ROW; - row_ins_foreign_report_add_err( - trx, foreign, rec, entry); - } else { - err = DB_SUCCESS; + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + if (check_ref) { + err = DB_NO_REFERENCED_ROW; + row_ins_foreign_report_add_err( + trx, foreign, rec, entry); + } else { + err = DB_SUCCESS; + } } - break; + goto end_scan; } + } while (btr_pcur_move_to_next(&pcur, &mtr)); - ut_a(cmp == 0); -next_rec: - moved = btr_pcur_move_to_next(&pcur, &mtr); - - if (!moved) { - if (check_ref) { - rec = btr_pcur_get_rec(&pcur); - row_ins_foreign_report_add_err( - trx, foreign, rec, entry); - err = DB_NO_REFERENCED_ROW; - } else { - err = DB_SUCCESS; - } - - break; - } + if (check_ref) { + row_ins_foreign_report_add_err( + trx, foreign, btr_pcur_get_rec(&pcur), entry); + err = DB_NO_REFERENCED_ROW; + } else { + err = DB_SUCCESS; } +end_scan: btr_pcur_close(&pcur); mtr_commit(&mtr); @@ -1725,9 +1732,13 @@ row_ins_scan_sec_index_for_duplicate( rec, index, offsets, thr); } - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: break; + default: + 
goto end_scan; } if (page_rec_is_supremum(rec)) { @@ -1744,17 +1755,15 @@ row_ins_scan_sec_index_for_duplicate( thr_get_trx(thr)->error_info = index; - break; + goto end_scan; } + } else { + ut_a(cmp < 0); + goto end_scan; } - - if (cmp < 0) { - break; - } - - ut_a(cmp == 0); } while (btr_pcur_move_to_next(&pcur, &mtr)); +end_scan: if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } @@ -1843,7 +1852,11 @@ row_ins_duplicate_error_in_clust( cursor->index, offsets, thr); } - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto func_exit; } @@ -1883,7 +1896,11 @@ row_ins_duplicate_error_in_clust( rec, cursor->index, offsets, thr); } - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto func_exit; } @@ -1997,7 +2014,7 @@ row_ins_index_entry_low( btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, mode | BTR_INSERT | ignore_sec_unique, - &cursor, 0, &mtr); + &cursor, 0, __FILE__, __LINE__, &mtr); if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) { /* The insertion was made to the insert buffer already during @@ -2055,7 +2072,8 @@ row_ins_index_entry_low( btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, mode | BTR_INSERT, - &cursor, 0, &mtr); + &cursor, 0, + __FILE__, __LINE__, &mtr); } } @@ -2110,7 +2128,8 @@ function_exit: mtr_start(&mtr); btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, - BTR_MODIFY_TREE, &cursor, 0, &mtr); + BTR_MODIFY_TREE, &cursor, 0, + __FILE__, __LINE__, &mtr); rec = btr_cur_get_rec(&cursor); offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap); diff --git a/storage/xtradb/row/row0merge.c b/storage/xtradb/row/row0merge.c index 93b2095dc26..47c03c77850 100644 --- a/storage/xtradb/row/row0merge.c +++ b/storage/xtradb/row/row0merge.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved. 
+Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -424,14 +424,13 @@ row_merge_dup_report( row_merge_dup_t* dup, /*!< in/out: for reporting duplicates */ const dfield_t* entry) /*!< in: duplicate index entry */ { - mrec_buf_t buf; + mrec_buf_t* buf; const dtuple_t* tuple; dtuple_t tuple_store; const rec_t* rec; const dict_index_t* index = dup->index; ulint n_fields= dict_index_get_n_fields(index); - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; + mem_heap_t* heap; ulint* offsets; ulint n_ext; @@ -441,22 +440,22 @@ row_merge_dup_report( return; } - rec_offs_init(offsets_); - /* Convert the tuple to a record and then to MySQL format. */ + heap = mem_heap_create((1 + REC_OFFS_HEADER_SIZE + n_fields) + * sizeof *offsets + + sizeof *buf); + + buf = mem_heap_alloc(heap, sizeof *buf); tuple = dtuple_from_fields(&tuple_store, entry, n_fields); n_ext = dict_index_is_clust(index) ? dtuple_get_n_ext(tuple) : 0; - rec = rec_convert_dtuple_to_rec(buf, index, tuple, n_ext); - offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, - &heap); + rec = rec_convert_dtuple_to_rec(*buf, index, tuple, n_ext); + offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap); innobase_rec_to_mysql(dup->table, rec, index, offsets); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } + mem_heap_free(heap); } /*************************************************************//** @@ -627,22 +626,26 @@ row_merge_buf_write( } /******************************************************//** -Create a memory heap and allocate space for row_merge_rec_offsets(). +Create a memory heap and allocate space for row_merge_rec_offsets() +and mrec_buf_t[3]. 
@return memory heap */ static mem_heap_t* row_merge_heap_create( /*==================*/ const dict_index_t* index, /*!< in: record descriptor */ + mrec_buf_t** buf, /*!< out: 3 buffers */ ulint** offsets1, /*!< out: offsets */ ulint** offsets2) /*!< out: offsets */ { ulint i = 1 + REC_OFFS_HEADER_SIZE + dict_index_get_n_fields(index); - mem_heap_t* heap = mem_heap_create(2 * i * sizeof *offsets1); + mem_heap_t* heap = mem_heap_create(2 * i * sizeof **offsets1 + + 3 * sizeof **buf); - *offsets1 = mem_heap_alloc(heap, i * sizeof *offsets1); - *offsets2 = mem_heap_alloc(heap, i * sizeof *offsets2); + *buf = mem_heap_alloc(heap, 3 * sizeof **buf); + *offsets1 = mem_heap_alloc(heap, i * sizeof **offsets1); + *offsets2 = mem_heap_alloc(heap, i * sizeof **offsets2); (*offsets1)[0] = (*offsets2)[0] = i; (*offsets1)[1] = (*offsets2)[1] = dict_index_get_n_fields(index); @@ -714,14 +717,16 @@ row_merge_read( } /********************************************************************//** -Read a merge block from the file system. +Write a merge block to the file system. 
@return TRUE if request was successful, FALSE if fail */ static ibool row_merge_write( /*============*/ int fd, /*!< in: file descriptor */ - ulint offset, /*!< in: offset where to write */ + ulint offset, /*!< in: offset where to read + in number of row_merge_block_t + elements */ const void* buf) /*!< in: data */ { ib_uint64_t ofs = ((ib_uint64_t) offset) @@ -1072,11 +1077,14 @@ row_merge_cmp( record to be compared */ const ulint* offsets1, /*!< in: first record offsets */ const ulint* offsets2, /*!< in: second record offsets */ - const dict_index_t* index) /*!< in: index */ + const dict_index_t* index, /*!< in: index */ + ibool* null_eq) /*!< out: set to TRUE if + found matching null values */ { int cmp; - cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index); + cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index, + null_eq); #ifdef UNIV_DEBUG if (row_merge_print_cmp) { @@ -1401,7 +1409,8 @@ row_merge_blocks( { mem_heap_t* heap; /*!< memory heap for offsets0, offsets1 */ - mrec_buf_t buf[3]; /*!< buffer for handling split mrec in block[] */ + mrec_buf_t* buf; /*!< buffer for handling + split mrec in block[] */ const byte* b0; /*!< pointer to block[0] */ const byte* b1; /*!< pointer to block[1] */ byte* b2; /*!< pointer to block[2] */ @@ -1421,7 +1430,7 @@ row_merge_blocks( } #endif /* UNIV_DEBUG */ - heap = row_merge_heap_create(index, &offsets0, &offsets1); + heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1); /* Write a record and read the next record. Split the output file in two halves, which can be merged on the following pass. 
*/ @@ -1448,11 +1457,13 @@ corrupt: } while (mrec0 && mrec1) { + ibool null_eq = FALSE; switch (row_merge_cmp(mrec0, mrec1, - offsets0, offsets1, index)) { + offsets0, offsets1, index, + &null_eq)) { case 0: if (UNIV_UNLIKELY - (dict_index_is_unique(index))) { + (dict_index_is_unique(index) && !null_eq)) { innobase_rec_to_mysql(table, mrec0, index, offsets0); mem_heap_free(heap); @@ -1507,7 +1518,7 @@ row_merge_blocks_copy( { mem_heap_t* heap; /*!< memory heap for offsets0, offsets1 */ - mrec_buf_t buf[3]; /*!< buffer for handling + mrec_buf_t* buf; /*!< buffer for handling split mrec in block[] */ const byte* b0; /*!< pointer to block[0] */ byte* b2; /*!< pointer to block[2] */ @@ -1525,7 +1536,7 @@ row_merge_blocks_copy( } #endif /* UNIV_DEBUG */ - heap = row_merge_heap_create(index, &offsets0, &offsets1); + heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1); /* Write a record and read the next record. Split the output file in two halves, which can be merged on the following pass. 
*/ @@ -1574,22 +1585,28 @@ row_merge( const dict_index_t* index, /*!< in: index being created */ merge_file_t* file, /*!< in/out: file containing index entries */ - ulint* half, /*!< in/out: half the file */ row_merge_block_t* block, /*!< in/out: 3 buffers */ int* tmpfd, /*!< in/out: temporary file handle */ - TABLE* table) /*!< in/out: MySQL table, for + TABLE* table, /*!< in/out: MySQL table, for reporting erroneous key value if applicable */ + ulint* num_run,/*!< in/out: Number of runs remain + to be merged */ + ulint* run_offset) /*!< in/out: Array contains the + first offset number for each merge + run */ { ulint foffs0; /*!< first input offset */ ulint foffs1; /*!< second input offset */ ulint error; /*!< error code */ merge_file_t of; /*!< output file */ - const ulint ihalf = *half; + const ulint ihalf = run_offset[*num_run / 2]; /*!< half the input file */ - ulint ohalf; /*!< half the output file */ + ulint n_run = 0; + /*!< num of runs generated from this merge */ UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]); + ut_ad(ihalf < file->offset); of.fd = *tmpfd; @@ -1597,17 +1614,20 @@ row_merge( of.n_rec = 0; /* Merge blocks to the output file. */ - ohalf = 0; foffs0 = 0; foffs1 = ihalf; + UNIV_MEM_INVALID(run_offset, *num_run * sizeof *run_offset); + for (; foffs0 < ihalf && foffs1 < file->offset; foffs0++, foffs1++) { - ulint ahalf; /*!< arithmetic half the input file */ if (UNIV_UNLIKELY(trx_is_interrupted(trx))) { return(DB_INTERRUPTED); } + /* Remember the offset number for this run */ + run_offset[n_run++] = of.offset; + error = row_merge_blocks(index, file, block, &foffs0, &foffs1, &of, table); @@ -1615,21 +1635,6 @@ row_merge( return(error); } - /* Record the offset of the output file when - approximately half the output has been generated. In - this way, the next invocation of row_merge() will - spend most of the time in this loop. The initial - estimate is ohalf==0. 
*/ - ahalf = file->offset / 2; - ut_ad(ohalf <= of.offset); - - /* Improve the estimate until reaching half the input - file size, or we can not get any closer to it. All - comparands should be non-negative when !(ohalf < ahalf) - because ohalf <= of.offset. */ - if (ohalf < ahalf || of.offset - ahalf < ohalf - ahalf) { - ohalf = of.offset; - } } /* Copy the last blocks, if there are any. */ @@ -1639,6 +1644,9 @@ row_merge( return(DB_INTERRUPTED); } + /* Remember the offset number for this run */ + run_offset[n_run++] = of.offset; + if (!row_merge_blocks_copy(index, file, block, &foffs0, &of)) { return(DB_CORRUPTION); } @@ -1651,6 +1659,9 @@ row_merge( return(DB_INTERRUPTED); } + /* Remember the offset number for this run */ + run_offset[n_run++] = of.offset; + if (!row_merge_blocks_copy(index, file, block, &foffs1, &of)) { return(DB_CORRUPTION); } @@ -1662,10 +1673,23 @@ row_merge( return(DB_CORRUPTION); } + ut_ad(n_run <= *num_run); + + *num_run = n_run; + + /* Each run can contain one or more offsets. As merge goes on, + the number of runs (to merge) will reduce until we have one + single run. So the number of runs will always be smaller than + the number of offsets in file */ + ut_ad((*num_run) <= file->offset); + + /* The number of offsets in output file is always equal or + smaller than input file */ + ut_ad(of.offset <= file->offset); + /* Swap file descriptors for the next pass. 
*/ *tmpfd = file->fd; *file = of; - *half = ohalf; UNIV_MEM_INVALID(block[0], 3 * sizeof block[0]); @@ -1690,27 +1714,44 @@ row_merge_sort( if applicable */ { ulint half = file->offset / 2; + ulint num_runs; + ulint* run_offset; + ulint error = DB_SUCCESS; + + /* Record the number of merge runs we need to perform */ + num_runs = file->offset; + + /* If num_runs are less than 1, nothing to merge */ + if (num_runs <= 1) { + return(error); + } + + /* "run_offset" records each run's first offset number */ + run_offset = (ulint*) mem_alloc(file->offset * sizeof(ulint)); + + /* This tells row_merge() where to start for the first round + of merge. */ + run_offset[half] = half; /* The file should always contain at least one byte (the end of file marker). Thus, it must be at least one block. */ ut_ad(file->offset > 0); + /* Merge the runs until we have one big run */ do { - ulint error; + error = row_merge(trx, index, file, block, tmpfd, + table, &num_runs, run_offset); - error = row_merge(trx, index, file, &half, - block, tmpfd, table); + UNIV_MEM_ASSERT_RW(run_offset, num_runs * sizeof *run_offset); if (error != DB_SUCCESS) { - return(error); + break; } + } while (num_runs > 1); - /* half > 0 should hold except when the file consists - of one block. No need to merge further then. 
*/ - ut_ad(half > 0 || file->offset == 1); - } while (half < file->offset && half > 0); + mem_free(run_offset); - return(DB_SUCCESS); + return(error); } /*************************************************************//** @@ -1767,7 +1808,6 @@ row_merge_insert_index_tuples( int fd, /*!< in: file descriptor */ row_merge_block_t* block) /*!< in/out: file buffer */ { - mrec_buf_t buf; const byte* b; que_thr_t* thr; ins_node_t* node; @@ -1786,7 +1826,7 @@ row_merge_insert_index_tuples( trx->op_info = "inserting index entries"; - graph_heap = mem_heap_create(500); + graph_heap = mem_heap_create(500 + sizeof(mrec_buf_t)); node = ins_node_create(INS_DIRECT, table, graph_heap); thr = pars_complete_graph_for_exec(node, trx, graph_heap); @@ -1808,12 +1848,14 @@ row_merge_insert_index_tuples( if (!row_merge_read(fd, foffs, block)) { error = DB_CORRUPTION; } else { + mrec_buf_t* buf = mem_heap_alloc(graph_heap, sizeof *buf); + for (;;) { const mrec_t* mrec; dtuple_t* dtuple; ulint n_ext; - b = row_merge_read_rec(block, &buf, b, index, + b = row_merge_read_rec(block, buf, b, index, fd, &foffs, &mrec, offsets); if (UNIV_UNLIKELY(!b)) { /* End of list, or I/O error */ @@ -1981,17 +2023,17 @@ row_merge_drop_index( "UPDATE SYS_INDEXES SET NAME=CONCAT('" TEMP_INDEX_PREFIX_STR "', NAME) WHERE ID = :indexid;\n" "COMMIT WORK;\n" + /* Drop the statistics of the index. */ + "DELETE FROM SYS_STATS WHERE INDEX_ID = :indexid;\n" /* Drop the field definitions of the index. */ "DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n" /* Drop the index definition and the B-tree. 
*/ - "DELETE FROM SYS_INDEXES WHERE ID = :indexid\n" - " AND TABLE_ID = :tableid;\n" + "DELETE FROM SYS_INDEXES WHERE ID = :indexid;\n" "END;\n"; ut_ad(index && table && trx); pars_info_add_dulint_literal(info, "indexid", index->id); - pars_info_add_dulint_literal(info, "tableid", table->id); trx_start_if_not_started(trx); trx->op_info = "dropping index"; @@ -2040,47 +2082,82 @@ row_merge_drop_temp_indexes(void) /*=============================*/ { trx_t* trx; - ulint err; - - /* We use the private SQL parser of Innobase to generate the - query graphs needed in deleting the dictionary data from system - tables in Innobase. Deleting a row from SYS_INDEXES table also - frees the file segments of the B-tree associated with the index. */ - static const char drop_temp_indexes[] = - "PROCEDURE DROP_TEMP_INDEXES_PROC () IS\n" - "indexid CHAR;\n" - "DECLARE CURSOR c IS SELECT ID FROM SYS_INDEXES\n" - "WHERE SUBSTR(NAME,0,1)='" TEMP_INDEX_PREFIX_STR "';\n" - "BEGIN\n" - "\tOPEN c;\n" - "\tWHILE 1=1 LOOP\n" - "\t\tFETCH c INTO indexid;\n" - "\t\tIF (SQL % NOTFOUND) THEN\n" - "\t\t\tEXIT;\n" - "\t\tEND IF;\n" - "\t\tDELETE FROM SYS_FIELDS WHERE INDEX_ID = indexid;\n" - "\t\tDELETE FROM SYS_INDEXES WHERE ID = indexid;\n" - "\tEND LOOP;\n" - "\tCLOSE c;\n" - "\tCOMMIT WORK;\n" - "END;\n"; + btr_pcur_t pcur; + mtr_t mtr; + /* Load the table definitions that contain partially defined + indexes, so that the data dictionary information can be checked + when accessing the tablename.ibd files. */ trx = trx_allocate_for_background(); trx->op_info = "dropping partially created indexes"; row_mysql_lock_data_dictionary(trx); - /* Incomplete transactions may be holding some locks on the - data dictionary tables. However, they should never have been - able to lock the records corresponding to the partially - created indexes that we are attempting to delete, because the - table was locked when the indexes were being created. 
We will - drop the partially created indexes before the rollback of - incomplete transactions is initiated. Thus, this should not - interfere with the incomplete transactions. */ - trx->isolation_level = TRX_ISO_READ_UNCOMMITTED; - err = que_eval_sql(NULL, drop_temp_indexes, FALSE, trx); - ut_a(err == DB_SUCCESS); + mtr_start(&mtr); + + btr_pcur_open_at_index_side( + TRUE, + dict_table_get_first_index(dict_sys->sys_indexes), + BTR_SEARCH_LEAF, &pcur, TRUE, &mtr); + + for (;;) { + const rec_t* rec; + const byte* field; + ulint len; + dulint table_id; + dict_table_t* table; + + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + + if (!btr_pcur_is_on_user_rec(&pcur)) { + break; + } + + rec = btr_pcur_get_rec(&pcur); + field = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_NAME_FIELD, + &len); + if (len == UNIV_SQL_NULL || len == 0 + || (char) *field != TEMP_INDEX_PREFIX) { + continue; + } + + /* This is a temporary index. */ + + field = rec_get_nth_field_old(rec, 0/*TABLE_ID*/, &len); + if (len != 8) { + /* Corrupted TABLE_ID */ + continue; + } + table_id = mach_read_from_8(field); + + btr_pcur_store_position(&pcur, &mtr); + btr_pcur_commit_specify_mtr(&pcur, &mtr); + + table = dict_table_get_on_id_low(table_id); + + if (table) { + dict_index_t* index; + dict_index_t* next_index; + + for (index = dict_table_get_first_index(table); + index; index = next_index) { + + next_index = dict_table_get_next_index(index); + + if (*index->name == TEMP_INDEX_PREFIX) { + row_merge_drop_index(index, table, trx); + trx_commit_for_mysql(trx); + } + } + } + + mtr_start(&mtr); + btr_pcur_restore_position(BTR_SEARCH_LEAF, + &pcur, &mtr); + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); row_mysql_unlock_data_dictionary(trx); trx_free_for_background(trx); } @@ -2268,7 +2345,7 @@ row_merge_rename_tables( { ulint err = DB_ERROR; pars_info_t* info; - const char* old_name= old_table->name; + char old_name[MAX_TABLE_NAME_LEN + 1]; ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); 
ut_ad(old_table != new_table); @@ -2276,6 +2353,17 @@ row_merge_rename_tables( ut_a(trx->dict_operation_lock_mode == RW_X_LATCH); + /* store the old/current name to an automatic variable */ + if (strlen(old_table->name) + 1 <= sizeof(old_name)) { + memcpy(old_name, old_table->name, strlen(old_table->name) + 1); + } else { + ut_print_timestamp(stderr); + fprintf(stderr, "InnoDB: too long table name: '%s', " + "max length is %d\n", old_table->name, + MAX_TABLE_NAME_LEN); + ut_error; + } + trx->op_info = "renaming tables"; /* We use the private SQL parser of Innobase to generate the query diff --git a/storage/xtradb/row/row0mysql.c b/storage/xtradb/row/row0mysql.c index 8585b816911..62221fa456d 100644 --- a/storage/xtradb/row/row0mysql.c +++ b/storage/xtradb/row/row0mysql.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2000, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -30,6 +30,7 @@ Created 9/17/2000 Heikki Tuuri #include "row0mysql.ic" #endif +#include "ha_prototypes.h" #include "row0ins.h" #include "row0merge.h" #include "row0sel.h" @@ -485,7 +486,7 @@ next_column: /****************************************************************//** Handles user errors and lock waits detected by the database engine. @return TRUE if it was a lock wait and we should continue running the -query thread */ +query thread and in that case the thr is ALREADY in the running state. 
*/ UNIV_INTERN ibool row_mysql_handle_errors( @@ -522,6 +523,7 @@ handle_new_error: case DB_CANNOT_ADD_CONSTRAINT: case DB_TOO_MANY_CONCURRENT_TRXS: case DB_OUT_OF_FILE_SPACE: + case DB_INTERRUPTED: if (savept) { /* Roll back the latest, possibly incomplete insertion or update */ @@ -624,6 +626,8 @@ row_create_prebuilt( prebuilt->select_lock_type = LOCK_NONE; prebuilt->stored_select_lock_type = 99999999; + UNIV_MEM_INVALID(&prebuilt->stored_select_lock_type, + sizeof prebuilt->stored_select_lock_type); prebuilt->search_tuple = dtuple_create( heap, 2 * dict_table_get_n_cols(table)); @@ -864,7 +868,7 @@ row_update_statistics_if_needed( if (counter > 2000000000 || ((ib_int64_t)counter > 16 + table->stat_n_rows / 16)) { - dict_update_statistics(table); + dict_update_statistics(table, TRUE); } } @@ -1124,6 +1128,13 @@ row_insert_for_mysql( thr = que_fork_get_first_thr(prebuilt->ins_graph); + if (!prebuilt->mysql_has_locked) { + fprintf(stderr, "InnoDB: Error: row_insert_for_mysql is called without ha_innobase::external_lock()\n"); + if (trx->mysql_thd != NULL) { + innobase_mysql_print_thd(stderr, trx->mysql_thd, 600); + } + } + if (prebuilt->sql_stat_start) { node->state = INS_NODE_SET_IX_LOCK; prebuilt->sql_stat_start = FALSE; @@ -1430,27 +1441,26 @@ run_again: } /*********************************************************************//** -This can only be used when srv_locks_unsafe_for_binlog is TRUE or -this session is using a READ COMMITTED isolation level. Before -calling this function we must use trx_reset_new_rec_lock_info() and -trx_register_new_rec_lock() to store the information which new record locks -really were set. This function removes a newly set lock under prebuilt->pcur, -and also under prebuilt->clust_pcur. Currently, this is only used and tested -in the case of an UPDATE or a DELETE statement, where the row lock is of the -LOCK_X type. -Thus, this implements a 'mini-rollback' that releases the latest record -locks we set. 
-@return error code or DB_SUCCESS */ +This can only be used when srv_locks_unsafe_for_binlog is TRUE or this +session is using a READ COMMITTED or READ UNCOMMITTED isolation level. +Before calling this function row_search_for_mysql() must have +initialized prebuilt->new_rec_locks to store the information which new +record locks really were set. This function removes a newly set +clustered index record lock under prebuilt->pcur or +prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that +releases the latest clustered index record lock we set. +@return error code or DB_SUCCESS */ UNIV_INTERN int row_unlock_for_mysql( /*=================*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in MySQL + row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL handle */ - ibool has_latches_on_recs)/*!< TRUE if called so that we have - the latches on the records under pcur - and clust_pcur, and we do not need to - reposition the cursors. */ + ibool has_latches_on_recs)/*!< in: TRUE if called so + that we have the latches on + the records under pcur and + clust_pcur, and we do not need + to reposition the cursors. */ { btr_pcur_t* pcur = prebuilt->pcur; btr_pcur_t* clust_pcur = prebuilt->clust_pcur; @@ -1461,7 +1471,7 @@ row_unlock_for_mysql( if (UNIV_UNLIKELY (!srv_locks_unsafe_for_binlog - && trx->isolation_level != TRX_ISO_READ_COMMITTED)) { + && trx->isolation_level > TRX_ISO_READ_COMMITTED)) { fprintf(stderr, "InnoDB: Error: calling row_unlock_for_mysql though\n" @@ -1648,37 +1658,6 @@ row_table_got_default_clust_index( } /*********************************************************************//** -Calculates the key number used inside MySQL for an Innobase index. 
We have -to take into account if we generated a default clustered index for the table -@return the key number used inside MySQL */ -UNIV_INTERN -ulint -row_get_mysql_key_number_for_index( -/*===============================*/ - const dict_index_t* index) /*!< in: index */ -{ - const dict_index_t* ind; - ulint i; - - ut_a(index); - - i = 0; - ind = dict_table_get_first_index(index->table); - - while (index != ind) { - ind = dict_table_get_next_index(ind); - i++; - } - - if (row_table_got_default_clust_index(index->table)) { - ut_a(i > 0); - i--; - } - - return(i); -} - -/*********************************************************************//** Locks the data dictionary in shared mode from modifications, for performing foreign key check, rollback, or other operation invisible to MySQL. */ UNIV_INTERN @@ -2044,6 +2023,45 @@ error_handling: } /*********************************************************************//** +*/ +UNIV_INTERN +int +row_insert_stats_for_mysql( +/*=======================*/ + dict_index_t* index, + trx_t* trx) +{ + ind_node_t* node; + mem_heap_t* heap; + que_thr_t* thr; + ulint err; + + ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + + trx->op_info = "try to insert rows to SYS_STATS"; + + trx_start_if_not_started(trx); + trx->error_state = DB_SUCCESS; + + heap = mem_heap_create(512); + + node = ind_insert_stats_graph_create(index, heap); + + thr = pars_complete_graph_for_exec(node, trx, heap); + + ut_a(thr == que_fork_start_command(que_node_get_parent(thr))); + que_run_threads(thr); + + err = trx->error_state; + + que_graph_free((que_t*) que_node_get_parent(thr)); + + trx->op_info = ""; + + return((int) err); +} + +/*********************************************************************//** Scans a table create SQL string and adds to the data dictionary the foreign key constraints declared in the string. This function should be called after the indexes for a table have been created. 
@@ -2062,6 +2080,7 @@ row_table_add_foreign_constraints( FOREIGN KEY (a, b) REFERENCES table2(c, d), table2 can be written also with the database name before it: test.table2 */ + size_t sql_length, /*!< in: length of sql_string */ const char* name, /*!< in: table full name in the normalized form database_name/table_name */ @@ -2083,8 +2102,8 @@ row_table_add_foreign_constraints( trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - err = dict_create_foreign_constraints(trx, sql_string, name, - reject_fks); + err = dict_create_foreign_constraints(trx, sql_string, sql_length, + name, reject_fks); if (err == DB_SUCCESS) { /* Check that also referencing constraints are ok */ err = dict_load_foreigns(name, TRUE); @@ -2428,7 +2447,7 @@ row_discard_tablespace_for_mysql( goto funct_exit; } - new_id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID); + dict_hdr_get_new_id(&new_id, NULL, NULL); /* Remove all locks except the table-level S and X locks. */ lock_remove_all_on_table(table, FALSE); @@ -2790,10 +2809,11 @@ row_truncate_table_for_mysql( dict_index_t* index; - space = 0; + dict_hdr_get_new_id(NULL, NULL, &space); - if (fil_create_new_single_table_tablespace( - &space, table->name, FALSE, flags, + if (space == ULINT_UNDEFINED + || fil_create_new_single_table_tablespace( + space, table->name, FALSE, flags, FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) { ut_print_timestamp(stderr); fprintf(stderr, @@ -2898,7 +2918,7 @@ next_rec: mem_heap_free(heap); - new_id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID); + dict_hdr_get_new_id(&new_id, NULL, NULL); info = pars_info_create(); @@ -2942,7 +2962,7 @@ next_rec: dict_table_autoinc_lock(table); dict_table_autoinc_initialize(table, 1); dict_table_autoinc_unlock(table); - dict_update_statistics(table); + dict_update_statistics(table, TRUE); trx_commit_for_mysql(trx); @@ -3244,6 +3264,8 @@ check_next_foreign: " IF (SQL % NOTFOUND) THEN\n" " found := 0;\n" " ELSE\n" + " DELETE FROM SYS_STATS\n" + " WHERE INDEX_ID = index_id;\n" " DELETE FROM 
SYS_FIELDS\n" " WHERE INDEX_ID = index_id;\n" " DELETE FROM SYS_INDEXES\n" @@ -3258,19 +3280,13 @@ check_next_foreign: "END;\n" , FALSE, trx); - if (err != DB_SUCCESS) { - ut_a(err == DB_OUT_OF_FILE_SPACE); - - err = DB_MUST_GET_MORE_FILE_SPACE; - - row_mysql_handle_errors(&err, trx, NULL, NULL); - - ut_error; - } else { - ibool is_path; + switch (err) { + ibool is_temp; const char* name_or_path; mem_heap_t* heap; + case DB_SUCCESS: + heap = mem_heap_create(200); /* Clone the name, in case it has been allocated @@ -3280,12 +3296,13 @@ check_next_foreign: space_id = table->space; if (table->dir_path_of_temp_table != NULL) { - is_path = TRUE; name_or_path = mem_heap_strdup( heap, table->dir_path_of_temp_table); + is_temp = TRUE; } else { - is_path = FALSE; name_or_path = name; + is_temp = (table->flags >> DICT_TF2_SHIFT) + & DICT_TF2_TEMPORARY; } dict_table_remove_from_cache(table); @@ -3302,11 +3319,11 @@ check_next_foreign: /* Do not drop possible .ibd tablespace if something went wrong: we do not want to delete valuable data of the user */ - if (err == DB_SUCCESS && space_id > 0) { + if (err == DB_SUCCESS && !trx_sys_sys_space(space_id)) { if (!fil_space_for_table_exists_in_mem(space_id, name_or_path, - is_path, - FALSE, TRUE)) { + is_temp, FALSE, + !is_temp)) { err = DB_SUCCESS; fprintf(stderr, @@ -3335,7 +3352,27 @@ check_next_foreign: } mem_heap_free(heap); + break; + + case DB_TOO_MANY_CONCURRENT_TRXS: + /* Cannot even find a free slot for the + the undo log. We can directly exit here + and return the DB_TOO_MANY_CONCURRENT_TRXS + error. 
*/ + break; + + case DB_OUT_OF_FILE_SPACE: + err = DB_MUST_GET_MORE_FILE_SPACE; + + row_mysql_handle_errors(&err, trx, NULL, NULL); + + /* Fall through to raise error */ + + default: + /* No other possible error returns */ + ut_error; } + funct_exit: if (locked_dictionary) { @@ -3351,6 +3388,90 @@ funct_exit: return((int) err); } +/*********************************************************************//** +Drop all temporary tables during crash recovery. */ +UNIV_INTERN +void +row_mysql_drop_temp_tables(void) +/*============================*/ +{ + trx_t* trx; + btr_pcur_t pcur; + mtr_t mtr; + mem_heap_t* heap; + + trx = trx_allocate_for_background(); + trx->op_info = "dropping temporary tables"; + row_mysql_lock_data_dictionary(trx); + + heap = mem_heap_create(200); + + mtr_start(&mtr); + + btr_pcur_open_at_index_side( + TRUE, + dict_table_get_first_index(dict_sys->sys_tables), + BTR_SEARCH_LEAF, &pcur, TRUE, &mtr); + + for (;;) { + const rec_t* rec; + const byte* field; + ulint len; + const char* table_name; + dict_table_t* table; + + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + + if (!btr_pcur_is_on_user_rec(&pcur)) { + break; + } + + rec = btr_pcur_get_rec(&pcur); + field = rec_get_nth_field_old(rec, 4/*N_COLS*/, &len); + if (len != 4 || !(mach_read_from_4(field) & 0x80000000UL)) { + continue; + } + + /* Because this is not a ROW_FORMAT=REDUNDANT table, + the is_temp flag is valid. Examine it. */ + + field = rec_get_nth_field_old(rec, 7/*MIX_LEN*/, &len); + if (len != 4 + || !(mach_read_from_4(field) & DICT_TF2_TEMPORARY)) { + continue; + } + + /* This is a temporary table. 
*/ + field = rec_get_nth_field_old(rec, 0/*NAME*/, &len); + if (len == UNIV_SQL_NULL || len == 0) { + /* Corrupted SYS_TABLES.NAME */ + continue; + } + + table_name = mem_heap_strdupl(heap, (const char*) field, len); + + btr_pcur_store_position(&pcur, &mtr); + btr_pcur_commit_specify_mtr(&pcur, &mtr); + + table = dict_load_table(table_name); + + if (table) { + row_drop_table_for_mysql(table_name, trx, FALSE); + trx_commit_for_mysql(trx); + } + + mtr_start(&mtr); + btr_pcur_restore_position(BTR_SEARCH_LEAF, + &pcur, &mtr); + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + row_mysql_unlock_data_dictionary(trx); + trx_free_for_background(trx); +} + /*******************************************************************//** Drop all foreign keys in a database, see Bug#18942. Called at the end of row_drop_database_for_mysql(). @@ -3902,14 +4023,15 @@ Checks that the index contains entries in an ascending order, unique constraint is not broken, and calculates the number of index entries in the read view of the current transaction. @return TRUE if ok */ -static +UNIV_INTERN ibool -row_scan_and_check_index( -/*=====================*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in MySQL */ - dict_index_t* index, /*!< in: index */ - ulint* n_rows) /*!< out: number of entries seen in the - current consistent read */ +row_check_index_for_mysql( +/*======================*/ + row_prebuilt_t* prebuilt, /*!< in: prebuilt struct + in MySQL handle */ + const dict_index_t* index, /*!< in: index */ + ulint* n_rows) /*!< out: number of entries + seen in the consistent read */ { dtuple_t* prev_entry = NULL; ulint matched_fields; @@ -3930,31 +4052,9 @@ row_scan_and_check_index( *n_rows = 0; - if (!row_merge_is_index_usable(prebuilt->trx, index)) { - /* A newly created index may lack some delete-marked - records that may exist in the read view of - prebuilt->trx. Thus, such indexes must not be - accessed by consistent read. 
*/ - return(is_ok); - } - buf = mem_alloc(UNIV_PAGE_SIZE); heap = mem_heap_create(100); - /* Make a dummy template in prebuilt, which we will use - in scanning the index entries */ - - prebuilt->index = index; - /* row_merge_is_index_usable() was already checked above. */ - prebuilt->index_usable = TRUE; - prebuilt->sql_stat_start = TRUE; - prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE; - prebuilt->n_template = 0; - prebuilt->need_to_access_clustered = FALSE; - - dtuple_set_n_fields(prebuilt->search_tuple, 0); - - prebuilt->select_lock_type = LOCK_NONE; cnt = 1000; ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, 0); @@ -4073,119 +4173,6 @@ not_ok: } /*********************************************************************//** -Checks a table for corruption. -@return DB_ERROR or DB_SUCCESS */ -UNIV_INTERN -ulint -row_check_table_for_mysql( -/*======================*/ - row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL - handle */ -{ - dict_table_t* table = prebuilt->table; - dict_index_t* index; - ulint n_rows; - ulint n_rows_in_table = ULINT_UNDEFINED; - ulint ret = DB_SUCCESS; - ulint old_isolation_level; - - if (table->ibd_file_missing) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error:\n" - "InnoDB: MySQL is trying to use a table handle" - " but the .ibd file for\n" - "InnoDB: table %s does not exist.\n" - "InnoDB: Have you deleted the .ibd file" - " from the database directory under\n" - "InnoDB: the MySQL datadir, or have you" - " used DISCARD TABLESPACE?\n" - "InnoDB: Look from\n" - "InnoDB: " REFMAN "innodb-troubleshooting.html\n" - "InnoDB: how you can resolve the problem.\n", - table->name); - return(DB_ERROR); - } - - prebuilt->trx->op_info = "checking table"; - - old_isolation_level = prebuilt->trx->isolation_level; - - /* We must run the index record counts at an isolation level - >= READ COMMITTED, because a dirty read can see a wrong number - of records in some index; to play safe, we use always - REPEATABLE READ 
here */ - - prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ; - - /* Enlarge the fatal lock wait timeout during CHECK TABLE. */ - mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */ - mutex_exit(&kernel_mutex); - - index = dict_table_get_first_index(table); - - while (index != NULL) { - /* fputs("Validating index ", stderr); - ut_print_name(stderr, trx, FALSE, index->name); - putc('\n', stderr); */ - - if (!btr_validate_index(index, prebuilt->trx)) { - ret = DB_ERROR; - } else { - if (!row_scan_and_check_index(prebuilt,index, &n_rows)){ - ret = DB_ERROR; - } - - if (trx_is_interrupted(prebuilt->trx)) { - ret = DB_INTERRUPTED; - break; - } - - /* fprintf(stderr, "%lu entries in index %s\n", n_rows, - index->name); */ - - if (index == dict_table_get_first_index(table)) { - n_rows_in_table = n_rows; - } else if (n_rows != n_rows_in_table) { - - ret = DB_ERROR; - - fputs("Error: ", stderr); - dict_index_name_print(stderr, - prebuilt->trx, index); - fprintf(stderr, - " contains %lu entries," - " should be %lu\n", - (ulong) n_rows, - (ulong) n_rows_in_table); - } - } - - index = dict_table_get_next_index(index); - } - - /* Restore the original isolation level */ - prebuilt->trx->isolation_level = old_isolation_level; - - /* We validate also the whole adaptive hash index for all tables - at every CHECK TABLE */ - - if (!btr_search_validate()) { - - ret = DB_ERROR; - } - - /* Restore the fatal lock wait timeout after CHECK TABLE. */ - mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */ - mutex_exit(&kernel_mutex); - - prebuilt->trx->op_info = ""; - - return(ret); -} - -/*********************************************************************//** Determines if a table is a magic monitor table. 
@return TRUE if monitor table */ UNIV_INTERN diff --git a/storage/xtradb/row/row0purge.c b/storage/xtradb/row/row0purge.c index 500ebe571ab..835af990672 100644 --- a/storage/xtradb/row/row0purge.c +++ b/storage/xtradb/row/row0purge.c @@ -44,6 +44,16 @@ Created 3/14/1997 Heikki Tuuri #include "row0mysql.h" #include "log0log.h" +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /********************************************************************//** Creates a purge node to a query graph. @return own: purge node */ @@ -126,6 +136,7 @@ row_purge_remove_clust_if_poss_low( pcur = &(node->pcur); btr_cur = btr_pcur_get_btr_cur(pcur); + log_free_check(); mtr_start(&mtr); success = row_purge_reposition_pcur(mode, node, &mtr); diff --git a/storage/xtradb/row/row0row.c b/storage/xtradb/row/row0row.c index 128ac3ba3e8..cb7dfa2b7c9 100644 --- a/storage/xtradb/row/row0row.c +++ b/storage/xtradb/row/row0row.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -915,6 +915,10 @@ row_raw_format( ret = row_raw_format_int(data, data_len, prtype, buf, buf_size, &format_in_hex); + if (format_in_hex) { + + goto format_in_hex; + } break; case DATA_CHAR: case DATA_VARCHAR: @@ -923,14 +927,15 @@ row_raw_format( ret = row_raw_format_str(data, data_len, prtype, buf, buf_size, &format_in_hex); + if (format_in_hex) { + + goto format_in_hex; + } + break; /* XXX support more data types */ default: - - format_in_hex = TRUE; - } - - if (format_in_hex) { + format_in_hex: if (UNIV_LIKELY(buf_size > 2)) { diff --git a/storage/xtradb/row/row0sel.c b/storage/xtradb/row/row0sel.c index 61b4f84d6b2..0c728154257 100644 --- a/storage/xtradb/row/row0sel.c +++ b/storage/xtradb/row/row0sel.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -132,7 +132,8 @@ index record. NOTE: the comparison is NOT done as a binary comparison, but character fields are compared with collation! 
@return TRUE if the secondary record is equal to the corresponding -fields in the clustered record, when compared with collation */ +fields in the clustered record, when compared with collation; +FALSE if not equal or if the clustered record has been marked for deletion */ static ibool row_sel_sec_rec_is_for_clust_rec( @@ -431,10 +432,6 @@ row_sel_fetch_columns( data = rec_get_nth_field(rec, offsets, field_no, &len); - if (len == UNIV_SQL_NULL) { - len = UNIV_SQL_NULL; - } - needs_copy = column->copy_val; } @@ -855,7 +852,7 @@ row_sel_get_clust_rec( trx = thr_get_trx(thr); if (srv_locks_unsafe_for_binlog - || trx->isolation_level == TRX_ISO_READ_COMMITTED) { + || trx->isolation_level <= TRX_ISO_READ_COMMITTED) { lock_type = LOCK_REC_NOT_GAP; } else { lock_type = LOCK_ORDINARY; @@ -866,8 +863,14 @@ row_sel_get_clust_rec( clust_rec, index, offsets, node->row_lock_mode, lock_type, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS: + case DB_SUCCESS_LOCKED_REC: + /* Declare the variable uninitialized in Valgrind. + It should be set to DB_SUCCESS at func_exit. */ + UNIV_MEM_INVALID(&err, sizeof err); + break; + default: goto err_exit; } } else { @@ -937,9 +940,9 @@ err_exit: /*********************************************************************//** Sets a lock on a record. 
-@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ UNIV_INLINE -ulint +enum db_err sel_set_rec_lock( /*=============*/ const buf_block_t* block, /*!< in: buffer block of rec */ @@ -951,8 +954,8 @@ sel_set_rec_lock( LOC_REC_NOT_GAP */ que_thr_t* thr) /*!< in: query thread */ { - trx_t* trx; - ulint err; + trx_t* trx; + enum db_err err; trx = thr_get_trx(thr); @@ -1468,7 +1471,7 @@ rec_loop: if (srv_locks_unsafe_for_binlog || trx->isolation_level - == TRX_ISO_READ_COMMITTED) { + <= TRX_ISO_READ_COMMITTED) { if (page_rec_is_supremum(next_rec)) { @@ -1485,11 +1488,15 @@ rec_loop: node->row_lock_mode, lock_type, thr); - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: /* Note that in this case we will store in pcur the PREDECESSOR of the record we are waiting the lock for */ - goto lock_wait_or_error; } } @@ -1525,7 +1532,7 @@ skip_lock: trx = thr_get_trx(thr); if (srv_locks_unsafe_for_binlog - || trx->isolation_level == TRX_ISO_READ_COMMITTED) { + || trx->isolation_level <= TRX_ISO_READ_COMMITTED) { if (page_rec_is_supremum(rec)) { @@ -1541,8 +1548,12 @@ skip_lock: rec, index, offsets, node->row_lock_mode, lock_type, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -2170,36 +2181,6 @@ row_fetch_print( return((void*)42); } -/****************************************************************//** -Callback function for fetch that stores an unsigned 4 byte integer to the -location pointed. The column's type must be DATA_INT, DATA_UNSIGNED, length -= 4. 
-@return always returns NULL */ -UNIV_INTERN -void* -row_fetch_store_uint4( -/*==================*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: data pointer */ -{ - sel_node_t* node = row; - ib_uint32_t* val = user_arg; - ulint tmp; - - dfield_t* dfield = que_node_get_val(node->select_list); - const dtype_t* type = dfield_get_type(dfield); - ulint len = dfield_get_len(dfield); - - ut_a(dtype_get_mtype(type) == DATA_INT); - ut_a(dtype_get_prtype(type) & DATA_UNSIGNED); - ut_a(len == 4); - - tmp = mach_read_from_4(dfield_get_data(dfield)); - *val = (ib_uint32_t) tmp; - - return(NULL); -} - /***********************************************************//** Prints a row in a select result. @return query thread to run next or NULL */ @@ -2531,6 +2512,7 @@ row_sel_field_store_in_mysql_format( byte* pad_ptr; ut_ad(len != UNIV_SQL_NULL); + UNIV_MEM_ASSERT_RW(data, len); switch (templ->type) { case DATA_INT: @@ -2698,6 +2680,16 @@ row_sel_store_mysql_rec( prebuilt->blob_heap = NULL; } +// psergey@askmonty.org: don't take the following: +#if 0 + /* init null bytes with default values as they might be + + left uninitialized in some cases and these uninited bytes + might be copied into mysql record buffer that leads to + valgrind warnings */ + memcpy(mysql_rec, prebuilt->default_rec, prebuilt->null_bitmap_len); +#endif + for (i = start_field_no; i < end_field_no /* prebuilt->n_template */ ; i++) { templ = prebuilt->mysql_template + i; @@ -2781,6 +2773,9 @@ row_sel_store_mysql_rec( /* MySQL assumes that the field for an SQL NULL value is set to the default value. */ + UNIV_MEM_ASSERT_RW(prebuilt->default_rec + + templ->mysql_col_offset, + templ->mysql_col_len); mysql_rec[templ->mysql_null_byte_offset] |= (byte) templ->mysql_null_bit_mask; memcpy(mysql_rec + templ->mysql_col_offset, @@ -2832,9 +2827,9 @@ row_sel_build_prev_vers_for_mysql( Retrieves the clustered index record corresponding to a record in a non-clustered index. Does the necessary locking. 
Used in the MySQL interface. -@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ static -ulint +enum db_err row_sel_get_clust_rec_for_mysql( /*============================*/ row_prebuilt_t* prebuilt,/*!< in: prebuilt struct in the handle */ @@ -2861,7 +2856,7 @@ row_sel_get_clust_rec_for_mysql( dict_index_t* clust_index; const rec_t* clust_rec; rec_t* old_vers; - ulint err; + enum db_err err; trx_t* trx; *out_rec = NULL; @@ -2920,6 +2915,7 @@ row_sel_get_clust_rec_for_mysql( clust_rec = NULL; + err = DB_SUCCESS; goto func_exit; } @@ -2935,8 +2931,11 @@ row_sel_get_clust_rec_for_mysql( 0, btr_pcur_get_block(prebuilt->clust_pcur), clust_rec, clust_index, *offsets, prebuilt->select_lock_type, LOCK_REC_NOT_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS: + case DB_SUCCESS_LOCKED_REC: + break; + default: goto err_exit; } } else { @@ -2983,6 +2982,7 @@ row_sel_get_clust_rec_for_mysql( if (clust_rec && (old_vers + || trx->isolation_level <= TRX_ISO_READ_UNCOMMITTED || rec_get_deleted_flag(rec, dict_table_is_comp( sec_index->table))) && !row_sel_sec_rec_is_for_clust_rec( @@ -2995,6 +2995,8 @@ row_sel_get_clust_rec_for_mysql( rec, sec_index, clust_rec, clust_index)); #endif } + + err = DB_SUCCESS; } func_exit: @@ -3007,7 +3009,6 @@ func_exit: btr_pcur_store_position(prebuilt->clust_pcur, mtr); } - err = DB_SUCCESS; err_exit: return(err); } @@ -3104,6 +3105,11 @@ row_sel_pop_cached_row_for_mysql( for (i = 0; i < prebuilt->n_template; i++) { templ = prebuilt->mysql_template + i; +#if 0 /* Some of the cached_rec may legitimately be uninitialized. */ + UNIV_MEM_ASSERT_RW(cached_rec + + templ->mysql_col_offset, + templ->mysql_col_len); +#endif ut_memcpy(buf + templ->mysql_col_offset, cached_rec + templ->mysql_col_offset, templ->mysql_col_len); @@ -3122,6 +3128,11 @@ row_sel_pop_cached_row_for_mysql( } } else { +#if 0 /* Some of the cached_rec may legitimately be uninitialized. 
*/ + UNIV_MEM_ASSERT_RW(prebuilt->fetch_cache + [prebuilt->fetch_cache_first], + prebuilt->mysql_prefix_len); +#endif ut_memcpy(buf, prebuilt->fetch_cache[prebuilt->fetch_cache_first], prebuilt->mysql_prefix_len); @@ -3175,6 +3186,8 @@ row_sel_push_cache_row_for_mysql( } ut_ad(prebuilt->fetch_cache_first == 0); + UNIV_MEM_INVALID(prebuilt->fetch_cache[prebuilt->n_fetch_cached], + prebuilt->mysql_row_len); if (UNIV_UNLIKELY(!row_sel_store_mysql_rec( prebuilt->fetch_cache[ @@ -3242,14 +3255,17 @@ row_sel_try_search_shortcut_for_mysql( ut_ad(dict_index_is_clust(index)); ut_ad(!prebuilt->templ_contains_blob); +#ifndef UNIV_SEARCH_DEBUG btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, pcur, -#ifndef UNIV_SEARCH_DEBUG RW_S_LATCH, -#else + mtr); +#else /* UNIV_SEARCH_DEBUG */ + btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, pcur, 0, -#endif mtr); +#endif /* UNIV_SEARCH_DEBUG */ rec = btr_pcur_get_rec(pcur); if (!page_rec_is_user_rec(rec)) { @@ -3350,6 +3366,7 @@ row_search_for_mysql( ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; ibool some_fields_in_buffer; + ibool problematic_use = FALSE; ibool get_clust_rec = 0; rec_offs_init(offsets_); @@ -3667,6 +3684,13 @@ shortcut_fails_too_big_rec: trx->has_search_latch = FALSE; } + ut_ad(prebuilt->sql_stat_start || trx->conc_state == TRX_ACTIVE); + ut_ad(trx->conc_state == TRX_NOT_STARTED + || trx->conc_state == TRX_ACTIVE); + ut_ad(prebuilt->sql_stat_start + || prebuilt->select_lock_type != LOCK_NONE + || trx->read_view); + trx_start_if_not_started(trx); if (trx->isolation_level <= TRX_ISO_READ_COMMITTED @@ -3737,7 +3761,7 @@ shortcut_fails_too_big_rec: && !page_rec_is_supremum(rec) && set_also_gap_locks && !(srv_locks_unsafe_for_binlog - || trx->isolation_level == TRX_ISO_READ_COMMITTED) + || trx->isolation_level <= TRX_ISO_READ_COMMITTED) && prebuilt->select_lock_type != LOCK_NONE) { /* Try to place a gap lock on the next index record @@ -3751,8 
+3775,12 @@ shortcut_fails_too_big_rec: prebuilt->select_lock_type, LOCK_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -3768,6 +3796,15 @@ shortcut_fails_too_big_rec: } } + if (!prebuilt->mysql_has_locked) { + fprintf(stderr, "InnoDB: Error: row_search_for_mysql() is called without ha_innobase::external_lock()\n"); + if (trx->mysql_thd != NULL) { + innobase_mysql_print_thd(stderr, trx->mysql_thd, 600); + } + problematic_use = TRUE; + } +retry_check: + if (!prebuilt->sql_stat_start) { /* No need to set an intention lock or assign a read view */ @@ -3778,6 +3815,14 @@ shortcut_fails_too_big_rec: " perform a consistent read\n" "InnoDB: but the read view is not assigned!\n", stderr); + if (problematic_use) { + fprintf(stderr, "InnoDB: It may be caused by calling " + "without ha_innobase::external_lock()\n" + "InnoDB: For the first-aid, avoiding the crash. " + "But it should be fixed ASAP.\n"); + prebuilt->sql_stat_start = TRUE; + goto retry_check; + } trx_print(stderr, trx, 600); fputc('\n', stderr); ut_a(0); @@ -3840,7 +3885,7 @@ rec_loop: if (set_also_gap_locks && !(srv_locks_unsafe_for_binlog - || trx->isolation_level == TRX_ISO_READ_COMMITTED) + || trx->isolation_level <= TRX_ISO_READ_COMMITTED) && prebuilt->select_lock_type != LOCK_NONE) { /* Try to place a lock on the index record */ @@ -3857,8 +3902,12 @@ rec_loop: prebuilt->select_lock_type, LOCK_ORDINARY, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -3974,7 +4023,7 @@ wrong_offs: if (set_also_gap_locks && !(srv_locks_unsafe_for_binlog || trx->isolation_level - == TRX_ISO_READ_COMMITTED) + <= TRX_ISO_READ_COMMITTED) && prebuilt->select_lock_type != LOCK_NONE) { /* Try to place a gap lock on the index @@ -3988,8 +4037,11 @@ wrong_offs: 
prebuilt->select_lock_type, LOCK_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -4010,7 +4062,7 @@ wrong_offs: if (set_also_gap_locks && !(srv_locks_unsafe_for_binlog || trx->isolation_level - == TRX_ISO_READ_COMMITTED) + <= TRX_ISO_READ_COMMITTED) && prebuilt->select_lock_type != LOCK_NONE) { /* Try to place a gap lock on the index @@ -4024,8 +4076,11 @@ wrong_offs: prebuilt->select_lock_type, LOCK_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -4058,7 +4113,7 @@ wrong_offs: if (!set_also_gap_locks || srv_locks_unsafe_for_binlog - || trx->isolation_level == TRX_ISO_READ_COMMITTED + || trx->isolation_level <= TRX_ISO_READ_COMMITTED || (unique_search && !UNIV_UNLIKELY(rec_get_deleted_flag(rec, comp)))) { @@ -4095,17 +4150,24 @@ no_gap_lock: switch (err) { const rec_t* old_vers; - case DB_SUCCESS: + case DB_SUCCESS_LOCKED_REC: if (srv_locks_unsafe_for_binlog - || trx->isolation_level == TRX_ISO_READ_COMMITTED) { + || trx->isolation_level + <= TRX_ISO_READ_COMMITTED) { /* Note that a record of prebuilt->index was locked. */ prebuilt->new_rec_locks = 1; } + err = DB_SUCCESS; + case DB_SUCCESS: break; case DB_LOCK_WAIT: + /* Never unlock rows that were part of a conflict. */ + prebuilt->new_rec_locks = 0; + if (UNIV_LIKELY(prebuilt->row_read_type != ROW_READ_TRY_SEMI_CONSISTENT) + || unique_search || index != clust_index) { goto lock_wait_or_error; @@ -4132,7 +4194,6 @@ no_gap_lock: if (UNIV_LIKELY(trx->wait_lock != NULL)) { lock_cancel_waiting_and_release( trx->wait_lock); - prebuilt->new_rec_locks = 0; } else { mutex_exit(&kernel_mutex); @@ -4144,9 +4205,6 @@ no_gap_lock: ULINT_UNDEFINED, &heap); err = DB_SUCCESS; - /* Note that a record of - prebuilt->index was locked. 
*/ - prebuilt->new_rec_locks = 1; break; } mutex_exit(&kernel_mutex); @@ -4229,7 +4287,7 @@ no_gap_lock: /* The record is delete-marked: we can skip it */ if ((srv_locks_unsafe_for_binlog - || trx->isolation_level == TRX_ISO_READ_COMMITTED) + || trx->isolation_level <= TRX_ISO_READ_COMMITTED) && prebuilt->select_lock_type != LOCK_NONE && !did_semi_consistent_read) { @@ -4299,33 +4357,36 @@ idx_cond_check: err = row_sel_get_clust_rec_for_mysql(prebuilt, index, rec, thr, &clust_rec, &offsets, &heap, &mtr); - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS: + if (clust_rec == NULL) { + /* The record did not exist in the read view */ + ut_ad(prebuilt->select_lock_type == LOCK_NONE); + goto next_rec; + } + break; + case DB_SUCCESS_LOCKED_REC: + ut_a(clust_rec != NULL); + if (srv_locks_unsafe_for_binlog + || trx->isolation_level + <= TRX_ISO_READ_COMMITTED) { + /* Note that the clustered index record + was locked. */ + prebuilt->new_rec_locks = 2; + } + err = DB_SUCCESS; + break; + default: goto lock_wait_or_error; } - if (clust_rec == NULL) { - /* The record did not exist in the read view */ - ut_ad(prebuilt->select_lock_type == LOCK_NONE); - - goto next_rec; - } - - if ((srv_locks_unsafe_for_binlog - || trx->isolation_level == TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE) { - /* Note that both the secondary index record - and the clustered index record were locked. 
*/ - ut_ad(prebuilt->new_rec_locks == 1); - prebuilt->new_rec_locks = 2; - } - if (UNIV_UNLIKELY(rec_get_deleted_flag(clust_rec, comp))) { /* The record is delete marked: we can skip it */ if ((srv_locks_unsafe_for_binlog - || trx->isolation_level == TRX_ISO_READ_COMMITTED) + || trx->isolation_level <= TRX_ISO_READ_COMMITTED) && prebuilt->select_lock_type != LOCK_NONE) { /* No need to keep a lock on a delete-marked @@ -4547,7 +4608,7 @@ lock_wait_or_error: moves_up, &mtr); if ((srv_locks_unsafe_for_binlog - || trx->isolation_level == TRX_ISO_READ_COMMITTED) + || trx->isolation_level <= TRX_ISO_READ_COMMITTED) && !same_user_rec) { /* Since we were not able to restore the cursor @@ -4711,7 +4772,6 @@ row_search_autoinc_read_column( ut_a(len != UNIV_SQL_NULL); - /* we assume AUTOINC value cannot be negative */ switch (mtype) { case DATA_INT: ut_a(len <= sizeof value); @@ -4720,12 +4780,12 @@ row_search_autoinc_read_column( case DATA_FLOAT: ut_a(len == sizeof(float)); - value = mach_float_read(data); + value = (ib_uint64_t) mach_float_read(data); break; case DATA_DOUBLE: ut_a(len == sizeof(double)); - value = mach_double_read(data); + value = (ib_uint64_t) mach_double_read(data); break; default: diff --git a/storage/xtradb/row/row0uins.c b/storage/xtradb/row/row0uins.c index 9f9c814f1a5..930a5cf13b6 100644 --- a/storage/xtradb/row/row0uins.c +++ b/storage/xtradb/row/row0uins.c @@ -46,6 +46,16 @@ Created 2/25/1997 Heikki Tuuri #include "ibuf0ibuf.h" #include "log0log.h" +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. 
+If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /***************************************************************//** Removes a clustered index record. The pcur in node was positioned on the record, now it is detached. @@ -152,7 +162,6 @@ row_undo_ins_remove_sec_low( ulint err; mtr_t mtr; - log_free_check(); mtr_start(&mtr); found = row_search_index_entry(index, entry, mode, &pcur, &mtr); @@ -335,6 +344,7 @@ row_undo_ins( transactions. */ ut_a(trx_is_recv(node->trx)); } else { + log_free_check(); err = row_undo_ins_remove_sec(node->index, entry); if (err != DB_SUCCESS) { @@ -346,5 +356,6 @@ row_undo_ins( node->index = dict_table_get_next_index(node->index); } + log_free_check(); return(row_undo_ins_remove_clust_rec(node)); } diff --git a/storage/xtradb/row/row0umod.c b/storage/xtradb/row/row0umod.c index 6be475d8c78..8464b0f95cc 100644 --- a/storage/xtradb/row/row0umod.c +++ b/storage/xtradb/row/row0umod.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -58,12 +58,22 @@ delete marked clustered index record was delete unmarked and possibly also some of its fields were changed. Now, it is possible that the delete marked version has become obsolete at the time the undo is started. */ +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). 
The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /***********************************************************//** Checks if also the previous version of the clustered index record was modified or inserted by the same transaction, and its undo number is such that it should be undone in the same rollback. @return TRUE if also previous modify or insert of this row should be undone */ -UNIV_INLINE +static ibool row_undo_mod_undo_also_prev_vers( /*=============================*/ @@ -144,13 +154,17 @@ row_undo_mod_clust_low( /***********************************************************//** Removes a clustered index record after undo if possible. +This is attempted when the record was inserted by updating a +delete-marked record and there no longer exist transactions +that would see the delete-marked record. In other words, we +roll back the insert by purging the record. 
@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */ static ulint row_undo_mod_remove_clust_low( /*==========================*/ undo_node_t* node, /*!< in: row undo node */ - que_thr_t* thr __attribute__((unused)), /*!< in: query thread */ + que_thr_t* thr, /*!< in: query thread */ mtr_t* mtr, /*!< in: mtr */ ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ { @@ -159,6 +173,7 @@ row_undo_mod_remove_clust_low( ulint err; ibool success; + ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC); pcur = &(node->pcur); btr_cur = btr_pcur_get_btr_cur(pcur); @@ -190,11 +205,13 @@ row_undo_mod_remove_clust_low( } else { ut_ad(mode == BTR_MODIFY_TREE); - /* Note that since this operation is analogous to purge, - we can free also inherited externally stored fields: - hence the RB_NONE in the call below */ + /* This operation is analogous to purge, we can free also + inherited externally stored fields */ - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, RB_NONE, mtr); + btr_cur_pessimistic_delete(&err, FALSE, btr_cur, + thr_is_recv(thr) + ? RB_RECOVERY_PURGE_REC + : RB_NONE, mtr); /* The delete operation may fail if we have little file space left: TODO: easiest to crash the database @@ -224,6 +241,8 @@ row_undo_mod_clust( ut_ad(node && thr); + log_free_check(); + /* Check if also the previous version of the clustered index record should be undone in this same rollback operation */ @@ -370,10 +389,11 @@ row_undo_mod_del_mark_or_remove_sec_low( } else { ut_ad(mode == BTR_MODIFY_TREE); - /* No need to distinguish RB_RECOVERY here, because we - are deleting a secondary index record: the distinction - between RB_NORMAL and RB_RECOVERY only matters when - deleting a record that contains externally stored + /* No need to distinguish RB_RECOVERY_PURGE here, + because we are deleting a secondary index record: + the distinction between RB_NORMAL and + RB_RECOVERY_PURGE only matters when deleting a + record that contains externally stored columns. 
*/ ut_ad(!dict_index_is_clust(index)); btr_cur_pessimistic_delete(&err, FALSE, btr_cur, @@ -438,7 +458,7 @@ row_undo_mod_del_unmark_sec_and_undo_update( BTR_MODIFY_TREE */ que_thr_t* thr, /*!< in: query thread */ dict_index_t* index, /*!< in: index */ - dtuple_t* entry) /*!< in: index entry */ + const dtuple_t* entry) /*!< in: index entry */ { mem_heap_t* heap; btr_pcur_t pcur; @@ -533,6 +553,7 @@ row_undo_mod_upd_del_sec( dict_index_t* index; ulint err = DB_SUCCESS; + ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC); heap = mem_heap_create(1024); while (node->index != NULL) { @@ -550,7 +571,7 @@ row_undo_mod_upd_del_sec( does not exist. However, this situation may only occur during the rollback of incomplete transactions. */ - ut_a(trx_is_recv(thr_get_trx(thr))); + ut_a(thr_is_recv(thr)); } else { err = row_undo_mod_del_mark_or_remove_sec( node, thr, index, entry); @@ -648,24 +669,55 @@ row_undo_mod_upd_exist_sec( /* Build the newest version of the index entry */ entry = row_build_index_entry(node->row, node->ext, index, heap); - ut_a(entry); - /* NOTE that if we updated the fields of a - delete-marked secondary index record so that - alphabetically they stayed the same, e.g., - 'abc' -> 'aBc', we cannot return to the original - values because we do not know them. But this should - not cause problems because in row0sel.c, in queries - we always retrieve the clustered index record or an - earlier version of it, if the secondary index record - through which we do the search is delete-marked. */ - - err = row_undo_mod_del_mark_or_remove_sec(node, thr, - index, - entry); - if (err != DB_SUCCESS) { - mem_heap_free(heap); - - return(err); + if (UNIV_UNLIKELY(!entry)) { + /* The server must have crashed in + row_upd_clust_rec_by_insert(), in + row_ins_index_entry_low() before + btr_store_big_rec_extern_fields() + has written the externally stored columns + (BLOBs) of the new clustered index entry. */ + + /* The table must be in DYNAMIC or COMPRESSED + format. 
REDUNDANT and COMPACT formats + store a local 768-byte prefix of each + externally stored column. */ + ut_a(dict_table_get_format(index->table) + >= DICT_TF_FORMAT_ZIP); + + /* This is only legitimate when + rolling back an incomplete transaction + after crash recovery. */ + ut_a(thr_get_trx(thr)->is_recovered); + + /* The server must have crashed before + completing the insert of the new + clustered index entry and before + inserting to the secondary indexes. + Because node->row was not yet written + to this index, we can ignore it. But + we must restore node->undo_row. */ + } else { + /* NOTE that if we updated the fields of a + delete-marked secondary index record so that + alphabetically they stayed the same, e.g., + 'abc' -> 'aBc', we cannot return to the + original values because we do not know them. + But this should not cause problems because + in row0sel.c, in queries we always retrieve + the clustered index record or an earlier + version of it, if the secondary index record + through which we do the search is + delete-marked. */ + + err = row_undo_mod_del_mark_or_remove_sec( + node, thr, index, entry); + if (err != DB_SUCCESS) { + mem_heap_free(heap); + + return(err); + } + + mem_heap_empty(heap); } /* We may have to update the delete mark in the @@ -674,7 +726,6 @@ row_undo_mod_upd_exist_sec( the secondary index record if we updated its fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'. 
*/ - mem_heap_empty(heap); entry = row_build_index_entry(node->undo_row, node->undo_ext, index, heap); diff --git a/storage/xtradb/row/row0undo.c b/storage/xtradb/row/row0undo.c index 3d739c9689a..9ef842b5114 100644 --- a/storage/xtradb/row/row0undo.c +++ b/storage/xtradb/row/row0undo.c @@ -297,7 +297,7 @@ row_undo( if (locked_data_dict) { - row_mysql_lock_data_dictionary(trx); + row_mysql_freeze_data_dictionary(trx); } if (node->state == UNDO_NODE_INSERT) { @@ -312,7 +312,7 @@ row_undo( if (locked_data_dict) { - row_mysql_unlock_data_dictionary(trx); + row_mysql_unfreeze_data_dictionary(trx); } /* Do some cleanup */ diff --git a/storage/xtradb/row/row0upd.c b/storage/xtradb/row/row0upd.c index 58dfd43ead9..d0aaecd3dae 100644 --- a/storage/xtradb/row/row0upd.c +++ b/storage/xtradb/row/row0upd.c @@ -92,6 +92,16 @@ the x-latch freed? The most efficient way for performing a searched delete is obviously to keep the x-latch for several steps of query graph execution. */ +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /***********************************************************//** Checks if an update vector changes some of the first ordering fields of an index record. 
This is only used in foreign key checks and we can assume @@ -1344,9 +1354,6 @@ row_upd_copy_columns( data = rec_get_nth_field(rec, offsets, column->field_nos[SYM_CLUST_FIELD_NO], &len); - if (len == UNIV_SQL_NULL) { - len = UNIV_SQL_NULL; - } eval_node_copy_and_alloc_val(column, data, len); column = UT_LIST_GET_NEXT(col_var_list, column); @@ -1456,7 +1463,6 @@ row_upd_sec_index_entry( entry = row_build_index_entry(node->row, node->ext, index, heap); ut_a(entry); - log_free_check(); mtr_start(&mtr); found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur, @@ -1532,7 +1538,7 @@ Updates the secondary index record if it is changed in the row update or deletes it if this is a delete. @return DB_SUCCESS if operation successfully completed, else error code or DB_LOCK_WAIT */ -UNIV_INLINE +static ulint row_upd_sec_step( /*=============*/ @@ -2018,6 +2024,7 @@ row_upd( if (node->state == UPD_NODE_UPDATE_CLUSTERED || node->state == UPD_NODE_INSERT_CLUSTERED) { + log_free_check(); err = row_upd_clust_step(node, thr); if (err != DB_SUCCESS) { @@ -2032,6 +2039,8 @@ row_upd( } while (node->index != NULL) { + + log_free_check(); err = row_upd_sec_step(node, thr); if (err != DB_SUCCESS) { diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c index e655c4e844d..b9905116603 100644 --- a/storage/xtradb/srv/srv0srv.c +++ b/storage/xtradb/srv/srv0srv.c @@ -1,7 +1,8 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. +Copyright (c) 2009, Percona Inc. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -9,6 +10,13 @@ briefly in the InnoDB documentation. 
The contributions by Google are incorporated with their permission, and subject to the conditions contained in the file COPYING.Google. +Portions of this file contain modifications contributed and copyrighted +by Percona Inc.. Those modifications are +gratefully acknowledged and are described briefly in the InnoDB +documentation. The contributions by Percona Inc. are incorporated with +their permission, and subject to the conditions contained in the file +COPYING.Percona. + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. @@ -22,32 +30,6 @@ this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *****************************************************************************/ -/*********************************************************************** - -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. -Copyright (c) 2009, Percona Inc. - -Portions of this file contain modifications contributed and copyrighted -by Percona Inc.. Those modifications are -gratefully acknowledged and are described briefly in the InnoDB -documentation. The contributions by Percona Inc. are incorporated with -their permission, and subject to the conditions contained in the file -COPYING.Percona. - -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -Public License for more details. 
- -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -***********************************************************************/ /**************************************************//** @file srv/srv0srv.c @@ -122,7 +104,8 @@ UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600; in microseconds, in order to reduce the lagging of the purge thread. */ UNIV_INTERN ulint srv_dml_needed_delay = 0; -UNIV_INTERN ibool srv_lock_timeout_and_monitor_active = FALSE; +UNIV_INTERN ibool srv_lock_timeout_active = FALSE; +UNIV_INTERN ibool srv_monitor_active = FALSE; UNIV_INTERN ibool srv_error_monitor_active = FALSE; UNIV_INTERN const char* srv_main_thread_op_info = ""; @@ -162,9 +145,10 @@ UNIV_INTERN char** srv_data_file_names = NULL; /* size in database pages */ UNIV_INTERN ulint* srv_data_file_sizes = NULL; +UNIV_INTERN char* srv_doublewrite_file = NULL; + UNIV_INTERN ibool srv_extra_undoslots = FALSE; -UNIV_INTERN ibool srv_fast_recovery = FALSE; UNIV_INTERN ibool srv_recovery_stats = FALSE; UNIV_INTERN ulint srv_use_purge_thread = 0; @@ -198,11 +182,20 @@ UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1; the checkpoints. */ UNIV_INTERN char srv_adaptive_flushing = TRUE; -UNIV_INTERN ulong srv_show_locks_held = 10; -UNIV_INTERN ulong srv_show_verbose_locks = 0; +UNIV_INTERN ulong srv_show_locks_held = 10; +UNIV_INTERN ulong srv_show_verbose_locks = 0; + +/** Maximum number of times allowed to conditionally acquire +mutex before switching to blocking wait on the mutex */ +#define MAX_MUTEX_NOWAIT 20 +/** Check whether the number of failed nonblocking mutex +acquisition attempts exceeds maximum allowed value. If so, +srv_printf_innodb_monitor() will request mutex acquisition +with mutex_enter(), which will wait until it gets the mutex. 
*/ +#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT) -/* The sort order table of the MySQL latin1_swedish_ci character set +/** The sort order table of the MySQL latin1_swedish_ci character set collation */ UNIV_INTERN const byte* srv_latin1_ordering; @@ -218,6 +211,9 @@ UNIV_INTERN ulint srv_buf_pool_curr_size = 0; UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX; UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX; +/* key value for shm */ +UNIV_INTERN uint srv_buffer_pool_shm_key = 0; + /* This parameter is deprecated. Use srv_n_io_[read|write]_threads instead. */ UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX; @@ -387,6 +383,7 @@ UNIV_INTERN unsigned long long srv_stats_sample_pages = 8; UNIV_INTERN ulong srv_stats_method = 0; UNIV_INTERN ulong srv_stats_auto_update = 1; UNIV_INTERN ulint srv_stats_update_need_lock = 1; +UNIV_INTERN ibool srv_use_sys_stats_table = FALSE; UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE; UNIV_INTERN ibool srv_use_checksums = TRUE; @@ -410,7 +407,6 @@ UNIV_INTERN ulong srv_read_ahead = 3; /* 1: random 2: linear 3: Both */ UNIV_INTERN ulong srv_adaptive_checkpoint = 0; /* 0: none 1: reflex 2: estimate */ UNIV_INTERN ulong srv_expand_import = 0; /* 0:disable 1:enable */ -UNIV_INTERN ulint srv_relax_table_creation = 0; /* 0:disable 1:enable */ UNIV_INTERN ulint srv_pass_corrupt_table = 0; /* 0:disable 1:enable */ UNIV_INTERN ulong srv_extra_rsegments = 0; /* extra rseg for users */ @@ -439,7 +435,7 @@ static ulint srv_n_rows_inserted_old = 0; static ulint srv_n_rows_updated_old = 0; static ulint srv_n_rows_deleted_old = 0; static ulint srv_n_rows_read_old = 0; - +UNIV_INTERN ulint srv_n_lock_deadlock_count = 0; UNIV_INTERN ulint srv_n_lock_wait_count = 0; UNIV_INTERN ulint srv_n_lock_wait_current_count = 0; UNIV_INTERN ib_int64_t srv_n_lock_wait_time = 0; @@ -1771,6 +1767,11 @@ srv_suspend_mysql_thread( trx->error_state = DB_LOCK_WAIT_TIMEOUT; } + + if (trx_is_interrupted(trx)) { + + trx->error_state = 
DB_INTERRUPTED; + } } /********************************************************************//** @@ -1833,12 +1834,15 @@ srv_refresh_innodb_monitor_stats(void) } /******************************************************************//** -Outputs to a file the output of the InnoDB Monitor. */ +Outputs to a file the output of the InnoDB Monitor. +@return FALSE if not all information printed +due to failure to obtain necessary mutex */ UNIV_INTERN -void +ibool srv_printf_innodb_monitor( /*======================*/ FILE* file, /*!< in: output stream */ + ibool nowait, /*!< in: whether to wait for kernel mutex */ ulint* trx_start, /*!< out: file position of the start of the list of active transactions */ ulint* trx_end) /*!< out: file position of the end of @@ -1847,6 +1851,7 @@ srv_printf_innodb_monitor( double time_elapsed; time_t current_time; ulint n_reserved; + ibool ret; ulint btr_search_sys_subtotal; ulint lock_sys_subtotal; @@ -1877,9 +1882,9 @@ srv_printf_innodb_monitor( "Per second averages calculated from the last %lu seconds\n", (ulong)time_elapsed); - fputs("----------\n" - "BACKGROUND THREAD\n" - "----------\n", file); + fputs("-----------------\n" + "BACKGROUND THREAD\n" + "-----------------\n", file); srv_print_master_thread_info(file); fputs("----------\n" @@ -2069,22 +2074,28 @@ srv_printf_innodb_monitor( srv_n_rows_deleted_old = srv_n_rows_deleted; srv_n_rows_read_old = srv_n_rows_read; - lock_print_info_summary(file); - if (trx_start) { - long t = ftell(file); - if (t < 0) { - *trx_start = ULINT_UNDEFINED; - } else { - *trx_start = (ulint) t; + /* Only if lock_print_info_summary proceeds correctly, + before we call the lock_print_info_all_transactions + to print all the lock information. 
*/ + ret = lock_print_info_summary(file, nowait); + + if (ret) { + if (trx_start) { + long t = ftell(file); + if (t < 0) { + *trx_start = ULINT_UNDEFINED; + } else { + *trx_start = (ulint) t; + } } - } - lock_print_info_all_transactions(file); - if (trx_end) { - long t = ftell(file); - if (t < 0) { - *trx_end = ULINT_UNDEFINED; - } else { - *trx_end = (ulint) t; + lock_print_info_all_transactions(file); + if (trx_end) { + long t = ftell(file); + if (t < 0) { + *trx_end = ULINT_UNDEFINED; + } else { + *trx_end = (ulint) t; + } } } @@ -2093,6 +2104,8 @@ srv_printf_innodb_monitor( "============================\n", file); mutex_exit(&srv_innodb_monitor_mutex); fflush(file); + + return(ret); } /******************************************************************//** @@ -2133,6 +2146,8 @@ srv_export_innodb_status(void) = UT_LIST_GET_LEN(buf_pool->flush_list); export_vars.innodb_buffer_pool_pages_free = UT_LIST_GET_LEN(buf_pool->free); + export_vars.innodb_deadlocks + = srv_n_lock_deadlock_count; #ifdef UNIV_DEBUG export_vars.innodb_buffer_pool_pages_latched = buf_get_latched_pages_number(); @@ -2181,26 +2196,23 @@ srv_export_innodb_status(void) } /*********************************************************************//** -A thread which wakes up threads whose lock wait may have lasted too long. -This also prints the info output by various InnoDB monitors. +A thread which prints the info output by various InnoDB monitors. 
@return a dummy parameter */ UNIV_INTERN os_thread_ret_t -srv_lock_timeout_and_monitor_thread( -/*================================*/ +srv_monitor_thread( +/*===============*/ void* arg __attribute__((unused))) /*!< in: a dummy parameter required by os_thread_create */ { - srv_slot_t* slot; double time_elapsed; time_t current_time; time_t last_table_monitor_time; time_t last_tablespace_monitor_time; time_t last_monitor_time; - ibool some_waits; - double wait_time; - ulint i; + ulint mutex_skipped; + ibool last_srv_print_monitor; #ifdef UNIV_DEBUG_THREAD_CREATION fprintf(stderr, "Lock timeout thread starts, id %lu\n", @@ -2211,13 +2223,15 @@ srv_lock_timeout_and_monitor_thread( last_table_monitor_time = time(NULL); last_tablespace_monitor_time = time(NULL); last_monitor_time = time(NULL); + mutex_skipped = 0; + last_srv_print_monitor = srv_print_innodb_monitor; loop: - srv_lock_timeout_and_monitor_active = TRUE; + srv_monitor_active = TRUE; - /* When someone is waiting for a lock, we wake up every second - and check if a timeout has passed for a lock wait */ + /* Wake up every 5 seconds to see if we need to print + monitor information. */ - os_thread_sleep(1000000); + os_thread_sleep(5000000); current_time = time(NULL); @@ -2227,14 +2241,40 @@ loop: last_monitor_time = time(NULL); if (srv_print_innodb_monitor) { - srv_printf_innodb_monitor(stderr, NULL, NULL); + /* Reset mutex_skipped counter everytime + srv_print_innodb_monitor changes. 
This is to + ensure we will not be blocked by kernel_mutex + for short duration information printing, + such as requested by sync_array_print_long_waits() */ + if (!last_srv_print_monitor) { + mutex_skipped = 0; + last_srv_print_monitor = TRUE; + } + + if (!srv_printf_innodb_monitor(stderr, + MUTEX_NOWAIT(mutex_skipped), + NULL, NULL)) { + mutex_skipped++; + } else { + /* Reset the counter */ + mutex_skipped = 0; + } + } else { + last_srv_print_monitor = FALSE; } + if (srv_innodb_status) { mutex_enter(&srv_monitor_file_mutex); rewind(srv_monitor_file); - srv_printf_innodb_monitor(srv_monitor_file, NULL, - NULL); + if (!srv_printf_innodb_monitor(srv_monitor_file, + MUTEX_NOWAIT(mutex_skipped), + NULL, NULL)) { + mutex_skipped++; + } else { + mutex_skipped = 0; + } + os_file_set_eof(srv_monitor_file); mutex_exit(&srv_monitor_file_mutex); } @@ -2287,6 +2327,56 @@ loop: } } + if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) { + goto exit_func; + } + + if (srv_print_innodb_monitor + || srv_print_innodb_lock_monitor + || srv_print_innodb_tablespace_monitor + || srv_print_innodb_table_monitor) { + goto loop; + } + + srv_monitor_active = FALSE; + + goto loop; + +exit_func: + srv_monitor_active = FALSE; + + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit and not use return() to exit. */ + + os_thread_exit(NULL); + + OS_THREAD_DUMMY_RETURN; +} + +/*********************************************************************//** +A thread which wakes up threads whose lock wait may have lasted too long. 
+@return a dummy parameter */ +UNIV_INTERN +os_thread_ret_t +srv_lock_timeout_thread( +/*====================*/ + void* arg __attribute__((unused))) + /* in: a dummy parameter required by + os_thread_create */ +{ + srv_slot_t* slot; + ibool some_waits; + double wait_time; + ulint i; + +loop: + /* When someone is waiting for a lock, we wake up every second + and check if a timeout has passed for a lock wait */ + + os_thread_sleep(1000000); + + srv_lock_timeout_active = TRUE; + mutex_enter(&kernel_mutex); some_waits = FALSE; @@ -2310,9 +2400,10 @@ loop: lock_wait_timeout = thd_lock_wait_timeout( trx->mysql_thd); - if (lock_wait_timeout < 100000000 - && (wait_time > (double) lock_wait_timeout - || wait_time < 0)) { + if (trx_is_interrupted(trx) + || (lock_wait_timeout < 100000000 + && (wait_time > (double) lock_wait_timeout + || wait_time < 0))) { /* Timeout exceeded or a wrap-around in system time counter: cancel the lock request queued @@ -2337,17 +2428,11 @@ loop: goto exit_func; } - if (some_waits || srv_print_innodb_monitor - || srv_print_innodb_lock_monitor - || srv_print_innodb_tablespace_monitor - || srv_print_innodb_table_monitor) { + if (some_waits) { goto loop; } - /* No one was waiting for a lock and no monitor was active: - suspend this thread */ - - srv_lock_timeout_and_monitor_active = FALSE; + srv_lock_timeout_active = FALSE; #if 0 /* The following synchronisation is disabled, since @@ -2357,7 +2442,7 @@ loop: goto loop; exit_func: - srv_lock_timeout_and_monitor_active = FALSE; + srv_lock_timeout_active = FALSE; /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. 
*/ @@ -2706,7 +2791,10 @@ loop: BUF_FLUSH_LIST, n_flush, IB_ULONGLONG_MAX); - skip_sleep = TRUE; + + if (n_flush == PCT_IO(100)) { + skip_sleep = TRUE; + } } mutex_enter(&(log_sys->mutex)); @@ -2817,7 +2905,7 @@ loop: if (bpl) { retry_flush_batch: n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, - bpl, + (ulint) bpl, oldest_lsn + (lsn - lsn_old)); if (n_pages_flushed == ULINT_UNDEFINED) { os_thread_sleep(5000); diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c index dcc13ea17b6..62ffa366f18 100644 --- a/storage/xtradb/srv/srv0start.c +++ b/storage/xtradb/srv/srv0start.c @@ -1,7 +1,8 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2008, Google Inc. +Copyright (c) 2009, Percona Inc. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -9,6 +10,13 @@ briefly in the InnoDB documentation. The contributions by Google are incorporated with their permission, and subject to the conditions contained in the file COPYING.Google. +Portions of this file contain modifications contributed and copyrighted +by Percona Inc.. Those modifications are +gratefully acknowledged and are described briefly in the InnoDB +documentation. The contributions by Percona Inc. are incorporated with +their permission, and subject to the conditions contained in the file +COPYING.Percona. + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. 
@@ -22,32 +30,6 @@ this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *****************************************************************************/ -/*********************************************************************** - -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. -Copyright (c) 2009, Percona Inc. - -Portions of this file contain modifications contributed and copyrighted -by Percona Inc.. Those modifications are -gratefully acknowledged and are described briefly in the InnoDB -documentation. The contributions by Percona Inc. are incorporated with -their permission, and subject to the conditions contained in the file -COPYING.Percona. - -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -Public License for more details. 
- -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -***********************************************************************/ /********************************************************************//** @file srv/srv0start.c @@ -105,6 +87,7 @@ Created 2/16/1996 Heikki Tuuri # include "btr0pcur.h" # include "thr0loc.h" # include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */ +# include "zlib.h" /* for ZLIB_VERSION */ /** Log sequence number immediately after startup */ UNIV_INTERN ib_uint64_t srv_start_lsn; @@ -143,9 +126,9 @@ static mutex_t ios_mutex; static ulint ios; /** io_handler_thread parameters for thread identification */ -static ulint n[SRV_MAX_N_IO_THREADS + 5 + 64]; +static ulint n[SRV_MAX_N_IO_THREADS + 6 + 64]; /** io_handler_thread identifiers */ -static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 5 + 64]; +static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 + 64]; /** We use this mutex to test the return value of pthread_mutex_trylock on successful locking. HP-UX does NOT return 0, though Linux et al do. */ @@ -728,6 +711,7 @@ open_or_create_data_files( /*======================*/ ibool* create_new_db, /*!< out: TRUE if new database should be created */ + ibool* create_new_doublewrite_file, #ifdef UNIV_LOG_ARCHIVE ulint* min_arch_log_no,/*!< out: min of archived log numbers in data files */ @@ -760,6 +744,7 @@ open_or_create_data_files( *sum_of_new_sizes = 0; *create_new_db = FALSE; + *create_new_doublewrite_file = FALSE; srv_normalize_path_for_win(srv_data_home); @@ -992,6 +977,142 @@ skip_size_check: srv_data_file_is_raw_partition[i] != 0); } + /* special file for doublewrite buffer */ + if (srv_doublewrite_file) + { + srv_normalize_path_for_win(srv_doublewrite_file); + + fprintf(stderr, + "InnoDB: Notice: innodb_doublewrite_file is specified.\n" + "InnoDB: This is for expert only. 
Don't use if you don't understand what is it 'WELL'.\n" + "InnoDB: ### Don't specify older file than the last checkpoint ###\n" + "InnoDB: otherwise the older doublewrite buffer will break your data during recovery!\n"); + + strcpy(name, srv_doublewrite_file); + + /* First we try to create the file: if it already + exists, ret will get value FALSE */ + + files[i] = os_file_create(name, OS_FILE_CREATE, + OS_FILE_NORMAL, + OS_DATA_FILE, &ret); + + if (ret == FALSE && os_file_get_last_error(FALSE) + != OS_FILE_ALREADY_EXISTS +#ifdef UNIV_AIX + /* AIX 5.1 after security patch ML7 may have + errno set to 0 here, which causes our function + to return 100; work around that AIX problem */ + && os_file_get_last_error(FALSE) != 100 +#endif + ) { + fprintf(stderr, + "InnoDB: Error in creating" + " or opening %s\n", + name); + + return(DB_ERROR); + } + + if (ret == FALSE) { + /* We open the data file */ + + files[i] = os_file_create( + name, OS_FILE_OPEN, OS_FILE_NORMAL, + OS_DATA_FILE, &ret); + + if (!ret) { + fprintf(stderr, + "InnoDB: Error in opening %s\n", name); + os_file_get_last_error(TRUE); + + return(DB_ERROR); + } + + ret = os_file_get_size(files[i], &size, &size_high); + ut_a(ret); + /* Round size downward to megabytes */ + + rounded_size_pages + = (size / (1024 * 1024) + 4096 * size_high) + << (20 - UNIV_PAGE_SIZE_SHIFT); + + if (rounded_size_pages != TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9) { + + fprintf(stderr, + "InnoDB: Warning: doublewrite buffer file %s" + " is of a different size\n" + "InnoDB: %lu pages" + " (rounded down to MB)\n" + "InnoDB: than intended size" + " %lu pages...\n", + name, + (ulong) rounded_size_pages, + (ulong) TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9); + } + + fil_read_flushed_lsn_and_arch_log_no( + files[i], one_opened, +#ifdef UNIV_LOG_ARCHIVE + min_arch_log_no, max_arch_log_no, +#endif /* UNIV_LOG_ARCHIVE */ + min_flushed_lsn, max_flushed_lsn); + one_opened = TRUE; + } else { + /* We created the data file and now write it full of + zeros */ + + 
*create_new_doublewrite_file = TRUE; + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Doublewrite buffer file %s did not" + " exist: new to be created\n", + name); + + if (*create_new_db == FALSE) { + fprintf(stderr, + "InnoDB: Warning: Previous version's ibdata files may cause crash.\n" + " If you use that, please use the ibdata files of this version.\n"); + } + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Setting file %s size to %lu MB\n", + name, + (ulong) ((TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9) + >> (20 - UNIV_PAGE_SIZE_SHIFT))); + + fprintf(stderr, + "InnoDB: Database physically writes the" + " file full: wait...\n"); + + ret = os_file_set_size( + name, files[i], + srv_calc_low32(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9), + srv_calc_high32(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9)); + + if (!ret) { + fprintf(stderr, + "InnoDB: Error in creating %s:" + " probably out of disk space\n", name); + + return(DB_ERROR); + } + } + + ret = os_file_close(files[i]); + ut_a(ret); + + fil_space_create(name, TRX_DOUBLEWRITE_SPACE, 0, FIL_TABLESPACE); + + ut_a(fil_validate()); + + fil_node_create(name, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, TRX_DOUBLEWRITE_SPACE, FALSE); + + i++; + } + ios = 0; mutex_create(&ios_mutex, SYNC_NO_ORDER_CHECK); @@ -1010,6 +1131,7 @@ innobase_start_or_create_for_mysql(void) { buf_pool_t* ret; ibool create_new_db; + ibool create_new_doublewrite_file; ibool log_file_created; ibool log_created = FALSE; ibool log_opened = FALSE; @@ -1074,7 +1196,11 @@ innobase_start_or_create_for_mysql(void) #ifdef UNIV_IBUF_DEBUG fprintf(stderr, "InnoDB: !!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!\n" - "InnoDB: Crash recovery will fail with UNIV_IBUF_DEBUG\n"); +# ifdef UNIV_IBUF_COUNT_DEBUG + "InnoDB: !!!!!!!! 
UNIV_IBUF_COUNT_DEBUG switched on !!!!!!!!!\n" + "InnoDB: Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG\n" +# endif + ); #endif #ifdef UNIV_SYNC_DEBUG @@ -1101,7 +1227,15 @@ innobase_start_or_create_for_mysql(void) "InnoDB: The InnoDB memory heap is disabled\n"); } - fprintf(stderr, "InnoDB: %s\n", IB_ATOMICS_STARTUP_MSG); + fputs("InnoDB: " IB_ATOMICS_STARTUP_MSG + "\nInnoDB: Compressed tables use zlib " ZLIB_VERSION +#ifdef UNIV_ZIP_DEBUG + " with validation" +#endif /* UNIV_ZIP_DEBUG */ +#ifdef UNIV_ZIP_COPY + " and extra copying" +#endif /* UNIV_ZIP_COPY */ + "\n" , stderr); /* Since InnoDB does not currently clean up all its internal data structures in MySQL Embedded Server Library server_end(), we @@ -1168,6 +1302,9 @@ innobase_start_or_create_for_mysql(void) } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) { srv_unix_file_flush_method = SRV_UNIX_O_DIRECT; + } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) { + srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT; + } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) { srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC; @@ -1388,6 +1525,7 @@ innobase_start_or_create_for_mysql(void) } err = open_or_create_data_files(&create_new_db, + &create_new_doublewrite_file, #ifdef UNIV_LOG_ARCHIVE &min_arch_log_no, &max_arch_log_no, #endif /* UNIV_LOG_ARCHIVE */ @@ -1504,6 +1642,14 @@ innobase_start_or_create_for_mysql(void) trx_sys_file_format_init(); + if (create_new_doublewrite_file) { + mtr_start(&mtr); + fsp_header_init(TRX_DOUBLEWRITE_SPACE, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, &mtr); + mtr_commit(&mtr); + + trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE); + } + if (create_new_db) { mtr_start(&mtr); fsp_header_init(0, sum_of_new_sizes, &mtr); @@ -1573,6 +1719,8 @@ innobase_start_or_create_for_mysql(void) Note that this is not as heavy weight as it seems. At this point there will be only ONE page in the buf_LRU and there must be no page in the buf_flush list. 
*/ + /* TODO: treat more correctly */ + if (!srv_buffer_pool_shm_key) buf_pool_invalidate(); /* We always try to do a recovery, even if the database had @@ -1596,6 +1744,14 @@ innobase_start_or_create_for_mysql(void) dict_boot(); trx_sys_init_at_db_start(); + /* Initialize the fsp free limit global variable in the log + system */ + fsp_header_get_free_limit(); + + /* recv_recovery_from_checkpoint_finish needs trx lists which + are initialized in trx_sys_init_at_db_start(). */ + + recv_recovery_from_checkpoint_finish(); if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) { /* The following call is necessary for the insert buffer to work with multiple tablespaces. We must @@ -1611,26 +1767,14 @@ innobase_start_or_create_for_mysql(void) every table in the InnoDB data dictionary that has an .ibd file. - We also determine the maximum tablespace id used. - - TODO: We may have incomplete transactions in the - data dictionary tables. Does that harm the scanning of - the data dictionary below? */ + We also determine the maximum tablespace id used. */ dict_check_tablespaces_and_store_max_id( recv_needed_recovery); } srv_startup_is_before_trx_rollback_phase = FALSE; - - /* Initialize the fsp free limit global variable in the log - system */ - fsp_header_get_free_limit(); - - /* recv_recovery_from_checkpoint_finish needs trx lists which - are initialized in trx_sys_init_at_db_start(). */ - - recv_recovery_from_checkpoint_finish(); + recv_recovery_rollback_active(); /* It is possible that file_format tag has never been set. 
In this case we initialize it to minimum @@ -1679,15 +1823,18 @@ innobase_start_or_create_for_mysql(void) /* fprintf(stderr, "Max allowed record size %lu\n", page_get_free_space_of_empty() / 2); */ - /* Create the thread which watches the timeouts for lock waits - and prints InnoDB monitor info */ - - os_thread_create(&srv_lock_timeout_and_monitor_thread, NULL, + /* Create the thread which watches the timeouts for lock waits */ + os_thread_create(&srv_lock_timeout_thread, NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS); /* Create the thread which warns of long semaphore waits */ os_thread_create(&srv_error_monitor_thread, NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS); + + /* Create the thread which prints InnoDB monitor info */ + os_thread_create(&srv_monitor_thread, NULL, + thread_ids + 4 + SRV_MAX_N_IO_THREADS); + srv_is_being_started = FALSE; if (trx_doublewrite == NULL) { @@ -1712,13 +1859,13 @@ innobase_start_or_create_for_mysql(void) ulint i; os_thread_create(&srv_purge_thread, NULL, thread_ids - + (4 + SRV_MAX_N_IO_THREADS)); + + (5 + SRV_MAX_N_IO_THREADS)); for (i = 0; i < srv_use_purge_thread - 1; i++) { - n[5 + i + SRV_MAX_N_IO_THREADS] = i; /* using as index for arrays in purge_sys */ + n[6 + i + SRV_MAX_N_IO_THREADS] = i; /* using as index for arrays in purge_sys */ os_thread_create(&srv_purge_worker_thread, - n + (5 + i + SRV_MAX_N_IO_THREADS), - thread_ids + (5 + i + SRV_MAX_N_IO_THREADS)); + n + (6 + i + SRV_MAX_N_IO_THREADS), + thread_ids + (6 + i + SRV_MAX_N_IO_THREADS)); } } #ifdef UNIV_DEBUG @@ -1821,7 +1968,7 @@ innobase_start_or_create_for_mysql(void) if (srv_print_verbose_log) { ut_print_timestamp(stderr); fprintf(stderr, - " InnoDB Plugin %s started; " + " Percona XtraDB (http://www.percona.com) %s started; " "log sequence number %llu\n", INNODB_VERSION_STR, srv_start_lsn); } diff --git a/storage/xtradb/sync/sync0arr.c b/storage/xtradb/sync/sync0arr.c index cfa52cdcc88..223e1715944 100644 --- a/storage/xtradb/sync/sync0arr.c +++ 
b/storage/xtradb/sync/sync0arr.c @@ -498,7 +498,9 @@ sync_array_cell_print( || type == RW_LOCK_WAIT_EX || type == RW_LOCK_SHARED) { - fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file); + fputs(type == RW_LOCK_EX ? "X-lock on" + : type == RW_LOCK_WAIT_EX ? "X-lock (wait_ex) on" + : "S-lock on", file); rwlock = cell->old_wait_rw_lock; diff --git a/storage/xtradb/sync/sync0rw.c b/storage/xtradb/sync/sync0rw.c index 07eac403dfe..9e10f6e943b 100644 --- a/storage/xtradb/sync/sync0rw.c +++ b/storage/xtradb/sync/sync0rw.c @@ -268,7 +268,7 @@ rw_lock_create_func( lock->level = level; #endif /* UNIV_SYNC_DEBUG */ - lock->magic_n = RW_LOCK_MAGIC_N; + ut_d(lock->magic_n = RW_LOCK_MAGIC_N); lock->lock_name = cmutex_name; @@ -282,10 +282,8 @@ rw_lock_create_func( mutex_enter(&rw_lock_list_mutex); - if (UT_LIST_GET_LEN(rw_lock_list) > 0) { - ut_a(UT_LIST_GET_FIRST(rw_lock_list)->magic_n - == RW_LOCK_MAGIC_N); - } + ut_ad(UT_LIST_GET_FIRST(rw_lock_list) == NULL + || UT_LIST_GET_FIRST(rw_lock_list)->magic_n == RW_LOCK_MAGIC_N); UT_LIST_ADD_FIRST(list, rw_lock_list, lock); @@ -314,18 +312,16 @@ rw_lock_free( os_event_free(lock->wait_ex_event); - if (UT_LIST_GET_PREV(list, lock)) { - ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N); - } - if (UT_LIST_GET_NEXT(list, lock)) { - ut_a(UT_LIST_GET_NEXT(list, lock)->magic_n == RW_LOCK_MAGIC_N); - } + ut_ad(UT_LIST_GET_PREV(list, lock) == NULL + || UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N); + ut_ad(UT_LIST_GET_NEXT(list, lock) == NULL + || UT_LIST_GET_NEXT(list, lock)->magic_n == RW_LOCK_MAGIC_N); UT_LIST_REMOVE(list, rw_lock_list, lock); mutex_exit(&rw_lock_list_mutex); - lock->magic_n = 0; + ut_d(lock->magic_n = 0); } #ifdef UNIV_DEBUG @@ -344,7 +340,7 @@ rw_lock_validate( ulint waiters = rw_lock_get_waiters(lock); lint lock_word = lock->lock_word; - ut_a(lock->magic_n == RW_LOCK_MAGIC_N); + ut_ad(lock->magic_n == RW_LOCK_MAGIC_N); ut_a(waiters == 0 || waiters == 1); ut_a(lock_word > -X_LOCK_DECR 
||(-lock_word) % X_LOCK_DECR == 0); diff --git a/storage/xtradb/sync/sync0sync.c b/storage/xtradb/sync/sync0sync.c index c0e543f284d..225f28df78e 100644 --- a/storage/xtradb/sync/sync0sync.c +++ b/storage/xtradb/sync/sync0sync.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -317,6 +317,15 @@ mutex_free( ut_a(mutex_get_lock_word(mutex) == 0); ut_a(mutex_get_waiters(mutex) == 0); +#ifdef UNIV_MEM_DEBUG + if (mutex == &mem_hash_mutex) { + ut_ad(UT_LIST_GET_LEN(mutex_list) == 1); + ut_ad(UT_LIST_GET_FIRST(mutex_list) == &mem_hash_mutex); + UT_LIST_REMOVE(list, mutex_list, mutex); + goto func_exit; + } +#endif /* UNIV_MEM_DEBUG */ + if (mutex != &mutex_list_mutex #ifdef UNIV_SYNC_DEBUG && mutex != &sync_thread_mutex @@ -338,7 +347,9 @@ mutex_free( } os_event_free(mutex->event); - +#ifdef UNIV_MEM_DEBUG +func_exit: +#endif /* UNIV_MEM_DEBUG */ #if !defined(HAVE_ATOMIC_BUILTINS) os_fast_mutex_free(&(mutex->os_fast_mutex)); #endif @@ -423,20 +434,19 @@ mutex_set_waiters( mutex_t* mutex, /*!< in: mutex */ ulint n) /*!< in: value to set */ { -#ifndef INNODB_RW_LOCKS_USE_ATOMICS - volatile ulint* ptr; /* declared volatile to ensure that - the value is stored to memory */ -#endif - +#ifdef INNODB_RW_LOCKS_USE_ATOMICS ut_ad(mutex); -#ifdef INNODB_RW_LOCKS_USE_ATOMICS if (n) { os_compare_and_swap_ulint(&mutex->waiters, 0, 1); } else { os_compare_and_swap_ulint(&mutex->waiters, 1, 0); } #else + volatile ulint* ptr; /* declared volatile to ensure that + the value is stored to memory */ + ut_ad(mutex); + ptr = &(mutex->waiters); *ptr = n; /* Here we assume that the write of a single @@ -959,12 +969,62 @@ sync_thread_levels_contain( } /******************************************************************//** 
+Checks if the level array for the current thread contains a +mutex or rw-latch at the specified level. +@return a matching latch, or NULL if not found */ +UNIV_INTERN +void* +sync_thread_levels_contains( +/*========================*/ + ulint level) /*!< in: latching order level + (SYNC_DICT, ...)*/ +{ + sync_level_t* arr; + sync_thread_t* thread_slot; + sync_level_t* slot; + ulint i; + + if (!sync_order_checks_on) { + + return(NULL); + } + + mutex_enter(&sync_thread_mutex); + + thread_slot = sync_thread_level_arrays_find_slot(); + + if (thread_slot == NULL) { + + mutex_exit(&sync_thread_mutex); + + return(NULL); + } + + arr = thread_slot->levels; + + for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) { + + slot = sync_thread_levels_get_nth(arr, i); + + if (slot->latch != NULL && slot->level == level) { + + mutex_exit(&sync_thread_mutex); + return(slot->latch); + } + } + + mutex_exit(&sync_thread_mutex); + + return(NULL); +} + +/******************************************************************//** Checks that the level array for the current thread is empty. 
-@return TRUE if empty except the exceptions specified below */ +@return a latch, or NULL if empty except the exceptions specified below */ UNIV_INTERN -ibool -sync_thread_levels_empty_gen( -/*=========================*/ +void* +sync_thread_levels_nonempty_gen( +/*============================*/ ibool dict_mutex_allowed) /*!< in: TRUE if dictionary mutex is allowed to be owned by the thread, also purge_is_running mutex is @@ -977,7 +1037,7 @@ sync_thread_levels_empty_gen( if (!sync_order_checks_on) { - return(TRUE); + return(NULL); } mutex_enter(&sync_thread_mutex); @@ -988,7 +1048,7 @@ sync_thread_levels_empty_gen( mutex_exit(&sync_thread_mutex); - return(TRUE); + return(NULL); } arr = thread_slot->levels; @@ -1005,13 +1065,13 @@ sync_thread_levels_empty_gen( mutex_exit(&sync_thread_mutex); ut_error; - return(FALSE); + return(slot->latch); } } mutex_exit(&sync_thread_mutex); - return(TRUE); + return(NULL); } /******************************************************************//** @@ -1388,6 +1448,12 @@ sync_close(void) mutex = UT_LIST_GET_FIRST(mutex_list); while (mutex) { +#ifdef UNIV_MEM_DEBUG + if (mutex == &mem_hash_mutex) { + mutex = UT_LIST_GET_NEXT(list, mutex); + continue; + } +#endif /* UNIV_MEM_DEBUG */ mutex_free(mutex); mutex = UT_LIST_GET_FIRST(mutex_list); } diff --git a/storage/xtradb/trx/trx0i_s.c b/storage/xtradb/trx/trx0i_s.c index 78b20edd365..5bc8302d0c0 100644 --- a/storage/xtradb/trx/trx0i_s.c +++ b/storage/xtradb/trx/trx0i_s.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2007, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -28,11 +28,18 @@ table cache" for later retrieval. 
Created July 17, 2007 Vasil Dimov *******************************************************/ +/* Found during the build of 5.5.3 on Linux 2.4 and early 2.6 kernels: + The includes "univ.i" -> "my_global.h" cause a different path + to be taken further down with pthread functions and types, + so they must come first. + From the symptoms, this is related to bug#46587 in the MySQL bug DB. +*/ +#include "univ.i" + +#include <mysql/plugin.h> #include "mysql_addons.h" -#include "univ.i" -#include <mysql/plugin.h> #include "buf0buf.h" #include "dict0dict.h" #include "ha0storage.h" @@ -422,6 +429,9 @@ fill_trx_row( which to copy volatile strings */ { + const char* stmt; + size_t stmt_len; + row->trx_id = trx_get_id(trx); row->trx_started = (ib_time_t) trx->start_time; row->trx_state = trx_get_que_state_str(trx); @@ -442,37 +452,32 @@ fill_trx_row( row->trx_weight = (ullint) ut_conv_dulint_to_longlong(TRX_WEIGHT(trx)); - if (trx->mysql_thd != NULL) { - row->trx_mysql_thread_id - = thd_get_thread_id(trx->mysql_thd); - } else { + if (trx->mysql_thd == NULL) { /* For internal transactions e.g., purge and transactions being recovered at startup there is no associated MySQL thread data structure. 
*/ row->trx_mysql_thread_id = 0; + row->trx_query = NULL; + return(TRUE); } - if (trx->mysql_query_str != NULL && *trx->mysql_query_str != NULL) { + row->trx_mysql_thread_id = thd_get_thread_id(trx->mysql_thd); + stmt = innobase_get_stmt(trx->mysql_thd, &stmt_len); - if (strlen(*trx->mysql_query_str) - > TRX_I_S_TRX_QUERY_MAX_LEN) { + if (stmt != NULL) { - char query[TRX_I_S_TRX_QUERY_MAX_LEN + 1]; + char query[TRX_I_S_TRX_QUERY_MAX_LEN + 1]; - memcpy(query, *trx->mysql_query_str, - TRX_I_S_TRX_QUERY_MAX_LEN); - query[TRX_I_S_TRX_QUERY_MAX_LEN] = '\0'; + if (stmt_len > TRX_I_S_TRX_QUERY_MAX_LEN) { + stmt_len = TRX_I_S_TRX_QUERY_MAX_LEN; + } - row->trx_query = ha_storage_put_memlim( - cache->storage, query, - TRX_I_S_TRX_QUERY_MAX_LEN + 1, - MAX_ALLOWED_FOR_STORAGE(cache)); - } else { + memcpy(query, stmt, stmt_len); + query[stmt_len] = '\0'; - row->trx_query = ha_storage_put_str_memlim( - cache->storage, *trx->mysql_query_str, - MAX_ALLOWED_FOR_STORAGE(cache)); - } + row->trx_query = ha_storage_put_memlim( + cache->storage, stmt, stmt_len + 1, + MAX_ALLOWED_FOR_STORAGE(cache)); if (row->trx_query == NULL) { diff --git a/storage/xtradb/trx/trx0purge.c b/storage/xtradb/trx/trx0purge.c index 41e16b35e85..1c317665878 100644 --- a/storage/xtradb/trx/trx0purge.c +++ b/storage/xtradb/trx/trx0purge.c @@ -1148,8 +1148,7 @@ trx_purge(void) /* If we cannot advance the 'purge view' because of an old 'consistent read view', then the DML statements cannot be delayed. Also, srv_max_purge_lag <= 0 means 'infinity'. 
*/ - if (srv_max_purge_lag > 0 - && !UT_LIST_GET_LAST(trx_sys->view_list)) { + if (srv_max_purge_lag > 0) { float ratio = (float) trx_sys->rseg_history_len / srv_max_purge_lag; if (ratio > ULINT_MAX / 10000) { diff --git a/storage/xtradb/trx/trx0rec.c b/storage/xtradb/trx/trx0rec.c index 5097cf18dcd..f50e10ed756 100644 --- a/storage/xtradb/trx/trx0rec.c +++ b/storage/xtradb/trx/trx0rec.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -350,8 +350,13 @@ trx_undo_rec_get_col_val( ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE); ut_ad(*len > *orig_len); - ut_ad(*len >= REC_MAX_INDEX_COL_LEN + /* @see dtuple_convert_big_rec() */ + ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE * 2); + /* we do not have access to index->table here + ut_ad(dict_table_get_format(index->table) >= DICT_TF_FORMAT_ZIP + || *len >= REC_MAX_INDEX_COL_LEN + BTR_EXTERN_FIELD_REF_SIZE); + */ *len += UNIV_EXTERN_STORAGE_FIELD; break; @@ -977,6 +982,7 @@ trx_undo_update_rec_get_update( fprintf(stderr, "\n" "InnoDB: n_fields = %lu, i = %lu, ptr %p\n", (ulong) n_fields, (ulong) i, ptr); + *upd = NULL; return(NULL); } @@ -1074,11 +1080,15 @@ trx_undo_rec_get_partial_row( /* If the prefix of this column is indexed, ensure that enough prefix is stored in the undo log record. 
*/ - ut_a(ignore_prefix - || !col->ord_part - || dfield_get_len(dfield) - >= REC_MAX_INDEX_COL_LEN - + BTR_EXTERN_FIELD_REF_SIZE); + if (!ignore_prefix && col->ord_part) { + ut_a(dfield_get_len(dfield) + >= 2 * BTR_EXTERN_FIELD_REF_SIZE); + ut_a(dict_table_get_format(index->table) + >= DICT_TF_FORMAT_ZIP + || dfield_get_len(dfield) + >= REC_MAX_INDEX_COL_LEN + + BTR_EXTERN_FIELD_REF_SIZE); + } } } diff --git a/storage/xtradb/trx/trx0rseg.c b/storage/xtradb/trx/trx0rseg.c index 8d754788e2a..57b5611d624 100644 --- a/storage/xtradb/trx/trx0rseg.c +++ b/storage/xtradb/trx/trx0rseg.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software diff --git a/storage/xtradb/trx/trx0sys.c b/storage/xtradb/trx/trx0sys.c index 8ea34a8c81c..ad4471ada0b 100644 --- a/storage/xtradb/trx/trx0sys.c +++ b/storage/xtradb/trx/trx0sys.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -402,6 +402,149 @@ start_again: goto start_again; } + + if (srv_doublewrite_file) { + /* the same doublewrite buffer to TRX_SYS_SPACE should exist. 
+ check and create if not exist.*/ + + mtr_start(&mtr); + trx_doublewrite_buf_is_being_created = TRUE; + + block = buf_page_get(TRX_DOUBLEWRITE_SPACE, 0, TRX_SYS_PAGE_NO, + RW_X_LATCH, &mtr); + buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); + + doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE; + + if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC) + == TRX_SYS_DOUBLEWRITE_MAGIC_N) { + /* The doublewrite buffer has already been created: + just read in some numbers */ + + mtr_commit(&mtr); + } else { + fprintf(stderr, + "InnoDB: Doublewrite buffer not found in the doublewrite file:" + " creating new\n"); + + if (buf_pool_get_curr_size() + < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + + FSP_EXTENT_SIZE / 2 + 100) + * UNIV_PAGE_SIZE)) { + fprintf(stderr, + "InnoDB: Cannot create doublewrite buffer:" + " you must\n" + "InnoDB: increase your buffer pool size.\n" + "InnoDB: Cannot continue operation.\n"); + + exit(1); + } + + block2 = fseg_create(TRX_DOUBLEWRITE_SPACE, TRX_SYS_PAGE_NO, + TRX_SYS_DOUBLEWRITE + + TRX_SYS_DOUBLEWRITE_FSEG, &mtr); + + /* fseg_create acquires a second latch on the page, + therefore we must declare it: */ + + buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK); + + if (block2 == NULL) { + fprintf(stderr, + "InnoDB: Cannot create doublewrite buffer:" + " you must\n" + "InnoDB: increase your tablespace size.\n" + "InnoDB: Cannot continue operation.\n"); + + /* We exit without committing the mtr to prevent + its modifications to the database getting to disk */ + + exit(1); + } + + fseg_header = buf_block_get_frame(block) + + TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG; + prev_page_no = 0; + + for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + + FSP_EXTENT_SIZE / 2; i++) { + page_no = fseg_alloc_free_page(fseg_header, + prev_page_no + 1, + FSP_UP, &mtr); + if (page_no == FIL_NULL) { + fprintf(stderr, + "InnoDB: Cannot create doublewrite" + " buffer: you must\n" + "InnoDB: increase your" + " tablespace size.\n" + 
"InnoDB: Cannot continue operation.\n" + ); + + exit(1); + } + + /* We read the allocated pages to the buffer pool; + when they are written to disk in a flush, the space + id and page number fields are also written to the + pages. When we at database startup read pages + from the doublewrite buffer, we know that if the + space id and page number in them are the same as + the page position in the tablespace, then the page + has not been written to in doublewrite. */ + + new_block = buf_page_get(TRX_DOUBLEWRITE_SPACE, 0, page_no, + RW_X_LATCH, &mtr); + buf_block_dbg_add_level(new_block, + SYNC_NO_ORDER_CHECK); + + if (i == FSP_EXTENT_SIZE / 2) { + ut_a(page_no == FSP_EXTENT_SIZE); + mlog_write_ulint(doublewrite + + TRX_SYS_DOUBLEWRITE_BLOCK1, + page_no, MLOG_4BYTES, &mtr); + mlog_write_ulint(doublewrite + + TRX_SYS_DOUBLEWRITE_REPEAT + + TRX_SYS_DOUBLEWRITE_BLOCK1, + page_no, MLOG_4BYTES, &mtr); + } else if (i == FSP_EXTENT_SIZE / 2 + + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { + ut_a(page_no == 2 * FSP_EXTENT_SIZE); + mlog_write_ulint(doublewrite + + TRX_SYS_DOUBLEWRITE_BLOCK2, + page_no, MLOG_4BYTES, &mtr); + mlog_write_ulint(doublewrite + + TRX_SYS_DOUBLEWRITE_REPEAT + + TRX_SYS_DOUBLEWRITE_BLOCK2, + page_no, MLOG_4BYTES, &mtr); + } else if (i > FSP_EXTENT_SIZE / 2) { + ut_a(page_no == prev_page_no + 1); + } + + prev_page_no = page_no; + } + + mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC, + TRX_SYS_DOUBLEWRITE_MAGIC_N, + MLOG_4BYTES, &mtr); + mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC + + TRX_SYS_DOUBLEWRITE_REPEAT, + TRX_SYS_DOUBLEWRITE_MAGIC_N, + MLOG_4BYTES, &mtr); + + mlog_write_ulint(doublewrite + + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED, + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N, + MLOG_4BYTES, &mtr); + mtr_commit(&mtr); + + /* Flush the modified pages to disk and make a checkpoint */ + log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE); + + fprintf(stderr, "InnoDB: Doublewrite buffer created in the doublewrite file\n"); + } + + 
trx_doublewrite_buf_is_being_created = FALSE; + } } /****************************************************************//** @@ -425,10 +568,19 @@ trx_sys_doublewrite_init_or_restore_pages( ulint source_page_no; byte* page; byte* doublewrite; + ulint doublewrite_space_id; ulint space_id; ulint page_no; ulint i; + doublewrite_space_id = (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE); + + if (srv_doublewrite_file) { + fprintf(stderr, + "InnoDB: doublewrite file '%s' is used.\n", + srv_doublewrite_file); + } + /* We do the file i/o past the buffer pool */ unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE); @@ -437,7 +589,7 @@ trx_sys_doublewrite_init_or_restore_pages( /* Read the trx sys header to check if we are using the doublewrite buffer */ - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0, + fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, TRX_SYS_PAGE_NO, 0, UNIV_PAGE_SIZE, read_buf, NULL); doublewrite = read_buf + TRX_SYS_DOUBLEWRITE; @@ -475,10 +627,10 @@ trx_sys_doublewrite_init_or_restore_pages( /* Read the pages from the doublewrite buffer to memory */ - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0, + fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, block1, 0, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, buf, NULL); - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0, + fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, block2, 0, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, NULL); @@ -534,7 +686,8 @@ trx_sys_doublewrite_init_or_restore_pages( " doublewrite buf.\n", (ulong) space_id, (ulong) page_no, (ulong) i); - } else if (space_id == TRX_SYS_SPACE + } else if ((space_id == TRX_SYS_SPACE + || (srv_doublewrite_file && space_id == TRX_DOUBLEWRITE_SPACE)) && ((page_no >= block1 && page_no < block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) @@ -594,8 +747,8 @@ trx_sys_doublewrite_init_or_restore_pages( " recover the database" " with the my.cnf\n" "InnoDB: 
option:\n" - "InnoDB: set-variable=" - "innodb_force_recovery=6\n"); + "InnoDB:" + " innodb_force_recovery=6\n"); exit(1); } @@ -687,13 +840,13 @@ UNIV_INTERN void trx_sys_update_mysql_binlog_offset( /*===============================*/ + trx_sysf_t* sys_header, const char* file_name_in,/*!< in: MySQL log file name */ ib_int64_t offset, /*!< in: position in that log file */ ulint field, /*!< in: offset of the MySQL log info field in the trx sys header */ mtr_t* mtr) /*!< in: mtr */ { - trx_sysf_t* sys_header; const char* file_name; if (ut_strlen(file_name_in) >= TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN) { @@ -707,8 +860,6 @@ trx_sys_update_mysql_binlog_offset( file_name = file_name_in; } - sys_header = trx_sysf_get(mtr); - if (mach_read_from_4(sys_header + field + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD) != TRX_SYS_MYSQL_LOG_MAGIC_N) { @@ -982,6 +1133,83 @@ trx_sysf_create( } /*****************************************************************//** +Creates dummy of the file page for the transaction system. */ +static +void +trx_sysf_dummy_create( +/*==================*/ + ulint space, + mtr_t* mtr) +{ + buf_block_t* block; + page_t* page; + + ut_ad(mtr); + + /* Note that below we first reserve the file space x-latch, and + then enter the kernel: we must do it in this order to conform + to the latching order rules. 
*/ + + mtr_x_lock(fil_space_get_latch(space, NULL), mtr); + mutex_enter(&kernel_mutex); + + /* Create the trx sys file block in a new allocated file segment */ + block = fseg_create(space, 0, TRX_SYS + TRX_SYS_FSEG_HEADER, + mtr); + buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER); + + fprintf(stderr, "%lu\n", buf_block_get_page_no(block)); + ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO); + + page = buf_block_get_frame(block); + + mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS, + MLOG_2BYTES, mtr); + + /* Reset the doublewrite buffer magic number to zero so that we + know that the doublewrite buffer has not yet been created (this + suppresses a Valgrind warning) */ + + mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE + + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr); + +#ifdef UNDEFINED + /* TODO: REMOVE IT: The bellow is not needed, I think */ + sys_header = trx_sysf_get(mtr); + + /* Start counting transaction ids from number 1 up */ + mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE, + ut_dulint_create(0, 1), mtr); + + /* Reset the rollback segment slots */ + for (i = 0; i < TRX_SYS_N_RSEGS; i++) { + + trx_sysf_rseg_set_space(sys_header, i, ULINT_UNDEFINED, mtr); + trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr); + } + + /* The remaining area (up to the page trailer) is uninitialized. + Silence Valgrind warnings about it. 
*/ + UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS + + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE + + TRX_SYS_RSEG_SPACE), + (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + - (TRX_SYS_RSEGS + + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE + + TRX_SYS_RSEG_SPACE)) + + page - sys_header); + + /* Create the first rollback segment in the SYSTEM tablespace */ + page_no = trx_rseg_header_create(space, 0, ULINT_MAX, &slot_no, + mtr); + ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID); + ut_a(page_no != FIL_NULL); +#endif + + mutex_exit(&kernel_mutex); +} + +/*****************************************************************//** Creates and initializes the central memory structures for the transaction system. This is called when the database is started. */ UNIV_INTERN @@ -1087,6 +1315,26 @@ trx_sys_create(void) trx_sys_init_at_db_start(); } +/*****************************************************************//** +Creates and initializes the dummy transaction system page for tablespace. */ +UNIV_INTERN +void +trx_sys_dummy_create( +/*=================*/ + ulint space) +{ + mtr_t mtr; + + /* This function is only for doublewrite file for now */ + ut_a(space == TRX_DOUBLEWRITE_SPACE); + + mtr_start(&mtr); + + trx_sysf_dummy_create(space, &mtr); + + mtr_commit(&mtr); +} + /********************************************************************* Create extra rollback segments when create_new_db */ UNIV_INTERN @@ -1608,6 +1856,7 @@ trx_sys_file_format_id_to_name( #endif /* !UNIV_HOTBACKUP */ +#ifndef UNIV_HOTBACKUP /********************************************************************* Shutdown/Close the transaction system. 
*/ UNIV_INTERN @@ -1684,3 +1933,4 @@ trx_sys_close(void) trx_sys = NULL; mutex_exit(&kernel_mutex); } +#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/trx/trx0trx.c b/storage/xtradb/trx/trx0trx.c index e81daf4cad9..9584f0c4c46 100644 --- a/storage/xtradb/trx/trx0trx.c +++ b/storage/xtradb/trx/trx0trx.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -109,6 +109,8 @@ trx_create( trx->support_xa = TRUE; + trx->flush_log_at_trx_commit_session = 3; /* means to use innodb_flush_log_at_trx_commit value */ + trx->check_foreigns = TRUE; trx->check_unique_secondary = TRUE; @@ -119,7 +121,6 @@ trx_create( trx->table_id = ut_dulint_zero; trx->mysql_thd = NULL; - trx->mysql_query_str = NULL; trx->active_trans = 0; trx->duplicates = 0; @@ -455,6 +456,7 @@ trx_lists_init_at_db_start(void) trx_undo_t* undo; trx_t* trx; + ut_ad(mutex_own(&kernel_mutex)); UT_LIST_INIT(trx_sys->trx_list); /* Look from the rollback segments if there exist undo logs for @@ -735,6 +737,9 @@ trx_start( generated by the same transaction, doesn't. 
*/ trx->support_xa = thd_supports_xa(trx->mysql_thd); + trx->flush_log_at_trx_commit_session = + thd_flush_log_at_trx_commit_session(trx->mysql_thd); + mutex_enter(&kernel_mutex); ret = trx_start_low(trx, rseg_id); @@ -757,6 +762,7 @@ trx_commit_off_kernel( trx_rseg_t* rseg; trx_undo_t* undo; mtr_t mtr; + trx_sysf_t* sys_header = NULL; ut_ad(mutex_own(&kernel_mutex)); @@ -814,7 +820,11 @@ trx_commit_off_kernel( if (trx->mysql_log_file_name && trx->mysql_log_file_name[0] != '\0') { + if (!sys_header) { + sys_header = trx_sysf_get(&mtr); + } trx_sys_update_mysql_binlog_offset( + sys_header, trx->mysql_log_file_name, trx->mysql_log_offset, TRX_SYS_MYSQL_LOG_INFO, &mtr); @@ -823,11 +833,16 @@ trx_commit_off_kernel( if (trx->mysql_master_log_file_name[0] != '\0') { /* This database server is a MySQL replication slave */ + if (!sys_header) { + sys_header = trx_sysf_get(&mtr); + } trx_sys_update_mysql_binlog_offset( + sys_header, trx->mysql_relay_log_file_name, trx->mysql_relay_log_pos, TRX_SYS_MYSQL_RELAY_LOG_INFO, &mtr); trx_sys_update_mysql_binlog_offset( + sys_header, trx->mysql_master_log_file_name, trx->mysql_master_log_pos, TRX_SYS_MYSQL_MASTER_LOG_INFO, &mtr); @@ -885,7 +900,7 @@ trx_commit_off_kernel( recovery i.e.: back ground rollback thread is still active then there is a chance that the rollback thread may see this trx as COMMITTED_IN_MEMORY and goes adhead to clean it - up calling trx_cleanup_at_db_startup(). This can happen + up calling trx_cleanup_at_db_startup(). This can happen in the case we are committing a trx here that is left in PREPARED state during the crash. 
Note that commit of the rollback of a PREPARED trx happens in the recovery thread @@ -906,6 +921,7 @@ trx_commit_off_kernel( trx->read_view = NULL; if (lsn) { + ulint flush_log_at_trx_commit; mutex_exit(&kernel_mutex); @@ -914,6 +930,12 @@ trx_commit_off_kernel( trx_undo_insert_cleanup(trx); } + if (trx->flush_log_at_trx_commit_session == 3) { + flush_log_at_trx_commit = srv_flush_log_at_trx_commit; + } else { + flush_log_at_trx_commit = trx->flush_log_at_trx_commit_session; + } + /* NOTE that we could possibly make a group commit more efficient here: call os_thread_yield here to allow also other trxs to come to commit! */ @@ -945,9 +967,9 @@ trx_commit_off_kernel( if (trx->flush_log_later) { /* Do nothing yet */ trx->must_flush_log_later = TRUE; - } else if (srv_flush_log_at_trx_commit == 0) { + } else if (flush_log_at_trx_commit == 0) { /* Do nothing */ - } else if (srv_flush_log_at_trx_commit == 1) { + } else if (flush_log_at_trx_commit == 1) { if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { /* Write the log but do not flush it to disk */ @@ -959,7 +981,7 @@ trx_commit_off_kernel( log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); } - } else if (srv_flush_log_at_trx_commit == 2) { + } else if (flush_log_at_trx_commit == 2) { /* Write the log but do not flush it to disk */ @@ -982,7 +1004,6 @@ trx_commit_off_kernel( trx->rseg = NULL; trx->undo_no = ut_dulint_zero; trx->last_sql_stat_start.least_undo_no = ut_dulint_zero; - trx->mysql_query_str = NULL; ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0); ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0); @@ -1640,16 +1661,23 @@ trx_commit_complete_for_mysql( trx_t* trx) /*!< in: trx handle */ { ib_uint64_t lsn = trx->commit_lsn; + ulint flush_log_at_trx_commit; ut_a(trx); trx->op_info = "flushing log"; + if (trx->flush_log_at_trx_commit_session == 3) { + flush_log_at_trx_commit = srv_flush_log_at_trx_commit; + } else { + flush_log_at_trx_commit = trx->flush_log_at_trx_commit_session; + } + if (!trx->must_flush_log_later) { /* 
Do nothing */ - } else if (srv_flush_log_at_trx_commit == 0) { + } else if (flush_log_at_trx_commit == 0) { /* Do nothing */ - } else if (srv_flush_log_at_trx_commit == 1) { + } else if (flush_log_at_trx_commit == 1) { if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { /* Write the log but do not flush it to disk */ @@ -1660,7 +1688,7 @@ trx_commit_complete_for_mysql( log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); } - } else if (srv_flush_log_at_trx_commit == 2) { + } else if (flush_log_at_trx_commit == 2) { /* Write the log but do not flush it to disk */ @@ -1921,6 +1949,8 @@ trx_prepare_off_kernel( /*--------------------------------------*/ if (lsn) { + ulint flush_log_at_trx_commit; + /* Depending on the my.cnf options, we may now write the log buffer to the log files, making the prepared state of the transaction durable if the OS does not crash. We may also @@ -1940,9 +1970,15 @@ trx_prepare_off_kernel( mutex_exit(&kernel_mutex); - if (srv_flush_log_at_trx_commit == 0) { + if (trx->flush_log_at_trx_commit_session == 3) { + flush_log_at_trx_commit = srv_flush_log_at_trx_commit; + } else { + flush_log_at_trx_commit = trx->flush_log_at_trx_commit_session; + } + + if (flush_log_at_trx_commit == 0) { /* Do nothing */ - } else if (srv_flush_log_at_trx_commit == 1) { + } else if (flush_log_at_trx_commit == 1) { if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { /* Write the log but do not flush it to disk */ @@ -1954,7 +1990,7 @@ trx_prepare_off_kernel( log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); } - } else if (srv_flush_log_at_trx_commit == 2) { + } else if (flush_log_at_trx_commit == 2) { /* Write the log but do not flush it to disk */ diff --git a/storage/xtradb/ut/ut0auxconf_atomic_pthread_t_gcc.c b/storage/xtradb/ut/ut0auxconf_atomic_pthread_t_gcc.c deleted file mode 100644 index 30de5aa6f17..00000000000 --- a/storage/xtradb/ut/ut0auxconf_atomic_pthread_t_gcc.c +++ /dev/null @@ -1,43 +0,0 @@ 
-/***************************************************************************** - -Copyright (c) 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/***************************************************************************** -If this program compiles, then pthread_t objects can be used as arguments -to GCC atomic builtin functions. - -Created March 5, 2009 Vasil Dimov -*****************************************************************************/ - -#include <pthread.h> -#include <string.h> - -int -main(int argc, char** argv) -{ - pthread_t x1; - pthread_t x2; - pthread_t x3; - - memset(&x1, 0x0, sizeof(x1)); - memset(&x2, 0x0, sizeof(x2)); - memset(&x3, 0x0, sizeof(x3)); - - __sync_bool_compare_and_swap(&x1, x2, x3); - - return(0); -} diff --git a/storage/xtradb/ut/ut0auxconf_atomic_pthread_t_solaris.c b/storage/xtradb/ut/ut0auxconf_atomic_pthread_t_solaris.c deleted file mode 100644 index 310603c7503..00000000000 --- a/storage/xtradb/ut/ut0auxconf_atomic_pthread_t_solaris.c +++ /dev/null @@ -1,54 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2009, Innobase Oy. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/***************************************************************************** -If this program compiles and returns 0, then pthread_t objects can be used as -arguments to Solaris libc atomic functions. - -Created April 18, 2009 Vasil Dimov -*****************************************************************************/ - -#include <pthread.h> -#include <string.h> - -int -main(int argc, char** argv) -{ - pthread_t x1; - pthread_t x2; - pthread_t x3; - - memset(&x1, 0x0, sizeof(x1)); - memset(&x2, 0x0, sizeof(x2)); - memset(&x3, 0x0, sizeof(x3)); - - if (sizeof(pthread_t) == 4) { - - atomic_cas_32(&x1, x2, x3); - - } else if (sizeof(pthread_t) == 8) { - - atomic_cas_64(&x1, x2, x3); - - } else { - - return(1); - } - - return(0); -} diff --git a/storage/xtradb/ut/ut0auxconf_have_gcc_atomics.c b/storage/xtradb/ut/ut0auxconf_have_gcc_atomics.c deleted file mode 100644 index da5c13d7d79..00000000000 --- a/storage/xtradb/ut/ut0auxconf_have_gcc_atomics.c +++ /dev/null @@ -1,61 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2009, Innobase Oy. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/***************************************************************************** -If this program compiles and returns 0, then GCC atomic funcions are available. - -Created September 12, 2009 Vasil Dimov -*****************************************************************************/ - -int -main(int argc, char** argv) -{ - long x; - long y; - long res; - char c; - - x = 10; - y = 123; - res = __sync_bool_compare_and_swap(&x, x, y); - if (!res || x != y) { - return(1); - } - - x = 10; - y = 123; - res = __sync_bool_compare_and_swap(&x, x + 1, y); - if (res || x != 10) { - return(1); - } - - x = 10; - y = 123; - res = __sync_add_and_fetch(&x, y); - if (res != 123 + 10 || x != 123 + 10) { - return(1); - } - - c = 10; - res = __sync_lock_test_and_set(&c, 123); - if (res != 10 || c != 123) { - return(1); - } - - return(0); -} diff --git a/storage/xtradb/ut/ut0auxconf_have_solaris_atomics.c b/storage/xtradb/ut/ut0auxconf_have_solaris_atomics.c deleted file mode 100644 index 7eb704edd4b..00000000000 --- a/storage/xtradb/ut/ut0auxconf_have_solaris_atomics.c +++ /dev/null @@ -1,39 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2009, Innobase Oy. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/***************************************************************************** -If this program compiles, then Solaris libc atomic funcions are available. - -Created April 18, 2009 Vasil Dimov -*****************************************************************************/ -#include <atomic.h> - -int -main(int argc, char** argv) -{ - ulong_t ulong = 0; - uint32_t uint32 = 0; - uint64_t uint64 = 0; - - atomic_cas_ulong(&ulong, 0, 1); - atomic_cas_32(&uint32, 0, 1); - atomic_cas_64(&uint64, 0, 1); - atomic_add_long(&ulong, 0); - - return(0); -} diff --git a/storage/xtradb/ut/ut0auxconf_pause.c b/storage/xtradb/ut/ut0auxconf_pause.c deleted file mode 100644 index 54d63bdd9bc..00000000000 --- a/storage/xtradb/ut/ut0auxconf_pause.c +++ /dev/null @@ -1,32 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/***************************************************************************** -If this program compiles and can be run and returns 0, then the pause -instruction is available. - -Created Jul 21, 2009 Vasil Dimov -*****************************************************************************/ - -int -main(int argc, char** argv) -{ - __asm__ __volatile__ ("pause"); - - return(0); -} diff --git a/storage/xtradb/ut/ut0auxconf_sizeof_pthread_t.c b/storage/xtradb/ut/ut0auxconf_sizeof_pthread_t.c deleted file mode 100644 index 96add4526ef..00000000000 --- a/storage/xtradb/ut/ut0auxconf_sizeof_pthread_t.c +++ /dev/null @@ -1,35 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/***************************************************************************** -This program should compile and when run, print a single line like: -#define SIZEOF_PTHREAD_T %d - -Created April 18, 2009 Vasil Dimov -*****************************************************************************/ - -#include <stdio.h> -#include <pthread.h> - -int -main(int argc, char** argv) -{ - printf("#define SIZEOF_PTHREAD_T %d\n", (int) sizeof(pthread_t)); - - return(0); -} diff --git a/storage/xtradb/ut/ut0rbt.c b/storage/xtradb/ut/ut0rbt.c new file mode 100644 index 00000000000..3d7bc91e714 --- /dev/null +++ b/storage/xtradb/ut/ut0rbt.c @@ -0,0 +1,1249 @@ +/***************************************************************************** + +Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/*******************************************************************//** +@file ut/ut0rbt.c +Red-Black tree implementation + +Created 2007-03-20 Sunny Bains +***********************************************************************/ + +#include "ut0rbt.h" + +/************************************************************************ +Definition of a red-black tree +============================== + +A red-black tree is a binary search tree which has the following +red-black properties: + + 1. Every node is either red or black. + 2. Every leaf (NULL - in our case tree->nil) is black. + 3. If a node is red, then both its children are black. + 4. Every simple path from a node to a descendant leaf contains the + same number of black nodes. + + from (3) above, the implication is that on any path from the root + to a leaf, red nodes must not be adjacent. + + However, any number of black nodes may appear in a sequence. */ + +#if defined(IB_RBT_TESTING) +#warning "Testing enabled!" +#endif + +#define ROOT(t) (t->root->left) +#define SIZEOF_NODE(t) ((sizeof(ib_rbt_node_t) + t->sizeof_value) - 1) + +/****************************************************************//** +Print out the sub-tree recursively. 
*/ +static +void +rbt_print_subtree( +/*==============*/ + const ib_rbt_t* tree, /*!< in: tree to traverse */ + const ib_rbt_node_t* node, /*!< in: node to print */ + ib_rbt_print_node print) /*!< in: print key function */ +{ + /* FIXME: Doesn't do anything yet */ + if (node != tree->nil) { + print(node); + rbt_print_subtree(tree, node->left, print); + rbt_print_subtree(tree, node->right, print); + } +} + +/****************************************************************//** +Verify that the keys are in order. +@return TRUE of OK. FALSE if not ordered */ +static +ibool +rbt_check_ordering( +/*===============*/ + const ib_rbt_t* tree) /*!< in: tree to verfify */ +{ + const ib_rbt_node_t* node; + const ib_rbt_node_t* prev = NULL; + + /* Iterate over all the nodes, comparing each node with the prev */ + for (node = rbt_first(tree); node; node = rbt_next(tree, prev)) { + + if (prev && tree->compare(prev->value, node->value) >= 0) { + return(FALSE); + } + + prev = node; + } + + return(TRUE); +} + +/****************************************************************//** +Check that every path from the root to the leaves has the same count. +Count is expressed in the number of black nodes. +@return 0 on failure else black height of the subtree */ +static +ibool +rbt_count_black_nodes( +/*==================*/ + const ib_rbt_t* tree, /*!< in: tree to verify */ + const ib_rbt_node_t* node) /*!< in: start of sub-tree */ +{ + ulint result; + + if (node != tree->nil) { + ulint left_height = rbt_count_black_nodes(tree, node->left); + + ulint right_height = rbt_count_black_nodes(tree, node->right); + + if (left_height == 0 + || right_height == 0 + || left_height != right_height) { + + result = 0; + } else if (node->color == IB_RBT_RED) { + + /* Case 3 */ + if (node->left->color != IB_RBT_BLACK + || node->right->color != IB_RBT_BLACK) { + + result = 0; + } else { + result = left_height; + } + /* Check if it's anything other than RED or BLACK. 
*/ + } else if (node->color != IB_RBT_BLACK) { + + result = 0; + } else { + + result = right_height + 1; + } + } else { + result = 1; + } + + return(result); +} + +/****************************************************************//** +Turn the node's right child's left sub-tree into node's right sub-tree. +This will also make node's right child it's parent. */ +static +void +rbt_rotate_left( +/*============*/ + const ib_rbt_node_t* nil, /*!< in: nil node of the tree */ + ib_rbt_node_t* node) /*!< in: node to rotate */ +{ + ib_rbt_node_t* right = node->right; + + node->right = right->left; + + if (right->left != nil) { + right->left->parent = node; + } + + /* Right's new parent was node's parent. */ + right->parent = node->parent; + + /* Since root's parent is tree->nil and root->parent->left points + back to root, we can avoid the check. */ + if (node == node->parent->left) { + /* Node was on the left of its parent. */ + node->parent->left = right; + } else { + /* Node must have been on the right. */ + node->parent->right = right; + } + + /* Finally, put node on right's left. */ + right->left = node; + node->parent = right; +} + +/****************************************************************//** +Turn the node's left child's right sub-tree into node's left sub-tree. +This also make node's left child it's parent. */ +static +void +rbt_rotate_right( +/*=============*/ + const ib_rbt_node_t* nil, /*!< in: nil node of tree */ + ib_rbt_node_t* node) /*!< in: node to rotate */ +{ + ib_rbt_node_t* left = node->left; + + node->left = left->right; + + if (left->right != nil) { + left->right->parent = node; + } + + /* Left's new parent was node's parent. */ + left->parent = node->parent; + + /* Since root's parent is tree->nil and root->parent->left points + back to root, we can avoid the check. */ + if (node == node->parent->right) { + /* Node was on the left of its parent. */ + node->parent->right = left; + } else { + /* Node must have been on the left. 
*/ + node->parent->left = left; + } + + /* Finally, put node on left's right. */ + left->right = node; + node->parent = left; +} + +/****************************************************************//** +Append a node to the tree. +@return inserted node */ +static +ib_rbt_node_t* +rbt_tree_add_child( +/*===============*/ + const ib_rbt_t* tree, /*!< in: rbt tree */ + ib_rbt_bound_t* parent, /*!< in: node's parent */ + ib_rbt_node_t* node) /*!< in: node to add */ +{ + /* Cast away the const. */ + ib_rbt_node_t* last = (ib_rbt_node_t*) parent->last; + + if (last == tree->root || parent->result < 0) { + last->left = node; + } else { + /* FIXME: We don't handle duplicates (yet)! */ + ut_a(parent->result != 0); + + last->right = node; + } + + node->parent = last; + + return(node); +} + +/****************************************************************//** +Generic binary tree insert +@return inserted node */ +static +ib_rbt_node_t* +rbt_tree_insert( +/*============*/ + ib_rbt_t* tree, /*!< in: rb tree */ + const void* key, /*!< in: key for ordering */ + ib_rbt_node_t* node) /*!< in: node hold the insert value */ +{ + ib_rbt_bound_t parent; + ib_rbt_node_t* current = ROOT(tree); + + parent.result = 0; + parent.last = tree->root; + + /* Regular binary search. */ + while (current != tree->nil) { + + parent.last = current; + parent.result = tree->compare(key, current->value); + + if (parent.result < 0) { + current = current->left; + } else { + current = current->right; + } + } + + ut_a(current == tree->nil); + + rbt_tree_add_child(tree, &parent, node); + + return(node); +} + +/****************************************************************//** +Balance a tree after inserting a node. */ +static +void +rbt_balance_tree( +/*=============*/ + const ib_rbt_t* tree, /*!< in: tree to balance */ + ib_rbt_node_t* node) /*!< in: node that was inserted */ +{ + const ib_rbt_node_t* nil = tree->nil; + ib_rbt_node_t* parent = node->parent; + + /* Restore the red-black property. 
*/ + node->color = IB_RBT_RED; + + while (node != ROOT(tree) && parent->color == IB_RBT_RED) { + ib_rbt_node_t* grand_parent = parent->parent; + + if (parent == grand_parent->left) { + ib_rbt_node_t* uncle = grand_parent->right; + + if (uncle->color == IB_RBT_RED) { + + /* Case 1 - change the colors. */ + uncle->color = IB_RBT_BLACK; + parent->color = IB_RBT_BLACK; + grand_parent->color = IB_RBT_RED; + + /* Move node up the tree. */ + node = grand_parent; + + } else { + + if (node == parent->right) { + /* Right is a black node and node is + to the right, case 2 - move node + up and rotate. */ + node = parent; + rbt_rotate_left(nil, node); + } + + grand_parent = node->parent->parent; + + /* Case 3. */ + node->parent->color = IB_RBT_BLACK; + grand_parent->color = IB_RBT_RED; + + rbt_rotate_right(nil, grand_parent); + } + + } else { + ib_rbt_node_t* uncle = grand_parent->left; + + if (uncle->color == IB_RBT_RED) { + + /* Case 1 - change the colors. */ + uncle->color = IB_RBT_BLACK; + parent->color = IB_RBT_BLACK; + grand_parent->color = IB_RBT_RED; + + /* Move node up the tree. */ + node = grand_parent; + + } else { + + if (node == parent->left) { + /* Left is a black node and node is to + the right, case 2 - move node up and + rotate. */ + node = parent; + rbt_rotate_right(nil, node); + } + + grand_parent = node->parent->parent; + + /* Case 3. */ + node->parent->color = IB_RBT_BLACK; + grand_parent->color = IB_RBT_RED; + + rbt_rotate_left(nil, grand_parent); + } + } + + parent = node->parent; + } + + /* Color the root black. */ + ROOT(tree)->color = IB_RBT_BLACK; +} + +/****************************************************************//** +Find the given node's successor. 
+@return successor node or NULL if no successor */ +static +ib_rbt_node_t* +rbt_find_successor( +/*===============*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + const ib_rbt_node_t* current)/*!< in: this is declared const + because it can be called via + rbt_next() */ +{ + const ib_rbt_node_t* nil = tree->nil; + ib_rbt_node_t* next = current->right; + + /* Is there a sub-tree to the right that we can follow. */ + if (next != nil) { + + /* Follow the left most links of the current right child. */ + while (next->left != nil) { + next = next->left; + } + + } else { /* We will have to go up the tree to find the successor. */ + ib_rbt_node_t* parent = current->parent; + + /* Cast away the const. */ + next = (ib_rbt_node_t*) current; + + while (parent != tree->root && next == parent->right) { + next = parent; + parent = next->parent; + } + + next = (parent == tree->root) ? NULL : parent; + } + + return(next); +} + +/****************************************************************//** +Find the given node's precedecessor. +@return predecessor node or NULL if no predecesor */ +static +ib_rbt_node_t* +rbt_find_predecessor( +/*=================*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + const ib_rbt_node_t* current) /*!< in: this is declared const + because it can be called via + rbt_prev() */ +{ + const ib_rbt_node_t* nil = tree->nil; + ib_rbt_node_t* prev = current->left; + + /* Is there a sub-tree to the left that we can follow. */ + if (prev != nil) { + + /* Follow the right most links of the current left child. */ + while (prev->right != nil) { + prev = prev->right; + } + + } else { /* We will have to go up the tree to find the precedecessor. */ + ib_rbt_node_t* parent = current->parent; + + /* Cast away the const. */ + prev = (ib_rbt_node_t*)current; + + while (parent != tree->root && prev == parent->left) { + prev = parent; + parent = prev->parent; + } + + prev = (parent == tree->root) ? 
NULL : parent; + } + + return(prev); +} + +/****************************************************************//** +Replace node with child. After applying transformations eject becomes +an orphan. */ +static +void +rbt_eject_node( +/*===========*/ + ib_rbt_node_t* eject, /*!< in: node to eject */ + ib_rbt_node_t* node) /*!< in: node to replace with */ +{ + /* Update the to be ejected node's parent's child pointers. */ + if (eject->parent->left == eject) { + eject->parent->left = node; + } else if (eject->parent->right == eject) { + eject->parent->right = node; + } else { + ut_a(0); + } + /* eject is now an orphan but otherwise its pointers + and color are left intact. */ + + node->parent = eject->parent; +} + +/****************************************************************//** +Replace a node with another node. */ +static +void +rbt_replace_node( +/*=============*/ + ib_rbt_node_t* replace, /*!< in: node to replace */ + ib_rbt_node_t* node) /*!< in: node to replace with */ +{ + ib_rbt_color_t color = node->color; + + /* Update the node pointers. */ + node->left = replace->left; + node->right = replace->right; + + /* Update the child node pointers. */ + node->left->parent = node; + node->right->parent = node; + + /* Make the parent of replace point to node. */ + rbt_eject_node(replace, node); + + /* Swap the colors. */ + node->color = replace->color; + replace->color = color; +} + +/****************************************************************//** +Detach node from the tree replacing it with one of it's children. +@return the child node that now occupies the position of the detached node */ +static +ib_rbt_node_t* +rbt_detach_node( +/*============*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + ib_rbt_node_t* node) /*!< in: node to detach */ +{ + ib_rbt_node_t* child; + const ib_rbt_node_t* nil = tree->nil; + + if (node->left != nil && node->right != nil) { + /* Case where the node to be deleted has two children. 
*/ + ib_rbt_node_t* successor = rbt_find_successor(tree, node); + + ut_a(successor != nil); + ut_a(successor->parent != nil); + ut_a(successor->left == nil); + + child = successor->right; + + /* Remove the successor node and replace with its child. */ + rbt_eject_node(successor, child); + + /* Replace the node to delete with its successor node. */ + rbt_replace_node(node, successor); + } else { + ut_a(node->left == nil || node->right == nil); + + child = (node->left != nil) ? node->left : node->right; + + /* Replace the node to delete with one of it's children. */ + rbt_eject_node(node, child); + } + + /* Reset the node links. */ + node->parent = node->right = node->left = tree->nil; + + return(child); +} + +/****************************************************************//** +Rebalance the right sub-tree after deletion. +@return node to rebalance if more rebalancing required else NULL */ +static +ib_rbt_node_t* +rbt_balance_right( +/*==============*/ + const ib_rbt_node_t* nil, /*!< in: rb tree nil node */ + ib_rbt_node_t* parent, /*!< in: parent node */ + ib_rbt_node_t* sibling)/*!< in: sibling node */ +{ + ib_rbt_node_t* node = NULL; + + ut_a(sibling != nil); + + /* Case 3. */ + if (sibling->color == IB_RBT_RED) { + + parent->color = IB_RBT_RED; + sibling->color = IB_RBT_BLACK; + + rbt_rotate_left(nil, parent); + + sibling = parent->right; + + ut_a(sibling != nil); + } + + /* Since this will violate case 3 because of the change above. */ + if (sibling->left->color == IB_RBT_BLACK + && sibling->right->color == IB_RBT_BLACK) { + + node = parent; /* Parent needs to be rebalanced too. 
*/ + sibling->color = IB_RBT_RED; + + } else { + if (sibling->right->color == IB_RBT_BLACK) { + + ut_a(sibling->left->color == IB_RBT_RED); + + sibling->color = IB_RBT_RED; + sibling->left->color = IB_RBT_BLACK; + + rbt_rotate_right(nil, sibling); + + sibling = parent->right; + ut_a(sibling != nil); + } + + sibling->color = parent->color; + sibling->right->color = IB_RBT_BLACK; + + parent->color = IB_RBT_BLACK; + + rbt_rotate_left(nil, parent); + } + + return(node); +} + +/****************************************************************//** +Rebalance the left sub-tree after deletion. +@return node to rebalance if more rebalancing required else NULL */ +static +ib_rbt_node_t* +rbt_balance_left( +/*=============*/ + const ib_rbt_node_t* nil, /*!< in: rb tree nil node */ + ib_rbt_node_t* parent, /*!< in: parent node */ + ib_rbt_node_t* sibling)/*!< in: sibling node */ +{ + ib_rbt_node_t* node = NULL; + + ut_a(sibling != nil); + + /* Case 3. */ + if (sibling->color == IB_RBT_RED) { + + parent->color = IB_RBT_RED; + sibling->color = IB_RBT_BLACK; + + rbt_rotate_right(nil, parent); + sibling = parent->left; + + ut_a(sibling != nil); + } + + /* Since this will violate case 3 because of the change above. */ + if (sibling->right->color == IB_RBT_BLACK + && sibling->left->color == IB_RBT_BLACK) { + + node = parent; /* Parent needs to be rebalanced too. 
*/ + sibling->color = IB_RBT_RED; + + } else { + if (sibling->left->color == IB_RBT_BLACK) { + + ut_a(sibling->right->color == IB_RBT_RED); + + sibling->color = IB_RBT_RED; + sibling->right->color = IB_RBT_BLACK; + + rbt_rotate_left(nil, sibling); + + sibling = parent->left; + + ut_a(sibling != nil); + } + + sibling->color = parent->color; + sibling->left->color = IB_RBT_BLACK; + + parent->color = IB_RBT_BLACK; + + rbt_rotate_right(nil, parent); + } + + return(node); +} + +/****************************************************************//** +Delete the node and rebalance the tree if necessary */ +static +void +rbt_remove_node_and_rebalance( +/*==========================*/ + ib_rbt_t* tree, /*!< in: rb tree */ + ib_rbt_node_t* node) /*!< in: node to remove */ +{ + /* Detach node and get the node that will be used + as rebalance start. */ + ib_rbt_node_t* child = rbt_detach_node(tree, node); + + if (node->color == IB_RBT_BLACK) { + ib_rbt_node_t* last = child; + + ROOT(tree)->color = IB_RBT_RED; + + while (child && child->color == IB_RBT_BLACK) { + ib_rbt_node_t* parent = child->parent; + + /* Did the deletion cause an imbalance in the + parents left sub-tree. */ + if (parent->left == child) { + + child = rbt_balance_right( + tree->nil, parent, parent->right); + + } else if (parent->right == child) { + + child = rbt_balance_left( + tree->nil, parent, parent->left); + + } else { + ut_error; + } + + if (child) { + last = child; + } + } + + ut_a(last); + + last->color = IB_RBT_BLACK; + ROOT(tree)->color = IB_RBT_BLACK; + } + + /* Note that we have removed a node from the tree. */ + --tree->n_nodes; +} + +/****************************************************************//** +Recursively free the nodes. 
*/ +static +void +rbt_free_node( +/*==========*/ + ib_rbt_node_t* node, /*!< in: node to free */ + ib_rbt_node_t* nil) /*!< in: rb tree nil node */ +{ + if (node != nil) { + rbt_free_node(node->left, nil); + rbt_free_node(node->right, nil); + + ut_free(node); + } +} + +/****************************************************************//** +Free all the nodes and free the tree. */ +UNIV_INTERN +void +rbt_free( +/*=====*/ + ib_rbt_t* tree) /*!< in: rb tree to free */ +{ + rbt_free_node(tree->root, tree->nil); + ut_free(tree->nil); + ut_free(tree); +} + +/****************************************************************//** +Create an instance of a red black tree. +@return an empty rb tree */ +UNIV_INTERN +ib_rbt_t* +rbt_create( +/*=======*/ + size_t sizeof_value, /*!< in: sizeof data item */ + ib_rbt_compare compare) /*!< in: fn to compare items */ +{ + ib_rbt_t* tree; + ib_rbt_node_t* node; + + tree = (ib_rbt_t*) ut_malloc(sizeof(*tree)); + memset(tree, 0, sizeof(*tree)); + + tree->sizeof_value = sizeof_value; + + /* Create the sentinel (NIL) node. */ + node = tree->nil = (ib_rbt_node_t*) ut_malloc(sizeof(*node)); + memset(node, 0, sizeof(*node)); + + node->color = IB_RBT_BLACK; + node->parent = node->left = node->right = node; + + /* Create the "fake" root, the real root node will be the + left child of this node. */ + node = tree->root = (ib_rbt_node_t*) ut_malloc(sizeof(*node)); + memset(node, 0, sizeof(*node)); + + node->color = IB_RBT_BLACK; + node->parent = node->left = node->right = tree->nil; + + tree->compare = compare; + + return(tree); +} + +/****************************************************************//** +Generic insert of a value in the rb tree. 
+@return inserted node */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_insert( +/*=======*/ + ib_rbt_t* tree, /*!< in: rb tree */ + const void* key, /*!< in: key for ordering */ + const void* value) /*!< in: value of key, this value + is copied to the node */ +{ + ib_rbt_node_t* node; + + /* Create the node that will hold the value data. */ + node = (ib_rbt_node_t*) ut_malloc(SIZEOF_NODE(tree)); + + memcpy(node->value, value, tree->sizeof_value); + node->parent = node->left = node->right = tree->nil; + + /* Insert in the tree in the usual way. */ + rbt_tree_insert(tree, key, node); + rbt_balance_tree(tree, node); + + ++tree->n_nodes; + + return(node); +} + +/****************************************************************//** +Add a new node to the tree, useful for data that is pre-sorted. +@return appended node */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_add_node( +/*=========*/ + ib_rbt_t* tree, /*!< in: rb tree */ + ib_rbt_bound_t* parent, /*!< in: bounds */ + const void* value) /*!< in: this value is copied + to the node */ +{ + ib_rbt_node_t* node; + + /* Create the node that will hold the value data */ + node = (ib_rbt_node_t*) ut_malloc(SIZEOF_NODE(tree)); + + memcpy(node->value, value, tree->sizeof_value); + node->parent = node->left = node->right = tree->nil; + + /* If tree is empty */ + if (parent->last == NULL) { + parent->last = tree->root; + } + + /* Append the node, the hope here is that the caller knows + what s/he is doing. */ + rbt_tree_add_child(tree, parent, node); + rbt_balance_tree(tree, node); + + ++tree->n_nodes; + +#if defined(IB_RBT_TESTING) + ut_a(rbt_validate(tree)); +#endif + return(node); +} + +/****************************************************************//** +Find a matching node in the rb tree. 
+@return NULL if not found else the node where key was found */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_lookup( +/*=======*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + const void* key) /*!< in: key to use for search */ +{ + const ib_rbt_node_t* current = ROOT(tree); + + /* Regular binary search. */ + while (current != tree->nil) { + int result = tree->compare(key, current->value); + + if (result < 0) { + current = current->left; + } else if (result > 0) { + current = current->right; + } else { + break; + } + } + + return(current != tree->nil ? current : NULL); +} + +/****************************************************************//** +Delete a node from the red black tree, identified by key. +@return TRUE if success FALSE if not found */ +UNIV_INTERN +ibool +rbt_delete( +/*=======*/ + ib_rbt_t* tree, /*!< in: rb tree */ + const void* key) /*!< in: key to delete */ +{ + ibool deleted = FALSE; + ib_rbt_node_t* node = (ib_rbt_node_t*) rbt_lookup(tree, key); + + if (node) { + rbt_remove_node_and_rebalance(tree, node); + + ut_free(node); + deleted = TRUE; + } + + return(deleted); +} + +/****************************************************************//** +Remove a node from the rb tree, the node is not free'd, that is the +callers responsibility. +@return deleted node but without the const */ +UNIV_INTERN +ib_rbt_node_t* +rbt_remove_node( +/*============*/ + ib_rbt_t* tree, /*!< in: rb tree */ + const ib_rbt_node_t* const_node) /*!< in: node to delete, this + is a fudge and declared const + because the caller can access + only const nodes */ +{ + /* Cast away the const. */ + rbt_remove_node_and_rebalance(tree, (ib_rbt_node_t*) const_node); + + /* This is to make it easier to do something like this: + ut_free(rbt_remove_node(node)); + */ + + return((ib_rbt_node_t*) const_node); +} + +/****************************************************************//** +Find the node that has the lowest key that is >= key. 
+@return node satisfying the lower bound constraint or NULL */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_lower_bound( +/*============*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + const void* key) /*!< in: key to search */ +{ + ib_rbt_node_t* lb_node = NULL; + ib_rbt_node_t* current = ROOT(tree); + + while (current != tree->nil) { + int result = tree->compare(key, current->value); + + if (result > 0) { + + current = current->right; + + } else if (result < 0) { + + lb_node = current; + current = current->left; + + } else { + lb_node = current; + break; + } + } + + return(lb_node); +} + +/****************************************************************//** +Find the node that has the greatest key that is <= key. +@return node satisfying the upper bound constraint or NULL */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_upper_bound( +/*============*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + const void* key) /*!< in: key to search */ +{ + ib_rbt_node_t* ub_node = NULL; + ib_rbt_node_t* current = ROOT(tree); + + while (current != tree->nil) { + int result = tree->compare(key, current->value); + + if (result > 0) { + + ub_node = current; + current = current->right; + + } else if (result < 0) { + + current = current->left; + + } else { + ub_node = current; + break; + } + } + + return(ub_node); +} + +/****************************************************************//** +Find the node that has the greatest key that is <= key. +@return value of result */ +UNIV_INTERN +int +rbt_search( +/*=======*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + ib_rbt_bound_t* parent, /*!< in: search bounds */ + const void* key) /*!< in: key to search */ +{ + ib_rbt_node_t* current = ROOT(tree); + + /* Every thing is greater than the NULL root. 
*/ + parent->result = 1; + parent->last = NULL; + + while (current != tree->nil) { + + parent->last = current; + parent->result = tree->compare(key, current->value); + + if (parent->result > 0) { + current = current->right; + } else if (parent->result < 0) { + current = current->left; + } else { + break; + } + } + + return(parent->result); +} + +/****************************************************************//** +Find the node that has the greatest key that is <= key. But use the +supplied comparison function. +@return value of result */ +UNIV_INTERN +int +rbt_search_cmp( +/*===========*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + ib_rbt_bound_t* parent, /*!< in: search bounds */ + const void* key, /*!< in: key to search */ + ib_rbt_compare compare) /*!< in: fn to compare items */ +{ + ib_rbt_node_t* current = ROOT(tree); + + /* Every thing is greater than the NULL root. */ + parent->result = 1; + parent->last = NULL; + + while (current != tree->nil) { + + parent->last = current; + parent->result = compare(key, current->value); + + if (parent->result > 0) { + current = current->right; + } else if (parent->result < 0) { + current = current->left; + } else { + break; + } + } + + return(parent->result); +} + +/****************************************************************//** +Get the leftmost node. +Return the left most node in the tree. */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_first( +/*======*/ + const ib_rbt_t* tree) /* in: rb tree */ +{ + ib_rbt_node_t* first = NULL; + ib_rbt_node_t* current = ROOT(tree); + + while (current != tree->nil) { + first = current; + current = current->left; + } + + return(first); +} + +/****************************************************************//** +Return the right most node in the tree. 
+@return the rightmost node or NULL */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_last( +/*=====*/ + const ib_rbt_t* tree) /*!< in: rb tree */ +{ + ib_rbt_node_t* last = NULL; + ib_rbt_node_t* current = ROOT(tree); + + while (current != tree->nil) { + last = current; + current = current->right; + } + + return(last); +} + +/****************************************************************//** +Return the next node. +@return node next from current */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_next( +/*=====*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + const ib_rbt_node_t* current)/*!< in: current node */ +{ + return(current ? rbt_find_successor(tree, current) : NULL); +} + +/****************************************************************//** +Return the previous node. +@return node prev from current */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_prev( +/*=====*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + const ib_rbt_node_t* current)/*!< in: current node */ +{ + return(current ? rbt_find_predecessor(tree, current) : NULL); +} + +/****************************************************************//** +Reset the tree. Delete all the nodes. */ +UNIV_INTERN +void +rbt_clear( +/*======*/ + ib_rbt_t* tree) /*!< in: rb tree */ +{ + rbt_free_node(ROOT(tree), tree->nil); + + tree->n_nodes = 0; + tree->root->left = tree->root->right = tree->nil; +} + +/****************************************************************//** +Merge the node from dst into src. Return the number of nodes merged. +@return no. 
of recs merged */ +UNIV_INTERN +ulint +rbt_merge_uniq( +/*===========*/ + ib_rbt_t* dst, /*!< in: dst rb tree */ + const ib_rbt_t* src) /*!< in: src rb tree */ +{ + ib_rbt_bound_t parent; + ulint n_merged = 0; + const ib_rbt_node_t* src_node = rbt_first(src); + + if (rbt_empty(src) || dst == src) { + return(0); + } + + for (/* No op */; src_node; src_node = rbt_next(src, src_node)) { + + if (rbt_search(dst, &parent, src_node->value) != 0) { + rbt_add_node(dst, &parent, src_node->value); + ++n_merged; + } + } + + return(n_merged); +} + +/****************************************************************//** +Merge the node from dst into src. Return the number of nodes merged. +Delete the nodes from src after copying node to dst. As a side effect +the duplicates will be left untouched in the src. +@return no. of recs merged */ +UNIV_INTERN +ulint +rbt_merge_uniq_destructive( +/*=======================*/ + ib_rbt_t* dst, /*!< in: dst rb tree */ + ib_rbt_t* src) /*!< in: src rb tree */ +{ + ib_rbt_bound_t parent; + ib_rbt_node_t* src_node; + ulint old_size = rbt_size(dst); + + if (rbt_empty(src) || dst == src) { + return(0); + } + + for (src_node = (ib_rbt_node_t*) rbt_first(src); src_node; /* */) { + ib_rbt_node_t* prev = src_node; + + src_node = (ib_rbt_node_t*)rbt_next(src, prev); + + /* Skip duplicates. */ + if (rbt_search(dst, &parent, prev->value) != 0) { + + /* Remove and reset the node but preserve + the node (data) value. */ + rbt_remove_node_and_rebalance(src, prev); + + /* The nil should be taken from the dst tree. 
*/ + prev->parent = prev->left = prev->right = dst->nil; + rbt_tree_add_child(dst, &parent, prev); + rbt_balance_tree(dst, prev); + + ++dst->n_nodes; + } + } + +#if defined(IB_RBT_TESTING) + ut_a(rbt_validate(dst)); + ut_a(rbt_validate(src)); +#endif + return(rbt_size(dst) - old_size); +} + +/****************************************************************//** +Check that every path from the root to the leaves has the same count and +the tree nodes are in order. +@return TRUE if OK FALSE otherwise */ +UNIV_INTERN +ibool +rbt_validate( +/*=========*/ + const ib_rbt_t* tree) /*!< in: RB tree to validate */ +{ + if (rbt_count_black_nodes(tree, ROOT(tree)) > 0) { + return(rbt_check_ordering(tree)); + } + + return(FALSE); +} + +/****************************************************************//** +Iterate over the tree in depth first order. */ +UNIV_INTERN +void +rbt_print( +/*======*/ + const ib_rbt_t* tree, /*!< in: tree to traverse */ + ib_rbt_print_node print) /*!< in: print function */ +{ + rbt_print_subtree(tree, ROOT(tree), print); +} |