diff options
Diffstat (limited to 'sql/ha_innodb.cc')
-rw-r--r-- | sql/ha_innodb.cc | 752 |
1 files changed, 579 insertions, 173 deletions
diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index 455d890758f..d42311b43b6 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -1,4 +1,4 @@ -/* Copyright (C) 2000 MySQL AB & InnoDB Oy +/* Copyright (C) 2000 MySQL AB & Innobase Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ /* TODO list for the InnoDB handler: - Ask Monty if strings of different languages can exist in the same - database. Answer: in near future yes, but not yet. + database. Answer: in 4.1 yes. */ #ifdef __GNUC__ @@ -27,6 +27,7 @@ #endif #include "mysql_priv.h" +#include "slave.h" #ifdef HAVE_INNOBASE_DB #include <m_ctype.h> #include <assert.h> @@ -53,10 +54,13 @@ typedef byte mysql_byte; /* Include necessary InnoDB headers */ extern "C" { #include "../innobase/include/univ.i" +#include "../innobase/include/os0file.h" +#include "../innobase/include/os0thread.h" #include "../innobase/include/srv0start.h" #include "../innobase/include/srv0srv.h" #include "../innobase/include/trx0roll.h" #include "../innobase/include/trx0trx.h" +#include "../innobase/include/trx0sys.h" #include "../innobase/include/row0ins.h" #include "../innobase/include/row0mysql.h" #include "../innobase/include/row0sel.h" @@ -107,8 +111,6 @@ my_bool innobase_fast_shutdown = TRUE; specify any startup options. */ -/* innobase_data_file_path=ibdata:15,idata2:1,... */ - char *innobase_data_file_path= (char*) "ibdata1:10M:autoextend"; static char *internal_innobase_data_file_path=0; @@ -135,8 +137,9 @@ static void innobase_print_error(const char* db_errpfx, char* buffer); /* General functions */ /********************************************************************** -Releases possible search latch, auto inc lock, and InnoDB thread FIFO ticket. -These should be released at each SQL statement end. */ +Releases possible search latch and InnoDB thread FIFO ticket. These should +be released at each SQL statement end. 
It does no harm to release these +also in the middle of an SQL statement. */ static void innobase_release_stat_resources( @@ -147,16 +150,6 @@ innobase_release_stat_resources( trx_search_latch_release_if_reserved(trx); } - if (trx->auto_inc_lock) { - - /* If we had reserved the auto-inc lock for - some table in this SQL statement, we release it now */ - - srv_conc_enter_innodb(trx); - row_unlock_table_autoinc_for_mysql(trx); - srv_conc_exit_innodb(trx); - } - if (trx->declared_to_be_inside_innodb) { /* Release our possible ticket in the FIFO */ @@ -188,7 +181,8 @@ int convert_error_code_to_mysql( /*========================*/ /* out: MySQL error code */ - int error) /* in: InnoDB error code */ + int error, /* in: InnoDB error code */ + THD* thd) /* in: user thread handle or NULL */ { if (error == DB_SUCCESS) { @@ -207,11 +201,27 @@ convert_error_code_to_mysql( return(HA_ERR_NO_ACTIVE_RECORD); } else if (error == (int) DB_DEADLOCK) { + /* Since we roll back the whole transaction, we must + tell it also to MySQL so that MySQL knows to empty the + cached binlog for this transaction */ + + if (thd) { + ha_rollback(thd); + } return(HA_ERR_LOCK_DEADLOCK); } else if (error == (int) DB_LOCK_WAIT_TIMEOUT) { + /* Since we roll back the whole transaction, we must + tell it also to MySQL so that MySQL knows to empty the + cached binlog for this transaction */ + + + if (thd) { + ha_rollback(thd); + } + return(HA_ERR_LOCK_WAIT_TIMEOUT); } else if (error == (int) DB_NO_REFERENCED_ROW) { @@ -242,8 +252,6 @@ convert_error_code_to_mysql( return(HA_ERR_TO_BIG_ROW); } else { - DBUG_ASSERT(0); - return(-1); // Unknown error } } @@ -251,41 +259,66 @@ convert_error_code_to_mysql( extern "C" { /***************************************************************** Prints info of a THD object (== user session thread) to the -standard output. NOTE that mysql/innobase/trx/trx0trx.c must contain +standard output. NOTE that /mysql/innobase/trx/trx0trx.c must contain the prototype for this function! 
*/ void innobase_mysql_print_thd( /*=====================*/ - void* input_thd)/* in: pointer to a MySQL THD object */ + char* buf, /* in/out: buffer where to print, must be at least + 400 bytes */ + void* input_thd)/* in: pointer to a MySQL THD object */ { - THD* thd; + THD* thd; + char* old_buf = buf; + + thd = (THD*) input_thd; - thd = (THD*) input_thd; + /* We cannot use the return value of normal sprintf() as this is + not portable to some old non-Posix Unixes, e.g., some old SCO + Unixes */ - printf("MySQL thread id %lu, query id %lu", - thd->thread_id, thd->query_id); + buf += my_sprintf(buf, + (buf, "MySQL thread id %lu, query id %lu", + thd->thread_id, thd->query_id)); if (thd->host) { - printf(" %s", thd->host); + *buf = ' '; + buf++; + buf = strnmov(buf, thd->host, 30); } if (thd->ip) { - printf(" %s", thd->ip); + *buf = ' '; + buf++; + buf=strnmov(buf, thd->ip, 20); } if (thd->user) { - printf(" %s", thd->user); + *buf = ' '; + buf++; + buf=strnmov(buf, thd->user, 20); } if (thd->proc_info) { - printf(" %s", thd->proc_info); + *buf = ' '; + buf++; + buf=strnmov(buf, thd->proc_info, 50); } if (thd->query) { - printf("\n%-.100s", thd->query); + *buf = '\n'; + buf++; + buf=strnmov(buf, thd->query, 150); } - printf("\n"); + buf[0] = '\n'; + buf[1] = '\0'; /* Note that we must put a null character here to end + the printed string */ + + /* We test the printed length did not overrun the buffer length of + 400 bytes */ + + ut_a(strlen(old_buf) < 400); } } @@ -302,6 +335,8 @@ check_trx_exists( { trx_t* trx; + ut_a(thd == current_thd); + trx = (trx_t*) thd->transaction.all.innobase_tid; if (trx == NULL) { @@ -321,7 +356,23 @@ check_trx_exists( thd->transaction.stmt.innobase_tid = (void*)&innodb_dummy_stmt_trx_handle; } else { - ut_a(trx->magic_n == TRX_MAGIC_N); + if (trx->magic_n != TRX_MAGIC_N) { + mem_analyze_corruption((byte*)trx); + + ut_a(0); + } + } + + if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) { + trx->check_foreigns = FALSE; + } else { + 
trx->check_foreigns = TRUE; + } + + if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) { + trx->check_unique_secondary = FALSE; + } else { + trx->check_unique_secondary = TRUE; } return(trx); @@ -340,7 +391,7 @@ ha_innobase::update_thd( { row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; trx_t* trx; - + trx = check_trx_exists(thd); if (prebuilt->trx != trx) { @@ -353,6 +404,61 @@ ha_innobase::update_thd( return(0); } +/* The code here appears for documentational purposes only. Not used +or tested yet. Will be used in 4.1. */ +/********************************************************************* +Call this when you have opened a new table handle in HANDLER, before you +call index_read_idx() etc. Actually, we can let the cursor stay open even +over a transaction commit! Then you should call this before every operation, +fecth next etc. This function inits the necessary things even after a +transaction commit. */ + +void +ha_innobase::init_table_handle_for_HANDLER(void) +/*============================================*/ +{ + row_prebuilt_t* prebuilt; + + /* If current thd does not yet have a trx struct, create one. + If the current handle does not yet have a prebuilt struct, create + one. Update the trx pointers in the prebuilt struct. Normally + this operation is done in external_lock. 
*/ + + update_thd(current_thd); + + /* Initialize the prebuilt struct much like it would be inited in + external_lock */ + + prebuilt = (row_prebuilt_t*)innobase_prebuilt; + + /* If the transaction is not started yet, start it */ + + trx_start_if_not_started_noninline(prebuilt->trx); + + /* Assign a read view if the transaction does not have it yet */ + + trx_assign_read_view(prebuilt->trx); + + /* We did the necessary inits in this function, no need to repeat them + in row_search_for_mysql */ + + prebuilt->sql_stat_start = FALSE; + + /* We let HANDLER always to do the reads as consistent reads, even + if the trx isolation level would have been specified as SERIALIZABLE */ + + prebuilt->select_lock_type = LOCK_NONE; + + /* Always fetch all columns in the index record */ + + prebuilt->hint_no_need_to_fetch_extra_cols = FALSE; + + /* We want always to fetch all columns in the whole row? Or do + we???? */ + + prebuilt->read_just_key = FALSE; +} + /************************************************************************* Opens an InnoDB database. */ @@ -361,12 +467,15 @@ innobase_init(void) /*===============*/ /* out: TRUE if error */ { + static char current_dir[3]; // Set if using current lib int err; bool ret; - char current_lib[3], *default_path; + char *default_path; DBUG_ENTER("innobase_init"); + os_innodb_umask = (ulint)my_umask; + /* When using the embedded server, the datadirectory is not in the current directory. 
@@ -376,10 +485,10 @@ innobase_init(void) else { /* It's better to use current lib, to keep path's short */ - current_lib[0] = FN_CURLIB; - current_lib[1] = FN_LIBCHAR; - current_lib[2] = 0; - default_path=current_lib; + current_dir[0] = FN_CURLIB; + current_dir[1] = FN_LIBCHAR; + current_dir[2] = 0; + default_path=current_dir; } if (specialflag & SPECIAL_NO_PRIOR) { @@ -440,11 +549,12 @@ innobase_init(void) srv_log_archive_on = (ulint) innobase_log_archive; srv_log_buffer_size = (ulint) innobase_log_buffer_size; - srv_flush_log_at_trx_commit = (ibool) innobase_flush_log_at_trx_commit; + srv_flush_log_at_trx_commit = (ulint) innobase_flush_log_at_trx_commit; srv_use_native_aio = 0; srv_pool_size = (ulint) innobase_buffer_pool_size; + srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size; srv_n_file_io_threads = (ulint) innobase_file_io_threads; @@ -475,6 +585,22 @@ innobase_init(void) (void) hash_init(&innobase_open_tables,system_charset_info,32,0,0, (hash_get_key) innobase_get_key,0,0); pthread_mutex_init(&innobase_mutex,MY_MUTEX_INIT_FAST); + + /* If this is a replication slave and we needed to do a crash recovery, + set the master binlog position to what InnoDB internally knew about + how far we got transactions durable inside InnoDB. There is a + problem here: if the user used also MyISAM tables, InnoDB might not + know the right position for them. + + THIS DOES NOT WORK CURRENTLY because replication seems to initialize + glob_mi also after innobase_init. */ + +/* if (trx_sys_mysql_master_log_pos != -1) { + ut_memcpy(glob_mi.log_file_name, trx_sys_mysql_master_log_name, + 1 + ut_strlen(trx_sys_mysql_master_log_name)); + glob_mi.pos = trx_sys_mysql_master_log_pos; + } +*/ DBUG_RETURN(0); } @@ -534,6 +660,30 @@ innobase_get_free_space(void) /********************************************************************* Commits a transaction in an InnoDB database. 
*/ +void +innobase_commit_low( +/*================*/ + trx_t* trx) /* in: transaction handle */ +{ + if (current_thd->slave_thread) + { + /* Update the replication position info inside InnoDB */ +#ifdef NEED_TO_BE_FIXED + trx->mysql_relay_log_file_name= active_mi->rli.log_file_name; + trx->mysql_relay_log_pos= active_mi->rli.relay_log_pos; +#endif + trx->mysql_master_log_file_name= active_mi->rli.master_log_name; + trx->mysql_master_log_pos= ((ib_longlong) + (active_mi->rli.master_log_pos + + active_mi->rli.event_len + + active_mi->rli.pending)); + } + trx_commit_for_mysql(trx); +} + +/********************************************************************* +Commits a transaction in an InnoDB database. */ + int innobase_commit( /*============*/ @@ -552,9 +702,18 @@ innobase_commit( trx = check_trx_exists(thd); - if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle) { + if (trx->auto_inc_lock) { + + /* If we had reserved the auto-inc lock for + some table in this SQL statement, we release it now */ + + srv_conc_enter_innodb(trx); + row_unlock_table_autoinc_for_mysql(trx); + srv_conc_exit_innodb(trx); + } - trx_commit_for_mysql(trx); + if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle) { + innobase_commit_low(trx); } /* Release possible statement level resources */ @@ -594,6 +753,8 @@ innobase_report_binlog_offset_and_commit( trx = (trx_t*)trx_handle; + ut_a(trx != NULL); + trx->mysql_log_file_name = log_file_name; trx->mysql_log_offset = (ib_longlong)end_offset; @@ -619,6 +780,16 @@ innobase_rollback( trx = check_trx_exists(thd); + if (trx->auto_inc_lock) { + + /* If we had reserved the auto-inc lock for + some table in this SQL statement, we release it now */ + + srv_conc_enter_innodb(trx); + row_unlock_table_autoinc_for_mysql(trx); + srv_conc_exit_innodb(trx); + } + srv_conc_enter_innodb(trx); if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle) { @@ -634,7 +805,7 @@ innobase_rollback( trx_mark_sql_stat_end(trx); - 
DBUG_RETURN(convert_error_code_to_mysql(error)); + DBUG_RETURN(convert_error_code_to_mysql(error, NULL)); } /********************************************************************* @@ -747,7 +918,7 @@ normalize_table_name( } /********************************************************************* -Creates and opens a handle to a table which already exists in an Innobase +Creates and opens a handle to a table which already exists in an InnoDB database. */ int @@ -796,7 +967,6 @@ ha_innobase::open( ib_table = dict_table_get_and_increment_handle_count( norm_name, NULL); - if (NULL == ib_table) { sql_print_error("InnoDB error:\n\ @@ -819,13 +989,13 @@ have moved .frm files to another database?", primary_key = MAX_KEY; - if (!row_table_got_default_clust_index(ib_table)) { + /* Allocate a buffer for a 'row reference'. A row reference is + a string of bytes of length ref_length which uniquely specifies + a row in our table. Note that MySQL may also compare two row + references for equality by doing a simple memcmp on the strings + of length ref_length! */ - /* If we automatically created the clustered index, - then MySQL does not know about it and it must not be aware - of the index used on scan, to avoid checking if we update - the column of the index. The column is the row id in - the automatical case, and it will not be updated. */ + if (!row_table_got_default_clust_index(ib_table)) { ((row_prebuilt_t*)innobase_prebuilt) ->clust_index_was_generated = FALSE; @@ -834,13 +1004,13 @@ have moved .frm files to another database?", key_used_on_scan = 0; /* - MySQL allocates the buffer for ref. - This includes all keys + one byte for each column - that may be NULL. - The ref_length must be exact as possible as - all reference buffers are allocated based on this. + MySQL allocates the buffer for ref. key_info->key_length + includes space for all key columns + one byte for each column + that may be NULL. 
ref_length must be as exact as possible to + save space, because all row reference buffers are allocated + based on ref_length. */ - + ref_length = table->key_info->key_length; } else { ((row_prebuilt_t*)innobase_prebuilt) @@ -848,11 +1018,23 @@ have moved .frm files to another database?", ref_length = DATA_ROW_ID_LEN; + /* + If we automatically created the clustered index, then + MySQL does not know about it, and MySQL must NOT be aware + of the index used on scan, to make it avoid checking if we + update the column of the index. That is why we assert below + that key_used_on_scan is the undefined value MAX_KEY. + The column is the row id in the automatical generation case, + and it will never be updated anyway. + */ DBUG_ASSERT(key_used_on_scan == MAX_KEY); } auto_inc_counter_for_this_stat = 0; + block_size = 16 * 1024; /* Index block size in InnoDB: used by MySQL + in query optimization */ + /* Init table lock structure */ thr_lock_data_init(&share->lock,&lock,(void*) 0); @@ -1094,7 +1276,8 @@ get_innobase_type_from_mysql_type( } /*********************************************************************** -Stores a key value for a row to a buffer. */ +Stores a key value for a row to a buffer. This must currently only be used +to store a row reference to the 'ref' buffer of this table handle! */ uint ha_innobase::store_key_val_for_row( @@ -1102,7 +1285,8 @@ ha_innobase::store_key_val_for_row( /* out: key value length as stored in buff */ uint keynr, /* in: key number */ char* buff, /* in/out: buffer for the key value (in MySQL - format) */ + format); currently this MUST be the 'ref' + buffer! 
*/ const mysql_byte* record)/* in: row in MySQL format */ { KEY* key_info = table->key_info + keynr; @@ -1131,11 +1315,12 @@ ha_innobase::store_key_val_for_row( } /* - We have to zero-fill the buffer to be able to compare two - keys to see if they are equal + We have to zero-fill the 'ref' buffer so that MySQL is able to + use a simple memcmp to compare two key values to determine if they + are equal */ bzero(buff, (ref_length- (uint) (buff - buff_start))); - return ref_length; + DBUG_RETURN(ref_length); } /****************************************************************** @@ -1310,9 +1495,13 @@ ha_innobase::write_row( row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt; int error; longlong auto_inc; + longlong dummy; DBUG_ENTER("ha_innobase::write_row"); + ut_a(prebuilt->trx == + (trx_t*) current_thd->transaction.all.innobase_tid); + statistic_increment(ha_write_count, &LOCK_status); if (table->time_stamp) { @@ -1329,7 +1518,31 @@ ha_innobase::write_row( if (table->next_number_field && record == table->record[0]) { /* This is the case where the table has an auto-increment column */ - + + /* Initialize the auto-inc counter if it has not been + initialized yet */ + + if (0 == dict_table_autoinc_peek(prebuilt->table)) { + + /* This call initializes the counter */ + error = innobase_read_and_init_auto_inc(&dummy); + + if (error) { + /* Deadlock or lock wait timeout */ + + goto func_exit; + } + + /* We have to set sql_stat_start to TRUE because + the above call probably has called a select, and + has reset that flag; row_insert_for_mysql has to + know to set the IX intention lock on the table, + something it only does at the start of each + statement */ + + prebuilt->sql_stat_start = TRUE; + } + /* Fetch the value the user possibly has set in the autoincrement field */ @@ -1362,10 +1575,9 @@ ha_innobase::write_row( } if (auto_inc != 0) { - /* This call will calculate the max of the - current value and the value supplied by the user, if - the auto_inc counter 
is already initialized - for the table */ + /* This call will calculate the max of the current + value and the value supplied by the user and + update the counter accordingly */ /* We have to use the transactional lock mechanism on the auto-inc counter of the table to ensure @@ -1380,7 +1592,8 @@ ha_innobase::write_row( if (error != DB_SUCCESS) { - error = convert_error_code_to_mysql(error); + error = convert_error_code_to_mysql(error, + user_thd); goto func_exit; } @@ -1396,7 +1609,7 @@ ha_innobase::write_row( srv_conc_exit_innodb(prebuilt->trx); error = convert_error_code_to_mysql( - error); + error, user_thd); goto func_exit; } } @@ -1404,45 +1617,18 @@ ha_innobase::write_row( auto_inc = dict_table_autoinc_get(prebuilt->table); srv_conc_exit_innodb(prebuilt->trx); - /* If auto_inc is now != 0 the autoinc counter - was already initialized for the table: we can give - the new value for MySQL to place in the field */ + /* We can give the new value for MySQL to place in + the field */ - if (auto_inc != 0) { - user_thd->next_insert_id = auto_inc; - } - } - - update_auto_increment(); - - if (auto_inc == 0) { - /* The autoinc counter for our table was not yet - initialized, initialize it now */ - - auto_inc = table->next_number_field->val_int(); - - srv_conc_enter_innodb(prebuilt->trx); - error = row_lock_table_autoinc_for_mysql(prebuilt); - srv_conc_exit_innodb(prebuilt->trx); - - if (error != DB_SUCCESS) { - - error = convert_error_code_to_mysql(error); - goto func_exit; - } - - dict_table_autoinc_initialize(prebuilt->table, - auto_inc); + user_thd->next_insert_id = auto_inc; } - /* We have to set sql_stat_start to TRUE because - update_auto_increment may have called a select, and - has reset that flag; row_insert_for_mysql has to - know to set the IX intention lock on the table, something - it only does at the start of each statement */ + /* This call of a handler.cc function places + user_thd->next_insert_id to the column value, if the column + value was not set by 
the user */ - prebuilt->sql_stat_start = TRUE; - } + update_auto_increment(); + } if (prebuilt->mysql_template == NULL || prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) { @@ -1467,7 +1653,7 @@ ha_innobase::write_row( prebuilt->trx->ignore_duplicates_in_insert = FALSE; - error = convert_error_code_to_mysql(error); + error = convert_error_code_to_mysql(error, user_thd); /* Tell InnoDB server that there might be work for utility threads: */ @@ -1651,6 +1837,9 @@ ha_innobase::update_row( DBUG_ENTER("ha_innobase::update_row"); + ut_a(prebuilt->trx == + (trx_t*) current_thd->transaction.all.innobase_tid); + if (table->time_stamp) { update_timestamp(new_row + table->time_stamp - 1); } @@ -1684,7 +1873,7 @@ ha_innobase::update_row( srv_conc_exit_innodb(prebuilt->trx); - error = convert_error_code_to_mysql(error); + error = convert_error_code_to_mysql(error, user_thd); /* Tell InnoDB server that there might be work for utility threads: */ @@ -1708,6 +1897,9 @@ ha_innobase::delete_row( DBUG_ENTER("ha_innobase::delete_row"); + ut_a(prebuilt->trx == + (trx_t*) current_thd->transaction.all.innobase_tid); + if (last_query_id != user_thd->query_id) { prebuilt->sql_stat_start = TRUE; last_query_id = user_thd->query_id; @@ -1729,7 +1921,7 @@ ha_innobase::delete_row( srv_conc_exit_innodb(prebuilt->trx); - error = convert_error_code_to_mysql(error); + error = convert_error_code_to_mysql(error, user_thd); /* Tell the InnoDB server that there might be work for utility threads: */ @@ -1796,6 +1988,55 @@ convert_search_mode_to_innobase( return(0); } +/* + BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED + --------------------------------------------------- +The following does not cover all the details, but explains how we determine +the start of a new SQL statement, and what is associated with it. + +For each table in the database the MySQL interpreter may have several +table handle instances in use, also in a single SQL query. 
For each table +handle instance there is an InnoDB 'prebuilt' struct which contains most +of the InnoDB data associated with this table handle instance. + + A) if the user has not explicitly set any MySQL table level locks: + + 1) MySQL calls ::external_lock to set an 'intention' table level lock on +the table of the handle instance. There we set +prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set +true if we are taking this table handle instance to use in a new SQL +statement issued by the user. We also increment trx->n_mysql_tables_in_use. + + 2) If prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search +instructions to prebuilt->template of the table handle instance in +::index_read. The template is used to save CPU time in large joins. + + 3) In row_search_for_mysql, if prebuilt->sql_stat_start is true, we +allocate a new consistent read view for the trx if it does not yet have one, +or in the case of a locking read, set an InnoDB 'intention' table level +lock on the table. + + 4) We do the SELECT. MySQL may repeatedly call ::index_read for the +same table handle instance, if it is a join. + + 5) When the SELECT ends, MySQL removes its intention table level locks +in ::external_lock. When trx->n_mysql_tables_in_use drops to zero, + (a) we execute a COMMIT there if the autocommit is on, + (b) we also release possible 'SQL statement level resources' InnoDB may +have for this SQL statement. The MySQL interpreter does NOT execute +autocommit for pure read transactions, though it should. That is why the +table handler in that case has to execute the COMMIT in ::external_lock. + + B) If the user has explicitly set MySQL table level locks, then MySQL +does NOT call ::external_lock at the start of the statement. To determine +when we are at the start of a new SQL statement we at the start of +::index_read also compare the query id to the latest query id where the +table handle instance was used. 
If it has changed, we know we are at the +start of a new SQL statement. Since the query id can theoretically +overwrap, we use this test only as a secondary way of determining the +start of a new SQL statement. */ + + /************************************************************************** Positions an index cursor to the index specified in the handle. Fetches the row if any. */ @@ -1809,7 +2050,10 @@ ha_innobase::index_read( row */ const mysql_byte* key_ptr,/* in: key value; if this is NULL we position the cursor at the - start or end of index */ + start or end of index; this can + also contain an InnoDB row id, in + which case key_len is the InnoDB + row id length */ uint key_len,/* in: key value length */ enum ha_rkey_function find_flag)/* in: search flags from my_base.h */ { @@ -1821,6 +2065,10 @@ ha_innobase::index_read( ulint ret; DBUG_ENTER("index_read"); + + ut_a(prebuilt->trx == + (trx_t*) current_thd->transaction.all.innobase_tid); + statistic_increment(ha_read_key_count, &LOCK_status); if (last_query_id != user_thd->query_id) { @@ -1832,10 +2080,8 @@ ha_innobase::index_read( index = prebuilt->index; - /* Note that if the select is used for an update, we always - fetch the clustered index record: therefore the index for which the - template is built is not necessarily prebuilt->index, but can also - be the clustered index */ + /* Note that if the index for which the search template is built is not + necessarily prebuilt->index, but can also be the clustered index */ if (prebuilt->sql_stat_start) { build_template(prebuilt, user_thd, table, @@ -1843,6 +2089,9 @@ ha_innobase::index_read( } if (key_ptr) { + /* Convert the search key value to InnoDB format into + prebuilt->search_tuple */ + row_sel_convert_mysql_key_to_innobase(prebuilt->search_tuple, (byte*) key_val_buff, index, @@ -1887,7 +2136,7 @@ ha_innobase::index_read( error = HA_ERR_KEY_NOT_FOUND; table->status = STATUS_NOT_FOUND; } else { - error = convert_error_code_to_mysql(ret); + error = 
convert_error_code_to_mysql(ret, user_thd); table->status = STATUS_NOT_FOUND; } @@ -1925,7 +2174,6 @@ ha_innobase::change_active_index( { row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; KEY* key=0; - statistic_increment(ha_read_key_count, &LOCK_status); DBUG_ENTER("change_active_index"); @@ -2010,6 +2258,9 @@ ha_innobase::general_fetch( DBUG_ENTER("general_fetch"); + ut_a(prebuilt->trx == + (trx_t*) current_thd->transaction.all.innobase_tid); + srv_conc_enter_innodb(prebuilt->trx); ret = row_search_for_mysql((byte*)buf, 0, prebuilt, match_mode, @@ -2028,7 +2279,7 @@ ha_innobase::general_fetch( error = HA_ERR_END_OF_FILE; table->status = STATUS_NOT_FOUND; } else { - error = convert_error_code_to_mysql(ret); + error = convert_error_code_to_mysql(ret, user_thd); table->status = STATUS_NOT_FOUND; } @@ -2202,8 +2453,7 @@ ha_innobase::rnd_next( } /************************************************************************** -Fetches a row from the table based on a reference. TODO: currently we use -'ref_stored_len' of the handle as the key length. This may change. */ +Fetches a row from the table based on a row reference. 
*/ int ha_innobase::rnd_pos( @@ -2211,21 +2461,28 @@ ha_innobase::rnd_pos( /* out: 0, HA_ERR_KEY_NOT_FOUND, or error code */ mysql_byte* buf, /* in/out: buffer for the row */ - mysql_byte* pos) /* in: primary key value in MySQL format */ + mysql_byte* pos) /* in: primary key value of the row in the + MySQL format, or the row id if the clustered + index was internally generated by InnoDB; + the length of data in pos has to be + ref_length */ { row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; int error; uint keynr = active_index; DBUG_ENTER("rnd_pos"); - DBUG_DUMP("key", (char*) pos, ref_stored_len); + DBUG_DUMP("key", (char*) pos, ref_length); statistic_increment(ha_read_rnd_count, &LOCK_status); + ut_a(prebuilt->trx == + (trx_t*) current_thd->transaction.all.innobase_tid); + if (prebuilt->clust_index_was_generated) { /* No primary key was defined for the table and we generated the clustered index from the row id: the row reference is the row id, not any key value - that MySQL knows */ + that MySQL knows of */ error = change_active_index(MAX_KEY); } else { @@ -2237,7 +2494,10 @@ ha_innobase::rnd_pos( DBUG_RETURN(error); } - error = index_read(buf, pos, ref_stored_len, HA_READ_KEY_EXACT); + /* Note that we assume the length of the row reference is fixed + for the table, and it is == ref_length */ + + error = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT); if (error) { DBUG_PRINT("error",("Got error: %ld",error)); @@ -2249,7 +2509,8 @@ ha_innobase::rnd_pos( /************************************************************************* Stores a reference to the current row to 'ref' field of the handle. 
Note -that the function parameter is illogical: we must assume that 'record' +that in the case where we have generated the clustered index for the +table, the function parameter is illogical: we MUST ASSUME that 'record' is the current 'position' of the handle, because if row ref is actually the row id internally generated in InnoDB, then 'record' does not contain it. We just guess that the row id must be for the record where the handle @@ -2263,11 +2524,14 @@ ha_innobase::position( row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; uint len; + ut_a(prebuilt->trx == + (trx_t*) current_thd->transaction.all.innobase_tid); + if (prebuilt->clust_index_was_generated) { /* No primary key was defined for the table and we generated the clustered index from row id: the row reference will be the row id, not any key value - that MySQL knows */ + that MySQL knows of */ len = DATA_ROW_ID_LEN; @@ -2276,8 +2540,11 @@ ha_innobase::position( len = store_key_val_for_row(primary_key, (char*) ref, record); } - DBUG_ASSERT(len == ref_length); - ref_stored_len = len; + /* Since we do not store len to the buffer 'ref', we must assume + that len is always fixed for this table. The following assertion + checks this. 
*/ + + ut_a(len == ref_length); } @@ -2335,7 +2602,7 @@ create_table_def( error = row_create_table_for_mysql(table, trx); - error = convert_error_code_to_mysql(error); + error = convert_error_code_to_mysql(error, NULL); DBUG_RETURN(error); } @@ -2393,7 +2660,7 @@ create_index( error = row_create_index_for_mysql(index, trx); - error = convert_error_code_to_mysql(error); + error = convert_error_code_to_mysql(error, NULL); DBUG_RETURN(error); } @@ -2419,7 +2686,7 @@ create_clustered_index_when_no_primary( 0, DICT_CLUSTERED, 0); error = row_create_index_for_mysql(index, trx); - error = convert_error_code_to_mysql(error); + error = convert_error_code_to_mysql(error, NULL); return(error); } @@ -2445,22 +2712,41 @@ ha_innobase::create( uint i; char name2[FN_REFLEN]; char norm_name[FN_REFLEN]; + THD *thd= current_thd; DBUG_ENTER("ha_innobase::create"); + DBUG_ASSERT(thd != NULL); + trx = trx_allocate_for_mysql(); + if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) { + trx->check_foreigns = FALSE; + } + + if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) { + trx->check_unique_secondary = FALSE; + } + + fn_format(name2, name, "", "",2); // Remove the .frm extension normalize_table_name(norm_name, name2); - /* Create the table definition in InnoDB */ + /* Latch the InnoDB data dictionary exclusive so that no deadlocks + or lock waits can happen in it during a table create operation. + (Drop table etc. do this latching in row0mysql.c.) 
*/ + + row_mysql_lock_data_dictionary(); + + /* Create the table definition in InnoDB */ error = create_table_def(trx, form, norm_name); if (error) { + innobase_commit_low(trx); - trx_commit_for_mysql(trx); + row_mysql_unlock_data_dictionary(); trx_free_for_mysql(trx); @@ -2476,7 +2762,7 @@ ha_innobase::create( /* Our function row_get_mysql_key_number_for_index assumes the primary key is always number 0, if it exists */ - assert(primary_key_no == -1 || primary_key_no == 0); + DBUG_ASSERT(primary_key_no == -1 || primary_key_no == 0); /* Create the keys */ @@ -2488,7 +2774,9 @@ ha_innobase::create( error = create_clustered_index_when_no_primary(trx, norm_name); if (error) { - trx_commit_for_mysql(trx); + innobase_commit_low(trx); + + row_mysql_unlock_data_dictionary(); trx_free_for_mysql(trx); @@ -2501,7 +2789,9 @@ ha_innobase::create( first */ if ((error = create_index(trx, form, norm_name, (uint) primary_key_no))) { - trx_commit_for_mysql(trx); + innobase_commit_low(trx); + + row_mysql_unlock_data_dictionary(); trx_free_for_mysql(trx); @@ -2515,7 +2805,9 @@ ha_innobase::create( if ((error = create_index(trx, form, norm_name, i))) { - trx_commit_for_mysql(trx); + innobase_commit_low(trx); + + row_mysql_unlock_data_dictionary(); trx_free_for_mysql(trx); @@ -2527,17 +2819,21 @@ ha_innobase::create( error = row_table_add_foreign_constraints(trx, create_info->create_statement, norm_name); - error = convert_error_code_to_mysql(error); + error = convert_error_code_to_mysql(error, NULL); if (error) { - trx_commit_for_mysql(trx); + innobase_commit_low(trx); + + row_mysql_unlock_data_dictionary(); trx_free_for_mysql(trx); DBUG_RETURN(error); } - trx_commit_for_mysql(trx); + innobase_commit_low(trx); + + row_mysql_unlock_data_dictionary(); /* Flush the log to reduce probability that the .frm files and the InnoDB data dictionary get out-of-sync if the user runs @@ -2547,7 +2843,7 @@ ha_innobase::create( innobase_table = dict_table_get(norm_name, NULL); - assert(innobase_table 
!= 0); + DBUG_ASSERT(innobase_table != 0); /* Tell the InnoDB server that there might be work for utility threads: */ @@ -2605,11 +2901,11 @@ ha_innobase::delete_table( srv_active_wake_master_thread(); - trx_commit_for_mysql(trx); + innobase_commit_low(trx); trx_free_for_mysql(trx); - error = convert_error_code_to_mysql(error); + error = convert_error_code_to_mysql(error, NULL); DBUG_RETURN(error); } @@ -2662,10 +2958,10 @@ innobase_drop_database( srv_active_wake_master_thread(); - trx_commit_for_mysql(trx); + innobase_commit_low(trx); trx_free_for_mysql(trx); - error = convert_error_code_to_mysql(error); + error = convert_error_code_to_mysql(error, NULL); return(error); } @@ -2715,10 +3011,10 @@ ha_innobase::rename_table( srv_active_wake_master_thread(); - trx_commit_for_mysql(trx); + innobase_commit_low(trx); trx_free_for_mysql(trx); - error = convert_error_code_to_mysql(error); + error = convert_error_code_to_mysql(error, NULL); DBUG_RETURN(error); } @@ -2981,6 +3277,8 @@ ha_innobase::check( ulint ret; ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N); + ut_a(prebuilt->trx == + (trx_t*) current_thd->transaction.all.innobase_tid); if (prebuilt->mysql_template == NULL) { /* Build the template; we will use a dummy template @@ -3030,8 +3328,9 @@ ha_innobase::update_table_comment( *pos++=' '; } - pos += sprintf(pos, "InnoDB free: %lu kB", - (ulong) innobase_get_free_space()); + pos += my_sprintf(pos, + (pos,"InnoDB free: %lu kB", + (ulong) innobase_get_free_space())); /* We assume 450 - length bytes of space to print info */ @@ -3177,7 +3476,7 @@ ha_innobase::external_lock( thd->transaction.all.innodb_active_trans = 1; trx->n_mysql_tables_in_use++; - if (thd->tx_isolation == ISO_SERIALIZABLE + if (thd->variables.tx_isolation == ISO_SERIALIZABLE && prebuilt->select_lock_type == LOCK_NONE) { /* To get serializable execution we let InnoDB @@ -3205,7 +3504,7 @@ ha_innobase::external_lock( innobase_release_stat_resources(trx); if (!(thd->options - & 
(OPTION_NOT_AUTO_COMMIT | OPTION_BEGIN))) { + & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { innobase_commit(thd, trx); thd->transaction.all.innodb_active_trans=0; @@ -3217,6 +3516,53 @@ ha_innobase::external_lock( } /**************************************************************************** +Implements the SHOW INNODB STATUS command. Send the output of the InnoDB +Monitor to the client. */ + +int +innodb_show_status( +/*===============*/ + /* out: 0 if ok, -1 if send_fields() or my_net_write() returns nonzero */ + THD* thd) /* in: the MySQL query thread of the caller */ +{ + String* packet = &thd->packet; + char* buf; + + DBUG_ENTER("innodb_show_status"); + + /* We let the InnoDB Monitor output at most 100 kB of text and add + a safety margin of 10 kB for buffer overruns */ + + buf = (char*)ut_malloc(110 * 1024); + + srv_sprintf_innodb_monitor(buf, 100 * 1024); + + List<Item> field_list; + + field_list.push_back(new Item_empty_string("Status", strlen(buf))); + + if(send_fields(thd, field_list, 1)) { + /* buf is still allocated here; free it before the early return so it is not leaked */ + ut_free(buf); + + DBUG_RETURN(-1); + } + + packet->length(0); + + net_store_data(packet, buf); + + if (my_net_write(&thd->net, (char*)thd->packet.ptr(), + packet->length())) { + ut_free(buf); + + DBUG_RETURN(-1); + } + + ut_free(buf); + + send_eof(&thd->net); + + DBUG_RETURN(0); +} + +/**************************************************************************** Handling the shared INNOBASE_SHARE structure that is needed to provide table locking. ****************************************************************************/ @@ -3325,34 +3671,53 @@ ha_innobase::store_lock( } /*********************************************************************** -Returns the next auto-increment column value for the table. write_row -normally fetches the value from the cache in the data dictionary. This -function in used by SHOW TABLE STATUS and when the first insert to the table -is done after database startup. */ +This function initializes the auto-inc counter if it has not been +initialized yet.
This function does not change the value of the auto-inc +counter if it already has been initialized. In parameter ret returns +the value of the auto-inc counter. */ -longlong -ha_innobase::get_auto_increment() -/*=============================*/ - /* out: the next auto-increment column value */ +int +ha_innobase::innobase_read_and_init_auto_inc( +/*=========================================*/ + /* out: 0 or error code: deadlock or + lock wait timeout */ + longlong* ret) /* out: auto-inc value */ { row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - longlong nr; + longlong auto_inc; int error; - /* Also SHOW TABLE STATUS calls this function. Previously, when we did - always read the max autoinc key value, setting x-locks, users were - surprised that SHOW TABLE STATUS could end up in a deadlock with - ordinary SQL queries. We avoid these deadlocks if the auto-inc - counter for the table has been initialized by fetching the value - from the table struct in dictionary cache. */ + ut_a(prebuilt); + ut_a(prebuilt->trx == + (trx_t*) current_thd->transaction.all.innobase_tid); + ut_a(prebuilt->table); + + auto_inc = dict_table_autoinc_read(prebuilt->table); - assert(prebuilt->table); - - nr = dict_table_autoinc_read(prebuilt->table); + if (auto_inc != 0) { + /* Already initialized */ + *ret = auto_inc; + + return(0); + } + + srv_conc_enter_innodb(prebuilt->trx); + error = row_lock_table_autoinc_for_mysql(prebuilt); + srv_conc_exit_innodb(prebuilt->trx); + + if (error != DB_SUCCESS) { + error = convert_error_code_to_mysql(error, user_thd); - if (nr != 0) { + goto func_exit; + } - return(nr + 1); + /* Check again if someone has initialized the counter meanwhile */ + auto_inc = dict_table_autoinc_read(prebuilt->table); + + if (auto_inc != 0) { + *ret = auto_inc; + + return(0); } (void) extra(HA_EXTRA_KEYREAD); @@ -3372,22 +3737,63 @@ ha_innobase::get_auto_increment() prebuilt->hint_no_need_to_fetch_extra_cols = FALSE; - prebuilt->trx->mysql_n_tables_locked += 1; + 
prebuilt->trx->mysql_n_tables_locked += 1; - error = index_last(table->record[1]); + error = index_last(table->record[1]); if (error) { - nr = 1; + if (error == HA_ERR_END_OF_FILE) { + /* The table was empty, initialize to 1 */ + auto_inc = 1; + + error = 0; + } else { + /* Deadlock or a lock wait timeout */ + auto_inc = -1; + + goto func_exit; + } } else { - nr = (longlong) table->next_number_field-> + /* Initialize to max(col) + 1 */ + auto_inc = (longlong) table->next_number_field-> val_int_offset(table->rec_buff_length) + 1; } + dict_table_autoinc_initialize(prebuilt->table, auto_inc); + +func_exit: (void) extra(HA_EXTRA_NO_KEYREAD); - index_end(); + index_end(); + + *ret = auto_inc; + + return(error); +} + +/*********************************************************************** +This function initializes the auto-inc counter if it has not been +initialized yet. This function does not change the value of the auto-inc +counter if it already has been initialized. Returns the value of the +auto-inc counter. */ + +longlong +ha_innobase::get_auto_increment() +/*=============================*/ + /* out: auto-increment column value, -1 if error + (deadlock or lock wait timeout) */ +{ + longlong nr; + int error; + + error = innobase_read_and_init_auto_inc(&nr); + + if (error) { + + return(-1); + } - return(nr); + return(nr); } #endif /* HAVE_INNOBASE_DB */ |