diff options
author | Davi Arnaut <Davi.Arnaut@Sun.COM> | 2010-07-19 15:34:28 -0300 |
---|---|---|
committer | Davi Arnaut <Davi.Arnaut@Sun.COM> | 2010-07-19 15:34:28 -0300 |
commit | dfaf73a9871bb76a080855297d7645d4e4e22547 (patch) | |
tree | 805e26981b7d83aff452964d972ba057877eb23f /storage/innodb_plugin | |
parent | 85e5ce0ba06b6a79e4bd7521ab0e44c1e63a3e77 (diff) | |
parent | d79e6c2e4594dc506d4d30cdc53545321b475ada (diff) | |
download | mariadb-git-dfaf73a9871bb76a080855297d7645d4e4e22547.tar.gz |
Merge of mysql-5.1 into mysql-5.1-bugteam.
Diffstat (limited to 'storage/innodb_plugin')
32 files changed, 779 insertions, 466 deletions
diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index 89823642957..5ebcf1e87a2 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,69 @@ +2010-06-24 The InnoDB Team + + * handler/ha_innodb.cc: + Fix Bug#54679 alter table causes compressed row_format to revert + to compact + +2010-06-22 The InnoDB Team + + * dict/dict0dict.c, dict/dict0mem.c, include/dict0mem.h, + include/univ.i, page/page0zip.c, row/row0merge.c: + Fix Bug#47991 InnoDB Dictionary Cache memory usage increases + indefinitely when renaming tables + +2010-06-22 The InnoDB Team + + * handler/ha_innodb.cc: + Fix Bug#54686: "field->col->mtype == type" assertion error at + row/row0sel.c + +2010-06-22 The InnoDB Team + + * handler/ha_innodb.cc, innodb_bug54044.result, innodb_bug54044.test: + Fix Bug#54044 Create temporary tables and using innodb crashes. + +2010-06-21 The InnoDB Team + + * dict/dict0load.c, fil/fil0fil.c: + Fix Bug#54658: InnoDB: Warning: allocated tablespace %lu, + old maximum was 0 (introduced in Bug #53578 fix) + +2010-06-16 The InnoDB Team + + * row/row0merge.c: + Fix Bug#54330 Broken fast index creation + +2010-06-10 The InnoDB Team + + * include/log0log.ic, row/row0ins.c, row/row0purge.c, + row/row0uins.c, row/row0umod.c, row/row0upd.c: + Fix Bug#39168 ERROR: the age of the last checkpoint ... exceeds + the log group capacity + +2010-06-08 The InnoDB Team + + * dict/dict0load.c: + Fix Bug#54009 Server crashes when data is selected from non backed + up table for InnoDB plugin + +2010-06-02 The InnoDB Team + + * include/db0err.h, include/lock0lock.h, include/row0mysql.h, + lock/lock0lock.c, row/row0ins.c, row/row0mysql.c, row/row0sel.c: + Fix Bug#53674 InnoDB: Error: unlock row could not find a + 4 mode lock on the record + +2010-06-01 The InnoDB Team + + * include/sync0rw.h, sync/sync0rw.c: + Fix Bug#48197 Concurrent rw_lock_free may cause assertion failure + +2010-06-01 The InnoDB Team + + * row/row0umod.c: + Fix Bug#53812 assert row/row0umod.c line 660 in txn rollback + after crash recovery + 2010-05-25 The InnoDB Team * handler/ha_innodb.cc, include/row0mysql.h, row/row0mysql.c: @@ -251,6 +317,14 @@ Fix Bug#49497 Error 1467 (ER_AUTOINC_READ_FAILED) on inserting a negative value +2010-01-28 The InnoDB Team + * handler/ha_innodb.h, handler/ha_innodb.cc, + handler/handler0alter.cc, + mysql-test/innodb_bug47622.test, + mysql-test/innodb_bug47622.result: + Fix Bug#47622 the new index is added before the existing ones + in MySQL, but after one in SE + 2010-01-27 The InnoDB Team * include/row0mysql.h, log/log0recv.c, row/row0mysql.c: diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c index 6270aa6a727..50531ad3bd7 100644 --- a/storage/innodb_plugin/btr/btr0cur.c +++ b/storage/innodb_plugin/btr/btr0cur.c @@ -1959,9 +1959,8 @@ any_extern: err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info, thr, mtr, &roll_ptr); if (err != DB_SUCCESS) { -err_exit: - mem_heap_free(heap); - return(err); + + goto err_exit; } /* Ok, we may do the replacement. Store on the page infimum the @@ -2007,9 +2006,10 @@ err_exit: page_cur_move_to_next(page_cursor); + err = DB_SUCCESS; +err_exit: mem_heap_free(heap); - - return(DB_SUCCESS); + return(err); } /*************************************************************//** diff --git a/storage/innodb_plugin/btr/btr0sea.c b/storage/innodb_plugin/btr/btr0sea.c index ef7afeb1039..c91afa31bf0 100644 --- a/storage/innodb_plugin/btr/btr0sea.c +++ b/storage/innodb_plugin/btr/btr0sea.c @@ -182,6 +182,7 @@ void btr_search_sys_free(void) /*=====================*/ { + rw_lock_free(&btr_search_latch); mem_free(btr_search_latch_temp); btr_search_latch_temp = NULL; mem_heap_free(btr_search_sys->hash_index->heap); diff --git a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c index bc5e9814099..660686bac1e 100644 --- a/storage/innodb_plugin/buf/buf0buf.c +++ b/storage/innodb_plugin/buf/buf0buf.c @@ -2236,7 +2236,7 @@ wait_until_unfixed: block->page.buf_fix_count = 1; buf_block_set_io_fix(block, BUF_IO_READ); - rw_lock_x_lock(&block->lock); + rw_lock_x_lock_func(&block->lock, 0, file, line); UNIV_MEM_INVALID(bpage, sizeof *bpage); diff --git a/storage/innodb_plugin/dict/dict0crea.c b/storage/innodb_plugin/dict/dict0crea.c index f185371bfca..09353c45c8c 100644 --- a/storage/innodb_plugin/dict/dict0crea.c +++ b/storage/innodb_plugin/dict/dict0crea.c @@ -240,17 +240,29 @@ dict_build_table_def_step( ibool is_path; mtr_t mtr; ulint space = 0; + ibool file_per_table; ut_ad(mutex_own(&(dict_sys->mutex))); table = node->table; - dict_hdr_get_new_id(&table->id, NULL, - srv_file_per_table ? &space : NULL); + /* Cache the global variable "srv_file_per_table" to + a local variable before using it. Please note + "srv_file_per_table" is not under dict_sys mutex + protection, and could be changed while executing + this function. So better to cache the current value + to a local variable, and all future reference to + "srv_file_per_table" should use this local variable. */ + file_per_table = srv_file_per_table; + + dict_hdr_get_new_id(&table->id, NULL, NULL); thr_get_trx(thr)->table_id = table->id; - if (srv_file_per_table) { + if (file_per_table) { + /* Get a new space id if srv_file_per_table is set */ + dict_hdr_get_new_id(NULL, NULL, &space); + if (UNIV_UNLIKELY(space == ULINT_UNDEFINED)) { return(DB_ERROR); } diff --git a/storage/innodb_plugin/dict/dict0dict.c b/storage/innodb_plugin/dict/dict0dict.c index 61f70c72720..fe4e058e122 100644 --- a/storage/innodb_plugin/dict/dict0dict.c +++ b/storage/innodb_plugin/dict/dict0dict.c @@ -570,13 +570,11 @@ dict_table_get_on_id( if (ut_dulint_cmp(table_id, DICT_FIELDS_ID) <= 0 || trx->dict_operation_lock_mode == RW_X_LATCH) { - /* It is a system table which will always exist in the table - cache: we avoid acquiring the dictionary mutex, because - if we are doing a rollback to handle an error in TABLE - CREATE, for example, we already have the mutex! */ - ut_ad(mutex_own(&(dict_sys->mutex)) - || trx->dict_operation_lock_mode == RW_X_LATCH); + /* Note: An X latch implies that the transaction + already owns the dictionary mutex. */ + + ut_ad(mutex_own(&dict_sys->mutex)); return(dict_table_get_on_id_low(table_id)); } @@ -850,7 +848,8 @@ dict_table_add_to_cache( /* Add table to LRU list of tables */ UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table); - dict_sys->size += mem_heap_get_size(table->heap); + dict_sys->size += mem_heap_get_size(table->heap) + + strlen(table->name) + 1; } /**********************************************************************//** @@ -904,14 +903,21 @@ dict_table_rename_in_cache( dict_foreign_t* foreign; dict_index_t* index; ulint fold; - ulint old_size; - const char* old_name; + char old_name[MAX_TABLE_NAME_LEN + 1]; ut_ad(table); ut_ad(mutex_own(&(dict_sys->mutex))); - old_size = mem_heap_get_size(table->heap); - old_name = table->name; + /* store the old/current name to an automatic variable */ + if (strlen(table->name) + 1 <= sizeof(old_name)) { + memcpy(old_name, table->name, strlen(table->name) + 1); + } else { + ut_print_timestamp(stderr); + fprintf(stderr, "InnoDB: too long table name: '%s', " + "max length is %d\n", table->name, + MAX_TABLE_NAME_LEN); + ut_error; + } fold = ut_fold_string(new_name); @@ -957,12 +963,22 @@ dict_table_rename_in_cache( /* Remove table from the hash tables of tables */ HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash, ut_fold_string(old_name), table); - table->name = mem_heap_strdup(table->heap, new_name); + + if (strlen(new_name) > strlen(table->name)) { + /* We allocate MAX_TABLE_NAME_LEN+1 bytes here to avoid + memory fragmentation, we assume a repeated calls of + ut_realloc() with the same size do not cause fragmentation */ + ut_a(strlen(new_name) <= MAX_TABLE_NAME_LEN); + table->name = ut_realloc(table->name, MAX_TABLE_NAME_LEN + 1); + } + memcpy(table->name, new_name, strlen(new_name) + 1); /* Add table to hash table of tables */ HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, table); - dict_sys->size += (mem_heap_get_size(table->heap) - old_size); + + dict_sys->size += strlen(new_name) - strlen(old_name); + ut_a(dict_sys->size > 0); /* Update the table_name field in indexes */ index = dict_table_get_first_index(table); @@ -1187,7 +1203,7 @@ dict_table_remove_from_cache( /* Remove table from LRU list of tables */ UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table); - size = mem_heap_get_size(table->heap); + size = mem_heap_get_size(table->heap) + strlen(table->name) + 1; ut_ad(dict_sys->size >= size); diff --git a/storage/innodb_plugin/dict/dict0load.c b/storage/innodb_plugin/dict/dict0load.c index 377818308c5..3c495d21786 100644 --- a/storage/innodb_plugin/dict/dict0load.c +++ b/storage/innodb_plugin/dict/dict0load.c @@ -316,7 +316,7 @@ dict_check_tablespaces_and_store_max_id( dict_index_t* sys_index; btr_pcur_t pcur; const rec_t* rec; - ulint max_space_id = 0; + ulint max_space_id; mtr_t mtr; mutex_enter(&(dict_sys->mutex)); @@ -327,6 +327,11 @@ dict_check_tablespaces_and_store_max_id( sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); ut_a(!dict_table_is_comp(sys_tables)); + max_space_id = mtr_read_ulint(dict_hdr_get(&mtr) + + DICT_HDR_MAX_SPACE_ID, + MLOG_4BYTES, &mtr); + fil_set_max_space_id_if_bigger(max_space_id); + btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr); loop: @@ -973,6 +978,7 @@ err_exit: /* Try to open the tablespace */ if (!fil_open_single_table_tablespace( TRUE, space, + flags == DICT_TF_COMPACT ? 0 : flags & ~(~0 << DICT_TF_BITS), name)) { /* We failed to find a sensible tablespace file */ diff --git a/storage/innodb_plugin/dict/dict0mem.c b/storage/innodb_plugin/dict/dict0mem.c index 66b4b43f296..3287247029f 100644 --- a/storage/innodb_plugin/dict/dict0mem.c +++ b/storage/innodb_plugin/dict/dict0mem.c @@ -68,7 +68,8 @@ dict_mem_table_create( table->heap = heap; table->flags = (unsigned int) flags; - table->name = mem_heap_strdup(heap, name); + table->name = ut_malloc(strlen(name) + 1); + memcpy(table->name, name, strlen(name) + 1); table->space = (unsigned int) space; table->n_cols = (unsigned int) (n_cols + DATA_N_SYS_COLS); @@ -106,6 +107,7 @@ dict_mem_table_free( #ifndef UNIV_HOTBACKUP mutex_free(&(table->autoinc_mutex)); #endif /* UNIV_HOTBACKUP */ + ut_free(table->name); mem_heap_free(table->heap); } diff --git a/storage/innodb_plugin/fil/fil0fil.c b/storage/innodb_plugin/fil/fil0fil.c index af85e14f226..796fe921a7e 100644 --- a/storage/innodb_plugin/fil/fil0fil.c +++ b/storage/innodb_plugin/fil/fil0fil.c @@ -1197,7 +1197,7 @@ try_again: space->tablespace_version = fil_system->tablespace_version; space->mark = FALSE; - if (UNIV_LIKELY(purpose == FIL_TABLESPACE) + if (UNIV_LIKELY(purpose == FIL_TABLESPACE && !recv_recovery_on) && UNIV_UNLIKELY(id > fil_system->max_assigned_id)) { if (!fil_system->space_id_reuse_warned) { fil_system->space_id_reuse_warned = TRUE; @@ -3542,39 +3542,6 @@ next_datadir_item: return(err); } -/********************************************************************//** -If we need crash recovery, and we have called -fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(), -we can call this function to print an error message of orphaned .ibd files -for which there is not a data dictionary entry with a matching table name -and space id. */ -UNIV_INTERN -void -fil_print_orphaned_tablespaces(void) -/*================================*/ -{ - fil_space_t* space; - - mutex_enter(&fil_system->mutex); - - space = UT_LIST_GET_FIRST(fil_system->space_list); - - while (space) { - if (space->purpose == FIL_TABLESPACE && space->id != 0 - && !space->mark) { - fputs("InnoDB: Warning: tablespace ", stderr); - ut_print_filename(stderr, space->name); - fprintf(stderr, " of id %lu has no matching table in\n" - "InnoDB: the InnoDB data dictionary.\n", - (ulong) space->id); - } - - space = UT_LIST_GET_NEXT(space_list, space); - } - - mutex_exit(&fil_system->mutex); -} - /*******************************************************************//** Returns TRUE if a single-table tablespace does not exist in the memory cache, or is being deleted there. diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc index c65ba3163fc..ceb50516659 100644 --- a/storage/innodb_plugin/handler/ha_innodb.cc +++ b/storage/innodb_plugin/handler/ha_innodb.cc @@ -3950,6 +3950,11 @@ get_innobase_type_from_mysql_type( case MYSQL_TYPE_BLOB: case MYSQL_TYPE_LONG_BLOB: return(DATA_BLOB); + case MYSQL_TYPE_NULL: + /* MySQL currently accepts "NULL" datatype, but will + reject such datatype in the next release. We will cope + with it and not trigger assertion failure in 5.1 */ + break; default: ut_error; } @@ -5379,6 +5384,9 @@ ha_innobase::index_read( prebuilt->index_usable = FALSE; DBUG_RETURN(HA_ERR_CRASHED); } + if (UNIV_UNLIKELY(!prebuilt->index_usable)) { + DBUG_RETURN(HA_ERR_TABLE_DEF_CHANGED); + } /* Note that if the index for which the search template is built is not necessarily prebuilt->index, but can also be the clustered index */ @@ -5997,7 +6005,22 @@ create_table_def( field = form->field[i]; col_type = get_innobase_type_from_mysql_type(&unsigned_type, - field); + field); + + if (!col_type) { + push_warning_printf( + (THD*) trx->mysql_thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_CANT_CREATE_TABLE, + "Error creating table '%s' with " + "column '%s'. Please check its " + "column type and try to re-create " + "the table with an appropriate " + "column type.", + table->name, (char*) field->field_name); + goto err_col; + } + if (field->null_ptr) { nulls_allowed = 0; } else { @@ -6055,7 +6078,7 @@ create_table_def( if (dict_col_name_is_reserved(field->field_name)){ my_error(ER_WRONG_COLUMN_NAME, MYF(0), field->field_name); - +err_col: dict_mem_table_free(table); trx_commit_for_mysql(trx); @@ -6457,6 +6480,7 @@ ha_innobase::create( const ulint file_format = srv_file_format; const char* stmt; size_t stmt_len; + enum row_type row_type; DBUG_ENTER("ha_innobase::create"); @@ -6577,94 +6601,94 @@ ha_innobase::create( } } - if (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT) { - if (flags) { - /* KEY_BLOCK_SIZE was specified. */ - if (form->s->row_type != ROW_TYPE_COMPRESSED) { - /* ROW_FORMAT other than COMPRESSED - ignores KEY_BLOCK_SIZE. It does not - make sense to reject conflicting - KEY_BLOCK_SIZE and ROW_FORMAT, because - such combinations can be obtained - with ALTER TABLE anyway. */ - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ignoring KEY_BLOCK_SIZE=%lu" - " unless ROW_FORMAT=COMPRESSED.", - create_info->key_block_size); - flags = 0; - } - } else { - /* No KEY_BLOCK_SIZE */ - if (form->s->row_type == ROW_TYPE_COMPRESSED) { - /* ROW_FORMAT=COMPRESSED without - KEY_BLOCK_SIZE implies half the - maximum KEY_BLOCK_SIZE. */ - flags = (DICT_TF_ZSSIZE_MAX - 1) - << DICT_TF_ZSSIZE_SHIFT - | DICT_TF_COMPACT - | DICT_TF_FORMAT_ZIP - << DICT_TF_FORMAT_SHIFT; + row_type = form->s->row_type; + + if (flags) { + /* KEY_BLOCK_SIZE was specified. */ + if (!(create_info->used_fields & HA_CREATE_USED_ROW_FORMAT)) { + /* ROW_FORMAT was not specified; + default to ROW_FORMAT=COMPRESSED */ + row_type = ROW_TYPE_COMPRESSED; + } else if (row_type != ROW_TYPE_COMPRESSED) { + /* ROW_FORMAT other than COMPRESSED + ignores KEY_BLOCK_SIZE. It does not + make sense to reject conflicting + KEY_BLOCK_SIZE and ROW_FORMAT, because + such combinations can be obtained + with ALTER TABLE anyway. */ + push_warning_printf( + thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ignoring KEY_BLOCK_SIZE=%lu" + " unless ROW_FORMAT=COMPRESSED.", + create_info->key_block_size); + flags = 0; + } + } else { + /* No KEY_BLOCK_SIZE */ + if (row_type == ROW_TYPE_COMPRESSED) { + /* ROW_FORMAT=COMPRESSED without + KEY_BLOCK_SIZE implies half the + maximum KEY_BLOCK_SIZE. */ + flags = (DICT_TF_ZSSIZE_MAX - 1) + << DICT_TF_ZSSIZE_SHIFT + | DICT_TF_COMPACT + | DICT_TF_FORMAT_ZIP + << DICT_TF_FORMAT_SHIFT; #if DICT_TF_ZSSIZE_MAX < 1 # error "DICT_TF_ZSSIZE_MAX < 1" #endif - } } + } - switch (form->s->row_type) { - const char* row_format_name; - case ROW_TYPE_REDUNDANT: - break; - case ROW_TYPE_COMPRESSED: - case ROW_TYPE_DYNAMIC: - row_format_name - = form->s->row_type == ROW_TYPE_COMPRESSED - ? "COMPRESSED" - : "DYNAMIC"; - - if (!srv_file_per_table) { - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s" - " requires innodb_file_per_table.", - row_format_name); - } else if (file_format < DICT_TF_FORMAT_ZIP) { - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s" - " requires innodb_file_format >" - " Antelope.", - row_format_name); - } else { - flags |= DICT_TF_COMPACT - | (DICT_TF_FORMAT_ZIP - << DICT_TF_FORMAT_SHIFT); - break; - } + switch (row_type) { + const char* row_format_name; + case ROW_TYPE_REDUNDANT: + break; + case ROW_TYPE_COMPRESSED: + case ROW_TYPE_DYNAMIC: + row_format_name + = row_type == ROW_TYPE_COMPRESSED + ? "COMPRESSED" + : "DYNAMIC"; - /* fall through */ - case ROW_TYPE_NOT_USED: - case ROW_TYPE_FIXED: - default: - push_warning(thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: assuming ROW_FORMAT=COMPACT."); - case ROW_TYPE_DEFAULT: - case ROW_TYPE_COMPACT: - flags = DICT_TF_COMPACT; + if (!srv_file_per_table) { + push_warning_printf( + thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ROW_FORMAT=%s" + " requires innodb_file_per_table.", + row_format_name); + } else if (file_format < DICT_TF_FORMAT_ZIP) { + push_warning_printf( + thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ROW_FORMAT=%s" + " requires innodb_file_format >" + " Antelope.", + row_format_name); + } else { + flags |= DICT_TF_COMPACT + | (DICT_TF_FORMAT_ZIP + << DICT_TF_FORMAT_SHIFT); break; } - } else if (!flags) { - /* No KEY_BLOCK_SIZE or ROW_FORMAT specified: - use ROW_FORMAT=COMPACT by default. */ + + /* fall through */ + case ROW_TYPE_NOT_USED: + case ROW_TYPE_FIXED: + default: + push_warning(thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: assuming ROW_FORMAT=COMPACT."); + case ROW_TYPE_DEFAULT: + case ROW_TYPE_COMPACT: flags = DICT_TF_COMPACT; + break; } /* Look for a primary key */ @@ -7221,6 +7245,10 @@ ha_innobase::records_in_range( n_rows = HA_POS_ERROR; goto func_exit; } + if (UNIV_UNLIKELY(!row_merge_is_index_usable(prebuilt->trx, index))) { + n_rows = HA_ERR_TABLE_DEF_CHANGED; + goto func_exit; + } heap = mem_heap_create(2 * (key->key_parts * sizeof(dfield_t) + sizeof(dtuple_t))); @@ -7420,8 +7448,7 @@ innobase_get_mysql_key_number_for_index( /* If index does not belong to the table of share structure. Search index->table instead */ - if (index->table != ib_table - && strcmp(index->table->name, share->table_name)) { + if (index->table != ib_table) { i = 0; ind = dict_table_get_first_index(index->table); diff --git a/storage/innodb_plugin/include/db0err.h b/storage/innodb_plugin/include/db0err.h index 747e9b5364e..c841c2b4afe 100644 --- a/storage/innodb_plugin/include/db0err.h +++ b/storage/innodb_plugin/include/db0err.h @@ -28,6 +28,8 @@ Created 5/24/1996 Heikki Tuuri enum db_err { + DB_SUCCESS_LOCKED_REC = 9, /*!< like DB_SUCCESS, but a new + explicit record lock was created */ DB_SUCCESS = 10, /* The following are error codes */ diff --git a/storage/innodb_plugin/include/dict0mem.h b/storage/innodb_plugin/include/dict0mem.h index 9996fb59a75..2fce1e00927 100644 --- a/storage/innodb_plugin/include/dict0mem.h +++ b/storage/innodb_plugin/include/dict0mem.h @@ -382,7 +382,7 @@ initialized to 0, NULL or FALSE in dict_mem_table_create(). */ struct dict_table_struct{ dulint id; /*!< id of the table */ mem_heap_t* heap; /*!< memory heap */ - const char* name; /*!< table name */ + char* name; /*!< table name */ const char* dir_path_of_temp_table;/*!< NULL or the directory path where a TEMPORARY table that was explicitly created by a user should be placed if diff --git a/storage/innodb_plugin/include/fil0fil.h b/storage/innodb_plugin/include/fil0fil.h index 10c3ff025ac..c894875b352 100644 --- a/storage/innodb_plugin/include/fil0fil.h +++ b/storage/innodb_plugin/include/fil0fil.h @@ -506,16 +506,6 @@ UNIV_INTERN ulint fil_load_single_table_tablespaces(void); /*===================================*/ -/********************************************************************//** -If we need crash recovery, and we have called -fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(), -we can call this function to print an error message of orphaned .ibd files -for which there is not a data dictionary entry with a matching table name -and space id. */ -UNIV_INTERN -void -fil_print_orphaned_tablespaces(void); -/*================================*/ /*******************************************************************//** Returns TRUE if a single-table tablespace does not exist in the memory cache, or is being deleted there. diff --git a/storage/innodb_plugin/include/lock0lock.h b/storage/innodb_plugin/include/lock0lock.h index 7d76cbe3c75..b3e1e5c4537 100644 --- a/storage/innodb_plugin/include/lock0lock.h +++ b/storage/innodb_plugin/include/lock0lock.h @@ -340,11 +340,12 @@ lock_sec_rec_modify_check_and_lock( que_thr_t* thr, /*!< in: query thread */ mtr_t* mtr); /*!< in/out: mini-transaction */ /*********************************************************************//** -Like the counterpart for a clustered index below, but now we read a +Like lock_clust_rec_read_check_and_lock(), but reads a secondary index record. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_sec_rec_read_check_and_lock( /*=============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -371,9 +372,10 @@ if the query thread should anyway be suspended for some reason; if not, then puts the transaction and the query thread to the lock wait state and inserts a waiting request for a record lock to the lock queue. Sets the requested mode lock on the record. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_clust_rec_read_check_and_lock( /*===============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG diff --git a/storage/innodb_plugin/include/log0log.ic b/storage/innodb_plugin/include/log0log.ic index 139f4041a36..1ce00fd7313 100644 --- a/storage/innodb_plugin/include/log0log.ic +++ b/storage/innodb_plugin/include/log0log.ic @@ -433,7 +433,10 @@ void log_free_check(void) /*================*/ { - /* ut_ad(sync_thread_levels_empty()); */ + +#ifdef UNIV_SYNC_DEBUG + ut_ad(sync_thread_levels_empty_gen(TRUE)); +#endif /* UNIV_SYNC_DEBUG */ if (log_sys->check_flush_or_checkpoint) { diff --git a/storage/innodb_plugin/include/row0mysql.h b/storage/innodb_plugin/include/row0mysql.h index e90742abe7c..39ea240772c 100644 --- a/storage/innodb_plugin/include/row0mysql.h +++ b/storage/innodb_plugin/include/row0mysql.h @@ -264,27 +264,26 @@ row_update_for_mysql( row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL handle */ /*********************************************************************//** -This can only be used when srv_locks_unsafe_for_binlog is TRUE or -session is using a READ COMMITTED isolation level. Before -calling this function we must use trx_reset_new_rec_lock_info() and -trx_register_new_rec_lock() to store the information which new record locks -really were set. This function removes a newly set lock under prebuilt->pcur, -and also under prebuilt->clust_pcur. Currently, this is only used and tested -in the case of an UPDATE or a DELETE statement, where the row lock is of the -LOCK_X type. -Thus, this implements a 'mini-rollback' that releases the latest record -locks we set. -@return error code or DB_SUCCESS */ +This can only be used when srv_locks_unsafe_for_binlog is TRUE or this +session is using a READ COMMITTED or READ UNCOMMITTED isolation level. +Before calling this function row_search_for_mysql() must have +initialized prebuilt->new_rec_locks to store the information which new +record locks really were set. This function removes a newly set +clustered index record lock under prebuilt->pcur or +prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that +releases the latest clustered index record lock we set. +@return error code or DB_SUCCESS */ UNIV_INTERN int row_unlock_for_mysql( /*=================*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in MySQL + row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL handle */ - ibool has_latches_on_recs);/*!< TRUE if called so that we have - the latches on the records under pcur - and clust_pcur, and we do not need to - reposition the cursors. */ + ibool has_latches_on_recs);/*!< in: TRUE if called + so that we have the latches on + the records under pcur and + clust_pcur, and we do not need + to reposition the cursors. */ /*********************************************************************//** Creates an query graph node of 'update' type to be used in the MySQL interface. @@ -702,18 +701,17 @@ struct row_prebuilt_struct { ulint new_rec_locks; /*!< normally 0; if srv_locks_unsafe_for_binlog is TRUE or session is using READ - COMMITTED isolation level, in a - cursor search, if we set a new - record lock on an index, this is - incremented; this is used in - releasing the locks under the - cursors if we are performing an - UPDATE and we determine after - retrieving the row that it does - not need to be locked; thus, - these can be used to implement a - 'mini-rollback' that releases - the latest record locks */ + COMMITTED or READ UNCOMMITTED + isolation level, set in + row_search_for_mysql() if we set a new + record lock on the secondary + or clustered index; this is + used in row_unlock_for_mysql() + when releasing the lock under + the cursor if we determine + after retrieving the row that + it does not need to be locked + ('mini-rollback') */ ulint mysql_prefix_len;/*!< byte offset of the end of the last requested column */ ulint mysql_row_len; /*!< length in bytes of a row in the diff --git a/storage/innodb_plugin/include/sync0rw.h b/storage/innodb_plugin/include/sync0rw.h index 6f7e13220c1..175f3deb77c 100644 --- a/storage/innodb_plugin/include/sync0rw.h +++ b/storage/innodb_plugin/include/sync0rw.h @@ -555,11 +555,12 @@ struct rw_lock_struct { unsigned cline:14; /*!< Line where created */ unsigned last_s_line:14; /*!< Line number where last time s-locked */ unsigned last_x_line:14; /*!< Line number where last time x-locked */ +#ifdef UNIV_DEBUG ulint magic_n; /*!< RW_LOCK_MAGIC_N */ -}; - /** Value of rw_lock_struct::magic_n */ #define RW_LOCK_MAGIC_N 22643 +#endif /* UNIV_DEBUG */ +}; #ifdef UNIV_SYNC_DEBUG /** The structure for storing debug info of an rw-lock */ diff --git a/storage/innodb_plugin/include/sync0sync.h b/storage/innodb_plugin/include/sync0sync.h index d470b823fc3..71c9920a10b 100644 --- a/storage/innodb_plugin/include/sync0sync.h +++ b/storage/innodb_plugin/include/sync0sync.h @@ -438,7 +438,7 @@ or row lock! */ #define SYNC_FILE_FORMAT_TAG 1200 /* Used to serialize access to the file format tag */ #define SYNC_DICT_OPERATION 1001 /* table create, drop, etc. reserve - this in X-mode, implicit or backround + this in X-mode; implicit or backround operations purge, rollback, foreign key checks reserve this in S-mode */ #define SYNC_DICT 1000 diff --git a/storage/innodb_plugin/include/univ.i b/storage/innodb_plugin/include/univ.i index 018fd157a25..b8e595161b9 100644 --- a/storage/innodb_plugin/include/univ.i +++ b/storage/innodb_plugin/include/univ.i @@ -46,7 +46,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 1 #define INNODB_VERSION_MINOR 0 -#define INNODB_VERSION_BUGFIX 9 +#define INNODB_VERSION_BUGFIX 10 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; @@ -290,6 +290,12 @@ management to ensure correct alignment for doubles etc. */ /* Maximum number of parallel threads in a parallelized operation */ #define UNIV_MAX_PARALLELISM 32 +/* The maximum length of a table name. This is the MySQL limit and is +defined in mysql_com.h like NAME_CHAR_LEN*SYSTEM_CHARSET_MBMAXLEN, the +number does not include a terminating '\0'. InnoDB probably can handle +longer names internally */ +#define MAX_TABLE_NAME_LEN 192 + /* UNIVERSAL TYPE DEFINITIONS ========================== diff --git a/storage/innodb_plugin/lock/lock0lock.c b/storage/innodb_plugin/lock/lock0lock.c index 04e5fe1a65a..77d69d11a2d 100644 --- a/storage/innodb_plugin/lock/lock0lock.c +++ b/storage/innodb_plugin/lock/lock0lock.c @@ -1733,11 +1733,11 @@ lock_rec_create( Enqueues a waiting request for a lock which cannot be granted immediately. Checks for deadlocks. @return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or -DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another -transaction was chosen as a victim, and we got the lock immediately: -no need to wait then */ +DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that +there was a deadlock, but another transaction was chosen as a victim, +and we got the lock immediately: no need to wait then */ static -ulint +enum db_err lock_rec_enqueue_waiting( /*=====================*/ ulint type_mode,/*!< in: lock mode this @@ -1809,7 +1809,7 @@ lock_rec_enqueue_waiting( if (trx->wait_lock == NULL) { - return(DB_SUCCESS); + return(DB_SUCCESS_LOCKED_REC); } trx->que_state = TRX_QUE_LOCK_WAIT; @@ -1925,6 +1925,16 @@ somebody_waits: return(lock_rec_create(type_mode, block, heap_no, index, trx)); } +/** Record locking request status */ +enum lock_rec_req_status { + /** Failed to acquire a lock */ + LOCK_REC_FAIL, + /** Succeeded in acquiring a lock (implicit or already acquired) */ + LOCK_REC_SUCCESS, + /** Explicitly created a new lock */ + LOCK_REC_SUCCESS_CREATED +}; + /*********************************************************************//** This is a fast routine for locking a record in the most common cases: there are no explicit locks on the page, or there is just one lock, owned @@ -1932,9 +1942,9 @@ by this transaction, and of the right type_mode. This is a low-level function which does NOT look at implicit locks! Checks lock compatibility within explicit locks. This function sets a normal next-key lock, or in the case of a page supremum record, a gap type lock. -@return TRUE if locking succeeded */ +@return whether the locking succeeded */ UNIV_INLINE -ibool +enum lock_rec_req_status lock_rec_lock_fast( /*===============*/ ibool impl, /*!< in: if TRUE, no lock is set @@ -1973,19 +1983,19 @@ lock_rec_lock_fast( lock_rec_create(mode, block, heap_no, index, trx); } - return(TRUE); + return(LOCK_REC_SUCCESS_CREATED); } if (lock_rec_get_next_on_page(lock)) { - return(FALSE); + return(LOCK_REC_FAIL); } if (lock->trx != trx || lock->type_mode != (mode | LOCK_REC) || lock_rec_get_n_bits(lock) <= heap_no) { - return(FALSE); + return(LOCK_REC_FAIL); } if (!impl) { @@ -1994,10 +2004,11 @@ lock_rec_lock_fast( if (!lock_rec_get_nth_bit(lock, heap_no)) { lock_rec_set_nth_bit(lock, heap_no); + return(LOCK_REC_SUCCESS_CREATED); } } - return(TRUE); + return(LOCK_REC_SUCCESS); } /*********************************************************************//** @@ -2005,9 +2016,10 @@ This is the general, and slower, routine for locking a record. This is a low-level function which does NOT look at implicit locks! Checks lock compatibility within explicit locks. This function sets a normal next-key lock, or in the case of a page supremum record, a gap type lock. -@return DB_SUCCESS, DB_LOCK_WAIT, or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ static -ulint +enum db_err lock_rec_lock_slow( /*===============*/ ibool impl, /*!< in: if TRUE, no lock is set @@ -2024,7 +2036,6 @@ lock_rec_lock_slow( que_thr_t* thr) /*!< in: query thread */ { trx_t* trx; - ulint err; ut_ad(mutex_own(&kernel_mutex)); ut_ad((LOCK_MODE_MASK & mode) != LOCK_S @@ -2043,27 +2054,23 @@ lock_rec_lock_slow( /* The trx already has a strong enough lock on rec: do nothing */ - err = DB_SUCCESS; } else if (lock_rec_other_has_conflicting(mode, block, heap_no, trx)) { /* If another transaction has a non-gap conflicting request in the queue, as this transaction does not have a lock strong enough already granted on the record, we have to wait. */ - err = lock_rec_enqueue_waiting(mode, block, heap_no, - index, thr); - } else { - if (!impl) { - /* Set the requested lock on the record */ - - lock_rec_add_to_queue(LOCK_REC | mode, block, - heap_no, index, trx); - } + return(lock_rec_enqueue_waiting(mode, block, heap_no, + index, thr)); + } else if (!impl) { + /* Set the requested lock on the record */ - err = DB_SUCCESS; + lock_rec_add_to_queue(LOCK_REC | mode, block, + heap_no, index, trx); + return(DB_SUCCESS_LOCKED_REC); } - return(err); + return(DB_SUCCESS); } /*********************************************************************//** @@ -2072,9 +2079,10 @@ possible, enqueues a waiting lock request. This is a low-level function which does NOT look at implicit locks! Checks lock compatibility within explicit locks. This function sets a normal next-key lock, or in the case of a page supremum record, a gap type lock. -@return DB_SUCCESS, DB_LOCK_WAIT, or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ static -ulint +enum db_err lock_rec_lock( /*==========*/ ibool impl, /*!< in: if TRUE, no lock is set @@ -2090,8 +2098,6 @@ lock_rec_lock( dict_index_t* index, /*!< in: index of record */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; - ut_ad(mutex_own(&kernel_mutex)); ut_ad((LOCK_MODE_MASK & mode) != LOCK_S || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); @@ -2103,18 +2109,20 @@ lock_rec_lock( || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP || mode - (LOCK_MODE_MASK & mode) == 0); - if (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) { - - /* We try a simplified and faster subroutine for the most - common cases */ - - err = DB_SUCCESS; - } else { - err = lock_rec_lock_slow(impl, mode, block, - heap_no, index, thr); + /* We try a simplified and faster subroutine for the most + common cases */ + switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) { + case LOCK_REC_SUCCESS: + return(DB_SUCCESS); + case LOCK_REC_SUCCESS_CREATED: + return(DB_SUCCESS_LOCKED_REC); + case LOCK_REC_FAIL: + return(lock_rec_lock_slow(impl, mode, block, + heap_no, index, thr)); } - return(err); + ut_error; + return(DB_ERROR); } /*********************************************************************//** @@ -3935,8 +3943,8 @@ lock_rec_unlock( const rec_t* rec, /*!< in: record */ enum lock_mode lock_mode)/*!< in: LOCK_S or LOCK_X */ { + lock_t* first_lock; lock_t* lock; - lock_t* release_lock = NULL; ulint heap_no; ut_ad(trx && rec); @@ -3946,48 +3954,40 @@ lock_rec_unlock( mutex_enter(&kernel_mutex); - lock = lock_rec_get_first(block, heap_no); + first_lock = lock_rec_get_first(block, heap_no); /* Find the last lock with the same lock_mode and transaction from the record. */ - while (lock != NULL) { + for (lock = first_lock; lock != NULL; + lock = lock_rec_get_next(heap_no, lock)) { if (lock->trx == trx && lock_get_mode(lock) == lock_mode) { - release_lock = lock; ut_a(!lock_get_wait(lock)); + lock_rec_reset_nth_bit(lock, heap_no); + goto released; } - - lock = lock_rec_get_next(heap_no, lock); } - /* If a record lock is found, release the record lock */ - - if (UNIV_LIKELY(release_lock != NULL)) { - lock_rec_reset_nth_bit(release_lock, heap_no); - } else { - mutex_exit(&kernel_mutex); - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: unlock row could not" - " find a %lu mode lock on the record\n", - (ulong) lock_mode); + mutex_exit(&kernel_mutex); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: unlock row could not" + " find a %lu mode lock on the record\n", + (ulong) lock_mode); - return; - } + return; +released: /* Check if we can now grant waiting lock requests */ - lock = lock_rec_get_first(block, heap_no); - - while (lock != NULL) { + for (lock = first_lock; lock != NULL; + lock = lock_rec_get_next(heap_no, lock)) { if (lock_get_wait(lock) && !lock_rec_has_to_wait_in_queue(lock)) { /* Grant the lock */ lock_grant(lock); } - - lock = lock_rec_get_next(heap_no, lock); } mutex_exit(&kernel_mutex); @@ -5080,7 +5080,14 @@ lock_rec_insert_check_and_lock( lock_mutex_exit_kernel(); - if ((err == DB_SUCCESS) && !dict_index_is_clust(index)) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + /* fall through */ + case DB_SUCCESS: + if (dict_index_is_clust(index)) { + break; + } /* Update the page max trx id field */ page_update_max_trx_id(block, buf_block_get_page_zip(block), @@ -5203,6 +5210,10 @@ lock_clust_rec_modify_check_and_lock( ut_ad(lock_rec_queue_validate(block, rec, index, offsets)); + if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) { + err = DB_SUCCESS; + } + return(err); } @@ -5269,22 +5280,27 @@ lock_sec_rec_modify_check_and_lock( } #endif /* UNIV_DEBUG */ - if (err == DB_SUCCESS) { + if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) { /* Update the page max trx id field */ + /* It might not be necessary to do this if + err == DB_SUCCESS (no new lock created), + but it should not cost too much performance. */ page_update_max_trx_id(block, buf_block_get_page_zip(block), thr_get_trx(thr)->id, mtr); + err = DB_SUCCESS; } return(err); } /*********************************************************************//** -Like the counterpart for a clustered index below, but now we read a +Like lock_clust_rec_read_check_and_lock(), but reads a secondary index record. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_sec_rec_read_check_and_lock( /*=============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -5305,8 +5321,8 @@ lock_sec_rec_read_check_and_lock( LOCK_REC_NOT_GAP */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; - ulint heap_no; + enum db_err err; + ulint heap_no; ut_ad(!dict_index_is_clust(index)); ut_ad(block->frame == page_align(rec)); @@ -5357,9 +5373,10 @@ if the query thread should anyway be suspended for some reason; if not, then puts the transaction and the query thread to the lock wait state and inserts a waiting request for a record lock to the lock queue. Sets the requested mode lock on the record. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_clust_rec_read_check_and_lock( /*===============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -5380,8 +5397,8 @@ lock_clust_rec_read_check_and_lock( LOCK_REC_NOT_GAP */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; - ulint heap_no; + enum db_err err; + ulint heap_no; ut_ad(dict_index_is_clust(index)); ut_ad(block->frame == page_align(rec)); @@ -5452,17 +5469,22 @@ lock_clust_rec_read_check_and_lock_alt( mem_heap_t* tmp_heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; - ulint ret; + ulint err; rec_offs_init(offsets_); offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &tmp_heap); - ret = lock_clust_rec_read_check_and_lock(flags, block, rec, index, + err = lock_clust_rec_read_check_and_lock(flags, block, rec, index, offsets, mode, gap_mode, thr); if (tmp_heap) { mem_heap_free(tmp_heap); } - return(ret); + + if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) { + err = DB_SUCCESS; + } + + return(err); } /*******************************************************************//** diff --git a/storage/innodb_plugin/os/os0file.c b/storage/innodb_plugin/os/os0file.c index b244e3974b3..9f937b9def2 100644 --- a/storage/innodb_plugin/os/os0file.c +++ b/storage/innodb_plugin/os/os0file.c @@ -1339,7 +1339,11 @@ try_again: /* When srv_file_per_table is on, file creation failure may not be critical to the whole instance. Do not crash the server in - case of unknown errors. */ + case of unknown errors. + Please note "srv_file_per_table" is a global variable with + no explicit synchronization protection. It could be + changed during this execution path. It might not have the + same value as the one when building the table definition */ if (srv_file_per_table) { retry = os_file_handle_error_no_exit(name, create_mode == OS_FILE_CREATE ? @@ -1426,7 +1430,11 @@ try_again: /* When srv_file_per_table is on, file creation failure may not be critical to the whole instance. Do not crash the server in - case of unknown errors. */ + case of unknown errors. + Please note "srv_file_per_table" is a global variable with + no explicit synchronization protection. It could be + changed during this execution path. It might not have the + same value as the one when building the table definition */ if (srv_file_per_table) { retry = os_file_handle_error_no_exit(name, create_mode == OS_FILE_CREATE ? diff --git a/storage/innodb_plugin/page/page0zip.c b/storage/innodb_plugin/page/page0zip.c index 8d9632a3548..d3b1edefc6b 100644 --- a/storage/innodb_plugin/page/page0zip.c +++ b/storage/innodb_plugin/page/page0zip.c @@ -1464,6 +1464,7 @@ page_zip_fields_free( dict_table_t* table = index->table; mem_heap_free(index->heap); mutex_free(&(table->autoinc_mutex)); + ut_free(table->name); mem_heap_free(table->heap); } } diff --git a/storage/innodb_plugin/row/row0ins.c b/storage/innodb_plugin/row/row0ins.c index 230dc45dadc..a193bf21f7c 100644 --- a/storage/innodb_plugin/row/row0ins.c +++ b/storage/innodb_plugin/row/row0ins.c @@ -51,6 +51,15 @@ Created 4/20/1996 Heikki Tuuri #define ROW_INS_PREV 1 #define ROW_INS_NEXT 2 +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ /*********************************************************************//** Creates an insert node struct. @@ -1121,9 +1130,9 @@ nonstandard_exit_func: /*********************************************************************//** Sets a shared lock on a record. Used in locking possible duplicate key records and also in checking foreign key constraints. -@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ static -ulint +enum db_err row_ins_set_shared_rec_lock( /*========================*/ ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or @@ -1134,7 +1143,7 @@ row_ins_set_shared_rec_lock( const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; + enum db_err err; ut_ad(rec_offs_validate(rec, index, offsets)); @@ -1152,9 +1161,9 @@ row_ins_set_shared_rec_lock( /*********************************************************************//** Sets a exclusive lock on a record. Used in locking possible duplicate key records -@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ static -ulint +enum db_err row_ins_set_exclusive_rec_lock( /*===========================*/ ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or @@ -1165,7 +1174,7 @@ row_ins_set_exclusive_rec_lock( const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; + enum db_err err; ut_ad(rec_offs_validate(rec, index, offsets)); @@ -1205,7 +1214,6 @@ row_ins_check_foreign_constraint( dict_index_t* check_index; ulint n_fields_cmp; btr_pcur_t pcur; - ibool moved; int cmp; ulint err; ulint i; @@ -1336,13 +1344,13 @@ run_again: /* Scan index records and check if there is a matching record */ - for (;;) { + do { const rec_t* rec = btr_pcur_get_rec(&pcur); const buf_block_t* block = btr_pcur_get_block(&pcur); if (page_rec_is_infimum(rec)) { - goto next_rec; + continue; } offsets = rec_get_offsets(rec, check_index, @@ -1353,12 +1361,13 @@ run_again: err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - - break; + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + continue; + default: + goto end_scan; } - - goto next_rec; } cmp = cmp_dtuple_rec(entry, rec, offsets); @@ -1369,9 +1378,12 @@ run_again: err = row_ins_set_shared_rec_lock( LOCK_ORDINARY, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: break; + default: + goto end_scan; } } else { /* Found a matching record. Lock only @@ -1382,15 +1394,18 @@ run_again: LOCK_REC_NOT_GAP, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: break; + default: + goto end_scan; } if (check_ref) { err = DB_SUCCESS; - break; + goto end_scan; } else if (foreign->type != 0) { /* There is an ON UPDATE or ON DELETE condition: check them in a separate @@ -1416,7 +1431,7 @@ run_again: err = DB_FOREIGN_DUPLICATE_KEY; } - break; + goto end_scan; } /* row_ins_foreign_check_on_constraint @@ -1429,49 +1444,41 @@ run_again: thr, foreign, rec, entry); err = DB_ROW_IS_REFERENCED; - break; + goto end_scan; } } - } + } else { + ut_a(cmp < 0); - if (cmp < 0) { err = row_ins_set_shared_rec_lock( LOCK_GAP, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - break; - } - - if (check_ref) { - err = DB_NO_REFERENCED_ROW; - row_ins_foreign_report_add_err( - trx, foreign, rec, entry); - } else { - err = DB_SUCCESS; + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + if (check_ref) { + err = DB_NO_REFERENCED_ROW; + row_ins_foreign_report_add_err( + trx, foreign, rec, entry); + } else { + err = DB_SUCCESS; + } } - break; + goto end_scan; } + } while (btr_pcur_move_to_next(&pcur, &mtr)); - ut_a(cmp == 0); -next_rec: - moved = btr_pcur_move_to_next(&pcur, &mtr); - - if (!moved) { - if (check_ref) { - rec = btr_pcur_get_rec(&pcur); - row_ins_foreign_report_add_err( - trx, foreign, rec, entry); - err = DB_NO_REFERENCED_ROW; - } else { - err = DB_SUCCESS; - } - - break; - } + if (check_ref) { + row_ins_foreign_report_add_err( + trx, foreign, btr_pcur_get_rec(&pcur), entry); + err = DB_NO_REFERENCED_ROW; + } else { + err = DB_SUCCESS; } +end_scan: btr_pcur_close(&pcur); mtr_commit(&mtr); @@ -1719,9 +1726,13 @@ row_ins_scan_sec_index_for_duplicate( rec, index, offsets, thr); } - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: break; + default: + goto end_scan; } if (page_rec_is_supremum(rec)) { @@ -1738,17 +1749,15 @@ row_ins_scan_sec_index_for_duplicate( thr_get_trx(thr)->error_info = index; - break; + goto end_scan; } + } else { + ut_a(cmp < 0); + goto end_scan; } - - if (cmp < 0) { - break; - } - - ut_a(cmp == 0); } while (btr_pcur_move_to_next(&pcur, &mtr)); +end_scan: if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } @@ -1837,7 +1846,11 @@ row_ins_duplicate_error_in_clust( cursor->index, offsets, thr); } - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto func_exit; } @@ -1877,7 +1890,11 @@ row_ins_duplicate_error_in_clust( rec, cursor->index, offsets, thr); } - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto func_exit; } diff --git a/storage/innodb_plugin/row/row0merge.c b/storage/innodb_plugin/row/row0merge.c index 1f6851bf63c..70cc7912fad 100644 --- a/storage/innodb_plugin/row/row0merge.c +++ b/storage/innodb_plugin/row/row0merge.c @@ -1578,22 +1578,28 @@ row_merge( const dict_index_t* index, /*!< in: index being created */ merge_file_t* file, /*!< in/out: file containing index entries */ - ulint* half, /*!< in/out: half the file */ row_merge_block_t* block, /*!< in/out: 3 buffers */ int* tmpfd, /*!< in/out: temporary file handle */ - TABLE* table) /*!< in/out: MySQL table, for + TABLE* table, /*!< in/out: MySQL table, for reporting erroneous key value if applicable */ + ulint* num_run,/*!< in/out: Number of runs remain + to be merged */ + ulint* run_offset) /*!< in/out: Array contains the + first offset number for each merge + run */ { ulint foffs0; /*!< first input offset */ ulint foffs1; /*!< second input offset */ ulint error; /*!< error code */ merge_file_t of; /*!< output file */ - const ulint ihalf = *half; + const ulint ihalf = run_offset[*num_run / 2]; /*!< half the input file */ - ulint ohalf; /*!< half the output file */ + ulint n_run = 0; + /*!< num of runs generated from this merge */ UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]); + ut_ad(ihalf < file->offset); of.fd = *tmpfd; @@ -1601,17 +1607,20 @@ row_merge( of.n_rec = 0; /* Merge blocks to the output file. */ - ohalf = 0; foffs0 = 0; foffs1 = ihalf; + UNIV_MEM_INVALID(run_offset, *num_run * sizeof *run_offset); + for (; foffs0 < ihalf && foffs1 < file->offset; foffs0++, foffs1++) { - ulint ahalf; /*!< arithmetic half the input file */ if (UNIV_UNLIKELY(trx_is_interrupted(trx))) { return(DB_INTERRUPTED); } + /* Remember the offset number for this run */ + run_offset[n_run++] = of.offset; + error = row_merge_blocks(index, file, block, &foffs0, &foffs1, &of, table); @@ -1619,21 +1628,6 @@ row_merge( return(error); } - /* Record the offset of the output file when - approximately half the output has been generated. In - this way, the next invocation of row_merge() will - spend most of the time in this loop. The initial - estimate is ohalf==0. */ - ahalf = file->offset / 2; - ut_ad(ohalf <= of.offset); - - /* Improve the estimate until reaching half the input - file size, or we can not get any closer to it. All - comparands should be non-negative when !(ohalf < ahalf) - because ohalf <= of.offset. */ - if (ohalf < ahalf || of.offset - ahalf < ohalf - ahalf) { - ohalf = of.offset; - } } /* Copy the last blocks, if there are any. */ @@ -1643,6 +1637,9 @@ row_merge( return(DB_INTERRUPTED); } + /* Remember the offset number for this run */ + run_offset[n_run++] = of.offset; + if (!row_merge_blocks_copy(index, file, block, &foffs0, &of)) { return(DB_CORRUPTION); } @@ -1655,6 +1652,9 @@ row_merge( return(DB_INTERRUPTED); } + /* Remember the offset number for this run */ + run_offset[n_run++] = of.offset; + if (!row_merge_blocks_copy(index, file, block, &foffs1, &of)) { return(DB_CORRUPTION); } @@ -1666,10 +1666,23 @@ row_merge( return(DB_CORRUPTION); } + ut_ad(n_run <= *num_run); + + *num_run = n_run; + + /* Each run can contain one or more offsets. As merge goes on, + the number of runs (to merge) will reduce until we have one + single run. So the number of runs will always be smaller than + the number of offsets in file */ + ut_ad((*num_run) <= file->offset); + + /* The number of offsets in output file is always equal or + smaller than input file */ + ut_ad(of.offset <= file->offset); + /* Swap file descriptors for the next pass. */ *tmpfd = file->fd; *file = of; - *half = ohalf; UNIV_MEM_INVALID(block[0], 3 * sizeof block[0]); @@ -1694,27 +1707,44 @@ row_merge_sort( if applicable */ { ulint half = file->offset / 2; + ulint num_runs; + ulint* run_offset; + ulint error = DB_SUCCESS; + + /* Record the number of merge runs we need to perform */ + num_runs = file->offset; + + /* If num_runs are less than 1, nothing to merge */ + if (num_runs <= 1) { + return(error); + } + + /* "run_offset" records each run's first offset number */ + run_offset = (ulint*) mem_alloc(file->offset * sizeof(ulint)); + + /* This tells row_merge() where to start for the first round + of merge. */ + run_offset[half] = half; /* The file should always contain at least one byte (the end of file marker). Thus, it must be at least one block. */ ut_ad(file->offset > 0); + /* Merge the runs until we have one big run */ do { - ulint error; + error = row_merge(trx, index, file, block, tmpfd, + table, &num_runs, run_offset); - error = row_merge(trx, index, file, &half, - block, tmpfd, table); + UNIV_MEM_ASSERT_RW(run_offset, num_runs * sizeof *run_offset); if (error != DB_SUCCESS) { - return(error); + break; } + } while (num_runs > 1); - /* half > 0 should hold except when the file consists - of one block. No need to merge further then. */ - ut_ad(half > 0 || file->offset == 1); - } while (half < file->offset && half > 0); + mem_free(run_offset); - return(DB_SUCCESS); + return(error); } /*************************************************************//** @@ -2306,7 +2336,7 @@ row_merge_rename_tables( { ulint err = DB_ERROR; pars_info_t* info; - const char* old_name= old_table->name; + char old_name[MAX_TABLE_NAME_LEN + 1]; ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); ut_ad(old_table != new_table); @@ -2314,6 +2344,17 @@ row_merge_rename_tables( ut_a(trx->dict_operation_lock_mode == RW_X_LATCH); + /* store the old/current name to an automatic variable */ + if (strlen(old_table->name) + 1 <= sizeof(old_name)) { + memcpy(old_name, old_table->name, strlen(old_table->name) + 1); + } else { + ut_print_timestamp(stderr); + fprintf(stderr, "InnoDB: too long table name: '%s', " + "max length is %d\n", old_table->name, + MAX_TABLE_NAME_LEN); + ut_error; + } + trx->op_info = "renaming tables"; /* We use the private SQL parser of Innobase to generate the query diff --git a/storage/innodb_plugin/row/row0mysql.c b/storage/innodb_plugin/row/row0mysql.c index 8d47d0f25fc..feeb7fc80b7 100644 --- a/storage/innodb_plugin/row/row0mysql.c +++ b/storage/innodb_plugin/row/row0mysql.c @@ -625,6 +625,8 @@ row_create_prebuilt( prebuilt->select_lock_type = LOCK_NONE; prebuilt->stored_select_lock_type = 99999999; + UNIV_MEM_INVALID(&prebuilt->stored_select_lock_type, + sizeof prebuilt->stored_select_lock_type); prebuilt->search_tuple = dtuple_create( heap, 2 * dict_table_get_n_cols(table)); @@ -1428,27 +1430,26 @@ run_again: } /*********************************************************************//** -This can only be used when srv_locks_unsafe_for_binlog is TRUE or -this session is using a READ COMMITTED isolation level. Before -calling this function we must use trx_reset_new_rec_lock_info() and -trx_register_new_rec_lock() to store the information which new record locks -really were set. This function removes a newly set lock under prebuilt->pcur, -and also under prebuilt->clust_pcur. Currently, this is only used and tested -in the case of an UPDATE or a DELETE statement, where the row lock is of the -LOCK_X type. -Thus, this implements a 'mini-rollback' that releases the latest record -locks we set. -@return error code or DB_SUCCESS */ +This can only be used when srv_locks_unsafe_for_binlog is TRUE or this +session is using a READ COMMITTED or READ UNCOMMITTED isolation level. +Before calling this function row_search_for_mysql() must have +initialized prebuilt->new_rec_locks to store the information which new +record locks really were set. This function removes a newly set +clustered index record lock under prebuilt->pcur or +prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that +releases the latest clustered index record lock we set. +@return error code or DB_SUCCESS */ UNIV_INTERN int row_unlock_for_mysql( /*=================*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in MySQL + row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL handle */ - ibool has_latches_on_recs)/*!< TRUE if called so that we have - the latches on the records under pcur - and clust_pcur, and we do not need to - reposition the cursors. */ + ibool has_latches_on_recs)/*!< in: TRUE if called so + that we have the latches on + the records under pcur and + clust_pcur, and we do not need + to reposition the cursors. */ { btr_pcur_t* pcur = prebuilt->pcur; btr_pcur_t* clust_pcur = prebuilt->clust_pcur; diff --git a/storage/innodb_plugin/row/row0purge.c b/storage/innodb_plugin/row/row0purge.c index 500ebe571ab..835af990672 100644 --- a/storage/innodb_plugin/row/row0purge.c +++ b/storage/innodb_plugin/row/row0purge.c @@ -44,6 +44,16 @@ Created 3/14/1997 Heikki Tuuri #include "row0mysql.h" #include "log0log.h" +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /********************************************************************//** Creates a purge node to a query graph. @return own: purge node */ @@ -126,6 +136,7 @@ row_purge_remove_clust_if_poss_low( pcur = &(node->pcur); btr_cur = btr_pcur_get_btr_cur(pcur); + log_free_check(); mtr_start(&mtr); success = row_purge_reposition_pcur(mode, node, &mtr); diff --git a/storage/innodb_plugin/row/row0sel.c b/storage/innodb_plugin/row/row0sel.c index 4d19ed93a49..2861235a995 100644 --- a/storage/innodb_plugin/row/row0sel.c +++ b/storage/innodb_plugin/row/row0sel.c @@ -863,8 +863,14 @@ row_sel_get_clust_rec( clust_rec, index, offsets, node->row_lock_mode, lock_type, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS: + case DB_SUCCESS_LOCKED_REC: + /* Declare the variable uninitialized in Valgrind. + It should be set to DB_SUCCESS at func_exit. */ + UNIV_MEM_INVALID(&err, sizeof err); + break; + default: goto err_exit; } } else { @@ -934,9 +940,9 @@ err_exit: /*********************************************************************//** Sets a lock on a record. -@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ UNIV_INLINE -ulint +enum db_err sel_set_rec_lock( /*=============*/ const buf_block_t* block, /*!< in: buffer block of rec */ @@ -948,8 +954,8 @@ sel_set_rec_lock( LOC_REC_NOT_GAP */ que_thr_t* thr) /*!< in: query thread */ { - trx_t* trx; - ulint err; + trx_t* trx; + enum db_err err; trx = thr_get_trx(thr); @@ -1482,11 +1488,15 @@ rec_loop: node->row_lock_mode, lock_type, thr); - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: /* Note that in this case we will store in pcur the PREDECESSOR of the record we are waiting the lock for */ - goto lock_wait_or_error; } } @@ -1538,8 +1548,12 @@ skip_lock: rec, index, offsets, node->row_lock_mode, lock_type, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -2664,6 +2678,12 @@ row_sel_store_mysql_rec( prebuilt->blob_heap = NULL; } + /* init null bytes with default values as they might be + left uninitialized in some cases and these uninited bytes + might be copied into mysql record buffer that leads to + valgrind warnings */ + memcpy(mysql_rec, prebuilt->default_rec, prebuilt->null_bitmap_len); + for (i = 0; i < prebuilt->n_template; i++) { templ = prebuilt->mysql_template + i; @@ -2801,9 +2821,9 @@ row_sel_build_prev_vers_for_mysql( Retrieves the clustered index record corresponding to a record in a non-clustered index. Does the necessary locking. Used in the MySQL interface. -@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ static -ulint +enum db_err row_sel_get_clust_rec_for_mysql( /*============================*/ row_prebuilt_t* prebuilt,/*!< in: prebuilt struct in the handle */ @@ -2830,7 +2850,7 @@ row_sel_get_clust_rec_for_mysql( dict_index_t* clust_index; const rec_t* clust_rec; rec_t* old_vers; - ulint err; + enum db_err err; trx_t* trx; *out_rec = NULL; @@ -2889,6 +2909,7 @@ row_sel_get_clust_rec_for_mysql( clust_rec = NULL; + err = DB_SUCCESS; goto func_exit; } @@ -2904,8 +2925,11 @@ row_sel_get_clust_rec_for_mysql( 0, btr_pcur_get_block(prebuilt->clust_pcur), clust_rec, clust_index, *offsets, prebuilt->select_lock_type, LOCK_REC_NOT_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS: + case DB_SUCCESS_LOCKED_REC: + break; + default: goto err_exit; } } else { @@ -2965,6 +2989,8 @@ row_sel_get_clust_rec_for_mysql( rec, sec_index, clust_rec, clust_index)); #endif } + + err = DB_SUCCESS; } func_exit: @@ -2977,7 +3003,6 @@ func_exit: btr_pcur_store_position(prebuilt->clust_pcur, mtr); } - err = DB_SUCCESS; err_exit: return(err); } @@ -3702,8 +3727,12 @@ shortcut_fails_too_big_rec: prebuilt->select_lock_type, LOCK_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -3801,8 +3830,12 @@ rec_loop: prebuilt->select_lock_type, LOCK_ORDINARY, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -3932,8 +3965,11 @@ wrong_offs: prebuilt->select_lock_type, LOCK_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -3968,8 +4004,11 @@ wrong_offs: prebuilt->select_lock_type, LOCK_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -4039,15 +4078,21 @@ no_gap_lock: switch (err) { const rec_t* old_vers; - case DB_SUCCESS: + case DB_SUCCESS_LOCKED_REC: if (srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) { + || trx->isolation_level + <= TRX_ISO_READ_COMMITTED) { /* Note that a record of prebuilt->index was locked. */ prebuilt->new_rec_locks = 1; } + err = DB_SUCCESS; + case DB_SUCCESS: break; case DB_LOCK_WAIT: + /* Never unlock rows that were part of a conflict. */ + prebuilt->new_rec_locks = 0; + if (UNIV_LIKELY(prebuilt->row_read_type != ROW_READ_TRY_SEMI_CONSISTENT) || unique_search @@ -4077,7 +4122,6 @@ no_gap_lock: if (UNIV_LIKELY(trx->wait_lock != NULL)) { lock_cancel_waiting_and_release( trx->wait_lock); - prebuilt->new_rec_locks = 0; } else { mutex_exit(&kernel_mutex); @@ -4089,9 +4133,6 @@ no_gap_lock: ULINT_UNDEFINED, &heap); err = DB_SUCCESS; - /* Note that a record of - prebuilt->index was locked. */ - prebuilt->new_rec_locks = 1; break; } mutex_exit(&kernel_mutex); @@ -4228,27 +4269,30 @@ requires_clust_rec: err = row_sel_get_clust_rec_for_mysql(prebuilt, index, rec, thr, &clust_rec, &offsets, &heap, &mtr); - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS: + if (clust_rec == NULL) { + /* The record did not exist in the read view */ + ut_ad(prebuilt->select_lock_type == LOCK_NONE); + goto next_rec; + } + break; + case DB_SUCCESS_LOCKED_REC: + ut_a(clust_rec != NULL); + if (srv_locks_unsafe_for_binlog + || trx->isolation_level + <= TRX_ISO_READ_COMMITTED) { + /* Note that the clustered index record + was locked. */ + prebuilt->new_rec_locks = 2; + } + err = DB_SUCCESS; + break; + default: goto lock_wait_or_error; } - if (clust_rec == NULL) { - /* The record did not exist in the read view */ - ut_ad(prebuilt->select_lock_type == LOCK_NONE); - - goto next_rec; - } - - if ((srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE) { - /* Note that both the secondary index record - and the clustered index record were locked. */ - ut_ad(prebuilt->new_rec_locks == 1); - prebuilt->new_rec_locks = 2; - } - if (UNIV_UNLIKELY(rec_get_deleted_flag(clust_rec, comp))) { /* The record is delete marked: we can skip it */ diff --git a/storage/innodb_plugin/row/row0uins.c b/storage/innodb_plugin/row/row0uins.c index 9f9c814f1a5..930a5cf13b6 100644 --- a/storage/innodb_plugin/row/row0uins.c +++ b/storage/innodb_plugin/row/row0uins.c @@ -46,6 +46,16 @@ Created 2/25/1997 Heikki Tuuri #include "ibuf0ibuf.h" #include "log0log.h" +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /***************************************************************//** Removes a clustered index record. The pcur in node was positioned on the record, now it is detached. @@ -152,7 +162,6 @@ row_undo_ins_remove_sec_low( ulint err; mtr_t mtr; - log_free_check(); mtr_start(&mtr); found = row_search_index_entry(index, entry, mode, &pcur, &mtr); @@ -335,6 +344,7 @@ row_undo_ins( transactions. */ ut_a(trx_is_recv(node->trx)); } else { + log_free_check(); err = row_undo_ins_remove_sec(node->index, entry); if (err != DB_SUCCESS) { @@ -346,5 +356,6 @@ row_undo_ins( node->index = dict_table_get_next_index(node->index); } + log_free_check(); return(row_undo_ins_remove_clust_rec(node)); } diff --git a/storage/innodb_plugin/row/row0umod.c b/storage/innodb_plugin/row/row0umod.c index e7245dbee41..8464b0f95cc 100644 --- a/storage/innodb_plugin/row/row0umod.c +++ b/storage/innodb_plugin/row/row0umod.c @@ -58,12 +58,22 @@ delete marked clustered index record was delete unmarked and possibly also some of its fields were changed. Now, it is possible that the delete marked version has become obsolete at the time the undo is started. */ +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /***********************************************************//** Checks if also the previous version of the clustered index record was modified or inserted by the same transaction, and its undo number is such that it should be undone in the same rollback. @return TRUE if also previous modify or insert of this row should be undone */ -UNIV_INLINE +static ibool row_undo_mod_undo_also_prev_vers( /*=============================*/ @@ -231,6 +241,8 @@ row_undo_mod_clust( ut_ad(node && thr); + log_free_check(); + /* Check if also the previous version of the clustered index record should be undone in this same rollback operation */ @@ -657,24 +669,55 @@ row_undo_mod_upd_exist_sec( /* Build the newest version of the index entry */ entry = row_build_index_entry(node->row, node->ext, index, heap); - ut_a(entry); - /* NOTE that if we updated the fields of a - delete-marked secondary index record so that - alphabetically they stayed the same, e.g., - 'abc' -> 'aBc', we cannot return to the original - values because we do not know them. But this should - not cause problems because in row0sel.c, in queries - we always retrieve the clustered index record or an - earlier version of it, if the secondary index record - through which we do the search is delete-marked. */ - - err = row_undo_mod_del_mark_or_remove_sec(node, thr, - index, - entry); - if (err != DB_SUCCESS) { - mem_heap_free(heap); - - return(err); + if (UNIV_UNLIKELY(!entry)) { + /* The server must have crashed in + row_upd_clust_rec_by_insert(), in + row_ins_index_entry_low() before + btr_store_big_rec_extern_fields() + has written the externally stored columns + (BLOBs) of the new clustered index entry. */ + + /* The table must be in DYNAMIC or COMPRESSED + format. REDUNDANT and COMPACT formats + store a local 768-byte prefix of each + externally stored column. */ + ut_a(dict_table_get_format(index->table) + >= DICT_TF_FORMAT_ZIP); + + /* This is only legitimate when + rolling back an incomplete transaction + after crash recovery. */ + ut_a(thr_get_trx(thr)->is_recovered); + + /* The server must have crashed before + completing the insert of the new + clustered index entry and before + inserting to the secondary indexes. + Because node->row was not yet written + to this index, we can ignore it. But + we must restore node->undo_row. */ + } else { + /* NOTE that if we updated the fields of a + delete-marked secondary index record so that + alphabetically they stayed the same, e.g., + 'abc' -> 'aBc', we cannot return to the + original values because we do not know them. + But this should not cause problems because + in row0sel.c, in queries we always retrieve + the clustered index record or an earlier + version of it, if the secondary index record + through which we do the search is + delete-marked. */ + + err = row_undo_mod_del_mark_or_remove_sec( + node, thr, index, entry); + if (err != DB_SUCCESS) { + mem_heap_free(heap); + + return(err); + } + + mem_heap_empty(heap); } /* We may have to update the delete mark in the @@ -683,7 +726,6 @@ row_undo_mod_upd_exist_sec( the secondary index record if we updated its fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'. */ - mem_heap_empty(heap); entry = row_build_index_entry(node->undo_row, node->undo_ext, index, heap); diff --git a/storage/innodb_plugin/row/row0undo.c b/storage/innodb_plugin/row/row0undo.c index 3d739c9689a..9ef842b5114 100644 --- a/storage/innodb_plugin/row/row0undo.c +++ b/storage/innodb_plugin/row/row0undo.c @@ -297,7 +297,7 @@ row_undo( if (locked_data_dict) { - row_mysql_lock_data_dictionary(trx); + row_mysql_freeze_data_dictionary(trx); } if (node->state == UNDO_NODE_INSERT) { @@ -312,7 +312,7 @@ row_undo( if (locked_data_dict) { - row_mysql_unlock_data_dictionary(trx); + row_mysql_unfreeze_data_dictionary(trx); } /* Do some cleanup */ diff --git a/storage/innodb_plugin/row/row0upd.c b/storage/innodb_plugin/row/row0upd.c index 95d1d00aeef..d0aaecd3dae 100644 --- a/storage/innodb_plugin/row/row0upd.c +++ b/storage/innodb_plugin/row/row0upd.c @@ -92,6 +92,16 @@ the x-latch freed? The most efficient way for performing a searched delete is obviously to keep the x-latch for several steps of query graph execution. */ +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /***********************************************************//** Checks if an update vector changes some of the first ordering fields of an index record. This is only used in foreign key checks and we can assume @@ -1453,7 +1463,6 @@ row_upd_sec_index_entry( entry = row_build_index_entry(node->row, node->ext, index, heap); ut_a(entry); - log_free_check(); mtr_start(&mtr); found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur, @@ -1529,7 +1538,7 @@ Updates the secondary index record if it is changed in the row update or deletes it if this is a delete. @return DB_SUCCESS if operation successfully completed, else error code or DB_LOCK_WAIT */ -UNIV_INLINE +static ulint row_upd_sec_step( /*=============*/ @@ -2015,6 +2024,7 @@ row_upd( if (node->state == UPD_NODE_UPDATE_CLUSTERED || node->state == UPD_NODE_INSERT_CLUSTERED) { + log_free_check(); err = row_upd_clust_step(node, thr); if (err != DB_SUCCESS) { @@ -2029,6 +2039,8 @@ row_upd( } while (node->index != NULL) { + + log_free_check(); err = row_upd_sec_step(node, thr); if (err != DB_SUCCESS) { diff --git a/storage/innodb_plugin/sync/sync0rw.c b/storage/innodb_plugin/sync/sync0rw.c index d231b6acdf7..52eaa5d0f43 100644 --- a/storage/innodb_plugin/sync/sync0rw.c +++ b/storage/innodb_plugin/sync/sync0rw.c @@ -267,7 +267,7 @@ rw_lock_create_func( lock->level = level; #endif /* UNIV_SYNC_DEBUG */ - lock->magic_n = RW_LOCK_MAGIC_N; + ut_d(lock->magic_n = RW_LOCK_MAGIC_N); lock->cfile_name = cfile_name; lock->cline = (unsigned int) cline; @@ -282,10 +282,8 @@ rw_lock_create_func( mutex_enter(&rw_lock_list_mutex); - if (UT_LIST_GET_LEN(rw_lock_list) > 0) { - ut_a(UT_LIST_GET_FIRST(rw_lock_list)->magic_n - == RW_LOCK_MAGIC_N); - } + ut_ad(UT_LIST_GET_FIRST(rw_lock_list) == NULL + || UT_LIST_GET_FIRST(rw_lock_list)->magic_n == RW_LOCK_MAGIC_N); UT_LIST_ADD_FIRST(list, rw_lock_list, lock); @@ -305,8 +303,6 @@ rw_lock_free( ut_ad(rw_lock_validate(lock)); ut_a(lock->lock_word == X_LOCK_DECR); - lock->magic_n = 0; - #ifndef INNODB_RW_LOCKS_USE_ATOMICS mutex_free(rw_lock_get_mutex(lock)); #endif /* INNODB_RW_LOCKS_USE_ATOMICS */ @@ -316,16 +312,16 @@ rw_lock_free( os_event_free(lock->wait_ex_event); - if (UT_LIST_GET_PREV(list, lock)) { - ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N); - } - if (UT_LIST_GET_NEXT(list, lock)) { - ut_a(UT_LIST_GET_NEXT(list, lock)->magic_n == RW_LOCK_MAGIC_N); - } + ut_ad(UT_LIST_GET_PREV(list, lock) == NULL + || UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N); + ut_ad(UT_LIST_GET_NEXT(list, lock) == NULL + || UT_LIST_GET_NEXT(list, lock)->magic_n == RW_LOCK_MAGIC_N); UT_LIST_REMOVE(list, rw_lock_list, lock); mutex_exit(&rw_lock_list_mutex); + + ut_d(lock->magic_n = 0); } #ifdef UNIV_DEBUG @@ -344,7 +340,7 @@ rw_lock_validate( ulint waiters = rw_lock_get_waiters(lock); lint lock_word = lock->lock_word; - ut_a(lock->magic_n == RW_LOCK_MAGIC_N); + ut_ad(lock->magic_n == RW_LOCK_MAGIC_N); ut_a(waiters == 0 || waiters == 1); ut_a(lock_word > -X_LOCK_DECR ||(-lock_word) % X_LOCK_DECR == 0); |