diff options
author | Jimmy Yang <jimmy.yang@oracle.com> | 2011-08-16 18:07:59 -0700 |
---|---|---|
committer | Jimmy Yang <jimmy.yang@oracle.com> | 2011-08-16 18:07:59 -0700 |
commit | 95fa7fab3b70e5117d757d9df42ac9d7040fea03 (patch) | |
tree | f35296e1cd9a625708ab194372e0944f1309f78c | |
parent | 887ac6774f9d33b64d7c316e9310038592ec6a83 (diff) | |
download | mariadb-git-95fa7fab3b70e5117d757d9df42ac9d7040fea03.tar.gz |
Fix bug #11830883, SUPPORT "CORRUPTED" BIT FOR INNODB TABLES AND INDEXES.
Also addressed issues in bug #11745133: when a corrupted buffer/page is
found, we can now mark the table as corrupted instead of crashing the
server, provided the table was created with innodb_file_per_table on.
30 files changed, 915 insertions, 55 deletions
diff --git a/include/my_base.h b/include/my_base.h index f6afc891281..cc02b0080d9 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -446,8 +446,9 @@ enum ha_base_keytype { #define HA_ERR_FILE_TOO_SHORT 175 /* File too short */ #define HA_ERR_WRONG_CRC 176 /* Wrong CRC on page */ #define HA_ERR_TOO_MANY_CONCURRENT_TRXS 177 /*Too many active concurrent transactions */ -#define HA_ERR_INDEX_COL_TOO_LONG 178 /* Index column length exceeds limit */ -#define HA_ERR_LAST 178 /* Copy of last error nr */ +#define HA_ERR_INDEX_COL_TOO_LONG 178 /* Index column length exceeds limit */ +#define HA_ERR_INDEX_CORRUPT 179 /* Index corrupted */ +#define HA_ERR_LAST 179 /* Copy of last error nr */ /* Number of different errors */ #define HA_ERR_ERRORS (HA_ERR_LAST - HA_ERR_FIRST + 1) diff --git a/mysql-test/suite/innodb/r/innodb_corrupt_bit.result b/mysql-test/suite/innodb/r/innodb_corrupt_bit.result new file mode 100644 index 00000000000..4253adc93aa --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_corrupt_bit.result @@ -0,0 +1,81 @@ +set names utf8; +CREATE TABLE corrupt_bit_test_ā( +a INT AUTO_INCREMENT PRIMARY KEY, +b CHAR(100), +c INT, +z INT, +INDEX(b)) +ENGINE=InnoDB; +INSERT INTO corrupt_bit_test_ā VALUES(0,'x',1, 1); +CREATE UNIQUE INDEX idxā ON corrupt_bit_test_ā(c, b); +CREATE UNIQUE INDEX idxē ON corrupt_bit_test_ā(z, b); +SELECT * FROM corrupt_bit_test_ā; +a b c z +1 x 1 1 +select @@unique_checks; +@@unique_checks +0 +select @@innodb_change_buffering_debug; +@@innodb_change_buffering_debug +1 +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+1,z+1 FROM corrupt_bit_test_ā; +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+10,z+10 FROM corrupt_bit_test_ā; +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+20,z+20 FROM corrupt_bit_test_ā; +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+50,z+50 FROM corrupt_bit_test_ā; +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+100,z+100 FROM corrupt_bit_test_ā; +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+200,z+200 FROM corrupt_bit_test_ā; 
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+400,z+400 FROM corrupt_bit_test_ā; +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+800,z+800 FROM corrupt_bit_test_ā; +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+1600,z+1600 FROM corrupt_bit_test_ā; +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+4000,z+4000 FROM corrupt_bit_test_ā; +select count(*) from corrupt_bit_test_ā; +count(*) +1024 +CREATE INDEX idx3 ON corrupt_bit_test_ā(b, c); +INSERT INTO corrupt_bit_test_ā VALUES(13000,'x',1,1); +CREATE INDEX idx4 ON corrupt_bit_test_ā(b, z); +check table corrupt_bit_test_ā; +Table Op Msg_type Msg_text +test.corrupt_bit_test_ā check Warning InnoDB: The B-tree of index "idxā" is corrupted. +test.corrupt_bit_test_ā check Warning InnoDB: The B-tree of index "idxē" is corrupted. +test.corrupt_bit_test_ā check error Corrupt +select c from corrupt_bit_test_ā; +ERROR HY000: Incorrect key file for table 'corrupt_bit_test_ā'; try to repair it +select z from corrupt_bit_test_ā; +ERROR HY000: Incorrect key file for table 'corrupt_bit_test_ā'; try to repair it +show warnings; +Level Code Message +Warning 179 InnoDB: Index "idxē" for table "test/corrupt_bit_test_@1s" is marked as corrupted +Error 1034 Incorrect key file for table 'corrupt_bit_test_ā'; try to repair it +insert into corrupt_bit_test_ā values (10001, "a", 20001, 20001); +select * from corrupt_bit_test_ā use index(primary) where a = 10001; +a b c z +10001 a 20001 20001 +begin; +insert into corrupt_bit_test_ā values (10002, "a", 20002, 20002); +delete from corrupt_bit_test_ā where a = 10001; +insert into corrupt_bit_test_ā values (10001, "a", 20001, 20001); +rollback; +drop index idxā on corrupt_bit_test_ā; +check table corrupt_bit_test_ā; +Table Op Msg_type Msg_text +test.corrupt_bit_test_ā check Warning InnoDB: Index "idxē" is marked as corrupted +test.corrupt_bit_test_ā check error Corrupt +set names utf8; +select z from corrupt_bit_test_ā; +ERROR HY000: Incorrect key file for table 'corrupt_bit_test_ā'; try to repair it +drop 
index idxē on corrupt_bit_test_ā; +select z from corrupt_bit_test_ā limit 10; +z +20001 +1 +1 +2 +11 +12 +21 +22 +31 +32 +drop table corrupt_bit_test_ā; +SET GLOBAL innodb_change_buffering_debug = 0; diff --git a/mysql-test/suite/innodb/t/innodb_corrupt_bit.test b/mysql-test/suite/innodb/t/innodb_corrupt_bit.test new file mode 100644 index 00000000000..7d98a8e4a03 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_corrupt_bit.test @@ -0,0 +1,135 @@ +# +# Test for persistent corrupt bit for corrupted index and table +# +-- source include/have_innodb.inc + +# This test needs debug server +--source include/have_debug.inc + +-- disable_query_log +# This test setup is extracted from bug56680.test: +# The flag innodb_change_buffering_debug is only available in debug builds. +# It instructs InnoDB to try to evict pages from the buffer pool when +# change buffering is possible, so that the change buffer will be used +# whenever possible. +-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE +SET @innodb_change_buffering_debug_orig = @@innodb_change_buffering_debug; +-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE +SET GLOBAL innodb_change_buffering_debug = 1; + +# Turn off Unique Check to create corrupted index with dup key +SET UNIQUE_CHECKS=0; + +-- enable_query_log + +set names utf8; + +CREATE TABLE corrupt_bit_test_ā( + a INT AUTO_INCREMENT PRIMARY KEY, + b CHAR(100), + c INT, + z INT, + INDEX(b)) +ENGINE=InnoDB; + +INSERT INTO corrupt_bit_test_ā VALUES(0,'x',1, 1); + +# This is the first unique index we intend to corrupt +CREATE UNIQUE INDEX idxā ON corrupt_bit_test_ā(c, b); + +# This is the second unique index we intend to corrupt +CREATE UNIQUE INDEX idxē ON corrupt_bit_test_ā(z, b); + +SELECT * FROM corrupt_bit_test_ā; + +select @@unique_checks; +select @@innodb_change_buffering_debug; + +# Create enough rows for the table, so that the insert buffer will be +# used for modifying the secondary index page. 
There must be multiple +# index pages, because changes to the root page are never buffered. + +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+1,z+1 FROM corrupt_bit_test_ā; +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+10,z+10 FROM corrupt_bit_test_ā; +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+20,z+20 FROM corrupt_bit_test_ā; +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+50,z+50 FROM corrupt_bit_test_ā; +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+100,z+100 FROM corrupt_bit_test_ā; +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+200,z+200 FROM corrupt_bit_test_ā; +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+400,z+400 FROM corrupt_bit_test_ā; +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+800,z+800 FROM corrupt_bit_test_ā; +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+1600,z+1600 FROM corrupt_bit_test_ā; +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+4000,z+4000 FROM corrupt_bit_test_ā; + +select count(*) from corrupt_bit_test_ā; + +CREATE INDEX idx3 ON corrupt_bit_test_ā(b, c); + +# Create a dup key error on index "idxē" and "idxā" by inserting a dup value +INSERT INTO corrupt_bit_test_ā VALUES(13000,'x',1,1); + +# creating an index should succeed even if other secondary indexes are corrupted +CREATE INDEX idx4 ON corrupt_bit_test_ā(b, z); + +# Check table will find the unique indexes corrupted +# with dup key +check table corrupt_bit_test_ā; + +# This selection intend to use the corrupted index. 
Expect to fail +-- error ER_NOT_KEYFILE +select c from corrupt_bit_test_ā; + +-- error ER_NOT_KEYFILE +select z from corrupt_bit_test_ā; + +show warnings; + +# Since corrupted index is a secondary index, we only disable such +# index and allow other DML to proceed +insert into corrupt_bit_test_ā values (10001, "a", 20001, 20001); + +# This does not use the corrupted index, expect to succeed +select * from corrupt_bit_test_ā use index(primary) where a = 10001; + +# Some more DMLs +begin; +insert into corrupt_bit_test_ā values (10002, "a", 20002, 20002); +delete from corrupt_bit_test_ā where a = 10001; +insert into corrupt_bit_test_ā values (10001, "a", 20001, 20001); +rollback; + +# Drop one corrupted index before reboot +drop index idxā on corrupt_bit_test_ā; + +check table corrupt_bit_test_ā; + +# Shut down the server +-- exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +-- shutdown_server 20 +-- source include/wait_until_disconnected.inc + +# Restart the server +-- disable_query_log +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--enable_reconnect +--source include/wait_until_connected_again.inc +--disable_reconnect +-- enable_query_log + +set names utf8; + +# The index is marked as suspect in Sys_indexes too, so after server +# reboot, the attempt to use the index will fail too. 
+-- error ER_NOT_KEYFILE +select z from corrupt_bit_test_ā; + +# Drop the corrupted index +drop index idxē on corrupt_bit_test_ā; + +# Now select back to normal +select z from corrupt_bit_test_ā limit 10; + +# Drop table +drop table corrupt_bit_test_ā; + +-- error 0, ER_UNKNOWN_SYSTEM_VARIABLE +SET GLOBAL innodb_change_buffering_debug = 0; diff --git a/mysql-test/suite/sys_vars/r/all_vars.result b/mysql-test/suite/sys_vars/r/all_vars.result index 715ad9e2c15..edd6f57e32d 100644 --- a/mysql-test/suite/sys_vars/r/all_vars.result +++ b/mysql-test/suite/sys_vars/r/all_vars.result @@ -11,7 +11,9 @@ There should be *no* long test name listed below: select variable_name as `There should be *no* variables listed below:` from t2 left join t1 on variable_name=test_name where test_name is null; There should be *no* variables listed below: +INNODB_FORCE_LOAD_CORRUPTED INNODB_LARGE_PREFIX +INNODB_FORCE_LOAD_CORRUPTED INNODB_LARGE_PREFIX drop table t1; drop table t2; diff --git a/mysys/my_handler_errors.h b/mysys/my_handler_errors.h index 3bd83398e81..428a58b0767 100644 --- a/mysys/my_handler_errors.h +++ b/mysys/my_handler_errors.h @@ -81,7 +81,8 @@ static const char *handler_error_messages[]= "File to short; Expected more data in file", "Read page with wrong checksum", "Too many active concurrent transactions", - "Index column length exceeds limit" + "Index column length exceeds limit", + "Index corrupted" }; extern void my_handler_error_register(void); diff --git a/sql/handler.cc b/sql/handler.cc index 580677242bd..db1eea6484b 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -358,6 +358,7 @@ int ha_init_errors(void) SETMSG(HA_ERR_AUTOINC_ERANGE, ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE)); SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS)); SETMSG(HA_ERR_INDEX_COL_TOO_LONG, ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG)); + SETMSG(HA_ERR_INDEX_CORRUPT, ER_DEFAULT(ER_INDEX_CORRUPT)); /* Register the error messages for use with my_error(). 
*/ return my_error_register(get_handler_errmsgs, HA_ERR_FIRST, HA_ERR_LAST); @@ -2865,6 +2866,9 @@ void handler::print_error(int error, myf errflag) case HA_ERR_INDEX_COL_TOO_LONG: textno= ER_INDEX_COLUMN_TOO_LONG; break; + case HA_ERR_INDEX_CORRUPT: + textno= ER_INDEX_CORRUPT; + break; default: { /* The error was "unknown" to this function. diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index b8f46f090ab..519d693f96d 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -6415,3 +6415,5 @@ ER_ERROR_IN_TRIGGER_BODY ER_ERROR_IN_UNKNOWN_TRIGGER_BODY eng "Unknown trigger has an error in its body: '%-.256s'" +ER_INDEX_CORRUPT + eng "Index %s is corrupted" diff --git a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c index 648e53ac4a6..565fbe7bbf4 100644 --- a/storage/innobase/buf/buf0buf.c +++ b/storage/innobase/buf/buf0buf.c @@ -3474,6 +3474,55 @@ buf_page_create( } /********************************************************************//** +Mark a table with the specified space pointed by bpage->space corrupted. +Also remove the bpage from LRU list. 
+@return TRUE if successful */ +static +ibool +buf_mark_space_corrupt( +/*===================*/ + buf_page_t* bpage) /*!< in: pointer to the block in question */ +{ + buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + const ibool uncompressed = (buf_page_get_state(bpage) + == BUF_BLOCK_FILE_PAGE); + ulint space = bpage->space; + ulint offset = bpage->offset; + ibool ret = TRUE; + + /* First unfix and release lock on the bpage */ + buf_pool_mutex_enter(buf_pool); + mutex_enter(buf_page_get_mutex(bpage)); + ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ); + ut_ad(bpage->buf_fix_count == 0); + + /* Set BUF_IO_NONE before we remove the block from LRU list */ + buf_page_set_io_fix(bpage, BUF_IO_NONE); + + if (uncompressed) { + rw_lock_x_unlock_gen( + &((buf_block_t*) bpage)->lock, + BUF_IO_READ); + } + + /* Find the table with specified space id, and mark it corrupted */ + if (dict_set_corrupted_by_space(space)) { + ut_ad(bpage->space == space && bpage->offset == offset); + buf_LRU_free_one_page(bpage); + } else { + ret = FALSE; + } + + ut_ad(buf_pool->n_pend_reads > 0); + buf_pool->n_pend_reads--; + + mutex_exit(buf_page_get_mutex(bpage)); + buf_pool_mutex_exit(buf_pool); + + return(ret); +} + +/********************************************************************//** Completes an asynchronous read or write request of a file page to or from the buffer pool. 
*/ UNIV_INTERN @@ -3598,10 +3647,19 @@ corrupt: "InnoDB: about forcing recovery.\n", stderr); if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) { - fputs("InnoDB: Ending processing because of" - " a corrupt database page.\n", - stderr); - exit(1); + /* If page space id is larger than TRX_SYS_SPACE + (0), we will attempt to mark the corresponding + table as corrupted instead of crashing server */ + if (bpage->space > TRX_SYS_SPACE + && buf_mark_space_corrupt(bpage)) { + return; + } else { + fputs("InnoDB: Ending processing" + " because of" + " a corrupt database page.\n", + stderr); + ut_error; + } } } diff --git a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c index 93c98719e29..b5ca21e14a6 100644 --- a/storage/innobase/buf/buf0lru.c +++ b/storage/innobase/buf/buf0lru.c @@ -1885,6 +1885,22 @@ buf_LRU_block_free_hashed_page( buf_LRU_block_free_non_file_page(block); } +/******************************************************************//** +Remove one page from LRU list and put it to free list */ +UNIV_INTERN +void +buf_LRU_free_one_page( +/*==================*/ + buf_page_t* bpage) /*!< in/out: block, must contain a file page and + be in a state where it can be freed; there + may or may not be a hash index to the page */ +{ + if (buf_LRU_block_remove_hashed_page(bpage, TRUE) + != BUF_BLOCK_ZIP_FREE) { + buf_LRU_block_free_hashed_page((buf_block_t*) bpage); + } +} + /**********************************************************************//** Updates buf_pool->LRU_old_ratio for one buffer pool instance. 
@return updated old_pct */ diff --git a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c index ead86e37380..dfb733cb36c 100644 --- a/storage/innobase/dict/dict0dict.c +++ b/storage/innobase/dict/dict0dict.c @@ -54,6 +54,7 @@ UNIV_INTERN dict_index_t* dict_ind_compact; #include "row0merge.h" #include "m_ctype.h" /* my_isspace() */ #include "ha_prototypes.h" /* innobase_strcasecmp(), innobase_casedn_str()*/ +#include "row0upd.h" #include <ctype.h> @@ -611,8 +612,7 @@ dict_table_get_on_id( { dict_table_t* table; - if (table_id <= DICT_FIELDS_ID - || trx->dict_operation_lock_mode == RW_X_LATCH) { + if (trx->dict_operation_lock_mode == RW_X_LATCH) { /* Note: An X latch implies that the transaction already owns the dictionary mutex. */ @@ -5046,4 +5046,179 @@ dict_close(void) rw_lock_free(&dict_table_stats_latches[i]); } } + +/**********************************************************************//** +Find a table in dict_sys->table_LRU list with specified space id +@return table if found, NULL if not */ +static +dict_table_t* +dict_find_table_by_space( +/*=====================*/ + ulint space_id) /*!< in: space ID */ +{ + dict_table_t* table; + ulint num_item; + ulint count = 0; + + ut_ad(space_id > 0); + + table = UT_LIST_GET_FIRST(dict_sys->table_LRU); + num_item = UT_LIST_GET_LEN(dict_sys->table_LRU); + + /* This function intentionally does not acquire mutex as it is used + by error handling code in deep call stack as last means to avoid + killing the server, so it worth to risk some consequencies for + the action. 
*/ + while (table && count < num_item) { + if (table->space == space_id) { + return(table); + } + + table = UT_LIST_GET_NEXT(table_LRU, table); + count++; + } + + return(NULL); +} + +/**********************************************************************//** +Flags a table with specified space_id corrupted in the data dictionary +cache +@return TRUE if successful */ +UNIV_INTERN +ibool +dict_set_corrupted_by_space( +/*========================*/ + ulint space_id) /*!< in: space ID */ +{ + dict_table_t* table; + + table = dict_find_table_by_space(space_id); + + if (!table) { + return(FALSE); + } + + /* mark the table->corrupted bit only, since the caller + could be too deep in the stack for SYS_INDEXES update */ + table->corrupted = TRUE; + + return(TRUE); +} + +/**********************************************************************//** +Flags an index corrupted both in the data dictionary cache +and in the SYS_INDEXES */ +UNIV_INTERN +void +dict_set_corrupted( +/*===============*/ + dict_index_t* index) /*!< in/out: index */ +{ + mem_heap_t* heap; + mtr_t mtr; + dict_index_t* sys_index; + dtuple_t* tuple; + dfield_t* dfield; + byte* buf; + const char* status; + btr_cur_t cursor; + + ut_ad(index); + ut_ad(mutex_own(&dict_sys->mutex)); + ut_ad(!dict_table_is_comp(dict_sys->sys_tables)); + ut_ad(!dict_table_is_comp(dict_sys->sys_indexes)); + +#ifdef UNIV_SYNC_DEBUG + ut_ad(sync_thread_levels_empty_except_dict()); +#endif + + /* Mark the table as corrupted only if the clustered index + is corrupted */ + if (dict_index_is_clust(index)) { + index->table->corrupted = TRUE; + } + + if (UNIV_UNLIKELY(dict_index_is_corrupted(index))) { + /* The index was already flagged corrupted. 
*/ + ut_ad(index->table->corrupted); + return; + } + + heap = mem_heap_create(sizeof(dtuple_t) + 2 * (sizeof(dfield_t) + + sizeof(que_fork_t) + sizeof(upd_node_t) + + sizeof(upd_t) + 12)); + mtr_start(&mtr); + index->type |= DICT_CORRUPT; + + sys_index = UT_LIST_GET_FIRST(dict_sys->sys_indexes->indexes); + + /* Find the index row in SYS_INDEXES */ + tuple = dtuple_create(heap, 2); + + dfield = dtuple_get_nth_field(tuple, 0); + buf = mem_heap_alloc(heap, 8); + mach_write_to_8(buf, index->table->id); + dfield_set_data(dfield, buf, 8); + + dfield = dtuple_get_nth_field(tuple, 1); + buf = mem_heap_alloc(heap, 8); + mach_write_to_8(buf, index->id); + dfield_set_data(dfield, buf, 8); + + dict_index_copy_types(tuple, sys_index, 2); + + btr_cur_search_to_nth_level(sys_index, 0, tuple, PAGE_CUR_GE, + BTR_MODIFY_LEAF, + &cursor, 0, __FILE__, __LINE__, &mtr); + + if (cursor.up_match == dtuple_get_n_fields(tuple)) { + /* UPDATE SYS_INDEXES SET TYPE=index->type + WHERE TABLE_ID=index->table->id AND INDEX_ID=index->id */ + ulint len; + byte* field = rec_get_nth_field_old( + btr_cur_get_rec(&cursor), + DICT_SYS_INDEXES_TYPE_FIELD, &len); + if (len != 4) { + goto fail; + } + mlog_write_ulint(field, index->type, MLOG_4BYTES, &mtr); + status = " InnoDB: Flagged corruption of "; + } else { +fail: + status = " InnoDB: Unable to flag corruption of "; + } + + mtr_commit(&mtr); + mem_heap_free(heap); + + ut_print_timestamp(stderr); + fputs(status, stderr); + dict_index_name_print(stderr, NULL, index); + putc('\n', stderr); +} + +/**********************************************************************//** +Flags an index corrupted in the data dictionary cache only. 
This +is used mostly to mark a corrupted index when index's own dictionary +is corrupted, and we force to load such index for repair purpose */ +UNIV_INTERN +void +dict_set_corrupted_index_cache_only( +/*================================*/ + dict_index_t* index) /*!< in/out: index */ +{ + ut_ad(index); + ut_ad(mutex_own(&dict_sys->mutex)); + ut_ad(!dict_table_is_comp(dict_sys->sys_tables)); + ut_ad(!dict_table_is_comp(dict_sys->sys_indexes)); + + /* Mark the table as corrupted only if the clustered index + is corrupted */ + if (dict_index_is_clust(index)) { + index->table->corrupted = TRUE; + } + + index->type |= DICT_CORRUPT; +} #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c index ab1fb16361e..60590aa6638 100644 --- a/storage/innobase/dict/dict0load.c +++ b/storage/innobase/dict/dict0load.c @@ -52,6 +52,11 @@ static const char* SYSTEM_TABLE_NAME[] = { "SYS_FOREIGN", "SYS_FOREIGN_COLS" }; + +/* If this flag is TRUE, then we will load the cluster index's (and tables') +metadata even if it is marked as "corrupted". */ +UNIV_INTERN my_bool srv_load_corrupted = FALSE; + /****************************************************************//** Compare the name of an index column. @return TRUE if the i'th column of index is 'name'. 
*/ @@ -1324,6 +1329,9 @@ err_len: goto err_len; } type = mach_read_from_4(field); + if (UNIV_UNLIKELY(type & (~0 << DICT_IT_BITS))) { + return("unknown SYS_INDEXES.TYPE bits"); + } field = rec_get_nth_field_old(rec, 7/*SPACE*/, &len); if (UNIV_UNLIKELY(len != 4)) { @@ -1423,16 +1431,47 @@ dict_load_indexes( goto next_rec; } else if (err_msg) { fprintf(stderr, "InnoDB: %s\n", err_msg); + if (ignore_err & DICT_ERR_IGNORE_CORRUPT) { + goto next_rec; + } error = DB_CORRUPTION; goto func_exit; } ut_ad(index); + /* Check whether the index is corrupted */ + if (dict_index_is_corrupted(index)) { + ut_print_timestamp(stderr); + fputs(" InnoDB: ", stderr); + dict_index_name_print(stderr, NULL, index); + fputs(" is corrupted\n", stderr); + + if (!srv_load_corrupted + && !(ignore_err & DICT_ERR_IGNORE_CORRUPT) + && dict_index_is_clust(index)) { + dict_mem_index_free(index); + + error = DB_INDEX_CORRUPT; + goto func_exit; + } else { + /* We will load the index if + 1) srv_load_corrupted is TRUE + 2) ignore_err is set with + DICT_ERR_IGNORE_CORRUPT + 3) if the index corrupted is a secondary + index */ + ut_print_timestamp(stderr); + fputs(" InnoDB: load corrupted index ", stderr); + dict_index_name_print(stderr, NULL, index); + putc('\n', stderr); + } + } + /* We check for unsupported types first, so that the subsequent checks are relevant for the supported types. */ - if (index->type & ~(DICT_CLUSTERED | DICT_UNIQUE)) { - + if (index->type & ~(DICT_CLUSTERED | DICT_UNIQUE + | DICT_CORRUPT)) { fprintf(stderr, "InnoDB: Error: unknown type %lu" " of index %s of table %s\n", @@ -1453,9 +1492,13 @@ dict_load_indexes( /* If caller can tolerate this error, we will continue to load the index and let caller deal with this error. However - mark the index and table corrupted */ - index->corrupted = TRUE; - table->corrupted = TRUE; + mark the index and table corrupted. 
We + only need to mark such in the index + dictionary cache for such metadata corruption, + since we would always be able to set it + when loading the dictionary cache */ + dict_set_corrupted_index_cache_only(index); + fprintf(stderr, "InnoDB: Index is corrupt but forcing" " load into data dictionary\n"); @@ -1495,9 +1538,10 @@ corrupted: index->name, table->name); /* If the force recovery flag is set, and - if the failed index is not the primary index, we - will continue and open other indexes */ - if (srv_force_recovery + if the failed index is not the clustered index, + we will continue and open other indexes */ + if ((srv_force_recovery + || srv_load_corrupted) && !dict_index_is_clust(index)) { error = DB_SUCCESS; goto next_rec; @@ -1812,6 +1856,30 @@ err_exit: err = dict_load_indexes(table, heap, ignore_err); + if (err == DB_INDEX_CORRUPT) { + /* Refuse to load the table if the table has a corrupted + cluster index */ + if (!srv_load_corrupted) { + fprintf(stderr, "InnoDB: Error: Load table "); + ut_print_name(stderr, NULL, TRUE, table->name); + fprintf(stderr, " failed, the table has corrupted" + " clustered indexes. Turn on" + " 'innodb_force_load_corrupted'" + " to drop it\n"); + + dict_table_remove_from_cache(table); + table = NULL; + goto func_exit; + } else { + dict_index_t* clust_index; + clust_index = dict_table_get_first_index(table); + + if (dict_index_is_corrupted(clust_index)) { + table->corrupted = TRUE; + } + } + } + /* Initialize table foreign_child value. Its value could be changed when dict_load_foreigns() is called below */ table->fk_max_recusive_level = 0; @@ -1838,9 +1906,15 @@ err_exit: index = dict_table_get_first_index(table); if (!srv_force_recovery || !index - || !dict_index_is_clust(index)) { + || !dict_index_is_clust(index)) { dict_table_remove_from_cache(table); table = NULL; + } else if (dict_index_is_corrupted(index)) { + + /* It is possible we force to load a corrupted + clustered index if srv_load_corrupted is set. 
+ Mark the table as corrupted in this case */ + table->corrupted = TRUE; } } #if 0 @@ -1867,6 +1941,7 @@ err_exit: mutex_exit(&dict_foreign_err_mutex); } #endif /* 0 */ +func_exit: mem_heap_free(heap); return(table); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index f8e96d5fa60..e5d714fc95f 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1043,6 +1043,8 @@ convert_error_code_to_mysql( #endif /* HA_ERR_TOO_MANY_CONCURRENT_TRXS */ case DB_UNSUPPORTED: return(HA_ERR_UNSUPPORTED); + case DB_INDEX_CORRUPT: + return(HA_ERR_INDEX_CORRUPT); } } @@ -2078,6 +2080,27 @@ no_db_name: } +/*****************************************************************//** +A wrapper function of innobase_convert_name(), convert a table or +index name to the MySQL system_charset_info (UTF-8) and quote it if needed. +@return pointer to the end of buf */ +static inline +void +innobase_format_name( +/*==================*/ + char* buf, /*!< out: buffer for converted identifier */ + ulint buflen, /*!< in: length of buf, in bytes */ + const char* name, /*!< in: index or table name to format */ + ibool is_index_name) /*!< in: index name */ +{ + const char* bufend; + + bufend = innobase_convert_name(buf, buflen, name, strlen(name), + NULL, is_index_name); + + buf[bufend - buf] = '\0'; +} + /**********************************************************************//** Determines if the currently running transaction has been interrupted. @return TRUE if interrupted */ @@ -5664,12 +5687,14 @@ ha_innobase::index_read( index = prebuilt->index; - if (UNIV_UNLIKELY(index == NULL)) { + if (UNIV_UNLIKELY(index == NULL) || dict_index_is_corrupted(index)) { prebuilt->index_usable = FALSE; DBUG_RETURN(HA_ERR_CRASHED); } if (UNIV_UNLIKELY(!prebuilt->index_usable)) { - DBUG_RETURN(HA_ERR_TABLE_DEF_CHANGED); + DBUG_RETURN(dict_index_is_corrupted(index) + ? 
HA_ERR_INDEX_CORRUPT + : HA_ERR_TABLE_DEF_CHANGED); } /* Note that if the index for which the search template is built is not @@ -5855,10 +5880,33 @@ ha_innobase::change_active_index( prebuilt->index); if (UNIV_UNLIKELY(!prebuilt->index_usable)) { - push_warning_printf(user_thd, MYSQL_ERROR::WARN_LEVEL_WARN, - HA_ERR_TABLE_DEF_CHANGED, - "InnoDB: insufficient history for index %u", - keynr); + if (dict_index_is_corrupted(prebuilt->index)) { + char index_name[MAX_FULL_NAME_LEN + 1]; + char table_name[MAX_FULL_NAME_LEN + 1]; + + innobase_format_name( + index_name, sizeof index_name, + prebuilt->index->name, TRUE); + + innobase_format_name( + table_name, sizeof table_name, + prebuilt->index->table->name, FALSE); + + push_warning_printf( + user_thd, MYSQL_ERROR::WARN_LEVEL_WARN, + HA_ERR_INDEX_CORRUPT, + "InnoDB: Index %s for table %s is" + " marked as corrupted", + index_name, table_name); + DBUG_RETURN(1); + } else { + push_warning_printf( + user_thd, MYSQL_ERROR::WARN_LEVEL_WARN, + HA_ERR_TABLE_DEF_CHANGED, + "InnoDB: insufficient history for index %u", + keynr); + } + /* The caller seems to ignore this. Thus, we must check this again in row_search_for_mysql(). 
*/ DBUG_RETURN(2); @@ -7518,6 +7566,10 @@ ha_innobase::records_in_range( n_rows = HA_POS_ERROR; goto func_exit; } + if (dict_index_is_corrupted(index)) { + n_rows = HA_ERR_INDEX_CORRUPT; + goto func_exit; + } if (UNIV_UNLIKELY(!row_merge_is_index_usable(prebuilt->trx, index))) { n_rows = HA_ERR_TABLE_DEF_CHANGED; goto func_exit; @@ -8184,6 +8236,7 @@ ha_innobase::check( ulint n_rows_in_table = ULINT_UNDEFINED; ibool is_ok = TRUE; ulint old_isolation_level; + ibool table_corrupted; DBUG_ENTER("ha_innobase::check"); DBUG_ASSERT(thd == ha_thd()); @@ -8225,6 +8278,14 @@ ha_innobase::check( prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ; + /* Check whether the table is already marked as corrupted + before running the check table */ + table_corrupted = prebuilt->table->corrupted; + + /* Reset table->corrupted bit so that check table can proceed to + do additional check */ + prebuilt->table->corrupted = FALSE; + /* Enlarge the fatal lock wait timeout during CHECK TABLE. */ mutex_enter(&kernel_mutex); srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */ @@ -8233,6 +8294,7 @@ ha_innobase::check( for (index = dict_table_get_first_index(prebuilt->table); index != NULL; index = dict_table_get_next_index(index)) { + char index_name[MAX_FULL_NAME_LEN + 1]; #if 0 fputs("Validating index ", stderr); ut_print_name(stderr, trx, FALSE, index->name); @@ -8241,11 +8303,16 @@ ha_innobase::check( if (!btr_validate_index(index, prebuilt->trx)) { is_ok = FALSE; + + innobase_format_name( + index_name, sizeof index_name, + prebuilt->index->name, TRUE); + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_NOT_KEYFILE, "InnoDB: The B-tree of" - " index '%-.200s' is corrupted.", - index->name); + " index %s is corrupted.", + index_name); continue; } @@ -8258,11 +8325,26 @@ ha_innobase::check( prebuilt->trx, prebuilt->index); if (UNIV_UNLIKELY(!prebuilt->index_usable)) { - push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - HA_ERR_TABLE_DEF_CHANGED, - "InnoDB: 
Insufficient history for" - " index '%-.200s'", - index->name); + innobase_format_name( + index_name, sizeof index_name, + prebuilt->index->name, TRUE); + + if (dict_index_is_corrupted(prebuilt->index)) { + push_warning_printf( + user_thd, MYSQL_ERROR::WARN_LEVEL_WARN, + HA_ERR_INDEX_CORRUPT, + "InnoDB: Index %s is marked as" + " corrupted", + index_name); + is_ok = FALSE; + } else { + push_warning_printf( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + HA_ERR_TABLE_DEF_CHANGED, + "InnoDB: Insufficient history for" + " index %s", + index_name); + } continue; } @@ -8276,12 +8358,19 @@ ha_innobase::check( prebuilt->select_lock_type = LOCK_NONE; if (!row_check_index_for_mysql(prebuilt, index, &n_rows)) { + innobase_format_name( + index_name, sizeof index_name, + index->name, TRUE); + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_NOT_KEYFILE, "InnoDB: The B-tree of" - " index '%-.200s' is corrupted.", - index->name); + " index %s is corrupted.", + index_name); is_ok = FALSE; + row_mysql_lock_data_dictionary(prebuilt->trx); + dict_set_corrupted(index); + row_mysql_unlock_data_dictionary(prebuilt->trx); } if (thd_killed(user_thd)) { @@ -8308,6 +8397,20 @@ ha_innobase::check( } } + if (table_corrupted) { + /* If some previous operation has marked the table as + corrupted in memory, and has not propagated such to + clustered index, we will do so here */ + index = dict_table_get_first_index(prebuilt->table); + + if (!dict_index_is_corrupted(index)) { + mutex_enter(&dict_sys->mutex); + dict_set_corrupted(index); + mutex_exit(&dict_sys->mutex); + } + prebuilt->table->corrupted = TRUE; + } + /* Restore the original isolation level */ prebuilt->trx->isolation_level = old_isolation_level; @@ -11101,6 +11204,11 @@ static MYSQL_SYSVAR_BOOL(large_prefix, innobase_large_prefix, "Support large index prefix length of REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes.", NULL, NULL, FALSE); +static MYSQL_SYSVAR_BOOL(force_load_corrupted, srv_load_corrupted, + PLUGIN_VAR_NOCMDARG | 
PLUGIN_VAR_READONLY, + "Force InnoDB to load metadata of corrupted table.", + NULL, NULL, FALSE); + static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, "Force InnoDB to not use next-key locking, to use only row-level locking.", @@ -11360,6 +11468,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(flush_method), MYSQL_SYSVAR(force_recovery), MYSQL_SYSVAR(large_prefix), + MYSQL_SYSVAR(force_load_corrupted), MYSQL_SYSVAR(locks_unsafe_for_binlog), MYSQL_SYSVAR(lock_wait_timeout), #ifdef UNIV_LOG_ARCHIVE diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h index 74ef2d2dab7..eb40621abbe 100644 --- a/storage/innobase/include/buf0lru.h +++ b/storage/innobase/include/buf0lru.h @@ -203,6 +203,17 @@ void buf_LRU_stat_update(void); /*=====================*/ +/******************************************************************//** +Remove one page from LRU list and put it to free list */ +UNIV_INTERN +void +buf_LRU_free_one_page( +/*==================*/ + buf_page_t* bpage) /*!< in/out: block, must contain a file page and + be in a state where it can be freed; there + may or may not be a hash index to the page */ + __attribute__((nonnull)); + #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /**********************************************************************//** Validates the LRU list. 
diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h index 28ef64500cc..415470b61b4 100644 --- a/storage/innobase/include/db0err.h +++ b/storage/innobase/include/db0err.h @@ -110,6 +110,7 @@ enum db_err { foreign keys as its prefix columns */ DB_TOO_BIG_INDEX_COL, /* index column size exceeds maximum limit */ + DB_INDEX_CORRUPT, /* we have corrupted index */ /* The following are partial failure codes */ DB_FAIL = 1000, diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h index 22df826da65..5d136862bc6 100644 --- a/storage/innobase/include/dict0boot.h +++ b/storage/innobase/include/dict0boot.h @@ -137,8 +137,10 @@ dict_create(void); header is created */ /*-------------------------------------------------------------*/ -/* The field number of the page number field in the sys_indexes table -clustered index */ +/* The field numbers in the SYS_TABLES clustered index */ +#define DICT_SYS_TABLES_TYPE_FIELD 5 + +/* The field numbers in the SYS_INDEXES clustered index */ #define DICT_SYS_INDEXES_PAGE_NO_FIELD 8 #define DICT_SYS_INDEXES_SPACE_NO_FIELD 7 #define DICT_SYS_INDEXES_TYPE_FIELD 6 diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index f979d0fcc96..93e9162dc87 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -585,6 +585,20 @@ dict_table_get_next_index( # define dict_table_get_next_index(index) UT_LIST_GET_NEXT(indexes, index) #endif /* UNIV_DEBUG */ #endif /* !UNIV_HOTBACKUP */ + +/* Skip corrupted index */ +#define dict_table_skip_corrupt_index(index) \ + while (index && dict_index_is_corrupted(index)) { \ + index = dict_table_get_next_index(index); \ + } + +/* Get the next non-corrupt index */ +#define dict_table_next_uncorrupted_index(index) \ +do { \ + index = dict_table_get_next_index(index); \ + dict_table_skip_corrupt_index(index); \ +} while (0) + 
/********************************************************************//** Check whether the index is the clustered index. @return nonzero for clustered index, zero for other indexes */ @@ -593,7 +607,7 @@ ulint dict_index_is_clust( /*================*/ const dict_index_t* index) /*!< in: index */ - __attribute__((pure)); + __attribute__((nonnull, pure, warn_unused_result)); /********************************************************************//** Check whether the index is unique. @return nonzero for unique index, zero for other indexes */ @@ -602,7 +616,7 @@ ulint dict_index_is_unique( /*=================*/ const dict_index_t* index) /*!< in: index */ - __attribute__((pure)); + __attribute__((nonnull, pure, warn_unused_result)); /********************************************************************//** Check whether the index is the insert buffer tree. @return nonzero for insert buffer, zero for other indexes */ @@ -611,7 +625,7 @@ ulint dict_index_is_ibuf( /*===============*/ const dict_index_t* index) /*!< in: index */ - __attribute__((pure)); + __attribute__((nonnull, pure, warn_unused_result)); /********************************************************************//** Check whether the index is a secondary index or the insert buffer tree. 
@return nonzero for insert buffer, zero for other indexes */ @@ -620,7 +634,7 @@ ulint dict_index_is_sec_or_ibuf( /*======================*/ const dict_index_t* index) /*!< in: index */ - __attribute__((pure)); + __attribute__((nonnull, pure, warn_unused_result)); /********************************************************************//** Gets the number of user-defined columns in a table in the dictionary @@ -630,7 +644,8 @@ UNIV_INLINE ulint dict_table_get_n_user_cols( /*=======================*/ - const dict_table_t* table); /*!< in: table */ + const dict_table_t* table) /*!< in: table */ + __attribute__((nonnull, pure, warn_unused_result)); /********************************************************************//** Gets the number of system columns in a table in the dictionary cache. @return number of system (e.g., ROW_ID) columns of a table */ @@ -638,7 +653,8 @@ UNIV_INLINE ulint dict_table_get_n_sys_cols( /*======================*/ - const dict_table_t* table); /*!< in: table */ + const dict_table_t* table) /*!< in: table */ + __attribute__((nonnull, pure, warn_unused_result)); /********************************************************************//** Gets the number of all columns (also system) in a table in the dictionary cache. @@ -647,7 +663,8 @@ UNIV_INLINE ulint dict_table_get_n_cols( /*==================*/ - const dict_table_t* table); /*!< in: table */ + const dict_table_t* table) /*!< in: table */ + __attribute__((nonnull, pure, warn_unused_result)); #ifdef UNIV_DEBUG /********************************************************************//** Gets the nth column of a table. @@ -1243,6 +1260,56 @@ void dict_close(void); /*============*/ +/**********************************************************************//** +Check whether the table is corrupted. 
+@return nonzero for corrupted table, zero for valid tables */ +UNIV_INLINE +ulint +dict_table_is_corrupted( +/*====================*/ + const dict_table_t* table) /*!< in: table */ + __attribute__((nonnull, pure, warn_unused_result)); + +/**********************************************************************//** +Check whether the index is corrupted. +@return nonzero for corrupted index, zero for valid indexes */ +UNIV_INLINE +ulint +dict_index_is_corrupted( +/*====================*/ + const dict_index_t* index) /*!< in: index */ + __attribute__((nonnull, pure, warn_unused_result)); + +/**********************************************************************//** +Flags an index and table corrupted both in the data dictionary cache +and in the system table SYS_INDEXES. */ +UNIV_INTERN +void +dict_set_corrupted( +/*===============*/ + dict_index_t* index) /*!< in/out: index */ + UNIV_COLD __attribute__((nonnull)); + +/**********************************************************************//** +Flags an index corrupted in the data dictionary cache only. This +is used mostly to mark a corrupted index when index's own dictionary +is corrupted, and we force to load such index for repair purpose */ +UNIV_INTERN +void +dict_set_corrupted_index_cache_only( +/*================================*/ + dict_index_t* index); /*!< in/out: index */ + +/**********************************************************************//** +Flags a table with specified space_id corrupted in the table dictionary +cache. 
+@return TRUE if successful */ +UNIV_INTERN +ibool +dict_set_corrupted_by_space( +/*========================*/ + ulint space_id); /*!< in: space ID */ + #ifndef UNIV_NONINL #include "dict0dict.ic" #endif diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index 59811568556..ade9e627e29 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -27,6 +27,7 @@ Created 1/8/1996 Heikki Tuuri #ifndef UNIV_HOTBACKUP #include "dict0load.h" #include "rem0types.h" +#include "srv0srv.h" /*********************************************************************//** Gets the minimum number of bytes per character. @@ -828,7 +829,7 @@ dict_table_check_if_in_cache_low( } /**********************************************************************//** -load a table into dictionary cache, ignore any error specified during load; +load a table into dictionary cache, ignore any error specified during load; @return table, NULL if not found */ UNIV_INLINE dict_table_t* @@ -872,6 +873,18 @@ dict_table_get_low( table = dict_table_check_if_in_cache_low(table_name); + if (table && table->corrupted) { + fprintf(stderr, "InnoDB: table"); + ut_print_name(stderr, NULL, TRUE, table->name); + if (srv_load_corrupted) { + fputs(" is corrupted, but" + " innodb_force_load_corrupted is set\n", stderr); + } else { + fputs(" is corrupted\n", stderr); + return(NULL); + } + } + if (table == NULL) { table = dict_load_table(table_name, TRUE, DICT_ERR_IGNORE_NONE); } @@ -937,4 +950,35 @@ dict_max_field_len_store_undo( return(prefix_len); } +/********************************************************************//** +Check whether the table is corrupted. 
+@return nonzero for corrupted table, zero for valid tables */ +UNIV_INLINE +ulint +dict_table_is_corrupted( +/*====================*/ + const dict_table_t* table) /*!< in: table */ +{ + ut_ad(table); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + return(UNIV_UNLIKELY(table->corrupted)); +} + +/********************************************************************//** +Check whether the index is corrupted. +@return nonzero for corrupted index, zero for valid indexes */ +UNIV_INLINE +ulint +dict_index_is_corrupted( +/*====================*/ + const dict_index_t* index) /*!< in: index */ +{ + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + return(UNIV_UNLIKELY((index->type & DICT_CORRUPT) + || (index->table && index->table->corrupted))); +} + #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 3a475fa85fc..9ded0dba39b 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -51,7 +51,12 @@ combination of types */ #define DICT_UNIQUE 2 /*!< unique index */ #define DICT_UNIVERSAL 4 /*!< index which can contain records from any other index */ -#define DICT_IBUF 8 /*!< insert buffer tree */ +#define DICT_IBUF 8 /*!< insert buffer tree */ +#define DICT_CORRUPT 16 /*!< bit to store the corrupted flag + in SYS_INDEXES.TYPE */ + +#define DICT_IT_BITS 5 /*!< number of bits used for + SYS_INDEXES.TYPE */ /* @} */ /** Types for a table object */ @@ -369,8 +374,9 @@ struct dict_index_struct{ /*!< space where the index tree is placed */ unsigned page:32;/*!< index tree root page number */ #endif /* !UNIV_HOTBACKUP */ - unsigned type:4; /*!< index type (DICT_CLUSTERED, DICT_UNIQUE, - DICT_UNIVERSAL, DICT_IBUF) */ + unsigned type:DICT_IT_BITS; + /*!< index type (DICT_CLUSTERED, DICT_UNIQUE, + DICT_UNIVERSAL, DICT_IBUF, DICT_CORRUPT) */ unsigned trx_id_offset:10;/*!< position of the trx id column in a clustered index record, if the fields before it are known 
to be of a fixed size, @@ -391,8 +397,6 @@ struct dict_index_struct{ /*!< TRUE if this index is marked to be dropped in ha_innobase::prepare_drop_index(), otherwise FALSE */ - unsigned corrupted:1; - /*!< TRUE if the index object is corrupted */ dict_field_t* fields; /*!< array of field descriptions */ #ifndef UNIV_HOTBACKUP UT_LIST_NODE_T(dict_index_t) diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h index 8cbd7cd5783..f0a05a38070 100644 --- a/storage/innobase/include/dict0types.h +++ b/storage/innobase/include/dict0types.h @@ -51,7 +51,8 @@ be or-ed together */ enum dict_err_ignore { DICT_ERR_IGNORE_NONE = 0, /*!< no error to ignore */ DICT_ERR_IGNORE_INDEX_ROOT = 1, /*!< ignore error if index root - page is FIL_NUL or incorrect value */ + page is FIL_NULL or incorrect value */ + DICT_ERR_IGNORE_CORRUPT = 2, /*!< skip corrupted indexes */ DICT_ERR_IGNORE_ALL = 0xFFFF /*!< ignore all errors */ }; diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 7a93548cb03..dfe7397d189 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -141,6 +141,10 @@ extern ulint srv_log_buffer_size; extern ulong srv_flush_log_at_trx_commit; extern char srv_adaptive_flushing; +/* If this flag is TRUE, then we will load the indexes' (and tables') metadata +even if they are marked as "corrupted". 
Mostly it is for DBA to process +corrupted index and table */ +extern my_bool srv_load_corrupted; /* The sort order table of the MySQL latin1_swedish_ci character set collation */ diff --git a/storage/innobase/pars/pars0opt.c b/storage/innobase/pars/pars0opt.c index 2e392ba4836..d992805d9ef 100644 --- a/storage/innobase/pars/pars0opt.c +++ b/storage/innobase/pars/pars0opt.c @@ -568,7 +568,7 @@ opt_search_plan_for_table( best_last_op = last_op; } - index = dict_table_get_next_index(index); + dict_table_next_uncorrupted_index(index); } plan->index = best_index; diff --git a/storage/innobase/row/row0ins.c b/storage/innobase/row/row0ins.c index 715e376f8f9..62146a95f11 100644 --- a/storage/innobase/row/row0ins.c +++ b/storage/innobase/row/row0ins.c @@ -118,6 +118,9 @@ ins_node_create_entry_list( node->entry_sys_heap); UT_LIST_ADD_LAST(tuple_list, node->entry_list, entry); + /* We will include all indexes (include those corrupted + secondary indexes) in the entry list. Filteration of + these corrupted index will be done in row_ins() */ index = dict_table_get_next_index(index); } } @@ -2046,7 +2049,6 @@ row_ins_index_entry_low( mtr_start(&mtr); if (err != DB_SUCCESS) { - goto function_exit; } @@ -2431,6 +2433,13 @@ row_ins( node->index = dict_table_get_next_index(node->index); node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry); + + /* Skip corrupted secondar index and its entry */ + while (node->index && dict_index_is_corrupted(node->index)) { + + node->index = dict_table_get_next_index(node->index); + node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry); + } } ut_ad(node->entry == NULL); diff --git a/storage/innobase/row/row0merge.c b/storage/innobase/row/row0merge.c index 5be437add5a..d42f21241ca 100644 --- a/storage/innobase/row/row0merge.c +++ b/storage/innobase/row/row0merge.c @@ -2554,8 +2554,9 @@ row_merge_is_index_usable( const trx_t* trx, /*!< in: transaction */ const dict_index_t* index) /*!< in: index to check */ { - return(!trx->read_view - || 
read_view_sees_trx_id(trx->read_view, index->trx_id)); + return(!dict_index_is_corrupted(index) + && (!trx->read_view + || read_view_sees_trx_id(trx->read_view, index->trx_id))); } /*********************************************************************//** diff --git a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c index a56d419d1f0..d98d47a5da6 100644 --- a/storage/innobase/row/row0mysql.c +++ b/storage/innobase/row/row0mysql.c @@ -3098,7 +3098,8 @@ row_drop_table_for_mysql( ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); #endif /* UNIV_SYNC_DEBUG */ - table = dict_table_get_low_ignore_err(name, DICT_ERR_IGNORE_INDEX_ROOT); + table = dict_table_get_low_ignore_err( + name, DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT); if (!table) { err = DB_TABLE_NOT_FOUND; diff --git a/storage/innobase/row/row0purge.c b/storage/innobase/row/row0purge.c index 83e7c9e4857..c008c2d1c31 100644 --- a/storage/innobase/row/row0purge.c +++ b/storage/innobase/row/row0purge.c @@ -469,6 +469,13 @@ row_purge_del_mark( heap = mem_heap_create(1024); while (node->index != NULL) { + /* skip corrupted secondary index */ + dict_table_skip_corrupt_index(node->index); + + if (!node->index) { + break; + } + index = node->index; /* Build the index entry */ @@ -516,6 +523,12 @@ row_purge_upd_exist_or_extern_func( heap = mem_heap_create(1024); while (node->index != NULL) { + dict_table_skip_corrupt_index(node->index); + + if (!node->index) { + break; + } + index = node->index; if (row_upd_changes_ord_field_binary(node->index, node->update, diff --git a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c index 36da621e077..a1039010d00 100644 --- a/storage/innobase/row/row0sel.c +++ b/storage/innobase/row/row0sel.c @@ -3441,6 +3441,13 @@ row_search_for_mysql( return(DB_MISSING_HISTORY); } + if (dict_index_is_corrupted(index)) { +#ifdef UNIV_SYNC_DEBUG + ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch)); +#endif /* UNIV_SYNC_DEBUG */ + 
return(DB_CORRUPTION); + } + if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) { fprintf(stderr, "InnoDB: Error: trying to free a corrupt\n" diff --git a/storage/innobase/row/row0uins.c b/storage/innobase/row/row0uins.c index d25afed3840..4fa97c9355d 100644 --- a/storage/innobase/row/row0uins.c +++ b/storage/innobase/row/row0uins.c @@ -328,6 +328,8 @@ row_undo_ins( node->index = dict_table_get_next_index( dict_table_get_first_index(node->table)); + dict_table_skip_corrupt_index(node->index); + while (node->index != NULL) { dtuple_t* entry; ulint err; @@ -355,7 +357,7 @@ row_undo_ins( } } - node->index = dict_table_get_next_index(node->index); + dict_table_next_uncorrupted_index(node->index); } log_free_check(); diff --git a/storage/innobase/row/row0umod.c b/storage/innobase/row/row0umod.c index 2188fdeff49..b86ce9eeabd 100644 --- a/storage/innobase/row/row0umod.c +++ b/storage/innobase/row/row0umod.c @@ -573,6 +573,14 @@ row_undo_mod_upd_del_sec( heap = mem_heap_create(1024); while (node->index != NULL) { + + /* Skip all corrupted secondary index */ + dict_table_skip_corrupt_index(node->index); + + if (!node->index) { + break; + } + index = node->index; entry = row_build_index_entry(node->row, node->ext, @@ -626,6 +634,13 @@ row_undo_mod_del_mark_sec( heap = mem_heap_create(1024); while (node->index != NULL) { + /* Skip all corrupted secondary index */ + dict_table_skip_corrupt_index(node->index); + + if (!node->index) { + break; + } + index = node->index; entry = row_build_index_entry(node->row, node->ext, @@ -677,6 +692,13 @@ row_undo_mod_upd_exist_sec( heap = mem_heap_create(1024); while (node->index != NULL) { + /* Skip all corrupted secondary index */ + dict_table_skip_corrupt_index(node->index); + + if (!node->index) { + break; + } + index = node->index; if (row_upd_changes_ord_field_binary(node->index, node->update, @@ -859,6 +881,9 @@ row_undo_mod( node->index = dict_table_get_next_index( dict_table_get_first_index(node->table)); + /* Skip all 
corrupted secondary index */ + dict_table_skip_corrupt_index(node->index); + if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) { err = row_undo_mod_upd_exist_sec(node, thr); diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c index a2f6c17413f..6559c529117 100644 --- a/storage/innobase/row/row0upd.c +++ b/storage/innobase/row/row0upd.c @@ -2320,6 +2320,13 @@ row_upd( while (node->index != NULL) { + /* Skip corrupted index */ + dict_table_skip_corrupt_index(node->index); + + if (!node->index) { + break; + } + log_free_check(); err = row_upd_sec_step(node, thr); diff --git a/storage/innobase/ut/ut0ut.c b/storage/innobase/ut/ut0ut.c index 1ef1a082bb2..bd009f1fd32 100644 --- a/storage/innobase/ut/ut0ut.c +++ b/storage/innobase/ut/ut0ut.c @@ -712,6 +712,8 @@ ut_strerr( return("No index on referencing keys in referencing table"); case DB_PARENT_NO_INDEX: return("No index on referenced keys in referenced table"); + case DB_INDEX_CORRUPT: + return("Index corrupted"); case DB_END_OF_INDEX: return("End of index"); /* do not add default: in order to produce a warning if new code |