summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJimmy Yang <jimmy.yang@oracle.com>2011-08-16 18:07:59 -0700
committerJimmy Yang <jimmy.yang@oracle.com>2011-08-16 18:07:59 -0700
commit95fa7fab3b70e5117d757d9df42ac9d7040fea03 (patch)
treef35296e1cd9a625708ab194372e0944f1309f78c
parent887ac6774f9d33b64d7c316e9310038592ec6a83 (diff)
downloadmariadb-git-95fa7fab3b70e5117d757d9df42ac9d7040fea03.tar.gz
Fix bug #11830883, SUPPORT "CORRUPTED" BIT FOR INNODB TABLES AND INDEXES.
Also addressed issues in bug #11745133, where we could mark a table corrupted instead of crashing the server when found a corrupted buffer/page if the table created with innodb_file_per_table on.
-rw-r--r--include/my_base.h5
-rw-r--r--mysql-test/suite/innodb/r/innodb_corrupt_bit.result81
-rw-r--r--mysql-test/suite/innodb/t/innodb_corrupt_bit.test135
-rw-r--r--mysql-test/suite/sys_vars/r/all_vars.result2
-rw-r--r--mysys/my_handler_errors.h3
-rw-r--r--sql/handler.cc4
-rw-r--r--sql/share/errmsg-utf8.txt2
-rw-r--r--storage/innobase/buf/buf0buf.c66
-rw-r--r--storage/innobase/buf/buf0lru.c16
-rw-r--r--storage/innobase/dict/dict0dict.c179
-rw-r--r--storage/innobase/dict/dict0load.c93
-rw-r--r--storage/innobase/handler/ha_innodb.cc139
-rw-r--r--storage/innobase/include/buf0lru.h11
-rw-r--r--storage/innobase/include/db0err.h1
-rw-r--r--storage/innobase/include/dict0boot.h6
-rw-r--r--storage/innobase/include/dict0dict.h81
-rw-r--r--storage/innobase/include/dict0dict.ic46
-rw-r--r--storage/innobase/include/dict0mem.h14
-rw-r--r--storage/innobase/include/dict0types.h3
-rw-r--r--storage/innobase/include/srv0srv.h4
-rw-r--r--storage/innobase/pars/pars0opt.c2
-rw-r--r--storage/innobase/row/row0ins.c11
-rw-r--r--storage/innobase/row/row0merge.c5
-rw-r--r--storage/innobase/row/row0mysql.c3
-rw-r--r--storage/innobase/row/row0purge.c13
-rw-r--r--storage/innobase/row/row0sel.c7
-rw-r--r--storage/innobase/row/row0uins.c4
-rw-r--r--storage/innobase/row/row0umod.c25
-rw-r--r--storage/innobase/row/row0upd.c7
-rw-r--r--storage/innobase/ut/ut0ut.c2
30 files changed, 915 insertions, 55 deletions
diff --git a/include/my_base.h b/include/my_base.h
index f6afc891281..cc02b0080d9 100644
--- a/include/my_base.h
+++ b/include/my_base.h
@@ -446,8 +446,9 @@ enum ha_base_keytype {
#define HA_ERR_FILE_TOO_SHORT 175 /* File too short */
#define HA_ERR_WRONG_CRC 176 /* Wrong CRC on page */
#define HA_ERR_TOO_MANY_CONCURRENT_TRXS 177 /*Too many active concurrent transactions */
-#define HA_ERR_INDEX_COL_TOO_LONG 178 /* Index column length exceeds limit */
-#define HA_ERR_LAST 178 /* Copy of last error nr */
+#define HA_ERR_INDEX_COL_TOO_LONG 178 /* Index column length exceeds limit */
+#define HA_ERR_INDEX_CORRUPT 179 /* Index corrupted */
+#define HA_ERR_LAST 179 /* Copy of last error nr */
/* Number of different errors */
#define HA_ERR_ERRORS (HA_ERR_LAST - HA_ERR_FIRST + 1)
diff --git a/mysql-test/suite/innodb/r/innodb_corrupt_bit.result b/mysql-test/suite/innodb/r/innodb_corrupt_bit.result
new file mode 100644
index 00000000000..4253adc93aa
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb_corrupt_bit.result
@@ -0,0 +1,81 @@
+set names utf8;
+CREATE TABLE corrupt_bit_test_ā(
+a INT AUTO_INCREMENT PRIMARY KEY,
+b CHAR(100),
+c INT,
+z INT,
+INDEX(b))
+ENGINE=InnoDB;
+INSERT INTO corrupt_bit_test_ā VALUES(0,'x',1, 1);
+CREATE UNIQUE INDEX idxā ON corrupt_bit_test_ā(c, b);
+CREATE UNIQUE INDEX idxē ON corrupt_bit_test_ā(z, b);
+SELECT * FROM corrupt_bit_test_ā;
+a b c z
+1 x 1 1
+select @@unique_checks;
+@@unique_checks
+0
+select @@innodb_change_buffering_debug;
+@@innodb_change_buffering_debug
+1
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+1,z+1 FROM corrupt_bit_test_ā;
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+10,z+10 FROM corrupt_bit_test_ā;
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+20,z+20 FROM corrupt_bit_test_ā;
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+50,z+50 FROM corrupt_bit_test_ā;
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+100,z+100 FROM corrupt_bit_test_ā;
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+200,z+200 FROM corrupt_bit_test_ā;
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+400,z+400 FROM corrupt_bit_test_ā;
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+800,z+800 FROM corrupt_bit_test_ā;
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+1600,z+1600 FROM corrupt_bit_test_ā;
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+4000,z+4000 FROM corrupt_bit_test_ā;
+select count(*) from corrupt_bit_test_ā;
+count(*)
+1024
+CREATE INDEX idx3 ON corrupt_bit_test_ā(b, c);
+INSERT INTO corrupt_bit_test_ā VALUES(13000,'x',1,1);
+CREATE INDEX idx4 ON corrupt_bit_test_ā(b, z);
+check table corrupt_bit_test_ā;
+Table Op Msg_type Msg_text
+test.corrupt_bit_test_ā check Warning InnoDB: The B-tree of index "idxā" is corrupted.
+test.corrupt_bit_test_ā check Warning InnoDB: The B-tree of index "idxē" is corrupted.
+test.corrupt_bit_test_ā check error Corrupt
+select c from corrupt_bit_test_ā;
+ERROR HY000: Incorrect key file for table 'corrupt_bit_test_ā'; try to repair it
+select z from corrupt_bit_test_ā;
+ERROR HY000: Incorrect key file for table 'corrupt_bit_test_ā'; try to repair it
+show warnings;
+Level Code Message
+Warning 179 InnoDB: Index "idxē" for table "test/corrupt_bit_test_@1s" is marked as corrupted
+Error 1034 Incorrect key file for table 'corrupt_bit_test_ā'; try to repair it
+insert into corrupt_bit_test_ā values (10001, "a", 20001, 20001);
+select * from corrupt_bit_test_ā use index(primary) where a = 10001;
+a b c z
+10001 a 20001 20001
+begin;
+insert into corrupt_bit_test_ā values (10002, "a", 20002, 20002);
+delete from corrupt_bit_test_ā where a = 10001;
+insert into corrupt_bit_test_ā values (10001, "a", 20001, 20001);
+rollback;
+drop index idxā on corrupt_bit_test_ā;
+check table corrupt_bit_test_ā;
+Table Op Msg_type Msg_text
+test.corrupt_bit_test_ā check Warning InnoDB: Index "idxē" is marked as corrupted
+test.corrupt_bit_test_ā check error Corrupt
+set names utf8;
+select z from corrupt_bit_test_ā;
+ERROR HY000: Incorrect key file for table 'corrupt_bit_test_ā'; try to repair it
+drop index idxē on corrupt_bit_test_ā;
+select z from corrupt_bit_test_ā limit 10;
+z
+20001
+1
+1
+2
+11
+12
+21
+22
+31
+32
+drop table corrupt_bit_test_ā;
+SET GLOBAL innodb_change_buffering_debug = 0;
diff --git a/mysql-test/suite/innodb/t/innodb_corrupt_bit.test b/mysql-test/suite/innodb/t/innodb_corrupt_bit.test
new file mode 100644
index 00000000000..7d98a8e4a03
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb_corrupt_bit.test
@@ -0,0 +1,135 @@
+#
+# Test for persistent corrupt bit for corrupted index and table
+#
+-- source include/have_innodb.inc
+
+# This test needs debug server
+--source include/have_debug.inc
+
+-- disable_query_log
+# This test setup is extracted from bug56680.test:
+# The flag innodb_change_buffering_debug is only available in debug builds.
+# It instructs InnoDB to try to evict pages from the buffer pool when
+# change buffering is possible, so that the change buffer will be used
+# whenever possible.
+-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE
+SET @innodb_change_buffering_debug_orig = @@innodb_change_buffering_debug;
+-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE
+SET GLOBAL innodb_change_buffering_debug = 1;
+
+# Turn off Unique Check to create corrupted index with dup key
+SET UNIQUE_CHECKS=0;
+
+-- enable_query_log
+
+set names utf8;
+
+CREATE TABLE corrupt_bit_test_ā(
+ a INT AUTO_INCREMENT PRIMARY KEY,
+ b CHAR(100),
+ c INT,
+ z INT,
+ INDEX(b))
+ENGINE=InnoDB;
+
+INSERT INTO corrupt_bit_test_ā VALUES(0,'x',1, 1);
+
+# This is the first unique index we intend to corrupt
+CREATE UNIQUE INDEX idxā ON corrupt_bit_test_ā(c, b);
+
+# This is the second unique index we intend to corrupt
+CREATE UNIQUE INDEX idxē ON corrupt_bit_test_ā(z, b);
+
+SELECT * FROM corrupt_bit_test_ā;
+
+select @@unique_checks;
+select @@innodb_change_buffering_debug;
+
+# Create enough rows for the table, so that the insert buffer will be
+# used for modifying the secondary index page. There must be multiple
+# index pages, because changes to the root page are never buffered.
+
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+1,z+1 FROM corrupt_bit_test_ā;
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+10,z+10 FROM corrupt_bit_test_ā;
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+20,z+20 FROM corrupt_bit_test_ā;
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+50,z+50 FROM corrupt_bit_test_ā;
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+100,z+100 FROM corrupt_bit_test_ā;
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+200,z+200 FROM corrupt_bit_test_ā;
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+400,z+400 FROM corrupt_bit_test_ā;
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+800,z+800 FROM corrupt_bit_test_ā;
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+1600,z+1600 FROM corrupt_bit_test_ā;
+INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+4000,z+4000 FROM corrupt_bit_test_ā;
+
+select count(*) from corrupt_bit_test_ā;
+
+CREATE INDEX idx3 ON corrupt_bit_test_ā(b, c);
+
+# Create a dup key error on index "idxē" and "idxā" by inserting a dup value
+INSERT INTO corrupt_bit_test_ā VALUES(13000,'x',1,1);
+
+# creating an index should succeed even if other secondary indexes are corrupted
+CREATE INDEX idx4 ON corrupt_bit_test_ā(b, z);
+
+# Check table will find the unique indexes corrupted
+# with dup key
+check table corrupt_bit_test_ā;
+
+# This selection intend to use the corrupted index. Expect to fail
+-- error ER_NOT_KEYFILE
+select c from corrupt_bit_test_ā;
+
+-- error ER_NOT_KEYFILE
+select z from corrupt_bit_test_ā;
+
+show warnings;
+
+# Since corrupted index is a secondary index, we only disable such
+# index and allow other DML to proceed
+insert into corrupt_bit_test_ā values (10001, "a", 20001, 20001);
+
+# This does not use the corrupted index, expect to succeed
+select * from corrupt_bit_test_ā use index(primary) where a = 10001;
+
+# Some more DMLs
+begin;
+insert into corrupt_bit_test_ā values (10002, "a", 20002, 20002);
+delete from corrupt_bit_test_ā where a = 10001;
+insert into corrupt_bit_test_ā values (10001, "a", 20001, 20001);
+rollback;
+
+# Drop one corrupted index before reboot
+drop index idxā on corrupt_bit_test_ā;
+
+check table corrupt_bit_test_ā;
+
+# Shut down the server
+-- exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+-- shutdown_server 20
+-- source include/wait_until_disconnected.inc
+
+# Restart the server
+-- disable_query_log
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+-- enable_query_log
+
+set names utf8;
+
+# The index is marked as suspect in Sys_indexes too, so after server
+# reboot, the attempt to use the index will fail too.
+-- error ER_NOT_KEYFILE
+select z from corrupt_bit_test_ā;
+
+# Drop the corrupted index
+drop index idxē on corrupt_bit_test_ā;
+
+# Now select back to normal
+select z from corrupt_bit_test_ā limit 10;
+
+# Drop table
+drop table corrupt_bit_test_ā;
+
+-- error 0, ER_UNKNOWN_SYSTEM_VARIABLE
+SET GLOBAL innodb_change_buffering_debug = 0;
diff --git a/mysql-test/suite/sys_vars/r/all_vars.result b/mysql-test/suite/sys_vars/r/all_vars.result
index 715ad9e2c15..edd6f57e32d 100644
--- a/mysql-test/suite/sys_vars/r/all_vars.result
+++ b/mysql-test/suite/sys_vars/r/all_vars.result
@@ -11,7 +11,9 @@ There should be *no* long test name listed below:
select variable_name as `There should be *no* variables listed below:` from t2
left join t1 on variable_name=test_name where test_name is null;
There should be *no* variables listed below:
+INNODB_FORCE_LOAD_CORRUPTED
INNODB_LARGE_PREFIX
+INNODB_FORCE_LOAD_CORRUPTED
INNODB_LARGE_PREFIX
drop table t1;
drop table t2;
diff --git a/mysys/my_handler_errors.h b/mysys/my_handler_errors.h
index 3bd83398e81..428a58b0767 100644
--- a/mysys/my_handler_errors.h
+++ b/mysys/my_handler_errors.h
@@ -81,7 +81,8 @@ static const char *handler_error_messages[]=
"File to short; Expected more data in file",
"Read page with wrong checksum",
"Too many active concurrent transactions",
- "Index column length exceeds limit"
+ "Index column length exceeds limit",
+ "Index corrupted"
};
extern void my_handler_error_register(void);
diff --git a/sql/handler.cc b/sql/handler.cc
index 580677242bd..db1eea6484b 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -358,6 +358,7 @@ int ha_init_errors(void)
SETMSG(HA_ERR_AUTOINC_ERANGE, ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE));
SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS));
SETMSG(HA_ERR_INDEX_COL_TOO_LONG, ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG));
+ SETMSG(HA_ERR_INDEX_CORRUPT, ER_DEFAULT(ER_INDEX_CORRUPT));
/* Register the error messages for use with my_error(). */
return my_error_register(get_handler_errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
@@ -2865,6 +2866,9 @@ void handler::print_error(int error, myf errflag)
case HA_ERR_INDEX_COL_TOO_LONG:
textno= ER_INDEX_COLUMN_TOO_LONG;
break;
+ case HA_ERR_INDEX_CORRUPT:
+ textno= ER_INDEX_CORRUPT;
+ break;
default:
{
/* The error was "unknown" to this function.
diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt
index b8f46f090ab..519d693f96d 100644
--- a/sql/share/errmsg-utf8.txt
+++ b/sql/share/errmsg-utf8.txt
@@ -6415,3 +6415,5 @@ ER_ERROR_IN_TRIGGER_BODY
ER_ERROR_IN_UNKNOWN_TRIGGER_BODY
eng "Unknown trigger has an error in its body: '%-.256s'"
+ER_INDEX_CORRUPT
+ eng "Index %s is corrupted"
diff --git a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
index 648e53ac4a6..565fbe7bbf4 100644
--- a/storage/innobase/buf/buf0buf.c
+++ b/storage/innobase/buf/buf0buf.c
@@ -3474,6 +3474,55 @@ buf_page_create(
}
/********************************************************************//**
+Mark a table with the specified space pointed by bpage->space corrupted.
+Also remove the bpage from LRU list.
+@return TRUE if successful */
+static
+ibool
+buf_mark_space_corrupt(
+/*===================*/
+ buf_page_t* bpage) /*!< in: pointer to the block in question */
+{
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ const ibool uncompressed = (buf_page_get_state(bpage)
+ == BUF_BLOCK_FILE_PAGE);
+ ulint space = bpage->space;
+ ulint offset = bpage->offset;
+ ibool ret = TRUE;
+
+ /* First unfix and release lock on the bpage */
+ buf_pool_mutex_enter(buf_pool);
+ mutex_enter(buf_page_get_mutex(bpage));
+ ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
+ ut_ad(bpage->buf_fix_count == 0);
+
+ /* Set BUF_IO_NONE before we remove the block from LRU list */
+ buf_page_set_io_fix(bpage, BUF_IO_NONE);
+
+ if (uncompressed) {
+ rw_lock_x_unlock_gen(
+ &((buf_block_t*) bpage)->lock,
+ BUF_IO_READ);
+ }
+
+ /* Find the table with specified space id, and mark it corrupted */
+ if (dict_set_corrupted_by_space(space)) {
+ ut_ad(bpage->space == space && bpage->offset == offset);
+ buf_LRU_free_one_page(bpage);
+ } else {
+ ret = FALSE;
+ }
+
+ ut_ad(buf_pool->n_pend_reads > 0);
+ buf_pool->n_pend_reads--;
+
+ mutex_exit(buf_page_get_mutex(bpage));
+ buf_pool_mutex_exit(buf_pool);
+
+ return(ret);
+}
+
+/********************************************************************//**
Completes an asynchronous read or write request of a file page to or from
the buffer pool. */
UNIV_INTERN
@@ -3598,10 +3647,19 @@ corrupt:
"InnoDB: about forcing recovery.\n", stderr);
if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
- fputs("InnoDB: Ending processing because of"
- " a corrupt database page.\n",
- stderr);
- exit(1);
+ /* If page space id is larger than TRX_SYS_SPACE
+ (0), we will attempt to mark the corresponding
+ table as corrupted instead of crashing server */
+ if (bpage->space > TRX_SYS_SPACE
+ && buf_mark_space_corrupt(bpage)) {
+ return;
+ } else {
+ fputs("InnoDB: Ending processing"
+ " because of"
+ " a corrupt database page.\n",
+ stderr);
+ ut_error;
+ }
}
}
diff --git a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
index 93c98719e29..b5ca21e14a6 100644
--- a/storage/innobase/buf/buf0lru.c
+++ b/storage/innobase/buf/buf0lru.c
@@ -1885,6 +1885,22 @@ buf_LRU_block_free_hashed_page(
buf_LRU_block_free_non_file_page(block);
}
+/******************************************************************//**
+Remove one page from LRU list and put it to free list */
+UNIV_INTERN
+void
+buf_LRU_free_one_page(
+/*==================*/
+ buf_page_t* bpage) /*!< in/out: block, must contain a file page and
+ be in a state where it can be freed; there
+ may or may not be a hash index to the page */
+{
+ if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
+ != BUF_BLOCK_ZIP_FREE) {
+ buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
+ }
+}
+
/**********************************************************************//**
Updates buf_pool->LRU_old_ratio for one buffer pool instance.
@return updated old_pct */
diff --git a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c
index ead86e37380..dfb733cb36c 100644
--- a/storage/innobase/dict/dict0dict.c
+++ b/storage/innobase/dict/dict0dict.c
@@ -54,6 +54,7 @@ UNIV_INTERN dict_index_t* dict_ind_compact;
#include "row0merge.h"
#include "m_ctype.h" /* my_isspace() */
#include "ha_prototypes.h" /* innobase_strcasecmp(), innobase_casedn_str()*/
+#include "row0upd.h"
#include <ctype.h>
@@ -611,8 +612,7 @@ dict_table_get_on_id(
{
dict_table_t* table;
- if (table_id <= DICT_FIELDS_ID
- || trx->dict_operation_lock_mode == RW_X_LATCH) {
+ if (trx->dict_operation_lock_mode == RW_X_LATCH) {
/* Note: An X latch implies that the transaction
already owns the dictionary mutex. */
@@ -5046,4 +5046,179 @@ dict_close(void)
rw_lock_free(&dict_table_stats_latches[i]);
}
}
+
+/**********************************************************************//**
+Find a table in dict_sys->table_LRU list with specified space id
+@return table if found, NULL if not */
+static
+dict_table_t*
+dict_find_table_by_space(
+/*=====================*/
+ ulint space_id) /*!< in: space ID */
+{
+ dict_table_t* table;
+ ulint num_item;
+ ulint count = 0;
+
+ ut_ad(space_id > 0);
+
+ table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
+ num_item = UT_LIST_GET_LEN(dict_sys->table_LRU);
+
+ /* This function intentionally does not acquire mutex as it is used
+ by error handling code in deep call stack as last means to avoid
+ killing the server, so it worth to risk some consequencies for
+ the action. */
+ while (table && count < num_item) {
+ if (table->space == space_id) {
+ return(table);
+ }
+
+ table = UT_LIST_GET_NEXT(table_LRU, table);
+ count++;
+ }
+
+ return(NULL);
+}
+
+/**********************************************************************//**
+Flags a table with specified space_id corrupted in the data dictionary
+cache
+@return TRUE if successful */
+UNIV_INTERN
+ibool
+dict_set_corrupted_by_space(
+/*========================*/
+ ulint space_id) /*!< in: space ID */
+{
+ dict_table_t* table;
+
+ table = dict_find_table_by_space(space_id);
+
+ if (!table) {
+ return(FALSE);
+ }
+
+ /* mark the table->corrupted bit only, since the caller
+ could be too deep in the stack for SYS_INDEXES update */
+ table->corrupted = TRUE;
+
+ return(TRUE);
+}
+
+/**********************************************************************//**
+Flags an index corrupted both in the data dictionary cache
+and in the SYS_INDEXES */
+UNIV_INTERN
+void
+dict_set_corrupted(
+/*===============*/
+ dict_index_t* index) /*!< in/out: index */
+{
+ mem_heap_t* heap;
+ mtr_t mtr;
+ dict_index_t* sys_index;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ byte* buf;
+ const char* status;
+ btr_cur_t cursor;
+
+ ut_ad(index);
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ut_ad(!dict_table_is_comp(dict_sys->sys_tables));
+ ut_ad(!dict_table_is_comp(dict_sys->sys_indexes));
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(sync_thread_levels_empty_except_dict());
+#endif
+
+ /* Mark the table as corrupted only if the clustered index
+ is corrupted */
+ if (dict_index_is_clust(index)) {
+ index->table->corrupted = TRUE;
+ }
+
+ if (UNIV_UNLIKELY(dict_index_is_corrupted(index))) {
+ /* The index was already flagged corrupted. */
+ ut_ad(index->table->corrupted);
+ return;
+ }
+
+ heap = mem_heap_create(sizeof(dtuple_t) + 2 * (sizeof(dfield_t)
+ + sizeof(que_fork_t) + sizeof(upd_node_t)
+ + sizeof(upd_t) + 12));
+ mtr_start(&mtr);
+ index->type |= DICT_CORRUPT;
+
+ sys_index = UT_LIST_GET_FIRST(dict_sys->sys_indexes->indexes);
+
+ /* Find the index row in SYS_INDEXES */
+ tuple = dtuple_create(heap, 2);
+
+ dfield = dtuple_get_nth_field(tuple, 0);
+ buf = mem_heap_alloc(heap, 8);
+ mach_write_to_8(buf, index->table->id);
+ dfield_set_data(dfield, buf, 8);
+
+ dfield = dtuple_get_nth_field(tuple, 1);
+ buf = mem_heap_alloc(heap, 8);
+ mach_write_to_8(buf, index->id);
+ dfield_set_data(dfield, buf, 8);
+
+ dict_index_copy_types(tuple, sys_index, 2);
+
+ btr_cur_search_to_nth_level(sys_index, 0, tuple, PAGE_CUR_GE,
+ BTR_MODIFY_LEAF,
+ &cursor, 0, __FILE__, __LINE__, &mtr);
+
+ if (cursor.up_match == dtuple_get_n_fields(tuple)) {
+ /* UPDATE SYS_INDEXES SET TYPE=index->type
+ WHERE TABLE_ID=index->table->id AND INDEX_ID=index->id */
+ ulint len;
+ byte* field = rec_get_nth_field_old(
+ btr_cur_get_rec(&cursor),
+ DICT_SYS_INDEXES_TYPE_FIELD, &len);
+ if (len != 4) {
+ goto fail;
+ }
+ mlog_write_ulint(field, index->type, MLOG_4BYTES, &mtr);
+ status = " InnoDB: Flagged corruption of ";
+ } else {
+fail:
+ status = " InnoDB: Unable to flag corruption of ";
+ }
+
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+
+ ut_print_timestamp(stderr);
+ fputs(status, stderr);
+ dict_index_name_print(stderr, NULL, index);
+ putc('\n', stderr);
+}
+
+/**********************************************************************//**
+Flags an index corrupted in the data dictionary cache only. This
+is used mostly to mark a corrupted index when index's own dictionary
+is corrupted, and we force to load such index for repair purpose */
+UNIV_INTERN
+void
+dict_set_corrupted_index_cache_only(
+/*================================*/
+ dict_index_t* index) /*!< in/out: index */
+{
+ ut_ad(index);
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ut_ad(!dict_table_is_comp(dict_sys->sys_tables));
+ ut_ad(!dict_table_is_comp(dict_sys->sys_indexes));
+
+ /* Mark the table as corrupted only if the clustered index
+ is corrupted */
+ if (dict_index_is_clust(index)) {
+ index->table->corrupted = TRUE;
+ }
+
+ index->type |= DICT_CORRUPT;
+}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c
index ab1fb16361e..60590aa6638 100644
--- a/storage/innobase/dict/dict0load.c
+++ b/storage/innobase/dict/dict0load.c
@@ -52,6 +52,11 @@ static const char* SYSTEM_TABLE_NAME[] = {
"SYS_FOREIGN",
"SYS_FOREIGN_COLS"
};
+
+/* If this flag is TRUE, then we will load the cluster index's (and tables')
+metadata even if it is marked as "corrupted". */
+UNIV_INTERN my_bool srv_load_corrupted = FALSE;
+
/****************************************************************//**
Compare the name of an index column.
@return TRUE if the i'th column of index is 'name'. */
@@ -1324,6 +1329,9 @@ err_len:
goto err_len;
}
type = mach_read_from_4(field);
+ if (UNIV_UNLIKELY(type & (~0 << DICT_IT_BITS))) {
+ return("unknown SYS_INDEXES.TYPE bits");
+ }
field = rec_get_nth_field_old(rec, 7/*SPACE*/, &len);
if (UNIV_UNLIKELY(len != 4)) {
@@ -1423,16 +1431,47 @@ dict_load_indexes(
goto next_rec;
} else if (err_msg) {
fprintf(stderr, "InnoDB: %s\n", err_msg);
+ if (ignore_err & DICT_ERR_IGNORE_CORRUPT) {
+ goto next_rec;
+ }
error = DB_CORRUPTION;
goto func_exit;
}
ut_ad(index);
+ /* Check whether the index is corrupted */
+ if (dict_index_is_corrupted(index)) {
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: ", stderr);
+ dict_index_name_print(stderr, NULL, index);
+ fputs(" is corrupted\n", stderr);
+
+ if (!srv_load_corrupted
+ && !(ignore_err & DICT_ERR_IGNORE_CORRUPT)
+ && dict_index_is_clust(index)) {
+ dict_mem_index_free(index);
+
+ error = DB_INDEX_CORRUPT;
+ goto func_exit;
+ } else {
+ /* We will load the index if
+ 1) srv_load_corrupted is TRUE
+ 2) ignore_err is set with
+ DICT_ERR_IGNORE_CORRUPT
+ 3) if the index corrupted is a secondary
+ index */
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: load corrupted index ", stderr);
+ dict_index_name_print(stderr, NULL, index);
+ putc('\n', stderr);
+ }
+ }
+
/* We check for unsupported types first, so that the
subsequent checks are relevant for the supported types. */
- if (index->type & ~(DICT_CLUSTERED | DICT_UNIQUE)) {
-
+ if (index->type & ~(DICT_CLUSTERED | DICT_UNIQUE
+ | DICT_CORRUPT)) {
fprintf(stderr,
"InnoDB: Error: unknown type %lu"
" of index %s of table %s\n",
@@ -1453,9 +1492,13 @@ dict_load_indexes(
/* If caller can tolerate this error,
we will continue to load the index and
let caller deal with this error. However
- mark the index and table corrupted */
- index->corrupted = TRUE;
- table->corrupted = TRUE;
+ mark the index and table corrupted. We
+ only need to mark such in the index
+ dictionary cache for such metadata corruption,
+ since we would always be able to set it
+ when loading the dictionary cache */
+ dict_set_corrupted_index_cache_only(index);
+
fprintf(stderr,
"InnoDB: Index is corrupt but forcing"
" load into data dictionary\n");
@@ -1495,9 +1538,10 @@ corrupted:
index->name, table->name);
/* If the force recovery flag is set, and
- if the failed index is not the primary index, we
- will continue and open other indexes */
- if (srv_force_recovery
+ if the failed index is not the clustered index,
+ we will continue and open other indexes */
+ if ((srv_force_recovery
+ || srv_load_corrupted)
&& !dict_index_is_clust(index)) {
error = DB_SUCCESS;
goto next_rec;
@@ -1812,6 +1856,30 @@ err_exit:
err = dict_load_indexes(table, heap, ignore_err);
+ if (err == DB_INDEX_CORRUPT) {
+ /* Refuse to load the table if the table has a corrupted
+ cluster index */
+ if (!srv_load_corrupted) {
+ fprintf(stderr, "InnoDB: Error: Load table ");
+ ut_print_name(stderr, NULL, TRUE, table->name);
+ fprintf(stderr, " failed, the table has corrupted"
+ " clustered indexes. Turn on"
+ " 'innodb_force_load_corrupted'"
+ " to drop it\n");
+
+ dict_table_remove_from_cache(table);
+ table = NULL;
+ goto func_exit;
+ } else {
+ dict_index_t* clust_index;
+ clust_index = dict_table_get_first_index(table);
+
+ if (dict_index_is_corrupted(clust_index)) {
+ table->corrupted = TRUE;
+ }
+ }
+ }
+
/* Initialize table foreign_child value. Its value could be
changed when dict_load_foreigns() is called below */
table->fk_max_recusive_level = 0;
@@ -1838,9 +1906,15 @@ err_exit:
index = dict_table_get_first_index(table);
if (!srv_force_recovery || !index
- || !dict_index_is_clust(index)) {
+ || !dict_index_is_clust(index)) {
dict_table_remove_from_cache(table);
table = NULL;
+ } else if (dict_index_is_corrupted(index)) {
+
+ /* It is possible we force to load a corrupted
+ clustered index if srv_load_corrupted is set.
+ Mark the table as corrupted in this case */
+ table->corrupted = TRUE;
}
}
#if 0
@@ -1867,6 +1941,7 @@ err_exit:
mutex_exit(&dict_foreign_err_mutex);
}
#endif /* 0 */
+func_exit:
mem_heap_free(heap);
return(table);
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index f8e96d5fa60..e5d714fc95f 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -1043,6 +1043,8 @@ convert_error_code_to_mysql(
#endif /* HA_ERR_TOO_MANY_CONCURRENT_TRXS */
case DB_UNSUPPORTED:
return(HA_ERR_UNSUPPORTED);
+ case DB_INDEX_CORRUPT:
+ return(HA_ERR_INDEX_CORRUPT);
}
}
@@ -2078,6 +2080,27 @@ no_db_name:
}
+/*****************************************************************//**
+A wrapper function of innobase_convert_name(), convert a table or
+index name to the MySQL system_charset_info (UTF-8) and quote it if needed.
+@return pointer to the end of buf */
+static inline
+void
+innobase_format_name(
+/*==================*/
+ char* buf, /*!< out: buffer for converted identifier */
+ ulint buflen, /*!< in: length of buf, in bytes */
+ const char* name, /*!< in: index or table name to format */
+ ibool is_index_name) /*!< in: index name */
+{
+ const char* bufend;
+
+ bufend = innobase_convert_name(buf, buflen, name, strlen(name),
+ NULL, is_index_name);
+
+ buf[bufend - buf] = '\0';
+}
+
/**********************************************************************//**
Determines if the currently running transaction has been interrupted.
@return TRUE if interrupted */
@@ -5664,12 +5687,14 @@ ha_innobase::index_read(
index = prebuilt->index;
- if (UNIV_UNLIKELY(index == NULL)) {
+ if (UNIV_UNLIKELY(index == NULL) || dict_index_is_corrupted(index)) {
prebuilt->index_usable = FALSE;
DBUG_RETURN(HA_ERR_CRASHED);
}
if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
- DBUG_RETURN(HA_ERR_TABLE_DEF_CHANGED);
+ DBUG_RETURN(dict_index_is_corrupted(index)
+ ? HA_ERR_INDEX_CORRUPT
+ : HA_ERR_TABLE_DEF_CHANGED);
}
/* Note that if the index for which the search template is built is not
@@ -5855,10 +5880,33 @@ ha_innobase::change_active_index(
prebuilt->index);
if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
- push_warning_printf(user_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
- HA_ERR_TABLE_DEF_CHANGED,
- "InnoDB: insufficient history for index %u",
- keynr);
+ if (dict_index_is_corrupted(prebuilt->index)) {
+ char index_name[MAX_FULL_NAME_LEN + 1];
+ char table_name[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(
+ index_name, sizeof index_name,
+ prebuilt->index->name, TRUE);
+
+ innobase_format_name(
+ table_name, sizeof table_name,
+ prebuilt->index->table->name, FALSE);
+
+ push_warning_printf(
+ user_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ HA_ERR_INDEX_CORRUPT,
+ "InnoDB: Index %s for table %s is"
+ " marked as corrupted",
+ index_name, table_name);
+ DBUG_RETURN(1);
+ } else {
+ push_warning_printf(
+ user_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ HA_ERR_TABLE_DEF_CHANGED,
+ "InnoDB: insufficient history for index %u",
+ keynr);
+ }
+
/* The caller seems to ignore this. Thus, we must check
this again in row_search_for_mysql(). */
DBUG_RETURN(2);
@@ -7518,6 +7566,10 @@ ha_innobase::records_in_range(
n_rows = HA_POS_ERROR;
goto func_exit;
}
+ if (dict_index_is_corrupted(index)) {
+ n_rows = HA_ERR_INDEX_CORRUPT;
+ goto func_exit;
+ }
if (UNIV_UNLIKELY(!row_merge_is_index_usable(prebuilt->trx, index))) {
n_rows = HA_ERR_TABLE_DEF_CHANGED;
goto func_exit;
@@ -8184,6 +8236,7 @@ ha_innobase::check(
ulint n_rows_in_table = ULINT_UNDEFINED;
ibool is_ok = TRUE;
ulint old_isolation_level;
+ ibool table_corrupted;
DBUG_ENTER("ha_innobase::check");
DBUG_ASSERT(thd == ha_thd());
@@ -8225,6 +8278,14 @@ ha_innobase::check(
prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ;
+ /* Check whether the table is already marked as corrupted
+ before running the check table */
+ table_corrupted = prebuilt->table->corrupted;
+
+ /* Reset table->corrupted bit so that check table can proceed to
+ do additional check */
+ prebuilt->table->corrupted = FALSE;
+
/* Enlarge the fatal lock wait timeout during CHECK TABLE. */
mutex_enter(&kernel_mutex);
srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */
@@ -8233,6 +8294,7 @@ ha_innobase::check(
for (index = dict_table_get_first_index(prebuilt->table);
index != NULL;
index = dict_table_get_next_index(index)) {
+ char index_name[MAX_FULL_NAME_LEN + 1];
#if 0
fputs("Validating index ", stderr);
ut_print_name(stderr, trx, FALSE, index->name);
@@ -8241,11 +8303,16 @@ ha_innobase::check(
if (!btr_validate_index(index, prebuilt->trx)) {
is_ok = FALSE;
+
+ innobase_format_name(
+ index_name, sizeof index_name,
+ prebuilt->index->name, TRUE);
+
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
ER_NOT_KEYFILE,
"InnoDB: The B-tree of"
- " index '%-.200s' is corrupted.",
- index->name);
+ " index %s is corrupted.",
+ index_name);
continue;
}
@@ -8258,11 +8325,26 @@ ha_innobase::check(
prebuilt->trx, prebuilt->index);
if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
- push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
- HA_ERR_TABLE_DEF_CHANGED,
- "InnoDB: Insufficient history for"
- " index '%-.200s'",
- index->name);
+ innobase_format_name(
+ index_name, sizeof index_name,
+ prebuilt->index->name, TRUE);
+
+ if (dict_index_is_corrupted(prebuilt->index)) {
+ push_warning_printf(
+ user_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ HA_ERR_INDEX_CORRUPT,
+ "InnoDB: Index %s is marked as"
+ " corrupted",
+ index_name);
+ is_ok = FALSE;
+ } else {
+ push_warning_printf(
+ thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ HA_ERR_TABLE_DEF_CHANGED,
+ "InnoDB: Insufficient history for"
+ " index %s",
+ index_name);
+ }
continue;
}
@@ -8276,12 +8358,19 @@ ha_innobase::check(
prebuilt->select_lock_type = LOCK_NONE;
if (!row_check_index_for_mysql(prebuilt, index, &n_rows)) {
+ innobase_format_name(
+ index_name, sizeof index_name,
+ index->name, TRUE);
+
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
ER_NOT_KEYFILE,
"InnoDB: The B-tree of"
- " index '%-.200s' is corrupted.",
- index->name);
+ " index %s is corrupted.",
+ index_name);
is_ok = FALSE;
+ row_mysql_lock_data_dictionary(prebuilt->trx);
+ dict_set_corrupted(index);
+ row_mysql_unlock_data_dictionary(prebuilt->trx);
}
if (thd_killed(user_thd)) {
@@ -8308,6 +8397,20 @@ ha_innobase::check(
}
}
+ if (table_corrupted) {
+ /* If some previous operation has marked the table as
+ corrupted in memory, and has not propagated such to
+ clustered index, we will do so here */
+ index = dict_table_get_first_index(prebuilt->table);
+
+ if (!dict_index_is_corrupted(index)) {
+ mutex_enter(&dict_sys->mutex);
+ dict_set_corrupted(index);
+ mutex_exit(&dict_sys->mutex);
+ }
+ prebuilt->table->corrupted = TRUE;
+ }
+
/* Restore the original isolation level */
prebuilt->trx->isolation_level = old_isolation_level;
@@ -11101,6 +11204,11 @@ static MYSQL_SYSVAR_BOOL(large_prefix, innobase_large_prefix,
"Support large index prefix length of REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes.",
NULL, NULL, FALSE);
+static MYSQL_SYSVAR_BOOL(force_load_corrupted, srv_load_corrupted,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Force InnoDB to load metadata of corrupted table.",
+ NULL, NULL, FALSE);
+
static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
"Force InnoDB to not use next-key locking, to use only row-level locking.",
@@ -11360,6 +11468,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(flush_method),
MYSQL_SYSVAR(force_recovery),
MYSQL_SYSVAR(large_prefix),
+ MYSQL_SYSVAR(force_load_corrupted),
MYSQL_SYSVAR(locks_unsafe_for_binlog),
MYSQL_SYSVAR(lock_wait_timeout),
#ifdef UNIV_LOG_ARCHIVE
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
index 74ef2d2dab7..eb40621abbe 100644
--- a/storage/innobase/include/buf0lru.h
+++ b/storage/innobase/include/buf0lru.h
@@ -203,6 +203,17 @@ void
buf_LRU_stat_update(void);
/*=====================*/
+/******************************************************************//**
+Remove one page from LRU list and put it to free list */
+UNIV_INTERN
+void
+buf_LRU_free_one_page(
+/*==================*/
+ buf_page_t* bpage) /*!< in/out: block, must contain a file page and
+ be in a state where it can be freed; there
+ may or may not be a hash index to the page */
+ __attribute__((nonnull));
+
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**********************************************************************//**
Validates the LRU list.
diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h
index 28ef64500cc..415470b61b4 100644
--- a/storage/innobase/include/db0err.h
+++ b/storage/innobase/include/db0err.h
@@ -110,6 +110,7 @@ enum db_err {
foreign keys as its prefix columns */
DB_TOO_BIG_INDEX_COL, /* index column size exceeds maximum
limit */
+ DB_INDEX_CORRUPT, /* we have corrupted index */
/* The following are partial failure codes */
DB_FAIL = 1000,
diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h
index 22df826da65..5d136862bc6 100644
--- a/storage/innobase/include/dict0boot.h
+++ b/storage/innobase/include/dict0boot.h
@@ -137,8 +137,10 @@ dict_create(void);
header is created */
/*-------------------------------------------------------------*/
-/* The field number of the page number field in the sys_indexes table
-clustered index */
+/* The field numbers in the SYS_TABLES clustered index */
+#define DICT_SYS_TABLES_TYPE_FIELD 5
+
+/* The field numbers in the SYS_INDEXES clustered index */
#define DICT_SYS_INDEXES_PAGE_NO_FIELD 8
#define DICT_SYS_INDEXES_SPACE_NO_FIELD 7
#define DICT_SYS_INDEXES_TYPE_FIELD 6
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index f979d0fcc96..93e9162dc87 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -585,6 +585,20 @@ dict_table_get_next_index(
# define dict_table_get_next_index(index) UT_LIST_GET_NEXT(indexes, index)
#endif /* UNIV_DEBUG */
#endif /* !UNIV_HOTBACKUP */
+
+/* Skip corrupted index */
+#define dict_table_skip_corrupt_index(index) \
+ while (index && dict_index_is_corrupted(index)) { \
+ index = dict_table_get_next_index(index); \
+ }
+
+/* Get the next non-corrupt index */
+#define dict_table_next_uncorrupted_index(index) \
+do { \
+ index = dict_table_get_next_index(index); \
+ dict_table_skip_corrupt_index(index); \
+} while (0)
+
/********************************************************************//**
Check whether the index is the clustered index.
@return nonzero for clustered index, zero for other indexes */
@@ -593,7 +607,7 @@ ulint
dict_index_is_clust(
/*================*/
const dict_index_t* index) /*!< in: index */
- __attribute__((pure));
+ __attribute__((nonnull, pure, warn_unused_result));
/********************************************************************//**
Check whether the index is unique.
@return nonzero for unique index, zero for other indexes */
@@ -602,7 +616,7 @@ ulint
dict_index_is_unique(
/*=================*/
const dict_index_t* index) /*!< in: index */
- __attribute__((pure));
+ __attribute__((nonnull, pure, warn_unused_result));
/********************************************************************//**
Check whether the index is the insert buffer tree.
@return nonzero for insert buffer, zero for other indexes */
@@ -611,7 +625,7 @@ ulint
dict_index_is_ibuf(
/*===============*/
const dict_index_t* index) /*!< in: index */
- __attribute__((pure));
+ __attribute__((nonnull, pure, warn_unused_result));
/********************************************************************//**
Check whether the index is a secondary index or the insert buffer tree.
@return nonzero for insert buffer, zero for other indexes */
@@ -620,7 +634,7 @@ ulint
dict_index_is_sec_or_ibuf(
/*======================*/
const dict_index_t* index) /*!< in: index */
- __attribute__((pure));
+ __attribute__((nonnull, pure, warn_unused_result));
/********************************************************************//**
Gets the number of user-defined columns in a table in the dictionary
@@ -630,7 +644,8 @@ UNIV_INLINE
ulint
dict_table_get_n_user_cols(
/*=======================*/
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, pure, warn_unused_result));
/********************************************************************//**
Gets the number of system columns in a table in the dictionary cache.
@return number of system (e.g., ROW_ID) columns of a table */
@@ -638,7 +653,8 @@ UNIV_INLINE
ulint
dict_table_get_n_sys_cols(
/*======================*/
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, pure, warn_unused_result));
/********************************************************************//**
Gets the number of all columns (also system) in a table in the dictionary
cache.
@@ -647,7 +663,8 @@ UNIV_INLINE
ulint
dict_table_get_n_cols(
/*==================*/
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, pure, warn_unused_result));
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the nth column of a table.
@@ -1243,6 +1260,56 @@ void
dict_close(void);
/*============*/
+/**********************************************************************//**
+Check whether the table is corrupted.
+@return nonzero for corrupted table, zero for valid tables */
+UNIV_INLINE
+ulint
+dict_table_is_corrupted(
+/*====================*/
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, pure, warn_unused_result));
+
+/**********************************************************************//**
+Check whether the index is corrupted.
+@return nonzero for corrupted index, zero for valid indexes */
+UNIV_INLINE
+ulint
+dict_index_is_corrupted(
+/*====================*/
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull, pure, warn_unused_result));
+
+/**********************************************************************//**
+Flags an index and table corrupted both in the data dictionary cache
+and in the system table SYS_INDEXES. */
+UNIV_INTERN
+void
+dict_set_corrupted(
+/*===============*/
+ dict_index_t* index) /*!< in/out: index */
+ UNIV_COLD __attribute__((nonnull));
+
+/**********************************************************************//**
+Flags an index corrupted in the data dictionary cache only. This
+is used mostly to mark a corrupted index when index's own dictionary
+is corrupted, and we force to load such index for repair purpose */
+UNIV_INTERN
+void
+dict_set_corrupted_index_cache_only(
+/*================================*/
+ dict_index_t* index); /*!< in/out: index */
+
+/**********************************************************************//**
+Flags a table with specified space_id corrupted in the table dictionary
+cache.
+@return TRUE if successful */
+UNIV_INTERN
+ibool
+dict_set_corrupted_by_space(
+/*========================*/
+ ulint space_id); /*!< in: space ID */
+
#ifndef UNIV_NONINL
#include "dict0dict.ic"
#endif
diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic
index 59811568556..ade9e627e29 100644
--- a/storage/innobase/include/dict0dict.ic
+++ b/storage/innobase/include/dict0dict.ic
@@ -27,6 +27,7 @@ Created 1/8/1996 Heikki Tuuri
#ifndef UNIV_HOTBACKUP
#include "dict0load.h"
#include "rem0types.h"
+#include "srv0srv.h"
/*********************************************************************//**
Gets the minimum number of bytes per character.
@@ -828,7 +829,7 @@ dict_table_check_if_in_cache_low(
}
/**********************************************************************//**
-load a table into dictionary cache, ignore any error specified during load;
+load a table into dictionary cache, ignore any error specified during load;
@return table, NULL if not found */
UNIV_INLINE
dict_table_t*
@@ -872,6 +873,18 @@ dict_table_get_low(
table = dict_table_check_if_in_cache_low(table_name);
+ if (table && table->corrupted) {
+ fprintf(stderr, "InnoDB: table");
+ ut_print_name(stderr, NULL, TRUE, table->name);
+ if (srv_load_corrupted) {
+ fputs(" is corrupted, but"
+ " innodb_force_load_corrupted is set\n", stderr);
+ } else {
+ fputs(" is corrupted\n", stderr);
+ return(NULL);
+ }
+ }
+
if (table == NULL) {
table = dict_load_table(table_name, TRUE, DICT_ERR_IGNORE_NONE);
}
@@ -937,4 +950,35 @@ dict_max_field_len_store_undo(
return(prefix_len);
}
+/********************************************************************//**
+Check whether the table is corrupted.
+@return nonzero for corrupted table, zero for valid tables */
+UNIV_INLINE
+ulint
+dict_table_is_corrupted(
+/*====================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ ut_ad(table);
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+ return(UNIV_UNLIKELY(table->corrupted));
+}
+
+/********************************************************************//**
+Check whether the index is corrupted.
+@return nonzero for corrupted index, zero for valid indexes */
+UNIV_INLINE
+ulint
+dict_index_is_corrupted(
+/*====================*/
+ const dict_index_t* index) /*!< in: index */
+{
+ ut_ad(index);
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+ return(UNIV_UNLIKELY((index->type & DICT_CORRUPT)
+ || (index->table && index->table->corrupted)));
+}
+
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index 3a475fa85fc..9ded0dba39b 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -51,7 +51,12 @@ combination of types */
#define DICT_UNIQUE 2 /*!< unique index */
#define DICT_UNIVERSAL 4 /*!< index which can contain records from any
other index */
-#define DICT_IBUF 8 /*!< insert buffer tree */
+#define DICT_IBUF 8 /*!< insert buffer tree */
+#define DICT_CORRUPT 16 /*!< bit to store the corrupted flag
+ in SYS_INDEXES.TYPE */
+
+#define DICT_IT_BITS 5 /*!< number of bits used for
+ SYS_INDEXES.TYPE */
/* @} */
/** Types for a table object */
@@ -369,8 +374,9 @@ struct dict_index_struct{
/*!< space where the index tree is placed */
unsigned page:32;/*!< index tree root page number */
#endif /* !UNIV_HOTBACKUP */
- unsigned type:4; /*!< index type (DICT_CLUSTERED, DICT_UNIQUE,
- DICT_UNIVERSAL, DICT_IBUF) */
+ unsigned type:DICT_IT_BITS;
+ /*!< index type (DICT_CLUSTERED, DICT_UNIQUE,
+ DICT_UNIVERSAL, DICT_IBUF, DICT_CORRUPT) */
unsigned trx_id_offset:10;/*!< position of the trx id column
in a clustered index record, if the fields
before it are known to be of a fixed size,
@@ -391,8 +397,6 @@ struct dict_index_struct{
/*!< TRUE if this index is marked to be
dropped in ha_innobase::prepare_drop_index(),
otherwise FALSE */
- unsigned corrupted:1;
- /*!< TRUE if the index object is corrupted */
dict_field_t* fields; /*!< array of field descriptions */
#ifndef UNIV_HOTBACKUP
UT_LIST_NODE_T(dict_index_t)
diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h
index 8cbd7cd5783..f0a05a38070 100644
--- a/storage/innobase/include/dict0types.h
+++ b/storage/innobase/include/dict0types.h
@@ -51,7 +51,8 @@ be or-ed together */
enum dict_err_ignore {
DICT_ERR_IGNORE_NONE = 0, /*!< no error to ignore */
DICT_ERR_IGNORE_INDEX_ROOT = 1, /*!< ignore error if index root
- page is FIL_NUL or incorrect value */
+ page is FIL_NULL or incorrect value */
+ DICT_ERR_IGNORE_CORRUPT = 2, /*!< skip corrupted indexes */
DICT_ERR_IGNORE_ALL = 0xFFFF /*!< ignore all errors */
};
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 7a93548cb03..dfe7397d189 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -141,6 +141,10 @@ extern ulint srv_log_buffer_size;
extern ulong srv_flush_log_at_trx_commit;
extern char srv_adaptive_flushing;
+/* If this flag is TRUE, then we will load the indexes' (and tables') metadata
+even if they are marked as "corrupted". Mostly it is for DBA to process
+corrupted index and table */
+extern my_bool srv_load_corrupted;
/* The sort order table of the MySQL latin1_swedish_ci character set
collation */
diff --git a/storage/innobase/pars/pars0opt.c b/storage/innobase/pars/pars0opt.c
index 2e392ba4836..d992805d9ef 100644
--- a/storage/innobase/pars/pars0opt.c
+++ b/storage/innobase/pars/pars0opt.c
@@ -568,7 +568,7 @@ opt_search_plan_for_table(
best_last_op = last_op;
}
- index = dict_table_get_next_index(index);
+ dict_table_next_uncorrupted_index(index);
}
plan->index = best_index;
diff --git a/storage/innobase/row/row0ins.c b/storage/innobase/row/row0ins.c
index 715e376f8f9..62146a95f11 100644
--- a/storage/innobase/row/row0ins.c
+++ b/storage/innobase/row/row0ins.c
@@ -118,6 +118,9 @@ ins_node_create_entry_list(
node->entry_sys_heap);
UT_LIST_ADD_LAST(tuple_list, node->entry_list, entry);
+ /* We will include all indexes (include those corrupted
+ secondary indexes) in the entry list. Filteration of
+ these corrupted index will be done in row_ins() */
index = dict_table_get_next_index(index);
}
}
@@ -2046,7 +2049,6 @@ row_ins_index_entry_low(
mtr_start(&mtr);
if (err != DB_SUCCESS) {
-
goto function_exit;
}
@@ -2431,6 +2433,13 @@ row_ins(
node->index = dict_table_get_next_index(node->index);
node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry);
+
+ /* Skip corrupted secondar index and its entry */
+ while (node->index && dict_index_is_corrupted(node->index)) {
+
+ node->index = dict_table_get_next_index(node->index);
+ node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry);
+ }
}
ut_ad(node->entry == NULL);
diff --git a/storage/innobase/row/row0merge.c b/storage/innobase/row/row0merge.c
index 5be437add5a..d42f21241ca 100644
--- a/storage/innobase/row/row0merge.c
+++ b/storage/innobase/row/row0merge.c
@@ -2554,8 +2554,9 @@ row_merge_is_index_usable(
const trx_t* trx, /*!< in: transaction */
const dict_index_t* index) /*!< in: index to check */
{
- return(!trx->read_view
- || read_view_sees_trx_id(trx->read_view, index->trx_id));
+ return(!dict_index_is_corrupted(index)
+ && (!trx->read_view
+ || read_view_sees_trx_id(trx->read_view, index->trx_id)));
}
/*********************************************************************//**
diff --git a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c
index a56d419d1f0..d98d47a5da6 100644
--- a/storage/innobase/row/row0mysql.c
+++ b/storage/innobase/row/row0mysql.c
@@ -3098,7 +3098,8 @@ row_drop_table_for_mysql(
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- table = dict_table_get_low_ignore_err(name, DICT_ERR_IGNORE_INDEX_ROOT);
+ table = dict_table_get_low_ignore_err(
+ name, DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT);
if (!table) {
err = DB_TABLE_NOT_FOUND;
diff --git a/storage/innobase/row/row0purge.c b/storage/innobase/row/row0purge.c
index 83e7c9e4857..c008c2d1c31 100644
--- a/storage/innobase/row/row0purge.c
+++ b/storage/innobase/row/row0purge.c
@@ -469,6 +469,13 @@ row_purge_del_mark(
heap = mem_heap_create(1024);
while (node->index != NULL) {
+ /* skip corrupted secondary index */
+ dict_table_skip_corrupt_index(node->index);
+
+ if (!node->index) {
+ break;
+ }
+
index = node->index;
/* Build the index entry */
@@ -516,6 +523,12 @@ row_purge_upd_exist_or_extern_func(
heap = mem_heap_create(1024);
while (node->index != NULL) {
+ dict_table_skip_corrupt_index(node->index);
+
+ if (!node->index) {
+ break;
+ }
+
index = node->index;
if (row_upd_changes_ord_field_binary(node->index, node->update,
diff --git a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c
index 36da621e077..a1039010d00 100644
--- a/storage/innobase/row/row0sel.c
+++ b/storage/innobase/row/row0sel.c
@@ -3441,6 +3441,13 @@ row_search_for_mysql(
return(DB_MISSING_HISTORY);
}
+ if (dict_index_is_corrupted(index)) {
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
+#endif /* UNIV_SYNC_DEBUG */
+ return(DB_CORRUPTION);
+ }
+
if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
fprintf(stderr,
"InnoDB: Error: trying to free a corrupt\n"
diff --git a/storage/innobase/row/row0uins.c b/storage/innobase/row/row0uins.c
index d25afed3840..4fa97c9355d 100644
--- a/storage/innobase/row/row0uins.c
+++ b/storage/innobase/row/row0uins.c
@@ -328,6 +328,8 @@ row_undo_ins(
node->index = dict_table_get_next_index(
dict_table_get_first_index(node->table));
+ dict_table_skip_corrupt_index(node->index);
+
while (node->index != NULL) {
dtuple_t* entry;
ulint err;
@@ -355,7 +357,7 @@ row_undo_ins(
}
}
- node->index = dict_table_get_next_index(node->index);
+ dict_table_next_uncorrupted_index(node->index);
}
log_free_check();
diff --git a/storage/innobase/row/row0umod.c b/storage/innobase/row/row0umod.c
index 2188fdeff49..b86ce9eeabd 100644
--- a/storage/innobase/row/row0umod.c
+++ b/storage/innobase/row/row0umod.c
@@ -573,6 +573,14 @@ row_undo_mod_upd_del_sec(
heap = mem_heap_create(1024);
while (node->index != NULL) {
+
+ /* Skip all corrupted secondary index */
+ dict_table_skip_corrupt_index(node->index);
+
+ if (!node->index) {
+ break;
+ }
+
index = node->index;
entry = row_build_index_entry(node->row, node->ext,
@@ -626,6 +634,13 @@ row_undo_mod_del_mark_sec(
heap = mem_heap_create(1024);
while (node->index != NULL) {
+ /* Skip all corrupted secondary index */
+ dict_table_skip_corrupt_index(node->index);
+
+ if (!node->index) {
+ break;
+ }
+
index = node->index;
entry = row_build_index_entry(node->row, node->ext,
@@ -677,6 +692,13 @@ row_undo_mod_upd_exist_sec(
heap = mem_heap_create(1024);
while (node->index != NULL) {
+ /* Skip all corrupted secondary index */
+ dict_table_skip_corrupt_index(node->index);
+
+ if (!node->index) {
+ break;
+ }
+
index = node->index;
if (row_upd_changes_ord_field_binary(node->index, node->update,
@@ -859,6 +881,9 @@ row_undo_mod(
node->index = dict_table_get_next_index(
dict_table_get_first_index(node->table));
+ /* Skip all corrupted secondary index */
+ dict_table_skip_corrupt_index(node->index);
+
if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
err = row_undo_mod_upd_exist_sec(node, thr);
diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c
index a2f6c17413f..6559c529117 100644
--- a/storage/innobase/row/row0upd.c
+++ b/storage/innobase/row/row0upd.c
@@ -2320,6 +2320,13 @@ row_upd(
while (node->index != NULL) {
+ /* Skip corrupted index */
+ dict_table_skip_corrupt_index(node->index);
+
+ if (!node->index) {
+ break;
+ }
+
log_free_check();
err = row_upd_sec_step(node, thr);
diff --git a/storage/innobase/ut/ut0ut.c b/storage/innobase/ut/ut0ut.c
index 1ef1a082bb2..bd009f1fd32 100644
--- a/storage/innobase/ut/ut0ut.c
+++ b/storage/innobase/ut/ut0ut.c
@@ -712,6 +712,8 @@ ut_strerr(
return("No index on referencing keys in referencing table");
case DB_PARENT_NO_INDEX:
return("No index on referenced keys in referenced table");
+ case DB_INDEX_CORRUPT:
+ return("Index corrupted");
case DB_END_OF_INDEX:
return("End of index");
/* do not add default: in order to produce a warning if new code