diff options
author | Thirunarayanan Balathandayuthapani <thiru@mariadb.com> | 2018-03-27 13:47:56 +0530 |
---|---|---|
committer | Thirunarayanan Balathandayuthapani <thiru@mariadb.com> | 2018-03-27 13:47:56 +0530 |
commit | 73af8af094d65d1d8b8dfcdabf72e825e7cb7de5 (patch) | |
tree | 81119d3e56c95a8e27edd24aaf232323a33580e8 | |
parent | 60438451c3aaa14d7979e2d2b6976da240b4949e (diff) | |
download | mariadb-git-73af8af094d65d1d8b8dfcdabf72e825e7cb7de5.tar.gz |
MDEV-15325 Incomplete validation of missing tablespace during recovery
Problem:
=======
During validation of missing tablespaces, the ID of each missing tablespace
is looked up in the hash table of redo log records (recv_sys->addr_hash). But
if the hash table ran out of memory, it may not contain the redo log records
of all tablespaces. In that case, the server will start InnoDB
even though a tablespace with pending redo log records is missing.
Solution:
========
If the recv_sys->addr_hash hash table ran out of memory, InnoDB needs
to scan the remaining redo log again to validate the missing tablespaces.
-rw-r--r-- | mysql-test/suite/innodb/r/innodb-index.result | 17 | ||||
-rw-r--r-- | mysql-test/suite/innodb/t/innodb-index.test | 48 | ||||
-rw-r--r-- | storage/innobase/include/log0recv.h | 3 | ||||
-rw-r--r-- | storage/innobase/log/log0recv.cc | 245 |
4 files changed, 230 insertions, 83 deletions
diff --git a/mysql-test/suite/innodb/r/innodb-index.result b/mysql-test/suite/innodb/r/innodb-index.result index 54ad4e8a927..df27769b810 100644 --- a/mysql-test/suite/innodb/r/innodb-index.result +++ b/mysql-test/suite/innodb/r/innodb-index.result @@ -1848,3 +1848,20 @@ create table t1(o1 int, o2 int, o3 int, primary key(o1,o2,o3)) engine = innodb; insert into t1 values(1,1,2),(2,2,1); alter table t1 drop primary key, add primary key(o1), lock=none; drop table t1; +# +# MDEV-15325 Incomplete validation of missing tablespace during recovery +# +CREATE TABLE t1(f1 INT PRIMARY KEY)ENGINE=InnoDB; +CREATE TABLE t2(f1 INT PRIMARY KEY)ENGINE=InnoDB; +# Kill the server +# Wrong space_id in a dirty file and a missing file +SELECT * FROM INFORMATION_SCHEMA.ENGINES +WHERE engine = 'innodb' +AND support IN ('YES', 'DEFAULT', 'ENABLED'); +ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS +# Restore t1 and t2 +SELECT * FROM t1; +f1 +SELECT * FROM t2; +f1 +DROP TABLE t1, t2; diff --git a/mysql-test/suite/innodb/t/innodb-index.test b/mysql-test/suite/innodb/t/innodb-index.test index 721808c038c..8b9d2068499 100644 --- a/mysql-test/suite/innodb/t/innodb-index.test +++ b/mysql-test/suite/innodb/t/innodb-index.test @@ -1076,3 +1076,51 @@ drop table t1; # no skip sort cases --source suite/innodb/include/alter_table_pk_no_sort.inc + +--echo # +--echo # MDEV-15325 Incomplete validation of missing tablespace during recovery +--echo # + +--source include/no_checkpoint_start.inc +CREATE TABLE t1(f1 INT PRIMARY KEY)ENGINE=InnoDB; + +CREATE TABLE t2(f1 INT PRIMARY KEY)ENGINE=InnoDB; + +--let CLEANUP_IF_CHECKPOINT=DROP TABLE t1, t2; +--source include/no_checkpoint_end.inc + +let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err; +let $check_no_innodb=SELECT * FROM INFORMATION_SCHEMA.ENGINES +WHERE engine = 'innodb' +AND support IN ('YES', 'DEFAULT', 'ENABLED'); + +--echo # Wrong space_id in a dirty file and a missing file + +--copy_file $MYSQLD_DATADIR/test/t1.ibd $MYSQLD_DATADIR/test/t0.ibd 
+--move_file $MYSQLD_DATADIR/test/t2.ibd $MYSQLD_DATADIR/test/t1.ibd + +--source include/start_mysqld.inc +--eval $check_no_innodb +--source include/shutdown_mysqld.inc + +--echo # Restore t1 and t2 + +--move_file $MYSQLD_DATADIR/test/t1.ibd $MYSQLD_DATADIR/test/t2.ibd +--move_file $MYSQLD_DATADIR/test/t0.ibd $MYSQLD_DATADIR/test/t1.ibd + +--source include/start_mysqld.inc + +SELECT * FROM t1; +SELECT * FROM t2; + +DROP TABLE t1, t2; + +--disable_query_log + +call mtr.add_suppression("InnoDB: Tablespace .* was not found at .*t[12].ibd."); +call mtr.add_suppression("InnoDB: Set innodb_force_recovery=1 to ignore this and to permanently lose all changes to the tablespace"); +call mtr.add_suppression("InnoDB: Plugin initialization aborted"); +call mtr.add_suppression("Plugin 'InnoDB' init function returned error"); +call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed"); + +--enable_query_log diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 065326ead88..6dcaaedd765 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -291,6 +291,9 @@ struct recv_sys_t{ recv_dblwr_t dblwr; + /** Lastly added LSN to the hash table of log records. */ + lsn_t last_stored_lsn; + /** Determine whether redo log recovery progress should be reported. @param[in] time the current time @return whether progress should be reported diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index b41ea4dabee..80cf9f1ac4b 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -141,12 +141,23 @@ struct file_name_t { std::string name; /** Tablespace object (NULL if not valid or not found) */ fil_space_t* space; - /** Whether the tablespace has been deleted */ - bool deleted; + + /** Tablespace status. 
*/ + enum fil_status { + /** Normal tablespace */ + NORMAL, + /** Deleted tablespace */ + DELETED, + /** Missing tablespace */ + MISSING + }; + + /** Status of the tablespace */ + fil_status status; /** Constructor */ - file_name_t(std::string name_, bool deleted_) : - name(name_), space(NULL), deleted (deleted_) {} + file_name_t(std::string name_, bool deleted) : + name(name_), space(NULL), status(deleted ? DELETED: NORMAL) {} }; /** Map of dirty tablespaces during recovery */ @@ -202,8 +213,8 @@ fil_name_process( if (deleted) { /* Got MLOG_FILE_DELETE */ - if (!p.second && !f.deleted) { - f.deleted = true; + if (!p.second && f.status != file_name_t::DELETED) { + f.status = file_name_t::DELETED; if (f.space != NULL) { fil_space_free(space_id, false); f.space = NULL; @@ -226,7 +237,7 @@ fil_name_process( if (f.space == NULL || f.space == space) { f.name = fname.name; f.space = space; - f.deleted = false; + f.status = file_name_t::NORMAL; } else { ib::error() << "Tablespace " << space_id << " has been found in two places: '" @@ -2905,6 +2916,12 @@ recv_scan_log_recs( if (*store_to_hash != STORE_NO && mem_heap_get_size(recv_sys->heap) > available_memory) { + + DBUG_PRINT("ib_log", ("Ran out of memory and last " + "stored lsn " LSN_PF, + recv_sys->recovered_lsn)); + + recv_sys->last_stored_lsn = recv_sys->recovered_lsn; *store_to_hash = STORE_NO; } @@ -3037,15 +3054,99 @@ recv_init_missing_space(dberr_t err, const recv_spaces_t::const_iterator& i) return(err); } +/** Report the missing tablespace and discard the redo logs for the deleted +tablespace. +@param[in] rescan rescan of redo logs is needed + if hash table ran out of memory +@param[out] missing_tablespace missing tablespace exists or not +@return error code or DB_SUCCESS. 
*/ +static MY_ATTRIBUTE((warn_unused_result)) +dberr_t +recv_validate_tablespace(bool rescan, bool& missing_tablespace) +{ + dberr_t err = DB_SUCCESS; + + for (ulint h = 0; h < hash_get_n_cells(recv_sys->addr_hash); h++) { + + for (recv_addr_t* recv_addr = static_cast<recv_addr_t*>( + HASH_GET_FIRST(recv_sys->addr_hash, h)); + recv_addr != 0; + recv_addr = static_cast<recv_addr_t*>( + HASH_GET_NEXT(addr_hash, recv_addr))) { + + const ulint space = recv_addr->space; + + if (is_predefined_tablespace(space)) { + continue; + } + + recv_spaces_t::iterator i + = recv_spaces.find(space); + ut_ad(i != recv_spaces.end()); + + switch(i->second.status) { + case file_name_t::MISSING: + err = recv_init_missing_space(err, i); + i->second.status = file_name_t::DELETED; + case file_name_t::DELETED: + recv_addr->state = RECV_DISCARDED; + case file_name_t::NORMAL: + break; + default: + ut_ad(0); + } + } + } + + if (err != DB_SUCCESS) { + return(err); + } + + /* When rescan is not needed then recv_sys->addr_hash will have + all space id belongs to redo log. If rescan is needed and + innodb_force_recovery > 0 then InnoDB can ignore missing tablespace. */ + for (recv_spaces_t::iterator i = recv_spaces.begin(); + i != recv_spaces.end(); i++) { + + if (i->second.status != file_name_t::MISSING) { + continue; + } + + missing_tablespace = true; + + if (srv_force_recovery > 0) { + ib::warn() << "Tablespace " << i->first + <<" was not found at " << i->second.name + <<", and innodb_force_recovery was set." + <<" All redo log for this tablespace" + <<" will be ignored!"; + continue; + } + + if (!rescan) { + ib::info() << "Tablespace " << i->first + << " was not found at '" + << i->second.name << "', but there" + <<" were no modifications either."; + } + } + + if (!rescan || srv_force_recovery > 0) { + missing_tablespace = false; + } + + return DB_SUCCESS; +} + /** Check if all tablespaces were found for crash recovery. 
+@param[in] rescan rescan of redo logs is needed +@param[out] missing_tablespace missing table exists @return error code or DB_SUCCESS */ static MY_ATTRIBUTE((warn_unused_result)) dberr_t -recv_init_crash_recovery_spaces() +recv_init_crash_recovery_spaces(bool rescan, bool& missing_tablespace) { - typedef std::set<ulint> space_set_t; bool flag_deleted = false; - space_set_t missing_spaces; ut_ad(!srv_read_only_mode); ut_ad(recv_needed_recovery); @@ -3053,9 +3154,9 @@ recv_init_crash_recovery_spaces() for (recv_spaces_t::iterator i = recv_spaces.begin(); i != recv_spaces.end(); i++) { ut_ad(!is_predefined_tablespace(i->first)); - ut_ad(!i->second.deleted || !i->second.space); + ut_ad(i->second.status != file_name_t::DELETED || !i->second.space); - if (i->second.deleted) { + if (i->second.status == file_name_t::DELETED) { /* The tablespace was deleted, so we can ignore any redo log for it. */ flag_deleted = true; @@ -3071,84 +3172,18 @@ recv_init_crash_recovery_spaces() recv_sys->found_corrupt_log = true; return(DB_CORRUPTION); } else { - missing_spaces.insert(i->first); + i->second.status = file_name_t::MISSING; flag_deleted = true; } - ut_ad(i->second.deleted || i->second.name != ""); + ut_ad(i->second.status == file_name_t::DELETED || i->second.name != ""); } if (flag_deleted) { - dberr_t err = DB_SUCCESS; - - for (ulint h = 0; - h < hash_get_n_cells(recv_sys->addr_hash); - h++) { - for (recv_addr_t* recv_addr - = static_cast<recv_addr_t*>( - HASH_GET_FIRST( - recv_sys->addr_hash, h)); - recv_addr != 0; - recv_addr = static_cast<recv_addr_t*>( - HASH_GET_NEXT(addr_hash, recv_addr))) { - const ulint space = recv_addr->space; - - if (is_predefined_tablespace(space)) { - continue; - } - - recv_spaces_t::iterator i - = recv_spaces.find(space); - ut_ad(i != recv_spaces.end()); - - if (i->second.deleted) { - ut_ad(missing_spaces.find(space) - == missing_spaces.end()); - recv_addr->state = RECV_DISCARDED; - continue; - } - - space_set_t::iterator m = missing_spaces.find( 
- space); - - if (m != missing_spaces.end()) { - missing_spaces.erase(m); - err = recv_init_missing_space(err, i); - recv_addr->state = RECV_DISCARDED; - /* All further redo log for this - tablespace should be removed. */ - i->second.deleted = true; - } - } - } - - if (err != DB_SUCCESS) { - return(err); - } - } - - for (space_set_t::const_iterator m = missing_spaces.begin(); - m != missing_spaces.end(); m++) { - recv_spaces_t::iterator i = recv_spaces.find(*m); - ut_ad(i != recv_spaces.end()); - - ib::info() << "Tablespace " << i->first - << " was not found at '" << i->second.name - << "', but there were no modifications either."; + return recv_validate_tablespace(rescan, missing_tablespace); } - if (srv_operation == SRV_OPERATION_NORMAL) { - buf_dblwr_process(); - } - - if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { - /* Spawn the background thread to flush dirty pages - from the buffer pools. */ - recv_writer_thread_active = true; - os_thread_create(recv_writer_thread, 0, 0); - } - - return(DB_SUCCESS); + return DB_SUCCESS; } /** Start recovering from a redo log checkpoint. @@ -3324,13 +3359,57 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn) log_sys->lsn = recv_sys->recovered_lsn; if (recv_needed_recovery) { - err = recv_init_crash_recovery_spaces(); + bool missing_tablespace = false; + + err = recv_init_crash_recovery_spaces( + rescan, missing_tablespace); if (err != DB_SUCCESS) { log_mutex_exit(); return(err); } + /* If there is any missing tablespace and rescan is needed + then there is a possiblity that hash table will not contain + all space ids redo logs. Rescan the remaining unstored + redo logs for the validation of missing tablespace. */ + while (missing_tablespace) { + DBUG_PRINT("ib_log", ("Rescan of redo log to validate " + "the missing tablespace. 
Scan " + "from last stored LSN " LSN_PF, + recv_sys->last_stored_lsn)); + + lsn_t recent_stored_lsn = recv_sys->last_stored_lsn; + rescan = recv_group_scan_log_recs( + group, checkpoint_lsn, + &recent_stored_lsn, false); + + ut_ad(!recv_sys->found_corrupt_fs); + + missing_tablespace = false; + + err = recv_sys->found_corrupt_log + ? DB_ERROR + : recv_validate_tablespace( + rescan, missing_tablespace); + + if (err != DB_SUCCESS) { + log_mutex_exit(); + return err; + } + } + + if (srv_operation == SRV_OPERATION_NORMAL) { + buf_dblwr_process(); + } + + ut_ad(srv_force_recovery <= SRV_FORCE_NO_UNDO_LOG_SCAN); + + /* Spawn the background thread to flush dirty pages + from the buffer pools. */ + recv_writer_thread_active = true; + os_thread_create(recv_writer_thread, 0, 0); + if (rescan) { contiguous_lsn = checkpoint_lsn; |