diff options
author | Thirunarayanan Balathandayuthapani <thiru@mariadb.com> | 2019-11-25 16:26:50 +0530 |
---|---|---|
committer | Thirunarayanan Balathandayuthapani <thiru@mariadb.com> | 2019-11-25 16:30:06 +0530 |
commit | 4a49cabb6a7c8bd4764a090d7af767910bf67aa4 (patch) | |
tree | e9e896c902455dd69099256d20a2457dc8b7245d | |
parent | 4111a53079da9850c630ce30eec7f8a38744eacd (diff) | |
download | mariadb-git-bb-10.4-MDEV-14481-TRX_SYS.tar.gz |
Avoid TRX_SYS page access before reading redo log [bb-10.4-MDEV-14481-TRX_SYS]
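This change avoids accessing the TRX_SYS page before the redo log
has been read. On a normal startup the undo tablespace files are
instead opened directly and their first page is read from the file,
so the buf_pool_invalidate() workaround is removed.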
-rw-r--r-- | mysql-test/suite/innodb/r/log_file.result | 4 | ||||
-rw-r--r-- | mysql-test/suite/innodb/t/log_file.test | 4 | ||||
-rw-r--r-- | mysql-test/suite/mariabackup/undo_space_id.result | 10 | ||||
-rw-r--r-- | mysql-test/suite/mariabackup/undo_space_id.test | 2 | ||||
-rw-r--r-- | storage/innobase/include/fil0fil.h | 6 | ||||
-rw-r--r-- | storage/innobase/os/os0file.cc | 82 | ||||
-rw-r--r-- | storage/innobase/srv/srv0start.cc | 215 |
7 files changed, 251 insertions, 72 deletions
diff --git a/mysql-test/suite/innodb/r/log_file.result b/mysql-test/suite/innodb/r/log_file.result index a4599ef303f..afcc5dd47e9 100644 --- a/mysql-test/suite/innodb/r/log_file.result +++ b/mysql-test/suite/innodb/r/log_file.result @@ -217,7 +217,7 @@ SELECT * FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS -FOUND 1 /InnoDB: Unable to open undo tablespace.*undo002/ in mysqld.1.err +FOUND 1 /InnoDB: Expected to open innodb_undo_tablespaces=3 but was able to find only 1/ in mysqld.1.err bak_ib_logfile0 bak_ib_logfile1 bak_ib_logfile2 @@ -255,7 +255,7 @@ SELECT * FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS -FOUND 1 /InnoDB: Unable to open undo tablespace.*undo001/ in mysqld.1.err +FOUND 1 /InnoDB: Expected to open innodb_undo_tablespaces=3 but was able to find only 0/ in mysqld.1.err bak_ib_logfile0 bak_ib_logfile1 bak_ib_logfile2 diff --git a/mysql-test/suite/innodb/t/log_file.test b/mysql-test/suite/innodb/t/log_file.test index 8a82ab7f29f..5c2f44303da 100644 --- a/mysql-test/suite/innodb/t/log_file.test +++ b/mysql-test/suite/innodb/t/log_file.test @@ -171,7 +171,7 @@ let SEARCH_PATTERN=undo tablespace .*undo003.* exists\. 
Creating system tablespa --source include/start_mysqld.inc eval $check_no_innodb; --source include/shutdown_mysqld.inc -let SEARCH_PATTERN=InnoDB: Unable to open undo tablespace.*undo002; +let SEARCH_PATTERN=InnoDB: Expected to open innodb_undo_tablespaces=3 but was able to find only 1; --source include/search_pattern_in_file.inc # clean up & Restore --source ../include/log_file_cleanup.inc @@ -183,7 +183,7 @@ let SEARCH_PATTERN=InnoDB: Unable to open undo tablespace.*undo002; --source include/start_mysqld.inc eval $check_no_innodb; --source include/shutdown_mysqld.inc -let SEARCH_PATTERN=InnoDB: Unable to open undo tablespace.*undo001; +let SEARCH_PATTERN=InnoDB: Expected to open innodb_undo_tablespaces=3 but was able to find only 0; --source include/search_pattern_in_file.inc # clean up & Restore diff --git a/mysql-test/suite/mariabackup/undo_space_id.result b/mysql-test/suite/mariabackup/undo_space_id.result index 96d3e2a58f4..bb0c2e13814 100644 --- a/mysql-test/suite/mariabackup/undo_space_id.result +++ b/mysql-test/suite/mariabackup/undo_space_id.result @@ -1,13 +1,13 @@ -# Create 2 UNDO TABLESPACE(UNDO003, UNDO004) +# Create 2 UNDO TABLESPACE(UNDO001(space_id = 3), UNDO002(space_id = 4)) CREATE TABLE t1(a varchar(60)) ENGINE INNODB; start transaction; INSERT INTO t1 VALUES(1); # xtrabackup backup # Display undo log files from target directory -undo003 -undo004 +undo001 +undo002 # xtrabackup prepare # Display undo log files from targer directory -undo003 -undo004 +undo001 +undo002 DROP TABLE t1; diff --git a/mysql-test/suite/mariabackup/undo_space_id.test b/mysql-test/suite/mariabackup/undo_space_id.test index 8adeb18e5a7..e68acc169fd 100644 --- a/mysql-test/suite/mariabackup/undo_space_id.test +++ b/mysql-test/suite/mariabackup/undo_space_id.test @@ -1,7 +1,7 @@ --source include/have_innodb.inc --source include/have_debug.inc ---echo # Create 2 UNDO TABLESPACE(UNDO003, UNDO004) +--echo # Create 2 UNDO TABLESPACE(UNDO001(space_id = 3), UNDO002(space_id = 4)) 
let $basedir=$MYSQLTEST_VARDIR/tmp/backup; diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index a1efd28f7ed..1215d22af42 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -591,6 +591,12 @@ struct fil_node_t { return(handle != OS_FILE_CLOSED); } + /** Fill the metadata from page0. + @param[in] first_page page0 from the tablespace + @param[in] first whether this is the first read + @return whether the page was found valid */ + bool fill_metadata(byte* first_page, bool first); + /** Read the first page of a data file. @param[in] first whether this is the very first read @return whether the page was found valid */ diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index a2243dec9cf..c2656e46a7f 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -7689,14 +7689,20 @@ void fil_node_t::find_metadata(os_file_t file } } -/** Read the first page of a data file. -@param[in] first whether this is the very first read +/** Fill the metadata from first page. 
+@param[in] first_page first page of the file +@param[in] first whether this is the very first read @return whether the page was found valid */ -bool fil_node_t::read_page0(bool first) +bool fil_node_t::fill_metadata(byte* first_page, bool first) { - ut_ad(mutex_own(&fil_system.mutex)); - ut_a(space->purpose != FIL_TYPE_LOG); + const ulint space_id = fsp_header_get_space_id(first_page); + ulint flags = fsp_header_get_flags(first_page); + const ulint size = fsp_header_get_field(first_page, FSP_SIZE); + const ulint free_limit = fsp_header_get_field(first_page, FSP_FREE_LIMIT); + const ulint free_len = flst_get_len(FSP_HEADER_OFFSET + FSP_FREE + + first_page); const ulint psize = space->physical_size(); + #ifndef _WIN32 struct stat statbuf; if (fstat(handle, &statbuf)) { @@ -7717,22 +7723,6 @@ bool fil_node_t::read_page0(bool first) return false; } - byte* buf2 = static_cast<byte*>(ut_malloc_nokey(2 * psize)); - - /* Align the memory for file i/o if we might have O_DIRECT set */ - byte* page = static_cast<byte*>(ut_align(buf2, psize)); - IORequest request(IORequest::READ); - if (os_file_read(request, handle, page, 0, psize) != DB_SUCCESS) { - ib::error() << "Unable to read first page of file " << name; - ut_free(buf2); - return false; - } - const ulint space_id = fsp_header_get_space_id(page); - ulint flags = fsp_header_get_flags(page); - const ulint size = fsp_header_get_field(page, FSP_SIZE); - const ulint free_limit = fsp_header_get_field(page, FSP_FREE_LIMIT); - const ulint free_len = flst_get_len(FSP_HEADER_OFFSET + FSP_FREE - + page); if (!fil_space_t::is_valid_flags(flags, space->id)) { ulint cflags = fsp_flags_convert_from_101(flags); if (cflags == ULINT_UNDEFINED) { @@ -7742,7 +7732,6 @@ invalid: << ib::hex(space->flags) << " but found " << ib::hex(flags) << " in the file " << name; - ut_free(buf2); return false; } @@ -7762,9 +7751,8 @@ invalid: /* Try to read crypt_data from page 0 if it is not yet read. 
*/ if (!space->crypt_data) { space->crypt_data = fil_space_read_crypt_data( - fil_space_t::zip_size(flags), page); + fil_space_t::zip_size(flags), first_page); } - ut_free(buf2); if (UNIV_UNLIKELY(space_id != space->id)) { ib::error() << "Expected tablespace id " << space->id @@ -7811,6 +7799,52 @@ invalid: return true; } +/** Read the first page of a data file. +@param[in] first whether this is the very first read +@return whether the page was found valid */ +bool fil_node_t::read_page0(bool first) +{ + ut_ad(mutex_own(&fil_system.mutex)); + ut_a(space->purpose != FIL_TYPE_LOG); + const ulint psize = space->physical_size(); + +#ifndef _WIN32 + struct stat statbuf; + if (fstat(handle, &statbuf)) { + return false; + } + block_size = statbuf.st_blksize; + os_offset_t size_bytes = statbuf.st_size; +#else + os_offset_t size_bytes = os_file_get_size(handle); + ut_a(size_bytes != (os_offset_t) -1); +#endif + const ulint min_size = FIL_IBD_FILE_INITIAL_SIZE * psize; + + if (size_bytes < min_size) { + ib::error() << "The size of the file " << name + << " is only " << size_bytes + << " bytes, should be at least " << min_size; + return false; + } + + byte* buf2 = static_cast<byte*>(ut_malloc_nokey(2 * psize)); + /* Align the memory for file i/o if we might have O_DIRECT set */ + byte* page = static_cast<byte*>(ut_align(buf2, psize)); + IORequest request(IORequest::READ); + if (os_file_read(request, handle, page, 0, psize) != DB_SUCCESS) { + ib::error() << "Unable to read first page of file " << name; + ut_free(buf2); + return false; + } + + bool is_filled = fill_metadata(page, first); + + ut_free(buf2); + + return is_filled; +} + #else #include "univ.i" #endif /* !UNIV_INNOCHECKSUM */ diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 9c3046a53ac..b1fbac26bbb 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -789,6 +789,156 @@ srv_check_undo_redo_logs_exists() return(DB_SUCCESS); } +/** Validate 
the number of undo tables given with the number of +opened undo tablespaces. +@return DB_SUCCESS if it is valid. */ +static dberr_t srv_validate_undo_tablespaces() +{ + /* If the user says that there are fewer than what we find we + tolerate that discrepancy but not the inverse. Because there could + be unused undo tablespaces for future use. */ + + if (srv_undo_tablespaces > srv_undo_tablespaces_open) { + ib::error() << "Expected to open innodb_undo_tablespaces=" + << srv_undo_tablespaces + << " but was able to find only " + << srv_undo_tablespaces_open; + + return DB_ERROR; + + } else if (srv_undo_tablespaces_open > 0) { + + ib::info() << "Opened " << srv_undo_tablespaces_open + << " undo tablespaces"; + + if (srv_undo_tablespaces == 0) { + ib::warn() << "innodb_undo_tablespaces=0 disables" + " dedicated undo log tablespaces"; + } + } + + return DB_SUCCESS; +} + +/** Get the number of undo tablespaces from TRX_SYS page. +@return error code. */ +static dberr_t srv_check_undo_tablespaces() +{ + ulint undo_tablespace_ids[TRX_SYS_N_RSEGS + 1]; + memset(undo_tablespace_ids, 0x0, sizeof(undo_tablespace_ids)); + srv_undo_tablespaces_active = trx_rseg_get_n_undo_tablespaces( + undo_tablespace_ids); + return srv_validate_undo_tablespaces(); +} + +/** Open all undo tablespaces for normal shutdown/crash recovery. It should +avoid the fetching of TRX_SYS page. +@return error code. 
*/ +static dberr_t srv_undo_tablespaces_open_all() +{ + char undo_name[sizeof "innodb_undo000"]; + + for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) { + + char name[OS_FILE_MAX_PATH]; + pfs_os_file_t fh; + bool success; + + snprintf(name, sizeof(name), + "%s%cundo%03zu", + srv_undo_dir, OS_PATH_SEPARATOR, i + 1); + + fh = os_file_create( + innodb_data_file_key, name, OS_FILE_OPEN + | OS_FILE_ON_ERROR_NO_EXIT | OS_FILE_ON_ERROR_SILENT, + OS_FILE_AIO, OS_DATA_FILE, srv_read_only_mode, &success); + + if (!success) { + break; + } + + os_offset_t size = os_file_get_size(fh); + ut_a(size != os_offset_t(-1)); + + byte* buf2 = static_cast<byte*>( + ut_malloc_nokey(2 * UNIV_PAGE_SIZE_MAX)); + + /* Align the memory for file i/o if we might have O_DIRECT set */ + byte* page = static_cast<byte*>(ut_align(buf2, srv_page_size)); + + IORequest request(IORequest::READ); + + if (os_file_read(request, fh, page, 0, srv_page_size) + != DB_SUCCESS) { + ib::error() << "Unable to read first page of file " << name; + ut_free(buf2); + return DB_ERROR; + } + + ulint space_id = fsp_header_get_space_id(page); + + ut_ad(space_id != 0); + + /* Load the tablespace into InnoDB's internal data structures. */ + + /* We set the biggest space id to the undo tablespace + because InnoDB hasn't opened any other tablespace apart + from the system tablespace. 
*/ + + fil_set_max_space_id_if_bigger(space_id); + + ulint fsp_flags; + switch (srv_checksum_algorithm) { + case SRV_CHECKSUM_ALGORITHM_FULL_CRC32: + case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32: + fsp_flags = (FSP_FLAGS_FCRC32_MASK_MARKER + | FSP_FLAGS_FCRC32_PAGE_SSIZE()); + break; + default: + fsp_flags = FSP_FLAGS_PAGE_SSIZE(); + } + + snprintf(undo_name, sizeof(undo_name), + "innodb_undo%03u", static_cast<unsigned>(space_id)); + + fil_space_t* space = fil_space_create( + undo_name, space_id, fsp_flags, + FIL_TYPE_TABLESPACE, NULL); + + ut_a(fil_validate()); + ut_a(space); + + fil_node_t* file = space->add(name, fh, 0, false, true); + + mutex_enter(&fil_system.mutex); + + success = file->fill_metadata(page, true); + + if (!success) { + os_file_close(file->handle); + file->handle = OS_FILE_CLOSED; + ut_a(fil_system.n_open > 0); + fil_system.n_open--; + } + + mutex_exit(&fil_system.mutex); + + /* Note the first undo tablespace id in case of + no active undo tablespace. */ + if (0 == srv_undo_tablespaces_open++) { + srv_undo_space_id_start = space_id; + } + + ut_free(buf2); + } + + if (srv_undo_tablespaces_open == 0) { + srv_undo_space_id_start = 0; + } + + return DB_SUCCESS; +} + /** Open the configured number of dedicated undo tablespaces. @param[in] create_new_db whether the database is being initialized @return DB_SUCCESS or error code */ @@ -830,7 +980,7 @@ srv_undo_tablespaces_init(bool create_new_db) snprintf( name, sizeof(name), "%s%cundo%03zu", - srv_undo_dir, OS_PATH_SEPARATOR, space_id); + srv_undo_dir, OS_PATH_SEPARATOR, i + 1); if (i == 0) { srv_undo_space_id_start = space_id; @@ -853,11 +1003,13 @@ srv_undo_tablespaces_init(bool create_new_db) the system tablespace (0). If we are creating a new instance then we build the undo_tablespace_ids ourselves since they don't already exist. */ - n_undo_tablespaces = create_new_db - || srv_operation == SRV_OPERATION_BACKUP - || srv_operation == SRV_OPERATION_RESTORE_DELTA - ? 
srv_undo_tablespaces - : trx_rseg_get_n_undo_tablespaces(undo_tablespace_ids); + + if (create_new_db + || srv_operation == SRV_OPERATION_BACKUP + || srv_operation == SRV_OPERATION_RESTORE_DELTA) { + n_undo_tablespaces = srv_undo_tablespaces; + } + srv_undo_tablespaces_active = srv_undo_tablespaces; switch (srv_operation) { @@ -887,7 +1039,7 @@ srv_undo_tablespaces_init(bool create_new_db) name, sizeof(name), "%s%cundo%03zu", srv_undo_dir, OS_PATH_SEPARATOR, - undo_tablespace_ids[i]); + i + 1); /* Should be no gaps in undo tablespace ids. */ ut_a(!i || prev_space_id + 1 == undo_tablespace_ids[i]); @@ -941,27 +1093,10 @@ srv_undo_tablespaces_init(bool create_new_db) srv_undo_space_id_start = 0; } - /* If the user says that there are fewer than what we find we - tolerate that discrepancy but not the inverse. Because there could - be unused undo tablespaces for future use. */ - - if (srv_undo_tablespaces > n_undo_tablespaces) { - ib::error() << "Expected to open innodb_undo_tablespaces=" - << srv_undo_tablespaces - << " but was able to find only " - << n_undo_tablespaces; - - return(err != DB_SUCCESS ? err : DB_ERROR); - - } else if (n_undo_tablespaces > 0) { - - ib::info() << "Opened " << n_undo_tablespaces - << " undo tablespaces"; - - if (srv_undo_tablespaces == 0) { - ib::warn() << "innodb_undo_tablespaces=0 disables" - " dedicated undo log tablespaces"; - } + if (srv_validate_undo_tablespaces() != DB_SUCCESS) { + goto func_exit; + } else { + err = DB_SUCCESS; } if (create_new_db) { @@ -976,7 +1111,8 @@ srv_undo_tablespaces_init(bool create_new_db) } } - return(DB_SUCCESS); +func_exit: + return (err != DB_SUCCESS ? err : DB_SUCCESS); } /** Create the temporary file tablespace. 
@@ -1788,7 +1924,13 @@ files_checked: fil_open_log_and_system_tablespace_files(); ut_d(fil_system.sys_space->recv_size = srv_sys_space_size_debug); - err = srv_undo_tablespaces_init(create_new_db); + if (create_new_db + || srv_operation == SRV_OPERATION_BACKUP + || srv_operation == SRV_OPERATION_RESTORE_DELTA) { + err = srv_undo_tablespaces_init(create_new_db); + } else { + err = srv_undo_tablespaces_open_all(); + } /* If the force recovery is set very high then we carry on regardless of all errors. Basically this is fingers crossed mode. */ @@ -1857,14 +1999,6 @@ files_checked: return(srv_init_abort(err)); } } else { - /* Work around the bug that we were performing a dirty read of - at least the TRX_SYS page into the buffer pool above, without - reading or applying any redo logs. - - MDEV-19229 FIXME: Remove the dirty reads and this call. - Add an assertion that the buffer pool is empty. */ - buf_pool_invalidate(); - /* We always try to do a recovery, even if the database had been shut down normally: this is the normal startup path */ @@ -1888,6 +2022,11 @@ files_checked: case SRV_OPERATION_RESTORE: /* This must precede recv_apply_hashed_log_recs(true). */ + err = srv_check_undo_tablespaces(); + if (err != DB_SUCCESS) { + return srv_init_abort(err); + } + trx_lists_init_at_db_start(); break; case SRV_OPERATION_RESTORE_DELTA: |