summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Thirunarayanan Balathandayuthapani <thiru@mariadb.com> 2019-11-25 16:26:50 +0530
committer: Thirunarayanan Balathandayuthapani <thiru@mariadb.com> 2019-11-25 16:30:06 +0530
commit: 4a49cabb6a7c8bd4764a090d7af767910bf67aa4 (patch)
tree: e9e896c902455dd69099256d20a2457dc8b7245d
parent: 4111a53079da9850c630ce30eec7f8a38744eacd (diff)
download: mariadb-git-bb-10.4-MDEV-14481-TRX_SYS.tar.gz
Avoid TRX_SYS page access before reading redo log (branch: bb-10.4-MDEV-14481-TRX_SYS)
This change avoids accessing the TRX_SYS page before the redo log has been read.
-rw-r--r-- mysql-test/suite/innodb/r/log_file.result 4
-rw-r--r-- mysql-test/suite/innodb/t/log_file.test 4
-rw-r--r-- mysql-test/suite/mariabackup/undo_space_id.result 10
-rw-r--r-- mysql-test/suite/mariabackup/undo_space_id.test 2
-rw-r--r-- storage/innobase/include/fil0fil.h 6
-rw-r--r-- storage/innobase/os/os0file.cc 82
-rw-r--r-- storage/innobase/srv/srv0start.cc 215
7 files changed, 251 insertions, 72 deletions
diff --git a/mysql-test/suite/innodb/r/log_file.result b/mysql-test/suite/innodb/r/log_file.result
index a4599ef303f..afcc5dd47e9 100644
--- a/mysql-test/suite/innodb/r/log_file.result
+++ b/mysql-test/suite/innodb/r/log_file.result
@@ -217,7 +217,7 @@ SELECT * FROM INFORMATION_SCHEMA.ENGINES
WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
-FOUND 1 /InnoDB: Unable to open undo tablespace.*undo002/ in mysqld.1.err
+FOUND 1 /InnoDB: Expected to open innodb_undo_tablespaces=3 but was able to find only 1/ in mysqld.1.err
bak_ib_logfile0
bak_ib_logfile1
bak_ib_logfile2
@@ -255,7 +255,7 @@ SELECT * FROM INFORMATION_SCHEMA.ENGINES
WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
-FOUND 1 /InnoDB: Unable to open undo tablespace.*undo001/ in mysqld.1.err
+FOUND 1 /InnoDB: Expected to open innodb_undo_tablespaces=3 but was able to find only 0/ in mysqld.1.err
bak_ib_logfile0
bak_ib_logfile1
bak_ib_logfile2
diff --git a/mysql-test/suite/innodb/t/log_file.test b/mysql-test/suite/innodb/t/log_file.test
index 8a82ab7f29f..5c2f44303da 100644
--- a/mysql-test/suite/innodb/t/log_file.test
+++ b/mysql-test/suite/innodb/t/log_file.test
@@ -171,7 +171,7 @@ let SEARCH_PATTERN=undo tablespace .*undo003.* exists\. Creating system tablespa
--source include/start_mysqld.inc
eval $check_no_innodb;
--source include/shutdown_mysqld.inc
-let SEARCH_PATTERN=InnoDB: Unable to open undo tablespace.*undo002;
+let SEARCH_PATTERN=InnoDB: Expected to open innodb_undo_tablespaces=3 but was able to find only 1;
--source include/search_pattern_in_file.inc
# clean up & Restore
--source ../include/log_file_cleanup.inc
@@ -183,7 +183,7 @@ let SEARCH_PATTERN=InnoDB: Unable to open undo tablespace.*undo002;
--source include/start_mysqld.inc
eval $check_no_innodb;
--source include/shutdown_mysqld.inc
-let SEARCH_PATTERN=InnoDB: Unable to open undo tablespace.*undo001;
+let SEARCH_PATTERN=InnoDB: Expected to open innodb_undo_tablespaces=3 but was able to find only 0;
--source include/search_pattern_in_file.inc
# clean up & Restore
diff --git a/mysql-test/suite/mariabackup/undo_space_id.result b/mysql-test/suite/mariabackup/undo_space_id.result
index 96d3e2a58f4..bb0c2e13814 100644
--- a/mysql-test/suite/mariabackup/undo_space_id.result
+++ b/mysql-test/suite/mariabackup/undo_space_id.result
@@ -1,13 +1,13 @@
-# Create 2 UNDO TABLESPACE(UNDO003, UNDO004)
+# Create 2 UNDO TABLESPACE(UNDO001(space_id = 3), UNDO002(space_id = 4))
CREATE TABLE t1(a varchar(60)) ENGINE INNODB;
start transaction;
INSERT INTO t1 VALUES(1);
# xtrabackup backup
# Display undo log files from target directory
-undo003
-undo004
+undo001
+undo002
# xtrabackup prepare
# Display undo log files from targer directory
-undo003
-undo004
+undo001
+undo002
DROP TABLE t1;
diff --git a/mysql-test/suite/mariabackup/undo_space_id.test b/mysql-test/suite/mariabackup/undo_space_id.test
index 8adeb18e5a7..e68acc169fd 100644
--- a/mysql-test/suite/mariabackup/undo_space_id.test
+++ b/mysql-test/suite/mariabackup/undo_space_id.test
@@ -1,7 +1,7 @@
--source include/have_innodb.inc
--source include/have_debug.inc
---echo # Create 2 UNDO TABLESPACE(UNDO003, UNDO004)
+--echo # Create 2 UNDO TABLESPACE(UNDO001(space_id = 3), UNDO002(space_id = 4))
let $basedir=$MYSQLTEST_VARDIR/tmp/backup;
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index a1efd28f7ed..1215d22af42 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -591,6 +591,12 @@ struct fil_node_t {
return(handle != OS_FILE_CLOSED);
}
+ /** Fill the metadata from page0.
+ @param[in] first_page page0 from the tablespace
+ @param[in] first whether this is the first read
+ @return whether the page was found valid */
+ bool fill_metadata(byte* first_page, bool first);
+
/** Read the first page of a data file.
@param[in] first whether this is the very first read
@return whether the page was found valid */
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index a2243dec9cf..c2656e46a7f 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -7689,14 +7689,20 @@ void fil_node_t::find_metadata(os_file_t file
}
}
-/** Read the first page of a data file.
-@param[in] first whether this is the very first read
+/** Fill the metadata from first page.
+@param[in] first_page first page of the file
+@param[in] first whether this is the very first read
@return whether the page was found valid */
-bool fil_node_t::read_page0(bool first)
+bool fil_node_t::fill_metadata(byte* first_page, bool first)
{
- ut_ad(mutex_own(&fil_system.mutex));
- ut_a(space->purpose != FIL_TYPE_LOG);
+ const ulint space_id = fsp_header_get_space_id(first_page);
+ ulint flags = fsp_header_get_flags(first_page);
+ const ulint size = fsp_header_get_field(first_page, FSP_SIZE);
+ const ulint free_limit = fsp_header_get_field(first_page, FSP_FREE_LIMIT);
+ const ulint free_len = flst_get_len(FSP_HEADER_OFFSET + FSP_FREE
+ + first_page);
const ulint psize = space->physical_size();
+
#ifndef _WIN32
struct stat statbuf;
if (fstat(handle, &statbuf)) {
@@ -7717,22 +7723,6 @@ bool fil_node_t::read_page0(bool first)
return false;
}
- byte* buf2 = static_cast<byte*>(ut_malloc_nokey(2 * psize));
-
- /* Align the memory for file i/o if we might have O_DIRECT set */
- byte* page = static_cast<byte*>(ut_align(buf2, psize));
- IORequest request(IORequest::READ);
- if (os_file_read(request, handle, page, 0, psize) != DB_SUCCESS) {
- ib::error() << "Unable to read first page of file " << name;
- ut_free(buf2);
- return false;
- }
- const ulint space_id = fsp_header_get_space_id(page);
- ulint flags = fsp_header_get_flags(page);
- const ulint size = fsp_header_get_field(page, FSP_SIZE);
- const ulint free_limit = fsp_header_get_field(page, FSP_FREE_LIMIT);
- const ulint free_len = flst_get_len(FSP_HEADER_OFFSET + FSP_FREE
- + page);
if (!fil_space_t::is_valid_flags(flags, space->id)) {
ulint cflags = fsp_flags_convert_from_101(flags);
if (cflags == ULINT_UNDEFINED) {
@@ -7742,7 +7732,6 @@ invalid:
<< ib::hex(space->flags)
<< " but found " << ib::hex(flags)
<< " in the file " << name;
- ut_free(buf2);
return false;
}
@@ -7762,9 +7751,8 @@ invalid:
/* Try to read crypt_data from page 0 if it is not yet read. */
if (!space->crypt_data) {
space->crypt_data = fil_space_read_crypt_data(
- fil_space_t::zip_size(flags), page);
+ fil_space_t::zip_size(flags), first_page);
}
- ut_free(buf2);
if (UNIV_UNLIKELY(space_id != space->id)) {
ib::error() << "Expected tablespace id " << space->id
@@ -7811,6 +7799,52 @@ invalid:
return true;
}
+/** Read the first page of a data file.
+@param[in] first whether this is the very first read
+@return whether the page was found valid */
+bool fil_node_t::read_page0(bool first)
+{
+ ut_ad(mutex_own(&fil_system.mutex));
+ ut_a(space->purpose != FIL_TYPE_LOG);
+ const ulint psize = space->physical_size();
+
+#ifndef _WIN32
+ struct stat statbuf;
+ if (fstat(handle, &statbuf)) {
+ return false;
+ }
+ block_size = statbuf.st_blksize;
+ os_offset_t size_bytes = statbuf.st_size;
+#else
+ os_offset_t size_bytes = os_file_get_size(handle);
+ ut_a(size_bytes != (os_offset_t) -1);
+#endif
+ const ulint min_size = FIL_IBD_FILE_INITIAL_SIZE * psize;
+
+ if (size_bytes < min_size) {
+ ib::error() << "The size of the file " << name
+ << " is only " << size_bytes
+ << " bytes, should be at least " << min_size;
+ return false;
+ }
+
+ byte* buf2 = static_cast<byte*>(ut_malloc_nokey(2 * psize));
+ /* Align the memory for file i/o if we might have O_DIRECT set */
+ byte* page = static_cast<byte*>(ut_align(buf2, psize));
+ IORequest request(IORequest::READ);
+ if (os_file_read(request, handle, page, 0, psize) != DB_SUCCESS) {
+ ib::error() << "Unable to read first page of file " << name;
+ ut_free(buf2);
+ return false;
+ }
+
+ bool is_filled = fill_metadata(page, first);
+
+ ut_free(buf2);
+
+ return is_filled;
+}
+
#else
#include "univ.i"
#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 9c3046a53ac..b1fbac26bbb 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -789,6 +789,156 @@ srv_check_undo_redo_logs_exists()
return(DB_SUCCESS);
}
+/** Validate the configured number of undo tablespaces against the number
+of opened undo tablespaces.
+@return DB_SUCCESS if it is valid. */
+static dberr_t srv_validate_undo_tablespaces()
+{
+ /* If the user says that there are fewer than what we find we
+ tolerate that discrepancy but not the inverse. Because there could
+ be unused undo tablespaces for future use. */
+
+ if (srv_undo_tablespaces > srv_undo_tablespaces_open) {
+ ib::error() << "Expected to open innodb_undo_tablespaces="
+ << srv_undo_tablespaces
+ << " but was able to find only "
+ << srv_undo_tablespaces_open;
+
+ return DB_ERROR;
+
+ } else if (srv_undo_tablespaces_open > 0) {
+
+ ib::info() << "Opened " << srv_undo_tablespaces_open
+ << " undo tablespaces";
+
+ if (srv_undo_tablespaces == 0) {
+ ib::warn() << "innodb_undo_tablespaces=0 disables"
+ " dedicated undo log tablespaces";
+ }
+ }
+
+ return DB_SUCCESS;
+}
+
+/** Get the number of undo tablespaces from TRX_SYS page.
+@return error code. */
+static dberr_t srv_check_undo_tablespaces()
+{
+ ulint undo_tablespace_ids[TRX_SYS_N_RSEGS + 1];
+ memset(undo_tablespace_ids, 0x0, sizeof(undo_tablespace_ids));
+ srv_undo_tablespaces_active = trx_rseg_get_n_undo_tablespaces(
+ undo_tablespace_ids);
+ return srv_validate_undo_tablespaces();
+}
+
+/** Open all undo tablespaces for normal shutdown/crash recovery.
+It avoids fetching the TRX_SYS page.
+@return error code. */
+static dberr_t srv_undo_tablespaces_open_all()
+{
+ char undo_name[sizeof "innodb_undo000"];
+
+ for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
+
+ char name[OS_FILE_MAX_PATH];
+ pfs_os_file_t fh;
+ bool success;
+
+ snprintf(name, sizeof(name),
+ "%s%cundo%03zu",
+ srv_undo_dir, OS_PATH_SEPARATOR, i + 1);
+
+ fh = os_file_create(
+ innodb_data_file_key, name, OS_FILE_OPEN
+ | OS_FILE_ON_ERROR_NO_EXIT | OS_FILE_ON_ERROR_SILENT,
+ OS_FILE_AIO, OS_DATA_FILE, srv_read_only_mode, &success);
+
+ if (!success) {
+ break;
+ }
+
+ os_offset_t size = os_file_get_size(fh);
+ ut_a(size != os_offset_t(-1));
+
+ byte* buf2 = static_cast<byte*>(
+ ut_malloc_nokey(2 * UNIV_PAGE_SIZE_MAX));
+
+ /* Align the memory for file i/o if we might have O_DIRECT set */
+ byte* page = static_cast<byte*>(ut_align(buf2, srv_page_size));
+
+ IORequest request(IORequest::READ);
+
+ if (os_file_read(request, fh, page, 0, srv_page_size)
+ != DB_SUCCESS) {
+ ib::error() << "Unable to read first page of file " << name;
+ ut_free(buf2);
+ return DB_ERROR;
+ }
+
+ ulint space_id = fsp_header_get_space_id(page);
+
+ ut_ad(space_id != 0);
+
+ /* Load the tablespace into InnoDB's internal data structures. */
+
+ /* We set the biggest space id to the undo tablespace
+ because InnoDB hasn't opened any other tablespace apart
+ from the system tablespace. */
+
+ fil_set_max_space_id_if_bigger(space_id);
+
+ ulint fsp_flags;
+ switch (srv_checksum_algorithm) {
+ case SRV_CHECKSUM_ALGORITHM_FULL_CRC32:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32:
+ fsp_flags = (FSP_FLAGS_FCRC32_MASK_MARKER
+ | FSP_FLAGS_FCRC32_PAGE_SSIZE());
+ break;
+ default:
+ fsp_flags = FSP_FLAGS_PAGE_SSIZE();
+ }
+
+ snprintf(undo_name, sizeof(undo_name),
+ "innodb_undo%03u", static_cast<unsigned>(space_id));
+
+ fil_space_t* space = fil_space_create(
+ undo_name, space_id, fsp_flags,
+ FIL_TYPE_TABLESPACE, NULL);
+
+ ut_a(fil_validate());
+ ut_a(space);
+
+ fil_node_t* file = space->add(name, fh, 0, false, true);
+
+ mutex_enter(&fil_system.mutex);
+
+ success = file->fill_metadata(page, true);
+
+ if (!success) {
+ os_file_close(file->handle);
+ file->handle = OS_FILE_CLOSED;
+ ut_a(fil_system.n_open > 0);
+ fil_system.n_open--;
+ }
+
+ mutex_exit(&fil_system.mutex);
+
+ /* Note the first undo tablespace id in case of
+ no active undo tablespace. */
+ if (0 == srv_undo_tablespaces_open++) {
+ srv_undo_space_id_start = space_id;
+ }
+
+ ut_free(buf2);
+ }
+
+ if (srv_undo_tablespaces_open == 0) {
+ srv_undo_space_id_start = 0;
+ }
+
+ return DB_SUCCESS;
+}
+
/** Open the configured number of dedicated undo tablespaces.
@param[in] create_new_db whether the database is being initialized
@return DB_SUCCESS or error code */
@@ -830,7 +980,7 @@ srv_undo_tablespaces_init(bool create_new_db)
snprintf(
name, sizeof(name),
"%s%cundo%03zu",
- srv_undo_dir, OS_PATH_SEPARATOR, space_id);
+ srv_undo_dir, OS_PATH_SEPARATOR, i + 1);
if (i == 0) {
srv_undo_space_id_start = space_id;
@@ -853,11 +1003,13 @@ srv_undo_tablespaces_init(bool create_new_db)
the system tablespace (0). If we are creating a new instance then
we build the undo_tablespace_ids ourselves since they don't
already exist. */
- n_undo_tablespaces = create_new_db
- || srv_operation == SRV_OPERATION_BACKUP
- || srv_operation == SRV_OPERATION_RESTORE_DELTA
- ? srv_undo_tablespaces
- : trx_rseg_get_n_undo_tablespaces(undo_tablespace_ids);
+
+ if (create_new_db
+ || srv_operation == SRV_OPERATION_BACKUP
+ || srv_operation == SRV_OPERATION_RESTORE_DELTA) {
+ n_undo_tablespaces = srv_undo_tablespaces;
+ }
+
srv_undo_tablespaces_active = srv_undo_tablespaces;
switch (srv_operation) {
@@ -887,7 +1039,7 @@ srv_undo_tablespaces_init(bool create_new_db)
name, sizeof(name),
"%s%cundo%03zu",
srv_undo_dir, OS_PATH_SEPARATOR,
- undo_tablespace_ids[i]);
+ i + 1);
/* Should be no gaps in undo tablespace ids. */
ut_a(!i || prev_space_id + 1 == undo_tablespace_ids[i]);
@@ -941,27 +1093,10 @@ srv_undo_tablespaces_init(bool create_new_db)
srv_undo_space_id_start = 0;
}
- /* If the user says that there are fewer than what we find we
- tolerate that discrepancy but not the inverse. Because there could
- be unused undo tablespaces for future use. */
-
- if (srv_undo_tablespaces > n_undo_tablespaces) {
- ib::error() << "Expected to open innodb_undo_tablespaces="
- << srv_undo_tablespaces
- << " but was able to find only "
- << n_undo_tablespaces;
-
- return(err != DB_SUCCESS ? err : DB_ERROR);
-
- } else if (n_undo_tablespaces > 0) {
-
- ib::info() << "Opened " << n_undo_tablespaces
- << " undo tablespaces";
-
- if (srv_undo_tablespaces == 0) {
- ib::warn() << "innodb_undo_tablespaces=0 disables"
- " dedicated undo log tablespaces";
- }
+ if (srv_validate_undo_tablespaces() != DB_SUCCESS) {
+ goto func_exit;
+ } else {
+ err = DB_SUCCESS;
}
if (create_new_db) {
@@ -976,7 +1111,8 @@ srv_undo_tablespaces_init(bool create_new_db)
}
}
- return(DB_SUCCESS);
+func_exit:
+ return (err != DB_SUCCESS ? err : DB_SUCCESS);
}
/** Create the temporary file tablespace.
@@ -1788,7 +1924,13 @@ files_checked:
fil_open_log_and_system_tablespace_files();
ut_d(fil_system.sys_space->recv_size = srv_sys_space_size_debug);
- err = srv_undo_tablespaces_init(create_new_db);
+ if (create_new_db
+ || srv_operation == SRV_OPERATION_BACKUP
+ || srv_operation == SRV_OPERATION_RESTORE_DELTA) {
+ err = srv_undo_tablespaces_init(create_new_db);
+ } else {
+ err = srv_undo_tablespaces_open_all();
+ }
/* If the force recovery is set very high then we carry on regardless
of all errors. Basically this is fingers crossed mode. */
@@ -1857,14 +1999,6 @@ files_checked:
return(srv_init_abort(err));
}
} else {
- /* Work around the bug that we were performing a dirty read of
- at least the TRX_SYS page into the buffer pool above, without
- reading or applying any redo logs.
-
- MDEV-19229 FIXME: Remove the dirty reads and this call.
- Add an assertion that the buffer pool is empty. */
- buf_pool_invalidate();
-
/* We always try to do a recovery, even if the database had
been shut down normally: this is the normal startup path */
@@ -1888,6 +2022,11 @@ files_checked:
case SRV_OPERATION_RESTORE:
/* This must precede
recv_apply_hashed_log_recs(true). */
+ err = srv_check_undo_tablespaces();
+ if (err != DB_SUCCESS) {
+ return srv_init_abort(err);
+ }
+
trx_lists_init_at_db_start();
break;
case SRV_OPERATION_RESTORE_DELTA: