summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2019-04-17 15:45:53 +0300
committerMarko Mäkelä <marko.makela@mariadb.com>2019-04-17 15:59:30 +0300
commite7029e864f4b2c8fa88362677ee8150cc479f07f (patch)
tree84261da7e5a060e4fddae7187b14fcd422719c74
parentee4a2fef18136165a3267b4429e5921fc306cc20 (diff)
parent250799f961b8f261ae2922382e2c69bca7c3b6fa (diff)
downloadmariadb-git-e7029e864f4b2c8fa88362677ee8150cc479f07f.tar.gz
Merge 10.3 into 10.4
-rw-r--r--mysql-test/suite/encryption/r/corrupted_during_recovery.result22
-rw-r--r--mysql-test/suite/encryption/t/corrupted_during_recovery.test61
-rw-r--r--mysql-test/suite/innodb/r/corrupted_during_recovery.result22
-rw-r--r--mysql-test/suite/innodb/r/log_alter_table.result8
-rw-r--r--mysql-test/suite/innodb/t/corrupted_during_recovery.opt1
-rw-r--r--mysql-test/suite/innodb/t/corrupted_during_recovery.test61
-rw-r--r--mysql-test/suite/innodb/t/log_alter_table.test8
-rw-r--r--sql/sql_sequence.cc4
-rw-r--r--storage/innobase/buf/buf0buf.cc15
-rw-r--r--storage/innobase/handler/ha_innodb.cc2
-rw-r--r--storage/innobase/ibuf/ibuf0ibuf.cc3
-rw-r--r--storage/innobase/include/buf0buf.h9
-rw-r--r--storage/innobase/include/fil0fil.h3
-rw-r--r--storage/innobase/include/fsp0types.h7
-rw-r--r--storage/innobase/log/log0recv.cc305
-rw-r--r--storage/innobase/row/row0merge.cc41
-rw-r--r--storage/innobase/srv/srv0start.cc3
17 files changed, 516 insertions, 59 deletions
diff --git a/mysql-test/suite/encryption/r/corrupted_during_recovery.result b/mysql-test/suite/encryption/r/corrupted_during_recovery.result
new file mode 100644
index 00000000000..41c0d7d75a8
--- /dev/null
+++ b/mysql-test/suite/encryption/r/corrupted_during_recovery.result
@@ -0,0 +1,22 @@
+CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB, ENCRYPTED=YES;
+INSERT INTO t1 VALUES(1);
+CREATE TABLE t2(a BIGINT PRIMARY KEY) ENGINE=InnoDB, ENCRYPTED=YES;
+INSERT INTO t1 VALUES(2);
+SET GLOBAL innodb_flush_log_at_trx_commit=1;
+INSERT INTO t2 VALUES(2);
+# Kill the server
+# Corrupt the pages
+SELECT * FROM t1;
+ERROR 42000: Unknown storage engine 'InnoDB'
+SELECT * FROM t1;
+a
+1
+2
+SELECT * FROM t2;
+a
+2
+CHECK TABLE t1,t2;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+test.t2 check status OK
+DROP TABLE t1, t2;
diff --git a/mysql-test/suite/encryption/t/corrupted_during_recovery.test b/mysql-test/suite/encryption/t/corrupted_during_recovery.test
new file mode 100644
index 00000000000..44cd03e9f8a
--- /dev/null
+++ b/mysql-test/suite/encryption/t/corrupted_during_recovery.test
@@ -0,0 +1,61 @@
+--source include/have_innodb.inc
+--source include/have_file_key_management_plugin.inc
+
+--disable_query_log
+call mtr.add_suppression("InnoDB: Plugin initialization aborted");
+call mtr.add_suppression("Plugin 'InnoDB' init function returned error");
+call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed");
+call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed file read of tablespace test/t1 page");
+call mtr.add_suppression("InnoDB: Failed to read file '.*test.t1\\.ibd' at offset 3: Table is encrypted but decrypt failed");
+call mtr.add_suppression("InnoDB: The page \\[page id: space=\\d+, page number=3\\] in file '.*test.t1\\.ibd' cannot be decrypted");
+call mtr.add_suppression("InnoDB: Table in tablespace \\d+ encrypted. However key management plugin or used key_version \\d+ is not found or used encryption algorithm or method does not match. Can't continue opening the table.");
+--enable_query_log
+
+let INNODB_PAGE_SIZE=`select @@innodb_page_size`;
+CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB, ENCRYPTED=YES;
+INSERT INTO t1 VALUES(1);
+# Force a redo log checkpoint.
+let $restart_noprint=2;
+--source include/restart_mysqld.inc
+--source ../../suite/innodb/include/no_checkpoint_start.inc
+CREATE TABLE t2(a BIGINT PRIMARY KEY) ENGINE=InnoDB, ENCRYPTED=YES;
+INSERT INTO t1 VALUES(2);
+SET GLOBAL innodb_flush_log_at_trx_commit=1;
+INSERT INTO t2 VALUES(2);
+
+--let CLEANUP_IF_CHECKPOINT=DROP TABLE t1,t2;
+--source ../../suite/innodb/include/no_checkpoint_end.inc
+
+--echo # Corrupt the pages
+
+perl;
+my $ps = $ENV{INNODB_PAGE_SIZE};
+
+my $file = "$ENV{MYSQLD_DATADIR}/test/t1.ibd";
+open(FILE, "+<$file") || die "Unable to open $file";
+binmode FILE;
+seek (FILE, $ENV{INNODB_PAGE_SIZE} * 3, SEEK_SET) or die "seek";
+print FILE "junk";
+close FILE or die "close";
+
+$file = "$ENV{MYSQLD_DATADIR}/test/t2.ibd";
+open(FILE, "+<$file") || die "Unable to open $file";
+binmode FILE;
+# Corrupt pages 1 to 3. MLOG_INIT_FILE_PAGE2 should protect us!
+# Unfortunately, we are not immune to page 0 corruption.
+seek (FILE, $ps, SEEK_SET) or die "seek";
+print FILE chr(0xff) x ($ps * 3);
+close FILE or die "close";
+EOF
+
+--source include/start_mysqld.inc
+--error ER_UNKNOWN_STORAGE_ENGINE
+SELECT * FROM t1;
+let $restart_parameters=--innodb_force_recovery=1;
+--source include/restart_mysqld.inc
+
+SELECT * FROM t1;
+SELECT * FROM t2;
+CHECK TABLE t1,t2;
+
+DROP TABLE t1, t2;
diff --git a/mysql-test/suite/innodb/r/corrupted_during_recovery.result b/mysql-test/suite/innodb/r/corrupted_during_recovery.result
new file mode 100644
index 00000000000..788f17e3284
--- /dev/null
+++ b/mysql-test/suite/innodb/r/corrupted_during_recovery.result
@@ -0,0 +1,22 @@
+CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(1);
+CREATE TABLE t2(a BIGINT PRIMARY KEY) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(2);
+SET GLOBAL innodb_flush_log_at_trx_commit=1;
+INSERT INTO t2 VALUES(1);
+# Kill the server
+# Corrupt the pages
+SELECT * FROM t1;
+ERROR 42000: Unknown storage engine 'InnoDB'
+SELECT * FROM t1;
+a
+0
+2
+SELECT * FROM t2;
+a
+1
+CHECK TABLE t1,t2;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+test.t2 check status OK
+DROP TABLE t1, t2;
diff --git a/mysql-test/suite/innodb/r/log_alter_table.result b/mysql-test/suite/innodb/r/log_alter_table.result
index 55e6b84de16..9de89bebaa6 100644
--- a/mysql-test/suite/innodb/r/log_alter_table.result
+++ b/mysql-test/suite/innodb/r/log_alter_table.result
@@ -8,12 +8,12 @@
#
CREATE TABLE t1 (a INT NOT NULL, b INT UNIQUE) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1,2);
-ALTER TABLE t1 ADD PRIMARY KEY(a), ALGORITHM=INPLACE;
-ALTER TABLE t1 DROP INDEX b, ADD INDEX (b);
+ALTER TABLE t1 ADD PRIMARY KEY(a), LOCK=SHARED, ALGORITHM=INPLACE;
+ALTER TABLE t1 DROP INDEX b, ADD INDEX (b), LOCK=SHARED;
# Kill the server
# restart: --debug=d,ib_log
-FOUND 1 /scan .*: multi-log rec MLOG_FILE_CREATE2.*page .*:0/ in mysqld.1.err
-FOUND 1 /scan .*: log rec MLOG_INDEX_LOAD/ in mysqld.1.err
+FOUND 2 /scan \d+: multi-log rec MLOG_FILE_CREATE2 len \d+ page \d+:0/ in mysqld.1.err
+FOUND 3 /scan \d+: log rec MLOG_INDEX_LOAD/ in mysqld.1.err
CHECK TABLE t1;
Table Op Msg_type Msg_text
test.t1 check status OK
diff --git a/mysql-test/suite/innodb/t/corrupted_during_recovery.opt b/mysql-test/suite/innodb/t/corrupted_during_recovery.opt
new file mode 100644
index 00000000000..6051f4cd1fa
--- /dev/null
+++ b/mysql-test/suite/innodb/t/corrupted_during_recovery.opt
@@ -0,0 +1 @@
+--innodb_doublewrite=0
diff --git a/mysql-test/suite/innodb/t/corrupted_during_recovery.test b/mysql-test/suite/innodb/t/corrupted_during_recovery.test
new file mode 100644
index 00000000000..697d6e2dce0
--- /dev/null
+++ b/mysql-test/suite/innodb/t/corrupted_during_recovery.test
@@ -0,0 +1,61 @@
+--source include/have_innodb.inc
+
+--disable_query_log
+call mtr.add_suppression("InnoDB: Plugin initialization aborted");
+call mtr.add_suppression("Plugin 'InnoDB' init function returned error");
+call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed");
+call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed file read of tablespace test/t1 page");
+call mtr.add_suppression("InnoDB: Failed to read file '.*test.t1\\.ibd' at offset 3: Page read from tablespace is corrupted.");
+--enable_query_log
+
+let INNODB_PAGE_SIZE=`select @@innodb_page_size`;
+CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(1);
+# Force a redo log checkpoint.
+let $restart_noprint=2;
+--source include/restart_mysqld.inc
+--source ../include/no_checkpoint_start.inc
+CREATE TABLE t2(a BIGINT PRIMARY KEY) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(2);
+SET GLOBAL innodb_flush_log_at_trx_commit=1;
+INSERT INTO t2 VALUES(1);
+
+--let CLEANUP_IF_CHECKPOINT=DROP TABLE t1,t2;
+--source ../include/no_checkpoint_end.inc
+
+--echo # Corrupt the pages
+
+perl;
+my $ps = $ENV{INNODB_PAGE_SIZE};
+
+my $file = "$ENV{MYSQLD_DATADIR}/test/t1.ibd";
+open(FILE, "+<$file") || die "Unable to open $file";
+binmode FILE;
+sysseek(FILE, 3*$ps, 0) || die "Unable to seek $file\n";
+die "Unable to read $file" unless sysread(FILE, $page, $ps) == $ps;
+# Replace the a=1 with a=0.
+$page =~ s/\x80\x0\x0\x0\x0\x0\x0\x1/\x80\x0\x0\x0\x0\x0\x0\x0/;
+sysseek(FILE, 3*$ps, 0) || die "Unable to seek $file\n";
+syswrite(FILE, $page, $ps)==$ps || die "Unable to write $file\n";
+close FILE or die "close";
+
+$file = "$ENV{MYSQLD_DATADIR}/test/t2.ibd";
+open(FILE, "+<$file") || die "Unable to open $file";
+binmode FILE;
+# Corrupt pages 1 to 3. MLOG_INIT_FILE_PAGE2 should protect us!
+# Unfortunately, we are not immune to page 0 corruption.
+seek (FILE, $ps, SEEK_SET) or die "seek";
+print FILE chr(0xff) x ($ps * 3);
+close FILE or die "close";
+EOF
+
+--source include/start_mysqld.inc
+--error ER_UNKNOWN_STORAGE_ENGINE
+SELECT * FROM t1;
+let $restart_parameters=--innodb_force_recovery=1;
+--source include/restart_mysqld.inc
+SELECT * FROM t1;
+SELECT * FROM t2;
+CHECK TABLE t1,t2;
+
+DROP TABLE t1, t2;
diff --git a/mysql-test/suite/innodb/t/log_alter_table.test b/mysql-test/suite/innodb/t/log_alter_table.test
index 6f12dfaf0b9..c92953f16a1 100644
--- a/mysql-test/suite/innodb/t/log_alter_table.test
+++ b/mysql-test/suite/innodb/t/log_alter_table.test
@@ -19,9 +19,9 @@ CREATE TABLE t1 (a INT NOT NULL, b INT UNIQUE) ENGINE=InnoDB;
# MLOG_INDEX_LOAD will not be emitted for empty tables. Insert a row.
INSERT INTO t1 VALUES (1,2);
# We should get two MLOG_INDEX_LOAD for this.
-ALTER TABLE t1 ADD PRIMARY KEY(a), ALGORITHM=INPLACE;
+ALTER TABLE t1 ADD PRIMARY KEY(a), LOCK=SHARED, ALGORITHM=INPLACE;
# And one MLOG_INDEX_LOAD for this.
-ALTER TABLE t1 DROP INDEX b, ADD INDEX (b);
+ALTER TABLE t1 DROP INDEX b, ADD INDEX (b), LOCK=SHARED;
--let CLEANUP_IF_CHECKPOINT=DROP TABLE t1;
--source include/no_checkpoint_end.inc
@@ -32,10 +32,10 @@ ALTER TABLE t1 DROP INDEX b, ADD INDEX (b);
let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err;
let SEARCH_ABORT=NOT FOUND;
# ensure that we have exactly 2 records there.
-let SEARCH_PATTERN=scan .*: multi-log rec MLOG_FILE_CREATE2.*page .*:0;
+let SEARCH_PATTERN=scan \d+: multi-log rec MLOG_FILE_CREATE2 len \d+ page \d+:0;
--source include/search_pattern_in_file.inc
# ensure that we have exactly 3 records there.
-let SEARCH_PATTERN=scan .*: log rec MLOG_INDEX_LOAD;
+let SEARCH_PATTERN=scan \d+: log rec MLOG_INDEX_LOAD;
--source include/search_pattern_in_file.inc
CHECK TABLE t1;
diff --git a/sql/sql_sequence.cc b/sql/sql_sequence.cc
index 1ed0bb38e64..9f17590a315 100644
--- a/sql/sql_sequence.cc
+++ b/sql/sql_sequence.cc
@@ -88,13 +88,13 @@ bool sequence_definition::check_and_adjust(bool set_reserved_until)
/*
If min_value is not set, set it to LONGLONG_MIN or 1, depending on
- increment
+ real_increment
*/
if (!(used_fields & seq_field_used_min_value))
min_value= real_increment < 0 ? LONGLONG_MIN+1 : 1;
/*
- If min_value is not set, set it to LONGLONG_MAX or -1, depending on
+ If max_value is not set, set it to LONGLONG_MAX or -1, depending on
real_increment
*/
if (!(used_fields & seq_field_used_max_value))
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 24df4d76911..3fe7cac56ed 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -5597,7 +5597,15 @@ buf_page_create(
buf_block_free(free_block);
- return buf_page_get_with_no_latch(page_id, zip_size, mtr);
+ if (!recv_recovery_is_on()) {
+ return buf_page_get_with_no_latch(page_id, zip_size,
+ mtr);
+ }
+
+ mutex_exit(&recv_sys->mutex);
+ block = buf_page_get_with_no_latch(page_id, zip_size, mtr);
+ mutex_enter(&recv_sys->mutex);
+ return block;
}
/* If we get here, the page was not in buf_pool: init it there */
@@ -5663,7 +5671,9 @@ buf_page_create(
/* Delete possible entries for the page from the insert buffer:
such can exist if the page belonged to an index which was dropped */
- ibuf_merge_or_delete_for_page(NULL, page_id, zip_size, true);
+ if (!recv_recovery_is_on()) {
+ ibuf_merge_or_delete_for_page(NULL, page_id, zip_size, true);
+ }
frame = block->frame;
@@ -5678,6 +5688,7 @@ buf_page_create(
(3) key_version on encrypted pages (not page 0:0) */
memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
+ memset(frame + FIL_PAGE_LSN, 0, 8);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index c4fe6cdd449..e9a5d350f8b 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -4037,7 +4037,7 @@ static int innodb_init_params()
if (innobase_open_files > open_files_limit) {
ib::warn() << "innodb_open_files " << innobase_open_files
<< " should not be greater"
- << "than the open_files_limit " << open_files_limit;
+ << " than the open_files_limit " << open_files_limit;
if (innobase_open_files > tc_size) {
innobase_open_files = tc_size;
}
diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc
index 639836c9da9..1425f1ea289 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.cc
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
@@ -4397,7 +4397,8 @@ ibuf_merge_or_delete_for_page(
ulint dops[IBUF_OP_COUNT];
ut_ad(block == NULL || page_id == block->page.id);
- ut_ad(block == NULL || buf_block_get_io_fix(block) == BUF_IO_READ);
+ ut_ad(block == NULL || buf_block_get_io_fix(block) == BUF_IO_READ
+ || recv_recovery_is_on());
if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE
|| trx_sys_hdr_page(page_id)
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index eae73fd2847..db9aa9d66fb 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -234,6 +234,15 @@ public:
}
bool operator!=(const page_id_t& rhs) const { return !(*this == rhs); }
+ bool operator<(const page_id_t& rhs) const
+ {
+ if (m_space == rhs.m_space) {
+ return m_page_no < rhs.m_page_no;
+ }
+
+ return m_space < rhs.m_space;
+ }
+
/** Retrieve the tablespace id.
@return tablespace id */
uint32_t space() const { return m_space; }
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index b58d4e85ac9..b06cb15e64a 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -89,6 +89,9 @@ struct fil_space_t {
Protected by log_sys.mutex.
If and only if this is nonzero, the
tablespace will be in named_spaces. */
+ /** Log sequence number of the latest MLOG_INDEX_LOAD record
+ that was found while parsing the redo log */
+ lsn_t enable_lsn;
bool stop_new_ops;
/*!< we set this true when we start
deleting a single-table tablespace.
diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h
index 1b2ede47252..ee0259e5f2d 100644
--- a/storage/innobase/include/fsp0types.h
+++ b/storage/innobase/include/fsp0types.h
@@ -269,8 +269,7 @@ or have been introduced in MySQL 5.7 or 8.0:
=====================================================================
The flags below only exist in fil_space_t::flags, not in FSP_SPACE_FLAGS:
=====================================================================
- 25: DATA_DIR
- 26..27: ATOMIC_WRITES
+ 27: DATA_DIR
28..31: COMPRESSION_LEVEL
*/
@@ -278,9 +277,9 @@ The flags below only exist in fil_space_t::flags, not in FSP_SPACE_FLAGS:
#define FSP_FLAGS_MEM_MASK (~0U << FSP_FLAGS_MEM_DATA_DIR)
/** Zero relative shift position of the DATA_DIR flag */
-#define FSP_FLAGS_MEM_DATA_DIR 25
+#define FSP_FLAGS_MEM_DATA_DIR 27
/** Zero relative shift position of the COMPRESSION_LEVEL field */
-#define FSP_FLAGS_MEM_COMPRESSION_LEVEL 26
+#define FSP_FLAGS_MEM_COMPRESSION_LEVEL 28
/** Zero relative shift position of the POST_ANTELOPE field */
#define FSP_FLAGS_POS_POST_ANTELOPE 0
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index 6305f5ec1d2..32c63546e5d 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -154,10 +154,22 @@ struct file_name_t {
/** FSP_SIZE of tablespace */
ulint size;
+ /** the log sequence number of the last observed MLOG_INDEX_LOAD
+ record for the tablespace */
+ lsn_t enable_lsn;
+
/** Constructor */
file_name_t(std::string name_, bool deleted) :
name(name_), space(NULL), status(deleted ? DELETED: NORMAL),
- size(0) {}
+ size(0), enable_lsn(0) {}
+
+ /** Report a MLOG_INDEX_LOAD operation, meaning that
+ mlog_init for any earlier LSN must be skipped.
+ @param lsn log sequence number of the MLOG_INDEX_LOAD */
+ void mlog_index_load(lsn_t lsn)
+ {
+ if (enable_lsn < lsn) enable_lsn = lsn;
+ }
};
/** Map of dirty tablespaces during recovery */
@@ -173,6 +185,8 @@ static recv_spaces_t recv_spaces;
enum recv_addr_state {
/** not yet processed */
RECV_NOT_PROCESSED,
+ /** not processed; the page will be reinitialized */
+ RECV_WILL_NOT_READ,
/** page is being read */
RECV_BEING_READ,
/** log records are being applied on the page */
@@ -215,6 +229,117 @@ void (*log_file_op)(ulint space_id, const byte* flags,
const byte* name, ulint len,
const byte* new_name, ulint new_len);
+/** Information about initializing page contents during redo log processing */
+class mlog_init_t
+{
+public:
+ /** A page initialization operation that was parsed from
+ the redo log */
+ struct init {
+ /** log sequence number of the page initialization */
+ lsn_t lsn;
+ /** Whether btr_page_create() avoided a read of the page.
+
+ At the end of the last recovery batch, ibuf_merge()
+ will invoke change buffer merge for pages that reside
+ in the buffer pool. (In the last batch, loading pages
+ would trigger change buffer merge.) */
+ bool created;
+ };
+
+private:
+ typedef std::map<const page_id_t, init,
+ std::less<const page_id_t>,
+ ut_allocator<std::pair<const page_id_t, init> > >
+ map;
+ /** Map of page initialization operations.
+ FIXME: Merge this to recv_sys->addr_hash! */
+ map inits;
+public:
+ /** Record that a page will be initialized by the redo log.
+ @param[in] space tablespace identifier
+ @param[in] page_no page number
+ @param[in] lsn log sequence number */
+ void add(ulint space, ulint page_no, lsn_t lsn)
+ {
+ ut_ad(mutex_own(&recv_sys->mutex));
+ const init init = { lsn, false };
+ std::pair<map::iterator, bool> p = inits.insert(
+ map::value_type(page_id_t(space, page_no), init));
+ ut_ad(!p.first->second.created);
+ if (!p.second && p.first->second.lsn < init.lsn) {
+ p.first->second = init;
+ }
+ }
+
+ /** Get the last stored lsn of the page id and its respective
+ init/load operation.
+ @param[in] page_id page id
+ @param[in,out] init initialize log or load log
+ @return the latest page initialization;
+ not valid after releasing recv_sys->mutex. */
+ init& last(page_id_t page_id)
+ {
+ ut_ad(mutex_own(&recv_sys->mutex));
+ return inits.find(page_id)->second;
+ }
+
+ /** At the end of each recovery batch, reset the 'created' flags. */
+ void reset()
+ {
+ ut_ad(mutex_own(&recv_sys->mutex));
+ ut_ad(recv_no_ibuf_operations);
+ for (map::iterator i= inits.begin(); i != inits.end(); i++) {
+ i->second.created = false;
+ }
+ }
+
+ /** On the last recovery batch, merge buffered changes to those
+ pages that were initialized by buf_page_create() and still reside
+ in the buffer pool. Stale pages are not allowed in the buffer pool.
+
+ Note: When MDEV-14481 implements redo log apply in the
+ background, we will have to ensure that buf_page_get_gen()
+ will not deliver stale pages to users (pages on which the
+ change buffer was not merged yet). Normally, the change
+ buffer merge is performed on I/O completion. Maybe, add a
+ flag to buf_page_t and perform the change buffer merge on
+ the first actual access?
+ @param[in,out] mtr dummy mini-transaction */
+ void ibuf_merge(mtr_t& mtr)
+ {
+ ut_ad(mutex_own(&recv_sys->mutex));
+ ut_ad(!recv_no_ibuf_operations);
+ mtr.start();
+
+ for (map::const_iterator i= inits.begin(); i != inits.end();
+ i++) {
+ if (!i->second.created) {
+ continue;
+ }
+ if (buf_block_t* block = buf_page_get_gen(
+ i->first, 0, RW_X_LATCH, NULL,
+ BUF_GET_IF_IN_POOL, __FILE__, __LINE__,
+ &mtr, NULL)) {
+ mutex_exit(&recv_sys->mutex);
+ ibuf_merge_or_delete_for_page(
+ block, i->first,
+ block->zip_size(), true);
+ mtr.commit();
+ mtr.start();
+ mutex_enter(&recv_sys->mutex);
+ }
+ }
+
+ mtr.commit();
+ }
+
+ /** Clear the data structure */
+ void clear() { inits.clear(); }
+};
+
+static mlog_init_t mlog_init;
+
/** Process a MLOG_CREATE2 record that indicates that a tablespace
is being shrunk in size.
@param[in] space_id tablespace identifier
@@ -618,6 +743,7 @@ recv_sys_close()
}
recv_spaces.clear();
+ mlog_init.clear();
}
/************************************************************
@@ -1737,6 +1863,19 @@ recv_add_to_hash_table(
recv_sys->n_addrs++;
}
+ switch (type) {
+ case MLOG_INIT_FILE_PAGE2:
+ case MLOG_ZIP_PAGE_COMPRESS:
+ case MLOG_INIT_FREE_PAGE:
+ /* Ignore any earlier redo log records for this page. */
+ ut_ad(recv_addr->state == RECV_NOT_PROCESSED
+ || recv_addr->state == RECV_WILL_NOT_READ);
+ recv_addr->state = RECV_WILL_NOT_READ;
+ mlog_init.add(space, page_no, start_lsn);
+ default:
+ break;
+ }
+
UT_LIST_ADD_LAST(recv_addr->rec_list, recv);
prev_field = &(recv->data);
@@ -1805,9 +1944,11 @@ recv_data_copy_to_buf(
lsn of a log record.
@param[in,out] block buffer pool page
@param[in,out] mtr mini-transaction
-@param[in,out] recv_addr recovery address */
+@param[in,out] recv_addr recovery address
+@param[in,out] init page initialization operation, or NULL */
static void recv_recover_page(buf_block_t* block, mtr_t& mtr,
- recv_addr_t* recv_addr)
+ recv_addr_t* recv_addr,
+ mlog_init_t::init* init = NULL)
{
page_t* page;
page_zip_des_t* page_zip;
@@ -1817,6 +1958,8 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr,
ut_ad(recv_needed_recovery);
ut_ad(recv_addr->state != RECV_BEING_PROCESSED);
ut_ad(recv_addr->state != RECV_PROCESSED);
+ ut_ad(!init || init->created);
+ ut_ad(!init || init->lsn);
if (UNIV_UNLIKELY(srv_print_verbose_log == 2)) {
fprintf(stderr, "Applying log to page %u:%u\n",
@@ -1838,7 +1981,9 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr,
page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
}
+ bool free_page = false;
lsn_t start_lsn = 0, end_lsn = 0;
+ const lsn_t init_lsn = init ? init->lsn : 0;
for (recv_t* recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
recv; recv = UT_LIST_GET_NEXT(rec_list, recv)) {
@@ -1849,8 +1994,20 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr,
if (recv->start_lsn < page_lsn) {
/* Ignore this record, because there are later changes
for this page. */
+ DBUG_LOG("ib_log", "apply skip "
+ << get_mlog_string(recv->type)
+ << " LSN " << recv->start_lsn << " < "
+ << page_lsn);
+ } else if (recv->start_lsn < init_lsn) {
+ DBUG_LOG("ib_log", "init skip "
+ << get_mlog_string(recv->type)
+ << " LSN " << recv->start_lsn << " < "
+ << init_lsn);
} else {
- if (!start_lsn) {
+ if (recv->type == MLOG_INIT_FREE_PAGE) {
+ /* This does not really modify the page. */
+ free_page = true;
+ } else if (!start_lsn) {
start_lsn = recv->start_lsn;
}
@@ -1884,7 +2041,7 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr,
block->page.id.space(),
block->page.id.page_no(), true, block, &mtr);
- lsn_t end_lsn = recv->start_lsn + recv->len;
+ end_lsn = recv->start_lsn + recv->len;
mach_write_to_8(FIL_PAGE_LSN + page, end_lsn);
mach_write_to_8(srv_page_size
- FIL_PAGE_END_LSN_OLD_CHKSUM
@@ -1911,6 +2068,13 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr,
log_flush_order_mutex_enter();
buf_flush_note_modification(block, start_lsn, end_lsn, NULL);
log_flush_order_mutex_exit();
+ } else if (free_page && init) {
+ /* There have been no operations than MLOG_INIT_FREE_PAGE.
+ Any buffered changes must not be merged. A subsequent
+ buf_page_create() from a user thread should discard
+ any buffered changes. */
+ init->created = false;
+ ut_ad(!mtr.has_modifications());
}
/* Make sure that committing mtr does not change the modification
@@ -2094,25 +2258,103 @@ ignore:
case RECV_DISCARDED:
goto ignore;
case RECV_NOT_PROCESSED:
+ case RECV_WILL_NOT_READ:
break;
}
const page_id_t page_id(recv_addr->space,
recv_addr->page_no);
- mtr.start();
- mtr.set_log_mode(MTR_LOG_NONE);
- if (buf_block_t* block = buf_page_get_gen(
- page_id, 0, RW_X_LATCH,
- NULL, BUF_GET_IF_IN_POOL,
- __FILE__, __LINE__, &mtr, NULL)) {
- buf_block_dbg_add_level(
- block, SYNC_NO_ORDER_CHECK);
- recv_recover_page(block, mtr, recv_addr);
- ut_ad(mtr.has_committed());
+ if (recv_addr->state == RECV_NOT_PROCESSED) {
+apply:
+ mtr.start();
+ mtr.set_log_mode(MTR_LOG_NONE);
+ if (buf_block_t* block = buf_page_get_gen(
+ page_id, 0, RW_X_LATCH, NULL,
+ BUF_GET_IF_IN_POOL,
+ __FILE__, __LINE__, &mtr, NULL)) {
+ buf_block_dbg_add_level(
+ block, SYNC_NO_ORDER_CHECK);
+ recv_recover_page(block, mtr,
+ recv_addr);
+ ut_ad(mtr.has_committed());
+ } else {
+ mtr.commit();
+ recv_read_in_area(page_id);
+ }
} else {
- mtr.commit();
- recv_read_in_area(page_id);
+ mlog_init_t::init& i = mlog_init.last(page_id);
+ const lsn_t end_lsn = UT_LIST_GET_LAST(
+ recv_addr->rec_list)->end_lsn;
+
+ if (end_lsn < i.lsn) {
+ DBUG_LOG("ib_log", "skip log for page "
+ << page_id
+ << " LSN " << end_lsn
+ << " < " << i.lsn);
+skip:
+ recv_addr->state = RECV_PROCESSED;
+ goto ignore;
+ }
+
+ fil_space_t* space = fil_space_acquire_for_io(
+ recv_addr->space);
+ if (!space) {
+ goto skip;
+ }
+
+ if (space->enable_lsn) {
+do_read:
+ space->release_for_io();
+ recv_addr->state = RECV_NOT_PROCESSED;
+ goto apply;
+ }
+
+ /* Determine if a tablespace could be
+ for an internal table for FULLTEXT INDEX.
+ For those tables, no MLOG_INDEX_LOAD record
+ used to be written when redo logging was
+ disabled. Hence, we cannot optimize
+ away page reads when crash-upgrading
+ from MariaDB versions before 10.4,
+ because all the redo log records for
+ initializing and modifying the page in
+ the past could be older than the page
+ in the data file.
+
+ The check is too broad, causing all
+ tables whose names start with FTS_ to
+ skip the optimization. */
+ if ((log_sys.log.format
+ & ~LOG_HEADER_FORMAT_ENCRYPTED)
+ != LOG_HEADER_FORMAT_10_4
+ && strstr(space->name, "/FTS_")) {
+ goto do_read;
+ }
+
+ mtr.start();
+ mtr.set_log_mode(MTR_LOG_NONE);
+ buf_block_t* block = buf_page_create(
+ page_id, space->zip_size(), &mtr);
+ if (recv_addr->state == RECV_PROCESSED) {
+ /* The page happened to exist
+ in the buffer pool, or it was
+ just being read in. Before
+ buf_page_get_with_no_latch()
+ returned, all changes must have
+ been applied to the page already. */
+ mtr.commit();
+ } else {
+ i.created = true;
+ buf_block_dbg_add_level(
+ block, SYNC_NO_ORDER_CHECK);
+ mtr.x_latch_at_savepoint(0, block);
+ recv_recover_page(block, mtr,
+ recv_addr, &i);
+ ut_ad(mtr.has_committed());
+ }
+
+ space->release_for_io();
}
}
}
@@ -2120,7 +2362,13 @@ ignore:
/* Wait until all the pages have been processed */
while (recv_sys->n_addrs != 0) {
- bool abort = recv_sys->found_corrupt_log;
+ const bool abort = recv_sys->found_corrupt_log
+ || recv_sys->found_corrupt_fs;
+
+ if (recv_sys->found_corrupt_fs && !srv_force_recovery) {
+ ib::info() << "Set innodb_force_recovery=1"
+ " to ignore corrupted pages.";
+ }
mutex_exit(&(recv_sys->mutex));
@@ -2159,6 +2407,10 @@ ignore:
log_mutex_enter();
mutex_enter(&(recv_sys->mutex));
+ mlog_init.reset();
+ } else if (!recv_no_ibuf_operations) {
+ /* We skipped this in buf_page_create(). */
+ mlog_init.ibuf_merge(mtr);
}
recv_sys->apply_log_recs = FALSE;
@@ -2355,9 +2607,17 @@ recv_report_corrupt_log(
}
/** Report a MLOG_INDEX_LOAD operation.
-@param[in] space_id tablespace identifier */
-ATTRIBUTE_COLD static void recv_mlog_index_load(ulint space_id)
+@param[in] space_id tablespace id
+@param[in] page_no page number
+@param[in] lsn log sequence number */
+ATTRIBUTE_COLD static void
+recv_mlog_index_load(ulint space_id, ulint page_no, lsn_t lsn)
{
+ recv_spaces_t::iterator it = recv_spaces.find(space_id);
+ if (it != recv_spaces.end()) {
+ it->second.mlog_index_load(lsn);
+ }
+
if (log_optimized_ddl_op) {
log_optimized_ddl_op(space_id);
}
@@ -2519,7 +2779,7 @@ loop:
/* fall through */
case MLOG_INDEX_LOAD:
if (type == MLOG_INDEX_LOAD) {
- recv_mlog_index_load(space);
+ recv_mlog_index_load(space, page_no, old_lsn);
}
/* fall through */
case MLOG_FILE_NAME:
@@ -2673,7 +2933,7 @@ corrupted_log:
break;
#endif /* UNIV_LOG_LSN_DEBUG */
case MLOG_INDEX_LOAD:
- recv_mlog_index_load(space);
+ recv_mlog_index_load(space, page_no, old_lsn);
break;
case MLOG_FILE_NAME:
case MLOG_FILE_DELETE:
@@ -3211,6 +3471,7 @@ recv_init_crash_recovery_spaces(bool rescan, bool& missing_tablespace)
/* The tablespace was found, and there
are some redo log records for it. */
fil_names_dirty(i->second.space);
+ i->second.space->enable_lsn = i->second.enable_lsn;
} else if (i->second.name == "") {
ib::error() << "Missing MLOG_FILE_NAME"
" or MLOG_FILE_DELETE"
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index dd3d1434418..64b1b0c5119 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -4932,23 +4932,28 @@ wait_again:
if (indexes[i]->type & DICT_FTS) {
row_fts_psort_info_destroy(psort_info, merge_info);
fts_psort_initiated = false;
- } else if (error != DB_SUCCESS || !online) {
- /* Do not apply any online log. */
+ } else if (dict_index_is_spatial(indexes[i])) {
+ /* We never disable redo logging for
+ creating SPATIAL INDEX. Avoid writing any
+ unnecessary MLOG_INDEX_LOAD record. */
} else if (old_table != new_table) {
ut_ad(!sort_idx->online_log);
ut_ad(sort_idx->online_status
== ONLINE_INDEX_COMPLETE);
- } else {
- if (dict_index_is_spatial(indexes[i])) {
- /* We never disable redo logging for
- creating SPATIAL INDEX. Avoid writing any
- unnecessary MLOG_INDEX_LOAD record. */
- } else if (FlushObserver* flush_observer =
- trx->get_flush_observer()) {
- flush_observer->flush();
- row_merge_write_redo(indexes[i]);
+ } else if (FlushObserver* flush_observer =
+ trx->get_flush_observer()) {
+ if (error != DB_SUCCESS) {
+ flush_observer->interrupted();
}
+ flush_observer->flush();
+ row_merge_write_redo(indexes[i]);
+ }
+ if (old_table != new_table
+ || (indexes[i]->type & (DICT_FTS | DICT_SPATIAL))
+ || error != DB_SUCCESS || !online) {
+ /* Do not apply any online log. */
+ } else {
if (global_system_variables.log_warnings > 2) {
sql_print_information(
"InnoDB: Online DDL : Applying"
@@ -5055,13 +5060,7 @@ func_exit:
flush_observer->flush();
- trx->remove_flush_observer();
-
- if (trx_is_interrupted(trx)) {
- error = DB_INTERRUPTED;
- }
-
- if (error == DB_SUCCESS && old_table != new_table) {
+ if (old_table != new_table) {
for (const dict_index_t* index
= dict_table_get_first_index(new_table);
index != NULL;
@@ -5072,6 +5071,12 @@ func_exit:
}
}
}
+
+ trx->remove_flush_observer();
+
+ if (trx_is_interrupted(trx)) {
+ error = DB_INTERRUPTED;
+ }
}
DBUG_RETURN(error);
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 085ab55a5aa..508ae260596 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -1882,7 +1882,8 @@ files_checked:
recv_apply_hashed_log_recs(true);
- if (recv_sys->found_corrupt_log) {
+ if (recv_sys->found_corrupt_log
+ || recv_sys->found_corrupt_fs) {
return(srv_init_abort(DB_CORRUPTION));
}