diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2019-04-17 15:45:53 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2019-04-17 15:59:30 +0300 |
commit | e7029e864f4b2c8fa88362677ee8150cc479f07f (patch) | |
tree | 84261da7e5a060e4fddae7187b14fcd422719c74 | |
parent | ee4a2fef18136165a3267b4429e5921fc306cc20 (diff) | |
parent | 250799f961b8f261ae2922382e2c69bca7c3b6fa (diff) | |
download | mariadb-git-e7029e864f4b2c8fa88362677ee8150cc479f07f.tar.gz |
Merge 10.3 into 10.4
-rw-r--r-- | mysql-test/suite/encryption/r/corrupted_during_recovery.result | 22 | ||||
-rw-r--r-- | mysql-test/suite/encryption/t/corrupted_during_recovery.test | 61 | ||||
-rw-r--r-- | mysql-test/suite/innodb/r/corrupted_during_recovery.result | 22 | ||||
-rw-r--r-- | mysql-test/suite/innodb/r/log_alter_table.result | 8 | ||||
-rw-r--r-- | mysql-test/suite/innodb/t/corrupted_during_recovery.opt | 1 | ||||
-rw-r--r-- | mysql-test/suite/innodb/t/corrupted_during_recovery.test | 61 | ||||
-rw-r--r-- | mysql-test/suite/innodb/t/log_alter_table.test | 8 | ||||
-rw-r--r-- | sql/sql_sequence.cc | 4 | ||||
-rw-r--r-- | storage/innobase/buf/buf0buf.cc | 15 | ||||
-rw-r--r-- | storage/innobase/handler/ha_innodb.cc | 2 | ||||
-rw-r--r-- | storage/innobase/ibuf/ibuf0ibuf.cc | 3 | ||||
-rw-r--r-- | storage/innobase/include/buf0buf.h | 9 | ||||
-rw-r--r-- | storage/innobase/include/fil0fil.h | 3 | ||||
-rw-r--r-- | storage/innobase/include/fsp0types.h | 7 | ||||
-rw-r--r-- | storage/innobase/log/log0recv.cc | 305 | ||||
-rw-r--r-- | storage/innobase/row/row0merge.cc | 41 | ||||
-rw-r--r-- | storage/innobase/srv/srv0start.cc | 3 |
17 files changed, 516 insertions, 59 deletions
diff --git a/mysql-test/suite/encryption/r/corrupted_during_recovery.result b/mysql-test/suite/encryption/r/corrupted_during_recovery.result new file mode 100644 index 00000000000..41c0d7d75a8 --- /dev/null +++ b/mysql-test/suite/encryption/r/corrupted_during_recovery.result @@ -0,0 +1,22 @@ +CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB, ENCRYPTED=YES; +INSERT INTO t1 VALUES(1); +CREATE TABLE t2(a BIGINT PRIMARY KEY) ENGINE=InnoDB, ENCRYPTED=YES; +INSERT INTO t1 VALUES(2); +SET GLOBAL innodb_flush_log_at_trx_commit=1; +INSERT INTO t2 VALUES(2); +# Kill the server +# Corrupt the pages +SELECT * FROM t1; +ERROR 42000: Unknown storage engine 'InnoDB' +SELECT * FROM t1; +a +1 +2 +SELECT * FROM t2; +a +2 +CHECK TABLE t1,t2; +Table Op Msg_type Msg_text +test.t1 check status OK +test.t2 check status OK +DROP TABLE t1, t2; diff --git a/mysql-test/suite/encryption/t/corrupted_during_recovery.test b/mysql-test/suite/encryption/t/corrupted_during_recovery.test new file mode 100644 index 00000000000..44cd03e9f8a --- /dev/null +++ b/mysql-test/suite/encryption/t/corrupted_during_recovery.test @@ -0,0 +1,61 @@ +--source include/have_innodb.inc +--source include/have_file_key_management_plugin.inc + +--disable_query_log +call mtr.add_suppression("InnoDB: Plugin initialization aborted"); +call mtr.add_suppression("Plugin 'InnoDB' init function returned error"); +call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed"); +call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed file read of tablespace test/t1 page"); +call mtr.add_suppression("InnoDB: Failed to read file '.*test.t1\\.ibd' at offset 3: Table is encrypted but decrypt failed"); +call mtr.add_suppression("InnoDB: The page \\[page id: space=\\d+, page number=3\\] in file '.*test.t1\\.ibd' cannot be decrypted"); +call mtr.add_suppression("InnoDB: Table in tablespace \\d+ encrypted. 
However key management plugin or used key_version \\d+ is not found or used encryption algorithm or method does not match. Can't continue opening the table."); +--enable_query_log + +let INNODB_PAGE_SIZE=`select @@innodb_page_size`; +CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB, ENCRYPTED=YES; +INSERT INTO t1 VALUES(1); +# Force a redo log checkpoint. +let $restart_noprint=2; +--source include/restart_mysqld.inc +--source ../../suite/innodb/include/no_checkpoint_start.inc +CREATE TABLE t2(a BIGINT PRIMARY KEY) ENGINE=InnoDB, ENCRYPTED=YES; +INSERT INTO t1 VALUES(2); +SET GLOBAL innodb_flush_log_at_trx_commit=1; +INSERT INTO t2 VALUES(2); + +--let CLEANUP_IF_CHECKPOINT=DROP TABLE t1,t2; +--source ../../suite/innodb/include/no_checkpoint_end.inc + +--echo # Corrupt the pages + +perl; +my $ps = $ENV{INNODB_PAGE_SIZE}; + +my $file = "$ENV{MYSQLD_DATADIR}/test/t1.ibd"; +open(FILE, "+<$file") || die "Unable to open $file"; +binmode FILE; +seek (FILE, $ENV{INNODB_PAGE_SIZE} * 3, SEEK_SET) or die "seek"; +print FILE "junk"; +close FILE or die "close"; + +$file = "$ENV{MYSQLD_DATADIR}/test/t2.ibd"; +open(FILE, "+<$file") || die "Unable to open $file"; +binmode FILE; +# Corrupt pages 1 to 3. MLOG_INIT_FILE_PAGE2 should protect us! +# Unfortunately, we are not immune to page 0 corruption. 
+seek (FILE, $ps, SEEK_SET) or die "seek"; +print FILE chr(0xff) x ($ps * 3); +close FILE or die "close"; +EOF + +--source include/start_mysqld.inc +--error ER_UNKNOWN_STORAGE_ENGINE +SELECT * FROM t1; +let $restart_parameters=--innodb_force_recovery=1; +--source include/restart_mysqld.inc + +SELECT * FROM t1; +SELECT * FROM t2; +CHECK TABLE t1,t2; + +DROP TABLE t1, t2; diff --git a/mysql-test/suite/innodb/r/corrupted_during_recovery.result b/mysql-test/suite/innodb/r/corrupted_during_recovery.result new file mode 100644 index 00000000000..788f17e3284 --- /dev/null +++ b/mysql-test/suite/innodb/r/corrupted_during_recovery.result @@ -0,0 +1,22 @@ +CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB; +INSERT INTO t1 VALUES(1); +CREATE TABLE t2(a BIGINT PRIMARY KEY) ENGINE=InnoDB; +INSERT INTO t1 VALUES(2); +SET GLOBAL innodb_flush_log_at_trx_commit=1; +INSERT INTO t2 VALUES(1); +# Kill the server +# Corrupt the pages +SELECT * FROM t1; +ERROR 42000: Unknown storage engine 'InnoDB' +SELECT * FROM t1; +a +0 +2 +SELECT * FROM t2; +a +1 +CHECK TABLE t1,t2; +Table Op Msg_type Msg_text +test.t1 check status OK +test.t2 check status OK +DROP TABLE t1, t2; diff --git a/mysql-test/suite/innodb/r/log_alter_table.result b/mysql-test/suite/innodb/r/log_alter_table.result index 55e6b84de16..9de89bebaa6 100644 --- a/mysql-test/suite/innodb/r/log_alter_table.result +++ b/mysql-test/suite/innodb/r/log_alter_table.result @@ -8,12 +8,12 @@ # CREATE TABLE t1 (a INT NOT NULL, b INT UNIQUE) ENGINE=InnoDB; INSERT INTO t1 VALUES (1,2); -ALTER TABLE t1 ADD PRIMARY KEY(a), ALGORITHM=INPLACE; -ALTER TABLE t1 DROP INDEX b, ADD INDEX (b); +ALTER TABLE t1 ADD PRIMARY KEY(a), LOCK=SHARED, ALGORITHM=INPLACE; +ALTER TABLE t1 DROP INDEX b, ADD INDEX (b), LOCK=SHARED; # Kill the server # restart: --debug=d,ib_log -FOUND 1 /scan .*: multi-log rec MLOG_FILE_CREATE2.*page .*:0/ in mysqld.1.err -FOUND 1 /scan .*: log rec MLOG_INDEX_LOAD/ in mysqld.1.err +FOUND 2 /scan \d+: multi-log rec MLOG_FILE_CREATE2 
len \d+ page \d+:0/ in mysqld.1.err +FOUND 3 /scan \d+: log rec MLOG_INDEX_LOAD/ in mysqld.1.err CHECK TABLE t1; Table Op Msg_type Msg_text test.t1 check status OK diff --git a/mysql-test/suite/innodb/t/corrupted_during_recovery.opt b/mysql-test/suite/innodb/t/corrupted_during_recovery.opt new file mode 100644 index 00000000000..6051f4cd1fa --- /dev/null +++ b/mysql-test/suite/innodb/t/corrupted_during_recovery.opt @@ -0,0 +1 @@ +--innodb_doublewrite=0 diff --git a/mysql-test/suite/innodb/t/corrupted_during_recovery.test b/mysql-test/suite/innodb/t/corrupted_during_recovery.test new file mode 100644 index 00000000000..697d6e2dce0 --- /dev/null +++ b/mysql-test/suite/innodb/t/corrupted_during_recovery.test @@ -0,0 +1,61 @@ +--source include/have_innodb.inc + +--disable_query_log +call mtr.add_suppression("InnoDB: Plugin initialization aborted"); +call mtr.add_suppression("Plugin 'InnoDB' init function returned error"); +call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed"); +call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed file read of tablespace test/t1 page"); +call mtr.add_suppression("InnoDB: Failed to read file '.*test.t1\\.ibd' at offset 3: Page read from tablespace is corrupted."); +--enable_query_log + +let INNODB_PAGE_SIZE=`select @@innodb_page_size`; +CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB; +INSERT INTO t1 VALUES(1); +# Force a redo log checkpoint. 
+let $restart_noprint=2; +--source include/restart_mysqld.inc +--source ../include/no_checkpoint_start.inc +CREATE TABLE t2(a BIGINT PRIMARY KEY) ENGINE=InnoDB; +INSERT INTO t1 VALUES(2); +SET GLOBAL innodb_flush_log_at_trx_commit=1; +INSERT INTO t2 VALUES(1); + +--let CLEANUP_IF_CHECKPOINT=DROP TABLE t1,t2; +--source ../include/no_checkpoint_end.inc + +--echo # Corrupt the pages + +perl; +my $ps = $ENV{INNODB_PAGE_SIZE}; + +my $file = "$ENV{MYSQLD_DATADIR}/test/t1.ibd"; +open(FILE, "+<$file") || die "Unable to open $file"; +binmode FILE; +sysseek(FILE, 3*$ps, 0) || die "Unable to seek $file\n"; +die "Unable to read $file" unless sysread(FILE, $page, $ps) == $ps; +# Replace the a=1 with a=0. +$page =~ s/\x80\x0\x0\x0\x0\x0\x0\x1/\x80\x0\x0\x0\x0\x0\x0\x0/; +sysseek(FILE, 3*$ps, 0) || die "Unable to seek $file\n"; +syswrite(FILE, $page, $ps)==$ps || die "Unable to write $file\n"; +close FILE or die "close"; + +$file = "$ENV{MYSQLD_DATADIR}/test/t2.ibd"; +open(FILE, "+<$file") || die "Unable to open $file"; +binmode FILE; +# Corrupt pages 1 to 3. MLOG_INIT_FILE_PAGE2 should protect us! +# Unfortunately, we are not immune to page 0 corruption. +seek (FILE, $ps, SEEK_SET) or die "seek"; +print FILE chr(0xff) x ($ps * 3); +close FILE or die "close"; +EOF + +--source include/start_mysqld.inc +--error ER_UNKNOWN_STORAGE_ENGINE +SELECT * FROM t1; +let $restart_parameters=--innodb_force_recovery=1; +--source include/restart_mysqld.inc +SELECT * FROM t1; +SELECT * FROM t2; +CHECK TABLE t1,t2; + +DROP TABLE t1, t2; diff --git a/mysql-test/suite/innodb/t/log_alter_table.test b/mysql-test/suite/innodb/t/log_alter_table.test index 6f12dfaf0b9..c92953f16a1 100644 --- a/mysql-test/suite/innodb/t/log_alter_table.test +++ b/mysql-test/suite/innodb/t/log_alter_table.test @@ -19,9 +19,9 @@ CREATE TABLE t1 (a INT NOT NULL, b INT UNIQUE) ENGINE=InnoDB; # MLOG_INDEX_LOAD will not be emitted for empty tables. Insert a row. 
INSERT INTO t1 VALUES (1,2); # We should get two MLOG_INDEX_LOAD for this. -ALTER TABLE t1 ADD PRIMARY KEY(a), ALGORITHM=INPLACE; +ALTER TABLE t1 ADD PRIMARY KEY(a), LOCK=SHARED, ALGORITHM=INPLACE; # And one MLOG_INDEX_LOAD for this. -ALTER TABLE t1 DROP INDEX b, ADD INDEX (b); +ALTER TABLE t1 DROP INDEX b, ADD INDEX (b), LOCK=SHARED; --let CLEANUP_IF_CHECKPOINT=DROP TABLE t1; --source include/no_checkpoint_end.inc @@ -32,10 +32,10 @@ ALTER TABLE t1 DROP INDEX b, ADD INDEX (b); let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err; let SEARCH_ABORT=NOT FOUND; # ensure that we have exactly 2 records there. -let SEARCH_PATTERN=scan .*: multi-log rec MLOG_FILE_CREATE2.*page .*:0; +let SEARCH_PATTERN=scan \d+: multi-log rec MLOG_FILE_CREATE2 len \d+ page \d+:0; --source include/search_pattern_in_file.inc # ensure that we have exactly 3 records there. -let SEARCH_PATTERN=scan .*: log rec MLOG_INDEX_LOAD; +let SEARCH_PATTERN=scan \d+: log rec MLOG_INDEX_LOAD; --source include/search_pattern_in_file.inc CHECK TABLE t1; diff --git a/sql/sql_sequence.cc b/sql/sql_sequence.cc index 1ed0bb38e64..9f17590a315 100644 --- a/sql/sql_sequence.cc +++ b/sql/sql_sequence.cc @@ -88,13 +88,13 @@ bool sequence_definition::check_and_adjust(bool set_reserved_until) /* If min_value is not set, set it to LONGLONG_MIN or 1, depending on - increment + real_increment */ if (!(used_fields & seq_field_used_min_value)) min_value= real_increment < 0 ? 
LONGLONG_MIN+1 : 1; /* - If min_value is not set, set it to LONGLONG_MAX or -1, depending on + If max_value is not set, set it to LONGLONG_MAX or -1, depending on real_increment */ if (!(used_fields & seq_field_used_max_value)) diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 24df4d76911..3fe7cac56ed 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -5597,7 +5597,15 @@ buf_page_create( buf_block_free(free_block); - return buf_page_get_with_no_latch(page_id, zip_size, mtr); + if (!recv_recovery_is_on()) { + return buf_page_get_with_no_latch(page_id, zip_size, + mtr); + } + + mutex_exit(&recv_sys->mutex); + block = buf_page_get_with_no_latch(page_id, zip_size, mtr); + mutex_enter(&recv_sys->mutex); + return block; } /* If we get here, the page was not in buf_pool: init it there */ @@ -5663,7 +5671,9 @@ buf_page_create( /* Delete possible entries for the page from the insert buffer: such can exist if the page belonged to an index which was dropped */ - ibuf_merge_or_delete_for_page(NULL, page_id, zip_size, true); + if (!recv_recovery_is_on()) { + ibuf_merge_or_delete_for_page(NULL, page_id, zip_size, true); + } frame = block->frame; @@ -5678,6 +5688,7 @@ buf_page_create( (3) key_version on encrypted pages (not page 0:0) */ memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); + memset(frame + FIL_PAGE_LSN, 0, 8); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(++buf_dbg_counter % 5771 || buf_validate()); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index c4fe6cdd449..e9a5d350f8b 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -4037,7 +4037,7 @@ static int innodb_init_params() if (innobase_open_files > open_files_limit) { ib::warn() << "innodb_open_files " << innobase_open_files << " should not be greater" - << "than the open_files_limit " << open_files_limit; + << " than the 
open_files_limit " << open_files_limit; if (innobase_open_files > tc_size) { innobase_open_files = tc_size; } diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 639836c9da9..1425f1ea289 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -4397,7 +4397,8 @@ ibuf_merge_or_delete_for_page( ulint dops[IBUF_OP_COUNT]; ut_ad(block == NULL || page_id == block->page.id); - ut_ad(block == NULL || buf_block_get_io_fix(block) == BUF_IO_READ); + ut_ad(block == NULL || buf_block_get_io_fix(block) == BUF_IO_READ + || recv_recovery_is_on()); if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE || trx_sys_hdr_page(page_id) diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index eae73fd2847..db9aa9d66fb 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -234,6 +234,15 @@ public: } bool operator!=(const page_id_t& rhs) const { return !(*this == rhs); } + bool operator<(const page_id_t& rhs) const + { + if (m_space == rhs.m_space) { + return m_page_no < rhs.m_page_no; + } + + return m_space < rhs.m_space; + } + /** Retrieve the tablespace id. @return tablespace id */ uint32_t space() const { return m_space; } diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index b58d4e85ac9..b06cb15e64a 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -89,6 +89,9 @@ struct fil_space_t { Protected by log_sys.mutex. If and only if this is nonzero, the tablespace will be in named_spaces. */ + /** Log sequence number of the latest MLOG_INDEX_LOAD record + that was found while parsing the redo log */ + lsn_t enable_lsn; bool stop_new_ops; /*!< we set this true when we start deleting a single-table tablespace. 
diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h index 1b2ede47252..ee0259e5f2d 100644 --- a/storage/innobase/include/fsp0types.h +++ b/storage/innobase/include/fsp0types.h @@ -269,8 +269,7 @@ or have been introduced in MySQL 5.7 or 8.0: ===================================================================== The flags below only exist in fil_space_t::flags, not in FSP_SPACE_FLAGS: ===================================================================== - 25: DATA_DIR - 26..27: ATOMIC_WRITES + 27: DATA_DIR 28..31: COMPRESSION_LEVEL */ @@ -278,9 +277,9 @@ The flags below only exist in fil_space_t::flags, not in FSP_SPACE_FLAGS: #define FSP_FLAGS_MEM_MASK (~0U << FSP_FLAGS_MEM_DATA_DIR) /** Zero relative shift position of the DATA_DIR flag */ -#define FSP_FLAGS_MEM_DATA_DIR 25 +#define FSP_FLAGS_MEM_DATA_DIR 27 /** Zero relative shift position of the COMPRESSION_LEVEL field */ -#define FSP_FLAGS_MEM_COMPRESSION_LEVEL 26 +#define FSP_FLAGS_MEM_COMPRESSION_LEVEL 28 /** Zero relative shift position of the POST_ANTELOPE field */ #define FSP_FLAGS_POS_POST_ANTELOPE 0 diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 6305f5ec1d2..32c63546e5d 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -154,10 +154,22 @@ struct file_name_t { /** FSP_SIZE of tablespace */ ulint size; + /** the log sequence number of the last observed MLOG_INDEX_LOAD + record for the tablespace */ + lsn_t enable_lsn; + /** Constructor */ file_name_t(std::string name_, bool deleted) : name(name_), space(NULL), status(deleted ? DELETED: NORMAL), - size(0) {} + size(0), enable_lsn(0) {} + + /** Report a MLOG_INDEX_LOAD operation, meaning that + mlog_init for any earlier LSN must be skipped. 
+ @param lsn log sequence number of the MLOG_INDEX_LOAD */ + void mlog_index_load(lsn_t lsn) + { + if (enable_lsn < lsn) enable_lsn = lsn; + } }; /** Map of dirty tablespaces during recovery */ @@ -173,6 +185,8 @@ static recv_spaces_t recv_spaces; enum recv_addr_state { /** not yet processed */ RECV_NOT_PROCESSED, + /** not processed; the page will be reinitialized */ + RECV_WILL_NOT_READ, /** page is being read */ RECV_BEING_READ, /** log records are being applied on the page */ @@ -215,6 +229,117 @@ void (*log_file_op)(ulint space_id, const byte* flags, const byte* name, ulint len, const byte* new_name, ulint new_len); +/** Information about initializing page contents during redo log processing */ +class mlog_init_t +{ +public: + /** A page initialization operation that was parsed from + the redo log */ + struct init { + /** log sequence number of the page initialization */ + lsn_t lsn; + /** Whether btr_page_create() avoided a read of the page. + + At the end of the last recovery batch, ibuf_merge() + will invoke change buffer merge for pages that reside + in the buffer pool. (In the last batch, loading pages + would trigger change buffer merge.) */ + bool created; + }; + +private: + typedef std::map<const page_id_t, init, + std::less<const page_id_t>, + ut_allocator<std::pair<const page_id_t, init> > > + map; + /** Map of page initialization operations. + FIXME: Merge this to recv_sys->addr_hash! */ + map inits; +public: + /** Record that a page will be initialized by the redo log. 
+ @param[in] space tablespace identifier + @param[in] page_no page number + @param[in] lsn log sequence number */ + void add(ulint space, ulint page_no, lsn_t lsn) + { + ut_ad(mutex_own(&recv_sys->mutex)); + const init init = { lsn, false }; + std::pair<map::iterator, bool> p = inits.insert( + map::value_type(page_id_t(space, page_no), init)); + ut_ad(!p.first->second.created); + if (!p.second && p.first->second.lsn < init.lsn) { + p.first->second = init; + } + } + + /** Get the last stored lsn of the page id and its respective + init/load operation. + @param[in] page_id page id + @param[in,out] init initialize log or load log + @return the latest page initialization; + not valid after releasing recv_sys->mutex. */ + init& last(page_id_t page_id) + { + ut_ad(mutex_own(&recv_sys->mutex)); + return inits.find(page_id)->second; + } + + /** At the end of each recovery batch, reset the 'created' flags. */ + void reset() + { + ut_ad(mutex_own(&recv_sys->mutex)); + ut_ad(recv_no_ibuf_operations); + for (map::iterator i= inits.begin(); i != inits.end(); i++) { + i->second.created = false; + } + } + + /** On the last recovery batch, merge buffered changes to those + pages that were initialized by buf_page_create() and still reside + in the buffer pool. Stale pages are not allowed in the buffer pool. + + Note: When MDEV-14481 implements redo log apply in the + background, we will have to ensure that buf_page_get_gen() + will not deliver stale pages to users (pages on which the + change buffer was not merged yet). Normally, the change + buffer merge is performed on I/O completion. Maybe, add a + flag to buf_page_t and perform the change buffer merge on + the first actual access? 
+ @param[in,out] mtr dummy mini-transaction */ + void ibuf_merge(mtr_t& mtr) + { + ut_ad(mutex_own(&recv_sys->mutex)); + ut_ad(!recv_no_ibuf_operations); + mtr.start(); + + for (map::const_iterator i= inits.begin(); i != inits.end(); + i++) { + if (!i->second.created) { + continue; + } + if (buf_block_t* block = buf_page_get_gen( + i->first, 0, RW_X_LATCH, NULL, + BUF_GET_IF_IN_POOL, __FILE__, __LINE__, + &mtr, NULL)) { + mutex_exit(&recv_sys->mutex); + ibuf_merge_or_delete_for_page( + block, i->first, + block->zip_size(), true); + mtr.commit(); + mtr.start(); + mutex_enter(&recv_sys->mutex); + } + } + + mtr.commit(); + } + + /** Clear the data structure */ + void clear() { inits.clear(); } +}; + +static mlog_init_t mlog_init; + /** Process a MLOG_CREATE2 record that indicates that a tablespace is being shrunk in size. @param[in] space_id tablespace identifier @@ -618,6 +743,7 @@ recv_sys_close() } recv_spaces.clear(); + mlog_init.clear(); } /************************************************************ @@ -1737,6 +1863,19 @@ recv_add_to_hash_table( recv_sys->n_addrs++; } + switch (type) { + case MLOG_INIT_FILE_PAGE2: + case MLOG_ZIP_PAGE_COMPRESS: + case MLOG_INIT_FREE_PAGE: + /* Ignore any earlier redo log records for this page. */ + ut_ad(recv_addr->state == RECV_NOT_PROCESSED + || recv_addr->state == RECV_WILL_NOT_READ); + recv_addr->state = RECV_WILL_NOT_READ; + mlog_init.add(space, page_no, start_lsn); + default: + break; + } + UT_LIST_ADD_LAST(recv_addr->rec_list, recv); prev_field = &(recv->data); @@ -1805,9 +1944,11 @@ recv_data_copy_to_buf( lsn of a log record. 
@param[in,out] block buffer pool page @param[in,out] mtr mini-transaction -@param[in,out] recv_addr recovery address */ +@param[in,out] recv_addr recovery address +@param[in,out] init page initialization operation, or NULL */ static void recv_recover_page(buf_block_t* block, mtr_t& mtr, - recv_addr_t* recv_addr) + recv_addr_t* recv_addr, + mlog_init_t::init* init = NULL) { page_t* page; page_zip_des_t* page_zip; @@ -1817,6 +1958,8 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr, ut_ad(recv_needed_recovery); ut_ad(recv_addr->state != RECV_BEING_PROCESSED); ut_ad(recv_addr->state != RECV_PROCESSED); + ut_ad(!init || init->created); + ut_ad(!init || init->lsn); if (UNIV_UNLIKELY(srv_print_verbose_log == 2)) { fprintf(stderr, "Applying log to page %u:%u\n", @@ -1838,7 +1981,9 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr, page_lsn = mach_read_from_8(page + FIL_PAGE_LSN); } + bool free_page = false; lsn_t start_lsn = 0, end_lsn = 0; + const lsn_t init_lsn = init ? init->lsn : 0; for (recv_t* recv = UT_LIST_GET_FIRST(recv_addr->rec_list); recv; recv = UT_LIST_GET_NEXT(rec_list, recv)) { @@ -1849,8 +1994,20 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr, if (recv->start_lsn < page_lsn) { /* Ignore this record, because there are later changes for this page. */ + DBUG_LOG("ib_log", "apply skip " + << get_mlog_string(recv->type) + << " LSN " << recv->start_lsn << " < " + << page_lsn); + } else if (recv->start_lsn < init_lsn) { + DBUG_LOG("ib_log", "init skip " + << get_mlog_string(recv->type) + << " LSN " << recv->start_lsn << " < " + << init_lsn); } else { - if (!start_lsn) { + if (recv->type == MLOG_INIT_FREE_PAGE) { + /* This does not really modify the page. 
*/ + free_page = true; + } else if (!start_lsn) { start_lsn = recv->start_lsn; } @@ -1884,7 +2041,7 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr, block->page.id.space(), block->page.id.page_no(), true, block, &mtr); - lsn_t end_lsn = recv->start_lsn + recv->len; + end_lsn = recv->start_lsn + recv->len; mach_write_to_8(FIL_PAGE_LSN + page, end_lsn); mach_write_to_8(srv_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM @@ -1911,6 +2068,13 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr, log_flush_order_mutex_enter(); buf_flush_note_modification(block, start_lsn, end_lsn, NULL); log_flush_order_mutex_exit(); + } else if (free_page && init) { + /* There have been no operations other than MLOG_INIT_FREE_PAGE. + Any buffered changes must not be merged. A subsequent + buf_page_create() from a user thread should discard + any buffered changes. */ + init->created = false; + ut_ad(!mtr.has_modifications()); } /* Make sure that committing mtr does not change the modification @@ -2094,25 +2258,103 @@ ignore: case RECV_DISCARDED: goto ignore; case RECV_NOT_PROCESSED: + case RECV_WILL_NOT_READ: break; } const page_id_t page_id(recv_addr->space, recv_addr->page_no); - mtr.start(); - mtr.set_log_mode(MTR_LOG_NONE); - if (buf_block_t* block = buf_page_get_gen( - page_id, 0, RW_X_LATCH, - NULL, BUF_GET_IF_IN_POOL, - __FILE__, __LINE__, &mtr, NULL)) { - buf_block_dbg_add_level( - block, SYNC_NO_ORDER_CHECK); - recv_recover_page(block, mtr, recv_addr); - ut_ad(mtr.has_committed()); + if (recv_addr->state == RECV_NOT_PROCESSED) { +apply: + mtr.start(); + mtr.set_log_mode(MTR_LOG_NONE); + if (buf_block_t* block = buf_page_get_gen( + page_id, 0, RW_X_LATCH, NULL, + BUF_GET_IF_IN_POOL, + __FILE__, __LINE__, &mtr, NULL)) { + buf_block_dbg_add_level( + block, SYNC_NO_ORDER_CHECK); + recv_recover_page(block, mtr, + recv_addr); + ut_ad(mtr.has_committed()); + } else { + mtr.commit(); + recv_read_in_area(page_id); + } } else { - mtr.commit(); - recv_read_in_area(page_id); + 
mlog_init_t::init& i = mlog_init.last(page_id); + const lsn_t end_lsn = UT_LIST_GET_LAST( + recv_addr->rec_list)->end_lsn; + + if (end_lsn < i.lsn) { + DBUG_LOG("ib_log", "skip log for page " + << page_id + << " LSN " << end_lsn + << " < " << i.lsn); +skip: + recv_addr->state = RECV_PROCESSED; + goto ignore; + } + + fil_space_t* space = fil_space_acquire_for_io( + recv_addr->space); + if (!space) { + goto skip; + } + + if (space->enable_lsn) { +do_read: + space->release_for_io(); + recv_addr->state = RECV_NOT_PROCESSED; + goto apply; + } + + /* Determine if a tablespace could be + for an internal table for FULLTEXT INDEX. + For those tables, no MLOG_INDEX_LOAD record + used to be written when redo logging was + disabled. Hence, we cannot optimize + away page reads when crash-upgrading + from MariaDB versions before 10.4, + because all the redo log records for + initializing and modifying the page in + the past could be older than the page + in the data file. + + The check is too broad, causing all + tables whose names start with FTS_ to + skip the optimization. */ + if ((log_sys.log.format + & ~LOG_HEADER_FORMAT_ENCRYPTED) + != LOG_HEADER_FORMAT_10_4 + && strstr(space->name, "/FTS_")) { + goto do_read; + } + + mtr.start(); + mtr.set_log_mode(MTR_LOG_NONE); + buf_block_t* block = buf_page_create( + page_id, space->zip_size(), &mtr); + if (recv_addr->state == RECV_PROCESSED) { + /* The page happened to exist + in the buffer pool, or it was + just being read in. Before + buf_page_get_with_no_latch() + returned, all changes must have + been applied to the page already. 
*/ + mtr.commit(); + } else { + i.created = true; + buf_block_dbg_add_level( + block, SYNC_NO_ORDER_CHECK); + mtr.x_latch_at_savepoint(0, block); + recv_recover_page(block, mtr, + recv_addr, &i); + ut_ad(mtr.has_committed()); + } + + space->release_for_io(); } } } @@ -2120,7 +2362,13 @@ ignore: /* Wait until all the pages have been processed */ while (recv_sys->n_addrs != 0) { - bool abort = recv_sys->found_corrupt_log; + const bool abort = recv_sys->found_corrupt_log + || recv_sys->found_corrupt_fs; + + if (recv_sys->found_corrupt_fs && !srv_force_recovery) { + ib::info() << "Set innodb_force_recovery=1" + " to ignore corrupted pages."; + } mutex_exit(&(recv_sys->mutex)); @@ -2159,6 +2407,10 @@ ignore: log_mutex_enter(); mutex_enter(&(recv_sys->mutex)); + mlog_init.reset(); + } else if (!recv_no_ibuf_operations) { + /* We skipped this in buf_page_create(). */ + mlog_init.ibuf_merge(mtr); } recv_sys->apply_log_recs = FALSE; @@ -2355,9 +2607,17 @@ recv_report_corrupt_log( } /** Report a MLOG_INDEX_LOAD operation. 
-@param[in] space_id tablespace identifier */ -ATTRIBUTE_COLD static void recv_mlog_index_load(ulint space_id) +@param[in] space_id tablespace id +@param[in] page_no page number +@param[in] lsn log sequence number */ +ATTRIBUTE_COLD static void +recv_mlog_index_load(ulint space_id, ulint page_no, lsn_t lsn) { + recv_spaces_t::iterator it = recv_spaces.find(space_id); + if (it != recv_spaces.end()) { + it->second.mlog_index_load(lsn); + } + if (log_optimized_ddl_op) { log_optimized_ddl_op(space_id); } @@ -2519,7 +2779,7 @@ loop: /* fall through */ case MLOG_INDEX_LOAD: if (type == MLOG_INDEX_LOAD) { - recv_mlog_index_load(space); + recv_mlog_index_load(space, page_no, old_lsn); } /* fall through */ case MLOG_FILE_NAME: @@ -2673,7 +2933,7 @@ corrupted_log: break; #endif /* UNIV_LOG_LSN_DEBUG */ case MLOG_INDEX_LOAD: - recv_mlog_index_load(space); + recv_mlog_index_load(space, page_no, old_lsn); break; case MLOG_FILE_NAME: case MLOG_FILE_DELETE: @@ -3211,6 +3471,7 @@ recv_init_crash_recovery_spaces(bool rescan, bool& missing_tablespace) /* The tablespace was found, and there are some redo log records for it. */ fil_names_dirty(i->second.space); + i->second.space->enable_lsn = i->second.enable_lsn; } else if (i->second.name == "") { ib::error() << "Missing MLOG_FILE_NAME" " or MLOG_FILE_DELETE" diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index dd3d1434418..64b1b0c5119 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -4932,23 +4932,28 @@ wait_again: if (indexes[i]->type & DICT_FTS) { row_fts_psort_info_destroy(psort_info, merge_info); fts_psort_initiated = false; - } else if (error != DB_SUCCESS || !online) { - /* Do not apply any online log. */ + } else if (dict_index_is_spatial(indexes[i])) { + /* We never disable redo logging for + creating SPATIAL INDEX. Avoid writing any + unnecessary MLOG_INDEX_LOAD record. 
*/ } else if (old_table != new_table) { ut_ad(!sort_idx->online_log); ut_ad(sort_idx->online_status == ONLINE_INDEX_COMPLETE); - } else { - if (dict_index_is_spatial(indexes[i])) { - /* We never disable redo logging for - creating SPATIAL INDEX. Avoid writing any - unnecessary MLOG_INDEX_LOAD record. */ - } else if (FlushObserver* flush_observer = - trx->get_flush_observer()) { - flush_observer->flush(); - row_merge_write_redo(indexes[i]); + } else if (FlushObserver* flush_observer = + trx->get_flush_observer()) { + if (error != DB_SUCCESS) { + flush_observer->interrupted(); } + flush_observer->flush(); + row_merge_write_redo(indexes[i]); + } + if (old_table != new_table + || (indexes[i]->type & (DICT_FTS | DICT_SPATIAL)) + || error != DB_SUCCESS || !online) { + /* Do not apply any online log. */ + } else { if (global_system_variables.log_warnings > 2) { sql_print_information( "InnoDB: Online DDL : Applying" @@ -5055,13 +5060,7 @@ func_exit: flush_observer->flush(); - trx->remove_flush_observer(); - - if (trx_is_interrupted(trx)) { - error = DB_INTERRUPTED; - } - - if (error == DB_SUCCESS && old_table != new_table) { + if (old_table != new_table) { for (const dict_index_t* index = dict_table_get_first_index(new_table); index != NULL; @@ -5072,6 +5071,12 @@ func_exit: } } } + + trx->remove_flush_observer(); + + if (trx_is_interrupted(trx)) { + error = DB_INTERRUPTED; + } } DBUG_RETURN(error); diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 085ab55a5aa..508ae260596 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1882,7 +1882,8 @@ files_checked: recv_apply_hashed_log_recs(true); - if (recv_sys->found_corrupt_log) { + if (recv_sys->found_corrupt_log + || recv_sys->found_corrupt_fs) { return(srv_init_abort(DB_CORRUPTION)); } |