From 424ad322ea096d6552c49e0bc268c730e897e915 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 31 May 2022 14:37:26 +0300 Subject: squash! 444a56458f3fb393368e87d31133a31b1a27f9e1 Cover ROW_FORMAT=COMPRESSED buf_block_t::page_checksum(): Remove. log_phys_t::page_checksum(): Validate an OPT_PAGE_CHECKSUM record. mtr_t::is_logged(): Return whether log should be written. mtr_t::set_log_mode_sub(const mtr_t&): Set the logging mode of a sub-minitransaction when another mini-transaction is holding latches on some modified pages. When creating or freeing BLOB pages, we may only write OPT_PAGE_CHECKSUM records in the main mini-transaction, after all changes have been written to the log. MTR_LOG_SUB: Log mode for a sub-mini-transaction. FIXME: Occasionally, the test innodb_zip.wl5522_debug_zip would fail the check on an undo log page, with OPT_PAGE_CHECKSUM written immediately after FREE_PAGE. This is not reliably reproducible. --- storage/innobase/btr/btr0cur.cc | 4 +- storage/innobase/fil/fil0fil.cc | 2 +- storage/innobase/fsp/fsp0fsp.cc | 34 ++++++--------- storage/innobase/include/mtr0log.h | 29 ++++++------- storage/innobase/include/mtr0mtr.h | 19 ++++++++- storage/innobase/include/mtr0types.h | 5 +++ storage/innobase/log/log0recv.cc | 47 ++++++++++++++------ storage/innobase/mtr/mtr0mtr.cc | 83 +++++++++++++++++------------------- storage/innobase/page/page0cur.cc | 8 ++-- storage/innobase/page/page0zip.cc | 6 +-- 10 files changed, 133 insertions(+), 104 deletions(-) diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 437c7508a0b..807003222fb 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -7354,7 +7354,7 @@ btr_store_big_rec_extern_fields( mtr.start(); index->set_modified(mtr); - mtr.set_log_mode(btr_mtr->get_log_mode()); + mtr.set_log_mode_sub(*btr_mtr); buf_page_get(rec_block->page.id(), rec_block->zip_size(), RW_X_LATCH, &mtr); @@ -7704,7 +7704,7 @@ btr_free_externally_stored_field( mtr_start(&mtr); mtr.set_spaces(*local_mtr); - mtr.set_log_mode(local_mtr->get_log_mode()); + mtr.set_log_mode_sub(*local_mtr); ut_ad(!index->table->is_temporary() || local_mtr->get_log_mode() == MTR_LOG_NO_REDO); diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index fcb0b06c1c2..04846fd96e8 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1515,7 +1515,7 @@ inline void mtr_t::log_file_op(mfile_type_t type, ulint space_id, ut_ad(!strcmp(&path[strlen(path) - strlen(DOT_IBD)], DOT_IBD)); flag_modified(); - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; m_last= nullptr; diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index 962d18d8081..b6ee7f21535 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -502,26 +502,20 @@ updating an allocation bitmap page. @param[in] mtr mini-transaction */ void fil_space_t::modify_check(const mtr_t& mtr) const { - switch (mtr.get_log_mode()) { - case MTR_LOG_NONE: - /* These modes are only allowed within a non-bitmap page - when there is a higher-level redo log record written. */ - ut_ad(purpose == FIL_TYPE_TABLESPACE - || purpose == FIL_TYPE_TEMPORARY); - break; - case MTR_LOG_NO_REDO: - ut_ad(purpose == FIL_TYPE_TEMPORARY - || purpose == FIL_TYPE_IMPORT); - return; - case MTR_LOG_ALL: - /* We may only write redo log for a persistent - tablespace. */ - ut_ad(purpose == FIL_TYPE_TABLESPACE); - ut_ad(mtr.is_named_space(id)); - return; - } - - ut_ad("invalid log mode" == 0); + switch (mtr.get_log_mode()) { + case MTR_LOG_NONE: + /* These modes are only allowed within a non-bitmap page + when there is a higher-level redo log record written. */ + ut_ad(purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_TEMPORARY); + break; + case MTR_LOG_NO_REDO: + ut_ad(purpose == FIL_TYPE_TEMPORARY || purpose == FIL_TYPE_IMPORT); + break; + default: + /* We may only write redo log for a persistent tablespace. */ + ut_ad(purpose == FIL_TYPE_TABLESPACE); + ut_ad(mtr.is_named_space(id)); + } } #endif diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h index dd77e37ce6b..f7547d81615 100644 --- a/storage/innobase/include/mtr0log.h +++ b/storage/innobase/include/mtr0log.h @@ -196,7 +196,7 @@ inline bool mtr_t::write(const buf_block_t &block, void *ptr, V val) } byte *p= static_cast(ptr); const byte *const end= p + l; - if (w != FORCED && m_log_mode == MTR_LOG_ALL) + if (w != FORCED && is_logged()) { const byte *b= buf; while (*p++ == *b++) @@ -224,7 +224,7 @@ inline void mtr_t::memset(const buf_block_t &b, ulint ofs, ulint len, byte val) { ut_ad(len); set_modified(b); - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; static_assert(MIN_4BYTE > UNIV_PAGE_SIZE_MAX, "consistency"); @@ -261,7 +261,7 @@ inline void mtr_t::memset(const buf_block_t &b, ulint ofs, size_t len, ut_ad(size); ut_ad(len > size); /* use mtr_t::memcpy() for shorter writes */ set_modified(b); - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; static_assert(MIN_4BYTE > UNIV_PAGE_SIZE_MAX, "consistency"); @@ -319,7 +319,7 @@ inline void mtr_t::memcpy_low(const buf_block_t &block, uint16_t offset, { ut_ad(len); set_modified(block); - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; if (len < mtr_buf_t::MAX_DATA_SIZE - (1 + 3 + 3 + 5 + 5)) { @@ -354,7 +354,7 @@ inline void mtr_t::memmove(const buf_block_t &b, ulint d, ulint s, ulint len) ut_ad(d + len <= ulint(srv_page_size)); set_modified(b); - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; static_assert(MIN_4BYTE > UNIV_PAGE_SIZE_MAX, "consistency"); size_t lenlen= (len < MIN_2BYTE ? 1 : len < MIN_3BYTE ? 2 : 3); @@ -491,7 +491,7 @@ inline void mtr_t::memcpy(const buf_block_t &b, void *dest, const void *str, ut_ad(ut_align_down(dest, srv_page_size) == b.frame); char *d= static_cast(dest); const char *s= static_cast(str); - if (w != FORCED && m_log_mode == MTR_LOG_ALL) + if (w != FORCED && is_logged()) { ut_ad(len); const char *const end= d + len; @@ -531,11 +531,8 @@ inline void mtr_t::init(buf_block_t *b) b->page.status= buf_page_t::INIT_ON_FLUSH; - if (m_log_mode != MTR_LOG_ALL) - { - ut_ad(m_log_mode == MTR_LOG_NONE || m_log_mode == MTR_LOG_NO_REDO); + if (!is_logged()) return; - } m_log.close(log_write(b->page.id(), &b->page)); m_last_offset= FIL_PAGE_TYPE; @@ -549,7 +546,7 @@ inline void mtr_t::free(fil_space_t &space, uint32_t offset) ut_ad(is_named_space(&space)); ut_ad(!m_freed_space || m_freed_space == &space); - if (m_log_mode == MTR_LOG_ALL) + if (is_logged()) m_log.close(log_write({space.id, offset}, nullptr)); } @@ -559,7 +556,7 @@ inline void mtr_t::free(fil_space_t &space, uint32_t offset) inline void mtr_t::log_write_extended(const buf_block_t &block, byte type) { set_modified(block); - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; byte *l= log_write(block.page.id(), &block.page, 1, true); *l++= type; @@ -586,7 +583,7 @@ inline void mtr_t::page_delete(const buf_block_t &block, ulint prev_rec) ut_ad(!block.zip_size()); ut_ad(prev_rec < block.physical_size()); set_modified(block); - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; size_t len= (prev_rec < MIN_2BYTE ? 2 : prev_rec < MIN_3BYTE ? 3 : 4); byte *l= log_write(block.page.id(), &block.page, len, true); @@ -613,7 +610,7 @@ inline void mtr_t::page_delete(const buf_block_t &block, ulint prev_rec, ut_ad(hdr_size < MIN_3BYTE); ut_ad(prev_rec < block.physical_size()); ut_ad(data_size < block.physical_size()); - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; size_t len= prev_rec < MIN_2BYTE ? 2 : prev_rec < MIN_3BYTE ? 3 : 4; len+= hdr_size < MIN_2BYTE ? 1 : 2; @@ -645,7 +642,7 @@ inline void mtr_t::undo_append(const buf_block_t &block, { ut_ad(len > 2); set_modified(block); - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; const bool small= len + 1 < mtr_buf_t::MAX_DATA_SIZE - (1 + 3 + 3 + 5 + 5); byte *end= log_write(block.page.id(), &block.page, len + 1, small); @@ -668,7 +665,7 @@ inline void mtr_t::undo_append(const buf_block_t &block, @param id first page identifier that will not be in the file */ inline void mtr_t::trim_pages(const page_id_t id) { - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; byte *l= log_write(id, nullptr, 1, true); *l++= TRIM_PAGES; diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index dda89dac427..45ab24d1809 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -137,10 +137,18 @@ struct mtr_t { mtr_log_t get_log_mode() const { static_assert(MTR_LOG_ALL == 0, "efficiency"); - ut_ad(m_log_mode <= MTR_LOG_NO_REDO); return static_cast(m_log_mode); } + /** @return whether log is to be written for changes */ + bool is_logged() const + { + static_assert(MTR_LOG_ALL == 0, "efficiency"); + static_assert(MTR_LOG_NONE & MTR_LOG_NO_REDO, "efficiency"); + static_assert(!(MTR_LOG_NONE & MTR_LOG_SUB), "efficiency"); + return !(m_log_mode & MTR_LOG_NONE); + } + /** Change the logging mode. @param mode logging mode @return old mode */ @@ -151,6 +159,15 @@ struct mtr_t { return old_mode; } + /** Set the log mode of a sub-minitransaction + @param mtr parent mini-transaction */ + void set_log_mode_sub(const mtr_t &mtr) + { + ut_ad(mtr.m_log_mode == MTR_LOG_ALL || mtr.m_log_mode == MTR_LOG_NO_REDO); + m_log_mode= mtr.m_log_mode | MTR_LOG_SUB; + static_assert((MTR_LOG_SUB | MTR_LOG_NO_REDO) == MTR_LOG_NO_REDO, ""); + } + /** Check if we are holding a block latch in exclusive mode @param block buffer pool block to search for */ bool have_x_latch(const buf_block_t &block) const; diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h index 95af96f113a..65d00f56071 100644 --- a/storage/innobase/include/mtr0types.h +++ b/storage/innobase/include/mtr0types.h @@ -44,6 +44,11 @@ enum mtr_log_t { Set for attempting modification of a ROW_FORMAT=COMPRESSED page. */ MTR_LOG_NONE, + /** Log all operations, but do not write any OPT_PAGE_CHECKSUM + records because some of the modified pages were also modified + by another mini-transaction that did not write its log yet. */ + MTR_LOG_SUB, + /** Don't generate REDO log but add dirty pages to flush list */ MTR_LOG_NO_REDO }; diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 44301d1eb31..5a177b3083e 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -86,7 +86,7 @@ is bigger than the lsn we are able to scan up to, that is an indication that the recovery failed and the database may be corrupt. */ static lsn_t recv_max_page_lsn; -/** Stored physical log record with logical LSN (@see log_t::FORMAT_10_5) */ +/** Stored physical log record */ struct log_phys_t : public log_rec_t { /** start LSN of the mini-transaction (not necessarily of this record) */ @@ -178,6 +178,35 @@ public: return false; } + /** Check an OPT_PAGE_CHECKSUM record. + @see mtr_t::page_checksum() + @param block buffer page + @param l pointer to checksum + @return whether an unrecoverable mismatch was found */ + static bool page_checksum(const buf_block_t &block, const byte *l) + { + size_t size; + const byte *page= block.page.zip.data; + if (UNIV_LIKELY_NULL(page)) + size= (UNIV_ZIP_SIZE_MIN >> 1) << block.page.zip.ssize; + else + { + page= block.frame; + size= srv_page_size; + } + if (UNIV_LIKELY(my_crc32c(my_crc32c(my_crc32c(0, page + FIL_PAGE_OFFSET, + FIL_PAGE_LSN - + FIL_PAGE_OFFSET), + page + FIL_PAGE_TYPE, 2), + page + FIL_PAGE_SPACE_ID, + size - (FIL_PAGE_SPACE_ID + 8)) == + mach_read_from_4(l))) + return false; + + ib::error() << "OPT_PAGE_CHECKSUM mismatch on " << block.page.id(); + return !srv_force_recovery; + } + /** The status of apply() */ enum apply_status { /** The page was not affected */ @@ -267,19 +296,13 @@ public: case OPTION: ut_ad(rlen == 5); ut_ad(*l == OPT_PAGE_CHECKSUM); - ut_ad(!block.page.zip.data); - if (UNIV_UNLIKELY(block.page_checksum() != mach_read_from_4(l + 1))) + if (page_checksum(block, l + 1)) { - ib::error() << "InnoDB: OPT_PAGE_CHECKSUM mismatch on " - << block.page.id(); - if (!srv_force_recovery) - { - applied= APPLIED_YES; + applied= APPLIED_YES; page_corrupted: - ib::error() << "Set innodb_force_recovery=1 to ignore corruption."; - recv_sys.found_corrupt_log= true; - return applied; - } + ib::error() << "Set innodb_force_recovery=1 to ignore corruption."; + recv_sys.found_corrupt_log= true; + return applied; } goto next_after_applying; } diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 3b1356160e9..dbfe2f28b6f 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -436,7 +436,7 @@ void mtr_t::commit() std::pair lsns; - if (UNIV_LIKELY(m_log_mode == MTR_LOG_ALL)) + if (UNIV_LIKELY(is_logged())) { lsns= do_write(); @@ -673,19 +673,9 @@ void mtr_t::commit_files(lsn_t checkpoint_lsn) bool mtr_t::is_named_space(ulint space) const { - ut_ad(!m_user_space || m_user_space->id != TRX_SYS_SPACE); - - switch (m_log_mode) { - case MTR_LOG_NONE: - case MTR_LOG_NO_REDO: - return(true); - case MTR_LOG_ALL: - return(m_user_space_id == space - || is_predefined_tablespace(space)); - } - - ut_error; - return(false); + ut_ad(!m_user_space || m_user_space->id != TRX_SYS_SPACE); + return !is_logged() || m_user_space_id == space || + is_predefined_tablespace(space); } /** Check if a tablespace is associated with the mini-transaction (needed for generating a FILE_MODIFY record) @@ -695,16 +685,8 @@ bool mtr_t::is_named_space(const fil_space_t* space) const { ut_ad(!m_user_space || m_user_space->id != TRX_SYS_SPACE); - switch (m_log_mode) { - case MTR_LOG_NONE: - case MTR_LOG_NO_REDO: - return true; - case MTR_LOG_ALL: - return m_user_space == space || is_predefined_tablespace(space->id); - } - - ut_error; - return false; + return !is_logged() || m_user_space == space || + is_predefined_tablespace(space->id); } #endif /* UNIV_DEBUG */ @@ -967,9 +949,27 @@ static mtr_t::page_flush_ahead log_close(lsn_t lsn) return mtr_t::PAGE_FLUSH_SYNC; } -/** @return checksum for an OPT_PAGE_CHECKSUM record */ -uint32_t buf_block_t::page_checksum() const +inline void mtr_t::page_checksum(const buf_block_t &block) { + const byte *page= block.frame; + size_t size= srv_page_size; + + if (UNIV_LIKELY_NULL(block.page.zip.data)) + { + size= (UNIV_ZIP_SIZE_MIN >> 1) << block.page.zip.ssize; + switch (fil_page_get_type(block.page.zip.data)) { + case FIL_PAGE_TYPE_ALLOCATED: + case FIL_PAGE_INODE: + case FIL_PAGE_IBUF_BITMAP: + case FIL_PAGE_TYPE_FSP_HDR: + case FIL_PAGE_TYPE_XDES: + /* These are essentially uncompressed pages. */ + break; + default: + page= block.page.zip.data; + } + } + /* We have to exclude from the checksum the normal page checksum that is written by buf_flush_init_for_writing() and FIL_PAGE_LSN which would be updated once we have actually @@ -980,28 +980,23 @@ uint32_t buf_block_t::page_checksum() const format we will unconditionally exclude the 8 bytes at FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION a.k.a. FIL_RTREE_SPLIT_SEQ_NUM. */ - return my_crc32c(my_crc32c(my_crc32c(0, frame + FIL_PAGE_OFFSET, - FIL_PAGE_LSN - FIL_PAGE_OFFSET), - frame + FIL_PAGE_TYPE, 2), - frame + FIL_PAGE_SPACE_ID, - srv_page_size - (FIL_PAGE_SPACE_ID + 8)); -} + const uint32_t checksum= + my_crc32c(my_crc32c(my_crc32c(0, page + FIL_PAGE_OFFSET, + FIL_PAGE_LSN - FIL_PAGE_OFFSET), + page + FIL_PAGE_TYPE, 2), + page + FIL_PAGE_SPACE_ID, size - (FIL_PAGE_SPACE_ID + 8)); -inline void mtr_t::page_checksum(const buf_block_t &block) -{ - if (UNIV_LIKELY_NULL(block.page.zip.data)) - return; /* FIXME: support ROW_FORMAT=COMPRESSED */ byte *l= log_write