diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2022-05-27 16:05:49 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2022-05-27 16:05:49 +0300 |
commit | d037c10d17853a4d1013290ed673e3b157fe3a8e (patch) | |
tree | 4be65bf839796d97fbaa962b03f6bb2575c1519e | |
parent | 05d049bdbe6814aee8f011fbd0d915f9d82a30ee (diff) | |
parent | 444a56458f3fb393368e87d31133a31b1a27f9e1 (diff) | |
download | mariadb-git-d037c10d17853a4d1013290ed673e3b157fe3a8e.tar.gz |
Merge 10.5 into 10.6
-rw-r--r-- | storage/innobase/fil/fil0crypt.cc | 3 | ||||
-rw-r--r-- | storage/innobase/fil/fil0fil.cc | 2 | ||||
-rw-r--r-- | storage/innobase/include/buf0buf.h | 11 | ||||
-rw-r--r-- | storage/innobase/include/fil0fil.h | 5 | ||||
-rw-r--r-- | storage/innobase/include/mtr0log.h | 2 | ||||
-rw-r--r-- | storage/innobase/include/mtr0mtr.h | 32 | ||||
-rw-r--r-- | storage/innobase/include/mtr0types.h | 21 | ||||
-rw-r--r-- | storage/innobase/log/log0recv.cc | 38 | ||||
-rw-r--r-- | storage/innobase/mtr/mtr0mtr.cc | 93 |
9 files changed, 118 insertions, 89 deletions
diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index f16b66256dd..cd0c98d6fe2 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -1871,8 +1871,7 @@ fil_crypt_rotate_page( /* If block read failed mtr memo and log should be empty. */ ut_ad(!mtr.has_modifications()); ut_ad(!mtr.is_dirty()); - ut_ad(mtr.get_memo()->size() == 0); - ut_ad(mtr.get_log()->size() == 0); + ut_ad(mtr.is_empty()); mtr.commit(); } diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 4c9bd056d4e..fa35f219d2c 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -3186,7 +3186,7 @@ fil_names_clear( for (auto it = fil_system.named_spaces.begin(); it != fil_system.named_spaces.end(); ) { - if (mtr.get_log()->size() + if (mtr.get_log_size() + strlen(it->chain.start->name) >= RECV_SCAN_SIZE - (3 + 5 + 1)) { /* Prevent log parse buffer overflow */ diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index c9534a2f455..02178b60b5a 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -24,8 +24,7 @@ The database buffer pool high-level routines Created 11/5/1995 Heikki Tuuri *******************************************************/ -#ifndef buf0buf_h -#define buf0buf_h +#pragma once /** Magic value to use instead of checksums when they are disabled */ #define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL @@ -43,9 +42,6 @@ Created 11/5/1995 Heikki Tuuri #include "transactional_lock_guard.h" #include <ostream> -// Forward declaration -struct fil_addr_t; - /** @name Modes for buf_page_get_gen */ /* @{ */ #define BUF_GET 10 /*!< get always */ @@ -916,6 +912,9 @@ public: /** @return ut_time_ms() at the time of first access of a block in buf_pool @retval 0 if not accessed */ unsigned is_accessed() const { ut_ad(in_file()); return access_time; } + + /** @return checksum for an OPT_PAGE_CHECKSUM record */ + uint32_t 
checksum() const; }; /** The buffer control block structure */ @@ -2220,5 +2219,3 @@ struct CheckUnzipLRUAndLRUList { #include "buf0buf.inl" #endif /* !UNIV_INNOCHECKSUM */ - -#endif diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 9df3a260152..98aa7268d0f 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -1268,8 +1268,9 @@ struct fil_addr_t { /** For the first page in a system tablespace data file(ibdata*, not *.ibd): the file has been flushed to disk at least up to this lsn -For other pages: 32-bit key version used to encrypt the page + 32-bit checksum -or 64 bites of zero if no encryption */ +For other pages of tablespaces not in innodb_checksum_algorithm=full_crc32 +format: 32-bit key version used to encrypt the page + 32-bit checksum +or 64 bits of zero if no encryption */ #define FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION 26U /** This overloads FIL_PAGE_FILE_FLUSH_LSN for RTREE Split Sequence Number */ diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h index 8192c93a8f9..7751db84b47 100644 --- a/storage/innobase/include/mtr0log.h +++ b/storage/innobase/include/mtr0log.h @@ -387,7 +387,7 @@ template<byte type> inline byte *mtr_t::log_write(const page_id_t id, const buf_page_t *bpage, size_t len, bool alloc, size_t offset) { - static_assert(!(type & 15) && type != RESERVED && type != OPTION && + static_assert(!(type & 15) && type != RESERVED && type <= FILE_CHECKPOINT, "invalid type"); ut_ad(type >= FILE_CREATE || is_named_space(id.space())); ut_ad(!bpage || bpage->id() == id); diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index 9d0f48a36b7..3a275d7ab3f 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -54,13 +54,6 @@ savepoint. */ #define mtr_memo_release(m, o, t) \ (m)->memo_release((o), (t)) -/** Print info of an mtr handle. 
*/ -#define mtr_print(m) (m)->print() - -/** Return the log object of a mini-transaction buffer. -@return log */ -#define mtr_get_log(m) (m)->get_log() - /** Push an object to an mtr memo stack. */ #define mtr_memo_push(m, o, t) (m)->memo_push(o, t) @@ -360,30 +353,13 @@ public: const byte* ptr, ulint flags) const; - /** Print info of an mtr handle. */ - void print() const; - /** @return true if mini-transaction contains modifications. */ bool has_modifications() const { return m_modifications; } - - /** @return the memo stack */ - const mtr_buf_t* get_memo() const { return &m_memo; } - - /** @return the memo stack */ - mtr_buf_t* get_memo() { return &m_memo; } #endif /* UNIV_DEBUG */ /** @return true if a record was added to the mini-transaction */ bool is_dirty() const { return m_made_dirty; } - /** Get the buffered redo log of this mini-transaction. - @return redo log */ - const mtr_buf_t* get_log() const { return &m_log; } - - /** Get the buffered redo log of this mini-transaction. - @return redo log */ - mtr_buf_t* get_log() { return &m_log; } - /** Push an object to an mtr memo stack. @param object object @param type object type: MTR_MEMO_S_LOCK, ... */ @@ -395,6 +371,14 @@ public: static inline bool is_block_dirtied(const buf_block_t* block) MY_ATTRIBUTE((warn_unused_result)); + /** @return the size of the log */ + size_t get_log_size() const { return m_log.size(); } + /** @return whether the log and memo are empty */ + bool is_empty() const { return m_memo.size() == 0 && m_log.size() == 0; } + + /** Write an OPT_PAGE_CHECKSUM record. */ + inline void page_checksum(const buf_page_t &bpage); + /** Write request types */ enum write_type { diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h index 9ee7810fa7b..4b2c815af59 100644 --- a/storage/innobase/include/mtr0types.h +++ b/storage/innobase/include/mtr0types.h @@ -77,12 +77,8 @@ type.
The following record types refer to data pages: RESERVED (6): reserved for future use; a subtype code (encoded immediately after the length) would be written to reserve code space for further extensions - OPTION (7): optional record that may be ignored; a subtype code - (encoded immediately after the length) would distinguish actual - usage, such as: - * MDEV-18976 page checksum record - * binlog record - * SQL statement (at the start of statement) + OPTION (7): optional record that may be ignored; a subtype @see mrec_opt + (encoded immediately after the length) would distinguish actual usage Bits 3..0 indicate the redo log record length, excluding the first byte, but including additional length bytes and any other bytes, @@ -229,9 +225,7 @@ enum mrec_type_t /** Reserved for future use. */ RESERVED= 0x60, /** Optional record that may be ignored in crash recovery. - A subtype code will be encoded immediately after the length. - Possible subtypes would include a MDEV-18976 page checksum record, - a binlog record, or an SQL statement. */ + A subtype (@see mrec_opt) will be encoded after the page identifier. */ OPTION= 0x70 }; @@ -283,6 +277,15 @@ enum mrec_ext_t }; +/** Recognized OPTION record subtypes. */ +enum mrec_opt +{ + /** page checksum at the end of the mini-transaction */ + OPT_PAGE_CHECKSUM= 0 + /* Other possible subtypes: a binlog record, or an SQL statement. */ +}; + + /** Redo log record types for file-level operations. These bit patterns will be written to redo log files, so the existing codes or their interpretation on crash recovery must not be changed. 
*/ diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 49331fcbe49..37c2f22c532 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -262,9 +262,26 @@ public: next_not_same_page: last_offset= 1; /* the next record must not be same_page */ } - next: l+= rlen; continue; + case OPTION: + ut_ad(rlen == 5); + ut_ad(*l == OPT_PAGE_CHECKSUM); + ut_ad(!block.page.zip.data); + if (UNIV_UNLIKELY(block.page.checksum() != mach_read_from_4(l + 1))) + { + ib::error() << "InnoDB: OPT_PAGE_CHECKSUM mismatch on " + << block.page.id(); + if (!srv_force_recovery) + { + applied= APPLIED_YES; +page_corrupted: + ib::error() << "Set innodb_force_recovery=1 to ignore corruption."; + recv_sys.set_corrupt_log(); + return applied; + } + } + goto next_after_applying; } ut_ad(mach_read_from_4(frame + FIL_PAGE_OFFSET) == @@ -275,8 +292,6 @@ public: ut_ad(last_offset <= size); switch (b & 0x70) { - case OPTION: - goto next; case EXTENDED: if (UNIV_UNLIKELY(block.page.id().page_no() < 3 || block.page.zip.ssize)) @@ -305,12 +320,7 @@ public: if (UNIV_UNLIKELY(rlen <= 3)) goto record_corrupted; if (undo_append(block, ++l, --rlen) && !srv_force_recovery) - { -page_corrupted: - ib::error() << "Set innodb_force_recovery=1 to ignore corruption."; - recv_sys.set_corrupt_log(); - return applied; - } + goto page_corrupted; break; case INSERT_HEAP_REDUNDANT: case INSERT_REUSE_REDUNDANT: @@ -2333,7 +2343,8 @@ same_page: if (got_page_op) { const page_id_t id(space_id, page_no); - ut_d(if ((b & 0x70) == INIT_PAGE) freed.erase(id)); + ut_d(if ((b & 0x70) == INIT_PAGE || (b & 0x70) == OPTION) + freed.erase(id)); ut_ad(freed.find(id) == freed.end()); switch (b & 0x70) { case FREE_PAGE: @@ -2369,8 +2380,11 @@ same_page: } last_offset= FIL_PAGE_TYPE; break; - case RESERVED: case OPTION: + if (rlen == 5 && *l == OPT_PAGE_CHECKSUM) + break; + /* fall through */ + case RESERVED: continue; case WRITE: case MEMMOVE: @@ -2462,9 +2476,9 @@ same_page: 
#if 0 && defined UNIV_DEBUG switch (b & 0x70) { case RESERVED: - case OPTION: ut_ad(0); /* we did "continue" earlier */ break; + case OPTION: case FREE_PAGE: break; default: diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 74d7a9a3376..dbc2597fd05 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -359,22 +359,10 @@ struct DebugCheck { struct ReleaseBlocks { const lsn_t start, end; -#ifdef UNIV_DEBUG - const mtr_buf_t &memo; - - ReleaseBlocks(lsn_t start, lsn_t end, const mtr_buf_t &memo) : - start(start), end(end), memo(memo) -#else /* UNIV_DEBUG */ - ReleaseBlocks(lsn_t start, lsn_t end, const mtr_buf_t&) : - start(start), end(end) -#endif /* UNIV_DEBUG */ - { - ut_ad(start); - ut_ad(end); - } + ReleaseBlocks(lsn_t start, lsn_t end) : start(start), end(end) {} /** @return true always */ - bool operator()(mtr_memo_slot_t* slot) const + bool operator()(mtr_memo_slot_t *slot) const { if (!slot->object) return true; @@ -387,8 +375,8 @@ struct ReleaseBlocks return true; } - buf_flush_note_modification(static_cast<buf_block_t*>(slot->object), - start, end); + buf_block_t *block= static_cast<buf_block_t*>(slot->object); + buf_flush_note_modification(block, start, end); return true; } }; @@ -492,9 +480,8 @@ void mtr_t::commit() else ut_ad(!m_freed_space); - m_memo.for_each_block_in_reverse(CIterate<const ReleaseBlocks> - (ReleaseBlocks(lsns.first, m_commit_lsn, - m_memo))); + m_memo.for_each_block_in_reverse + (CIterate<const ReleaseBlocks>(ReleaseBlocks(lsns.first, m_commit_lsn))); if (m_made_dirty) mysql_mutex_unlock(&log_sys.flush_order_mutex); @@ -590,6 +577,7 @@ void mtr_t::commit_shrink(fil_space_t &space) log_write_and_flush_prepare(); const lsn_t start_lsn= do_write().first; + ut_d(m_log.erase()); mysql_mutex_lock(&log_sys.flush_order_mutex); /* Durably write the reduced FSP_SIZE before truncating the data file. 
*/ @@ -622,8 +610,7 @@ void mtr_t::commit_shrink(fil_space_t &space) m_memo.for_each_block_in_reverse(CIterate<Shrink>{space}); m_memo.for_each_block_in_reverse(CIterate<const ReleaseBlocks> - (ReleaseBlocks(start_lsn, m_commit_lsn, - m_memo))); + (ReleaseBlocks(start_lsn, m_commit_lsn))); mysql_mutex_unlock(&log_sys.flush_order_mutex); mysql_mutex_lock(&fil_system.mutex); @@ -992,6 +979,55 @@ static mtr_t::page_flush_ahead log_close(lsn_t lsn) return mtr_t::PAGE_FLUSH_SYNC; } +/** @return checksum for an OPT_PAGE_CHECKSUM record */ +uint32_t buf_page_t::checksum() const +{ + /* We have to exclude from the checksum the normal + page checksum that is written by buf_flush_init_for_writing() + and FIL_PAGE_LSN which would be updated once we have actually + allocated the LSN. + + Unfortunately, we cannot access fil_space_t easily here. In order to + be compatible with encrypted tablespaces in the pre-full_crc32 + format we will unconditionally exclude the 8 bytes at + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + a.k.a. FIL_RTREE_SPLIT_SEQ_NUM. 
*/ + return my_crc32c(my_crc32c(my_crc32c(0, frame + FIL_PAGE_OFFSET, + FIL_PAGE_LSN - FIL_PAGE_OFFSET), + frame + FIL_PAGE_TYPE, 2), + frame + FIL_PAGE_SPACE_ID, + srv_page_size - (FIL_PAGE_SPACE_ID + 8)); +} + +inline void mtr_t::page_checksum(const buf_page_t &bpage) +{ + if (UNIV_LIKELY_NULL(bpage.zip.data)) + return; /* FIXME: support ROW_FORMAT=COMPRESSED */ + byte *l= log_write<OPTION>(bpage.id(), nullptr, 5, true, 0); + *l++= OPT_PAGE_CHECKSUM; + mach_write_to_4(l, bpage.checksum()); + m_log.close(l + 4); +} + +/** Write OPT_PAGE_CHECKSUM records for modified pages */ +struct Write_OPT_PAGE_CHECKSUM +{ + mtr_t &mtr; + Write_OPT_PAGE_CHECKSUM(mtr_t &mtr) : mtr(mtr) {} + + /** @return true always */ + bool operator()(const mtr_memo_slot_t *slot) const + { + if (slot->type & MTR_MEMO_MODIFY) + { + const buf_page_t &b= static_cast<const buf_block_t*>(slot->object)->page; + if (!b.is_freed()) + mtr.page_checksum(b); + } + return true; + } +}; + /** Write the block contents to the REDO log */ struct mtr_write_log { @@ -1012,6 +1048,11 @@ std::pair<lsn_t,mtr_t::page_flush_ahead> mtr_t::do_write() ulint len = m_log.size(); ut_ad(len > 0); +#ifdef UNIV_DEBUG + m_memo.for_each_block(CIterate<Write_OPT_PAGE_CHECKSUM>(*this)); + len = m_log.size(); +#endif + if (len > srv_log_buffer_size / 2) { log_buffer_extend(ulong((len + 1) * 2)); } @@ -1394,16 +1435,6 @@ mtr_t::memo_contains_page_flagged( return m_memo.for_each_block_in_reverse(iteration) ? NULL : iteration.functor.get_block(); } - -/** Print info of an mtr handle. */ -void -mtr_t::print() const -{ - ib::info() << "Mini-transaction handle: memo size " - << m_memo.size() << " bytes log size " - << get_log()->size() << " bytes"; -} - #endif /* UNIV_DEBUG */ |