summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThirunarayanan Balathandayuthapani <thiru@mariadb.com>2020-07-30 15:43:21 +0530
committerMarko Mäkelä <marko.makela@mariadb.com>2020-07-31 08:05:25 +0300
commit3ced3a10eb1be24aa84b774d824dc8d5b2d2d5e6 (patch)
treecbd8711f5c29df6ec0e3d853835bd4ed4522e24a
parent8a612314d0c9bc5b1db6f3998f26c28967915949 (diff)
downloadmariadb-git-3ced3a10eb1be24aa84b774d824dc8d5b2d2d5e6.tar.gz
MDEV-11799 InnoDB can abort if the doublewrite buffer contains a bad and a good copy
Problem: ======== 1) buf_dblwr_process() tries to uncompress the page_compressed encrypted page without decrypting the page. So it fails to validate the page compressed encrypted page. 2) buf_dblwr_process() also looking for the first valid copy of the page even though there could be newer copies of the pages. Restoring the old copy of the page could lead to failure during redo log apply. Solution: ======== recv_dblwr_t::validate_page(): Validate the page by doing the decryption, decompress and check the corruption of it. For doublewrite page, it does check that page lsn shouldn't be older than checkpoint lsn or greater than scanned lsn buf_dblwr_process(): Use validate_page() for data page and use find_page() to find the latest valid page to recover the dblwr page.
-rw-r--r--storage/innobase/buf/buf0dblwr.cc121
-rw-r--r--storage/innobase/fsp/fsp0file.cc17
-rw-r--r--storage/innobase/include/log0recv.h46
-rw-r--r--storage/innobase/log/log0recv.cc84
4 files changed, 155 insertions, 113 deletions
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc
index 6cc4bd6dc00..5b9c4563161 100644
--- a/storage/innobase/buf/buf0dblwr.cc
+++ b/storage/innobase/buf/buf0dblwr.cc
@@ -532,28 +532,52 @@ buf_dblwr_init_or_load_pages(
void
buf_dblwr_process()
{
+ ut_ad(recv_sys->parse_start_lsn);
+
ulint page_no_dblwr = 0;
byte* read_buf;
- byte* unaligned_read_buf;
recv_dblwr_t& recv_dblwr = recv_sys->dblwr;
if (!buf_dblwr) {
return;
}
- unaligned_read_buf = static_cast<byte*>(
- ut_malloc_nokey(3 * UNIV_PAGE_SIZE));
-
read_buf = static_cast<byte*>(
- ut_align(unaligned_read_buf, UNIV_PAGE_SIZE));
- byte* const buf = read_buf + UNIV_PAGE_SIZE;
+ aligned_malloc(3 * srv_page_size, srv_page_size));
+ byte* const buf = read_buf + srv_page_size;
for (recv_dblwr_t::list::iterator i = recv_dblwr.pages.begin();
i != recv_dblwr.pages.end();
++i, ++page_no_dblwr) {
- byte* page = *i;
- ulint space_id = page_get_space_id(page);
- fil_space_t* space = fil_space_get(space_id);
+ byte* page = *i;
+ const ulint page_no = page_get_page_no(page);
+
+ if (!page_no) {
+ /* page 0 should have been recovered
+ already via Datafile::restore_from_doublewrite() */
+ continue;
+ }
+
+ const ulint space_id = page_get_space_id(page);
+ const lsn_t lsn = mach_read_from_8(page + FIL_PAGE_LSN);
+
+ if (recv_sys->parse_start_lsn > lsn) {
+ /* Pages written before the checkpoint are
+ not useful for recovery. */
+ continue;
+ }
+
+ const page_id_t page_id(space_id, page_no);
+
+ if (recv_sys->scanned_lsn < lsn) {
+ ib::warn() << "Ignoring doublewrite copy of "
+ << page_id
+ << " with future log sequence number "
+ << lsn;
+ continue;
+ }
+
+ fil_space_t* space = fil_space_acquire_for_io(space_id);
if (space == NULL) {
/* Maybe we have dropped the tablespace
@@ -563,9 +587,6 @@ buf_dblwr_process()
fil_space_open_if_needed(space);
- const ulint page_no = page_get_page_no(page);
- const page_id_t page_id(space_id, page_no);
-
if (UNIV_UNLIKELY(page_no >= space->size)) {
/* Do not report the warning if the tablespace
@@ -581,6 +602,8 @@ buf_dblwr_process()
<< space->name
<< " (" << space->size << " pages)";
}
+next_page:
+ fil_space_release_for_io(space);
continue;
}
@@ -609,76 +632,27 @@ buf_dblwr_process()
<< "error: " << err;
}
- const bool is_all_zero = buf_is_zeroes(
- span<const byte>(read_buf, page_size.physical()));
- const bool expect_encrypted = space->crypt_data
- && space->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED;
-
- if (is_all_zero) {
+ if (buf_is_zeroes(span<const byte>(read_buf,
+ page_size.physical()))) {
/* We will check if the copy in the
doublewrite buffer is valid. If not, we will
ignore this page (there should be redo log
records to initialize it). */
+ } else if (recv_dblwr.validate_page(
+ page_id, read_buf, space, buf)) {
+ goto next_page;
} else {
- /* Decompress the page before
- validating the checksum. */
- ulint decomp = fil_page_decompress(buf, read_buf);
- if (!decomp || (decomp != srv_page_size
- && page_size.is_compressed())) {
- goto bad;
- }
-
- if (expect_encrypted && mach_read_from_4(
- read_buf
- + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION)
- ? fil_space_verify_crypt_checksum(read_buf,
- page_size)
- : !buf_page_is_corrupted(true, read_buf,
- page_size, space)) {
- /* The page is good; there is no need
- to consult the doublewrite buffer. */
- continue;
- }
-
-bad:
/* We intentionally skip this message for
- is_all_zero pages. */
+ all-zero pages. */
ib::info()
<< "Trying to recover page " << page_id
<< " from the doublewrite buffer.";
}
- ulint decomp = fil_page_decompress(buf, page);
- if (!decomp || (decomp != srv_page_size
- && page_size.is_compressed())) {
- continue;
- }
-
- if (expect_encrypted && mach_read_from_4(
- page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION)
- ? !fil_space_verify_crypt_checksum(page, page_size)
- : buf_page_is_corrupted(true, page, page_size, space)) {
- /* Theoretically we could have another good
- copy for this page in the doublewrite
- buffer. If not, we will report a fatal error
- for a corrupted page somewhere else if that
- page was truly needed. */
- continue;
- }
+ page = recv_dblwr.find_page(page_id, space, buf);
- if (page_no == 0) {
- /* Check the FSP_SPACE_FLAGS. */
- ulint flags = fsp_header_get_flags(page);
- if (!fsp_flags_is_valid(flags, space_id)
- && fsp_flags_convert_from_101(flags)
- == ULINT_UNDEFINED) {
- ib::warn() << "Ignoring a doublewrite copy"
- " of page " << page_id
- << " due to invalid flags "
- << ib::hex(flags);
- continue;
- }
- /* The flags on the page should be converted later. */
+ if (!page) {
+ goto next_page;
}
/* Write the good page from the doublewrite buffer to
@@ -687,17 +661,18 @@ bad:
IORequest write_request(IORequest::WRITE);
fil_io(write_request, true, page_id, page_size,
- 0, page_size.physical(),
- const_cast<byte*>(page), NULL);
+ 0, page_size.physical(), page, NULL);
ib::info() << "Recovered page " << page_id
<< " from the doublewrite buffer.";
+
+ goto next_page;
}
recv_dblwr.pages.clear();
fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
- ut_free(unaligned_read_buf);
+ aligned_free(read_buf);
}
/****************************************************************//**
diff --git a/storage/innobase/fsp/fsp0file.cc b/storage/innobase/fsp/fsp0file.cc
index 5abdf22d939..b367ed37c2b 100644
--- a/storage/innobase/fsp/fsp0file.cc
+++ b/storage/innobase/fsp/fsp0file.cc
@@ -768,10 +768,10 @@ Datafile::restore_from_doublewrite()
}
/* Find if double write buffer contains page_no of given space id. */
- const byte* page = recv_sys->dblwr.find_page(m_space_id, 0);
const page_id_t page_id(m_space_id, 0);
+ const byte* page = recv_sys->dblwr.find_page(page_id);
- if (page == NULL) {
+ if (!page) {
/* If the first page of the given user tablespace is not there
in the doublewrite buffer, then the recovery is going to fail
now. Hence this is treated as an error. */
@@ -788,15 +788,10 @@ Datafile::restore_from_doublewrite()
FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page);
if (!fsp_flags_is_valid(flags, m_space_id)) {
- ulint cflags = fsp_flags_convert_from_101(flags);
- if (cflags == ULINT_UNDEFINED) {
- ib::warn()
- << "Ignoring a doublewrite copy of page "
- << page_id
- << " due to invalid flags " << ib::hex(flags);
- return(true);
- }
- flags = cflags;
+ flags = fsp_flags_convert_from_101(flags);
+ /* recv_dblwr_t::validate_page() inside find_page()
+ checked this already. */
+ ut_ad(flags != ULINT_UNDEFINED);
/* The flags on the page should be converted later. */
}
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
index b91312e81e2..266e93c0ce2 100644
--- a/storage/innobase/include/log0recv.h
+++ b/storage/innobase/include/log0recv.h
@@ -195,23 +195,35 @@ struct recv_t{
rec_list;/*!< list of log records for this page */
};
-struct recv_dblwr_t {
- /** Add a page frame to the doublewrite recovery buffer. */
- void add(byte* page) {
- pages.push_back(page);
- }
-
- /** Find a doublewrite copy of a page.
- @param[in] space_id tablespace identifier
- @param[in] page_no page number
- @return page frame
- @retval NULL if no page was found */
- const byte* find_page(ulint space_id, ulint page_no);
-
- typedef std::list<byte*, ut_allocator<byte*> > list;
-
- /** Recovered doublewrite buffer page frames */
- list pages;
+struct recv_dblwr_t
+{
+ /** Add a page frame to the doublewrite recovery buffer. */
+ void add(byte *page) { pages.push_back(page); }
+
+ /** Validate the page.
+ @param page_id page identifier
+ @param page page contents
+ @param space the tablespace of the page (not available for page 0)
+ @param tmp_buf 2*srv_page_size for decrypting and decompressing any
+ page_compressed or encrypted pages
+ @return whether the page is valid */
+ bool validate_page(const page_id_t page_id, const byte *page,
+ const fil_space_t *space, byte *tmp_buf);
+
+ /** Find a doublewrite copy of a page.
+ @param page_id page identifier
+ @param space tablespace (not available for page_id.page_no()==0)
+ @param tmp_buf 2*srv_page_size for decrypting and decompressing any
+ page_compressed or encrypted pages
+ @return page frame
+ @retval NULL if no valid page for page_id was found */
+ byte* find_page(const page_id_t page_id, const fil_space_t *space= NULL,
+ byte *tmp_buf= NULL);
+
+ typedef std::list<byte*, ut_allocator<byte*> > list;
+
+ /** Recovered doublewrite buffer page frames */
+ list pages;
};
/** Recovery system data structure */
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index c2eb1fe7659..281ac10c5e9 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -57,6 +57,7 @@ Created 9/20/1997 Heikki Tuuri
#include "srv0start.h"
#include "trx0roll.h"
#include "row0merge.h"
+#include "fil0pagecompress.h"
/** Log records are stored in the hash table in chunks at most of this size;
this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
@@ -3910,6 +3911,8 @@ skip_apply:
rescan = true;
}
+ recv_sys->parse_start_lsn = checkpoint_lsn;
+
if (srv_operation == SRV_OPERATION_NORMAL) {
buf_dblwr_process();
}
@@ -4084,26 +4087,83 @@ recv_recovery_rollback_active(void)
}
}
-/** Find a doublewrite copy of a page.
-@param[in] space_id tablespace identifier
-@param[in] page_no page number
-@return page frame
-@retval NULL if no page was found */
-const byte*
-recv_dblwr_t::find_page(ulint space_id, ulint page_no)
+bool recv_dblwr_t::validate_page(const page_id_t page_id,
+ const byte *page,
+ const fil_space_t *space,
+ byte *tmp_buf)
+{
+ if (page_id.page_no() == 0)
+ {
+ ulint flags= fsp_header_get_flags(page);
+ if (!fsp_flags_is_valid(flags, page_id.space()) &&
+ fsp_flags_convert_from_101(flags) == ULINT_UNDEFINED)
+ {
+ ib::warn() << "Ignoring a doublewrite copy of page "
+ << page_id
+ << "due to invalid flags " << ib::hex(flags);
+ return false;
+ }
+
+ /* Page 0 is never page_compressed or encrypted. */
+ return !buf_page_is_corrupted(true, page, page_size_t(flags));
+ }
+
+ ut_ad(tmp_buf);
+ ut_ad(space);
+ byte *tmp_frame= tmp_buf;
+ byte *tmp_page= tmp_buf + srv_page_size;
+ const uint16_t page_type= mach_read_from_2(page + FIL_PAGE_TYPE);
+ const page_size_t page_size(space->flags);
+ const bool expect_encrypted= space->crypt_data &&
+ space->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED;
+
+ if (expect_encrypted &&
+ mach_read_from_4(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION))
+ {
+ if (!fil_space_verify_crypt_checksum(page, page_size))
+ return false;
+ if (page_type != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED)
+ return true;
+ if (page_size.is_compressed())
+ return false;
+ memcpy(tmp_page, page, page_size.physical());
+ if (!fil_space_decrypt(space, tmp_frame, tmp_page))
+ return false;
+ }
+
+ switch (page_type) {
+ case FIL_PAGE_PAGE_COMPRESSED:
+ memcpy(tmp_page, page, page_size.physical());
+ /* fall through */
+ case FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED:
+ return !page_size.is_compressed() &&
+ fil_page_decompress(tmp_frame, tmp_page) == srv_page_size &&
+ !buf_page_is_corrupted(true, tmp_page, page_size, space);
+ }
+
+ return !buf_page_is_corrupted(true, page, page_size, space);
+}
+
+byte *recv_dblwr_t::find_page(const page_id_t page_id,
+ const fil_space_t *space, byte *tmp_buf)
{
- const byte *result= NULL;
+ byte *result= NULL;
lsn_t max_lsn= 0;
for (list::const_iterator i = pages.begin(); i != pages.end(); ++i)
{
- const byte *page= *i;
- if (page_get_page_no(page) != page_no ||
- page_get_space_id(page) != space_id)
+ byte *page= *i;
+ if (page_get_page_no(page) != page_id.page_no() ||
+ page_get_space_id(page) != page_id.space())
continue;
const lsn_t lsn= mach_read_from_8(page + FIL_PAGE_LSN);
- if (lsn <= max_lsn)
+ if (lsn <= max_lsn ||
+ !validate_page(page_id, page, space, tmp_buf))
+ {
+ /* Mark processed for subsequent iterations in buf_dblwr_process() */
+ memset(page + FIL_PAGE_LSN, 0, 8);
continue;
+ }
max_lsn= lsn;
result= page;
}