summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThirunarayanan Balathandayuthapani <thiru@mariadb.com>2020-03-23 16:37:44 +0530
committerThirunarayanan Balathandayuthapani <thiru@mariadb.com>2020-03-23 16:41:48 +0530
commit6697135c6d03935118c3dfa1c97faea7fa76afa6 (patch)
treef9896818863ef09cbe20515296a43a33d5026a33
parent1e6be6938059d230903029fa99ad6504c53d90ea (diff)
downloadmariadb-git-6697135c6d03935118c3dfa1c97faea7fa76afa6.tar.gz
MDEV-21572 buf_page_get_gen() should apply buffered page initialized
redo log during recovery

- InnoDB unnecessarily reads the page even though it has fully initialized
  buffered redo log records. Allow the page initialization redo log to be
  applied for the page in buf_page_get_gen() during recovery.
- Renamed buf_page_get_gen() to buf_page_get_low().
- The newly added buf_page_get_gen() checks for buffered redo log records
  for the particular page id during recovery.
- Added new function buf_page_mtr_lock(), which latches the page with the
  given latch type.
- recv_recovery_create_page() is an inline function which creates a page if
  it has page initialization redo log records.
-rw-r--r--mysql-test/suite/innodb/r/corrupted_during_recovery.result16
-rw-r--r--mysql-test/suite/innodb/t/corrupted_during_recovery.test28
-rw-r--r--storage/innobase/buf/buf0buf.cc108
-rw-r--r--storage/innobase/include/buf0buf.h24
-rw-r--r--storage/innobase/include/log0recv.h18
-rw-r--r--storage/innobase/log/log0recv.cc171
6 files changed, 261 insertions, 104 deletions
diff --git a/mysql-test/suite/innodb/r/corrupted_during_recovery.result b/mysql-test/suite/innodb/r/corrupted_during_recovery.result
index ee4db08fc85..951233ced3c 100644
--- a/mysql-test/suite/innodb/r/corrupted_during_recovery.result
+++ b/mysql-test/suite/innodb/r/corrupted_during_recovery.result
@@ -17,3 +17,19 @@ CHECK TABLE t2;
Table Op Msg_type Msg_text
test.t2 check status OK
DROP TABLE t1, t2;
+CREATE TABLE t1(pk SERIAL) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (1),(2),(3);
+connect con1,localhost,root,,;
+BEGIN;
+DELETE FROM t1 WHERE pk=1;
+connection default;
+SET GLOBAL innodb_flush_log_at_trx_commit=1;
+DELETE FROM t1 WHERE pk=3;
+# Kill the server
+disconnect con1;
+# Corrupt the pages
+SELECT * FROM t1;
+pk
+1
+2
+DROP TABLE t1;
diff --git a/mysql-test/suite/innodb/t/corrupted_during_recovery.test b/mysql-test/suite/innodb/t/corrupted_during_recovery.test
index dad08645085..67f2f2dde40 100644
--- a/mysql-test/suite/innodb/t/corrupted_during_recovery.test
+++ b/mysql-test/suite/innodb/t/corrupted_during_recovery.test
@@ -62,3 +62,31 @@ SELECT * FROM t2;
CHECK TABLE t2;
DROP TABLE t1, t2;
+
+# MDEV-21572 buf_page_get_gen() should apply buffered page
+# initialized redo log during recovery
+--source ../include/no_checkpoint_start.inc
+CREATE TABLE t1(pk SERIAL) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (1),(2),(3);
+connect (con1,localhost,root,,);
+BEGIN;
+DELETE FROM t1 WHERE pk=1;
+connection default;
+SET GLOBAL innodb_flush_log_at_trx_commit=1;
+DELETE FROM t1 WHERE pk=3;
+--let CLEANUP_IF_CHECKPOINT=DROP TABLE t1;
+--source ../include/no_checkpoint_end.inc
+disconnect con1;
+
+--echo # Corrupt the pages
+perl;
+my $file = "$ENV{MYSQLD_DATADIR}/test/t1.ibd";
+open(FILE, "+<$file") || die "Unable to open $file";
+binmode FILE;
+seek (FILE, $ENV{INNODB_PAGE_SIZE} * 3, SEEK_SET) or die "seek";
+print FILE "junk";
+close FILE or die "close";
+EOF
+--source include/start_mysqld.inc
+SELECT * FROM t1;
+DROP TABLE t1;
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index ad0f40ce67e..954b16eb2d2 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -4190,7 +4190,45 @@ buf_wait_for_read(
}
}
-/** This is the general function used to get access to a database page.
+/** Lock the page with the given latch type.
+@param[in,out] block block to be locked
+@param[in] rw_latch RW_NO_LATCH, RW_S_LATCH, RW_SX_LATCH, or RW_X_LATCH
+@param[in] mtr mini-transaction
+@param[in] file file name
+@param[in] line line where called
+@return pointer to locked block */
+static buf_block_t* buf_page_mtr_lock(buf_block_t *block,
+ ulint rw_latch,
+ mtr_t* mtr,
+ const char *file,
+ unsigned line)
+{
+ mtr_memo_type_t fix_type;
+ switch (rw_latch)
+ {
+ case RW_NO_LATCH:
+ fix_type= MTR_MEMO_BUF_FIX;
+ break;
+ case RW_S_LATCH:
+ rw_lock_s_lock_inline(&block->lock, 0, file, line);
+ fix_type= MTR_MEMO_PAGE_S_FIX;
+ break;
+ case RW_SX_LATCH:
+ rw_lock_sx_lock_inline(&block->lock, 0, file, line);
+ fix_type= MTR_MEMO_PAGE_SX_FIX;
+ break;
+ default:
+ ut_ad(rw_latch == RW_X_LATCH);
+ rw_lock_x_lock_inline(&block->lock, 0, file, line);
+ fix_type= MTR_MEMO_PAGE_X_FIX;
+ break;
+ }
+
+ mtr_memo_push(mtr, block, fix_type);
+ return block;
+}
+
+/** This is the low level function used to get access to a database page.
@param[in] page_id page id
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL
@@ -4201,7 +4239,7 @@ BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH
@param[in] mtr mini-transaction
@return pointer to the block or NULL */
buf_block_t*
-buf_page_get_gen(
+buf_page_get_low(
const page_id_t page_id,
const page_size_t& page_size,
ulint rw_latch,
@@ -4844,35 +4882,7 @@ evict_from_pool:
return NULL;
}
- mtr_memo_type_t fix_type;
-
- switch (rw_latch) {
- case RW_NO_LATCH:
-
- fix_type = MTR_MEMO_BUF_FIX;
- break;
-
- case RW_S_LATCH:
- rw_lock_s_lock_inline(&fix_block->lock, 0, file, line);
-
- fix_type = MTR_MEMO_PAGE_S_FIX;
- break;
-
- case RW_SX_LATCH:
- rw_lock_sx_lock_inline(&fix_block->lock, 0, file, line);
-
- fix_type = MTR_MEMO_PAGE_SX_FIX;
- break;
-
- default:
- ut_ad(rw_latch == RW_X_LATCH);
- rw_lock_x_lock_inline(&fix_block->lock, 0, file, line);
-
- fix_type = MTR_MEMO_PAGE_X_FIX;
- break;
- }
-
- mtr_memo_push(mtr, fix_block, fix_type);
+ fix_block = buf_page_mtr_lock(fix_block, rw_latch, mtr, file, line);
if (mode != BUF_PEEK_IF_IN_POOL && !access_time) {
/* In the case of a first access, try to apply linear
@@ -4887,6 +4897,42 @@ evict_from_pool:
return(fix_block);
}
+/** This is the general function used to get access to a database page.
+It does page initialization and applies the buffered redo logs.
+@param[in] page_id page id
+@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
+@param[in] guess guessed block or NULL
+@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
+BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH
+@param[in] file file name
+@param[in] line line where called
+@param[in] mtr mini-transaction
+@param[out] err DB_SUCCESS or error code
+@return pointer to the block or NULL */
+buf_block_t*
+buf_page_get_gen(
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ ulint rw_latch,
+ buf_block_t* guess,
+ ulint mode,
+ const char* file,
+ unsigned line,
+ mtr_t* mtr,
+ dberr_t* err)
+{
+ if (buf_block_t *block = recv_recovery_create_page(page_id))
+ {
+ buf_block_fix(block);
+ ut_ad(rw_lock_s_lock_nowait(&block->debug_latch, file, line));
+ block= buf_page_mtr_lock(block, rw_latch, mtr, file, line);
+ return block;
+ }
+
+ return buf_page_get_low(page_id, page_size, rw_latch,
+ guess, mode, file, line, mtr, err);
+}
+
/********************************************************************//**
This is the general function used to get optimistic access to a database
page.
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index a04936a19cf..d120dc36091 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -436,6 +436,7 @@ buf_page_get_zip(
const page_size_t& page_size);
/** This is the general function used to get access to a database page.
+It does page initialization and applies the buffered redo logs.
@param[in] page_id page id
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL
@@ -458,6 +459,29 @@ buf_page_get_gen(
mtr_t* mtr,
dberr_t* err);
+/** This is the low level function used to get access to a database page.
+@param[in] page_id page id
+@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
+@param[in] guess guessed block or NULL
+@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
+BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH
+@param[in] file file name
+@param[in] line line where called
+@param[in] mtr mini-transaction
+@param[out] err DB_SUCCESS or error code
+@return pointer to the block or NULL */
+buf_block_t*
+buf_page_get_low(
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ ulint rw_latch,
+ buf_block_t* guess,
+ ulint mode,
+ const char* file,
+ unsigned line,
+ mtr_t* mtr,
+ dberr_t* err);
+
/** Initializes a page to the buffer buf_pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one
of the functions which perform to a block a state transition NOT_USED =>
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
index 068d7813c20..b91312e81e2 100644
--- a/storage/innobase/include/log0recv.h
+++ b/storage/innobase/include/log0recv.h
@@ -342,4 +342,22 @@ times! */
roll-forward */
#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
+/** This is a low level function for the recovery system
+to create a page which has buffered page initialization redo log records.
+@param[in] page_id page to be created using redo logs
+@return the block containing the created page, or NULL if no page was created */
+buf_block_t* recv_recovery_create_page_low(const page_id_t page_id);
+
+/** Recovery system creates a page which has buffered page initialization
+redo log records.
+@param[in] page_id page to be created using redo logs
+@return block containing the initialized page, or NULL if recovery is not in progress or no page was created */
+inline buf_block_t* recv_recovery_create_page(const page_id_t page_id)
+{
+ if (UNIV_LIKELY(!recv_recovery_on))
+ return NULL;
+
+ return recv_recovery_create_page_low(page_id);
+}
+
#endif
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index 01f8e3636bc..73230def4a1 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -313,7 +313,7 @@ public:
if (!i->second.created) {
continue;
}
- if (buf_block_t* block = buf_page_get_gen(
+ if (buf_block_t* block = buf_page_get_low(
i->first, univ_page_size, RW_X_LATCH, NULL,
BUF_GET_IF_IN_POOL, __FILE__, __LINE__,
&mtr, NULL)) {
@@ -2293,6 +2293,99 @@ static void recv_read_in_area(const page_id_t page_id)
mutex_enter(&recv_sys->mutex);
}
+/** This is another low level function for the recovery system
+to create a page which has buffered page initialization redo log records.
+@param[in] page_id page to be created using redo logs
+@param[in,out] recv_addr Hashed redo logs for the given page id
+@return the block containing the created page, or NULL if no page was created */
+static buf_block_t* recv_recovery_create_page_low(const page_id_t page_id,
+ recv_addr_t* recv_addr)
+{
+ mtr_t mtr;
+ mlog_init_t::init& i = mlog_init.last(page_id);
+ const lsn_t end_lsn = UT_LIST_GET_LAST(recv_addr->rec_list)->end_lsn;
+
+ if (end_lsn < i.lsn)
+ {
+ DBUG_LOG("ib_log", "skip log for page "
+ << page_id
+ << " LSN " << end_lsn
+ << " < " << i.lsn);
+ recv_addr->state = RECV_PROCESSED;
+ignore:
+ ut_a(recv_sys->n_addrs);
+ recv_sys->n_addrs--;
+ return NULL;
+ }
+
+ fil_space_t* space = fil_space_acquire(recv_addr->space);
+ if (!space)
+ {
+ recv_addr->state = RECV_PROCESSED;
+ goto ignore;
+ }
+
+ if (space->enable_lsn)
+ {
+init_fail:
+ fil_space_release(space);
+ recv_addr->state = RECV_NOT_PROCESSED;
+ return NULL;
+ }
+
+ /* Determine if a tablespace could be for an internal table
+ for FULLTEXT INDEX. For those tables, no MLOG_INDEX_LOAD record
+ used to be written when redo logging was disabled. Hence, we
+ cannot optimize away page reads, because all the redo
+ log records for initializing and modifying the page in the
+ past could be older than the page in the data file.
+
+ The check is too broad, causing all
+ tables whose names start with FTS_ to skip the optimization. */
+
+ if (strstr(space->name, "/FTS_"))
+ goto init_fail;
+
+ mtr.start();
+ mtr.set_log_mode(MTR_LOG_NONE);
+ buf_block_t* block = buf_page_create(page_id, page_size_t(space->flags),
+ &mtr);
+ if (recv_addr->state == RECV_PROCESSED)
+ /* The page happened to exist in the buffer pool, or it was
+ just being read in. Before buf_page_get_with_no_latch() returned,
+ all changes must have been applied to the page already. */
+ mtr.commit();
+ else
+ {
+ i.created = true;
+ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+ mtr.x_latch_at_savepoint(0, block);
+ recv_recover_page(block, mtr, recv_addr, i.lsn);
+ ut_ad(mtr.has_committed());
+ }
+
+ fil_space_release(space);
+ return block;
+}
+
+/** This is a low level function for the recovery system
+to create a page which has buffered page initialization redo log records.
+@param[in] page_id page to be created using redo logs
+@return the block containing the created page, or NULL if no page was created */
+buf_block_t* recv_recovery_create_page_low(const page_id_t page_id)
+{
+ buf_block_t* block= NULL;
+ mutex_enter(&recv_sys->mutex);
+ recv_addr_t* recv_addr= recv_get_fil_addr_struct(page_id.space(),
+ page_id.page_no());
+ if (recv_addr && recv_addr->state == RECV_WILL_NOT_READ)
+ {
+ block= recv_recovery_create_page_low(page_id, recv_addr);
+ }
+ mutex_exit(&recv_sys->mutex);
+ return block;
+}
+
/** Apply the hash table of stored log records to persistent data pages.
@param[in] last_batch whether the change buffer merge will be
performed as part of the operation */
@@ -2384,7 +2477,7 @@ ignore:
apply:
mtr.start();
mtr.set_log_mode(MTR_LOG_NONE);
- if (buf_block_t* block = buf_page_get_gen(
+ if (buf_block_t* block = buf_page_get_low(
page_id, univ_page_size,
RW_X_LATCH, NULL,
BUF_GET_IF_IN_POOL,
@@ -2398,77 +2491,9 @@ apply:
mtr.commit();
recv_read_in_area(page_id);
}
- } else {
- mlog_init_t::init& i = mlog_init.last(page_id);
- const lsn_t end_lsn = UT_LIST_GET_LAST(
- recv_addr->rec_list)->end_lsn;
-
- if (end_lsn < i.lsn) {
- DBUG_LOG("ib_log", "skip log for page "
- << page_id
- << " LSN " << end_lsn
- << " < " << i.lsn);
-skip:
- recv_addr->state = RECV_PROCESSED;
- goto ignore;
- }
-
- fil_space_t* space = fil_space_acquire(
- recv_addr->space);
- if (!space) {
- goto skip;
- }
-
- if (space->enable_lsn) {
-do_read:
- fil_space_release(space);
- recv_addr->state = RECV_NOT_PROCESSED;
- goto apply;
- }
-
- /* Determine if a tablespace could be
- for an internal table for FULLTEXT INDEX.
- For those tables, no MLOG_INDEX_LOAD record
- used to be written when redo logging was
- disabled. Hence, we cannot optimize
- away page reads, because all the redo
- log records for initializing and
- modifying the page in the past could
- be older than the page in the data
- file.
-
- The check is too broad, causing all
- tables whose names start with FTS_ to
- skip the optimization. */
-
- if (strstr(space->name, "/FTS_")) {
- goto do_read;
- }
-
- mtr.start();
- mtr.set_log_mode(MTR_LOG_NONE);
- buf_block_t* block = buf_page_create(
- page_id, page_size_t(space->flags),
- &mtr);
- if (recv_addr->state == RECV_PROCESSED) {
- /* The page happened to exist
- in the buffer pool, or it was
- just being read in. Before
- buf_page_get_with_no_latch()
- returned, all changes must have
- been applied to the page already. */
- mtr.commit();
- } else {
- i.created = true;
- buf_block_dbg_add_level(
- block, SYNC_NO_ORDER_CHECK);
- mtr.x_latch_at_savepoint(0, block);
- recv_recover_page(block, mtr,
- recv_addr, i.lsn);
- ut_ad(mtr.has_committed());
- }
-
- fil_space_release(space);
+ } else if (!recv_recovery_create_page_low(
+ page_id, recv_addr)) {
+ goto apply;
}
}
}