diff options
author | Thirunarayanan Balathandayuthapani <thiru@mariadb.com> | 2020-03-23 16:37:44 +0530 |
---|---|---|
committer | Thirunarayanan Balathandayuthapani <thiru@mariadb.com> | 2020-03-23 16:41:48 +0530 |
commit | 6697135c6d03935118c3dfa1c97faea7fa76afa6 (patch) | |
tree | f9896818863ef09cbe20515296a43a33d5026a33 /storage | |
parent | 1e6be6938059d230903029fa99ad6504c53d90ea (diff) | |
download | mariadb-git-6697135c6d03935118c3dfa1c97faea7fa76afa6.tar.gz |
MDEV-21572 buf_page_get_gen() should apply buffered page initialized
redo log during recovery
- InnoDB unnecessarily reads the page from the data file even though the
buffered redo log records fully initialize it. Allow the page initialization
redo log records to be applied to the page in buf_page_get_gen() during recovery.
- Renamed buf_page_get_gen() to buf_page_get_low()
- The newly added buf_page_get_gen() checks for buffered redo log records
for the given page id during recovery
- Added a new function buf_page_mtr_lock(), which latches the page
with the given latch type.
- recv_recovery_create_page() is an inline function that creates a page
if it has buffered page initialization redo log records.
Diffstat (limited to 'storage')
-rw-r--r-- | storage/innobase/buf/buf0buf.cc | 108 | ||||
-rw-r--r-- | storage/innobase/include/buf0buf.h | 24 | ||||
-rw-r--r-- | storage/innobase/include/log0recv.h | 18 | ||||
-rw-r--r-- | storage/innobase/log/log0recv.cc | 171 |
4 files changed, 217 insertions, 104 deletions
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index ad0f40ce67e..954b16eb2d2 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -4190,7 +4190,45 @@ buf_wait_for_read( } } -/** This is the general function used to get access to a database page. +/** Lock the page with the given latch type. +@param[in,out] block block to be locked +@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH +@param[in] mtr mini-transaction +@param[in] file file name +@param[in] line line where called +@return pointer to locked block */ +static buf_block_t* buf_page_mtr_lock(buf_block_t *block, + ulint rw_latch, + mtr_t* mtr, + const char *file, + unsigned line) +{ + mtr_memo_type_t fix_type; + switch (rw_latch) + { + case RW_NO_LATCH: + fix_type= MTR_MEMO_BUF_FIX; + break; + case RW_S_LATCH: + rw_lock_s_lock_inline(&block->lock, 0, file, line); + fix_type= MTR_MEMO_PAGE_S_FIX; + break; + case RW_SX_LATCH: + rw_lock_sx_lock_inline(&block->lock, 0, file, line); + fix_type= MTR_MEMO_PAGE_SX_FIX; + break; + default: + ut_ad(rw_latch == RW_X_LATCH); + rw_lock_x_lock_inline(&block->lock, 0, file, line); + fix_type= MTR_MEMO_PAGE_X_FIX; + break; + } + + mtr_memo_push(mtr, block, fix_type); + return block; +} + +/** This is the low level function used to get access to a database page. 
@param[in] page_id page id @param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH @param[in] guess guessed block or NULL @@ -4201,7 +4239,7 @@ BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH @param[in] mtr mini-transaction @return pointer to the block or NULL */ buf_block_t* -buf_page_get_gen( +buf_page_get_low( const page_id_t page_id, const page_size_t& page_size, ulint rw_latch, @@ -4844,35 +4882,7 @@ evict_from_pool: return NULL; } - mtr_memo_type_t fix_type; - - switch (rw_latch) { - case RW_NO_LATCH: - - fix_type = MTR_MEMO_BUF_FIX; - break; - - case RW_S_LATCH: - rw_lock_s_lock_inline(&fix_block->lock, 0, file, line); - - fix_type = MTR_MEMO_PAGE_S_FIX; - break; - - case RW_SX_LATCH: - rw_lock_sx_lock_inline(&fix_block->lock, 0, file, line); - - fix_type = MTR_MEMO_PAGE_SX_FIX; - break; - - default: - ut_ad(rw_latch == RW_X_LATCH); - rw_lock_x_lock_inline(&fix_block->lock, 0, file, line); - - fix_type = MTR_MEMO_PAGE_X_FIX; - break; - } - - mtr_memo_push(mtr, fix_block, fix_type); + fix_block = buf_page_mtr_lock(fix_block, rw_latch, mtr, file, line); if (mode != BUF_PEEK_IF_IN_POOL && !access_time) { /* In the case of a first access, try to apply linear @@ -4887,6 +4897,42 @@ evict_from_pool: return(fix_block); } +/** This is the general function used to get access to a database page. +It does page initialization and applies the buffered redo logs. 
+@param[in] page_id page id +@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH +@param[in] guess guessed block or NULL +@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL, +BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH +@param[in] file file name +@param[in] line line where called +@param[in] mtr mini-transaction +@param[out] err DB_SUCCESS or error code +@return pointer to the block or NULL */ +buf_block_t* +buf_page_get_gen( + const page_id_t page_id, + const page_size_t& page_size, + ulint rw_latch, + buf_block_t* guess, + ulint mode, + const char* file, + unsigned line, + mtr_t* mtr, + dberr_t* err) +{ + if (buf_block_t *block = recv_recovery_create_page(page_id)) + { + buf_block_fix(block); + ut_ad(rw_lock_s_lock_nowait(&block->debug_latch, file, line)); + block= buf_page_mtr_lock(block, rw_latch, mtr, file, line); + return block; + } + + return buf_page_get_low(page_id, page_size, rw_latch, + guess, mode, file, line, mtr, err); +} + /********************************************************************//** This is the general function used to get optimistic access to a database page. diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index a04936a19cf..d120dc36091 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -436,6 +436,7 @@ buf_page_get_zip( const page_size_t& page_size); /** This is the general function used to get access to a database page. +It does page initialization and applies the buffered redo logs. @param[in] page_id page id @param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH @param[in] guess guessed block or NULL @@ -458,6 +459,29 @@ buf_page_get_gen( mtr_t* mtr, dberr_t* err); +/** This is the low level function used to get access to a database page. 
+@param[in] page_id page id +@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH +@param[in] guess guessed block or NULL +@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL, +BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH +@param[in] file file name +@param[in] line line where called +@param[in] mtr mini-transaction +@param[out] err DB_SUCCESS or error code +@return pointer to the block or NULL */ +buf_block_t* +buf_page_get_low( + const page_id_t page_id, + const page_size_t& page_size, + ulint rw_latch, + buf_block_t* guess, + ulint mode, + const char* file, + unsigned line, + mtr_t* mtr, + dberr_t* err); + /** Initializes a page to the buffer buf_pool. The page is usually not read from a file even if it cannot be found in the buffer buf_pool. This is one of the functions which perform to a block a state transition NOT_USED => diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 068d7813c20..b91312e81e2 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -342,4 +342,22 @@ times! */ roll-forward */ #define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE) +/** This is a low level function for the recovery system +to create a page which has buffered intialized redo log records. +@param[in] page_id page to be created using redo logs +@return whether the page creation successfully */ +buf_block_t* recv_recovery_create_page_low(const page_id_t page_id); + +/** Recovery system creates a page which has buffered intialized +redo log records. 
+@param[in] page_id page to be created using redo logs +@return block which contains page was initialized */ +inline buf_block_t* recv_recovery_create_page(const page_id_t page_id) +{ + if (UNIV_LIKELY(!recv_recovery_on)) + return NULL; + + return recv_recovery_create_page_low(page_id); +} + #endif diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 01f8e3636bc..73230def4a1 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -313,7 +313,7 @@ public: if (!i->second.created) { continue; } - if (buf_block_t* block = buf_page_get_gen( + if (buf_block_t* block = buf_page_get_low( i->first, univ_page_size, RW_X_LATCH, NULL, BUF_GET_IF_IN_POOL, __FILE__, __LINE__, &mtr, NULL)) { @@ -2293,6 +2293,99 @@ static void recv_read_in_area(const page_id_t page_id) mutex_enter(&recv_sys->mutex); } +/** This is another low level function for the recovery system +to create a page which has buffered page intialization redo log records. +@param[in] page_id page to be created using redo logs +@param[in,out] recv_addr Hashed redo logs for the given page id +@return whether the page creation successfully */ +static buf_block_t* recv_recovery_create_page_low(const page_id_t page_id, + recv_addr_t* recv_addr) +{ + mtr_t mtr; + mlog_init_t::init& i = mlog_init.last(page_id); + const lsn_t end_lsn = UT_LIST_GET_LAST(recv_addr->rec_list)->end_lsn; + + if (end_lsn < i.lsn) + { + DBUG_LOG("ib_log", "skip log for page " + << page_id + << " LSN " << end_lsn + << " < " << i.lsn); + recv_addr->state = RECV_PROCESSED; +ignore: + ut_a(recv_sys->n_addrs); + recv_sys->n_addrs--; + return NULL; + } + + fil_space_t* space = fil_space_acquire(recv_addr->space); + if (!space) + { + recv_addr->state = RECV_PROCESSED; + goto ignore; + } + + if (space->enable_lsn) + { +init_fail: + fil_space_release(space); + recv_addr->state = RECV_NOT_PROCESSED; + return NULL; + } + + /* Determine if a tablespace could be for an internal table + for 
FULLTEXT INDEX. For those tables, no MLOG_INDEX_LOAD record + used to be written when redo logging was disabled. Hence, we + cannot optimize away page reads, because all the redo + log records for initializing and modifying the page in the + past could be older than the page in the data file. + + The check is too broad, causing all + tables whose names start with FTS_ to skip the optimization. */ + + if (strstr(space->name, "/FTS_")) + goto init_fail; + + mtr.start(); + mtr.set_log_mode(MTR_LOG_NONE); + buf_block_t* block = buf_page_create(page_id, page_size_t(space->flags), + &mtr); + if (recv_addr->state == RECV_PROCESSED) + /* The page happened to exist in the buffer pool, or it was + just being read in. Before buf_page_get_with_no_latch() returned, + all changes must have been applied to the page already. */ + mtr.commit(); + else + { + i.created = true; + buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); + mtr.x_latch_at_savepoint(0, block); + recv_recover_page(block, mtr, recv_addr, i.lsn); + ut_ad(mtr.has_committed()); + } + + fil_space_release(space); + return block; +} + +/** This is a low level function for the recovery system +to create a page which has buffered intialized redo log records. +@param[in] page_id page to be created using redo logs +@return whether the page creation successfully */ +buf_block_t* recv_recovery_create_page_low(const page_id_t page_id) +{ + buf_block_t* block= NULL; + mutex_enter(&recv_sys->mutex); + recv_addr_t* recv_addr= recv_get_fil_addr_struct(page_id.space(), + page_id.page_no()); + if (recv_addr && recv_addr->state == RECV_WILL_NOT_READ) + { + block= recv_recovery_create_page_low(page_id, recv_addr); + } + mutex_exit(&recv_sys->mutex); + return block; +} + /** Apply the hash table of stored log records to persistent data pages. 
@param[in] last_batch whether the change buffer merge will be performed as part of the operation */ @@ -2384,7 +2477,7 @@ ignore: apply: mtr.start(); mtr.set_log_mode(MTR_LOG_NONE); - if (buf_block_t* block = buf_page_get_gen( + if (buf_block_t* block = buf_page_get_low( page_id, univ_page_size, RW_X_LATCH, NULL, BUF_GET_IF_IN_POOL, @@ -2398,77 +2491,9 @@ apply: mtr.commit(); recv_read_in_area(page_id); } - } else { - mlog_init_t::init& i = mlog_init.last(page_id); - const lsn_t end_lsn = UT_LIST_GET_LAST( - recv_addr->rec_list)->end_lsn; - - if (end_lsn < i.lsn) { - DBUG_LOG("ib_log", "skip log for page " - << page_id - << " LSN " << end_lsn - << " < " << i.lsn); -skip: - recv_addr->state = RECV_PROCESSED; - goto ignore; - } - - fil_space_t* space = fil_space_acquire( - recv_addr->space); - if (!space) { - goto skip; - } - - if (space->enable_lsn) { -do_read: - fil_space_release(space); - recv_addr->state = RECV_NOT_PROCESSED; - goto apply; - } - - /* Determine if a tablespace could be - for an internal table for FULLTEXT INDEX. - For those tables, no MLOG_INDEX_LOAD record - used to be written when redo logging was - disabled. Hence, we cannot optimize - away page reads, because all the redo - log records for initializing and - modifying the page in the past could - be older than the page in the data - file. - - The check is too broad, causing all - tables whose names start with FTS_ to - skip the optimization. */ - - if (strstr(space->name, "/FTS_")) { - goto do_read; - } - - mtr.start(); - mtr.set_log_mode(MTR_LOG_NONE); - buf_block_t* block = buf_page_create( - page_id, page_size_t(space->flags), - &mtr); - if (recv_addr->state == RECV_PROCESSED) { - /* The page happened to exist - in the buffer pool, or it was - just being read in. Before - buf_page_get_with_no_latch() - returned, all changes must have - been applied to the page already. 
*/ - mtr.commit(); - } else { - i.created = true; - buf_block_dbg_add_level( - block, SYNC_NO_ORDER_CHECK); - mtr.x_latch_at_savepoint(0, block); - recv_recover_page(block, mtr, - recv_addr, i.lsn); - ut_ad(mtr.has_committed()); - } - - fil_space_release(space); + } else if (!recv_recovery_create_page_low( + page_id, recv_addr)) { + goto apply; } } } |