Diffstat (limited to 'storage/innobase/buf/buf0lru.cc')
-rw-r--r-- | storage/innobase/buf/buf0lru.cc | 1867 |
1 file changed, 519 insertions, 1348 deletions
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index c85f9331580..a6e73068787 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2020, MariaDB Corporation.
+Copyright (c) 2017, 2021, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -25,55 +25,37 @@ Created 11/5/1995 Heikki Tuuri
 *******************************************************/
 
 #include "buf0lru.h"
-#include "ut0byte.h"
-#include "ut0rnd.h"
 #include "sync0rw.h"
-#include "hash0hash.h"
-#include "os0event.h"
 #include "fil0fil.h"
 #include "btr0btr.h"
 #include "buf0buddy.h"
 #include "buf0buf.h"
-#include "buf0dblwr.h"
 #include "buf0flu.h"
 #include "buf0rea.h"
 #include "btr0sea.h"
-#include "ibuf0ibuf.h"
 #include "os0file.h"
 #include "page0zip.h"
 #include "log0recv.h"
 #include "srv0srv.h"
 #include "srv0mon.h"
 
+/** Flush this many pages in buf_LRU_get_free_block() */
+size_t innodb_lru_flush_size;
+
 /** The number of blocks from the LRU_old pointer onward, including
-the block pointed to, must be buf_pool->LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
+the block pointed to, must be buf_pool.LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
 of the whole LRU list length, except that the tolerance defined below
 is allowed. Note that the tolerance must be small enough such that for
 even the BUF_LRU_OLD_MIN_LEN long LRU list, the LRU_old pointer is not
 allowed to point to either end of the LRU list. */
-static const ulint BUF_LRU_OLD_TOLERANCE = 20;
+static constexpr ulint BUF_LRU_OLD_TOLERANCE = 20;
 
 /** The minimum amount of non-old blocks when the LRU_old list exists
 (that is, when there are more than BUF_LRU_OLD_MIN_LEN blocks).
 @see buf_LRU_old_adjust_len */
 #define BUF_LRU_NON_OLD_MIN_LEN 5
 
-#ifdef BTR_CUR_HASH_ADAPT
-/** When dropping the search hash index entries before deleting an ibd
-file, we build a local array of pages belonging to that tablespace
-in the buffer pool. Following is the size of that array.
-We also release buf_pool->mutex after scanning this many pages of the
-flush_list when dropping a table. This is to ensure that other threads
-are not blocked for extended period of time when using very large
-buffer pools. */
-static const ulint BUF_LRU_DROP_SEARCH_SIZE = 1024;
-#endif /* BTR_CUR_HASH_ADAPT */
-
-/** We scan these many blocks when looking for a clean page to evict
-during LRU eviction. */
-static const ulint BUF_LRU_SEARCH_SCAN_THRESHOLD = 100;
-
 /** If we switch on the InnoDB monitor because there are too few available
 frames in the buffer pool, we set this to TRUE */
 static bool buf_lru_switched_on_innodb_mon = false;
@@ -94,13 +76,12 @@ uncompressed and compressed data), which must be clean. */
 /* @{ */
 
 /** Number of intervals for which we keep the history of these stats.
-Each interval is 1 second, defined by the rate at which
-srv_error_monitor_thread() calls buf_LRU_stat_update(). */
-static const ulint BUF_LRU_STAT_N_INTERVAL = 50;
+Updated at SRV_MONITOR_INTERVAL (the buf_LRU_stat_update() call rate). */
+static constexpr ulint BUF_LRU_STAT_N_INTERVAL= 4;
 
 /** Co-efficient with which we multiply I/O operations to equate them
 with page_zip_decompress() operations. */
-static const ulint BUF_LRU_IO_TO_UNZIP_FACTOR = 50;
+static constexpr ulint BUF_LRU_IO_TO_UNZIP_FACTOR= 50;
 
 /** Sampled values buf_LRU_stat_cur.
 Not protected by any mutex.  Updated by buf_LRU_stat_update(). */
@@ -125,82 +106,66 @@ least this many milliseconds ago. Not protected by any mutex or latch. */
 uint	buf_LRU_old_threshold_ms;
 /* @} */
 
-/******************************************************************//**
-Takes a block out of the LRU list and page hash table.
-If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
+/** Remove bpage from buf_pool.LRU and buf_pool.page_hash.
+
+If bpage->state() == BUF_BLOCK_ZIP_PAGE && !bpage->oldest_modification(),
 the object will be freed.
-The caller must hold buf_pool->mutex, the buf_page_get_mutex() mutex
-and the appropriate hash_lock. This function will release the
-buf_page_get_mutex() and the hash_lock.
+
+@param bpage      buffer block
+@param id         page identifier
+@param hash_lock  buf_pool.page_hash latch (will be released here)
+@param zip        whether bpage->zip of BUF_BLOCK_FILE_PAGE should be freed
 
 If a compressed page is freed other compressed pages may be relocated.
 @retval true if BUF_BLOCK_FILE_PAGE was removed from page_hash. The
 caller needs to free the page to the free list
 @retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In
 this case the block is already returned to the buddy allocator. */
-static MY_ATTRIBUTE((warn_unused_result))
-bool
-buf_LRU_block_remove_hashed(
-/*========================*/
-        buf_page_t*     bpage,  /*!< in: block, must contain a file page and
-                                be in a state where it can be freed; there
-                                may or may not be a hash index to the page */
-        bool            zip);   /*!< in: true if should remove also the
-                                compressed page of an uncompressed page */
-/******************************************************************//**
-Puts a file page whose has no hash index to the free list. */
-static
-void
-buf_LRU_block_free_hashed_page(
-/*===========================*/
-        buf_block_t*    block); /*!< in: block, must contain a file page and
-                                be in a state where it can be freed */
+static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
+                                        page_hash_latch *hash_lock, bool zip);
 
-/******************************************************************//**
-Increases LRU size in bytes with page size inline function */
-static inline
-void
-incr_LRU_size_in_bytes(
-/*===================*/
-        buf_page_t*     bpage,          /*!< in: control block */
-        buf_pool_t*     buf_pool)       /*!< in: buffer pool instance */
+/** Free a block to buf_pool */
+static void buf_LRU_block_free_hashed_page(buf_block_t *block)
 {
-        ut_ad(buf_pool_mutex_own(buf_pool));
+  block->page.free_file_page();
+  buf_LRU_block_free_non_file_page(block);
+}
+
+/** Increase LRU size in bytes by the page size.
+@param[in]      bpage   control block */
+static inline void incr_LRU_size_in_bytes(const buf_page_t* bpage)
+{
+        /* FIXME: use atomics, not mutex */
+        mysql_mutex_assert_owner(&buf_pool.mutex);
 
-        buf_pool->stat.LRU_bytes += bpage->physical_size();
+        buf_pool.stat.LRU_bytes += bpage->physical_size();
 
-        ut_ad(buf_pool->stat.LRU_bytes <= buf_pool->curr_pool_size);
+        ut_ad(buf_pool.stat.LRU_bytes <= buf_pool.curr_pool_size);
 }
 
-/******************************************************************//**
-Determines if the unzip_LRU list should be used for evicting a victim
-instead of the general LRU list.
-@return TRUE if should use unzip_LRU */
-ibool
-buf_LRU_evict_from_unzip_LRU(
-/*=========================*/
-        buf_pool_t*     buf_pool)
+/** @return whether the unzip_LRU list should be used for evicting a victim
+instead of the general LRU list */
+bool buf_LRU_evict_from_unzip_LRU()
 {
-        ut_ad(buf_pool_mutex_own(buf_pool));
+        mysql_mutex_assert_owner(&buf_pool.mutex);
 
        /* If the unzip_LRU list is empty, we can only use the LRU. */
-        if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
-                return(FALSE);
+        if (UT_LIST_GET_LEN(buf_pool.unzip_LRU) == 0) {
+                return false;
        }
 
        /* If unzip_LRU is at most 10% of the size of the LRU list,
        then use the LRU.  This slack allows us to keep hot
        decompressed pages in the buffer pool. */
-        if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
-            <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
-                return(FALSE);
+        if (UT_LIST_GET_LEN(buf_pool.unzip_LRU)
+            <= UT_LIST_GET_LEN(buf_pool.LRU) / 10) {
+                return false;
        }
 
        /* If eviction hasn't started yet, we assume by default
        that a workload is disk bound. */
-        if (buf_pool->freed_page_clock == 0) {
-                return(TRUE);
+        if (buf_pool.freed_page_clock == 0) {
+                return true;
        }
 
        /* Calculate the average over past intervals, and add the values
@@ -218,428 +183,33 @@ buf_LRU_evict_from_unzip_LRU(
        return(unzip_avg <= io_avg * BUF_LRU_IO_TO_UNZIP_FACTOR);
 }
 
-#ifdef BTR_CUR_HASH_ADAPT
-/******************************************************************//**
-While flushing (or removing dirty) pages from a tablespace we don't
-want to hog the CPU and resources. Release the buffer pool and block
-mutex and try to force a context switch. Then reacquire the same mutexes.
-The current page is "fixed" before the release of the mutexes and then
-"unfixed" again once we have reacquired the mutexes. */
-static
-void
-buf_flush_yield(
-/*============*/
-        buf_pool_t*     buf_pool,       /*!< in/out: buffer pool instance */
-        buf_page_t*     bpage)          /*!< in/out: current page */
-{
-        BPageMutex*     block_mutex;
-
-        ut_ad(buf_pool_mutex_own(buf_pool));
-        ut_ad(buf_page_in_file(bpage));
-
-        block_mutex = buf_page_get_mutex(bpage);
-
-        mutex_enter(block_mutex);
-
-        /* "Fix" the block so that the position cannot be
-        changed after we release the buffer pool and
-        block mutexes. */
-        buf_page_set_sticky(bpage);
-
-        /* Now it is safe to release the buf_pool->mutex. */
-        buf_pool_mutex_exit(buf_pool);
-
-        mutex_exit(block_mutex);
-        /* Try and force a context switch. */
-        os_thread_yield();
-
-        buf_pool_mutex_enter(buf_pool);
-
-        mutex_enter(block_mutex);
-
-        /* "Unfix" the block now that we have both the
-        buffer pool and block mutex again. */
-        buf_page_unset_sticky(bpage);
-        mutex_exit(block_mutex);
-}
-
-/******************************************************************//**
-If we have hogged the resources for too long then release the buffer
-pool and flush list mutex and do a thread yield. Set the current page
-to "sticky" so that it is not relocated during the yield.
-@return true if yielded */
-static MY_ATTRIBUTE((warn_unused_result))
-bool
-buf_flush_try_yield(
-/*================*/
-        buf_pool_t*     buf_pool,       /*!< in/out: buffer pool instance */
-        buf_page_t*     bpage,          /*!< in/out: bpage to remove */
-        ulint           processed)      /*!< in: number of pages processed */
-{
-        /* Every BUF_LRU_DROP_SEARCH_SIZE iterations in the
-        loop we release buf_pool->mutex to let other threads
-        do their job but only if the block is not IO fixed. This
-        ensures that the block stays in its position in the
-        flush_list. */
-
-        if (bpage != NULL
-            && processed >= BUF_LRU_DROP_SEARCH_SIZE
-            && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
-
-                buf_flush_list_mutex_exit(buf_pool);
-
-                /* Release the buffer pool and block mutex
-                to give the other threads a go. */
-
-                buf_flush_yield(buf_pool, bpage);
-
-                buf_flush_list_mutex_enter(buf_pool);
-
-                /* Should not have been removed from the flush
-                list during the yield. However, this check is
-                not sufficient to catch a remove -> add. */
-
-                ut_ad(bpage->in_flush_list);
-
-                return(true);
-        }
-
-        return(false);
-}
-#endif /* BTR_CUR_HASH_ADAPT */
-
-/******************************************************************//**
-Removes a single page from a given tablespace inside a specific
-buffer pool instance.
-@return true if page was removed. */
-static MY_ATTRIBUTE((warn_unused_result))
-bool
-buf_flush_or_remove_page(
-/*=====================*/
-        buf_pool_t*     buf_pool,       /*!< in/out: buffer pool instance */
-        buf_page_t*     bpage,          /*!< in/out: bpage to remove */
-        bool            flush)          /*!< in: flush to disk if true but
-                                        don't remove else remove without
-                                        flushing to disk */
-{
-        ut_ad(buf_pool_mutex_own(buf_pool));
-        ut_ad(buf_flush_list_mutex_own(buf_pool));
-
-        /* bpage->space and bpage->io_fix are protected by
-        buf_pool->mutex and block_mutex. It is safe to check
-        them while holding buf_pool->mutex only. */
-
-        if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
-
-                /* We cannot remove this page during this scan
-                yet; maybe the system is currently reading it
-                in, or flushing the modifications to the file */
-                return(false);
-
-        }
-
-        BPageMutex*     block_mutex;
-        bool            processed = false;
-
-        block_mutex = buf_page_get_mutex(bpage);
-
-        /* We have to release the flush_list_mutex to obey the
-        latching order. We are however guaranteed that the page
-        will stay in the flush_list and won't be relocated because
-        buf_flush_remove() and buf_flush_relocate_on_flush_list()
-        need buf_pool->mutex as well. */
-
-        buf_flush_list_mutex_exit(buf_pool);
-
-        mutex_enter(block_mutex);
-
-        ut_ad(bpage->oldest_modification != 0);
-
-        if (!flush) {
-
-                buf_flush_remove(bpage);
-
-                mutex_exit(block_mutex);
-
-                processed = true;
-
-        } else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE)) {
-
-                /* The following call will release the buffer pool
-                and block mutex. */
-                processed = buf_flush_page(
-                        buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, false);
-
-                if (processed) {
-                        /* Wake possible simulated aio thread to actually
-                        post the writes to the operating system */
-                        os_aio_simulated_wake_handler_threads();
-                        buf_pool_mutex_enter(buf_pool);
-                } else {
-                        mutex_exit(block_mutex);
-                }
-        } else {
-                mutex_exit(block_mutex);
-        }
-
-        buf_flush_list_mutex_enter(buf_pool);
-
-        ut_ad(!mutex_own(block_mutex));
-        ut_ad(buf_pool_mutex_own(buf_pool));
-
-        return(processed);
-}
-
-/** Remove all dirty pages belonging to a given tablespace inside a specific
-buffer pool instance when we are deleting the data file(s) of that
-tablespace. The pages still remain a part of LRU and are evicted from
-the list as they age towards the tail of the LRU.
-@param[in,out]  buf_pool        buffer pool
-@param[in]      id              tablespace identifier
-@param[in]      observer        flush observer (to check for interrupt),
-                                or NULL if the files should not be written to
-@param[in]      first           first page to be flushed or evicted
-@return whether all matching dirty pages were removed */
-static MY_ATTRIBUTE((warn_unused_result))
-bool
-buf_flush_or_remove_pages(
-        buf_pool_t*     buf_pool,
-        ulint           id,
-        FlushObserver*  observer,
-        ulint           first)
-{
-        buf_page_t*     prev;
-        buf_page_t*     bpage;
-        ulint           processed = 0;
-
-        buf_flush_list_mutex_enter(buf_pool);
-
-rescan:
-        bool    all_freed = true;
-
-        for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
-             bpage != NULL;
-             bpage = prev) {
-
-                ut_a(buf_page_in_file(bpage));
-
-                /* Save the previous link because once we free the
-                page we can't rely on the links. */
-
-                prev = UT_LIST_GET_PREV(list, bpage);
-
-                /* Flush the pages matching space id,
-                or pages matching the flush observer. */
-                if (observer && observer->is_partial_flush()) {
-                        if (observer != bpage->flush_observer) {
-                                /* Skip this block. */
-                        } else if (!buf_flush_or_remove_page(
-                                           buf_pool, bpage,
-                                           !observer->is_interrupted())) {
-                                all_freed = false;
-                        } else if (!observer->is_interrupted()) {
-                                /* The processing was successful. And during the
-                                processing we have released the buf_pool mutex
-                                when calling buf_page_flush(). We cannot trust
-                                prev pointer. */
-                                goto rescan;
-                        }
-                } else if (id != bpage->id.space()) {
-                        /* Skip this block, because it is for a
-                        different tablespace. */
-                } else if (bpage->id.page_no() < first) {
-                        /* Skip this block, because it is below the limit. */
-                } else if (!buf_flush_or_remove_page(
-                                   buf_pool, bpage, observer != NULL)) {
-
-                        /* Remove was unsuccessful, we have to try again
-                        by scanning the entire list from the end.
-                        This also means that we never released the
-                        buf_pool mutex. Therefore we can trust the prev
-                        pointer.
-                        buf_flush_or_remove_page() released the
-                        flush list mutex but not the buf_pool mutex.
-                        Therefore it is possible that a new page was
-                        added to the flush list. For example, in case
-                        where we are at the head of the flush list and
-                        prev == NULL. That is OK because we have the
-                        tablespace quiesced and no new pages for this
-                        space-id should enter flush_list. This is
-                        because the only callers of this function are
-                        DROP TABLE and FLUSH TABLE FOR EXPORT.
-                        We know that we'll have to do at least one more
-                        scan but we don't break out of loop here and
-                        try to do as much work as we can in this
-                        iteration. */
-
-                        all_freed = false;
-                } else if (observer) {
-
-                        /* The processing was successful. And during the
-                        processing we have released the buf_pool mutex
-                        when calling buf_page_flush(). We cannot trust
-                        prev pointer. */
-                        goto rescan;
-                }
-
-#ifdef BTR_CUR_HASH_ADAPT
-                ++processed;
-
-                /* Yield if we have hogged the CPU and mutexes for too long. */
-                if (buf_flush_try_yield(buf_pool, prev, processed)) {
-
-                        /* Reset the batch size counter if we had to yield. */
-
-                        processed = 0;
-                }
-#endif /* BTR_CUR_HASH_ADAPT */
-
-                /* The check for trx is interrupted is expensive, we want
-                to check every N iterations. */
-                if (!processed && observer) {
-                        observer->check_interrupted();
-                }
-        }
-
-        buf_flush_list_mutex_exit(buf_pool);
-
-        return(all_freed);
-}
-
-/** Remove or flush all the dirty pages that belong to a given tablespace
-inside a specific buffer pool instance. The pages will remain in the LRU
-list and will be evicted from the LRU list as they age and move towards
-the tail of the LRU list.
-@param[in,out]  buf_pool        buffer pool
-@param[in]      id              tablespace identifier
-@param[in]      observer        flush observer,
-                                or NULL if the files should not be written to
-@param[in]      first           first page to be flushed or evicted */
-static
-void
-buf_flush_dirty_pages(
-        buf_pool_t*     buf_pool,
-        ulint           id,
-        FlushObserver*  observer,
-        ulint           first)
-{
-        for (;;) {
-                buf_pool_mutex_enter(buf_pool);
-
-                bool freed = buf_flush_or_remove_pages(buf_pool, id, observer,
-                                                       first);
-
-                buf_pool_mutex_exit(buf_pool);
-
-                ut_ad(buf_flush_validate(buf_pool));
-
-                if (freed) {
-                        break;
-                }
-
-                os_thread_sleep(2000);
-                ut_ad(buf_flush_validate(buf_pool));
-        }
-
-        ut_ad((observer && observer->is_interrupted())
-              || first
-              || buf_pool_get_dirty_pages_count(buf_pool, id, observer) == 0);
-}
-
-/** Empty the flush list for all pages belonging to a tablespace.
-@param[in]      id              tablespace identifier
-@param[in]      observer        flush observer,
-                                or NULL if nothing is to be written
-@param[in]      first           first page to be flushed or evicted */
-void buf_LRU_flush_or_remove_pages(ulint id, FlushObserver* observer,
-                                   ulint first)
-{
-        /* Pages in the system tablespace must never be discarded. */
-        ut_ad(id || observer);
-
-        for (ulint i = 0; i < srv_buf_pool_instances; i++) {
-                buf_flush_dirty_pages(buf_pool_from_array(i), id, observer,
-                                      first);
-        }
-
-        if (observer && !observer->is_interrupted()) {
-                /* Ensure that all asynchronous IO is completed. */
-                os_aio_wait_until_no_pending_writes();
-                fil_flush(id);
-        }
-}
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/********************************************************************//**
-Insert a compressed block into buf_pool->zip_clean in the LRU order. */
-void
-buf_LRU_insert_zip_clean(
-/*=====================*/
-        buf_page_t*     bpage)  /*!< in: pointer to the block in question */
-{
-        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
-
-        ut_ad(buf_pool_mutex_own(buf_pool));
-        ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
-
-        /* Find the first successor of bpage in the LRU list
-        that is in the zip_clean list. */
-        buf_page_t*     b = bpage;
-
-        do {
-                b = UT_LIST_GET_NEXT(LRU, b);
-        } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
-
-        /* Insert bpage before b, i.e., after the predecessor of b. */
-        if (b != NULL) {
-                b = UT_LIST_GET_PREV(list, b);
-        }
-
-        if (b != NULL) {
-                UT_LIST_INSERT_AFTER(buf_pool->zip_clean, b, bpage);
-        } else {
-                UT_LIST_ADD_FIRST(buf_pool->zip_clean, bpage);
-        }
-}
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-/******************************************************************//**
-Try to free an uncompressed page of a compressed block from the unzip
+/** Try to free an uncompressed page of a compressed block from the unzip
 LRU list.  The compressed page is preserved, and it need not be clean.
+@param limit  maximum number of blocks to scan
 @return true if freed */
-static
-bool
-buf_LRU_free_from_unzip_LRU_list(
-/*=============================*/
-        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
-        bool            scan_all)       /*!< in: scan whole LRU list
-                                        if true, otherwise scan only
-                                        srv_LRU_scan_depth / 2 blocks. */
+static bool buf_LRU_free_from_unzip_LRU_list(ulint limit)
 {
-        ut_ad(buf_pool_mutex_own(buf_pool));
+        mysql_mutex_assert_owner(&buf_pool.mutex);
 
-        if (!buf_LRU_evict_from_unzip_LRU(buf_pool)) {
+        if (!buf_LRU_evict_from_unzip_LRU()) {
                return(false);
        }
 
        ulint   scanned = 0;
        bool    freed = false;
 
-        for (buf_block_t* block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
-             block != NULL
-             && !freed
-             && (scan_all || scanned < srv_LRU_scan_depth);
-             ++scanned) {
+        for (buf_block_t* block = UT_LIST_GET_LAST(buf_pool.unzip_LRU);
+             block && scanned < limit; ++scanned) {
+                buf_block_t* prev_block = UT_LIST_GET_PREV(unzip_LRU, block);
 
-                buf_block_t*    prev_block;
-
-                prev_block = UT_LIST_GET_PREV(unzip_LRU, block);
-
-                ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+                ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE);
                ut_ad(block->in_unzip_LRU_list);
                ut_ad(block->page.in_LRU_list);
 
                freed = buf_LRU_free_page(&block->page, false);
+                if (freed) {
+                        break;
+                }
 
                block = prev_block;
        }
@@ -655,57 +225,35 @@ buf_LRU_free_from_unzip_LRU_list(
        return(freed);
 }
 
-/******************************************************************//**
-Try to free a clean page from the common LRU list.
-@return true if freed */
-static
-bool
-buf_LRU_free_from_common_LRU_list(
-/*==============================*/
-        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
-        bool            scan_all)       /*!< in: scan whole LRU list
-                                        if true, otherwise scan only
-                                        up to BUF_LRU_SEARCH_SCAN_THRESHOLD */
+/** Try to free a clean page from the common LRU list.
+@param limit  maximum number of blocks to scan
+@return whether a page was freed */
+static bool buf_LRU_free_from_common_LRU_list(ulint limit)
 {
-        ut_ad(buf_pool_mutex_own(buf_pool));
+        mysql_mutex_assert_owner(&buf_pool.mutex);
 
        ulint           scanned = 0;
        bool            freed = false;
 
-        for (buf_page_t* bpage = buf_pool->lru_scan_itr.start();
-             bpage != NULL
-             && !freed
-             && (scan_all || scanned < BUF_LRU_SEARCH_SCAN_THRESHOLD);
-             ++scanned, bpage = buf_pool->lru_scan_itr.get()) {
-
+        for (buf_page_t* bpage = buf_pool.lru_scan_itr.start();
+             bpage && scanned < limit;
+             ++scanned, bpage = buf_pool.lru_scan_itr.get()) {
                buf_page_t*     prev = UT_LIST_GET_PREV(LRU, bpage);
-                BPageMutex*     mutex = buf_page_get_mutex(bpage);
-
-                buf_pool->lru_scan_itr.set(prev);
-
-                mutex_enter(mutex);
-
-                ut_ad(buf_page_in_file(bpage));
-                ut_ad(bpage->in_LRU_list);
-
-                unsigned        accessed = buf_page_is_accessed(bpage);
-
-                if (buf_flush_ready_for_replace(bpage)) {
-                        mutex_exit(mutex);
-                        freed = buf_LRU_free_page(bpage, true);
-                } else {
-                        mutex_exit(mutex);
-                }
+                buf_pool.lru_scan_itr.set(prev);
+
+                const auto accessed = bpage->is_accessed();
+                if (!bpage->oldest_modification()
+                    && buf_LRU_free_page(bpage, true)) {
+                        if (!accessed) {
+                                /* Keep track of pages that are evicted without
+                                ever being accessed. This gives us a measure of
+                                the effectiveness of readahead */
+                                ++buf_pool.stat.n_ra_pages_evicted;
+                        }
 
-                if (freed && !accessed) {
-                        /* Keep track of pages that are evicted without
-                        ever being accessed. This gives us a measure of
-                        the effectiveness of readahead */
-                        ++buf_pool->stat.n_ra_pages_evicted;
+                        freed = true;
+                        break;
                }
-
-                ut_ad(buf_pool_mutex_own(buf_pool));
-                ut_ad(!mutex_own(mutex));
        }
 
        if (scanned) {
@@ -719,109 +267,57 @@ buf_LRU_free_from_common_LRU_list(
        return(freed);
 }
 
-/******************************************************************//**
-Try to free a replaceable block.
+/** Try to free a replaceable block.
+@param limit  maximum number of blocks to scan
 @return true if found and freed */
-bool
-buf_LRU_scan_and_free_block(
-/*========================*/
-        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
-        bool            scan_all)       /*!< in: scan whole LRU list
-                                        if true, otherwise scan only
-                                        BUF_LRU_SEARCH_SCAN_THRESHOLD
-                                        blocks. */
+bool buf_LRU_scan_and_free_block(ulint limit)
 {
-        ut_ad(buf_pool_mutex_own(buf_pool));
+        mysql_mutex_assert_owner(&buf_pool.mutex);
 
-        return(buf_LRU_free_from_unzip_LRU_list(buf_pool, scan_all)
-               || buf_LRU_free_from_common_LRU_list(buf_pool, scan_all));
+        return buf_LRU_free_from_unzip_LRU_list(limit) ||
+                buf_LRU_free_from_common_LRU_list(limit);
 }
 
-/******************************************************************//**
-Returns TRUE if less than 25 % of the buffer pool in any instance is
-available. This can be used in heuristics to prevent huge transactions
-eating up the whole buffer pool for their locks.
-@return TRUE if less than 25 % of buffer pool left */
-ibool
-buf_LRU_buf_pool_running_out(void)
-/*==============================*/
-{
-        ibool   ret = FALSE;
-
-        for (ulint i = 0; i < srv_buf_pool_instances && !ret; i++) {
-                buf_pool_t*     buf_pool;
-
-                buf_pool = buf_pool_from_array(i);
-
-                buf_pool_mutex_enter(buf_pool);
-
-                if (!recv_recovery_is_on()
-                    && UT_LIST_GET_LEN(buf_pool->free)
-                       + UT_LIST_GET_LEN(buf_pool->LRU)
-                       < ut_min(buf_pool->curr_size,
-                                buf_pool->old_size) / 4) {
-
-                        ret = TRUE;
-                }
-
-                buf_pool_mutex_exit(buf_pool);
-        }
-
-        return(ret);
-}
-
-/******************************************************************//**
-Returns a free block from the buf_pool. The block is taken off the
-free list. If it is empty, returns NULL.
-@return a free control block, or NULL if the buf_block->free list is empty */
-buf_block_t*
-buf_LRU_get_free_only(
-/*==================*/
-        buf_pool_t*     buf_pool)
+/** @return a buffer block from the buf_pool.free list
+@retval NULL if the free list is empty */
+buf_block_t* buf_LRU_get_free_only()
 {
        buf_block_t*    block;
 
-        ut_ad(buf_pool_mutex_own(buf_pool));
+        mysql_mutex_assert_owner(&buf_pool.mutex);
 
        block = reinterpret_cast<buf_block_t*>(
-                UT_LIST_GET_FIRST(buf_pool->free));
+                UT_LIST_GET_FIRST(buf_pool.free));
 
        while (block != NULL) {
-
                ut_ad(block->page.in_free_list);
                ut_d(block->page.in_free_list = FALSE);
-                ut_ad(!block->page.in_flush_list);
+                ut_ad(!block->page.oldest_modification());
                ut_ad(!block->page.in_LRU_list);
-                ut_a(!buf_page_in_file(&block->page));
-                UT_LIST_REMOVE(buf_pool->free, &block->page);
-
-                if (buf_pool->curr_size >= buf_pool->old_size
-                    || UT_LIST_GET_LEN(buf_pool->withdraw)
-                       >= buf_pool->withdraw_target
-                    || !buf_block_will_withdrawn(buf_pool, block)) {
-                        /* found valid free block */
-                        buf_page_mutex_enter(block);
+                ut_a(!block->page.in_file());
+                UT_LIST_REMOVE(buf_pool.free, &block->page);
+
+                if (buf_pool.curr_size >= buf_pool.old_size
+                    || UT_LIST_GET_LEN(buf_pool.withdraw)
+                    >= buf_pool.withdraw_target
+                    || !buf_pool.will_be_withdrawn(block->page)) {
                        /* No adaptive hash index entries may point to
                        a free block. */
                        assert_block_ahi_empty(block);
-                        buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
+                        block->page.set_state(BUF_BLOCK_MEMORY);
                        MEM_MAKE_ADDRESSABLE(block->frame, srv_page_size);
-
-                        ut_ad(buf_pool_from_block(block) == buf_pool);
-
-                        buf_page_mutex_exit(block);
                        break;
                }
 
                /* This should be withdrawn */
                UT_LIST_ADD_LAST(
-                        buf_pool->withdraw,
+                        buf_pool.withdraw,
                        &block->page);
 
-                ut_d(block->in_withdraw_list = TRUE);
+                ut_d(block->in_withdraw_list = true);
 
                block = reinterpret_cast<buf_block_t*>(
-                        UT_LIST_GET_FIRST(buf_pool->free));
+                        UT_LIST_GET_FIRST(buf_pool.free));
        }
 
        return(block);
@@ -832,162 +328,129 @@ Checks how much of buf_pool is occupied by non-data objects like
 AHI, lock heaps etc. Depending on the size of non-data objects this
 function will either assert or issue a warning and switch on the
 status monitor. */
-static
-void
-buf_LRU_check_size_of_non_data_objects(
-/*===================================*/
-        const buf_pool_t*       buf_pool)       /*!< in: buffer pool instance */
+static void buf_LRU_check_size_of_non_data_objects()
 {
-        ut_ad(buf_pool_mutex_own(buf_pool));
+  mysql_mutex_assert_owner(&buf_pool.mutex);
 
-        if (!recv_recovery_is_on()
-            && buf_pool->curr_size == buf_pool->old_size
-            && UT_LIST_GET_LEN(buf_pool->free)
-               + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
+  if (recv_recovery_is_on() || buf_pool.curr_size != buf_pool.old_size)
+    return;
 
-                ib::fatal() << "Over 95 percent of the buffer pool is"
-                        " occupied by lock heaps"
+  const auto s= UT_LIST_GET_LEN(buf_pool.free) + UT_LIST_GET_LEN(buf_pool.LRU);
+
+  if (s < buf_pool.curr_size / 20)
+    ib::fatal() << "Over 95 percent of the buffer pool is"
+      " occupied by lock heaps"
 #ifdef BTR_CUR_HASH_ADAPT
-                        " or the adaptive hash index!"
+      " or the adaptive hash index"
 #endif /* BTR_CUR_HASH_ADAPT */
-                        " Check that your transactions do not set too many"
-                        " row locks, or review if"
-                        " innodb_buffer_pool_size="
-                        << (buf_pool->curr_size >> (20U - srv_page_size_shift))
-                        << "M could be bigger.";
-        } else if (!recv_recovery_is_on()
-                   && buf_pool->curr_size == buf_pool->old_size
-                   && (UT_LIST_GET_LEN(buf_pool->free)
-                       + UT_LIST_GET_LEN(buf_pool->LRU))
-                   < buf_pool->curr_size / 3) {
-
-                if (!buf_lru_switched_on_innodb_mon && srv_monitor_event) {
-
-                        /* Over 67 % of the buffer pool is occupied by lock
-                        heaps or the adaptive hash index. This may be a memory
-                        leak! */
-
-                        ib::warn() << "Over 67 percent of the buffer pool is"
-                                " occupied by lock heaps"
+      "! Check that your transactions do not set too many"
+      " row locks, or review if innodb_buffer_pool_size="
+      << (buf_pool.curr_size >> (20U - srv_page_size_shift))
+      << "M could be bigger.";
+
+  if (s < buf_pool.curr_size / 3)
+  {
+    if (!buf_lru_switched_on_innodb_mon && srv_monitor_timer)
+    {
+      /* Over 67 % of the buffer pool is occupied by lock heaps or
+      the adaptive hash index. This may be a memory leak! */
+      ib::warn() << "Over 67 percent of the buffer pool is"
+        " occupied by lock heaps"
 #ifdef BTR_CUR_HASH_ADAPT
-                                " or the adaptive hash index!"
+        " or the adaptive hash index"
 #endif /* BTR_CUR_HASH_ADAPT */
-                                " Check that your transactions do not"
-                                " set too many row locks."
-                                " innodb_buffer_pool_size="
-                                << (buf_pool->curr_size >>
-                                    (20U - srv_page_size_shift)) << "M."
-                                " Starting the InnoDB Monitor to print"
-                                " diagnostics.";
-
-                        buf_lru_switched_on_innodb_mon = true;
-                        srv_print_innodb_monitor = TRUE;
-                        os_event_set(srv_monitor_event);
-                }
-
-        } else if (buf_lru_switched_on_innodb_mon) {
-
-                /* Switch off the InnoDB Monitor; this is a simple way
-                to stop the monitor if the situation becomes less urgent,
-                but may also surprise users if the user also switched on the
-                monitor! */
-
-                buf_lru_switched_on_innodb_mon = false;
-                srv_print_innodb_monitor = FALSE;
-        }
+        "! Check that your transactions do not set too many row locks."
+        " innodb_buffer_pool_size="
+        << (buf_pool.curr_size >> (20U - srv_page_size_shift))
+        << "M. Starting the InnoDB Monitor to print diagnostics.";
+      buf_lru_switched_on_innodb_mon= true;
+      srv_print_innodb_monitor= TRUE;
+      srv_monitor_timer_schedule_now();
+    }
+  }
+  else if (buf_lru_switched_on_innodb_mon)
+  {
+    /* Switch off the InnoDB Monitor; this is a simple way to stop the
    monitor if the situation becomes less urgent, but may also
    surprise users who did SET GLOBAL innodb_status_output=ON earlier! */
+    buf_lru_switched_on_innodb_mon= false;
+    srv_print_innodb_monitor= FALSE;
+  }
 }
 
-/******************************************************************//**
-Returns a free block from the buf_pool. The block is taken off the
-free list. If free list is empty, blocks are moved from the end of the
-LRU list to the free list.
+/** Get a block from the buf_pool.free list.
+If the list is empty, blocks will be moved from the end of buf_pool.LRU
+to buf_pool.free.
+
 This function is called from a user thread when it needs a clean
 block to read in a page. Note that we only ever get a block from
 the free list. Even when we flush a page or find a page in LRU scan
 we put it to free list to be used.
 * iteration 0:
-  * get a block from free list, success:done
-  * if buf_pool->try_LRU_scan is set
-    * scan LRU up to srv_LRU_scan_depth to find a clean block
-    * the above will put the block on free list
+  * get a block from the buf_pool.free list, success:done
+  * if buf_pool.try_LRU_scan is set
+    * scan LRU up to 100 pages to free a clean block
     * success:retry the free list
-  * flush one dirty page from tail of LRU to disk
-    * the above will put the block on free list
+  * flush up to innodb_lru_flush_size LRU blocks to data files
+    (until UT_LIST_GET_GEN(buf_pool.free) < innodb_lru_scan_depth)
+    * on buf_page_write_complete() the blocks will put on buf_pool.free list
     * success: retry the free list
-* iteration 1:
-  * same as iteration 0 except:
-    * scan whole LRU list
-    * scan LRU list even if buf_pool->try_LRU_scan is not set
-* iteration > 1:
-  * same as iteration 1 but sleep 10ms
-@return the free control block, in state BUF_BLOCK_READY_FOR_USE */
-buf_block_t*
-buf_LRU_get_free_block(
-/*===================*/
-        buf_pool_t*     buf_pool)       /*!< in/out: buffer pool instance */
+* subsequent iterations: same as iteration 0 except:
+  * scan whole LRU list
+  * scan LRU list even if buf_pool.try_LRU_scan is not set
+
+@param have_mutex  whether buf_pool.mutex is already being held
+@return the free control block, in state BUF_BLOCK_MEMORY */
+buf_block_t* buf_LRU_get_free_block(bool have_mutex)
 {
-        buf_block_t*    block           = NULL;
-        bool            freed           = false;
        ulint           n_iterations    = 0;
        ulint           flush_failures  = 0;
-
        MONITOR_INC(MONITOR_LRU_GET_FREE_SEARCH);
-loop:
-        buf_pool_mutex_enter(buf_pool);
-
-        buf_LRU_check_size_of_non_data_objects(buf_pool);
+        if (have_mutex) {
+                mysql_mutex_assert_owner(&buf_pool.mutex);
+                goto got_mutex;
+        }
+        mysql_mutex_lock(&buf_pool.mutex);
+got_mutex:
+        buf_LRU_check_size_of_non_data_objects();
 
        DBUG_EXECUTE_IF("ib_lru_force_no_free_page",
                if (!buf_lru_free_blocks_error_printed) {
                        n_iterations = 21;
                        goto not_found;});
 
+retry:
        /* If there is a block in the free list, take it */
-        block = buf_LRU_get_free_only(buf_pool);
-
-        if (block != NULL) {
-
-                buf_pool_mutex_exit(buf_pool);
-                ut_ad(buf_pool_from_block(block) == buf_pool);
+        if (buf_block_t* block = buf_LRU_get_free_only()) {
+                if (!have_mutex) {
+                        mysql_mutex_unlock(&buf_pool.mutex);
+                }
                memset(&block->page.zip, 0, sizeof block->page.zip);
-
-                block->page.flush_observer = NULL;
-                return(block);
+                return block;
        }
 
        MONITOR_INC( MONITOR_LRU_GET_FREE_LOOPS );
-        freed = false;
-        if (buf_pool->try_LRU_scan || n_iterations > 0) {
+        if (n_iterations || buf_pool.try_LRU_scan) {
                /* If no block was in the free list, search from the
                end of the LRU list and try to free a block there.
                If we are doing for the first time we'll scan only
                tail of the LRU list otherwise we scan the whole LRU
                list. */
-                freed = buf_LRU_scan_and_free_block(
-                        buf_pool, n_iterations > 0);
-
-                if (!freed && n_iterations == 0) {
-                        /* Tell other threads that there is no point
-                        in scanning the LRU list. This flag is set to
-                        TRUE again when we flush a batch from this
-                        buffer pool. */
-                        buf_pool->try_LRU_scan = FALSE;
-
-                        /* Also tell the page_cleaner thread that
-                        there is work for it to do. */
-                        os_event_set(buf_flush_event);
+                if (buf_LRU_scan_and_free_block(n_iterations
+                                                ? ULINT_UNDEFINED : 100)) {
+                        goto retry;
                }
+
+                /* Tell other threads that there is no point
+                in scanning the LRU list. */
+                buf_pool.try_LRU_scan = false;
        }
 
 #ifndef DBUG_OFF
 not_found:
 #endif
-
-        buf_pool_mutex_exit(buf_pool);
-
-        if (freed) {
-                goto loop;
-        }
+        mysql_mutex_unlock(&buf_pool.mutex);
+        buf_flush_wait_batch_end_acquiring_mutex(true);
 
        if (n_iterations > 20 && !buf_lru_free_blocks_error_printed
            && srv_buf_pool_old_size == srv_buf_pool_size) {
@@ -998,7 +461,7 @@ not_found:
                        " flush a page!"
                        " Consider increasing innodb_buffer_pool_size."
                        " Pending flushes (fsync) log: "
-                        << fil_n_pending_log_flushes
+                        << log_sys.get_pending_flushes()
                        << "; buffer pool: "
                        << fil_n_pending_tablespace_flushes
                        << ". " << os_n_file_reads << " OS file reads, "
@@ -1009,18 +472,8 @@ not_found:
                buf_lru_free_blocks_error_printed = true;
        }
 
-        /* If we have scanned the whole LRU and still are unable to
-        find a free block then we should sleep here to let the
-        page_cleaner do an LRU batch for us. */
-
-        if (!srv_read_only_mode) {
-                os_event_set(buf_flush_event);
-        }
-
        if (n_iterations > 1) {
-
                MONITOR_INC( MONITOR_LRU_GET_FREE_WAITS );
-
-                os_thread_sleep(10000);
        }
 
        /* No free block was found: try to flush the LRU list.
@@ -1031,61 +484,55 @@ not_found:
        TODO: A more elegant way would have been to return the freed
        up block to the caller here but the code that deals with
        removing the block from page_hash and LRU_list is fairly
-        involved (particularly in case of compressed pages). We
+        involved (particularly in case of ROW_FORMAT=COMPRESSED pages). We
        can do that in a separate patch sometime in future. */
 
-        if (!buf_flush_single_page_from_LRU(buf_pool)) {
+        if (!buf_flush_lists(innodb_lru_flush_size, 0)) {
                MONITOR_INC(MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT);
                ++flush_failures;
        }
 
-        srv_stats.buf_pool_wait_free.inc();
-
        n_iterations++;
 
-        goto loop;
+        mysql_mutex_lock(&buf_pool.mutex);
+        buf_pool.stat.LRU_waits++;
+        goto got_mutex;
 }
 
-/*******************************************************************//**
-Moves the LRU_old pointer so that the length of the old blocks list
+/** Move the LRU_old pointer so that the length of the old blocks list
 is inside the allowed limits. */
-UNIV_INLINE
-void
-buf_LRU_old_adjust_len(
-/*===================*/
-        buf_pool_t*     buf_pool)       /*!< in: buffer pool instance */
+static void buf_LRU_old_adjust_len()
 {
        ulint   old_len;
        ulint   new_len;
 
-        ut_a(buf_pool->LRU_old);
-        ut_ad(buf_pool_mutex_own(buf_pool));
-        ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
-        ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
+        ut_a(buf_pool.LRU_old);
+        mysql_mutex_assert_owner(&buf_pool.mutex);
+        ut_ad(buf_pool.LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
+        ut_ad(buf_pool.LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
        compile_time_assert(BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN
                            > BUF_LRU_OLD_RATIO_DIV
                            * (BUF_LRU_OLD_TOLERANCE + 5));
        compile_time_assert(BUF_LRU_NON_OLD_MIN_LEN < BUF_LRU_OLD_MIN_LEN);
 
 #ifdef UNIV_LRU_DEBUG
-        /* buf_pool->LRU_old must be the first item in the LRU list
+        /* buf_pool.LRU_old must be the first item in the LRU list
        whose "old" flag is set. */
-        ut_a(buf_pool->LRU_old->old);
-        ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
-             || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
-        ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
-             || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
+        ut_a(buf_pool.LRU_old->old);
+        ut_a(!UT_LIST_GET_PREV(LRU, buf_pool.LRU_old)
+             || !UT_LIST_GET_PREV(LRU, buf_pool.LRU_old)->old);
+        ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool.LRU_old)
+             || UT_LIST_GET_NEXT(LRU, buf_pool.LRU_old)->old);
 #endif /* UNIV_LRU_DEBUG */
 
-        old_len = buf_pool->LRU_old_len;
-        new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU)
-                         * buf_pool->LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV,
-                         UT_LIST_GET_LEN(buf_pool->LRU)
+        old_len = buf_pool.LRU_old_len;
+        new_len = ut_min(UT_LIST_GET_LEN(buf_pool.LRU)
+                         * buf_pool.LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV,
+                         UT_LIST_GET_LEN(buf_pool.LRU)
                         - (BUF_LRU_OLD_TOLERANCE
                            + BUF_LRU_NON_OLD_MIN_LEN));
 
        for (;;) {
-                buf_page_t*     LRU_old = buf_pool->LRU_old;
+                buf_page_t*     LRU_old = buf_pool.LRU_old;
 
                ut_a(LRU_old);
                ut_ad(LRU_old->in_LRU_list);
@@ -1097,174 +544,130 @@ buf_LRU_old_adjust_len(
 
                if (old_len + BUF_LRU_OLD_TOLERANCE < new_len) {
 
-                        buf_pool->LRU_old = LRU_old = UT_LIST_GET_PREV(
+                        buf_pool.LRU_old = LRU_old = UT_LIST_GET_PREV(
                                LRU, LRU_old);
 #ifdef UNIV_LRU_DEBUG
                        ut_a(!LRU_old->old);
 #endif /* UNIV_LRU_DEBUG */
-                        old_len = ++buf_pool->LRU_old_len;
-                        buf_page_set_old(LRU_old, TRUE);
+                        old_len = ++buf_pool.LRU_old_len;
+                        LRU_old->set_old(true);
 
                } else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) {
 
-                        buf_pool->LRU_old = UT_LIST_GET_NEXT(LRU, LRU_old);
-                        old_len = --buf_pool->LRU_old_len;
-                        buf_page_set_old(LRU_old, FALSE);
+                        buf_pool.LRU_old = UT_LIST_GET_NEXT(LRU, LRU_old);
+                        old_len = --buf_pool.LRU_old_len;
+                        LRU_old->set_old(false);
                } else {
                        return;
                }
        }
 }
 
-/*******************************************************************//**
-Initializes the old blocks pointer in the LRU list. This function should be
+/** Initialize the old blocks pointer in the LRU list. This function should be
 called when the LRU list grows to BUF_LRU_OLD_MIN_LEN length. */
-static
-void
-buf_LRU_old_init(
-/*=============*/
-        buf_pool_t*     buf_pool)
+static void buf_LRU_old_init()
 {
-        ut_ad(buf_pool_mutex_own(buf_pool));
-        ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
+        mysql_mutex_assert_owner(&buf_pool.mutex);
+        ut_a(UT_LIST_GET_LEN(buf_pool.LRU) == BUF_LRU_OLD_MIN_LEN);
 
        /* We first initialize all blocks in the LRU list as old and then use
        the adjust function to move the LRU_old pointer to the right
        position */
 
-        for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+        for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool.LRU);
             bpage != NULL;
             bpage = UT_LIST_GET_PREV(LRU, bpage)) {
 
                ut_ad(bpage->in_LRU_list);
-                ut_ad(buf_page_in_file(bpage));
 
                /* This loop temporarily violates the
-                assertions of buf_page_set_old(). */
-                bpage->old = TRUE;
+                assertions of buf_page_t::set_old(). */
+                bpage->old = true;
        }
 
-        buf_pool->LRU_old = UT_LIST_GET_FIRST(buf_pool->LRU);
-        buf_pool->LRU_old_len = UT_LIST_GET_LEN(buf_pool->LRU);
+        buf_pool.LRU_old = UT_LIST_GET_FIRST(buf_pool.LRU);
+        buf_pool.LRU_old_len = UT_LIST_GET_LEN(buf_pool.LRU);
 
-        buf_LRU_old_adjust_len(buf_pool);
+        buf_LRU_old_adjust_len();
 }
 
-/******************************************************************//**
-Remove a block from the unzip_LRU list if it belonged to the list. */
-static
-void
-buf_unzip_LRU_remove_block_if_needed(
-/*=================================*/
-        buf_page_t*     bpage)  /*!< in/out: control block */
+/** Remove a block from the unzip_LRU list if it belonged to the list.
+@param[in]      bpage   control block */
+static void buf_unzip_LRU_remove_block_if_needed(buf_page_t* bpage)
 {
-        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+        ut_ad(bpage->in_file());
+        mysql_mutex_assert_owner(&buf_pool.mutex);
 
-        ut_ad(buf_page_in_file(bpage));
-        ut_ad(buf_pool_mutex_own(buf_pool));
-
-        if (buf_page_belongs_to_unzip_LRU(bpage)) {
+        if (bpage->belongs_to_unzip_LRU()) {
                buf_block_t*    block = reinterpret_cast<buf_block_t*>(bpage);
 
                ut_ad(block->in_unzip_LRU_list);
-                ut_d(block->in_unzip_LRU_list = FALSE);
+                ut_d(block->in_unzip_LRU_list = false);
 
-                UT_LIST_REMOVE(buf_pool->unzip_LRU, block);
+                UT_LIST_REMOVE(buf_pool.unzip_LRU, block);
        }
 }
 
-/******************************************************************//**
-Adjust LRU hazard pointers if needed. */
-void
-buf_LRU_adjust_hp(
-/*==============*/
-        buf_pool_t*             buf_pool,/*!< in: buffer pool instance */
-        const buf_page_t*       bpage)  /*!< in: control block */
-{
-        buf_pool->lru_hp.adjust(bpage);
-        buf_pool->lru_scan_itr.adjust(bpage);
-        buf_pool->single_scan_itr.adjust(bpage);
-}
-
-/******************************************************************//**
-Removes a block from the LRU list. */
-UNIV_INLINE
-void
-buf_LRU_remove_block(
-/*=================*/
-        buf_page_t*     bpage)  /*!< in: control block */
+/** Removes a block from the LRU list.
+@param[in]      bpage   control block */
+static inline void buf_LRU_remove_block(buf_page_t* bpage)
 {
-        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
-
-        ut_ad(buf_pool_mutex_own(buf_pool));
-
-        ut_a(buf_page_in_file(bpage));
-
-        ut_ad(bpage->in_LRU_list);
-
        /* Important that we adjust the hazard pointers before removing
        bpage from the LRU list. */
-        buf_LRU_adjust_hp(buf_pool, bpage);
+        buf_page_t*     prev_bpage = buf_pool.LRU_remove(bpage);
 
        /* If the LRU_old pointer is defined and points to just this block,
        move it backward one step */
 
-        if (bpage == buf_pool->LRU_old) {
+        if (bpage == buf_pool.LRU_old) {
 
                /* Below: the previous block is guaranteed to exist,
                because the LRU_old pointer is only allowed to differ
                by BUF_LRU_OLD_TOLERANCE from strict
-                buf_pool->LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV of the LRU
+                buf_pool.LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV of the LRU
                list length. */
-                buf_page_t*     prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
-
                ut_a(prev_bpage);
 #ifdef UNIV_LRU_DEBUG
                ut_a(!prev_bpage->old);
 #endif /* UNIV_LRU_DEBUG */
-                buf_pool->LRU_old = prev_bpage;
-                buf_page_set_old(prev_bpage, TRUE);
+                buf_pool.LRU_old = prev_bpage;
+                prev_bpage->set_old(true);
 
-                buf_pool->LRU_old_len++;
+                buf_pool.LRU_old_len++;
        }
 
-        /* Remove the block from the LRU list */
-        UT_LIST_REMOVE(buf_pool->LRU, bpage);
-        ut_d(bpage->in_LRU_list = FALSE);
-
-        buf_pool->stat.LRU_bytes -= bpage->physical_size();
+        buf_pool.stat.LRU_bytes -= bpage->physical_size();
 
        buf_unzip_LRU_remove_block_if_needed(bpage);
 
       /* If the LRU list is so short that LRU_old is not defined,
       clear the "old" flags and return */
-        if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
+        if (UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN) {
 
-                for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
+                for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool.LRU);
                     bpage != NULL;
                     bpage = UT_LIST_GET_NEXT(LRU, bpage)) {
 
                        /* This loop temporarily violates the
-                        assertions of buf_page_set_old(). */
-                        bpage->old = FALSE;
+                        assertions of buf_page_t::set_old(). */
+                        bpage->old = false;
                }
 
-                buf_pool->LRU_old = NULL;
-                buf_pool->LRU_old_len = 0;
+                buf_pool.LRU_old = NULL;
+                buf_pool.LRU_old_len = 0;
 
                return;
        }
 
-        ut_ad(buf_pool->LRU_old);
+        ut_ad(buf_pool.LRU_old);
 
        /* Update the LRU_old_len field if necessary */
-        if (buf_page_is_old(bpage)) {
-
-                buf_pool->LRU_old_len--;
+        if (bpage->old) {
+                buf_pool.LRU_old_len--;
        }
 
        /* Adjust the length of the old block list if necessary */
-        buf_LRU_old_adjust_len(buf_pool);
+        buf_LRU_old_adjust_len();
 }
 
 /******************************************************************//**
@@ -1276,19 +679,15 @@ buf_unzip_LRU_add_block(
        ibool           old)    /*!< in: TRUE if should be put to the end
                                of the list, else put to the start */
 {
-        buf_pool_t*     buf_pool = buf_pool_from_block(block);
-
-        ut_ad(buf_pool_mutex_own(buf_pool));
-
-        ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
-
+        mysql_mutex_assert_owner(&buf_pool.mutex);
+        ut_a(block->page.belongs_to_unzip_LRU());
        ut_ad(!block->in_unzip_LRU_list);
-        ut_d(block->in_unzip_LRU_list = TRUE);
+        ut_d(block->in_unzip_LRU_list = true);
 
        if (old) {
-                UT_LIST_ADD_LAST(buf_pool->unzip_LRU, block);
+                UT_LIST_ADD_LAST(buf_pool.unzip_LRU, block);
        } else {
-                UT_LIST_ADD_FIRST(buf_pool->unzip_LRU, block);
+                UT_LIST_ADD_FIRST(buf_pool.unzip_LRU, block);
        }
 }
 
@@ -1296,144 +695,114 @@ buf_unzip_LRU_add_block(
 Adds a block to the LRU list. Please make sure that the page_size is
 already set when invoking the function, so that we can get correct
 page_size from the buffer page when adding a block into LRU */
-UNIV_INLINE
 void
-buf_LRU_add_block_low(
-/*==================*/
+buf_LRU_add_block(
        buf_page_t*     bpage,  /*!< in: control block */
-        ibool           old)    /*!< in: TRUE if should be put to the old blocks
+        bool            old)    /*!< in: true if should be put to the old blocks
                                in the LRU list, else put to the start; if the
                                LRU list is very short, the block is added to
                                the start, regardless of this parameter */
 {
-        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
-
-        ut_ad(buf_pool_mutex_own(buf_pool));
-
-        ut_a(buf_page_in_file(bpage));
+        mysql_mutex_assert_owner(&buf_pool.mutex);
        ut_ad(!bpage->in_LRU_list);
 
-        if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
+        if (!old || (UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN)) {
 
-                UT_LIST_ADD_FIRST(buf_pool->LRU, bpage);
+                UT_LIST_ADD_FIRST(buf_pool.LRU, bpage);
 
-                bpage->freed_page_clock = buf_pool->freed_page_clock;
+                bpage->freed_page_clock = buf_pool.freed_page_clock
+                        & ((1U << 31) - 1);
        } else {
 #ifdef UNIV_LRU_DEBUG
-                /* buf_pool->LRU_old must be the first item in the LRU list
+                /* buf_pool.LRU_old must be the first item in the LRU list
                whose "old" flag is set. */
-                ut_a(buf_pool->LRU_old->old);
-                ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
-                     || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
-                ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
-                     || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
+                ut_a(buf_pool.LRU_old->old);
+                ut_a(!UT_LIST_GET_PREV(LRU, buf_pool.LRU_old)
+                     || !UT_LIST_GET_PREV(LRU, buf_pool.LRU_old)->old);
+                ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool.LRU_old)
+                     || UT_LIST_GET_NEXT(LRU, buf_pool.LRU_old)->old);
 #endif /* UNIV_LRU_DEBUG */
-                UT_LIST_INSERT_AFTER(buf_pool->LRU, buf_pool->LRU_old,
+                UT_LIST_INSERT_AFTER(buf_pool.LRU, buf_pool.LRU_old,
                                     bpage);
 
-                buf_pool->LRU_old_len++;
+                buf_pool.LRU_old_len++;
        }
 
        ut_d(bpage->in_LRU_list = TRUE);
 
-        incr_LRU_size_in_bytes(bpage, buf_pool);
+        incr_LRU_size_in_bytes(bpage);
 
-        if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
+        if (UT_LIST_GET_LEN(buf_pool.LRU) > BUF_LRU_OLD_MIN_LEN) {
 
-                ut_ad(buf_pool->LRU_old);
+                ut_ad(buf_pool.LRU_old);
 
                /* Adjust the length of the old block list if necessary */
 
-                buf_page_set_old(bpage, old);
-                buf_LRU_old_adjust_len(buf_pool);
+                bpage->set_old(old);
+                buf_LRU_old_adjust_len();
 
-        } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
+        } else if (UT_LIST_GET_LEN(buf_pool.LRU) == BUF_LRU_OLD_MIN_LEN) {
 
                /* The LRU list is now long enough for LRU_old to become
                defined: init it */
 
-                buf_LRU_old_init(buf_pool);
+                buf_LRU_old_init();
        } else {
-                buf_page_set_old(bpage, buf_pool->LRU_old != NULL);
+                bpage->set_old(buf_pool.LRU_old != NULL);
        }
 
        /* If this is a zipped block with decompressed frame as well
       then put it on the unzip_LRU list */
-        if (buf_page_belongs_to_unzip_LRU(bpage)) {
+        if (bpage->belongs_to_unzip_LRU()) {
                buf_unzip_LRU_add_block((buf_block_t*) bpage, old);
        }
 }
 
-/******************************************************************//**
-Adds a block to the LRU list. Please make sure that the page_size is
-already set when invoking the function, so that we can get correct
-page_size from the buffer page when adding a block into LRU */
-void
-buf_LRU_add_block(
-/*==============*/
-        buf_page_t*     bpage,  /*!< in: control block */
-        ibool           old)    /*!< in: TRUE if should be put to the old
-                                blocks in the LRU list, else put to the start;
-                                if the LRU list is very short, the block is
-                                added to the start, regardless of this
-                                parameter */
+/** Move a block to the start of the LRU list. */
+void buf_page_make_young(buf_page_t *bpage)
 {
-        buf_LRU_add_block_low(bpage, old);
-}
+        ut_ad(bpage->in_file());
 
-/******************************************************************//**
-Moves a block to the start of the LRU list. */
-void
-buf_LRU_make_block_young(
-/*=====================*/
-        buf_page_t*     bpage)  /*!< in: control block */
-{
-        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+        mysql_mutex_lock(&buf_pool.mutex);
 
-        ut_ad(buf_pool_mutex_own(buf_pool));
+        if (UNIV_UNLIKELY(bpage->old))
+                buf_pool.stat.n_pages_made_young++;
 
-        if (bpage->old) {
-                buf_pool->stat.n_pages_made_young++;
-        }
+        buf_LRU_remove_block(bpage);
+        buf_LRU_add_block(bpage, false);
 
-        buf_LRU_remove_block(bpage);
-        buf_LRU_add_block_low(bpage, FALSE);
+        mysql_mutex_unlock(&buf_pool.mutex);
 }
 
-/******************************************************************//**
-Try to free a block. If bpage is a descriptor of a compressed-only
-page, the descriptor object will be freed as well.
-
-NOTE: If this function returns true, it will temporarily
-release buf_pool->mutex. Furthermore, the page frame will no longer be
-accessible via bpage.
-
-The caller must hold buf_pool->mutex and must not hold any
-buf_page_get_mutex() when calling this function.
-@return true if freed, false otherwise. */
-bool
-buf_LRU_free_page(
-/*===============*/
-        buf_page_t*     bpage,  /*!< in: block to be freed */
-        bool            zip)    /*!< in: true if should remove also the
-                                compressed page of an uncompressed page */
+/** Try to free a block. If bpage is a descriptor of a compressed-only
+ROW_FORMAT=COMPRESSED page, the buf_page_t object will be freed as well.
+The caller must hold buf_pool.mutex.
+@param bpage      block to be freed
+@param zip        whether to remove both copies of a ROW_FORMAT=COMPRESSED page
+@retval true if freed and buf_pool.mutex may have been temporarily released
+@retval false if the page was not freed */
+bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
 {
-        buf_page_t*     b = NULL;
-        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+        const page_id_t id(bpage->id());
+        buf_page_t*     b = nullptr;
 
-        rw_lock_t*      hash_lock = buf_page_hash_lock_get(buf_pool, bpage->id);
-
-        BPageMutex*     block_mutex = buf_page_get_mutex(bpage);
-
-        ut_ad(buf_pool_mutex_own(buf_pool));
-        ut_ad(buf_page_in_file(bpage));
+        mysql_mutex_assert_owner(&buf_pool.mutex);
+        ut_ad(bpage->in_file());
        ut_ad(bpage->in_LRU_list);
 
-        rw_lock_x_lock(hash_lock);
-        mutex_enter(block_mutex);
+        /* First, perform a quick check before we acquire hash_lock. */
+        if (!bpage->can_relocate()) {
+                return false;
+        }
 
-        if (!buf_page_can_relocate(bpage)) {
+        /* We must hold an exclusive hash_lock to prevent
+        bpage->can_relocate() from changing due to a concurrent
+        execution of buf_page_get_low(). */
+        const ulint fold = id.fold();
+        page_hash_latch* hash_lock = buf_pool.page_hash.lock_get(fold);
+        hash_lock->write_lock();
+        if (UNIV_UNLIKELY(!bpage->can_relocate())) {
                /* Do not free buffer fixed and I/O-fixed blocks. */
                goto func_exit;
        }
@@ -1442,180 +811,152 @@ buf_LRU_free_page(
                /* This would completely free the block. */
                /* Do not completely free dirty blocks. */
 
-                if (bpage->oldest_modification) {
+                if (bpage->oldest_modification()) {
                        goto func_exit;
                }
-        } else if (bpage->oldest_modification > 0
-                   && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
-
-                ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
-
+        } else if (bpage->oldest_modification()
+                   && bpage->state() != BUF_BLOCK_FILE_PAGE) {
 func_exit:
-                rw_lock_x_unlock(hash_lock);
-                mutex_exit(block_mutex);
+                hash_lock->write_unlock();
                return(false);
 
-        } else if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
+        } else if (bpage->state() == BUF_BLOCK_FILE_PAGE) {
                b = buf_page_alloc_descriptor();
                ut_a(b);
                new (b) buf_page_t(*bpage);
+                b->set_state(BUF_BLOCK_ZIP_PAGE);
        }
 
-        ut_ad(buf_pool_mutex_own(buf_pool));
-        ut_ad(buf_page_in_file(bpage));
+        mysql_mutex_assert_owner(&buf_pool.mutex);
+        ut_ad(bpage->in_file());
        ut_ad(bpage->in_LRU_list);
-        ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
 
        DBUG_PRINT("ib_buf", ("free page %u:%u",
-                              bpage->id.space(), bpage->id.page_no()));
+                              id.space(), id.page_no()));
 
-        ut_ad(rw_lock_own(hash_lock, RW_LOCK_X));
-        ut_ad(buf_page_can_relocate(bpage));
+        ut_ad(bpage->can_relocate());
 
-        if (!buf_LRU_block_remove_hashed(bpage, zip)) {
+        if (!buf_LRU_block_remove_hashed(bpage, id, hash_lock, zip)) {
                return(true);
        }
 
-        /* buf_LRU_block_remove_hashed() releases the hash_lock */
-        ut_ad(!rw_lock_own_flagged(hash_lock,
-                                   RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
-
-        /* We have just freed a BUF_BLOCK_FILE_PAGE. If b != NULL
+        /* We have just freed a BUF_BLOCK_FILE_PAGE. If b != nullptr
        then it was a compressed page with an uncompressed frame and
       we are interested in freeing only the uncompressed frame.
       Therefore we have to reinsert the compressed page descriptor
       into the LRU and page_hash (and possibly flush_list).
-        if b == NULL then it was a regular page that has been freed */
+        if !b then it was a regular page that has been freed */
 
-        if (b != NULL) {
+        if (UNIV_LIKELY_NULL(b)) {
                buf_page_t*     prev_b  = UT_LIST_GET_PREV(LRU, b);
 
-                rw_lock_x_lock(hash_lock);
-
-                mutex_enter(block_mutex);
-
-                ut_a(!buf_page_hash_get_low(buf_pool, b->id));
-
-                b->state = b->oldest_modification
-                        ? BUF_BLOCK_ZIP_DIRTY
-                        : BUF_BLOCK_ZIP_PAGE;
+                hash_lock->write_lock();
+                ut_ad(!buf_pool.page_hash_get_low(id, fold));
 
                ut_ad(b->zip_size());
 
-                /* The fields in_page_hash and in_LRU_list of
+                /* The field in_LRU_list of
               the to-be-freed block descriptor should have
               been cleared in
               buf_LRU_block_remove_hashed(), which
               invokes buf_LRU_remove_block(). */
-                ut_ad(!bpage->in_page_hash);
                ut_ad(!bpage->in_LRU_list);
 
                /* bpage->state was BUF_BLOCK_FILE_PAGE because
-                b != NULL. The type cast below is thus valid. */
+                b != nullptr. The type cast below is thus valid. */
                ut_ad(!((buf_block_t*) bpage)->in_unzip_LRU_list);
 
               /* The fields of bpage were copied to b before
               buf_LRU_block_remove_hashed() was invoked. */
                ut_ad(!b->in_zip_hash);
-                ut_ad(b->in_page_hash);
                ut_ad(b->in_LRU_list);
+                ut_ad(b->in_page_hash);
 
-                HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
-                            b->id.fold(), b);
+                HASH_INSERT(buf_page_t, hash, &buf_pool.page_hash, fold, b);
 
                /* Insert b where bpage was in the LRU list. */
-                if (prev_b != NULL) {
+                if (prev_b) {
                        ulint   lru_len;
 
                        ut_ad(prev_b->in_LRU_list);
-                        ut_ad(buf_page_in_file(prev_b));
+                        ut_ad(prev_b->in_file());
 
-                        UT_LIST_INSERT_AFTER(buf_pool->LRU, prev_b, b);
+                        UT_LIST_INSERT_AFTER(buf_pool.LRU, prev_b, b);
 
-                        incr_LRU_size_in_bytes(b, buf_pool);
+                        incr_LRU_size_in_bytes(b);
 
-                        if (buf_page_is_old(b)) {
-                                buf_pool->LRU_old_len++;
-                                if (buf_pool->LRU_old
+                        if (b->is_old()) {
+                                buf_pool.LRU_old_len++;
+                                if (buf_pool.LRU_old
                                    == UT_LIST_GET_NEXT(LRU, b)) {
 
-                                        buf_pool->LRU_old = b;
+                                        buf_pool.LRU_old = b;
                                }
                        }
 
-                        lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
+                        lru_len = UT_LIST_GET_LEN(buf_pool.LRU);
 
                        if (lru_len > BUF_LRU_OLD_MIN_LEN) {
-                                ut_ad(buf_pool->LRU_old);
+                                ut_ad(buf_pool.LRU_old);
                                /* Adjust the length of the
                               old block list if necessary */
-                                buf_LRU_old_adjust_len(buf_pool);
+                                buf_LRU_old_adjust_len();
                        } else if (lru_len == BUF_LRU_OLD_MIN_LEN) {
                                /* The LRU list is now long
                               enough for LRU_old to become
                               defined: init it */
-                                buf_LRU_old_init(buf_pool);
+                                buf_LRU_old_init();
                        }
 #ifdef UNIV_LRU_DEBUG
                        /* Check that the "old" flag is consistent
                       in the block and its neighbours. */
-                        buf_page_set_old(b, buf_page_is_old(b));
+                        b->set_old(b->is_old());
 #endif /* UNIV_LRU_DEBUG */
                } else {
                        ut_d(b->in_LRU_list = FALSE);
-                        buf_LRU_add_block_low(b, buf_page_is_old(b));
+                        buf_LRU_add_block(b, b->old);
                }
 
-                if (b->state == BUF_BLOCK_ZIP_PAGE) {
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-                        buf_LRU_insert_zip_clean(b);
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-                } else {
-                        /* Relocate on buf_pool->flush_list. */
-                        buf_flush_relocate_on_flush_list(bpage, b);
-                }
+                buf_flush_relocate_on_flush_list(bpage, b);
 
-                bpage->zip.data = NULL;
+                bpage->zip.data = nullptr;
 
                page_zip_set_size(&bpage->zip, 0);
 
-                mutex_exit(block_mutex);
-
                /* Prevent buf_page_get_gen() from decompressing the block while we
               release
-                buf_pool->mutex and block_mutex. */
-
-                block_mutex = buf_page_get_mutex(b);
-
-                mutex_enter(block_mutex);
-
-                buf_page_set_sticky(b);
-
-                mutex_exit(block_mutex);
-
-                rw_lock_x_unlock(hash_lock);
+                hash_lock. */
+                b->set_io_fix(BUF_IO_PIN);
+                hash_lock->write_unlock();
        }
 
-        buf_pool_mutex_exit(buf_pool);
-
-        /* Remove possible adaptive hash index on the page.
-        The page was declared uninitialized by
-        buf_LRU_block_remove_hashed(). We need to flag
-        the contents of the page valid (which it still is) in
-        order to avoid bogus Valgrind or MSAN warnings.*/
        buf_block_t*    block = reinterpret_cast<buf_block_t*>(bpage);
-        MEM_MAKE_DEFINED(block->frame, srv_page_size);
-        btr_search_drop_page_hash_index(block);
-        MEM_UNDEFINED(block->frame, srv_page_size);
-
-        buf_pool_mutex_enter(buf_pool);
-
-        if (b) {
-                mutex_enter(block_mutex);
-
-                buf_page_unset_sticky(b);
+
+#ifdef BTR_CUR_HASH_ADAPT
+        if (block->index) {
+                mysql_mutex_unlock(&buf_pool.mutex);
+
+                /* Remove the adaptive hash index on the page.
+                The page was declared uninitialized by
+                buf_LRU_block_remove_hashed(). We need to flag
+                the contents of the page valid (which it still is) in
+                order to avoid bogus Valgrind or MSAN warnings.*/
+
+                MEM_MAKE_DEFINED(block->frame, srv_page_size);
+                btr_search_drop_page_hash_index(block);
+                MEM_UNDEFINED(block->frame, srv_page_size);
+
+                if (UNIV_LIKELY_NULL(b)) {
+                        ut_ad(b->zip_size());
+                        b->io_unfix();
+                }
 
-                mutex_exit(block_mutex);
+                mysql_mutex_lock(&buf_pool.mutex);
+        } else
+#endif
+        if (UNIV_LIKELY_NULL(b)) {
+                ut_ad(b->zip_size());
+                b->io_unfix();
        }
 
        buf_LRU_block_free_hashed_page(block);
@@ -1631,106 +972,92 @@ buf_LRU_block_free_non_file_page(
        buf_block_t*    block)  /*!< in: block, must not contain a file page */
 {
        void*           data;
-        buf_pool_t*     buf_pool = buf_pool_from_block(block);
-
-        ut_ad(buf_pool_mutex_own(buf_pool));
-        ut_ad(buf_page_mutex_own(block));
 
-        switch (buf_block_get_state(block)) {
-        case BUF_BLOCK_MEMORY:
-        case BUF_BLOCK_READY_FOR_USE:
-                break;
-        default:
-                ut_error;
-        }
+        ut_ad(block->page.state() == BUF_BLOCK_MEMORY);
 
        assert_block_ahi_empty(block);
        ut_ad(!block->page.in_free_list);
-        ut_ad(!block->page.in_flush_list);
+        ut_ad(!block->page.oldest_modification());
        ut_ad(!block->page.in_LRU_list);
 
-        buf_block_set_state(block, BUF_BLOCK_NOT_USED);
+        block->page.set_state(BUF_BLOCK_NOT_USED);
 
        MEM_UNDEFINED(block->frame, srv_page_size);
       /* Wipe page_no and space_id */
-        memset(block->frame + FIL_PAGE_OFFSET, 0xfe, 4);
-        memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xfe, 4);
+        static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment");
+        memset_aligned<4>(block->frame + FIL_PAGE_OFFSET, 0xfe, 4);
+        static_assert(FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID % 4 == 2,
+                      "not perfect alignment");
+        memset_aligned<2>(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
+                          0xfe, 4);
 
        data = block->page.zip.data;
 
        if (data != NULL) {
                block->page.zip.data = NULL;
-                buf_page_mutex_exit(block);
-                buf_pool_mutex_exit_forbid(buf_pool);
+                buf_pool_mutex_exit_forbid();
 
                ut_ad(block->zip_size());
 
-                buf_buddy_free(buf_pool, data, block->zip_size());
-
-                buf_pool_mutex_exit_allow(buf_pool);
-                buf_page_mutex_enter(block);
+                buf_buddy_free(data, block->zip_size());
+                buf_pool_mutex_exit_allow();
 
                page_zip_set_size(&block->page.zip, 0);
        }
 
-        if (buf_pool->curr_size < buf_pool->old_size
-            && UT_LIST_GET_LEN(buf_pool->withdraw) < buf_pool->withdraw_target
-            && buf_block_will_withdrawn(buf_pool, block)) {
+        if (buf_pool.curr_size < buf_pool.old_size
+            && UT_LIST_GET_LEN(buf_pool.withdraw) < buf_pool.withdraw_target
+            && buf_pool.will_be_withdrawn(block->page))
{ /* This should be withdrawn */ UT_LIST_ADD_LAST( - buf_pool->withdraw, + buf_pool.withdraw, &block->page); - ut_d(block->in_withdraw_list = TRUE); + ut_d(block->in_withdraw_list = true); } else { - UT_LIST_ADD_FIRST(buf_pool->free, &block->page); - ut_d(block->page.in_free_list = TRUE); + UT_LIST_ADD_FIRST(buf_pool.free, &block->page); + ut_d(block->page.in_free_list = true); } MEM_NOACCESS(block->frame, srv_page_size); } -/******************************************************************//** -Takes a block out of the LRU list and page hash table. -If the block is compressed-only (BUF_BLOCK_ZIP_PAGE), +/** Release a memory block to the buffer pool. */ +ATTRIBUTE_COLD void buf_pool_t::free_block(buf_block_t *block) +{ + ut_ad(this == &buf_pool); + mysql_mutex_lock(&mutex); + buf_LRU_block_free_non_file_page(block); + mysql_mutex_unlock(&mutex); +} + + +/** Remove bpage from buf_pool.LRU and buf_pool.page_hash. + +If bpage->state() == BUF_BLOCK_ZIP_PAGE && !bpage->oldest_modification(), the object will be freed. -The caller must hold buf_pool->mutex, the buf_page_get_mutex() mutex -and the appropriate hash_lock. This function will release the -buf_page_get_mutex() and the hash_lock. +@param bpage buffer block +@param id page identifier +@param hash_lock buf_pool.page_hash latch (will be released here) +@param zip whether bpage->zip of BUF_BLOCK_FILE_PAGE should be freed If a compressed page is freed other compressed pages may be relocated. @retval true if BUF_BLOCK_FILE_PAGE was removed from page_hash. The caller needs to free the page to the free list @retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In this case the block is already returned to the buddy allocator. */ -static -bool -buf_LRU_block_remove_hashed( -/*========================*/ - buf_page_t* bpage, /*!< in: block, must contain a file page and - be in a state where it can be freed; there - may or may not be a hash index to the page */ - bool zip) /*!< in: true if should remove also the - compressed page of an uncompressed page */ +static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, + page_hash_latch *hash_lock, bool zip) { - const buf_page_t* hashed_bpage; - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - rw_lock_t* hash_lock; - - ut_ad(buf_pool_mutex_own(buf_pool)); - ut_ad(mutex_own(buf_page_get_mutex(bpage))); - - hash_lock = buf_page_hash_lock_get(buf_pool, bpage->id); - - ut_ad(rw_lock_own(hash_lock, RW_LOCK_X)); + mysql_mutex_assert_owner(&buf_pool.mutex); + ut_ad(hash_lock->is_write_locked()); - ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE); - ut_a(bpage->buf_fix_count == 0); + ut_a(bpage->io_fix() == BUF_IO_NONE); + ut_a(!bpage->buf_fix_count()); buf_LRU_remove_block(bpage); - buf_pool->freed_page_clock += 1; + buf_pool.freed_page_clock += 1; - switch (buf_page_get_state(bpage)) { + switch (bpage->state()) { case BUF_BLOCK_FILE_PAGE: MEM_CHECK_ADDRESSABLE(bpage, sizeof(buf_block_t)); MEM_CHECK_ADDRESSABLE(((buf_block_t*) bpage)->frame, @@ -1739,7 +1066,7 @@ buf_LRU_block_remove_hashed( if (bpage->zip.data) { const page_t* page = ((buf_block_t*) bpage)->frame; - ut_a(!zip || bpage->oldest_modification == 0); + ut_a(!zip || !bpage->oldest_modification()); ut_ad(bpage->zip_size()); switch (fil_page_get_type(page)) { @@ -1764,7 +1091,10 @@ buf_LRU_block_remove_hashed( case FIL_PAGE_INDEX: case FIL_PAGE_RTREE: #if defined UNIV_ZIP_DEBUG && defined BTR_CUR_HASH_ADAPT - ut_a(page_zip_validate( + /* During recovery, we only update the + compressed page, not the uncompressed 
one. */ + ut_a(recv_recovery_is_on() + || page_zip_validate( &bpage->zip, page, ((buf_block_t*) bpage)->index)); #endif /* UNIV_ZIP_DEBUG && BTR_CUR_HASH_ADAPT */ @@ -1787,63 +1117,49 @@ buf_LRU_block_remove_hashed( } /* fall through */ case BUF_BLOCK_ZIP_PAGE: - ut_a(bpage->oldest_modification == 0); + ut_a(!bpage->oldest_modification()); MEM_CHECK_ADDRESSABLE(bpage->zip.data, bpage->zip_size()); break; - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_ZIP_DIRTY: case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_READY_FOR_USE: case BUF_BLOCK_MEMORY: case BUF_BLOCK_REMOVE_HASH: ut_error; break; } - hashed_bpage = buf_page_hash_get_low(buf_pool, bpage->id); - if (UNIV_UNLIKELY(bpage != hashed_bpage)) { - ib::fatal() << "Page not found in the hash table: " - << bpage->id; - } - ut_ad(!bpage->in_zip_hash); - ut_ad(bpage->in_page_hash); - ut_d(bpage->in_page_hash = FALSE); - - HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, bpage->id.fold(), - bpage); + HASH_DELETE(buf_page_t, hash, &buf_pool.page_hash, id.fold(), bpage); - switch (buf_page_get_state(bpage)) { + switch (bpage->state()) { case BUF_BLOCK_ZIP_PAGE: ut_ad(!bpage->in_free_list); - ut_ad(!bpage->in_flush_list); ut_ad(!bpage->in_LRU_list); ut_a(bpage->zip.data); ut_a(bpage->zip.ssize); + ut_ad(!bpage->oldest_modification()); -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - UT_LIST_REMOVE(buf_pool->zip_clean, bpage); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ + hash_lock->write_unlock(); + buf_pool_mutex_exit_forbid(); - mutex_exit(&buf_pool->zip_mutex); - rw_lock_x_unlock(hash_lock); - buf_pool_mutex_exit_forbid(buf_pool); + buf_buddy_free(bpage->zip.data, bpage->zip_size()); - buf_buddy_free(buf_pool, bpage->zip.data, bpage->zip_size()); - - buf_pool_mutex_exit_allow(buf_pool); + buf_pool_mutex_exit_allow(); buf_page_free_descriptor(bpage); return(false); case BUF_BLOCK_FILE_PAGE: - memset(((buf_block_t*) bpage)->frame - + FIL_PAGE_OFFSET, 0xff, 4); - memset(((buf_block_t*) bpage)->frame - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4); + static_assert(FIL_NULL == 0xffffffffU, "fill pattern"); + static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment"); + memset_aligned<4>(reinterpret_cast<buf_block_t*>(bpage)->frame + + FIL_PAGE_OFFSET, 0xff, 4); + static_assert(FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID % 4 == 2, + "not perfect alignment"); + memset_aligned<2>(reinterpret_cast<buf_block_t*>(bpage)->frame + + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4); MEM_UNDEFINED(((buf_block_t*) bpage)->frame, srv_page_size); - buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH); + bpage->set_state(BUF_BLOCK_REMOVE_HASH); - /* Question: If we release bpage and hash mutex here + /* Question: If we release hash_lock here then what protects us against: 1) Some other thread buffer fixing this page 2) Some other thread trying to read this page and @@ -1862,8 +1178,7 @@ buf_LRU_block_remove_hashed( and by the time we'll release it in the caller we'd have inserted the compressed only descriptor in the page_hash. */ - rw_lock_x_unlock(hash_lock); - mutex_exit(&((buf_block_t*) bpage)->mutex); + hash_lock->write_unlock(); if (zip && bpage->zip.data) { /* Free the compressed page. 
*/ @@ -1871,23 +1186,20 @@ buf_LRU_block_remove_hashed( bpage->zip.data = NULL; ut_ad(!bpage->in_free_list); - ut_ad(!bpage->in_flush_list); + ut_ad(!bpage->oldest_modification()); ut_ad(!bpage->in_LRU_list); - buf_pool_mutex_exit_forbid(buf_pool); + buf_pool_mutex_exit_forbid(); - buf_buddy_free(buf_pool, data, bpage->zip_size()); + buf_buddy_free(data, bpage->zip_size()); - buf_pool_mutex_exit_allow(buf_pool); + buf_pool_mutex_exit_allow(); page_zip_set_size(&bpage->zip, 0); } return(true); - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_ZIP_DIRTY: case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_READY_FOR_USE: case BUF_BLOCK_MEMORY: case BUF_BLOCK_REMOVE_HASH: break; @@ -1897,83 +1209,32 @@ buf_LRU_block_remove_hashed( return(false); } -/******************************************************************//** -Puts a file page whose has no hash index to the free list. */ -static -void -buf_LRU_block_free_hashed_page( -/*===========================*/ - buf_block_t* block) /*!< in: block, must contain a file page and - be in a state where it can be freed */ -{ - buf_pool_t* buf_pool = buf_pool_from_block(block); - ut_ad(buf_pool_mutex_own(buf_pool)); - - buf_page_mutex_enter(block); - - if (buf_pool->flush_rbt == NULL) { - block->page.id - = page_id_t(ULINT32_UNDEFINED, ULINT32_UNDEFINED); - } - - buf_block_set_state(block, BUF_BLOCK_MEMORY); - - buf_LRU_block_free_non_file_page(block); - buf_page_mutex_exit(block); -} - /** Remove one page from LRU list and put it to free list. -@param[in,out] bpage block, must contain a file page and be in - a freeable state; there may or may not be a - hash index to the page -@param[in] old_page_id page number before bpage->id was invalidated */ -void buf_LRU_free_one_page(buf_page_t* bpage, page_id_t old_page_id) +@param bpage file page to be freed +@param id page identifier +@param hash_lock buf_pool.page_hash latch (will be released here) */ +void buf_LRU_free_one_page(buf_page_t *bpage, const page_id_t id, + page_hash_latch *hash_lock) { - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, - old_page_id); - BPageMutex* block_mutex = buf_page_get_mutex(bpage); - - ut_ad(buf_pool_mutex_own(buf_pool)); + while (bpage->buf_fix_count()) + /* Wait for other threads to release the fix count + before releasing the bpage from LRU list. */ + (void) LF_BACKOFF(); - rw_lock_x_lock(hash_lock); - - while (bpage->buf_fix_count > 0) { - /* Wait for other threads to release the fix count - before releasing the bpage from LRU list. */ - } - - mutex_enter(block_mutex); - - bpage->id = old_page_id; - - if (buf_LRU_block_remove_hashed(bpage, true)) { - buf_LRU_block_free_hashed_page((buf_block_t*) bpage); - } - - /* buf_LRU_block_remove_hashed() releases hash_lock and block_mutex */ - ut_ad(!rw_lock_own_flagged(hash_lock, - RW_LOCK_FLAG_X | RW_LOCK_FLAG_S)); - ut_ad(!mutex_own(block_mutex)); + if (buf_LRU_block_remove_hashed(bpage, id, hash_lock, true)) + buf_LRU_block_free_hashed_page(reinterpret_cast<buf_block_t*>(bpage)); } -/**********************************************************************//** -Updates buf_pool->LRU_old_ratio for one buffer pool instance. +/** Update buf_pool.LRU_old_ratio. 
+@param[in]	old_pct		Reserve this percentage of
+				the buffer pool for "old" blocks
+@param[in]	adjust		true=adjust the LRU list;
+				false=just assign buf_pool.LRU_old_ratio
+				during the initialization of InnoDB
 @return updated old_pct */
-static
-uint
-buf_LRU_old_ratio_update_instance(
-/*==============================*/
-	buf_pool_t*	buf_pool,/*!< in: buffer pool instance */
-	uint		old_pct,/*!< in: Reserve this percentage of
-				the buffer pool for "old" blocks. */
-	bool		adjust)	/*!< in: true=adjust the LRU list;
-				false=just assign buf_pool->LRU_old_ratio
-				during the initialization of InnoDB */
+uint buf_LRU_old_ratio_update(uint old_pct, bool adjust)
 {
-	uint	ratio;
-
-	ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100;
+	uint	ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100;
 	if (ratio < BUF_LRU_OLD_RATIO_MIN) {
 		ratio = BUF_LRU_OLD_RATIO_MIN;
 	} else if (ratio > BUF_LRU_OLD_RATIO_MAX) {
@@ -1981,77 +1242,36 @@ buf_LRU_old_ratio_update_instance(
 	}
 
 	if (adjust) {
-		buf_pool_mutex_enter(buf_pool);
+		mysql_mutex_lock(&buf_pool.mutex);
 
-		if (ratio != buf_pool->LRU_old_ratio) {
-			buf_pool->LRU_old_ratio = ratio;
+		if (ratio != buf_pool.LRU_old_ratio) {
+			buf_pool.LRU_old_ratio = ratio;
 
-			if (UT_LIST_GET_LEN(buf_pool->LRU)
+			if (UT_LIST_GET_LEN(buf_pool.LRU)
 			    >= BUF_LRU_OLD_MIN_LEN) {
-
-				buf_LRU_old_adjust_len(buf_pool);
+				buf_LRU_old_adjust_len();
 			}
 		}
 
-		buf_pool_mutex_exit(buf_pool);
+		mysql_mutex_unlock(&buf_pool.mutex);
 	} else {
-		buf_pool->LRU_old_ratio = ratio;
+		buf_pool.LRU_old_ratio = ratio;
 	}
 	/* the reverse of
 	ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100 */
 	return((uint) (ratio * 100 / (double) BUF_LRU_OLD_RATIO_DIV + 0.5));
 }
 
-/**********************************************************************//**
-Updates buf_pool->LRU_old_ratio.
-@return updated old_pct */
-uint
-buf_LRU_old_ratio_update(
-/*=====================*/
-	uint	old_pct,/*!< in: Reserve this percentage of
-			the buffer pool for "old" blocks. */
-	bool	adjust)	/*!< in: true=adjust the LRU list;
-			false=just assign buf_pool->LRU_old_ratio
-			during the initialization of InnoDB */
-{
-	uint	new_ratio = 0;
-
-	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
-		buf_pool_t*	buf_pool;
-
-		buf_pool = buf_pool_from_array(i);
-
-		new_ratio = buf_LRU_old_ratio_update_instance(
-			buf_pool, old_pct, adjust);
-	}
-
-	return(new_ratio);
-}
-
 /********************************************************************//**
 Update the historical stats that we are collecting for LRU eviction
 policy at the end of each interval. */
 void
-buf_LRU_stat_update(void)
-/*=====================*/
+buf_LRU_stat_update()
 {
 	buf_LRU_stat_t*	item;
-	buf_pool_t*	buf_pool;
-	bool		evict_started = FALSE;
 	buf_LRU_stat_t	cur_stat;
 
-	/* If we haven't started eviction yet then don't update stats. */
-	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
-
-		buf_pool = buf_pool_from_array(i);
-
-		if (buf_pool->freed_page_clock != 0) {
-			evict_started = true;
-			break;
-		}
-	}
-
-	if (!evict_started) {
+	if (!buf_pool.freed_page_clock) {
 		goto func_exit;
 	}
 
@@ -2079,29 +1299,24 @@ func_exit:
 	memset(&buf_LRU_stat_cur, 0, sizeof buf_LRU_stat_cur);
 }
 
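A worked example of the ratio arithmetic in buf_LRU_old_ratio_update() above. This is a minimal standalone sketch, assuming BUF_LRU_OLD_RATIO_DIV is 1024 as in buf0lru.h and using the server default innodb_old_blocks_pct of 37; the MIN/MAX clamping is elided. It shows why the returned value round-trips to the percentage the caller passed in:

	#include <cstdio>

	/* Assumption: matches BUF_LRU_OLD_RATIO_DIV in buf0lru.h. */
	static const unsigned OLD_RATIO_DIV = 1024;

	/* Forward computation, as in buf_LRU_old_ratio_update(). */
	static unsigned to_ratio(unsigned old_pct)
	{
		return old_pct * OLD_RATIO_DIV / 100;
	}

	/* "the reverse of ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100" */
	static unsigned to_pct(unsigned ratio)
	{
		return (unsigned) (ratio * 100 / (double) OLD_RATIO_DIV + 0.5);
	}

	int main()
	{
		unsigned ratio = to_ratio(37);	/* 37 * 1024 / 100 = 378 */
		/* prints "ratio=378 old_pct=37" */
		printf("ratio=%u old_pct=%u\n", ratio, to_pct(ratio));
		return 0;
	}

The + 0.5 before the final cast rounds to nearest, compensating for the truncation in the forward integer division; without it, to_pct(378) would come out as 36.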
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/**********************************************************************//**
-Validates the LRU list for one buffer pool instance. */
-static
-void
-buf_LRU_validate_instance(
-/*======================*/
-	buf_pool_t*	buf_pool)
+#ifdef UNIV_DEBUG
+/** Validate the LRU list. */
+void buf_LRU_validate()
 {
-	ulint		old_len;
-	ulint		new_len;
+	ulint	old_len;
+	ulint	new_len;
 
-	buf_pool_mutex_enter(buf_pool);
+	mysql_mutex_lock(&buf_pool.mutex);
 
-	if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
+	if (UT_LIST_GET_LEN(buf_pool.LRU) >= BUF_LRU_OLD_MIN_LEN) {
 
-		ut_a(buf_pool->LRU_old);
-		old_len = buf_pool->LRU_old_len;
+		ut_a(buf_pool.LRU_old);
+		old_len = buf_pool.LRU_old_len;
 
-		new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU)
-				 * buf_pool->LRU_old_ratio
+		new_len = ut_min(UT_LIST_GET_LEN(buf_pool.LRU)
+				 * buf_pool.LRU_old_ratio
 				 / BUF_LRU_OLD_RATIO_DIV,
-				 UT_LIST_GET_LEN(buf_pool->LRU)
+				 UT_LIST_GET_LEN(buf_pool.LRU)
 				 - (BUF_LRU_OLD_TOLERANCE
 				    + BUF_LRU_NON_OLD_MIN_LEN));
 
@@ -2109,128 +1324,101 @@ buf_LRU_validate_instance(
 		ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE);
 	}
 
-	CheckInLRUList::validate(buf_pool);
+	CheckInLRUList::validate();
 
 	old_len = 0;
 
-	for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
+	for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool.LRU);
 	     bpage != NULL;
 	     bpage = UT_LIST_GET_NEXT(LRU, bpage)) {
 
-		switch (buf_page_get_state(bpage)) {
-		case BUF_BLOCK_POOL_WATCH:
+		switch (bpage->state()) {
 		case BUF_BLOCK_NOT_USED:
-		case BUF_BLOCK_READY_FOR_USE:
 		case BUF_BLOCK_MEMORY:
 		case BUF_BLOCK_REMOVE_HASH:
 			ut_error;
 			break;
 		case BUF_BLOCK_FILE_PAGE:
-			ut_ad(((buf_block_t*) bpage)->in_unzip_LRU_list
-			      == buf_page_belongs_to_unzip_LRU(bpage));
+			ut_ad(reinterpret_cast<buf_block_t*>(bpage)
+			      ->in_unzip_LRU_list
+			      == bpage->belongs_to_unzip_LRU());
 		case BUF_BLOCK_ZIP_PAGE:
-		case BUF_BLOCK_ZIP_DIRTY:
 			break;
 		}
 
-		if (buf_page_is_old(bpage)) {
+		if (bpage->is_old()) {
 			const buf_page_t*	prev
 				= UT_LIST_GET_PREV(LRU, bpage);
 			const buf_page_t*	next
 				= UT_LIST_GET_NEXT(LRU, bpage);
 
 			if (!old_len++) {
-				ut_a(buf_pool->LRU_old == bpage);
+				ut_a(buf_pool.LRU_old == bpage);
 			} else {
-				ut_a(!prev || buf_page_is_old(prev));
+				ut_a(!prev || prev->is_old());
 			}
 
-			ut_a(!next || buf_page_is_old(next));
+			ut_a(!next || next->is_old());
 		}
 	}
 
-	ut_a(buf_pool->LRU_old_len == old_len);
+	ut_a(buf_pool.LRU_old_len == old_len);
 
-	CheckInFreeList::validate(buf_pool);
+	CheckInFreeList::validate();
 
-	for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->free);
+	for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool.free);
 	     bpage != NULL;
 	     bpage = UT_LIST_GET_NEXT(list, bpage)) {
 
-		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
+		ut_a(bpage->state() == BUF_BLOCK_NOT_USED);
 	}
 
-	CheckUnzipLRUAndLRUList::validate(buf_pool);
+	CheckUnzipLRUAndLRUList::validate();
 
-	for (buf_block_t* block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU);
+	for (buf_block_t* block = UT_LIST_GET_FIRST(buf_pool.unzip_LRU);
 	     block != NULL;
 	     block = UT_LIST_GET_NEXT(unzip_LRU, block)) {
 
 		ut_ad(block->in_unzip_LRU_list);
 		ut_ad(block->page.in_LRU_list);
-		ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
+		ut_a(block->page.belongs_to_unzip_LRU());
 	}
 
-	buf_pool_mutex_exit(buf_pool);
+	mysql_mutex_unlock(&buf_pool.mutex);
 }
+#endif /* UNIV_DEBUG */
 
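The shape of the invariant that buf_LRU_validate() asserts (all "old" blocks form one contiguous run at the tail of the LRU list, and buf_pool.LRU_old_len counts exactly those blocks) can be shown in isolation. A hypothetical stand-in, where a std::list<bool> element plays the role of bpage->is_old():

	#include <cassert>
	#include <list>

	/* Return the length of the "old" tail, asserting that no young
	(false) element follows an old (true) one; this mirrors the
	prev/next checks in buf_LRU_validate(). */
	static size_t old_tail_len(const std::list<bool>& lru)
	{
		size_t old_len = 0;
		for (bool is_old : lru) {
			if (is_old) {
				++old_len;
			} else {
				/* A young block may not follow an old one. */
				assert(old_len == 0);
			}
		}
		return old_len;
	}

	int main()
	{
		const std::list<bool> lru{false, false, true, true, true};
		assert(old_tail_len(lru) == 3);	/* LRU_old_len analogue */
		return 0;
	}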
-/**********************************************************************//**
-Validates the LRU list.
-@return TRUE */
-ibool
-buf_LRU_validate(void)
-/*==================*/
+#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG
+/** Dump the LRU list to stderr. */
+void buf_LRU_print()
 {
-	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
-		buf_pool_t*	buf_pool;
+	mysql_mutex_lock(&buf_pool.mutex);
 
-		buf_pool = buf_pool_from_array(i);
-		buf_LRU_validate_instance(buf_pool);
-	}
-
-	return(TRUE);
-}
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/**********************************************************************//**
-Prints the LRU list for one buffer pool instance. */
-static
-void
-buf_LRU_print_instance(
-/*===================*/
-	buf_pool_t*	buf_pool)
-{
-	buf_pool_mutex_enter(buf_pool);
-
-	for (const buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
+	for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool.LRU);
 	     bpage != NULL;
 	     bpage = UT_LIST_GET_NEXT(LRU, bpage)) {
-
-		mutex_enter(buf_page_get_mutex(bpage));
+		const page_id_t id(bpage->id());
 
 		fprintf(stderr, "BLOCK space %u page %u ",
-			bpage->id.space(), bpage->id.page_no());
+			id.space(), id.page_no());
 
-		if (buf_page_is_old(bpage)) {
+		if (bpage->is_old()) {
 			fputs("old ", stderr);
 		}
 
-		if (bpage->buf_fix_count) {
-			fprintf(stderr, "buffix count %u ",
-				uint32_t(bpage->buf_fix_count));
+		if (const uint32_t buf_fix_count = bpage->buf_fix_count()) {
			fprintf(stderr, "buffix count %u ", buf_fix_count);
 		}
 
-		if (buf_page_get_io_fix(bpage)) {
-			fprintf(stderr, "io_fix %d ",
-				buf_page_get_io_fix(bpage));
+		if (const auto io_fix = bpage->io_fix()) {
+			fprintf(stderr, "io_fix %d ", io_fix);
 		}
 
-		if (bpage->oldest_modification) {
+		if (bpage->oldest_modification()) {
 			fputs("modif. ", stderr);
 		}
 
-		switch (buf_page_get_state(bpage)) {
+		switch (const auto state = bpage->state()) {
 			const byte*	frame;
 		case BUF_BLOCK_FILE_PAGE:
 			frame = buf_block_get_frame((buf_block_t*) bpage);
@@ -2248,28 +1436,11 @@ buf_LRU_print_instance(
 			break;
 
 		default:
-			fprintf(stderr, "\n!state %d!\n",
-				buf_page_get_state(bpage));
+			fprintf(stderr, "\n!state %d!\n", state);
 			break;
 		}
-
-		mutex_exit(buf_page_get_mutex(bpage));
 	}
 
-	buf_pool_mutex_exit(buf_pool);
-}
-
-/**********************************************************************//**
-Prints the LRU list. */
-void
-buf_LRU_print(void)
-/*===============*/
-{
-	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
-		buf_pool_t*	buf_pool;
-
-		buf_pool = buf_pool_from_array(i);
-		buf_LRU_print_instance(buf_pool);
-	}
+	mysql_mutex_unlock(&buf_pool.mutex);
 }
-#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
+#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG */
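One pattern from this change that deserves a closing illustration is the double-checked pin test in buf_LRU_free_page(): an unlocked can_relocate() pre-check, then the same test repeated under the exclusive page_hash latch. A simplified sketch in standard C++; the names page, fix_count, hash_latch and try_free() are illustrative stand-ins, not InnoDB API:

	#include <atomic>
	#include <shared_mutex>

	struct page
	{
		std::atomic<unsigned> fix_count{0};	/* buf_fix_count() analogue */
		bool can_relocate() const { return fix_count.load() == 0; }
	};

	std::shared_mutex hash_latch;	/* plays the role of page_hash_latch */

	bool try_free(page& p)
	{
		/* Quick check before acquiring the latch, as in
		buf_LRU_free_page(): most pinned pages are filtered out
		without any locking. */
		if (!p.can_relocate())
			return false;

		/* Re-check under the exclusive latch: a concurrent reader
		may have fixed the page after the first test. */
		std::unique_lock<std::shared_mutex> latch(hash_latch);
		if (!p.can_relocate())
			return false;

		/* ... remove from hash table and LRU list, then free ... */
		return true;
	}

	int main()
	{
		page p;
		return try_free(p) ? 0 : 1;	/* frees: fix_count is 0 */
	}

The cheap first test rejects most pinned pages without serializing on the latch; only the second test is authoritative, because a concurrent reader (compare buf_page_get_low()) can fix the page between the two checks.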