author | Michael Widenius <monty@askmonty.org> | 2013-03-26 00:03:13 +0200
committer | Michael Widenius <monty@askmonty.org> | 2013-03-26 00:03:13 +0200
commit | 068c61978e3a81836d52b8caf11e044290159ad1 (patch)
tree | 2cbca861ab2cebe3bd99379ca9668bb483ca0d2a /storage/innobase/buf/buf0flu.cc
parent | 35bc8f9f4353b64da215e52ff6f1612a8ce66f43 (diff)
download | mariadb-git-068c61978e3a81836d52b8caf11e044290159ad1.tar.gz
Temporary commit of 10.0-merge
Diffstat (limited to 'storage/innobase/buf/buf0flu.cc')
-rw-r--r-- | storage/innobase/buf/buf0flu.cc | 707
1 file changed, 412 insertions, 295 deletions
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 023ed766c62..542c1669667 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -25,6 +25,10 @@ Created 11/11/1995 Heikki Tuuri

 #include "buf0flu.h"

+#ifdef UNIV_NONINL
+#include "buf0flu.ic"
+#endif
+
 #include "buf0buf.h"
 #include "buf0checksum.h"
 #include "srv0start.h"
@@ -44,39 +48,6 @@ Created 11/11/1995 Heikki Tuuri
 #include "srv0mon.h"
 #include "mysql/plugin.h"
 #include "mysql/service_thd_wait.h"
-#include "buf0dblwr.h"
-
-#ifdef UNIV_NONINL
-#include "buf0flu.ic"
-#endif
-
-/**********************************************************************
-These statistics are generated for heuristics used in estimating the
-rate at which we should flush the dirty blocks to avoid bursty IO
-activity. Note that the rate of flushing not only depends on how many
-dirty pages we have in the buffer pool but it is also a fucntion of
-how much redo the workload is generating and at what rate. */
-/* @{ */
-
-/** Number of intervals for which we keep the history of these stats.
-Each interval is 1 second, defined by the rate at which
-srv_error_monitor_thread() calls buf_flush_stat_update(). */
-#define BUF_FLUSH_STAT_N_INTERVAL 20
-
-/** Sampled values buf_flush_stat_cur.
-Not protected by any mutex. Updated by buf_flush_stat_update(). */
-static buf_flush_stat_t buf_flush_stat_arr[BUF_FLUSH_STAT_N_INTERVAL];
-
-/** Cursor to buf_flush_stat_arr[]. Updated in a round-robin fashion. */
-static ulint buf_flush_stat_arr_ind;
-
-/** Values at start of the current interval. Reset by
-buf_flush_stat_update(). */
-static buf_flush_stat_t buf_flush_stat_cur;
-
-/** Running sum of past values of buf_flush_stat_cur.
-Updated by buf_flush_stat_update(). Not protected by any mutex. */
-static buf_flush_stat_t buf_flush_stat_sum;

 /** Number of pages flushed through non flush_list flushes. */
 static ulint buf_lru_flush_page_count = 0;
@@ -104,6 +75,22 @@ in thrashing. */
 /* @} */

+/******************************************************************//**
+Increases flush_list size in bytes with zip_size for compressed page,
+UNIV_PAGE_SIZE for uncompressed page in inline function */
+static inline
+void
+incr_flush_list_size_in_bytes(
+/*==========================*/
+	buf_block_t*	block,		/*!< in: control block */
+	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
+{
+	ut_ad(buf_flush_list_mutex_own(buf_pool));
+	ulint zip_size = page_zip_get_size(&block->page.zip);
+	buf_pool->stat.flush_list_bytes += zip_size ? zip_size : UNIV_PAGE_SIZE;
+	ut_ad(buf_pool->stat.flush_list_bytes <= buf_pool->curr_pool_size);
+}
+
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /******************************************************************//**
 Validates the flush list.
@@ -333,6 +320,7 @@ buf_flush_insert_into_flush_list(
 	ut_d(block->page.in_flush_list = TRUE);
 	block->page.oldest_modification = lsn;
 	UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
+	incr_flush_list_size_in_bytes(block, buf_pool);

 #ifdef UNIV_DEBUG_VALGRIND
 	{
@@ -437,7 +425,7 @@ buf_flush_insert_sorted_into_flush_list(
 				     prev_b, &block->page);
 	}

-	MONITOR_INC(MONITOR_PAGE_INFLUSH);
+	incr_flush_list_size_in_bytes(block, buf_pool);

 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 	ut_a(buf_flush_validate_low(buf_pool));
@@ -538,6 +526,7 @@ buf_flush_remove(
 	buf_page_t*	bpage)	/*!< in: pointer to the block in question */
 {
 	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
+	ulint		zip_size;

 	ut_ad(buf_pool_mutex_own(buf_pool));
 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
@@ -576,14 +565,15 @@ buf_flush_remove(
 	because we assert on in_flush_list in comparison function. */
 	ut_d(bpage->in_flush_list = FALSE);

+	zip_size = page_zip_get_size(&bpage->zip);
+	buf_pool->stat.flush_list_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE;
+
 	bpage->oldest_modification = 0;

 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 	ut_a(buf_flush_validate_skip(buf_pool));
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

-	MONITOR_DEC(MONITOR_PAGE_INFLUSH);
-
 	buf_flush_list_mutex_exit(buf_pool);
 }

@@ -606,7 +596,7 @@ buf_flush_relocate_on_flush_list(
 	buf_page_t*	dpage)	/*!< in/out: destination block */
 {
 	buf_page_t*	prev;
-	buf_page_t* 	prev_b = NULL;
+	buf_page_t*	prev_b = NULL;
 	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);

 	ut_ad(buf_pool_mutex_own(buf_pool));
@@ -710,6 +700,27 @@ buf_flush_write_complete(
 #endif /* !UNIV_HOTBACKUP */

 /********************************************************************//**
+Calculate the checksum of a page from compressed table and update the page. */
+UNIV_INTERN
+void
+buf_flush_update_zip_checksum(
+/*==========================*/
+	buf_frame_t*	page,		/*!< in/out: Page to update */
+	ulint		zip_size,	/*!< in: Compressed page size */
+	lsn_t		lsn)		/*!< in: Lsn to stamp on the page */
+{
+	ut_a(zip_size > 0);
+
+	ib_uint32_t	checksum = page_zip_calc_checksum(
+		page, zip_size,
+		static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm));
+
+	mach_write_to_8(page + FIL_PAGE_LSN, lsn);
+	memset(page + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
+	mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
+}
+
+/********************************************************************//**
 Initializes a page for writing to the tablespace. */
 UNIV_INTERN
 void
@@ -747,17 +758,10 @@ buf_flush_init_for_writing(
 		case FIL_PAGE_TYPE_ZBLOB:
 		case FIL_PAGE_TYPE_ZBLOB2:
 		case FIL_PAGE_INDEX:
-			checksum = page_zip_calc_checksum(
-				page_zip->data, zip_size,
-				static_cast<srv_checksum_algorithm_t>(
-					srv_checksum_algorithm));
-
-			mach_write_to_8(page_zip->data
-					+ FIL_PAGE_LSN, newest_lsn);
-			memset(page_zip->data + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
-			mach_write_to_4(page_zip->data
-					+ FIL_PAGE_SPACE_OR_CHKSUM,
-					checksum);
+
+			buf_flush_update_zip_checksum(
+				page_zip->data, zip_size, newest_lsn);
+
 			return;
 		}
@@ -865,7 +869,7 @@ buf_flush_write_block_low(
 #endif

 #ifdef UNIV_LOG_DEBUG
-	static ibool univ_log_debug_warned;
+	static ibool	univ_log_debug_warned;
 #endif /* UNIV_LOG_DEBUG */

 	ut_ad(buf_page_in_file(bpage));
@@ -949,15 +953,15 @@ os_aio_simulated_wake_handler_threads after we have posted a batch of
 writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage)
 must be held upon entering this function, and they will be
 released by this function.
 */
-static
+UNIV_INTERN
 void
 buf_flush_page(
 /*===========*/
 	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
 	buf_page_t*	bpage,		/*!< in: buffer control block */
-	enum buf_flush	flush_type)	/*!< in: type of flush */
+	buf_flush	flush_type)	/*!< in: type of flush */
 {
-	mutex_t*	block_mutex;
+	ib_mutex_t*	block_mutex;
 	ibool		is_uncompressed;

 	ut_ad(flush_type < BUF_FLUSH_N_TYPES);
@@ -1091,6 +1095,56 @@ buf_flush_page_try(
 }
 # endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */

 /***********************************************************//**
+Check the page is in buffer pool and can be flushed.
+@return true if the page can be flushed. */
+static
+bool
+buf_flush_check_neighbor(
+/*=====================*/
+	ulint		space,		/*!< in: space id */
+	ulint		offset,		/*!< in: page offset */
+	enum buf_flush	flush_type)	/*!< in: BUF_FLUSH_LRU or
+					BUF_FLUSH_LIST */
+{
+	buf_page_t*	bpage;
+	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
+	bool		ret;
+
+	ut_ad(flush_type == BUF_FLUSH_LRU
+	      || flush_type == BUF_FLUSH_LIST);
+
+	buf_pool_mutex_enter(buf_pool);
+
+	/* We only want to flush pages from this buffer pool. */
+	bpage = buf_page_hash_get(buf_pool, space, offset);
+
+	if (!bpage) {
+
+		buf_pool_mutex_exit(buf_pool);
+		return(false);
+	}
+
+	ut_a(buf_page_in_file(bpage));
+
+	/* We avoid flushing 'non-old' blocks in an LRU flush,
+	because the flushed blocks are soon freed */
+
+	ret = false;
+	if (flush_type != BUF_FLUSH_LRU || buf_page_is_old(bpage)) {
+		ib_mutex_t*	block_mutex = buf_page_get_mutex(bpage);
+
+		mutex_enter(block_mutex);
+		if (buf_flush_ready_for_flush(bpage, flush_type)) {
+			ret = true;
+		}
+		mutex_exit(block_mutex);
+	}
+	buf_pool_mutex_exit(buf_pool);
+
+	return(ret);
+}
+
+/***********************************************************//**
 Flushes to disk all flushable pages within the flush area.
 @return number of pages flushed */
 static
@@ -1115,7 +1169,7 @@ buf_flush_try_neighbors(
 	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);

 	if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN
-	    || !srv_flush_neighbors) {
+	    || srv_flush_neighbors == 0) {
 		/* If there is little space or neighbor flushing is
 		not enabled then just flush the victim.
 		*/
 		low = offset;
@@ -1133,6 +1187,30 @@ buf_flush_try_neighbors(
 		low = (offset / buf_flush_area) * buf_flush_area;
 		high = (offset / buf_flush_area + 1) * buf_flush_area;
+
+		if (srv_flush_neighbors == 1) {
+			/* adjust 'low' and 'high' to limit
+			   for contiguous dirty area */
+			if (offset > low) {
+				for (i = offset - 1;
+				     i >= low
+				     && buf_flush_check_neighbor(
+						space, i, flush_type);
+				     i--) {
+					/* do nothing */
+				}
+				low = i + 1;
+			}
+
+			for (i = offset + 1;
+			     i < high
+			     && buf_flush_check_neighbor(
+					space, i, flush_type);
+			     i++) {
+				/* do nothing */
+			}
+			high = i;
+		}
 	}

 	/* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */
@@ -1181,7 +1259,7 @@ buf_flush_try_neighbors(
 		if (flush_type != BUF_FLUSH_LRU
 		    || i == offset
 		    || buf_page_is_old(bpage)) {
-			mutex_t* block_mutex = buf_page_get_mutex(bpage);
+			ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);

 			mutex_enter(block_mutex);
@@ -1240,7 +1318,7 @@ buf_flush_page_and_try_neighbors(
 	ulint*		count)		/*!< in/out: number of pages
 					flushed */
 {
-	mutex_t*	block_mutex;
+	ib_mutex_t*	block_mutex;
 	ibool		flushed = FALSE;
 #ifdef UNIV_DEBUG
 	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
@@ -1374,7 +1452,7 @@ buf_flush_LRU_list_batch(
 	       && free_len < srv_LRU_scan_depth
 	       && lru_len > BUF_LRU_MIN_LEN) {

-		mutex_t* block_mutex = buf_page_get_mutex(bpage);
+		ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
 		ibool	 evict;

 		mutex_enter(block_mutex);
@@ -1576,8 +1654,7 @@ NOTE 1: in the case of an LRU flush the calling thread may own latches to
 pages: to avoid deadlocks, this function must be written so that it cannot
 end up waiting for these latches! NOTE 2: in the case of a flush list flush,
 the calling thread is not allowed to own any latches on pages!
-@return number of blocks for which the write request was queued;
-ULINT_UNDEFINED if there was a flush of the same type already running */
+@return number of blocks for which the write request was queued */
 static
 ulint
 buf_flush_batch(
@@ -1621,8 +1698,6 @@ buf_flush_batch(

 	buf_pool_mutex_exit(buf_pool);

-	buf_dblwr_flush_buffered_writes();
-
 #ifdef UNIV_DEBUG
 	if (buf_debug_prints && count > 0) {
 		fprintf(stderr, flush_type == BUF_FLUSH_LRU
 			? "Flushed %lu pages in LRU flush\n"
 			: "Flushed %lu pages in flush list flush\n",
 			(ulong) count);
 	}
 #endif /* UNIV_DEBUG */

-	srv_buf_pool_flushed += count;
-
 	return(count);
 }

@@ -1659,14 +1732,7 @@ buf_flush_common(
 	}
 #endif /* UNIV_DEBUG */

-	srv_buf_pool_flushed += page_count;
-
-	if (flush_type == BUF_FLUSH_LRU) {
-		/* We keep track of all flushes happening as part of LRU
-		flush. When estimating the desired rate at which flush_list
-		should be flushed we factor in this value. */
-		buf_lru_flush_page_count += page_count;
-	}
+	srv_stats.buf_pool_flushed.add(page_count);
 }

 /******************************************************************//**
@@ -1750,7 +1816,7 @@ buf_flush_wait_batch_end(
 		}
 	} else {
 		thd_wait_begin(NULL, THD_WAIT_DISKIO);
-	       os_event_wait(buf_pool->no_flush[type]);
+		os_event_wait(buf_pool->no_flush[type]);
 		thd_wait_end(NULL);
 	}
 }
@@ -1760,21 +1826,28 @@ This utility flushes dirty blocks from the end of the LRU list and also
 puts replaceable clean pages from the end of the LRU list to the free
 list.
 NOTE: The calling thread is not allowed to own any latches on pages!
-@return number of blocks for which the write request was queued;
-ULINT_UNDEFINED if there was a flush of the same type already running */
+@return true if a batch was queued successfully. false if another batch
+of same type was already running.
+ */
 static
-ulint
+bool
 buf_flush_LRU(
 /*==========*/
 	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
-	ulint		min_n)		/*!< in: wished minimum mumber of blocks
+	ulint		min_n,		/*!< in: wished minimum mumber of blocks
 					flushed (it is not guaranteed that the
 					actual number is that big, though) */
+	ulint*		n_processed)	/*!< out: the number of pages
+					which were processed is passed
+					back to caller. Ignored if NULL */
 {
 	ulint		page_count;

+	if (n_processed) {
+		*n_processed = 0;
+	}
+
 	if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
-		return(ULINT_UNDEFINED);
+		return(false);
 	}

 	page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0);
@@ -1783,31 +1856,43 @@ buf_flush_LRU(

 	buf_flush_common(BUF_FLUSH_LRU, page_count);

-	return(page_count);
+	if (n_processed) {
+		*n_processed = page_count;
+	}
+
+	return(true);
 }

 /*******************************************************************//**
 This utility flushes dirty blocks from the end of the flush list of
 all buffer pool instances.
 NOTE: The calling thread is not allowed to own any latches on pages!
-@return number of blocks for which the write request was queued;
-ULINT_UNDEFINED if there was a flush of the same type already running */
+@return true if a batch was queued successfully for each buffer pool
+instance. false if another batch of same type was already running in
+at least one of the buffer pool instance */
 UNIV_INTERN
-ulint
+bool
 buf_flush_list(
 /*===========*/
 	ulint		min_n,		/*!< in: wished minimum mumber of blocks
 					flushed (it is not guaranteed that the
 					actual number is that big, though) */
-	lsn_t		lsn_limit)	/*!< in the case BUF_FLUSH_LIST all
+	lsn_t		lsn_limit,	/*!< in the case BUF_FLUSH_LIST all
 					blocks whose oldest_modification is
 					smaller than this should be flushed
 					(if their number does not exceed
 					min_n), otherwise ignored */
+	ulint*		n_processed)	/*!< out: the number of pages
+					which were processed is passed
+					back to caller. Ignored if NULL */
+
 {
 	ulint		i;
-	ulint		total_page_count = 0;
-	ibool		skipped = FALSE;
+	bool		success = true;
+
+	if (n_processed) {
+		*n_processed = 0;
+	}

 	if (min_n != ULINT_MAX) {
 		/* Ensure that flushing is spread evenly amongst the
@@ -1836,7 +1921,7 @@ buf_flush_list(
 			pools based on the assumption that it will help
 			in the retry which will follow the failure. */

-			skipped = TRUE;
+			success = false;

 			continue;
 		}
@@ -1848,7 +1933,9 @@ buf_flush_list(

 		buf_flush_common(BUF_FLUSH_LIST, page_count);

-		total_page_count += page_count;
+		if (n_processed) {
+			*n_processed += page_count;
+		}

 		if (page_count) {
 			MONITOR_INC_VALUE_CUMULATIVE(
@@ -1859,8 +1946,7 @@ buf_flush_list(
 		}
 	}

-	return(lsn_limit != LSN_MAX && skipped
-	       ? ULINT_UNDEFINED : total_page_count);
+	return(success);
 }

 /******************************************************************//**
@@ -1879,7 +1965,7 @@ buf_flush_single_page_from_LRU(
 {
 	ulint		scanned;
 	buf_page_t*	bpage;
-	mutex_t*	block_mutex;
+	ib_mutex_t*	block_mutex;
 	ibool		freed;
 	ibool		evict_zip;

@@ -1957,128 +2043,6 @@ buf_flush_single_page_from_LRU(
 	return(freed);
 }

-/*********************************************************************
-Update the historical stats that we are collecting for flush rate
-heuristics at the end of each interval.
-Flush rate heuristic depends on (a) rate of redo log generation and
-(b) the rate at which LRU flush is happening.
- */
-UNIV_INTERN
-void
-buf_flush_stat_update(void)
-/*=======================*/
-{
-	buf_flush_stat_t*	item;
-	lsn_t			lsn_diff;
-	lsn_t			lsn;
-	ulint			n_flushed;
-
-	lsn = log_get_lsn();
-	if (buf_flush_stat_cur.redo == 0) {
-		/* First time around. Just update the current LSN
-		and return. */
-		buf_flush_stat_cur.redo = lsn;
-		return;
-	}
-
-	item = &buf_flush_stat_arr[buf_flush_stat_arr_ind];
-
-	/* values for this interval */
-	lsn_diff = lsn - buf_flush_stat_cur.redo;
-	n_flushed = buf_lru_flush_page_count
-		- buf_flush_stat_cur.n_flushed;
-
-	/* add the current value and subtract the obsolete entry. */
-	buf_flush_stat_sum.redo += lsn_diff - item->redo;
-	buf_flush_stat_sum.n_flushed += n_flushed - item->n_flushed;
-
-	/* put current entry in the array. */
-	item->redo = lsn_diff;
-	item->n_flushed = n_flushed;
-
-	/* update the index */
-	buf_flush_stat_arr_ind++;
-	buf_flush_stat_arr_ind %= BUF_FLUSH_STAT_N_INTERVAL;
-
-	/* reset the current entry. */
-	buf_flush_stat_cur.redo = lsn;
-	buf_flush_stat_cur.n_flushed = buf_lru_flush_page_count;
-}
-
-/*********************************************************************
-Determines the fraction of dirty pages that need to be flushed based
-on the speed at which we generate redo log. Note that if redo log
-is generated at a significant rate without corresponding increase
-in the number of dirty pages (for example, an in-memory workload)
-it can cause IO bursts of flushing. This function implements heuristics
-to avoid this burstiness.
-@return number of dirty pages to be flushed / second */
-static
-ulint
-buf_flush_get_desired_flush_rate(void)
-/*==================================*/
-{
-	ulint		i;
-	lsn_t		redo_avg;
-	ulint		n_dirty = 0;
-	ib_uint64_t	n_flush_req;
-	ib_uint64_t	lru_flush_avg;
-	lsn_t		lsn = log_get_lsn();
-	lsn_t		log_capacity = log_get_capacity();
-
-	/* log_capacity should never be zero after the initialization
-	of log subsystem. */
-	ut_ad(log_capacity != 0);
-
-	/* Get total number of dirty pages. It is OK to access
-	flush_list without holding any mutex as we are using this
-	only for heuristics. */
-	for (i = 0; i < srv_buf_pool_instances; i++) {
-		buf_pool_t*	buf_pool;
-
-		buf_pool = buf_pool_from_array(i);
-		n_dirty += UT_LIST_GET_LEN(buf_pool->flush_list);
-	}
-
-	/* An overflow can happen if we generate more than 2^32 bytes
-	of redo in this interval i.e.: 4G of redo in 1 second. We can
-	safely consider this as infinity because if we ever come close
-	to 4G we'll start a synchronous flush of dirty pages. */
-	/* redo_avg below is average at which redo is generated in
-	past BUF_FLUSH_STAT_N_INTERVAL + redo generated in the current
-	interval. */
-	redo_avg = buf_flush_stat_sum.redo / BUF_FLUSH_STAT_N_INTERVAL
-		+ (lsn - buf_flush_stat_cur.redo);
-
-	/* An overflow can happen possibly if we flush more than 2^32
-	pages in BUF_FLUSH_STAT_N_INTERVAL. This is a very very
-	unlikely scenario. Even when this happens it means that our
-	flush rate will be off the mark. It won't affect correctness
-	of any subsystem. */
-	/* lru_flush_avg below is rate at which pages are flushed as
-	part of LRU flush in past BUF_FLUSH_STAT_N_INTERVAL + the
-	number of pages flushed in the current interval.
- */
-	lru_flush_avg = buf_flush_stat_sum.n_flushed
-			/ BUF_FLUSH_STAT_N_INTERVAL
-		+ (buf_lru_flush_page_count
-		   - buf_flush_stat_cur.n_flushed);
-
-	n_flush_req = (n_dirty * redo_avg) / log_capacity;
-
-	/* The number of pages that we want to flush from the flush
-	list is the difference between the required rate and the
-	number of pages that we are historically flushing from the
-	LRU list */
-	if (n_flush_req <= lru_flush_avg) {
-		return(0);
-	} else {
-		ib_uint64_t	rate;
-
-		rate = n_flush_req - lru_flush_avg;
-
-		return((ulint) (rate < PCT_IO(100) ? rate : PCT_IO(100)));
-	}
-}
-
 /*********************************************************************//**
 Clears up tail of the LRU lists:
 * Put replaceable pages at the tail of LRU to the free list
@@ -2086,36 +2050,35 @@ Clears up tail of the LRU lists:
 The depth to which we scan each buffer pool is controlled by dynamic
 config parameter innodb_LRU_scan_depth.
 @return total pages flushed */
-UNIV_INLINE
+UNIV_INTERN
 ulint
-page_cleaner_flush_LRU_tail(void)
-/*=============================*/
+buf_flush_LRU_tail(void)
+/*====================*/
 {
-	ulint	i;
-	ulint	j;
 	ulint	total_flushed = 0;

-	for (i = 0; i < srv_buf_pool_instances; i++) {
+	for (ulint i = 0; i < srv_buf_pool_instances; i++) {

 		buf_pool_t*	buf_pool = buf_pool_from_array(i);

 		/* We divide LRU flush into smaller chunks because
 		there may be user threads waiting for the flush to end
 		in buf_LRU_get_free_block(). */

-		for (j = 0;
+		for (ulint j = 0;
 		     j < srv_LRU_scan_depth;
 		     j += PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE) {

-			ulint	n_flushed = buf_flush_LRU(buf_pool,
-				PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE);
+			ulint	n_flushed = 0;

 			/* Currently page_cleaner is the only thread
 			that can trigger an LRU flush. It is possible
 			that a batch triggered during last iteration is
 			still running, */
-			if (n_flushed != ULINT_UNDEFINED) {
-				total_flushed += n_flushed;
-			}
+			buf_flush_LRU(buf_pool,
+				      PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE,
+				      &n_flushed);
+
+			total_flushed += n_flushed;
 		}
 	}

@@ -2132,14 +2095,12 @@ page_cleaner_flush_LRU_tail(void)

 /*********************************************************************//**
 Wait for any possible LRU flushes that are in progress to end. */
-UNIV_INLINE
+UNIV_INTERN
 void
-page_cleaner_wait_LRU_flush(void)
-/*=============================*/
+buf_flush_wait_LRU_batch_end(void)
+/*==============================*/
 {
-	ulint	i;
-
-	for (i = 0; i < srv_buf_pool_instances; i++) {
+	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
 		buf_pool_t*	buf_pool;

 		buf_pool = buf_pool_from_array(i);
@@ -2166,22 +2127,87 @@ ulint
 page_cleaner_do_flush_batch(
 /*========================*/
 	ulint		n_to_flush,	/*!< in: number of pages that
-					we should attempt to flush. If
-					an lsn_limit is provided then
-					this value will have no affect */
+					we should attempt to flush. */
 	lsn_t		lsn_limit)	/*!< in: LSN up to which flushing
 					must happen */
 {
 	ulint n_flushed;

-	ut_ad(n_to_flush == ULINT_MAX || lsn_limit == LSN_MAX);
+	buf_flush_list(n_to_flush, lsn_limit, &n_flushed);
+
+	return(n_flushed);
+}

-	n_flushed = buf_flush_list(n_to_flush, lsn_limit);
-	if (n_flushed == ULINT_UNDEFINED) {
-		n_flushed = 0;
+/*********************************************************************//**
+Calculates if flushing is required based on number of dirty pages in
+the buffer pool.
+@return percent of io_capacity to flush to manage dirty page ratio */
+static
+ulint
+af_get_pct_for_dirty()
+/*==================*/
+{
+	ulint dirty_pct = buf_get_modified_ratio_pct();
+
+	ut_a(srv_max_dirty_pages_pct_lwm
+	     <= srv_max_buf_pool_modified_pct);
+
+	if (srv_max_dirty_pages_pct_lwm == 0) {
+		/* The user has not set the option to preflush dirty
+		pages as we approach the high water mark. */
+		if (dirty_pct > srv_max_buf_pool_modified_pct) {
+			/* We have crossed the high water mark of dirty
+			pages In this case we start flushing at 100% of
+			innodb_io_capacity. */
+			return(100);
+		}
+	} else if (dirty_pct > srv_max_dirty_pages_pct_lwm) {
+		/* We should start flushing pages gradually. */
+		return((dirty_pct * 100)
+		       / (srv_max_buf_pool_modified_pct + 1));
 	}

-	return(n_flushed);
+	return(0);
+}
+
+/*********************************************************************//**
+Calculates if flushing is required based on redo generation rate.
+@return percent of io_capacity to flush to manage redo space */
+static
+ulint
+af_get_pct_for_lsn(
+/*===============*/
+	lsn_t	age)	/*!< in: current age of LSN. */
+{
+	lsn_t	max_async_age;
+	lsn_t	lsn_age_factor;
+	lsn_t	af_lwm = (srv_adaptive_flushing_lwm
+			  * log_get_capacity()) / 100;
+
+	if (age < af_lwm) {
+		/* No adaptive flushing. */
+		return(0);
+	}
+
+	max_async_age = log_get_max_modified_age_async();
+
+	if (age < max_async_age && !srv_adaptive_flushing) {
+		/* We have still not reached the max_async point and
+		the user has disabled adaptive flushing. */
+		return(0);
+	}
+
+	/* If we are here then we know that either:
+	1) User has enabled adaptive flushing
+	2) User may have disabled adaptive flushing but we have reached
+	max_async_age. */
+	lsn_age_factor = (age * 100) / max_async_age;
+
+	ut_ad(srv_max_io_capacity >= srv_io_capacity);
+	return(static_cast<ulint>(
+		((srv_max_io_capacity / srv_io_capacity)
+		 * (lsn_age_factor * sqrt((double)lsn_age_factor)))
+		/ 7.5));
 }

 /*********************************************************************//**
@@ -2195,78 +2221,103 @@ ulint
 page_cleaner_flush_pages_if_needed(void)
 /*====================================*/
 {
-	ulint	n_pages_flushed = 0;
-	lsn_t	lsn_limit = log_async_flush_lsn();
+	static	lsn_t		lsn_avg_rate = 0;
+	static	lsn_t		prev_lsn = 0;
+	static	lsn_t		last_lsn = 0;
+	static	ulint		sum_pages = 0;
+	static	ulint		last_pages = 0;
+	static	ulint		prev_pages = 0;
+	static	ulint		avg_page_rate = 0;
+	static	ulint		n_iterations = 0;
+	lsn_t			oldest_lsn;
+	lsn_t			cur_lsn;
+	lsn_t			age;
+	lsn_t			lsn_rate;
+	ulint			n_pages = 0;
+	ulint			pct_for_dirty = 0;
+	ulint			pct_for_lsn = 0;
+	ulint			pct_total = 0;
+	int			age_factor = 0;
+
+	cur_lsn = log_get_lsn();
+
+	if (prev_lsn == 0) {
+		/* First time around. */
+		prev_lsn = cur_lsn;
+		return(0);
+	}

-	/* Currently we decide whether or not to flush and how much to
-	flush based on three factors.
+	if (prev_lsn == cur_lsn) {
+		return(0);
+	}

-	1) If the amount of LSN for which pages are not flushed to disk
-	yet is greater than log_sys->max_modified_age_async. This is
-	the most urgent type of flush and we attempt to cleanup enough
-	of the tail of the flush_list to avoid flushing inside user
-	threads.
+	/* We update our variables every srv_flushing_avg_loops
+	iterations to smooth out transition in workload. */
+	if (++n_iterations >= srv_flushing_avg_loops) {

-	2) If modified page ratio is greater than the one specified by
-	the user. In that case we flush full 100% IO_CAPACITY of the
-	server. Note that 1 and 2 are not mutually exclusive.
-	We can end up executing both steps.
+		avg_page_rate = ((sum_pages / srv_flushing_avg_loops)
+				 + avg_page_rate) / 2;

-	3) If adaptive_flushing is set by the user and neither of 1
-	or 2 has occurred above then we flush a batch based on our
-	heuristics. */
+		/* How much LSN we have generated since last call. */
+		lsn_rate = (cur_lsn - prev_lsn) / srv_flushing_avg_loops;

-	if (lsn_limit != LSN_MAX) {
+		lsn_avg_rate = (lsn_avg_rate + lsn_rate) / 2;

-		/* async flushing is requested */
-		n_pages_flushed = page_cleaner_do_flush_batch(ULINT_MAX,
-							      lsn_limit);
+		prev_lsn = cur_lsn;

-		MONITOR_INC_VALUE_CUMULATIVE(
-			MONITOR_FLUSH_ASYNC_TOTAL_PAGE,
-			MONITOR_FLUSH_ASYNC_COUNT,
-			MONITOR_FLUSH_ASYNC_PAGES,
-			n_pages_flushed);
+		n_iterations = 0;
+
+		sum_pages = 0;
 	}

-	if (UNIV_UNLIKELY(n_pages_flushed < PCT_IO(100)
-			  && buf_get_modified_ratio_pct()
-			  > srv_max_buf_pool_modified_pct)) {
+	oldest_lsn = buf_pool_get_oldest_modification();

-		/* Try to keep the number of modified pages in the
-		buffer pool under the limit wished by the user */
+	ut_ad(oldest_lsn <= cur_lsn);

-		n_pages_flushed += page_cleaner_do_flush_batch(PCT_IO(100),
-							       LSN_MAX);
+	age = cur_lsn - oldest_lsn;

-		MONITOR_INC_VALUE_CUMULATIVE(
-			MONITOR_FLUSH_MAX_DIRTY_TOTAL_PAGE,
-			MONITOR_FLUSH_MAX_DIRTY_COUNT,
-			MONITOR_FLUSH_MAX_DIRTY_PAGES,
-			n_pages_flushed);
+	pct_for_dirty = af_get_pct_for_dirty();
+	pct_for_lsn = af_get_pct_for_lsn(age);
+
+	pct_total = ut_max(pct_for_dirty, pct_for_lsn);
+
+	/* Cap the maximum IO capacity that we are going to use by
+	max_io_capacity. */
+	n_pages = (PCT_IO(pct_total) + avg_page_rate) / 2;
+
+	if (n_pages > srv_max_io_capacity) {
+		n_pages = srv_max_io_capacity;
 	}

-	if (srv_adaptive_flushing && n_pages_flushed == 0) {
+	if (last_pages && cur_lsn - last_lsn > lsn_avg_rate / 2) {
+		age_factor = prev_pages / last_pages;
 	}

-		/* Try to keep the rate of flushing of dirty
-		pages such that redo log generation does not
-		produce bursts of IO at checkpoint time.
-		*/
-		ulint	n_flush = buf_flush_get_desired_flush_rate();
+	MONITOR_SET(MONITOR_FLUSH_N_TO_FLUSH_REQUESTED, n_pages);

-		ut_ad(n_flush <= PCT_IO(100));
-		if (n_flush) {
-			n_pages_flushed = page_cleaner_do_flush_batch(
-				n_flush, LSN_MAX);
+	prev_pages = n_pages;
+	n_pages = page_cleaner_do_flush_batch(
+		n_pages, oldest_lsn + lsn_avg_rate * (age_factor + 1));

-			MONITOR_INC_VALUE_CUMULATIVE(
-				MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
-				MONITOR_FLUSH_ADAPTIVE_COUNT,
-				MONITOR_FLUSH_ADAPTIVE_PAGES,
-				n_pages_flushed);
-		}
+	last_lsn= cur_lsn;
+	last_pages= n_pages + 1;
+
+	MONITOR_SET(MONITOR_FLUSH_AVG_PAGE_RATE, avg_page_rate);
+	MONITOR_SET(MONITOR_FLUSH_LSN_AVG_RATE, lsn_avg_rate);
+	MONITOR_SET(MONITOR_FLUSH_PCT_FOR_DIRTY, pct_for_dirty);
+	MONITOR_SET(MONITOR_FLUSH_PCT_FOR_LSN, pct_for_lsn);
+
+	if (n_pages) {
+		MONITOR_INC_VALUE_CUMULATIVE(
+			MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
+			MONITOR_FLUSH_ADAPTIVE_COUNT,
+			MONITOR_FLUSH_ADAPTIVE_PAGES,
+			n_pages);
+
+		sum_pages += n_pages;
 	}

-	return(n_pages_flushed);
+	return(n_pages);
 }

 /*********************************************************************//**
@@ -2306,7 +2357,8 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)(
 	ulint	next_loop_time = ut_time_ms() + 1000;
 	ulint	n_flushed = 0;
 	ulint	last_activity = srv_get_activity_count();
-	ulint	i;
+
+	ut_ad(!srv_read_only_mode);

 #ifdef UNIV_PFS_THREAD
 	pfs_register_thread(buf_page_cleaner_thread_key);
@@ -2336,7 +2388,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)(
 			last_activity = srv_get_activity_count();

 			/* Flush pages from end of LRU if required */
-			n_flushed = page_cleaner_flush_LRU_tail();
+			n_flushed = buf_flush_LRU_tail();

 			/* Flush pages from flush_list if required */
 			n_flushed += page_cleaner_flush_pages_if_needed();
@@ -2396,19 +2448,21 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)(
 	sweep and we'll come out of the loop leaving behind dirty pages
 	in the flush_list */
 	buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
-	page_cleaner_wait_LRU_flush();
+	buf_flush_wait_LRU_batch_end();
+
+	bool	success;

 	do {

-		n_flushed = buf_flush_list(PCT_IO(100), LSN_MAX);
+		success = buf_flush_list(PCT_IO(100), LSN_MAX, &n_flushed);
 		buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);

-	} while (n_flushed > 0);
+	} while (!success || n_flushed > 0);

 	/* Some sanity checks */
 	ut_a(srv_get_active_thread_type() == SRV_NONE);
 	ut_a(srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE);

-	for (i = 0; i < srv_buf_pool_instances; i++) {
+	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
 		buf_pool_t* buf_pool = buf_pool_from_array(i);
 		ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == 0);
 	}
@@ -2521,3 +2575,66 @@ buf_flush_validate(
 }
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 #endif /* !UNIV_HOTBACKUP */
+
+#ifdef UNIV_DEBUG
+/******************************************************************//**
+Check if there are any dirty pages that belong to a space id in the flush
+list in a particular buffer pool.
+@return number of dirty pages present in a single buffer pool */
+UNIV_INTERN
+ulint
+buf_pool_get_dirty_pages_count(
+/*===========================*/
+	buf_pool_t*	buf_pool,	/*!< in: buffer pool */
+	ulint		id)		/*!< in: space id to check */
+
+{
+	ulint		count = 0;
+
+	buf_pool_mutex_enter(buf_pool);
+	buf_flush_list_mutex_enter(buf_pool);
+
+	buf_page_t*	bpage;
+
+	for (bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
+	     bpage != 0;
+	     bpage = UT_LIST_GET_NEXT(list, bpage)) {
+
+		ut_ad(buf_page_in_file(bpage));
+		ut_ad(bpage->in_flush_list);
+		ut_ad(bpage->oldest_modification > 0);
+
+		if (buf_page_get_space(bpage) == id) {
+			++count;
+		}
+	}
+
+	buf_flush_list_mutex_exit(buf_pool);
+	buf_pool_mutex_exit(buf_pool);
+
+	return(count);
+}
+
+/******************************************************************//**
+Check if there are any dirty pages that belong to a space id in the flush list.
+@return number of dirty pages present in all the buffer pools */
+UNIV_INTERN
+ulint
+buf_flush_get_dirty_pages_count(
+/*============================*/
+	ulint		id)	/*!< in: space id to check */
+
+{
+	ulint		count = 0;

+	for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+		buf_pool_t*	buf_pool;
+
+		buf_pool = buf_pool_from_array(i);
+
+		count += buf_pool_get_dirty_pages_count(buf_pool, id);
+	}
+
+	return(count);
+}
+#endif /* UNIV_DEBUG */
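
The new incr_flush_list_size_in_bytes(), together with the matching decrement added to buf_flush_remove(), keeps buf_pool->stat.flush_list_bytes in step with the flush list: a dirty page is accounted at its compressed size when page_zip_get_size() returns non-zero, otherwise at the full UNIV_PAGE_SIZE. A minimal standalone sketch of that invariant (not InnoDB code; the struct, method names and sample sizes are illustrative):

#include <cassert>
#include <cstdint>

// Illustrative stand-in for InnoDB's default uncompressed page size.
constexpr uint64_t UNIV_PAGE_SIZE = 16384;

// Mirrors the rule in incr_flush_list_size_in_bytes() / buf_flush_remove():
// a page contributes zip_size bytes when compressed (zip_size != 0),
// otherwise UNIV_PAGE_SIZE.
struct FlushListStats {
	uint64_t flush_list_bytes = 0;

	void page_added(uint64_t zip_size) {
		flush_list_bytes += zip_size ? zip_size : UNIV_PAGE_SIZE;
	}
	void page_removed(uint64_t zip_size) {
		uint64_t sz = zip_size ? zip_size : UNIV_PAGE_SIZE;
		assert(flush_list_bytes >= sz);	// analogue of the patch's ut_ad checks
		flush_list_bytes -= sz;
	}
};

int main() {
	FlushListStats stats;
	stats.page_added(0);		// uncompressed page: +16384
	stats.page_added(8192);		// 8K compressed page: +8192
	stats.page_removed(0);		// the uncompressed page gets flushed
	assert(stats.flush_list_bytes == 8192);
}

In the patch the upper bound is buf_pool->curr_pool_size, asserted under the flush list mutex; the sketch keeps only the non-negativity half of that check.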
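When srv_flush_neighbors is 1, the new block in buf_flush_try_neighbors() narrows the fixed flush area [low, high) to the contiguous run of flushable pages around offset, probing one neighbour at a time with buf_flush_check_neighbor(). A standalone sketch of that trimming, assuming is_flushable() stands in for the probe (all other names are illustrative):

#include <cstdio>
#include <functional>
#include <set>
#include <utility>

// Trim [low, high) to the contiguous run of flushable pages around offset.
static std::pair<long, long>
trim_to_contiguous(long low, long high, long offset,
		   const std::function<bool(long)>& is_flushable)
{
	long i = offset;
	while (i - 1 >= low && is_flushable(i - 1)) --i;	// walk left
	low = i;

	i = offset;
	while (i + 1 < high && is_flushable(i + 1)) ++i;	// walk right
	high = i + 1;

	return {low, high};
}

int main()
{
	// Dirty pages 8..12 inside a 64-page flush area around offset 10:
	std::set<long> dirty{8, 9, 10, 11, 12};
	auto area = trim_to_contiguous(0, 64, 10,
				       [&](long i) { return dirty.count(i) > 0; });
	std::printf("[%ld, %ld)\n", area.first, area.second);	// prints [8, 13)
}

Consistent with the checks in the hunk, srv_flush_neighbors == 0 flushes only the victim page, 1 trims the area to the contiguous dirty run as above, and other values keep the whole area.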
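The page cleaner rewrite reduces adaptive flushing to two percentages of innodb_io_capacity, af_get_pct_for_dirty() and af_get_pct_for_lsn(), and takes their maximum. A standalone restatement of the two formulas with the server globals turned into parameters (the arithmetic is copied from the hunks above; the sample numbers are illustrative):

#include <cmath>
#include <cstdio>

// af_get_pct_for_dirty(): nothing below the low-water mark, a gradual
// ramp above it, or a flat 100% once the hard limit is crossed when no
// low-water mark is configured.
static unsigned pct_for_dirty(double dirty_pct, double lwm, double max_pct)
{
	if (lwm == 0)
		return dirty_pct > max_pct ? 100 : 0;
	if (dirty_pct > lwm)
		return (unsigned)((dirty_pct * 100) / (max_pct + 1));
	return 0;
}

// Core of af_get_pct_for_lsn() once past its low-water-mark checks.
// Note: the original divides the two capacities in integer arithmetic,
// so this matches it exactly only when they divide evenly.
static unsigned pct_for_lsn(double age, double max_async_age,
			    double max_io_capacity, double io_capacity)
{
	double lsn_age_factor = (age * 100) / max_async_age;
	return (unsigned)(((max_io_capacity / io_capacity)
			   * (lsn_age_factor * std::sqrt(lsn_age_factor)))
			  / 7.5);
}

int main()
{
	// 40% dirty pages with lwm=10% and max=75%: flush at 52% of io_capacity.
	std::printf("%u\n", pct_for_dirty(40, 10, 75));

	// Redo age at half of max_modified_age_async, io_capacity=200,
	// max_io_capacity=2000: requests 471% of io_capacity; the caller
	// then averages this with the recent page rate and caps the result
	// at srv_max_io_capacity.
	std::printf("%u\n", pct_for_lsn(50, 100, 2000, 200));
}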