Diffstat (limited to 'storage/xtradb/buf')

 storage/xtradb/buf/buf0buddy.c | 163
 storage/xtradb/buf/buf0buf.c   | 650
 storage/xtradb/buf/buf0flu.c   | 328
 storage/xtradb/buf/buf0lru.c   | 269
 storage/xtradb/buf/buf0rea.c   | 167
 5 files changed, 853 insertions(+), 724 deletions(-)
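Most of the churn below is Doxygen conversion: the old `/* out: ... */` return-value comments become `@return` tags, and parameter comments switch to the `/*!< ... */` form. The first file, buf0buddy.c, is the binary buddy allocator for compressed pages. As background for the buf_buddy_get() hunks, whose comment describes returning "the buddy relative of page", here is a minimal standalone sketch of the buddy-address rule a binary buddy allocator of this kind relies on; the helper buddy_of() and the main() demo are illustrative, not code from the patch.

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Minimal sketch of the binary-buddy address rule: for a block of
   power-of-two size 'size', aligned to 'size', the buddy is found by
   flipping the 'size' bit of the offset, i.e. offset XOR size.
   This is equivalent to:
       (offset & size) ? offset - size : offset + size
   buddy_of() is a hypothetical helper for illustration only. */
static uintptr_t
buddy_of(uintptr_t offset, uintptr_t size)
{
	assert(size != 0 && (size & (size - 1)) == 0);	/* power of two */
	assert((offset & (size - 1)) == 0);		/* size-aligned */

	return offset ^ size;
}

int
main(void)
{
	/* With 1024-byte blocks, 0x0000 and 0x0400 are buddies: freeing
	   both allows recombination into one 2048-byte block at 0x0000. */
	printf("buddy of 0x0000 is 0x%04lx\n",
	       (unsigned long) buddy_of(0x0000, 1024));
	printf("buddy of 0x0400 is 0x%04lx\n",
	       (unsigned long) buddy_of(0x0400, 1024));
	return 0;
}
```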
diff --git a/storage/xtradb/buf/buf0buddy.c b/storage/xtradb/buf/buf0buddy.c index 494db91d159..6ee7a71a2e5 100644 --- a/storage/xtradb/buf/buf0buddy.c +++ b/storage/xtradb/buf/buf0buddy.c @@ -16,7 +16,8 @@ Place, Suite 330, Boston, MA 02111-1307 USA *****************************************************************************/ -/****************************************************** +/**************************************************//** +@file buf/buf0buddy.c Binary buddy allocator for compressed pages Created December 2006 by Marko Makela @@ -44,15 +45,15 @@ static ulint buf_buddy_n_frames; Protected by buf_pool_mutex. */ UNIV_INTERN buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES + 1]; -/************************************************************************** -Get the offset of the buddy of a compressed page frame. */ +/**********************************************************************//** +Get the offset of the buddy of a compressed page frame. +@return the buddy relative of page */ UNIV_INLINE byte* buf_buddy_get( /*==========*/ - /* out: the buddy relative of page */ - byte* page, /* in: compressed page */ - ulint size) /* in: page size in bytes */ + byte* page, /*!< in: compressed page */ + ulint size) /*!< in: page size in bytes */ { ut_ad(ut_is_2pow(size)); ut_ad(size >= BUF_BUDDY_LOW); @@ -66,14 +67,14 @@ buf_buddy_get( } } -/************************************************************************** +/**********************************************************************//** Add a block to the head of the appropriate buddy free list. */ UNIV_INLINE void buf_buddy_add_to_free( /*==================*/ - buf_page_t* bpage, /* in,own: block to be freed */ - ulint i) /* in: index of buf_pool->zip_free[] */ + buf_page_t* bpage, /*!< in,own: block to be freed */ + ulint i) /*!< in: index of buf_pool->zip_free[] */ { #ifdef UNIV_DEBUG_VALGRIND buf_page_t* b = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); @@ -81,6 +82,9 @@ buf_buddy_add_to_free( if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i); #endif /* UNIV_DEBUG_VALGRIND */ + //ut_ad(buf_pool_mutex_own()); + ut_ad(mutex_own(&zip_free_mutex)); + ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE); ut_ad(buf_pool->zip_free[i].start != bpage); UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage); @@ -90,14 +94,14 @@ buf_buddy_add_to_free( #endif /* UNIV_DEBUG_VALGRIND */ } -/************************************************************************** +/**********************************************************************//** Remove a block from the appropriate buddy free list. */ UNIV_INLINE void buf_buddy_remove_from_free( /*=======================*/ - buf_page_t* bpage, /* in: block to be removed */ - ulint i) /* in: index of buf_pool->zip_free[] */ + buf_page_t* bpage, /*!< in: block to be removed */ + ulint i) /*!< in: index of buf_pool->zip_free[] */ { #ifdef UNIV_DEBUG_VALGRIND buf_page_t* prev = UT_LIST_GET_PREV(zip_list, bpage); @@ -110,6 +114,8 @@ buf_buddy_remove_from_free( ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE); #endif /* UNIV_DEBUG_VALGRIND */ + //ut_ad(buf_pool_mutex_own()); + ut_ad(mutex_own(&zip_free_mutex)); ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE); UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage); @@ -119,15 +125,14 @@ buf_buddy_remove_from_free( #endif /* UNIV_DEBUG_VALGRIND */ } -/************************************************************************** -Try to allocate a block from buf_pool->zip_free[]. 
*/ +/**********************************************************************//** +Try to allocate a block from buf_pool->zip_free[]. +@return allocated block, or NULL if buf_pool->zip_free[] was empty */ static void* buf_buddy_alloc_zip( /*================*/ - /* out: allocated block, or NULL - if buf_pool->zip_free[] was empty */ - ulint i) /* in: index of buf_pool->zip_free[] */ + ulint i) /*!< in: index of buf_pool->zip_free[] */ { buf_page_t* bpage; @@ -135,10 +140,12 @@ buf_buddy_alloc_zip( ut_ad(mutex_own(&zip_free_mutex)); ut_a(i < BUF_BUDDY_SIZES); -#if defined UNIV_DEBUG && !defined UNIV_DEBUG_VALGRIND +#ifndef UNIV_DEBUG_VALGRIND /* Valgrind would complain about accessing free memory. */ - UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i]); -#endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */ + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i], + ut_ad(buf_page_get_state(ut_list_node_313) + == BUF_BLOCK_ZIP_FREE))); +#endif /* !UNIV_DEBUG_VALGRIND */ bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]); if (bpage) { @@ -172,13 +179,13 @@ buf_buddy_alloc_zip( return(bpage); } -/************************************************************************** +/**********************************************************************//** Deallocate a buffer frame of UNIV_PAGE_SIZE. */ static void buf_buddy_block_free( /*=================*/ - void* buf, /* in: buffer frame to deallocate */ + void* buf, /*!< in: buffer frame to deallocate */ ibool have_page_hash_mutex) { const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf); @@ -216,17 +223,18 @@ buf_buddy_block_free( ut_d(buf_buddy_n_frames--); } -/************************************************************************** +/**********************************************************************//** Allocate a buffer block to the buddy allocator. */ static void buf_buddy_block_register( /*=====================*/ - buf_block_t* block) /* in: buffer frame to allocate */ + buf_block_t* block) /*!< in: buffer frame to allocate */ { const ulint fold = BUF_POOL_ZIP_FOLD(block); //ut_ad(buf_pool_mutex_own()); ut_ad(!mutex_own(&buf_pool_zip_mutex)); + ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE); buf_block_set_state(block, BUF_BLOCK_MEMORY); @@ -244,16 +252,16 @@ buf_buddy_block_register( ut_d(buf_buddy_n_frames++); } -/************************************************************************** -Allocate a block from a bigger object. */ +/**********************************************************************//** +Allocate a block from a bigger object. +@return allocated block */ static void* buf_buddy_alloc_from( /*=================*/ - /* out: allocated block */ - void* buf, /* in: a block that is free to use */ - ulint i, /* in: index of buf_pool->zip_free[] */ - ulint j) /* in: size of buf as an index + void* buf, /*!< in: a block that is free to use */ + ulint i, /*!< in: index of buf_pool->zip_free[] */ + ulint j) /*!< in: size of buf as an index of buf_pool->zip_free[] */ { ulint offs = BUF_BUDDY_LOW << j; @@ -271,29 +279,31 @@ buf_buddy_alloc_from( bpage = (buf_page_t*) ((byte*) buf + offs); ut_d(memset(bpage, j, BUF_BUDDY_LOW << j)); bpage->state = BUF_BLOCK_ZIP_FREE; -#if defined UNIV_DEBUG && !defined UNIV_DEBUG_VALGRIND +#ifndef UNIV_DEBUG_VALGRIND /* Valgrind would complain about accessing free memory. 
*/ - UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[j]); -#endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */ + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i], + ut_ad(buf_page_get_state( + ut_list_node_313) + == BUF_BLOCK_ZIP_FREE))); +#endif /* !UNIV_DEBUG_VALGRIND */ buf_buddy_add_to_free(bpage, j); } return(buf); } -/************************************************************************** +/**********************************************************************//** Allocate a block. The thread calling this function must hold buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex. -The buf_pool_mutex may only be released and reacquired if lru != NULL. */ +The buf_pool_mutex may only be released and reacquired if lru != NULL. +@return allocated block, possibly NULL if lru==NULL */ UNIV_INTERN void* buf_buddy_alloc_low( /*================*/ - /* out: allocated block, - possibly NULL if lru==NULL */ - ulint i, /* in: index of buf_pool->zip_free[], + ulint i, /*!< in: index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */ - ibool* lru, /* in: pointer to a variable that will be assigned + ibool* lru, /*!< in: pointer to a variable that will be assigned TRUE if storage was allocated from the LRU list and buf_pool_mutex was temporarily released, or NULL if the LRU list should not be used */ @@ -357,15 +367,15 @@ func_exit: return(block); } -/************************************************************************** -Try to relocate the control block of a compressed page. */ +/**********************************************************************//** +Try to relocate the control block of a compressed page. +@return TRUE if relocated */ static ibool buf_buddy_relocate_block( /*=====================*/ - /* out: TRUE if relocated */ - buf_page_t* bpage, /* in: block to relocate */ - buf_page_t* dpage) /* in: free block to relocate to */ + buf_page_t* bpage, /*!< in: block to relocate */ + buf_page_t* dpage) /*!< in: free block to relocate to */ { buf_page_t* b; @@ -425,16 +435,16 @@ buf_buddy_relocate_block( return(TRUE); } -/************************************************************************** -Try to relocate a block. */ +/**********************************************************************//** +Try to relocate a block. +@return TRUE if relocated */ static ibool buf_buddy_relocate( /*===============*/ - /* out: TRUE if relocated */ - void* src, /* in: block to relocate */ - void* dst, /* in: free block to relocate to */ - ulint i, /* in: index of buf_pool->zip_free[] */ + void* src, /*!< in: block to relocate */ + void* dst, /*!< in: free block to relocate to */ + ulint i, /*!< in: index of buf_pool->zip_free[] */ ibool have_page_hash_mutex) { buf_page_t* bpage; @@ -461,16 +471,15 @@ buf_buddy_relocate( actually is a properly initialized buf_page_t object. */ if (size >= PAGE_ZIP_MIN_SIZE) { - mutex_t* mutex; - if (!have_page_hash_mutex) - mutex_exit(&zip_free_mutex); - /* This is a compressed page. */ + mutex_t* mutex; if (!have_page_hash_mutex) { + mutex_exit(&zip_free_mutex); mutex_enter(&LRU_list_mutex); rw_lock_x_lock(&page_hash_latch); } + /* The src block may be split into smaller blocks, some of which may be free. Thus, the mach_read_from_4() calls below may attempt to read @@ -521,15 +530,9 @@ buf_buddy_relocate( contain uninitialized data. 
*/ UNIV_MEM_ASSERT_W(src, size); - mutex = buf_page_get_mutex(bpage); + mutex = buf_page_get_mutex_enter(bpage); + ut_a(mutex); -retry_lock: - mutex_enter(mutex); - if (mutex != buf_page_get_mutex(bpage)) { - mutex_exit(mutex); - mutex = buf_page_get_mutex(bpage); - goto retry_lock; - } mutex_enter(&zip_free_mutex); if (buf_page_can_relocate(bpage)) { @@ -594,15 +597,16 @@ success: return(FALSE); } -/************************************************************************** +/**********************************************************************//** Deallocate a block. */ UNIV_INTERN void buf_buddy_free_low( /*===============*/ - void* buf, /* in: block to be freed, must not be + void* buf, /*!< in: block to be freed, must not be pointed to by the buffer pool */ - ulint i, /* in: index of buf_pool->zip_free[] */ + ulint i, /*!< in: index of buf_pool->zip_free[], + or BUF_BUDDY_SIZES */ ibool have_page_hash_mutex) { buf_page_t* bpage; @@ -676,7 +680,9 @@ buddy_free2: #ifndef UNIV_DEBUG_VALGRIND buddy_nonfree: /* Valgrind would complain about accessing free memory. */ - ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i])); + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i], + ut_ad(buf_page_get_state(ut_list_node_313) + == BUF_BLOCK_ZIP_FREE))); #endif /* UNIV_DEBUG_VALGRIND */ /* The buddy is not free. Is there a free block of this size? */ @@ -702,21 +708,20 @@ buddy_nonfree: buddy = (buf_page_t*) buf_buddy_get(((byte*) bpage), BUF_BUDDY_LOW << i); -#if defined UNIV_DEBUG && !defined UNIV_DEBUG_VALGRIND - { - const buf_page_t* b; +#ifndef UNIV_DEBUG_VALGRIND + /* Valgrind would complain about accessing free memory. */ - /* The buddy must not be (completely) free, because - we always recombine adjacent free blocks. - (Parts of the buddy can be free in - buf_pool->zip_free[j] with j < i.)*/ - for (b = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); - b; b = UT_LIST_GET_NEXT(zip_list, b)) { + /* The buddy must not be (completely) free, because we + always recombine adjacent free blocks. - ut_a(b != buddy); - } - } -#endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */ + (Parts of the buddy can be free in + buf_pool->zip_free[j] with j < i.) 
*/ + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i], + ut_ad(buf_page_get_state( + ut_list_node_313) + == BUF_BLOCK_ZIP_FREE + && ut_list_node_313 != buddy))); +#endif /* !UNIV_DEBUG_VALGRIND */ if (buf_buddy_relocate(buddy, buf, i, have_page_hash_mutex)) { diff --git a/storage/xtradb/buf/buf0buf.c b/storage/xtradb/buf/buf0buf.c index a02b7879121..8da0a87751d 100644 --- a/storage/xtradb/buf/buf0buf.c +++ b/storage/xtradb/buf/buf0buf.c @@ -23,7 +23,8 @@ Place, Suite 330, Boston, MA 02111-1307 USA *****************************************************************************/ -/****************************************************** +/**************************************************//** +@file buf/buf0buf.c The database buffer buf_pool Created 11/5/1995 Heikki Tuuri @@ -35,17 +36,20 @@ Created 11/5/1995 Heikki Tuuri #include "buf0buf.ic" #endif -#include "buf0buddy.h" #include "mem0mem.h" #include "btr0btr.h" #include "fil0fil.h" +#ifndef UNIV_HOTBACKUP +#include "buf0buddy.h" #include "lock0lock.h" #include "btr0sea.h" #include "ibuf0ibuf.h" -#include "dict0dict.h" -#include "log0recv.h" #include "trx0undo.h" +#include "log0log.h" +#endif /* !UNIV_HOTBACKUP */ #include "srv0srv.h" +#include "dict0dict.h" +#include "log0recv.h" #include "page0zip.h" /* @@ -235,13 +239,14 @@ that the whole area may be needed in the near future, and issue the read requests for the whole area. */ -/* Value in microseconds */ +#ifndef UNIV_HOTBACKUP +/** Value in microseconds */ static const int WAIT_FOR_READ = 5000; -/* The buffer buf_pool of the database */ +/** The buffer buf_pool of the database */ UNIV_INTERN buf_pool_t* buf_pool = NULL; -/* mutex protecting the buffer pool struct and control blocks, except the +/** mutex protecting the buffer pool struct and control blocks, except the read-write lock in them */ UNIV_INTERN mutex_t buf_pool_mutex; UNIV_INTERN mutex_t LRU_list_mutex; @@ -250,12 +255,12 @@ UNIV_INTERN rw_lock_t page_hash_latch; UNIV_INTERN mutex_t free_list_mutex; UNIV_INTERN mutex_t zip_free_mutex; UNIV_INTERN mutex_t zip_hash_mutex; -/* mutex protecting the control blocks of compressed-only pages +/** mutex protecting the control blocks of compressed-only pages (of type buf_page_t, not buf_block_t) */ UNIV_INTERN mutex_t buf_pool_zip_mutex; #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -static ulint buf_dbg_counter = 0; /* This is used to insert validation +static ulint buf_dbg_counter = 0; /*!< This is used to insert validation operations in excution in the debug version */ /** Flag to forbid the release of the buffer pool mutex. @@ -263,30 +268,31 @@ Protected by buf_pool_mutex. */ UNIV_INTERN ulint buf_pool_mutex_exit_forbidden = 0; #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ #ifdef UNIV_DEBUG -/* If this is set TRUE, the program prints info whenever +/** If this is set TRUE, the program prints info whenever read-ahead or flush occurs */ UNIV_INTERN ibool buf_debug_prints = FALSE; #endif /* UNIV_DEBUG */ -/* A chunk of buffers. The buffer pool is allocated in chunks. */ +/** A chunk of buffers. The buffer pool is allocated in chunks. 
*/ struct buf_chunk_struct{ - ulint mem_size; /* allocated size of the chunk */ - ulint size; /* size of frames[] and blocks[] */ - void* mem; /* pointer to the memory area which + ulint mem_size; /*!< allocated size of the chunk */ + ulint size; /*!< size of frames[] and blocks[] */ + void* mem; /*!< pointer to the memory area which was allocated for the frames */ - buf_block_t* blocks; /* array of buffer control blocks */ + buf_block_t* blocks; /*!< array of buffer control blocks */ }; +#endif /* !UNIV_HOTBACKUP */ -/************************************************************************ +/********************************************************************//** Calculates a page checksum which is stored to the page when it is written to a file. Note that we must be careful to calculate the same value on -32-bit and 64-bit architectures. */ +32-bit and 64-bit architectures. +@return checksum */ UNIV_INTERN ulint buf_calc_page_new_checksum( /*=======================*/ - /* out: checksum */ - const byte* page) /* in: buffer page */ + const byte* page) /*!< in: buffer page */ { ulint checksum; @@ -308,19 +314,19 @@ buf_calc_page_new_checksum( return(checksum); } -/************************************************************************ +/********************************************************************//** In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only looked at the first few bytes of the page. This calculates that old checksum. NOTE: we must first store the new formula checksum to FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum -because this takes that field as an input! */ +because this takes that field as an input! +@return checksum */ UNIV_INTERN ulint buf_calc_page_old_checksum( /*=======================*/ - /* out: checksum */ - const byte* page) /* in: buffer page */ + const byte* page) /*!< in: buffer page */ { ulint checksum; @@ -331,22 +337,20 @@ buf_calc_page_old_checksum( return(checksum); } -/************************************************************************ -Checks if a page is corrupt. */ +/********************************************************************//** +Checks if a page is corrupt. +@return TRUE if corrupted */ UNIV_INTERN ibool buf_page_is_corrupted( /*==================*/ - /* out: TRUE if corrupted */ - const byte* read_buf, /* in: a database page */ - ulint zip_size) /* in: size of compressed page; + const byte* read_buf, /*!< in: a database page */ + ulint zip_size) /*!< in: size of compressed page; 0 for uncompressed pages */ { ulint checksum_field; ulint old_checksum_field; -#ifndef UNIV_HOTBACKUP - ib_uint64_t current_lsn; -#endif + if (UNIV_LIKELY(!zip_size) && memcmp(read_buf + FIL_PAGE_LSN + 4, read_buf + UNIV_PAGE_SIZE @@ -359,8 +363,11 @@ buf_page_is_corrupted( } #ifndef UNIV_HOTBACKUP - if (recv_lsn_checks_on && log_peek_lsn(¤t_lsn)) { - if (current_lsn < mach_read_ull(read_buf + FIL_PAGE_LSN)) { + if (recv_lsn_checks_on) { + ib_uint64_t current_lsn; + + if (log_peek_lsn(¤t_lsn) + && current_lsn < mach_read_ull(read_buf + FIL_PAGE_LSN)) { ut_print_timestamp(stderr); fprintf(stderr, @@ -372,8 +379,7 @@ buf_page_is_corrupted( "you may have copied the InnoDB\n" "InnoDB: tablespace but not the InnoDB " "log files. 
See\n" - "InnoDB: http://dev.mysql.com/doc/refman/" - "5.1/en/forcing-recovery.html\n" + "InnoDB: " REFMAN "forcing-recovery.html\n" "InnoDB: for more information.\n", (ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET), @@ -434,17 +440,19 @@ buf_page_is_corrupted( return(FALSE); } -/************************************************************************ +/********************************************************************//** Prints a page to stderr. */ UNIV_INTERN void buf_page_print( /*===========*/ - const byte* read_buf, /* in: a database page */ - ulint zip_size) /* in: compressed page size, or + const byte* read_buf, /*!< in: a database page */ + ulint zip_size) /*!< in: compressed page size, or 0 for uncompressed pages */ { +#ifndef UNIV_HOTBACKUP dict_index_t* index; +#endif /* !UNIV_HOTBACKUP */ ulint checksum; ulint old_checksum; ulint size = zip_size; @@ -558,6 +566,7 @@ buf_page_print( (ulong) mach_read_from_4(read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID)); +#ifndef UNIV_HOTBACKUP if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT) { fprintf(stderr, @@ -568,6 +577,7 @@ buf_page_print( fprintf(stderr, "InnoDB: Page may be an update undo log page\n"); } +#endif /* !UNIV_HOTBACKUP */ switch (fil_page_get_type(read_buf)) { case FIL_PAGE_INDEX: @@ -578,16 +588,7 @@ buf_page_print( btr_page_get_index_id(read_buf)), (ulong) ut_dulint_get_low( btr_page_get_index_id(read_buf))); - -#ifdef UNIV_HOTBACKUP - /* If the code is in ibbackup, dict_sys may be uninitialized, - i.e., NULL */ - - if (dict_sys == NULL) { - break; - } -#endif /* UNIV_HOTBACKUP */ - +#ifndef UNIV_HOTBACKUP index = dict_index_find_on_id_low( btr_page_get_index_id(read_buf)); if (index) { @@ -595,6 +596,7 @@ buf_page_print( dict_index_name_print(stderr, NULL, index); fputs(")\n", stderr); } +#endif /* !UNIV_HOTBACKUP */ break; case FIL_PAGE_INODE: fputs("InnoDB: Page may be an 'inode' page\n", stderr); @@ -639,14 +641,15 @@ buf_page_print( } } -/************************************************************************ +#ifndef UNIV_HOTBACKUP +/********************************************************************//** Initializes a buffer control block when the buf_pool is created. */ static void buf_block_init( /*===========*/ - buf_block_t* block, /* in: pointer to control block */ - byte* frame) /* in: pointer to buffer frame */ + buf_block_t* block, /*!< in: pointer to control block */ + byte* frame) /*!< in: pointer to buffer frame */ { UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block); @@ -688,15 +691,15 @@ buf_block_init( #endif /* UNIV_SYNC_DEBUG */ } -/************************************************************************ -Allocates a chunk of buffer frames. */ +/********************************************************************//** +Allocates a chunk of buffer frames. +@return chunk, or NULL on failure */ static buf_chunk_t* buf_chunk_init( /*===========*/ - /* out: chunk, or NULL on failure */ - buf_chunk_t* chunk, /* out: chunk of buffers */ - ulint mem_size) /* in: requested size in bytes */ + buf_chunk_t* chunk, /*!< out: chunk of buffers */ + ulint mem_size) /*!< in: requested size in bytes */ { buf_block_t* block; byte* frame; @@ -770,17 +773,16 @@ buf_chunk_init( } #ifdef UNIV_DEBUG -/************************************************************************* +/*********************************************************************//** Finds a block in the given buffer chunk that points to a -given compressed page. */ +given compressed page. 
+@return buffer block pointing to the compressed page, or NULL */ static buf_block_t* buf_chunk_contains_zip( /*===================*/ - /* out: buffer block pointing to - the compressed page, or NULL */ - buf_chunk_t* chunk, /* in: chunk being checked */ - const void* data) /* in: pointer to compressed page */ + buf_chunk_t* chunk, /*!< in: chunk being checked */ + const void* data) /*!< in: pointer to compressed page */ { buf_block_t* block; ulint i; @@ -800,16 +802,15 @@ buf_chunk_contains_zip( return(NULL); } -/************************************************************************* +/*********************************************************************//** Finds a block in the buffer pool that points to a -given compressed page. */ +given compressed page. +@return buffer block pointing to the compressed page, or NULL */ UNIV_INTERN buf_block_t* buf_pool_contains_zip( /*==================*/ - /* out: buffer block pointing to - the compressed page, or NULL */ - const void* data) /* in: pointer to compressed page */ + const void* data) /*!< in: pointer to compressed page */ { ulint n; buf_chunk_t* chunk = buf_pool->chunks; @@ -826,15 +827,14 @@ buf_pool_contains_zip( } #endif /* UNIV_DEBUG */ -/************************************************************************* -Checks that all file pages in the buffer chunk are in a replaceable state. */ +/*********************************************************************//** +Checks that all file pages in the buffer chunk are in a replaceable state. +@return address of a non-free block, or NULL if all freed */ static const buf_block_t* buf_chunk_not_freed( /*================*/ - /* out: address of a non-free block, - or NULL if all freed */ - buf_chunk_t* chunk) /* in: chunk being checked */ + buf_chunk_t* chunk) /*!< in: chunk being checked */ { buf_block_t* block; ulint i; @@ -860,14 +860,14 @@ buf_chunk_not_freed( return(NULL); } -/************************************************************************* -Checks that all blocks in the buffer chunk are in BUF_BLOCK_NOT_USED state. */ +/*********************************************************************//** +Checks that all blocks in the buffer chunk are in BUF_BLOCK_NOT_USED state. +@return TRUE if all freed */ static ibool buf_chunk_all_free( /*===============*/ - /* out: TRUE if all freed */ - const buf_chunk_t* chunk) /* in: chunk being checked */ + const buf_chunk_t* chunk) /*!< in: chunk being checked */ { const buf_block_t* block; ulint i; @@ -888,13 +888,13 @@ buf_chunk_all_free( return(TRUE); } -/************************************************************************ +/********************************************************************//** Frees a chunk of buffer frames. */ static void buf_chunk_free( /*===========*/ - buf_chunk_t* chunk) /* out: chunk of buffers */ + buf_chunk_t* chunk) /*!< out: chunk of buffers */ { buf_block_t* block; const buf_block_t* block_end; @@ -928,14 +928,13 @@ buf_chunk_free( os_mem_free_large(chunk->mem, chunk->mem_size); } -/************************************************************************ -Creates the buffer pool. */ +/********************************************************************//** +Creates the buffer pool. 
+@return own: buf_pool object, NULL if not enough memory or error */ UNIV_INTERN buf_pool_t* buf_pool_init(void) /*===============*/ - /* out, own: buf_pool object, NULL if not - enough memory or error */ { buf_chunk_t* chunk; ulint i; @@ -1005,7 +1004,7 @@ buf_pool_init(void) return(buf_pool); } -/************************************************************************ +/********************************************************************//** Frees the buffer pool at shutdown. This must not be invoked before freeing all mutexes. */ UNIV_INTERN @@ -1028,8 +1027,7 @@ buf_pool_free(void) buf_pool->n_chunks = 0; } - -/************************************************************************ +/********************************************************************//** Drops the adaptive hash index. To prevent a livelock, this function is only to be called while holding btr_search_latch and while btr_search_enabled == FALSE. */ @@ -1110,7 +1108,7 @@ buf_pool_drop_hash_index(void) } while (released_search_latch); } -/************************************************************************ +/********************************************************************//** Relocate a buffer control block. Relocates the block on the LRU list and in buf_pool->page_hash. Does not relocate bpage->list. The caller must take care of relocating bpage->list. */ @@ -1118,10 +1116,10 @@ UNIV_INTERN void buf_relocate( /*=========*/ - buf_page_t* bpage, /* in/out: control block being relocated; + buf_page_t* bpage, /*!< in/out: control block being relocated; buf_page_get_state(bpage) must be BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */ - buf_page_t* dpage) /* in/out: destination control block */ + buf_page_t* dpage) /*!< in/out: destination control block */ { buf_page_t* b; ulint fold; @@ -1180,7 +1178,8 @@ buf_relocate( #endif /* UNIV_LRU_DEBUG */ } - ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU)); + ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU, + ut_ad(ut_list_node_313->in_LRU_list))); /* relocate buf_pool->page_hash */ fold = buf_page_address_fold(bpage->space, bpage->offset); @@ -1191,14 +1190,13 @@ buf_relocate( UNIV_MEM_INVALID(bpage, sizeof *bpage); } -/************************************************************************ +/********************************************************************//** Shrinks the buffer pool. */ static void buf_pool_shrink( /*============*/ - /* out: TRUE if shrunk */ - ulint chunk_size) /* in: number of pages to remove */ + ulint chunk_size) /*!< in: number of pages to remove */ { buf_chunk_t* chunks; buf_chunk_t* chunk; @@ -1346,7 +1344,7 @@ func_exit: btr_search_enable(); } -/************************************************************************ +/********************************************************************//** Rebuild buf_pool->page_hash. */ static void @@ -1450,7 +1448,7 @@ buf_pool_page_hash_rebuild(void) mutex_exit(&flush_list_mutex); } -/************************************************************************ +/********************************************************************//** Resizes the buffer pool. */ UNIV_INTERN void @@ -1511,14 +1509,14 @@ buf_pool_resize(void) buf_pool_page_hash_rebuild(); } -/************************************************************************ -Moves to the block to the start of the LRU list if there is a danger +/********************************************************************//** +Moves the block to the start of the LRU list if there is a danger that the block would drift out of the buffer pool. 
*/ UNIV_INLINE void buf_block_make_young( /*=================*/ - buf_page_t* bpage) /* in: block to make younger */ + buf_page_t* bpage) /*!< in: block to make younger */ { ut_ad(!buf_pool_mutex_own()); @@ -1538,7 +1536,7 @@ buf_block_make_young( } } -/************************************************************************ +/********************************************************************//** Moves a page to the start of the buffer pool LRU list. This high-level function can be used to prevent an important page from from slipping out of the buffer pool. */ @@ -1546,7 +1544,7 @@ UNIV_INTERN void buf_page_make_young( /*================*/ - buf_page_t* bpage) /* in: buffer block of a file page */ + buf_page_t* bpage) /*!< in: buffer block of a file page */ { //buf_pool_mutex_enter(); mutex_enter(&LRU_list_mutex); @@ -1559,15 +1557,15 @@ buf_page_make_young( mutex_exit(&LRU_list_mutex); } -/************************************************************************ +/********************************************************************//** Resets the check_index_page_at_flush field of a page if found in the buffer pool. */ UNIV_INTERN void buf_reset_check_index_page_at_flush( /*================================*/ - ulint space, /* in: space id */ - ulint offset) /* in: page number */ + ulint space, /*!< in: space id */ + ulint offset) /*!< in: page number */ { buf_block_t* block; @@ -1584,18 +1582,17 @@ buf_reset_check_index_page_at_flush( rw_lock_s_unlock(&page_hash_latch); } -/************************************************************************ +/********************************************************************//** Returns the current state of is_hashed of a page. FALSE if the page is not in the pool. NOTE that this operation does not fix the page in the -pool if it is found there. */ +pool if it is found there. +@return TRUE if page hash index is built in search system */ UNIV_INTERN ibool buf_page_peek_if_search_hashed( /*===========================*/ - /* out: TRUE if page hash index is built in search - system */ - ulint space, /* in: space id */ - ulint offset) /* in: page number */ + ulint space, /*!< in: space id */ + ulint offset) /*!< in: page number */ { buf_block_t* block; ibool is_hashed; @@ -1618,19 +1615,18 @@ buf_page_peek_if_search_hashed( } #ifdef UNIV_DEBUG_FILE_ACCESSES -/************************************************************************ +/********************************************************************//** Sets file_page_was_freed TRUE if the page is found in the buffer pool. This function should be called when we free a file page and want the debug version to check that it is not accessed any more unless -reallocated. */ +reallocated. +@return control block if found in page hash table, otherwise NULL */ UNIV_INTERN buf_page_t* buf_page_set_file_page_was_freed( /*=============================*/ - /* out: control block if found in page hash table, - otherwise NULL */ - ulint space, /* in: space id */ - ulint offset) /* in: page number */ + ulint space, /*!< in: space id */ + ulint offset) /*!< in: page number */ { buf_page_t* bpage; @@ -1649,19 +1645,18 @@ buf_page_set_file_page_was_freed( return(bpage); } -/************************************************************************ +/********************************************************************//** Sets file_page_was_freed FALSE if the page is found in the buffer pool. 
This function should be called when we free a file page and want the debug version to check that it is not accessed any more unless -reallocated. */ +reallocated. +@return control block if found in page hash table, otherwise NULL */ UNIV_INTERN buf_page_t* buf_page_reset_file_page_was_freed( /*===============================*/ - /* out: control block if found in page hash table, - otherwise NULL */ - ulint space, /* in: space id */ - ulint offset) /* in: page number */ + ulint space, /*!< in: space id */ + ulint offset) /*!< in: page number */ { buf_page_t* bpage; @@ -1681,22 +1676,22 @@ buf_page_reset_file_page_was_freed( } #endif /* UNIV_DEBUG_FILE_ACCESSES */ -/************************************************************************ +/********************************************************************//** Get read access to a compressed page (usually of type FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2). The page must be released with buf_page_release_zip(). NOTE: the page is not protected by any latch. Mutual exclusion has to be implemented at a higher level. In other words, all possible accesses to a given page through this function must be protected by -the same set of mutexes or latches. */ +the same set of mutexes or latches. +@return pointer to the block */ UNIV_INTERN buf_page_t* buf_page_get_zip( /*=============*/ - /* out: pointer to the block */ - ulint space, /* in: space id */ - ulint zip_size,/* in: compressed page size */ - ulint offset) /* in: page number */ + ulint space, /*!< in: space id */ + ulint zip_size,/*!< in: compressed page size */ + ulint offset) /*!< in: page number */ { buf_page_t* bpage; mutex_t* block_mutex; @@ -1730,19 +1725,13 @@ lookup: if (UNIV_UNLIKELY(!bpage->zip.data)) { /* There is no compressed page. */ +err_exit: //buf_pool_mutex_exit(); rw_lock_s_unlock(&page_hash_latch); return(NULL); } - block_mutex = buf_page_get_mutex(bpage); -retry_lock: - mutex_enter(block_mutex); - if (block_mutex != buf_page_get_mutex(bpage)) { - mutex_exit(block_mutex); - block_mutex = buf_page_get_mutex(bpage); - goto retry_lock; - } + block_mutex = buf_page_get_mutex_enter(bpage); rw_lock_s_unlock(&page_hash_latch); @@ -1752,13 +1741,17 @@ retry_lock: case BUF_BLOCK_MEMORY: case BUF_BLOCK_REMOVE_HASH: case BUF_BLOCK_ZIP_FREE: - ut_error; + if (block_mutex) + mutex_exit(block_mutex); break; case BUF_BLOCK_ZIP_PAGE: case BUF_BLOCK_ZIP_DIRTY: + ut_a(block_mutex == &buf_pool_zip_mutex); bpage->buf_fix_count++; - break; + goto got_block; case BUF_BLOCK_FILE_PAGE: + ut_a(block_mutex == &((buf_block_t*) bpage)->mutex); + /* Discard the uncompressed page frame if possible. */ if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE) == BUF_LRU_FREED) { @@ -1769,9 +1762,13 @@ retry_lock: buf_block_buf_fix_inc((buf_block_t*) bpage, __FILE__, __LINE__); - break; + goto got_block; } + ut_error; + goto err_exit; + +got_block: must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ; //buf_pool_mutex_exit(); @@ -1819,13 +1816,13 @@ retry_lock: return(bpage); } -/************************************************************************ +/********************************************************************//** Initialize some fields of a control block. 
*/ UNIV_INLINE void buf_block_init_low( /*===============*/ - buf_block_t* block) /* in: block to init */ + buf_block_t* block) /*!< in: block to init */ { block->check_index_page_at_flush = FALSE; block->index = NULL; @@ -1836,16 +1833,17 @@ buf_block_init_low( block->n_bytes = 0; block->left_side = TRUE; } +#endif /* !UNIV_HOTBACKUP */ -/************************************************************************ -Decompress a block. */ -static +/********************************************************************//** +Decompress a block. +@return TRUE if successful */ +UNIV_INTERN ibool buf_zip_decompress( /*===============*/ - /* out: TRUE if successful */ - buf_block_t* block, /* in/out: block */ - ibool check) /* in: TRUE=verify the page checksum */ + buf_block_t* block, /*!< in/out: block */ + ibool check) /*!< in: TRUE=verify the page checksum */ { const byte* frame = block->page.zip.data; @@ -1903,14 +1901,15 @@ buf_zip_decompress( return(FALSE); } -/*********************************************************************** -Gets the block to whose frame the pointer is pointing to. */ +#ifndef UNIV_HOTBACKUP +/*******************************************************************//** +Gets the block to whose frame the pointer is pointing to. +@return pointer to block, never NULL */ UNIV_INTERN buf_block_t* buf_block_align( /*============*/ - /* out: pointer to block, never NULL */ - const byte* ptr) /* in: pointer to a frame */ + const byte* ptr) /*!< in: pointer to a frame */ { buf_chunk_t* chunk; ulint i; @@ -1990,31 +1989,25 @@ buf_block_align( return(NULL); } -/************************************************************************ -Find out if a buffer block was created by buf_chunk_init(). */ -static +/********************************************************************//** +Find out if a pointer belongs to a buf_block_t. It can be a pointer to +the buf_block_t itself or a member of it +@return TRUE if ptr belongs to a buf_block_t struct */ +UNIV_INTERN ibool -buf_block_is_uncompressed( -/*======================*/ - /* out: TRUE if "block" has - been added to buf_pool->free - by buf_chunk_init() */ - const buf_block_t* block) /* in: pointer to block, - not dereferenced */ +buf_pointer_is_block_field( +/*=======================*/ + const void* ptr) /*!< in: pointer not + dereferenced */ { const buf_chunk_t* chunk = buf_pool->chunks; const buf_chunk_t* const echunk = chunk + buf_pool->n_chunks; - //ut_ad(buf_pool_mutex_own()); - - if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) { - /* The pointer should be aligned. */ - return(FALSE); - } - + /* TODO: protect buf_pool->chunks with a mutex (it will + currently remain constant after buf_pool_init()) */ while (chunk < echunk) { - if (block >= chunk->blocks - && block < chunk->blocks + chunk->size) { + if (ptr >= (void *)chunk->blocks + && ptr < (void *)(chunk->blocks + chunk->size)) { return(TRUE); } @@ -2025,24 +2018,44 @@ buf_block_is_uncompressed( return(FALSE); } -/************************************************************************ -This is the general function used to get access to a database page. */ +/********************************************************************//** +Find out if a buffer block was created by buf_chunk_init(). 
+@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */ +static +ibool +buf_block_is_uncompressed( +/*======================*/ + const buf_block_t* block) /*!< in: pointer to block, + not dereferenced */ +{ + //ut_ad(buf_pool_mutex_own()); + + if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) { + /* The pointer should be aligned. */ + return(FALSE); + } + + return(buf_pointer_is_block_field((void *)block)); +} + +/********************************************************************//** +This is the general function used to get access to a database page. +@return pointer to the block or NULL */ UNIV_INTERN buf_block_t* buf_page_get_gen( /*=============*/ - /* out: pointer to the block or NULL */ - ulint space, /* in: space id */ - ulint zip_size,/* in: compressed page size in bytes + ulint space, /*!< in: space id */ + ulint zip_size,/*!< in: compressed page size in bytes or 0 for uncompressed pages */ - ulint offset, /* in: page number */ - ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ - buf_block_t* guess, /* in: guessed block or NULL */ - ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL, + ulint offset, /*!< in: page number */ + ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ + buf_block_t* guess, /*!< in: guessed block or NULL */ + ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL, BUF_GET_NO_LATCH */ - const char* file, /* in: file name */ - ulint line, /* in: line where called */ - mtr_t* mtr) /* in: mini-transaction */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ + mtr_t* mtr) /*!< in: mini-transaction */ { buf_block_t* block; ibool accessed; @@ -2058,6 +2071,7 @@ buf_page_get_gen( ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL) || (mode == BUF_GET_NO_LATCH)); ut_ad(zip_size == fil_space_get_zip_size(space)); + ut_ad(ut_is_2pow(zip_size)); #ifndef UNIV_LOG_DEBUG ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL)); #endif @@ -2067,14 +2081,8 @@ loop: //buf_pool_mutex_enter(); if (block) { - block_mutex = buf_page_get_mutex((buf_page_t*)block); -retry_lock_1: - mutex_enter(block_mutex); - if (block_mutex != buf_page_get_mutex((buf_page_t*)block)) { - mutex_exit(block_mutex); - block_mutex = buf_page_get_mutex((buf_page_t*)block); - goto retry_lock_1; - } + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block); + ut_a(block_mutex); /* If the guess is a compressed page descriptor that has been allocated by buf_buddy_alloc(), it may have @@ -2102,14 +2110,8 @@ retry_lock_1: rw_lock_s_lock(&page_hash_latch); block = (buf_block_t*) buf_page_hash_get(space, offset); if (block) { - block_mutex = buf_page_get_mutex((buf_page_t*)block); -retry_lock_2: - mutex_enter(block_mutex); - if (block_mutex != buf_page_get_mutex((buf_page_t*)block)) { - mutex_exit(block_mutex); - block_mutex = buf_page_get_mutex((buf_page_t*)block); - goto retry_lock_2; - } + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block); + ut_a(block_mutex); } rw_lock_s_unlock(&page_hash_latch); } @@ -2161,12 +2163,16 @@ loop2: case BUF_BLOCK_ZIP_DIRTY: ut_ad(block_mutex == &buf_pool_zip_mutex); bpage = &block->page; + /* Protect bpage->buf_fix_count. */ + /* Already proteced here. */ + //mutex_enter(&buf_pool_zip_mutex); if (bpage->buf_fix_count || buf_page_get_io_fix(bpage) != BUF_IO_NONE) { /* This condition often occurs when the buffer is not buffer-fixed, but I/O-fixed by buf_page_init_for_read(). 
*/ + //mutex_exit(&buf_pool_zip_mutex); wait_until_unfixed: /* The block is buffer-fixed or I/O-fixed. Try again later. */ @@ -2179,6 +2185,7 @@ wait_until_unfixed: /* Allocate an uncompressed page. */ //buf_pool_mutex_exit(); + //mutex_exit(&buf_pool_zip_mutex); mutex_exit(block_mutex); block = buf_LRU_get_free_block(0); @@ -2204,14 +2211,8 @@ wait_until_unfixed: block = (buf_block_t*) hash_bpage; if (block) { - block_mutex = buf_page_get_mutex((buf_page_t*)block); -retry_lock_3: - mutex_enter(block_mutex); - if (block_mutex != buf_page_get_mutex((buf_page_t*)block)) { - mutex_exit(block_mutex); - block_mutex = buf_page_get_mutex((buf_page_t*)block); - goto retry_lock_3; - } + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block); + ut_a(block_mutex); } rw_lock_x_unlock(&page_hash_latch); mutex_exit(&LRU_list_mutex); @@ -2293,15 +2294,14 @@ retry_lock_3: block->page.buf_fix_count = 1; buf_block_set_io_fix(block, BUF_IO_READ); + rw_lock_x_lock(&block->lock); + mutex_exit(block_mutex); + mutex_exit(&buf_pool_zip_mutex); mutex_enter(&buf_pool_mutex); buf_pool->n_pend_unzip++; mutex_exit(&buf_pool_mutex); - rw_lock_x_lock(&block->lock); - mutex_exit(block_mutex); - mutex_exit(&buf_pool_zip_mutex); - buf_buddy_free(bpage, sizeof *bpage, FALSE); //buf_pool_mutex_exit(); @@ -2319,12 +2319,12 @@ retry_lock_3: //buf_pool_mutex_enter(); block_mutex = &block->mutex; mutex_enter(block_mutex); + block->page.buf_fix_count--; + buf_block_set_io_fix(block, BUF_IO_NONE); + mutex_enter(&buf_pool_mutex); buf_pool->n_pend_unzip--; mutex_exit(&buf_pool_mutex); - block->page.buf_fix_count--; - buf_block_set_io_fix(block, BUF_IO_NONE); - //mutex_exit(&block->mutex); rw_lock_x_unlock(&block->lock); if (UNIV_UNLIKELY(!success)) { @@ -2428,21 +2428,21 @@ retry_lock_3: return(block); } -/************************************************************************ +/********************************************************************//** This is the general function used to get optimistic access to a database -page. */ +page. +@return TRUE if success */ UNIV_INTERN ibool buf_page_optimistic_get_func( /*=========================*/ - /* out: TRUE if success */ - ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */ - buf_block_t* block, /* in: guessed buffer block */ - ib_uint64_t modify_clock,/* in: modify clock value if mode is + ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */ + buf_block_t* block, /*!< in: guessed buffer block */ + ib_uint64_t modify_clock,/*!< in: modify clock value if mode is ..._GUESS_ON_CLOCK */ - const char* file, /* in: file name */ - ulint line, /* in: line where called */ - mtr_t* mtr) /* in: mini-transaction */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ + mtr_t* mtr) /*!< in: mini-transaction */ { ibool accessed; ibool success; @@ -2538,21 +2538,21 @@ buf_page_optimistic_get_func( return(TRUE); } -/************************************************************************ +/********************************************************************//** This is used to get access to a known database page, when no waiting can be done. For example, if a search in an adaptive hash index leads us to this -frame. */ +frame. 
+@return TRUE if success */ UNIV_INTERN ibool buf_page_get_known_nowait( /*======================*/ - /* out: TRUE if success */ - ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */ - buf_block_t* block, /* in: the known page */ - ulint mode, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */ - const char* file, /* in: file name */ - ulint line, /* in: line where called */ - mtr_t* mtr) /* in: mini-transaction */ + ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */ + buf_block_t* block, /*!< in: the known page */ + ulint mode, /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ + mtr_t* mtr) /*!< in: mini-transaction */ { ibool success; ulint fix_type; @@ -2626,20 +2626,20 @@ buf_page_get_known_nowait( return(TRUE); } -/*********************************************************************** +/*******************************************************************//** Given a tablespace id and page number tries to get that page. If the page is not in the buffer pool it is not loaded and NULL is returned. -Suitable for using when holding the kernel mutex. */ +Suitable for using when holding the kernel mutex. +@return pointer to a page or NULL */ UNIV_INTERN const buf_block_t* buf_page_try_get_func( /*==================*/ - /* out: pointer to a page or NULL */ - ulint space_id,/* in: tablespace id */ - ulint page_no,/* in: page number */ - const char* file, /* in: file name */ - ulint line, /* in: line where called */ - mtr_t* mtr) /* in: mini-transaction */ + ulint space_id,/*!< in: tablespace id */ + ulint page_no,/*!< in: page number */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ + mtr_t* mtr) /*!< in: mini-transaction */ { buf_block_t* block; ibool success; @@ -2710,13 +2710,13 @@ buf_page_try_get_func( return(block); } -/************************************************************************ +/********************************************************************//** Initialize some fields of a control block. */ UNIV_INLINE void buf_page_init_low( /*==============*/ - buf_page_t* bpage) /* in: block to init */ + buf_page_t* bpage) /*!< in: block to init */ { bpage->flush_type = BUF_FLUSH_LRU; bpage->accessed = FALSE; @@ -2731,49 +2731,16 @@ buf_page_init_low( #endif /* UNIV_DEBUG_FILE_ACCESSES */ } -#ifdef UNIV_HOTBACKUP -/************************************************************************ -Inits a page to the buffer buf_pool, for use in ibbackup --restore. */ -UNIV_INTERN -void -buf_page_init_for_backup_restore( -/*=============================*/ - ulint space, /* in: space id */ - ulint offset, /* in: offset of the page within space - in units of a page */ - ulint zip_size,/* in: compressed page size in bytes - or 0 for uncompressed pages */ - buf_block_t* block) /* in: block to init */ -{ - buf_block_init_low(block); - - block->lock_hash_val = 0; - - buf_page_init_low(&block->page); - block->page.state = BUF_BLOCK_FILE_PAGE; - block->page.space = space; - block->page.offset = offset; - - page_zip_des_init(&block->page.zip); - - /* We assume that block->page.data has been allocated - with zip_size == UNIV_PAGE_SIZE. */ - ut_ad(zip_size <= UNIV_PAGE_SIZE); - ut_ad(ut_is_2pow(zip_size)); - page_zip_set_size(&block->page.zip, zip_size); -} -#endif /* UNIV_HOTBACKUP */ - -/************************************************************************ +/********************************************************************//** Inits a page to the buffer buf_pool. 
*/ static void buf_page_init( /*==========*/ - ulint space, /* in: space id */ - ulint offset, /* in: offset of the page within space + ulint space, /*!< in: space id */ + ulint offset, /*!< in: offset of the page within space in units of a page */ - buf_block_t* block) /* in: block to init */ + buf_block_t* block) /*!< in: block to init */ { buf_page_t* hash_page; @@ -2832,7 +2799,7 @@ buf_page_init( buf_page_address_fold(space, offset), &block->page); } -/************************************************************************ +/********************************************************************//** Function which inits a page for read to the buffer buf_pool. If the page is (1) already in buf_pool, or (2) if we specify to read only ibuf pages and the page is not an ibuf page, or @@ -2840,21 +2807,21 @@ Function which inits a page for read to the buffer buf_pool. If the page is then this function does nothing. Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock on the buffer frame. The io-handler must take care that the flag is cleared -and the lock released later. */ +and the lock released later. +@return pointer to the block or NULL */ UNIV_INTERN buf_page_t* buf_page_init_for_read( /*===================*/ - /* out: pointer to the block or NULL */ - ulint* err, /* out: DB_SUCCESS or DB_TABLESPACE_DELETED */ - ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ... */ - ulint space, /* in: space id */ - ulint zip_size,/* in: compressed page size, or 0 */ - ibool unzip, /* in: TRUE=request uncompressed page */ - ib_int64_t tablespace_version,/* in: prevents reading from a wrong + ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */ + ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */ + ulint space, /*!< in: space id */ + ulint zip_size,/*!< in: compressed page size, or 0 */ + ibool unzip, /*!< in: TRUE=request uncompressed page */ + ib_int64_t tablespace_version,/*!< in: prevents reading from a wrong version of the tablespace in case we have done DISCARD + IMPORT */ - ulint offset) /* in: page number */ + ulint offset) /*!< in: page number */ { buf_block_t* block; buf_page_t* bpage; @@ -3058,21 +3025,21 @@ func_exit: return(bpage); } -/************************************************************************ +/********************************************************************//** Initializes a page to the buffer buf_pool. The page is usually not read from a file even if it cannot be found in the buffer buf_pool. This is one of the functions which perform to a block a state transition NOT_USED => -FILE_PAGE (the other is buf_page_get_gen). */ +FILE_PAGE (the other is buf_page_get_gen). 
+@return pointer to the block, page bufferfixed */ UNIV_INTERN buf_block_t* buf_page_create( /*============*/ - /* out: pointer to the block, page bufferfixed */ - ulint space, /* in: space id */ - ulint offset, /* in: offset of the page within space in units of + ulint space, /*!< in: space id */ + ulint offset, /*!< in: offset of the page within space in units of a page */ - ulint zip_size,/* in: compressed page size, or 0 */ - mtr_t* mtr) /* in: mini-transaction handle */ + ulint zip_size,/*!< in: compressed page size, or 0 */ + mtr_t* mtr) /*!< in: mini-transaction handle */ { buf_frame_t* frame; buf_block_t* block; @@ -3206,14 +3173,14 @@ buf_page_create( return(block); } -/************************************************************************ +/********************************************************************//** Completes an asynchronous read or write request of a file page to or from the buffer pool. */ UNIV_INTERN void buf_page_io_complete( /*=================*/ - buf_page_t* bpage) /* in: pointer to the block in question */ + buf_page_t* bpage) /*!< in: pointer to the block in question */ { enum buf_io_fix io_type; const ibool uncompressed = (buf_page_get_state(bpage) @@ -3327,9 +3294,8 @@ corrupt: " You can use CHECK\n" "InnoDB: TABLE to scan your" " table for corruption.\n" - "InnoDB: See also" - " http://dev.mysql.com/doc/refman/5.1/en/" - "forcing-recovery.html\n" + "InnoDB: See also " + REFMAN "forcing-recovery.html\n" "InnoDB: about forcing recovery.\n", stderr); if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) { @@ -3343,7 +3309,7 @@ corrupt: if (recv_recovery_is_on()) { /* Pages must be uncompressed for crash recovery. */ ut_a(uncompressed); - recv_recover_page(FALSE, TRUE, (buf_block_t*) bpage); + recv_recover_page(TRUE, (buf_block_t*) bpage); } if (uncompressed && !recv_no_ibuf_operations) { @@ -3362,14 +3328,8 @@ corrupt: mutex_enter(&LRU_list_mutex); //} } - block_mutex = buf_page_get_mutex(bpage); -retry_lock: - mutex_enter(block_mutex); - if (block_mutex != buf_page_get_mutex(bpage)) { - mutex_exit(block_mutex); - block_mutex = buf_page_get_mutex(bpage); - goto retry_lock; - } + block_mutex = buf_page_get_mutex_enter(bpage); + ut_a(block_mutex); mutex_enter(&buf_pool_mutex); #ifdef UNIV_IBUF_COUNT_DEBUG @@ -3442,7 +3402,7 @@ retry_lock: //buf_pool_mutex_exit(); } -/************************************************************************* +/*********************************************************************//** Invalidates the file pages in the buffer pool when an archive recovery is completed. All the file pages buffered must be in a replaceable state when this function is called: not latched and not modified. */ @@ -3472,8 +3432,9 @@ buf_pool_invalidate(void) } #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/************************************************************************* -Validates the buffer buf_pool data structure. */ +/*********************************************************************//** +Validates the buffer buf_pool data structure. +@return TRUE */ UNIV_INTERN ibool buf_validate(void) @@ -3708,7 +3669,7 @@ buf_validate(void) #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/************************************************************************* +/*********************************************************************//** Prints info of the buffer buf_pool data structure. 
*/ UNIV_INTERN void @@ -3827,8 +3788,9 @@ buf_print(void) #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */ #ifdef UNIV_DEBUG -/************************************************************************* -Returns the number of latched pages in the buffer pool. */ +/*********************************************************************//** +Returns the number of latched pages in the buffer pool. +@return number of latched pages */ UNIV_INTERN ulint buf_get_latched_pages_number(void) @@ -3917,8 +3879,9 @@ buf_get_latched_pages_number(void) } #endif /* UNIV_DEBUG */ -/************************************************************************* -Returns the number of pending buf pool ios. */ +/*********************************************************************//** +Returns the number of pending buf pool ios. +@return number of pending I/O operations */ UNIV_INTERN ulint buf_get_n_pending_ios(void) @@ -3930,9 +3893,10 @@ buf_get_n_pending_ios(void) + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]); } -/************************************************************************* +/*********************************************************************//** Returns the ratio in percents of modified pages in the buffer pool / -database pages in the buffer pool. */ +database pages in the buffer pool. +@return modified page percentage ratio */ UNIV_INTERN ulint buf_get_modified_ratio_pct(void) @@ -3953,13 +3917,13 @@ buf_get_modified_ratio_pct(void) return(ratio); } -/************************************************************************* +/*********************************************************************//** Prints info of the buffer i/o. */ UNIV_INTERN void buf_print_io( /*=========*/ - FILE* file) /* in/out: buffer where to print */ + FILE* file) /*!< in/out: buffer where to print */ { time_t current_time; double time_elapsed; @@ -4046,7 +4010,7 @@ buf_print_io( mutex_exit(&flush_list_mutex); } -/************************************************************************** +/**********************************************************************//** Refreshes the statistics used to print per-second averages. */ UNIV_INTERN void @@ -4060,8 +4024,9 @@ buf_refresh_io_stats(void) buf_pool->n_pages_written_old = buf_pool->n_pages_written; } -/************************************************************************* -Checks that all file pages in the buffer are in a replaceable state. */ +/*********************************************************************//** +Asserts that all file pages in the buffer are in a replaceable state. +@return TRUE */ UNIV_INTERN ibool buf_all_freed(void) @@ -4094,14 +4059,14 @@ buf_all_freed(void) return(TRUE); } -/************************************************************************* +/*********************************************************************//** Checks that there currently are no pending i/o-operations for the buffer -pool. */ +pool. +@return TRUE if there is no pending i/o */ UNIV_INTERN ibool buf_pool_check_no_pending_io(void) /*==============================*/ - /* out: TRUE if there is no pending i/o */ { ibool ret; @@ -4122,8 +4087,9 @@ buf_pool_check_no_pending_io(void) return(ret); } -/************************************************************************* -Gets the current length of the free list of buffer blocks. */ +/*********************************************************************//** +Gets the current length of the free list of buffer blocks. 
+@return length of the free list */
 UNIV_INTERN
 ulint
 buf_get_free_list_len(void)
@@ -4141,3 +4107,33 @@ buf_get_free_list_len(void)
 return(len);
 }
+#else /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
+UNIV_INTERN
+void
+buf_page_init_for_backup_restore(
+/*=============================*/
+ ulint space, /*!< in: space id */
+ ulint offset, /*!< in: offset of the page within space
+ in units of a page */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ buf_block_t* block) /*!< in: block to init */
+{
+ block->page.state = BUF_BLOCK_FILE_PAGE;
+ block->page.space = space;
+ block->page.offset = offset;
+
+ page_zip_des_init(&block->page.zip);
+
+ /* We assume that block->page.data has been allocated
+ with zip_size == UNIV_PAGE_SIZE. */
+ ut_ad(zip_size <= UNIV_PAGE_SIZE);
+ ut_ad(ut_is_2pow(zip_size));
+ page_zip_set_size(&block->page.zip, zip_size);
+ if (zip_size) {
+ block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
+ }
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/buf/buf0flu.c b/storage/xtradb/buf/buf0flu.c
index ba8b0e9dc60..d465483691a 100644
--- a/storage/xtradb/buf/buf0flu.c
+++ b/storage/xtradb/buf/buf0flu.c
@@ -16,7 +16,8 @@ Place, Suite 330, Boston, MA 02111-1307 USA
 *****************************************************************************/
-/******************************************************
+/**************************************************//**
+@file buf/buf0flu.c
 The database buffer buf_pool flush algorithm
 Created 11/11/1995 Heikki Tuuri
@@ -26,40 +27,73 @@ Created 11/11/1995 Heikki Tuuri
 #ifdef UNIV_NONINL
 #include "buf0flu.ic"
-#include "trx0sys.h"
 #endif
+#include "buf0buf.h"
+#include "srv0srv.h"
+#include "page0zip.h"
+#ifndef UNIV_HOTBACKUP
 #include "ut0byte.h"
 #include "ut0lst.h"
 #include "page0page.h"
-#include "page0zip.h"
 #include "fil0fil.h"
-#include "buf0buf.h"
 #include "buf0lru.h"
 #include "buf0rea.h"
 #include "ibuf0ibuf.h"
 #include "log0log.h"
 #include "os0file.h"
 #include "trx0sys.h"
-#include "srv0srv.h"
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /**********************************************************************
-Validates the flush list. */
+These statistics are generated for heuristics used in estimating the
+rate at which we should flush the dirty blocks to avoid bursty IO
+activity. Note that the rate of flushing not only depends on how many
+dirty pages we have in the buffer pool but it is also a function of
+how much redo the workload is generating and at what rate. */
+/* @{ */
+
+/** Number of intervals for which we keep the history of these stats.
+Each interval is 1 second, defined by the rate at which
+srv_error_monitor_thread() calls buf_flush_stat_update(). */
+#define BUF_FLUSH_STAT_N_INTERVAL 20
+
+/** Sampled values buf_flush_stat_cur.
+Not protected by any mutex. Updated by buf_flush_stat_update(). */
+static buf_flush_stat_t buf_flush_stat_arr[BUF_FLUSH_STAT_N_INTERVAL];
+
+/** Cursor to buf_flush_stat_arr[]. Updated in a round-robin fashion. */
+static ulint buf_flush_stat_arr_ind;
+
+/** Values at start of the current interval. Reset by
+buf_flush_stat_update(). */
+static buf_flush_stat_t buf_flush_stat_cur;
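The declarations around this point implement a classic fixed-size sliding window: once per second the oldest sample is replaced and a running sum is adjusted, so an average over the last BUF_FLUSH_STAT_N_INTERVAL seconds costs O(1) per update. Below is a minimal, self-contained model of that technique with a plain counter standing in for buf_flush_stat_t; the window size matches, everything else is illustrative.

#include <stdio.h>

#define N_INTERVAL 20

static unsigned long arr[N_INTERVAL];   /* one sample per past interval */
static unsigned long arr_ind;           /* round-robin cursor */
static unsigned long win_sum;           /* running sum over the window */

/* Record the value observed during the interval that just ended.
Unsigned wraparound makes the subtract-then-add update exact even
when the new sample is smaller than the evicted one. */
static void
window_update(unsigned long sample)
{
    win_sum += sample - arr[arr_ind];
    arr[arr_ind] = sample;
    arr_ind = (arr_ind + 1) % N_INTERVAL;
}

int main(void)
{
    unsigned long t;

    for (t = 0; t < 100; t++) {
        window_update(t % 7);   /* synthetic per-second counter */
    }
    printf("average over last %d intervals: %lu\n",
           N_INTERVAL, win_sum / N_INTERVAL);
    return 0;
}

+
+/** Running sum of past values of buf_flush_stat_cur.
+Updated by buf_flush_stat_update(). Not protected by any mutex.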
*/ +static buf_flush_stat_t buf_flush_stat_sum; + +/** Number of pages flushed through non flush_list flushes. */ +static ulint buf_lru_flush_page_count = 0; + +/* @} */ + +#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG +/******************************************************************//** +Validates the flush list. +@return TRUE if ok */ static ibool buf_flush_validate_low(void); /*========================*/ - /* out: TRUE if ok */ #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -/************************************************************************ +/********************************************************************//** Inserts a modified block into the flush list. */ UNIV_INTERN void buf_flush_insert_into_flush_list( /*=============================*/ - buf_block_t* block) /* in/out: block which is modified */ + buf_block_t* block) /*!< in/out: block which is modified */ { //ut_ad(buf_pool_mutex_own()); ut_ad(mutex_own(&block->mutex)); @@ -81,7 +115,7 @@ buf_flush_insert_into_flush_list( #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ } -/************************************************************************ +/********************************************************************//** Inserts a modified block into the flush list in the right sorted position. This function is used by recovery, because there the modifications do not necessarily come in the order of lsn's. */ @@ -89,7 +123,7 @@ UNIV_INTERN void buf_flush_insert_sorted_into_flush_list( /*====================================*/ - buf_block_t* block) /* in/out: block which is modified */ + buf_block_t* block) /*!< in/out: block which is modified */ { buf_page_t* prev_b; buf_page_t* b; @@ -141,15 +175,15 @@ buf_flush_insert_sorted_into_flush_list( #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ } -/************************************************************************ +/********************************************************************//** Returns TRUE if the file page block is immediately suitable for replacement, -i.e., the transition FILE_PAGE => NOT_USED allowed. */ +i.e., the transition FILE_PAGE => NOT_USED allowed. +@return TRUE if can replace immediately */ UNIV_INTERN ibool buf_flush_ready_for_replace( /*========================*/ - /* out: TRUE if can replace immediately */ - buf_page_t* bpage) /* in: buffer control block, must be + buf_page_t* bpage) /*!< in: buffer control block, must be buf_page_in_file(bpage) and in the LRU list */ { //ut_ad(buf_pool_mutex_own()); @@ -177,16 +211,16 @@ buf_flush_ready_for_replace( return(FALSE); } -/************************************************************************ -Returns TRUE if the block is modified and ready for flushing. */ +/********************************************************************//** +Returns TRUE if the block is modified and ready for flushing. 
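buf_flush_insert_sorted_into_flush_list(), converted above, exists because crash recovery replays page modifications out of LSN order, so the plain add-to-front insert would break the flush list invariant (descending oldest_modification). Here is a toy version of that sorted insert on a singly-linked list; the real list is a doubly-linked UT_LIST protected by flush_list_mutex, so this sketch only illustrates the ordering logic.

#include <stdio.h>

typedef struct node {
    unsigned long oldest_modification;  /* lsn of the first change */
    struct node*  next;
} node_t;

/* Insert so the list stays sorted in descending oldest_modification
order, the invariant the flush list maintains. */
static void
insert_sorted(node_t** head, node_t* n)
{
    node_t** p = head;

    while (*p && (*p)->oldest_modification > n->oldest_modification) {
        p = &(*p)->next;
    }
    n->next = *p;
    *p = n;
}

int main(void)
{
    node_t  a = {10, NULL}, b = {30, NULL}, c = {20, NULL};
    node_t* head = NULL;
    node_t* it;

    insert_sorted(&head, &a);
    insert_sorted(&head, &b);
    insert_sorted(&head, &c);

    for (it = head; it; it = it->next) {
        printf("%lu ", it->oldest_modification);  /* 30 20 10 */
    }
    printf("\n");
    return 0;
}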
+@return TRUE if can flush immediately */ UNIV_INLINE ibool buf_flush_ready_for_flush( /*======================*/ - /* out: TRUE if can flush immediately */ - buf_page_t* bpage, /* in: buffer control block, must be + buf_page_t* bpage, /*!< in: buffer control block, must be buf_page_in_file(bpage) */ - enum buf_flush flush_type)/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ + enum buf_flush flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ { //ut_a(buf_page_in_file(bpage)); //ut_ad(buf_pool_mutex_own()); /*optimistic...*/ @@ -214,13 +248,13 @@ buf_flush_ready_for_flush( return(FALSE); } -/************************************************************************ +/********************************************************************//** Remove a block from the flush list of modified blocks. */ UNIV_INTERN void buf_flush_remove( /*=============*/ - buf_page_t* bpage) /* in: pointer to the block in question */ + buf_page_t* bpage) /*!< in: pointer to the block in question */ { //ut_ad(buf_pool_mutex_own()); ut_ad(mutex_own(buf_page_get_mutex(bpage))); @@ -253,17 +287,18 @@ buf_flush_remove( bpage->oldest_modification = 0; - ut_d(UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list)); + ut_d(UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list, + ut_ad(ut_list_node_313->in_flush_list))); mutex_exit(&flush_list_mutex); } -/************************************************************************ +/********************************************************************//** Updates the flush system data structures when a write is completed. */ UNIV_INTERN void buf_flush_write_complete( /*=====================*/ - buf_page_t* bpage) /* in: pointer to the block in question */ + buf_page_t* bpage) /*!< in: pointer to the block in question */ { enum buf_flush flush_type; @@ -295,7 +330,7 @@ buf_flush_write_complete( } } -/************************************************************************ +/********************************************************************//** Flushes possible buffered writes from the doublewrite memory buffer to disk, and also wakes up the aio thread if simulated aio is used. It is very important to call this function after a batch of writes has been posted, @@ -542,7 +577,7 @@ flush: mutex_exit(&(trx_doublewrite->mutex)); } -/************************************************************************ +/********************************************************************//** Posts a buffer page for writing. If the doublewrite memory buffer is full, calls buf_flush_buffered_writes and waits for for free space to appear. */ @@ -550,7 +585,7 @@ static void buf_flush_post_to_doublewrite_buf( /*==============================*/ - buf_page_t* bpage) /* in: buffer block to write */ + buf_page_t* bpage) /*!< in: buffer block to write */ { ulint zip_size; try_again: @@ -600,16 +635,17 @@ try_again: mutex_exit(&(trx_doublewrite->mutex)); } +#endif /* !UNIV_HOTBACKUP */ -/************************************************************************ +/********************************************************************//** Initializes a page for writing to the tablespace. 
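For context on buf_flush_post_to_doublewrite_buf() above: the doublewrite buffer defends against torn page writes by making every page durable in a contiguous staging area before it is written to its real location, so a crash in mid-write can be repaired from the staging copy. The sketch below shows only that ordering; write targets are simulated with printf, dblwr_flush()/dblwr_post() are hypothetical names, and the slot count is illustrative (the real buffer typically holds two 64-page blocks).

#include <stdio.h>

#define DBLWR_SLOTS 4   /* illustrative capacity of the staging area */

typedef struct { unsigned long page_no; } page_desc_t;

static page_desc_t* dblwr_buf[DBLWR_SLOTS];
static unsigned     n_posted;

static void sync_files(void) { printf("fsync\n"); }

/* Drain: staging area first, then the real locations.  The sync
between the two phases is what makes torn-write recovery possible. */
static void dblwr_flush(void)
{
    unsigned i;

    printf("write %u pages to doublewrite area\n", n_posted);
    sync_files();               /* staging copies are durable */

    for (i = 0; i < n_posted; i++) {
        printf("write page %lu in place\n", dblwr_buf[i]->page_no);
    }
    sync_files();               /* in-place copies are durable */
    n_posted = 0;
}

/* Post one page write; drain first if the staging area is full. */
static void dblwr_post(page_desc_t* page)
{
    if (n_posted == DBLWR_SLOTS) {
        dblwr_flush();
    }
    dblwr_buf[n_posted++] = page;
}

int main(void)
{
    page_desc_t pages[6] = {{1}, {2}, {3}, {4}, {5}, {6}};
    unsigned    i;

    for (i = 0; i < 6; i++) {
        dblwr_post(&pages[i]);
    }
    dblwr_flush();  /* flush the tail, as buf_flush_buffered_writes does */
    return 0;
}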
*/ UNIV_INTERN void buf_flush_init_for_writing( /*=======================*/ - byte* page, /* in/out: page */ - void* page_zip_, /* in/out: compressed page, or NULL */ - ib_uint64_t newest_lsn) /* in: newest modification lsn + byte* page, /*!< in/out: page */ + void* page_zip_, /*!< in/out: compressed page, or NULL */ + ib_uint64_t newest_lsn) /*!< in: newest modification lsn to the page */ { ut_ad(page); @@ -679,7 +715,8 @@ buf_flush_init_for_writing( : BUF_NO_CHECKSUM_MAGIC); } -/************************************************************************ +#ifndef UNIV_HOTBACKUP +/********************************************************************//** Does an asynchronous write of a buffer page. NOTE: in simulated aio and also when the doublewrite buffer is used, we must call buf_flush_buffered_writes after we have posted a batch of writes! */ @@ -687,7 +724,7 @@ static void buf_flush_write_block_low( /*======================*/ - buf_page_t* bpage) /* in: buffer block to write */ + buf_page_t* bpage) /*!< in: buffer block to write */ { ulint zip_size = buf_page_get_zip_size(bpage); page_t* frame = NULL; @@ -769,7 +806,7 @@ buf_flush_write_block_low( } } -/************************************************************************ +/********************************************************************//** Writes a flushable page asynchronously from the buffer pool to a file. NOTE: in simulated aio we must call os_aio_simulated_wake_handler_threads after we have posted a batch of @@ -780,8 +817,8 @@ static void buf_flush_page( /*===========*/ - buf_page_t* bpage, /* in: buffer control block */ - enum buf_flush flush_type) /* in: BUF_FLUSH_LRU + buf_page_t* bpage, /*!< in: buffer control block */ + enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ { mutex_t* block_mutex; @@ -892,16 +929,16 @@ buf_flush_page( buf_flush_write_block_low(bpage); } -/*************************************************************** -Flushes to disk all flushable pages within the flush area. */ +/***********************************************************//** +Flushes to disk all flushable pages within the flush area. +@return number of pages flushed */ static ulint buf_flush_try_neighbors( /*====================*/ - /* out: number of pages flushed */ - ulint space, /* in: space id */ - ulint offset, /* in: page offset */ - enum buf_flush flush_type, /* in: BUF_FLUSH_LRU or + ulint space, /*!< in: space id */ + ulint offset, /*!< in: page offset */ + enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ ulint flush_neighbors) { @@ -955,15 +992,9 @@ buf_flush_try_neighbors( if (flush_type != BUF_FLUSH_LRU || i == offset || buf_page_is_old(bpage)) { - mutex_t* block_mutex = buf_page_get_mutex(bpage); + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage); -retry_lock: - mutex_enter(block_mutex); - if (block_mutex != buf_page_get_mutex(bpage)) { - mutex_exit(block_mutex); - block_mutex = buf_page_get_mutex(bpage); - goto retry_lock; - } + ut_a(block_mutex); if (buf_flush_ready_for_flush(bpage, flush_type) && (i == offset || !bpage->buf_fix_count)) { @@ -993,28 +1024,26 @@ retry_lock: return(count); } -/*********************************************************************** +/*******************************************************************//** This utility flushes dirty blocks from the end of the LRU list or flush_list. 
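buf_flush_try_neighbors(), whose locking is simplified above, widens one page write into the surrounding aligned "flush area" so that neighboring dirty pages ride along in the same disk pass. A runnable illustration of the bounds arithmetic follows; the area size is hard-coded here, whereas InnoDB derives it from the buffer pool size.

#include <stdio.h>

#define FLUSH_AREA 64   /* illustrative; derived from pool size in InnoDB */

/* Compute [low, high) = the aligned window of page numbers containing
'offset'; the caller then tries to flush every flushable page in it. */
static void
flush_area_bounds(unsigned long offset,
                  unsigned long* low, unsigned long* high)
{
    *low  = (offset / FLUSH_AREA) * FLUSH_AREA;
    *high = *low + FLUSH_AREA;
}

int main(void)
{
    unsigned long low;
    unsigned long high;

    flush_area_bounds(200, &low, &high);
    printf("page 200 -> flush window [%lu, %lu)\n", low, high); /* [192, 256) */
    return 0;
}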
 NOTE 1: in the case of an LRU flush the calling thread may own latches to
 pages: to avoid deadlocks, this function must be written so that it cannot
 end up waiting for these latches! NOTE 2: in the case of a flush list flush,
-the calling thread is not allowed to own any latches on pages! */
+the calling thread is not allowed to own any latches on pages!
+@return number of blocks for which the write request was queued;
+ULINT_UNDEFINED if there was a flush of the same type already running */
 UNIV_INTERN
 ulint
 buf_flush_batch(
 /*============*/
- /* out: number of blocks for which the
- write request was queued;
- ULINT_UNDEFINED if there was a flush
- of the same type already running */
- enum buf_flush flush_type, /* in: BUF_FLUSH_LRU or
+ enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU or
 BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
 then the caller must not own any
 latches on pages */
- ulint min_n, /* in: wished minimum mumber of blocks
+ ulint min_n, /*!< in: wished minimum number of blocks
 flushed (it is not guaranteed that the
 actual number is that big, though) */
- ib_uint64_t lsn_limit) /* in the case BUF_FLUSH_LIST all
+ ib_uint64_t lsn_limit) /*!< in: in the case BUF_FLUSH_LIST all
 blocks whose oldest_modification is
 smaller than this should be flushed
 (if their number does not exceed
@@ -1091,24 +1120,16 @@ flush_next:
 function a pointer to a block in the list! */
 do {
- mutex_t*block_mutex = buf_page_get_mutex(bpage);
+ mutex_t*block_mutex = buf_page_get_mutex_enter(bpage);
 ibool ready;
-retry_lock_1:
 ut_a(buf_page_in_file(bpage));
- mutex_enter(block_mutex);
- if (block_mutex != buf_page_get_mutex(bpage)) {
- mutex_exit(block_mutex);
- block_mutex = buf_page_get_mutex(bpage);
- goto retry_lock_1;
- }
+ ut_a(block_mutex);
 ready = buf_flush_ready_for_flush(bpage, flush_type);
 mutex_exit(block_mutex);
 if (ready) {
- mutex_t* block_mutex;
- buf_page_t* bpage_tmp;
 space = buf_page_get_space(bpage);
 offset = buf_page_get_page_no(bpage);
@@ -1122,8 +1143,6 @@ retry_lock_1:
 /* Try to flush also all the neighbors */
 page_count += buf_flush_try_neighbors(
 space, offset, flush_type, srv_flush_neighbor_pages);
- block_mutex = buf_page_get_mutex(bpage);
- bpage_tmp = buf_page_hash_get(space, offset);
 /* fprintf(stderr,
 "Flush type %lu, page no %lu, neighb %lu\n",
 flush_type, offset,
@@ -1189,38 +1208,44 @@ retry_lock_1:
 srv_buf_pool_flushed += page_count;
+ /* We keep track of all flushes happening as part of LRU
+ flush. When estimating the desired rate at which flush_list
+ should be flushed we factor in this value. */
+ if (flush_type == BUF_FLUSH_LRU) {
+ buf_lru_flush_page_count += page_count;
+ }
+
 return(page_count);
 }
-/**********************************************************************
+/******************************************************************//**
 Waits until a flush batch of the given type ends */
 UNIV_INTERN
 void
 buf_flush_wait_batch_end(
 /*=====================*/
- enum buf_flush type) /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+ enum buf_flush type) /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
 {
 ut_ad((type == BUF_FLUSH_LRU) || (type == BUF_FLUSH_LIST));
 os_event_wait(buf_pool->no_flush[type]);
 }
-/**********************************************************************
+/******************************************************************//**
 Gives a recommendation of how many blocks should be flushed to establish
 a big enough margin of replaceable blocks near the end of the LRU list
-and in the free list. */
+and in the free list.
+@return number of blocks which should be flushed from the end of the
+LRU list */
 static
 ulint
 buf_flush_LRU_recommendation(void)
 /*==============================*/
- /* out: number of blocks which should be flushed
- from the end of the LRU list */
 {
 buf_page_t* bpage;
 ulint n_replaceable;
 ulint distance = 0;
 ibool have_LRU_mutex = FALSE;
- mutex_t* block_mutex;
 if(UT_LIST_GET_LEN(buf_pool->unzip_LRU))
 have_LRU_mutex = TRUE;
@@ -1238,21 +1263,15 @@ buf_flush_LRU_recommendation(void)
 + BUF_FLUSH_EXTRA_MARGIN)
 && (distance < BUF_LRU_FREE_SEARCH_LEN)) {
- if (!bpage->in_LRU_list) {
+ mutex_t* block_mutex;
+ if (!bpage->in_LRU_list) { /* restart; but it is very optimistic */
 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
 continue;
 }
+ block_mutex = buf_page_get_mutex_enter(bpage);
- block_mutex = buf_page_get_mutex(bpage);
-
-retry_lock:
- mutex_enter(block_mutex);
- if (block_mutex != buf_page_get_mutex(bpage)) {
- mutex_exit(block_mutex);
- block_mutex = buf_page_get_mutex(bpage);
- goto retry_lock;
- }
+ ut_a(block_mutex);
 if (buf_flush_ready_for_replace(bpage)) {
 n_replaceable++;
@@ -1278,7 +1297,7 @@ retry_lock:
 - n_replaceable);
 }
-/*************************************************************************
+/*********************************************************************//**
 Flushes pages from the end of the LRU list if there is too small a margin
 of replaceable pages there or in the free list. VERY IMPORTANT: this function
 is called also by threads which have locks on pages. To avoid deadlocks, we
@@ -1306,18 +1325,130 @@ buf_flush_free_margin(
 }
 }
+/*********************************************************************
+Update the historical stats that we are collecting for flush rate
+heuristics at the end of each interval.
+Flush rate heuristic depends on (a) rate of redo log generation and
+(b) the rate at which LRU flush is happening. */
+UNIV_INTERN
+void
+buf_flush_stat_update(void)
+/*=======================*/
+{
+ buf_flush_stat_t* item;
+ ib_uint64_t lsn_diff;
+ ib_uint64_t lsn;
+ ulint n_flushed;
+
+ lsn = log_get_lsn();
+ if (buf_flush_stat_cur.redo == 0) {
+ /* First time around. Just update the current LSN
+ and return. */
+ buf_flush_stat_cur.redo = lsn;
+ return;
+ }
+
+ item = &buf_flush_stat_arr[buf_flush_stat_arr_ind];
+
+ /* values for this interval */
+ lsn_diff = lsn - buf_flush_stat_cur.redo;
+ n_flushed = buf_lru_flush_page_count
+ - buf_flush_stat_cur.n_flushed;
+
+ /* add the current value and subtract the obsolete entry. */
+ buf_flush_stat_sum.redo += lsn_diff - item->redo;
+ buf_flush_stat_sum.n_flushed += n_flushed - item->n_flushed;
+
+ /* put current entry in the array. */
+ item->redo = lsn_diff;
+ item->n_flushed = n_flushed;
+
+ /* update the index */
+ buf_flush_stat_arr_ind++;
+ buf_flush_stat_arr_ind %= BUF_FLUSH_STAT_N_INTERVAL;
+
+ /* reset the current entry. */
+ buf_flush_stat_cur.redo = lsn;
+ buf_flush_stat_cur.n_flushed = buf_lru_flush_page_count;
+}
+
+/*********************************************************************
+Determines the fraction of dirty pages that need to be flushed based
+on the speed at which we generate redo log. Note that if redo log
+is generated at a significant rate without corresponding increase
+in the number of dirty pages (for example, an in-memory workload)
+it can cause IO bursts of flushing. This function implements heuristics
+to avoid this burstiness.
+@return number of dirty pages to be flushed / second */
+UNIV_INTERN
+ulint
+buf_flush_get_desired_flush_rate(void)
+/*==================================*/
+{
+ ulint redo_avg;
+ ulint lru_flush_avg;
+ ulint n_dirty;
+ ulint n_flush_req;
+ lint rate;
+ ib_uint64_t lsn = log_get_lsn();
+ ulint log_capacity = log_get_capacity();
+
+ /* log_capacity should never be zero after the initialization
+ of log subsystem. */
+ ut_ad(log_capacity != 0);
+
+ /* Get total number of dirty pages. It is OK to access
+ flush_list without holding any mutex as we are using this
+ only for heuristics. */
+ n_dirty = UT_LIST_GET_LEN(buf_pool->flush_list);
+
+ /* An overflow can happen if we generate more than 2^32 bytes
+ of redo in this interval i.e.: 4G of redo in 1 second. We can
+ safely consider this as infinity because if we ever come close
+ to 4G we'll start a synchronous flush of dirty pages. */
+ /* redo_avg below is average at which redo is generated in
+ past BUF_FLUSH_STAT_N_INTERVAL + redo generated in the current
+ interval. */
+ redo_avg = (ulint) (buf_flush_stat_sum.redo
+ / BUF_FLUSH_STAT_N_INTERVAL
+ + (lsn - buf_flush_stat_cur.redo));
+
+ /* An overflow can happen possibly if we flush more than 2^32
+ pages in BUF_FLUSH_STAT_N_INTERVAL. This is a very
+ unlikely scenario. Even when this happens it means that our
+ flush rate will be off the mark. It won't affect correctness
+ of any subsystem. */
+ /* lru_flush_avg below is rate at which pages are flushed as
+ part of LRU flush in past BUF_FLUSH_STAT_N_INTERVAL + the
+ number of pages flushed in the current interval. */
+ lru_flush_avg = buf_flush_stat_sum.n_flushed
+ / BUF_FLUSH_STAT_N_INTERVAL
+ + (buf_lru_flush_page_count
+ - buf_flush_stat_cur.n_flushed);
+
+ n_flush_req = (n_dirty * redo_avg) / log_capacity;
+
+ /* The number of pages that we want to flush from the flush
+ list is the difference between the required rate and the
+ number of pages that we are historically flushing from the
+ LRU list */
+ rate = n_flush_req - lru_flush_avg;
+ return(rate > 0 ? (ulint) rate : 0);
+}
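A quick numeric check of the heuristic just added, with made-up but plausible inputs: 10,000 dirty pages and an average redo rate of 1 MB per second against 100 MB of log capacity ask for (10000 * 1 MB) / 100 MB = 100 page flushes per second; if LRU flushing already delivers about 40 pages per second, the flush_list share is 60. The same arithmetic as a standalone program; note the widening cast, echoing the overflow caveats in the comments above.

#include <stdio.h>

int main(void)
{
    unsigned long n_dirty       = 10000;        /* flush list length */
    unsigned long redo_avg      = 1UL << 20;    /* ~1 MB of redo per second */
    unsigned long log_capacity  = 100UL << 20;  /* 100 MB of usable log */
    unsigned long lru_flush_avg = 40;           /* pages/s from LRU flushes */
    unsigned long n_flush_req;
    long          rate;

    /* Widen before multiplying: n_dirty * redo_avg can exceed 2^32. */
    n_flush_req = (unsigned long)
        (((unsigned long long) n_dirty * redo_avg) / log_capacity);
    rate = (long) n_flush_req - (long) lru_flush_avg;

    printf("flush_list flush rate: %ld pages/s\n",
           rate > 0 ? rate : 0);        /* prints 60 */
    return 0;
}

+
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/**********************************************************************
-Validates the flush list. */
+/******************************************************************//**
+Validates the flush list.
+@return TRUE if ok */
 static
 ibool
 buf_flush_validate_low(void)
 /*========================*/
- /* out: TRUE if ok */
 {
 buf_page_t* bpage;
- UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list);
+ UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
+ ut_ad(ut_list_node_313->in_flush_list));
 bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
@@ -1335,13 +1466,13 @@ buf_flush_validate_low(void)
 return(TRUE);
 }
-/**********************************************************************
-Validates the flush list. */
+/******************************************************************//**
+Validates the flush list.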
+@return TRUE if ok */ UNIV_INTERN ibool buf_flush_validate(void) /*====================*/ - /* out: TRUE if ok */ { ibool ret; @@ -1356,3 +1487,4 @@ buf_flush_validate(void) return(ret); } #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ +#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/buf/buf0lru.c b/storage/xtradb/buf/buf0lru.c index e5613196dd9..2270ea5dce2 100644 --- a/storage/xtradb/buf/buf0lru.c +++ b/storage/xtradb/buf/buf0lru.c @@ -16,7 +16,8 @@ Place, Suite 330, Boston, MA 02111-1307 USA *****************************************************************************/ -/****************************************************** +/**************************************************//** +@file buf/buf0lru.c The database buffer replacement algorithm Created 11/5/1995 Heikki Tuuri @@ -48,7 +49,7 @@ Created 11/5/1995 Heikki Tuuri #include "log0recv.h" #include "srv0srv.h" -/* The number of blocks from the LRU_old pointer onward, including the block +/** The number of blocks from the LRU_old pointer onward, including the block pointed to, must be 3/8 of the whole LRU list length, except that the tolerance defined below is allowed. Note that the tolerance must be small enough such that for even the BUF_LRU_OLD_MIN_LEN long LRU list, the @@ -56,21 +57,21 @@ LRU_old pointer is not allowed to point to either end of the LRU list. */ #define BUF_LRU_OLD_TOLERANCE 20 -/* The whole LRU list length is divided by this number to determine an +/** The whole LRU list length is divided by this number to determine an initial segment in buf_LRU_get_recent_limit */ #define BUF_LRU_INITIAL_RATIO 8 -/* When dropping the search hash index entries before deleting an ibd +/** When dropping the search hash index entries before deleting an ibd file, we build a local array of pages belonging to that tablespace in the buffer pool. Following is the size of that array. */ #define BUF_LRU_DROP_SEARCH_HASH_SIZE 1024 -/* If we switch on the InnoDB monitor because there are too few available +/** If we switch on the InnoDB monitor because there are too few available frames in the buffer pool, we set this to TRUE */ -UNIV_INTERN ibool buf_lru_switched_on_innodb_mon = FALSE; +static ibool buf_lru_switched_on_innodb_mon = FALSE; -/********************************************************************** +/******************************************************************//** These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O and page_zip_decompress() operations. Based on the statistics, buf_LRU_evict_from_unzip_LRU() decides if we want to evict from @@ -79,69 +80,71 @@ uncompressed frame (meaning we can evict dirty blocks as well). From the regular LRU, we will evict the entire block (i.e.: both the uncompressed and compressed data), which must be clean. */ -/* Number of intervals for which we keep the history of these stats. +/* @{ */ + +/** Number of intervals for which we keep the history of these stats. Each interval is 1 second, defined by the rate at which srv_error_monitor_thread() calls buf_LRU_stat_update(). */ #define BUF_LRU_STAT_N_INTERVAL 50 -/* Co-efficient with which we multiply I/O operations to equate them +/** Co-efficient with which we multiply I/O operations to equate them with page_zip_decompress() operations. */ #define BUF_LRU_IO_TO_UNZIP_FACTOR 50 -/* Sampled values buf_LRU_stat_cur. +/** Sampled values buf_LRU_stat_cur. Protected by buf_pool_mutex. Updated by buf_LRU_stat_update(). 
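These counters feed buf_LRU_evict_from_unzip_LRU() (patched further down), which treats one I/O as worth BUF_LRU_IO_TO_UNZIP_FACTOR decompressions and prefers evicting only the uncompressed frame, keeping the compressed copy, while the workload looks disk-bound. The decision in isolation, as a runnable sketch with illustrative inputs; the real function first checks list lengths and warm-up conditions that are omitted here.

#include <stdio.h>

#define IO_TO_UNZIP_FACTOR 50   /* one I/O weighted as 50 decompressions */

/* Evict from the unzip_LRU (uncompressed frame only) rather than the
common LRU?  Worth it only while decompression traffic stays cheap
relative to the I/O traffic. */
static int
evict_from_unzip_lru(unsigned long io_avg, unsigned long unzip_avg)
{
    return unzip_avg <= io_avg * IO_TO_UNZIP_FACTOR;
}

int main(void)
{
    /* disk-bound: much I/O, little decompression -> prefer unzip_LRU */
    printf("%d\n", evict_from_unzip_lru(1000, 200));   /* 1 */
    /* decompression-bound -> evict whole (clean) blocks instead */
    printf("%d\n", evict_from_unzip_lru(10, 5000));    /* 0 */
    return 0;
}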
*/ static buf_LRU_stat_t buf_LRU_stat_arr[BUF_LRU_STAT_N_INTERVAL]; -/* Cursor to buf_LRU_stat_arr[] that is updated in a round-robin fashion. */ +/** Cursor to buf_LRU_stat_arr[] that is updated in a round-robin fashion. */ static ulint buf_LRU_stat_arr_ind; -/* Current operation counters. Not protected by any mutex. Cleared +/** Current operation counters. Not protected by any mutex. Cleared by buf_LRU_stat_update(). */ UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_cur; -/* Running sum of past values of buf_LRU_stat_cur. +/** Running sum of past values of buf_LRU_stat_cur. Updated by buf_LRU_stat_update(). Protected by buf_pool_mutex. */ UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_sum; -/********************************************************************** +/* @} */ + +/******************************************************************//** Takes a block out of the LRU list and page hash table. If the block is compressed-only (BUF_BLOCK_ZIP_PAGE), the object will be freed and buf_pool_zip_mutex will be released. If a compressed page or a compressed-only block descriptor is freed, other compressed pages or compressed-only block descriptors may be -relocated. */ +relocated. +@return the new state of the block (BUF_BLOCK_ZIP_FREE if the state +was BUF_BLOCK_ZIP_PAGE, or BUF_BLOCK_REMOVE_HASH otherwise) */ static enum buf_page_state buf_LRU_block_remove_hashed_page( /*=============================*/ - /* out: the new state of the block - (BUF_BLOCK_ZIP_FREE if the state was - BUF_BLOCK_ZIP_PAGE, or BUF_BLOCK_REMOVE_HASH - otherwise) */ - buf_page_t* bpage, /* in: block, must contain a file page and + buf_page_t* bpage, /*!< in: block, must contain a file page and be in a state where it can be freed; there may or may not be a hash index to the page */ - ibool zip); /* in: TRUE if should remove also the + ibool zip); /*!< in: TRUE if should remove also the compressed page of an uncompressed page */ -/********************************************************************** +/******************************************************************//** Puts a file page whose has no hash index to the free list. */ static void buf_LRU_block_free_hashed_page( /*===========================*/ - buf_block_t* block, /* in: block, must contain a file page and + buf_block_t* block, /*!< in: block, must contain a file page and be in a state where it can be freed */ ibool have_page_hash_mutex); -/********************************************************************** +/******************************************************************//** Determines if the unzip_LRU list should be used for evicting a victim -instead of the general LRU list. */ +instead of the general LRU list. +@return TRUE if should use unzip_LRU */ UNIV_INLINE ibool buf_LRU_evict_from_unzip_LRU( ibool have_LRU_mutex) /*==============================*/ - /* out: TRUE if should use unzip_LRU */ { ulint io_avg; ulint unzip_avg; @@ -191,18 +194,18 @@ buf_LRU_evict_from_unzip_LRU( return(unzip_avg <= io_avg * BUF_LRU_IO_TO_UNZIP_FACTOR); } -/********************************************************************** +/******************************************************************//** Attempts to drop page hash index on a batch of pages belonging to a particular space id. 
*/ static void buf_LRU_drop_page_hash_batch( /*=========================*/ - ulint space_id, /* in: space id */ - ulint zip_size, /* in: compressed page size in bytes + ulint space_id, /*!< in: space id */ + ulint zip_size, /*!< in: compressed page size in bytes or 0 for uncompressed pages */ - const ulint* arr, /* in: array of page_no */ - ulint count) /* in: number of entries in array */ + const ulint* arr, /*!< in: array of page_no */ + ulint count) /*!< in: number of entries in array */ { ulint i; @@ -215,7 +218,7 @@ buf_LRU_drop_page_hash_batch( } } -/********************************************************************** +/******************************************************************//** When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page hash index entries belonging to that table. This function tries to do that in batch. Note that this is a 'best effort' attempt and does @@ -224,7 +227,7 @@ static void buf_LRU_drop_page_hash_for_tablespace( /*==================================*/ - ulint id) /* in: space id */ + ulint id) /*!< in: space id */ { buf_page_t* bpage; ulint* page_arr; @@ -249,16 +252,10 @@ scan_again: bpage = UT_LIST_GET_LAST(buf_pool->LRU); while (bpage != NULL) { - mutex_t* block_mutex = buf_page_get_mutex(bpage); + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage); buf_page_t* prev_bpage; -retry_lock: - mutex_enter(block_mutex); - if (block_mutex != buf_page_get_mutex(bpage)) { - mutex_exit(block_mutex); - block_mutex = buf_page_get_mutex(bpage); - goto retry_lock; - } + ut_a(block_mutex); prev_bpage = UT_LIST_GET_PREV(LRU, bpage); ut_a(buf_page_in_file(bpage)); @@ -328,14 +325,14 @@ next_page: ut_free(page_arr); } -/********************************************************************** +/******************************************************************//** Invalidates all pages belonging to a given tablespace when we are deleting the data file(s) of that tablespace. */ UNIV_INTERN void buf_LRU_invalidate_tablespace( /*==========================*/ - ulint id) /* in: space id */ + ulint id) /*!< in: space id */ { buf_page_t* bpage; ibool all_freed; @@ -358,18 +355,12 @@ scan_again: bpage = UT_LIST_GET_LAST(buf_pool->LRU); while (bpage != NULL) { - mutex_t* block_mutex = buf_page_get_mutex(bpage); + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage); buf_page_t* prev_bpage; ut_a(buf_page_in_file(bpage)); -retry_lock: - mutex_enter(block_mutex); - if (block_mutex != buf_page_get_mutex(bpage)) { - mutex_exit(block_mutex); - block_mutex = buf_page_get_mutex(bpage); - goto retry_lock; - } + ut_a(block_mutex); prev_bpage = UT_LIST_GET_PREV(LRU, bpage); if (buf_page_get_space(bpage) == id) { @@ -459,15 +450,15 @@ next_page: } } -/********************************************************************** +/******************************************************************//** Gets the minimum LRU_position field for the blocks in an initial segment (determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not -guaranteed to be precise, because the ulint_clock may wrap around. */ +guaranteed to be precise, because the ulint_clock may wrap around. 
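*/
+guaranteed to be precise, because the ulint_clock may wrap around.

Both scan_again loops above follow the same discipline: capture the LRU predecessor before doing anything that may free or relocate the current descriptor, and restart from the tail if the list could have shifted underneath the scan. The pointer-capture part of that pattern on a toy doubly-linked list:

#include <stdio.h>
#include <stdlib.h>

typedef struct node {
    int          key;
    struct node* prev;
    struct node* next;
} node_t;

/* Remove every node with a matching key, walking backwards from the
tail.  The predecessor is saved *before* the current node may be
freed, which is what keeps the traversal valid. */
static void
purge_matching(node_t** tail, int key)
{
    node_t* cur = *tail;

    while (cur != NULL) {
        node_t* prev = cur->prev;       /* capture before free */

        if (cur->key == key) {
            if (cur->prev) cur->prev->next = cur->next;
            if (cur->next) cur->next->prev = cur->prev;
            if (*tail == cur) *tail = cur->prev;
            free(cur);
        }
        cur = prev;
    }
}

int main(void)
{
    node_t* tail = NULL;
    int     i;

    for (i = 0; i < 5; i++) {           /* build 5 nodes, keys 0,1,0,1,0 */
        node_t* n = calloc(1, sizeof(*n));
        n->key = i % 2;
        n->prev = tail;
        if (tail) tail->next = n;
        tail = n;
    }
    purge_matching(&tail, 0);

    for (; tail; tail = tail->prev) {
        printf("%d ", tail->key);       /* prints 1 1 */
    }
    printf("\n");
    return 0;
}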
+@return the limit; zero if could not determine it */ UNIV_INTERN ulint buf_LRU_get_recent_limit(void) /*==========================*/ - /* out: the limit; zero if could not determine it */ { const buf_page_t* bpage; ulint len; @@ -489,21 +480,22 @@ buf_LRU_get_recent_limit(void) bpage = UT_LIST_GET_FIRST(buf_pool->LRU); - limit = buf_page_get_LRU_position(bpage) - len / BUF_LRU_INITIAL_RATIO; + limit = buf_page_get_LRU_position(bpage); + len /= BUF_LRU_INITIAL_RATIO; //buf_pool_mutex_exit(); mutex_exit(&LRU_list_mutex); - return(limit); + return(limit > len ? (limit - len) : 0); } -/************************************************************************ +/********************************************************************//** Insert a compressed block into buf_pool->zip_clean in the LRU order. */ UNIV_INTERN void buf_LRU_insert_zip_clean( /*=====================*/ - buf_page_t* bpage) /* in: pointer to the block in question */ + buf_page_t* bpage) /*!< in: pointer to the block in question */ { buf_page_t* b; @@ -531,15 +523,15 @@ buf_LRU_insert_zip_clean( } } -/********************************************************************** +/******************************************************************//** Try to free an uncompressed page of a compressed block from the unzip -LRU list. The compressed page is preserved, and it need not be clean. */ +LRU list. The compressed page is preserved, and it need not be clean. +@return TRUE if freed */ UNIV_INLINE ibool buf_LRU_free_from_unzip_LRU_list( /*=============================*/ - /* out: TRUE if freed */ - ulint n_iterations, /* in: how many times this has been called + ulint n_iterations, /*!< in: how many times this has been called repeatedly without result: a high value means that we should search farther; we will search n_iterations / 5 of the unzip_LRU list, @@ -611,14 +603,14 @@ restart: return(FALSE); } -/********************************************************************** -Try to free a clean page from the common LRU list. */ +/******************************************************************//** +Try to free a clean page from the common LRU list. +@return TRUE if freed */ UNIV_INLINE ibool buf_LRU_free_from_common_LRU_list( /*==============================*/ - /* out: TRUE if freed */ - ulint n_iterations, /* in: how many times this has been called + ulint n_iterations, /*!< in: how many times this has been called repeatedly without result: a high value means that we should search farther; if n_iterations < 10, then we search @@ -640,16 +632,9 @@ restart: enum buf_lru_free_block_status freed; mutex_t* block_mutex - = buf_page_get_mutex(bpage); - -retry_lock: - mutex_enter(block_mutex); + = buf_page_get_mutex_enter(bpage); - if (block_mutex != buf_page_get_mutex(bpage)) { - mutex_exit(block_mutex); - block_mutex = buf_page_get_mutex(bpage); - goto retry_lock; - } + ut_a(block_mutex); if (!bpage->in_LRU_list || !buf_page_in_file(bpage)) { @@ -686,14 +671,14 @@ retry_lock: return(FALSE); } -/********************************************************************** -Try to free a replaceable block. */ +/******************************************************************//** +Try to free a replaceable block. 
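One behavioural fix earlier in this hunk deserves a note: the old buf_LRU_get_recent_limit() returned buf_page_get_LRU_position(bpage) minus len / BUF_LRU_INITIAL_RATIO unconditionally, and since ulint is unsigned, the subtraction wraps around to a huge value whenever the position counter is smaller than the subtrahend; the new code compares first and clamps to zero. The failure mode in miniature:

#include <stdio.h>

int main(void)
{
    unsigned long limit = 5;    /* e.g. a small LRU position counter */
    unsigned long len   = 32;   /* list length / BUF_LRU_INITIAL_RATIO */

    /* Old behaviour: unsigned wraparound yields a nonsense limit. */
    printf("unguarded: %lu\n", limit - len);

    /* New behaviour: clamp to zero, meaning "no limit known". */
    printf("guarded:   %lu\n", limit > len ? limit - len : 0);
    return 0;
}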
+@return TRUE if found and freed */ UNIV_INTERN ibool buf_LRU_search_and_free_block( /*==========================*/ - /* out: TRUE if found and freed */ - ulint n_iterations) /* in: how many times this has been called + ulint n_iterations) /*!< in: how many times this has been called repeatedly without result: a high value means that we should search farther; if n_iterations < 10, then we search @@ -734,7 +719,7 @@ buf_LRU_search_and_free_block( return(freed); } -/********************************************************************** +/******************************************************************//** Tries to remove LRU flushed blocks from the end of the LRU list and put them to the free list. This is beneficial for the efficiency of the insert buffer operation, as flushed pages from non-unique non-clustered indexes are here @@ -765,16 +750,15 @@ buf_LRU_try_free_flushed_blocks(void) mutex_exit(&buf_pool_mutex); } -/********************************************************************** +/******************************************************************//** Returns TRUE if less than 25 % of the buffer pool is available. This can be used in heuristics to prevent huge transactions eating up the whole buffer -pool for their locks. */ +pool for their locks. +@return TRUE if less than 25 % of buffer pool left */ UNIV_INTERN ibool buf_LRU_buf_pool_running_out(void) /*==============================*/ - /* out: TRUE if less than 25 % of buffer pool - left */ { ibool ret = FALSE; @@ -795,15 +779,14 @@ buf_LRU_buf_pool_running_out(void) return(ret); } -/********************************************************************** +/******************************************************************//** Returns a free block from the buf_pool. The block is taken off the -free list. If it is empty, returns NULL. */ +free list. If it is empty, returns NULL. +@return a free control block, or NULL if the buf_block->free list is empty */ UNIV_INTERN buf_block_t* buf_LRU_get_free_only(void) /*=======================*/ - /* out: a free control block, or NULL - if the buf_block->free list is empty */ { buf_block_t* block; @@ -835,17 +818,16 @@ buf_LRU_get_free_only(void) return(block); } -/********************************************************************** +/******************************************************************//** Returns a free block from the buf_pool. The block is taken off the free list. If it is empty, blocks are moved from the end of the -LRU list to the free list. */ +LRU list to the free list. +@return the free control block, in state BUF_BLOCK_READY_FOR_USE */ UNIV_INTERN buf_block_t* buf_LRU_get_free_block( /*===================*/ - /* out: the free control block, - in state BUF_BLOCK_READY_FOR_USE */ - ulint zip_size) /* in: compressed page size in bytes, + ulint zip_size) /*!< in: compressed page size in bytes, or 0 if uncompressed tablespace */ { buf_block_t* block = NULL; @@ -1031,7 +1013,7 @@ loop: goto loop; } -/*********************************************************************** +/*******************************************************************//** Moves the LRU_old pointer so that the length of the old blocks list is inside the allowed limits. */ UNIV_INLINE @@ -1092,7 +1074,7 @@ buf_LRU_old_adjust_len(void) } } -/*********************************************************************** +/*******************************************************************//** Initializes the old blocks pointer in the LRU list. 
This function should be called when the LRU list grows to BUF_LRU_OLD_MIN_LEN length. */ static @@ -1124,13 +1106,13 @@ buf_LRU_old_init(void) buf_LRU_old_adjust_len(); } -/********************************************************************** +/******************************************************************//** Remove a block from the unzip_LRU list if it belonged to the list. */ static void buf_unzip_LRU_remove_block_if_needed( /*=================================*/ - buf_page_t* bpage) /* in/out: control block */ + buf_page_t* bpage) /*!< in/out: control block */ { ut_ad(buf_pool); ut_ad(bpage); @@ -1148,13 +1130,13 @@ buf_unzip_LRU_remove_block_if_needed( } } -/********************************************************************** +/******************************************************************//** Removes a block from the LRU list. */ UNIV_INLINE void buf_LRU_remove_block( /*=================*/ - buf_page_t* bpage) /* in: control block */ + buf_page_t* bpage) /*!< in: control block */ { ut_ad(buf_pool); ut_ad(bpage); @@ -1210,14 +1192,14 @@ buf_LRU_remove_block( buf_LRU_old_adjust_len(); } -/********************************************************************** +/******************************************************************//** Adds a block to the LRU list of decompressed zip pages. */ UNIV_INTERN void buf_unzip_LRU_add_block( /*====================*/ - buf_block_t* block, /* in: control block */ - ibool old) /* in: TRUE if should be put to the end + buf_block_t* block, /*!< in: control block */ + ibool old) /*!< in: TRUE if should be put to the end of the list, else put to the start */ { ut_ad(buf_pool); @@ -1237,13 +1219,13 @@ buf_unzip_LRU_add_block( } } -/********************************************************************** +/******************************************************************//** Adds a block to the LRU list end. */ UNIV_INLINE void buf_LRU_add_block_to_end_low( /*=========================*/ - buf_page_t* bpage) /* in: control block */ + buf_page_t* bpage) /*!< in: control block */ { buf_page_t* last_bpage; @@ -1296,14 +1278,14 @@ buf_LRU_add_block_to_end_low( } } -/********************************************************************** +/******************************************************************//** Adds a block to the LRU list. */ UNIV_INLINE void buf_LRU_add_block_low( /*==================*/ - buf_page_t* bpage, /* in: control block */ - ibool old) /* in: TRUE if should be put to the old blocks + buf_page_t* bpage, /*!< in: control block */ + ibool old) /*!< in: TRUE if should be put to the old blocks in the LRU list, else put to the start; if the LRU list is very short, the block is added to the start, regardless of this parameter */ @@ -1369,14 +1351,14 @@ buf_LRU_add_block_low( } } -/********************************************************************** +/******************************************************************//** Adds a block to the LRU list. 
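buf_LRU_old_adjust_len() above keeps the "old" sublist at 3/8 of the whole LRU length, and BUF_LRU_OLD_TOLERANCE (defined earlier in this file) stops the LRU_old pointer from thrashing on every insert or removal. A runnable rendering of just the target-with-hysteresis arithmetic follows; the direction comments are interpretive, and the real function also moves the pointer in a loop and flips the old flag on the blocks it passes.

#include <stdio.h>

#define OLD_TOLERANCE 20

/* Returns +1/0/-1: grow the old sublist, leave it alone, or shrink
it.  The target length is 3/8 of the total, with +/- OLD_TOLERANCE
slack so small fluctuations cause no pointer movement at all. */
static int
old_len_adjustment(unsigned long old_len, unsigned long total_len)
{
    unsigned long target = total_len * 3 / 8;

    if (old_len + OLD_TOLERANCE < target) {
        return 1;       /* too few old blocks */
    }
    if (old_len > target + OLD_TOLERANCE) {
        return -1;      /* too many old blocks */
    }
    return 0;
}

int main(void)
{
    printf("%d\n", old_len_adjustment(300, 1000));  /* 1: target is 375 */
    printf("%d\n", old_len_adjustment(370, 1000));  /* 0: within slack */
    printf("%d\n", old_len_adjustment(420, 1000));  /* -1 */
    return 0;
}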
*/ UNIV_INTERN void buf_LRU_add_block( /*==============*/ - buf_page_t* bpage, /* in: control block */ - ibool old) /* in: TRUE if should be put to the old + buf_page_t* bpage, /*!< in: control block */ + ibool old) /*!< in: TRUE if should be put to the old blocks in the LRU list, else put to the start; if the LRU list is very short, the block is added to the start, regardless of this @@ -1385,31 +1367,31 @@ buf_LRU_add_block( buf_LRU_add_block_low(bpage, old); } -/********************************************************************** +/******************************************************************//** Moves a block to the start of the LRU list. */ UNIV_INTERN void buf_LRU_make_block_young( /*=====================*/ - buf_page_t* bpage) /* in: control block */ + buf_page_t* bpage) /*!< in: control block */ { buf_LRU_remove_block(bpage); buf_LRU_add_block_low(bpage, FALSE); } -/********************************************************************** +/******************************************************************//** Moves a block to the end of the LRU list. */ UNIV_INTERN void buf_LRU_make_block_old( /*===================*/ - buf_page_t* bpage) /* in: control block */ + buf_page_t* bpage) /*!< in: control block */ { buf_LRU_remove_block(bpage); buf_LRU_add_block_to_end_low(bpage); } -/********************************************************************** +/******************************************************************//** Try to free a block. If bpage is a descriptor of a compressed-only page, the descriptor object will be freed as well. @@ -1419,19 +1401,18 @@ accessible via bpage. The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and release these two mutexes after the call. No other -buf_page_get_mutex() may be held when calling this function. */ +buf_page_get_mutex() may be held when calling this function. +@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or +BUF_LRU_NOT_FREED otherwise. */ UNIV_INTERN enum buf_lru_free_block_status buf_LRU_free_block( /*===============*/ - /* out: BUF_LRU_FREED if freed, - BUF_LRU_CANNOT_RELOCATE or - BUF_LRU_NOT_FREED otherwise. */ - buf_page_t* bpage, /* in: block to be freed */ - ibool zip, /* in: TRUE if should remove also the + buf_page_t* bpage, /*!< in: block to be freed */ + ibool zip, /*!< in: TRUE if should remove also the compressed page of an uncompressed page */ ibool* buf_pool_mutex_released, - /* in: pointer to a variable that will + /*!< in: pointer to a variable that will be assigned TRUE if buf_pool_mutex was temporarily released, or NULL */ ibool have_LRU_mutex) @@ -1722,13 +1703,13 @@ not_freed: return(BUF_LRU_FREED); } -/********************************************************************** +/******************************************************************//** Puts a block back to the free list. */ UNIV_INTERN void buf_LRU_block_free_non_file_page( /*=============================*/ - buf_block_t* block, /* in: block, must not contain a file page */ + buf_block_t* block, /*!< in: block, must not contain a file page */ ibool have_page_hash_mutex) { void* data; @@ -1783,26 +1764,24 @@ buf_LRU_block_free_non_file_page( UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE); } -/********************************************************************** +/******************************************************************//** Takes a block out of the LRU list and page hash table. If the block is compressed-only (BUF_BLOCK_ZIP_PAGE), the object will be freed and buf_pool_zip_mutex will be released. 
If a compressed page or a compressed-only block descriptor is freed, other compressed pages or compressed-only block descriptors may be -relocated. */ +relocated. +@return the new state of the block (BUF_BLOCK_ZIP_FREE if the state +was BUF_BLOCK_ZIP_PAGE, or BUF_BLOCK_REMOVE_HASH otherwise) */ static enum buf_page_state buf_LRU_block_remove_hashed_page( /*=============================*/ - /* out: the new state of the block - (BUF_BLOCK_ZIP_FREE if the state was - BUF_BLOCK_ZIP_PAGE, or BUF_BLOCK_REMOVE_HASH - otherwise) */ - buf_page_t* bpage, /* in: block, must contain a file page and + buf_page_t* bpage, /*!< in: block, must contain a file page and be in a state where it can be freed; there may or may not be a hash index to the page */ - ibool zip) /* in: TRUE if should remove also the + ibool zip) /*!< in: TRUE if should remove also the compressed page of an uncompressed page */ { const buf_page_t* hashed_bpage; @@ -1962,6 +1941,9 @@ buf_LRU_block_remove_hashed_page( void* data = bpage->zip.data; bpage->zip.data = NULL; + ut_ad(!bpage->in_free_list); + ut_ad(!bpage->in_flush_list); + ut_ad(!bpage->in_LRU_list); mutex_exit(&((buf_block_t*) bpage)->mutex); //buf_pool_mutex_exit_forbid(); buf_buddy_free(data, page_zip_get_size(&bpage->zip), TRUE); @@ -1985,13 +1967,13 @@ buf_LRU_block_remove_hashed_page( return(BUF_BLOCK_ZIP_FREE); } -/********************************************************************** +/******************************************************************//** Puts a file page whose has no hash index to the free list. */ static void buf_LRU_block_free_hashed_page( /*===========================*/ - buf_block_t* block, /* in: block, must contain a file page and + buf_block_t* block, /*!< in: block, must contain a file page and be in a state where it can be freed */ ibool have_page_hash_mutex) { @@ -2003,7 +1985,7 @@ buf_LRU_block_free_hashed_page( buf_LRU_block_free_non_file_page(block, have_page_hash_mutex); } -/************************************************************************ +/********************************************************************//** Update the historical stats that we are collecting for LRU eviction policy at the end of each interval. */ UNIV_INTERN @@ -2042,8 +2024,9 @@ func_exit: } #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/************************************************************************** -Validates the LRU list. */ +/**********************************************************************//** +Validates the LRU list. 
+@return TRUE */ UNIV_INTERN ibool buf_LRU_validate(void) @@ -2068,7 +2051,8 @@ buf_LRU_validate(void) ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE); } - UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU); + UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU, + ut_ad(ut_list_node_313->in_LRU_list)); bpage = UT_LIST_GET_FIRST(buf_pool->LRU); @@ -2119,7 +2103,8 @@ buf_LRU_validate(void) mutex_exit(&LRU_list_mutex); mutex_enter(&free_list_mutex); - UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free); + UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free, + ut_ad(ut_list_node_313->in_free_list)); for (bpage = UT_LIST_GET_FIRST(buf_pool->free); bpage != NULL; @@ -2131,7 +2116,9 @@ buf_LRU_validate(void) mutex_exit(&free_list_mutex); mutex_enter(&LRU_list_mutex); - UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU); + UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU, + ut_ad(ut_list_node_313->in_unzip_LRU_list + && ut_list_node_313->page.in_LRU_list)); for (block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU); block; @@ -2149,7 +2136,7 @@ buf_LRU_validate(void) #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/************************************************************************** +/**********************************************************************//** Prints the LRU list. */ UNIV_INTERN void diff --git a/storage/xtradb/buf/buf0rea.c b/storage/xtradb/buf/buf0rea.c index 086ea035a7b..f2dbe939c92 100644 --- a/storage/xtradb/buf/buf0rea.c +++ b/storage/xtradb/buf/buf0rea.c @@ -16,7 +16,8 @@ Place, Suite 330, Boston, MA 02111-1307 USA *****************************************************************************/ -/****************************************************** +/**************************************************//** +@file buf/buf0rea.c The database buffer read Created 11/5/1995 Heikki Tuuri @@ -35,61 +36,53 @@ Created 11/5/1995 Heikki Tuuri #include "trx0sys.h" #include "os0file.h" #include "srv0start.h" +#include "srv0srv.h" -extern ulint srv_read_ahead; -extern ulint srv_read_ahead_rnd; -extern ulint srv_read_ahead_seq; -extern ulint srv_buf_pool_reads; - -/* The size in blocks of the area where the random read-ahead algorithm counts +/** The size in blocks of the area where the random read-ahead algorithm counts the accessed pages when deciding whether to read-ahead */ #define BUF_READ_AHEAD_RANDOM_AREA BUF_READ_AHEAD_AREA -/* There must be at least this many pages in buf_pool in the area to start +/** There must be at least this many pages in buf_pool in the area to start a random read-ahead */ -#define BUF_READ_AHEAD_RANDOM_THRESHOLD (5 + buf_read_ahead_random_area / 8) +#define BUF_READ_AHEAD_RANDOM_THRESHOLD (1 + BUF_READ_AHEAD_RANDOM_AREA / 2) -/* The linear read-ahead area size */ +/** The linear read-ahead area size */ #define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA -/* The linear read-ahead threshold */ -#define LINEAR_AREA_THRESHOLD_COEF 5 / 8 - -/* If there are buf_pool->curr_size per the number below pending reads, then +/** If there are buf_pool->curr_size per the number below pending reads, then read-ahead is not done: this is to prevent flooding the buffer pool with i/o-fixed buffer blocks */ #define BUF_READ_AHEAD_PEND_LIMIT 2 -/************************************************************************ +/********************************************************************//** Low-level function which reads a page asynchronously from a file to the buffer buf_pool if it is not 
already there, in which case does nothing. Sets the io_fix flag and sets an exclusive lock on the buffer frame. The -flag is cleared and the x-lock released by an i/o-handler thread. */ +flag is cleared and the x-lock released by an i/o-handler thread. +@return 1 if a read request was queued, 0 if the page already resided +in buf_pool, or if the page is in the doublewrite buffer blocks in +which case it is never read into the pool, or if the tablespace does +not exist or is being dropped */ static ulint buf_read_page_low( /*==============*/ - /* out: 1 if a read request was queued, 0 if the page - already resided in buf_pool, or if the page is in - the doublewrite buffer blocks in which case it is never - read into the pool, or if the tablespace does not - exist or is being dropped */ - ulint* err, /* out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are + ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are trying to read from a non-existent tablespace, or a tablespace which is just now being dropped */ - ibool sync, /* in: TRUE if synchronous aio is desired */ - ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ..., + ibool sync, /*!< in: TRUE if synchronous aio is desired */ + ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ..., ORed to OS_AIO_SIMULATED_WAKE_LATER (see below at read-ahead functions) */ - ulint space, /* in: space id */ - ulint zip_size,/* in: compressed page size, or 0 */ - ibool unzip, /* in: TRUE=request uncompressed page */ - ib_int64_t tablespace_version, /* in: if the space memory object has + ulint space, /*!< in: space id */ + ulint zip_size,/*!< in: compressed page size, or 0 */ + ibool unzip, /*!< in: TRUE=request uncompressed page */ + ib_int64_t tablespace_version, /*!< in: if the space memory object has this timestamp different from what we are giving here, treat the tablespace as dropped; this is a timestamp we use to stop dangling page reads from a tablespace which we have DISCARDed + IMPORTed back */ - ulint offset) /* in: page number */ + ulint offset) /*!< in: page number */ { buf_page_t* bpage; ulint wake_later; @@ -211,7 +204,7 @@ not_to_recover: return(1); } -/************************************************************************ +/********************************************************************//** Applies a random read-ahead in buf_pool if there are at least a threshold value of accessed pages from the random read-ahead area. Does not read any page, not even the one at the position (space, offset), if the read-ahead @@ -220,18 +213,17 @@ pages: to avoid deadlocks this function must be written such that it cannot end up waiting for these latches! NOTE 2: the calling thread must want access to the page given: this rule is set to prevent unintended read-aheads performed by ibuf routines, a situation which could result in a deadlock if -the OS does not support asynchronous i/o. */ +the OS does not support asynchronous i/o. +@return number of page read requests issued; NOTE that if we read ibuf +pages, it may happen that the page at the given page number does not +get read even if we return a positive value! */ static ulint buf_read_ahead_random( /*==================*/ - /* out: number of page read requests issued; NOTE - that if we read ibuf pages, it may happen that - the page at the given page number does not get - read even if we return a value > 0! 
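Random read-ahead fires when enough recently accessed pages cluster inside one aligned area; note that this patch also tightens BUF_READ_AHEAD_RANDOM_THRESHOLD from 5 + area/8 to 1 + area/2 (see the #define hunk at the top of buf0rea.c above), i.e. more than half the area must have been touched. A runnable sketch of the area and threshold arithmetic, assuming the common 64-page read-ahead area; the real check also consults the page access stamps and pending-read limits omitted here.

#include <stdio.h>

#define RA_AREA      64                 /* read-ahead area, in pages */
#define RA_THRESHOLD (1 + RA_AREA / 2)  /* new, stricter trigger */

/* Start of the aligned area containing page 'offset'. */
static unsigned long
area_low(unsigned long offset)
{
    return (offset / RA_AREA) * RA_AREA;
}

/* Would 'recent_in_area' recently accessed pages inside the area
trigger a random read-ahead of the whole area? */
static int
random_readahead_triggers(unsigned long recent_in_area)
{
    return recent_in_area >= RA_THRESHOLD;
}

int main(void)
{
    printf("area of page 130: [%lu, %lu)\n",
           area_low(130), area_low(130) + RA_AREA);             /* [128, 192) */
    printf("9 recent pages:  %d\n", random_readahead_triggers(9));   /* 0 */
    printf("40 recent pages: %d\n", random_readahead_triggers(40));  /* 1 */
    return 0;
}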
*/ - ulint space, /* in: space id */ - ulint zip_size,/* in: compressed page size in bytes, or 0 */ - ulint offset) /* in: page number of a page which the current thread + ulint space, /*!< in: space id */ + ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ + ulint offset) /*!< in: page number of a page which the current thread wants to access */ { ib_int64_t tablespace_version; @@ -244,6 +236,9 @@ buf_read_ahead_random( ulint i; ulint buf_read_ahead_random_area; +// /* We have currently disabled random readahead */ +// return(0); + if (!(srv_read_ahead & 1)) { return(0); } @@ -378,21 +373,21 @@ read_ahead: return(count); } -/************************************************************************ +/********************************************************************//** High-level function which reads a page asynchronously from a file to the buffer buf_pool if it is not already there. Sets the io_fix flag and sets an exclusive lock on the buffer frame. The flag is cleared and the x-lock released by the i/o-handler thread. Does a random read-ahead if it seems -sensible. */ +sensible. +@return number of page read requests issued: this can be greater than +1 if read-ahead occurred */ UNIV_INTERN ulint buf_read_page( /*==========*/ - /* out: number of page read requests issued: this can - be > 1 if read-ahead occurred */ - ulint space, /* in: space id */ - ulint zip_size,/* in: compressed page size in bytes, or 0 */ - ulint offset) /* in: page number */ + ulint space, /*!< in: space id */ + ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ + ulint offset) /*!< in: page number */ { ib_int64_t tablespace_version; ulint count; @@ -429,7 +424,7 @@ buf_read_page( return(count + count2); } -/************************************************************************ +/********************************************************************//** Applies linear read-ahead if in the buf_pool the page is a border page of a linear read-ahead area and all the pages in the area have been accessed. Does not read any page if the read-ahead mechanism is not activated. Note @@ -451,15 +446,15 @@ function must be written such that it cannot end up waiting for these latches! NOTE 3: the calling thread must want access to the page given: this rule is set to prevent unintended read-aheads performed by ibuf routines, a situation -which could result in a deadlock if the OS does not support asynchronous io. */ +which could result in a deadlock if the OS does not support asynchronous io. 
+@return number of page read requests issued */ UNIV_INTERN ulint buf_read_ahead_linear( /*==================*/ - /* out: number of page read requests issued */ - ulint space, /* in: space id */ - ulint zip_size,/* in: compressed page size in bytes, or 0 */ - ulint offset) /* in: page number of a page; NOTE: the current thread + ulint space, /*!< in: space id */ + ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ + ulint offset) /*!< in: page number of a page; NOTE: the current thread must want access to this page (see NOTE 3 above) */ { ib_int64_t tablespace_version; @@ -478,11 +473,12 @@ buf_read_ahead_linear( ulint i; const ulint buf_read_ahead_linear_area = BUF_READ_AHEAD_LINEAR_AREA; + ulint threshold; if (!(srv_read_ahead & 2)) { return(0); } - + if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) { /* No read-ahead to avoid thread deadlocks */ return(0); @@ -545,6 +541,11 @@ buf_read_ahead_linear( asc_or_desc = -1; } + /* How many out of order accessed pages can we ignore + when working out the access pattern for linear readahead */ + threshold = ut_min((64 - srv_read_ahead_threshold), + BUF_READ_AHEAD_AREA); + fail_count = 0; rw_lock_s_lock(&page_hash_latch); @@ -555,26 +556,26 @@ buf_read_ahead_linear( /* Not accessed */ fail_count++; - } else if (pred_bpage - && (ut_ulint_cmp( + } else if (pred_bpage) { + int res = (ut_ulint_cmp( buf_page_get_LRU_position(bpage), - buf_page_get_LRU_position(pred_bpage)) - != asc_or_desc)) { + buf_page_get_LRU_position(pred_bpage))); /* Accesses not in the right order */ - - fail_count++; - pred_bpage = bpage; + if (res != 0 && res != asc_or_desc) { + fail_count++; + } } - } - - if (fail_count > buf_read_ahead_linear_area - * LINEAR_AREA_THRESHOLD_COEF) { - /* Too many failures: return */ - //buf_pool_mutex_exit(); - rw_lock_s_unlock(&page_hash_latch); + if (fail_count > threshold) { + /* Too many failures: return */ + //buf_pool_mutex_exit(); + rw_lock_s_unlock(&page_hash_latch); + return(0); + } - return(0); + if (bpage && buf_page_is_accessed(bpage)) { + pred_bpage = bpage; + } } /* If we got this far, we know that enough pages in the area have @@ -708,7 +709,7 @@ buf_read_ahead_linear( return(count); } -/************************************************************************ +/********************************************************************//** Issues read requests for pages which the ibuf module wants to read in, in order to contract the insert buffer tree. Technically, this function is like a read-ahead function. 
*/
@@ -716,24 +717,24 @@ UNIV_INTERN
 void
 buf_read_ibuf_merge_pages(
 /*======================*/
-	ibool		sync,		/* in: TRUE if the caller
+	ibool		sync,		/*!< in: TRUE if the caller
					wants this function to wait
					for the highest address page
					to get read in, before this
					function returns */
-	const ulint*	space_ids,	/* in: array of space ids */
-	const ib_int64_t* space_versions,/* in: the spaces must have
+	const ulint*	space_ids,	/*!< in: array of space ids */
+	const ib_int64_t* space_versions,/*!< in: the spaces must have
					this version number
					(timestamp), otherwise we
					discard the read; we use this
					to cancel reads if
					DISCARD + IMPORT may have
					changed the tablespace size */
-	const ulint*	page_nos,	/* in: array of page numbers
+	const ulint*	page_nos,	/*!< in: array of page numbers
					to read, with the highest page
					number the last in the
					array */
-	ulint		n_stored)	/* in: number of elements
+	ulint		n_stored)	/*!< in: number of elements
					in the arrays */
{
	ulint	i;
@@ -786,25 +787,25 @@ tablespace_deleted:
 #endif /* UNIV_DEBUG */
 }
 
-/************************************************************************
+/********************************************************************//**
 Issues read requests for pages which recovery wants to read in. */
 UNIV_INTERN
 void
 buf_read_recv_pages(
 /*================*/
-	ibool	sync,		/* in: TRUE if the caller
+	ibool	sync,		/*!< in: TRUE if the caller
				wants this function to wait
				for the highest address page
				to get read in, before this
				function returns */
-	ulint	space,		/* in: space id */
-	ulint	zip_size,	/* in: compressed page size in
+	ulint	space,		/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in
				bytes, or 0 */
-	const ulint*	page_nos,	/* in: array of page numbers
+	const ulint*	page_nos,	/*!< in: array of page numbers
				to read, with the highest page
				number the last in the
				array */
-	ulint	n_stored)	/* in: number of page numbers
+	ulint	n_stored)	/*!< in: number of page numbers
				in the array */
{
	ib_int64_t	tablespace_version;
@@ -813,6 +814,14 @@ buf_read_recv_pages(
 	ulint	i;
 
 	zip_size = fil_space_get_zip_size(space);
+
+	if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
+		/* It is a single table tablespace and the .ibd file is
+		missing: do nothing */
+
+		return;
+	}
+
 	tablespace_version = fil_space_get_version(space);
 
 	for (i = 0; i < n_stored; i++) {
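
The space_versions array above, like the tablespace_version parameter of buf_read_page_low(), exists to cancel dangling reads: a queued read carries the version timestamp its issuer saw, and if DISCARD + IMPORT has bumped the tablespace's version in the meantime the read is dropped rather than applied to the wrong incarnation of the file. A minimal self-contained sketch of that handshake follows; only fil_space_get_version() is a real hook in this tree, and the toy_* names here are illustrative stand-ins for the fil_system bookkeeping, not code from the source.

#include <stdio.h>

typedef long long	ib_int64_t;

/* Toy stand-in for the per-tablespace version timestamp that the
real fil_space_get_version() returns. */
static ib_int64_t	space_version = 0;

static ib_int64_t
toy_space_get_version(void)
{
	return(space_version);
}

static void
toy_discard_import(void)
{
	space_version++;	/* DISCARD + IMPORT bumps the version */
}

/* A read request carries the version its issuer observed; if the
tablespace was discarded and re-imported meanwhile, the versions
differ and the read is cancelled instead of touching stale pages. */
static int
toy_read_if_current(ib_int64_t version_seen, unsigned long page_no)
{
	if (toy_space_get_version() != version_seen) {
		printf("page %lu: stale version, read cancelled\n", page_no);
		return(0);
	}

	printf("page %lu: read issued\n", page_no);
	return(1);
}

int
main(void)
{
	ib_int64_t	v = toy_space_get_version();

	toy_read_if_current(v, 1);	/* issued */
	toy_discard_import();
	toy_read_if_current(v, 2);	/* cancelled: version moved on */
	return(0);
}

The same comparison guards buf_read_page_low() itself, whose comment block describes the timestamp as the means "to stop dangling page reads from a tablespace which we have DISCARDed + IMPORTed back", which is why the version is threaded through every read entry point in this file.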
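The linear read-ahead hunk is the one behavioural change in this file rather than a pure comment conversion: the fixed cutoff based on LINEAR_AREA_THRESHOLD_COEF is replaced by a threshold derived from the srv_read_ahead_threshold system variable, ties in LRU position (res == 0) no longer count as failures, and the failure check moves inside the loop so the scan can bail out early. Below is a compilable sketch of just that decision logic; page_info_t, its fields, and linear_pattern_ok() are illustrative stand-ins for walking buf_page_t entries under page_hash_latch, not names from the source.

#include <stdio.h>

#define BUF_READ_AHEAD_AREA	64	/* pages per read-ahead area (sketch value) */

/* Illustrative stand-in for the per-page state the real loop reads
from buf_page_t; "accessed" and "lru_pos" are hypothetical fields. */
typedef struct {
	int		accessed;	/* has the page been accessed? */
	unsigned long	lru_pos;	/* LRU position when accessed */
} page_info_t;

static unsigned long
ul_min(unsigned long a, unsigned long b)
{
	return(a < b ? a : b);
}

/* Returns 1 if the access pattern across the area still justifies
linear read-ahead. Mirrors the patched loop: unaccessed pages count
as failures; accessed pages whose LRU order contradicts the expected
direction also count; equal LRU positions are tolerated, which the
replaced code treated as failures. */
static int
linear_pattern_ok(
	const page_info_t*	area,		/* pages in the area */
	int			n,		/* number of pages */
	int			asc_or_desc,	/* 1 ascending, -1 descending */
	unsigned long		srv_read_ahead_threshold) /* 0..64 knob */
{
	unsigned long		threshold;
	unsigned long		fail_count = 0;
	const page_info_t*	pred = NULL;
	int			i;

	/* How many out-of-order or unaccessed pages to ignore: the
	higher the variable is set, the fewer failures are tolerated. */
	threshold = ul_min(64 - srv_read_ahead_threshold,
			   BUF_READ_AHEAD_AREA);

	for (i = 0; i < n; i++) {
		if (!area[i].accessed) {
			fail_count++;
		} else if (pred) {
			int	res;

			res = (area[i].lru_pos > pred->lru_pos) ? 1
			    : (area[i].lru_pos < pred->lru_pos) ? -1 : 0;

			if (res != 0 && res != asc_or_desc) {
				fail_count++;	/* wrong direction */
			}
		}

		if (fail_count > threshold) {
			return(0);	/* too many failures: bail out */
		}

		if (area[i].accessed) {
			pred = &area[i];	/* last accessed page */
		}
	}

	return(1);
}

int
main(void)
{
	/* Three accessed pages in ascending LRU order, one gap. */
	page_info_t	area[4] = {
		{1, 10}, {0, 0}, {1, 20}, {1, 30}
	};

	printf("read-ahead %s\n",
	       linear_pattern_ok(area, 4, 1, 56) ? "triggers" : "skipped");
	return(0);
}

With the stock innodb_read_ahead_threshold of 56 this tolerates 64 - 56 = 8 stray pages per area, so a mostly sequential scan with a few gaps still triggers read-ahead, while setting the variable to 64 tolerates no failures and restores an all-or-nothing pattern check.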