diff options
Diffstat (limited to 'innobase/buf')
-rw-r--r-- | innobase/buf/Makefile.am | 2 | ||||
-rw-r--r-- | innobase/buf/buf0buf.c | 36 | ||||
-rw-r--r-- | innobase/buf/buf0flu.c | 53 | ||||
-rw-r--r-- | innobase/buf/buf0lru.c | 58 |
4 files changed, 106 insertions, 43 deletions
diff --git a/innobase/buf/Makefile.am b/innobase/buf/Makefile.am index b1463c2220e..3f56c8b02d7 100644 --- a/innobase/buf/Makefile.am +++ b/innobase/buf/Makefile.am @@ -17,7 +17,7 @@ include ../include/Makefile.i -libs_LIBRARIES = libbuf.a +noinst_LIBRARIES = libbuf.a libbuf_a_SOURCES = buf0buf.c buf0flu.c buf0lru.c buf0rea.c diff --git a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c index 14d538a14bc..1cdddaf6cb4 100644 --- a/innobase/buf/buf0buf.c +++ b/innobase/buf/buf0buf.c @@ -597,8 +597,9 @@ buf_pool_init( /* Wipe contents of frame to eliminate a Purify warning */ +#ifdef HAVE_purify memset(block->frame, '\0', UNIV_PAGE_SIZE); - +#endif if (srv_use_awe) { /* Add to the list of blocks mapped to frames */ @@ -1837,7 +1838,7 @@ buf_pool_invalidate(void) freed = TRUE; while (freed) { - freed = buf_LRU_search_and_free_block(0); + freed = buf_LRU_search_and_free_block(100); } mutex_enter(&(buf_pool->mutex)); @@ -2057,6 +2058,29 @@ buf_get_n_pending_ios(void) } /************************************************************************* +Returns the ratio in percents of modified pages in the buffer pool / +database pages in the buffer pool. */ + +ulint +buf_get_modified_ratio_pct(void) +/*============================*/ +{ + ulint ratio; + + mutex_enter(&(buf_pool->mutex)); + + ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list)) + / (1 + UT_LIST_GET_LEN(buf_pool->LRU) + + UT_LIST_GET_LEN(buf_pool->free)); + + /* 1 + is there to avoid division by zero */ + + mutex_exit(&(buf_pool->mutex)); + + return(ratio); +} + +/************************************************************************* Prints info of the buffer i/o. */ void @@ -2109,8 +2133,10 @@ buf_print_io( buf += sprintf(buf, "Pending writes: LRU %lu, flush list %lu, single page %lu\n", - buf_pool->n_flush[BUF_FLUSH_LRU], - buf_pool->n_flush[BUF_FLUSH_LIST], + buf_pool->n_flush[BUF_FLUSH_LRU] + + buf_pool->init_flush[BUF_FLUSH_LRU], + buf_pool->n_flush[BUF_FLUSH_LIST] + + buf_pool->init_flush[BUF_FLUSH_LIST], buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]); current_time = time(NULL); @@ -2144,7 +2170,7 @@ buf_print_io( / (buf_pool->n_page_gets - buf_pool->n_page_gets_old))); } else { buf += sprintf(buf, - "No buffer pool activity since the last printout\n"); + "No buffer pool page gets since the last printout\n"); } buf_pool->n_page_gets_old = buf_pool->n_page_gets; diff --git a/innobase/buf/buf0flu.c b/innobase/buf/buf0flu.c index 02587487a92..47ac9c6b041 100644 --- a/innobase/buf/buf0flu.c +++ b/innobase/buf/buf0flu.c @@ -107,7 +107,7 @@ buf_flush_ready_for_replace( BUF_BLOCK_FILE_PAGE and in the LRU list */ { ut_ad(mutex_own(&(buf_pool->mutex))); - ut_ad(block->state == BUF_BLOCK_FILE_PAGE); + ut_a(block->state == BUF_BLOCK_FILE_PAGE); if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0) || (block->buf_fix_count != 0) @@ -227,7 +227,9 @@ buf_flush_buffered_writes(void) } for (i = 0; i < trx_doublewrite->first_free; i++) { + block = trx_doublewrite->buf_block_arr[i]; + ut_a(block->state == BUF_BLOCK_FILE_PAGE); if (block->check_index_page_at_flush && !page_simple_validate(block->frame)) { @@ -236,10 +238,12 @@ buf_flush_buffered_writes(void) ut_print_timestamp(stderr); fprintf(stderr, - " InnoDB: Apparent corruption of an index page\n" + " InnoDB: Apparent corruption of an index page n:o %lu in space %lu\n" "InnoDB: to be written to data file. We intentionally crash server\n" "InnoDB: to prevent corrupt data from ending up in data\n" - "InnoDB: files.\n"); + "InnoDB: files.\n", + block->offset, block->space); + ut_a(0); } } @@ -394,7 +398,7 @@ buf_flush_write_block_low( "Warning: cannot force log to disk in the log debug version!\n"); #else /* Force the log to the disk before writing the modified block */ - log_flush_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS); + log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE); #endif buf_flush_init_for_writing(block->frame, block->newest_modification, block->space, block->offset); @@ -432,6 +436,8 @@ buf_flush_try_page( block = buf_page_hash_get(space, offset); + ut_a(block->state == BUF_BLOCK_FILE_PAGE); + if (flush_type == BUF_FLUSH_LIST && block && buf_flush_ready_for_flush(block, flush_type)) { @@ -567,7 +573,8 @@ buf_flush_try_page( rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE); if (buf_debug_prints) { - printf("Flushing single page space %lu, page no %lu \n", + printf( + "Flushing single page space %lu, page no %lu \n", block->space, block->offset); } @@ -608,15 +615,7 @@ buf_flush_try_neighbors( low = offset; high = offset + 1; - } else if (flush_type == BUF_FLUSH_LIST) { - /* Since semaphore waits require us to flush the - doublewrite buffer to disk, it is best that the - search area is just the page itself, to minimize - chances for semaphore waits */ - - low = offset; - high = offset + 1; - } + } /* printf("Flush area: low %lu high %lu\n", low, high); */ @@ -633,13 +632,20 @@ buf_flush_try_neighbors( if (block && flush_type == BUF_FLUSH_LRU && i != offset && !block->old) { - /* We avoid flushing 'non-old' blocks in an LRU flush, - because the flushed blocks are soon freed */ + /* We avoid flushing 'non-old' blocks in an LRU flush, + because the flushed blocks are soon freed */ - continue; + continue; } - if (block && buf_flush_ready_for_flush(block, flush_type)) { + if (block && buf_flush_ready_for_flush(block, flush_type) + && (i == offset || block->buf_fix_count == 0)) { + /* We only try to flush those neighbors != offset + where the buf fix count is zero, as we then know that + we probably can latch the page without a semaphore + wait. Semaphore waits are expensive because we must + flush the doublewrite buffer before we start + waiting. */ mutex_exit(&(buf_pool->mutex)); @@ -758,7 +764,6 @@ buf_flush_batch( page_count += buf_flush_try_neighbors(space, offset, flush_type); - /* printf( "Flush type %lu, page no %lu, neighb %lu\n", flush_type, offset, @@ -884,11 +889,19 @@ buf_flush_free_margin(void) /*=======================*/ { ulint n_to_flush; + ulint n_flushed; n_to_flush = buf_flush_LRU_recommendation(); if (n_to_flush > 0) { - buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, ut_dulint_zero); + n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, + ut_dulint_zero); + if (n_flushed == ULINT_UNDEFINED) { + /* There was an LRU type flush batch already running; + let us wait for it to end */ + + buf_flush_wait_batch_end(BUF_FLUSH_LRU); + } } } diff --git a/innobase/buf/buf0lru.c b/innobase/buf/buf0lru.c index 051aa0191f6..40f49f1fddc 100644 --- a/innobase/buf/buf0lru.c +++ b/innobase/buf/buf0lru.c @@ -104,12 +104,15 @@ ibool buf_LRU_search_and_free_block( /*==========================*/ /* out: TRUE if freed */ - ulint n_iterations __attribute__((unused))) /* in: how many times - this has been called repeatedly without - result: a high value means that we should - search farther */ + ulint n_iterations) /* in: how many times this has been called + repeatedly without result: a high value means + that we should search farther; if value is + k < 10, then we only search k/10 * [number + of pages in the buffer pool] from the end + of the LRU list */ { buf_block_t* block; + ulint distance = 0; ibool freed; mutex_enter(&(buf_pool->mutex)); @@ -152,6 +155,18 @@ buf_LRU_search_and_free_block( } block = UT_LIST_GET_PREV(LRU, block); + distance++; + + if (!freed && n_iterations <= 10 + && distance > 100 + (n_iterations * buf_pool->curr_size) + / 10) { + + buf_pool->LRU_flush_ended = 0; + + mutex_exit(&(buf_pool->mutex)); + + return(FALSE); + } } if (buf_pool->LRU_flush_ended > 0) { @@ -186,7 +201,7 @@ buf_LRU_try_free_flushed_blocks(void) mutex_exit(&(buf_pool->mutex)); - buf_LRU_search_and_free_block(0); + buf_LRU_search_and_free_block(1); mutex_enter(&(buf_pool->mutex)); } @@ -208,7 +223,7 @@ buf_LRU_get_free_block(void) { buf_block_t* block = NULL; ibool freed; - ulint n_iterations = 0; + ulint n_iterations = 1; ibool mon_value_was = 0; /* remove bug */ ibool started_monitor = FALSE; loop: @@ -236,9 +251,12 @@ loop: fprintf(stderr, " InnoDB: WARNING: over 4 / 5 of the buffer pool is occupied by\n" "InnoDB: lock heaps or the adaptive hash index! Check that your\n" -"InnoDB: transactions do not set too many row locks. Starting InnoDB\n" -"InnoDB: Monitor to print diagnostics, including lock heap and hash index\n" -"InnoDB: sizes.\n"); +"InnoDB: transactions do not set too many row locks.\n" +"InnoDB: Your buffer pool size is %lu MB. Maybe you should make\n" +"InnoDB: the buffer pool bigger?\n" +"InnoDB: Starting the InnoDB Monitor to print diagnostics, including\n" +"InnoDB: lock heap and hash index sizes.\n", + buf_pool->curr_size / (1024 * 1024 / UNIV_PAGE_SIZE)); srv_print_innodb_monitor = TRUE; @@ -251,14 +269,6 @@ loop: srv_print_innodb_monitor = FALSE; } - - if (buf_pool->LRU_flush_ended > 0) { - mutex_exit(&(buf_pool->mutex)); - - buf_LRU_try_free_flushed_blocks(); - - mutex_enter(&(buf_pool->mutex)); - } /* If there is a block in the free list, take it */ if (UT_LIST_GET_LEN(buf_pool->free) > 0) { @@ -340,6 +350,20 @@ loop: os_aio_simulated_wake_handler_threads(); + mutex_enter(&(buf_pool->mutex)); + + if (buf_pool->LRU_flush_ended > 0) { + /* We have written pages in an LRU flush. To make the insert + buffer more efficient, we try to move these pages to the free + list. */ + + mutex_exit(&(buf_pool->mutex)); + + buf_LRU_try_free_flushed_blocks(); + } else { + mutex_exit(&(buf_pool->mutex)); + } + if (n_iterations > 10) { os_thread_sleep(500000); |