diff options
24 files changed, 810 insertions, 598 deletions
diff --git a/mysql-test/suite/innodb/r/innodb_bug44571.result b/mysql-test/suite/innodb/r/innodb_bug44571.result new file mode 100644 index 00000000000..36374edcb3e --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_bug44571.result @@ -0,0 +1,9 @@ +CREATE TABLE bug44571 (foo INT) ENGINE=InnoDB; +ALTER TABLE bug44571 CHANGE foo bar INT; +ALTER TABLE bug44571 ADD INDEX bug44571b (foo); +ERROR 42000: Key column 'foo' doesn't exist in table +ALTER TABLE bug44571 ADD INDEX bug44571b (bar); +ERROR HY000: Incorrect key file for table 'bug44571'; try to repair it +CREATE INDEX bug44571b ON bug44571 (bar); +ERROR HY000: Incorrect key file for table 'bug44571'; try to repair it +DROP TABLE bug44571; diff --git a/mysql-test/suite/innodb/t/innodb_bug44571.test b/mysql-test/suite/innodb/t/innodb_bug44571.test new file mode 100644 index 00000000000..43f290cde84 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_bug44571.test @@ -0,0 +1,18 @@ +# +# Bug#44571 InnoDB Plugin crashes on ADD INDEX +# http://bugs.mysql.com/44571 +# +-- source include/have_innodb.inc +-- source suite/innodb/include/have_innodb_plugin.inc + +CREATE TABLE bug44571 (foo INT) ENGINE=InnoDB; +ALTER TABLE bug44571 CHANGE foo bar INT; +-- error ER_KEY_COLUMN_DOES_NOT_EXITS +ALTER TABLE bug44571 ADD INDEX bug44571b (foo); +# The following will fail, because the CHANGE foo bar was +# not communicated to InnoDB. +--error ER_NOT_KEYFILE +ALTER TABLE bug44571 ADD INDEX bug44571b (bar); +--error ER_NOT_KEYFILE +CREATE INDEX bug44571b ON bug44571 (bar); +DROP TABLE bug44571; diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index b488b21c768..df71a1c25a5 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,37 @@ +2009-08-27 The InnoDB Team + + * dict/dict0dict.c, include/dict0dict.h, + mysql-test/innodb_bug44571.result, mysql-test/innodb_bug44571.test: + Fix Bug#44571 InnoDB Plugin crashes on ADD INDEX + +2009-08-27 The InnoDB Team + + * row/row0merge.c: + Fix a bug in the merge sort that can corrupt indexes in fast index + creation. Add some consistency checks. Check that the number of + records remains constant in every merge sort pass. + +2009-08-27 The InnoDB Team + + * buf/buf0buf.c, buf/buf0lru.c, buf/buf0rea.c, + handler/ha_innodb.cc, include/buf0buf.h, include/buf0buf.ic, + include/buf0lru.h, include/ut0ut.h, ut/ut0ut.c: + Make it possible to tune the buffer pool LRU eviction policy to be + more resistant against index scans. Introduce the settable global + variables innodb_old_blocks_pct and innodb_old_blocks_time for + controlling the buffer pool eviction policy. The parameter + innodb_old_blocks_pct (5..95) controls the desired amount of "old" + blocks in the LRU list. The default is 37, corresponding to the + old fixed ratio of 3/8. Each time a block is accessed, it will be + moved to the "new" blocks if its first access was at least + innodb_old_blocks_time milliseconds ago (default 0, meaning every + block). The idea is that in index scans, blocks will be accessed + a few times within innodb_old_blocks_time, and they will remain in + the "old" section of the LRU list. Thus, when + innodb_old_blocks_time is nonzero, blocks retrieved for one-time + index scans will be more likely candidates for eviction than + blocks that are accessed in random patterns. + 2009-08-26 The InnoDB Team * handler/ha_innodb.cc, os/os0file.c: diff --git a/storage/innodb_plugin/btr/btr0sea.c b/storage/innodb_plugin/btr/btr0sea.c index faa1c13897e..0a80c61a58d 100644 --- a/storage/innodb_plugin/btr/btr0sea.c +++ b/storage/innodb_plugin/btr/btr0sea.c @@ -957,7 +957,7 @@ btr_search_guess_on_hash( /* Increment the page get statistics though we did not really fix the page: for user info only */ - buf_pool->n_page_gets++; + buf_pool->stat.n_page_gets++; return(TRUE); diff --git a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c index 0008fcb1271..6af3dff460a 100644 --- a/storage/innodb_plugin/buf/buf0buf.c +++ b/storage/innodb_plugin/buf/buf0buf.c @@ -837,16 +837,35 @@ buf_chunk_not_freed( block = chunk->blocks; for (i = chunk->size; i--; block++) { - mutex_enter(&block->mutex); - - if (buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE - && !buf_flush_ready_for_replace(&block->page)) { + ibool ready; + switch (buf_block_get_state(block)) { + case BUF_BLOCK_ZIP_FREE: + case BUF_BLOCK_ZIP_PAGE: + case BUF_BLOCK_ZIP_DIRTY: + /* The uncompressed buffer pool should never + contain compressed block descriptors. */ + ut_error; + break; + case BUF_BLOCK_NOT_USED: + case BUF_BLOCK_READY_FOR_USE: + case BUF_BLOCK_MEMORY: + case BUF_BLOCK_REMOVE_HASH: + /* Skip blocks that are not being used for + file pages. */ + break; + case BUF_BLOCK_FILE_PAGE: + mutex_enter(&block->mutex); + ready = buf_flush_ready_for_replace(&block->page); mutex_exit(&block->mutex); - return(block); - } - mutex_exit(&block->mutex); + if (!ready) { + + return(block); + } + + break; + } } return(NULL); @@ -966,8 +985,6 @@ buf_pool_init(void) buf_pool->no_flush[i] = os_event_create(NULL); } - buf_pool->ulint_clock = 1; - /* 3. Initialize LRU fields --------------------------- */ /* All fields are initialized by mem_zalloc(). */ @@ -1471,31 +1488,6 @@ buf_pool_resize(void) } /********************************************************************//** -Moves the block to the start of the LRU list if there is a danger -that the block would drift out of the buffer pool. */ -UNIV_INLINE -void -buf_block_make_young( -/*=================*/ - buf_page_t* bpage) /*!< in: block to make younger */ -{ - ut_ad(!buf_pool_mutex_own()); - - /* Note that we read freed_page_clock's without holding any mutex: - this is allowed since the result is used only in heuristics */ - - if (buf_page_peek_if_too_old(bpage)) { - - buf_pool_mutex_enter(); - /* There has been freeing activity in the LRU list: - best to move to the head of the LRU list */ - - buf_LRU_make_block_young(bpage); - buf_pool_mutex_exit(); - } -} - -/********************************************************************//** Moves a page to the start of the buffer pool LRU list. This high-level function can be used to prevent an important page from from slipping out of the buffer pool. */ @@ -1649,7 +1641,7 @@ buf_page_get_zip( #ifndef UNIV_LOG_DEBUG ut_ad(!ibuf_inside()); #endif - buf_pool->n_page_gets++; + buf_pool->stat.n_page_gets++; for (;;) { buf_pool_mutex_enter(); @@ -1713,13 +1705,15 @@ err_exit: got_block: must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ; - buf_pool_mutex_exit(); + if (buf_page_peek_if_too_old(bpage)) { + buf_LRU_make_block_young(bpage); + } - buf_page_set_accessed(bpage, TRUE); + buf_page_set_accessed(bpage); - mutex_exit(block_mutex); + buf_pool_mutex_exit(); - buf_block_make_young(bpage); + mutex_exit(block_mutex); #ifdef UNIV_DEBUG_FILE_ACCESSES ut_a(!bpage->file_page_was_freed); @@ -2000,7 +1994,7 @@ buf_page_get_gen( mtr_t* mtr) /*!< in: mini-transaction */ { buf_block_t* block; - ibool accessed; + unsigned access_time; ulint fix_type; ibool must_read; @@ -2016,7 +2010,7 @@ buf_page_get_gen( #ifndef UNIV_LOG_DEBUG ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL)); #endif - buf_pool->n_page_gets++; + buf_pool->stat.n_page_gets++; loop: block = guess; buf_pool_mutex_enter(); @@ -2243,17 +2237,20 @@ wait_until_unfixed: UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page); buf_block_buf_fix_inc(block, file, line); - buf_pool_mutex_exit(); + + mutex_exit(&block->mutex); /* Check if this is the first access to the page */ - accessed = buf_page_is_accessed(&block->page); + access_time = buf_page_is_accessed(&block->page); - buf_page_set_accessed(&block->page, TRUE); + if (buf_page_peek_if_too_old(&block->page)) { + buf_LRU_make_block_young(&block->page); + } - mutex_exit(&block->mutex); + buf_page_set_accessed(&block->page); - buf_block_make_young(&block->page); + buf_pool_mutex_exit(); #ifdef UNIV_DEBUG_FILE_ACCESSES ut_a(!block->page.file_page_was_freed); @@ -2306,7 +2303,7 @@ wait_until_unfixed: mtr_memo_push(mtr, block, fix_type); - if (!accessed) { + if (!access_time) { /* In the case of a first access, try to apply linear read-ahead */ @@ -2336,7 +2333,7 @@ buf_page_optimistic_get_func( ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mini-transaction */ { - ibool accessed; + unsigned access_time; ibool success; ulint fix_type; @@ -2353,14 +2350,21 @@ buf_page_optimistic_get_func( } buf_block_buf_fix_inc(block, file, line); - accessed = buf_page_is_accessed(&block->page); - buf_page_set_accessed(&block->page, TRUE); mutex_exit(&block->mutex); - buf_block_make_young(&block->page); + buf_pool_mutex_enter(); /* Check if this is the first access to the page */ + access_time = buf_page_is_accessed(&block->page); + + if (buf_page_peek_if_too_old(&block->page)) { + buf_LRU_make_block_young(&block->page); + } + + buf_page_set_accessed(&block->page); + + buf_pool_mutex_exit(); ut_ad(!ibuf_inside() || ibuf_page(buf_block_get_space(block), @@ -2412,7 +2416,7 @@ buf_page_optimistic_get_func( #ifdef UNIV_DEBUG_FILE_ACCESSES ut_a(block->page.file_page_was_freed == FALSE); #endif - if (UNIV_UNLIKELY(!accessed)) { + if (UNIV_UNLIKELY(!access_time)) { /* In the case of a first access, try to apply linear read-ahead */ @@ -2425,7 +2429,7 @@ buf_page_optimistic_get_func( ut_a(ibuf_count_get(buf_block_get_space(block), buf_block_get_page_no(block)) == 0); #endif - buf_pool->n_page_gets++; + buf_pool->stat.n_page_gets++; return(TRUE); } @@ -2473,10 +2477,16 @@ buf_page_get_known_nowait( mutex_exit(&block->mutex); - if (mode == BUF_MAKE_YOUNG) { - buf_block_make_young(&block->page); + buf_pool_mutex_enter(); + + if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) { + buf_LRU_make_block_young(&block->page); } + buf_page_set_accessed(&block->page); + + buf_pool_mutex_exit(); + ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD)); if (rw_latch == RW_S_LATCH) { @@ -2513,7 +2523,7 @@ buf_page_get_known_nowait( || (ibuf_count_get(buf_block_get_space(block), buf_block_get_page_no(block)) == 0)); #endif - buf_pool->n_page_gets++; + buf_pool->stat.n_page_gets++; return(TRUE); } @@ -2589,7 +2599,7 @@ buf_page_try_get_func( #endif /* UNIV_DEBUG_FILE_ACCESSES */ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); - buf_pool->n_page_gets++; + buf_pool->stat.n_page_gets++; #ifdef UNIV_IBUF_COUNT_DEBUG ut_a(ibuf_count_get(buf_block_get_space(block), @@ -2608,10 +2618,10 @@ buf_page_init_low( buf_page_t* bpage) /*!< in: block to init */ { bpage->flush_type = BUF_FLUSH_LRU; - bpage->accessed = FALSE; bpage->io_fix = BUF_IO_NONE; bpage->buf_fix_count = 0; bpage->freed_page_clock = 0; + bpage->access_time = 0; bpage->newest_modification = 0; bpage->oldest_modification = 0; HASH_INVALIDATE(bpage, hash); @@ -2953,7 +2963,7 @@ buf_page_create( buf_LRU_add_block(&block->page, FALSE); buf_block_buf_fix_inc(block, __FILE__, __LINE__); - buf_pool->n_pages_created++; + buf_pool->stat.n_pages_created++; if (zip_size) { void* data; @@ -2990,12 +3000,12 @@ buf_page_create( rw_lock_x_unlock(&block->lock); } + buf_page_set_accessed(&block->page); + buf_pool_mutex_exit(); mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX); - buf_page_set_accessed(&block->page, TRUE); - mutex_exit(&block->mutex); /* Delete possible entries for the page from the insert buffer: @@ -3201,7 +3211,7 @@ corrupt: ut_ad(buf_pool->n_pend_reads > 0); buf_pool->n_pend_reads--; - buf_pool->n_pages_read++; + buf_pool->stat.n_pages_read++; if (uncompressed) { rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock, @@ -3221,7 +3231,7 @@ corrupt: BUF_IO_WRITE); } - buf_pool->n_pages_written++; + buf_pool->stat.n_pages_written++; break; @@ -3528,6 +3538,7 @@ buf_print(void) "n pending decompressions %lu\n" "n pending reads %lu\n" "n pending flush LRU %lu list %lu single page %lu\n" + "pages made young %lu, not young %lu\n" "pages read %lu, created %lu, written %lu\n", (ulong) size, (ulong) UT_LIST_GET_LEN(buf_pool->LRU), @@ -3538,8 +3549,11 @@ buf_print(void) (ulong) buf_pool->n_flush[BUF_FLUSH_LRU], (ulong) buf_pool->n_flush[BUF_FLUSH_LIST], (ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE], - (ulong) buf_pool->n_pages_read, buf_pool->n_pages_created, - (ulong) buf_pool->n_pages_written); + (ulong) buf_pool->stat.n_pages_made_young, + (ulong) buf_pool->stat.n_pages_not_made_young, + (ulong) buf_pool->stat.n_pages_read, + (ulong) buf_pool->stat.n_pages_created, + (ulong) buf_pool->stat.n_pages_written); /* Count the number of blocks belonging to each index in the buffer */ @@ -3744,10 +3758,9 @@ buf_print_io( { time_t current_time; double time_elapsed; - ulint size; + ulint n_gets_diff; ut_ad(buf_pool); - size = buf_pool->curr_size; buf_pool_mutex_enter(); @@ -3755,12 +3768,14 @@ buf_print_io( "Buffer pool size %lu\n" "Free buffers %lu\n" "Database pages %lu\n" + "Old database pages %lu\n" "Modified db pages %lu\n" "Pending reads %lu\n" "Pending writes: LRU %lu, flush list %lu, single page %lu\n", - (ulong) size, + (ulong) buf_pool->curr_size, (ulong) UT_LIST_GET_LEN(buf_pool->free), (ulong) UT_LIST_GET_LEN(buf_pool->LRU), + (ulong) buf_pool->LRU_old_len, (ulong) UT_LIST_GET_LEN(buf_pool->flush_list), (ulong) buf_pool->n_pend_reads, (ulong) buf_pool->n_flush[BUF_FLUSH_LRU] @@ -3772,37 +3787,66 @@ buf_print_io( current_time = time(NULL); time_elapsed = 0.001 + difftime(current_time, buf_pool->last_printout_time); - buf_pool->last_printout_time = current_time; fprintf(file, + "Pages made young %lu, not young %lu\n" + "%.2f youngs/s, %.2f non-youngs/s\n" "Pages read %lu, created %lu, written %lu\n" "%.2f reads/s, %.2f creates/s, %.2f writes/s\n", - (ulong) buf_pool->n_pages_read, - (ulong) buf_pool->n_pages_created, - (ulong) buf_pool->n_pages_written, - (buf_pool->n_pages_read - buf_pool->n_pages_read_old) + (ulong) buf_pool->stat.n_pages_made_young, + (ulong) buf_pool->stat.n_pages_not_made_young, + (buf_pool->stat.n_pages_made_young + - buf_pool->old_stat.n_pages_made_young) + / time_elapsed, + (buf_pool->stat.n_pages_not_made_young + - buf_pool->old_stat.n_pages_not_made_young) / time_elapsed, - (buf_pool->n_pages_created - buf_pool->n_pages_created_old) + (ulong) buf_pool->stat.n_pages_read, + (ulong) buf_pool->stat.n_pages_created, + (ulong) buf_pool->stat.n_pages_written, + (buf_pool->stat.n_pages_read + - buf_pool->old_stat.n_pages_read) / time_elapsed, - (buf_pool->n_pages_written - buf_pool->n_pages_written_old) + (buf_pool->stat.n_pages_created + - buf_pool->old_stat.n_pages_created) + / time_elapsed, + (buf_pool->stat.n_pages_written + - buf_pool->old_stat.n_pages_written) / time_elapsed); - if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) { - fprintf(file, "Buffer pool hit rate %lu / 1000\n", + n_gets_diff = buf_pool->stat.n_page_gets - buf_pool->old_stat.n_page_gets; + + if (n_gets_diff) { + fprintf(file, + "Buffer pool hit rate %lu / 1000," + " young-making rate %lu / 1000 not %lu / 1000\n", (ulong) - (1000 - ((1000 * (buf_pool->n_pages_read - - buf_pool->n_pages_read_old)) - / (buf_pool->n_page_gets - - buf_pool->n_page_gets_old)))); + (1000 - ((1000 * (buf_pool->stat.n_pages_read + - buf_pool->old_stat.n_pages_read)) + / (buf_pool->stat.n_page_gets + - buf_pool->old_stat.n_page_gets))), + (ulong) + (1000 * (buf_pool->stat.n_pages_made_young + - buf_pool->old_stat.n_pages_made_young) + / n_gets_diff), + (ulong) + (1000 * (buf_pool->stat.n_pages_not_made_young + - buf_pool->old_stat.n_pages_not_made_young) + / n_gets_diff)); } else { fputs("No buffer pool page gets since the last printout\n", file); } - buf_pool->n_page_gets_old = buf_pool->n_page_gets; - buf_pool->n_pages_read_old = buf_pool->n_pages_read; - buf_pool->n_pages_created_old = buf_pool->n_pages_created; - buf_pool->n_pages_written_old = buf_pool->n_pages_written; + /* Statistics about read ahead algorithm */ + fprintf(file, "Pages read ahead %.2f/s," + " evicted without access %.2f/s\n", + (buf_pool->stat.n_ra_pages_read + - buf_pool->old_stat.n_ra_pages_read) + / time_elapsed, + (buf_pool->stat.n_ra_pages_evicted + - buf_pool->old_stat.n_ra_pages_evicted) + / time_elapsed); /* Print some values to help us with visualizing what is happening with LRU eviction. */ @@ -3814,6 +3858,7 @@ buf_print_io( buf_LRU_stat_sum.io, buf_LRU_stat_cur.io, buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip); + buf_refresh_io_stats(); buf_pool_mutex_exit(); } @@ -3825,10 +3870,7 @@ buf_refresh_io_stats(void) /*======================*/ { buf_pool->last_printout_time = time(NULL); - buf_pool->n_page_gets_old = buf_pool->n_page_gets; - buf_pool->n_pages_read_old = buf_pool->n_pages_read; - buf_pool->n_pages_created_old = buf_pool->n_pages_created; - buf_pool->n_pages_written_old = buf_pool->n_pages_written; + buf_pool->old_stat = buf_pool->stat; } /*********************************************************************//** diff --git a/storage/innodb_plugin/buf/buf0lru.c b/storage/innodb_plugin/buf/buf0lru.c index be53a5f5d9d..223746e0d01 100644 --- a/storage/innodb_plugin/buf/buf0lru.c +++ b/storage/innodb_plugin/buf/buf0lru.c @@ -49,14 +49,23 @@ Created 11/5/1995 Heikki Tuuri #include "log0recv.h" #include "srv0srv.h" -/** The number of blocks from the LRU_old pointer onward, including the block -pointed to, must be 3/8 of the whole LRU list length, except that the -tolerance defined below is allowed. Note that the tolerance must be small -enough such that for even the BUF_LRU_OLD_MIN_LEN long LRU list, the -LRU_old pointer is not allowed to point to either end of the LRU list. */ +/** The number of blocks from the LRU_old pointer onward, including +the block pointed to, must be buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV +of the whole LRU list length, except that the tolerance defined below +is allowed. Note that the tolerance must be small enough such that for +even the BUF_LRU_OLD_MIN_LEN long LRU list, the LRU_old pointer is not +allowed to point to either end of the LRU list. */ #define BUF_LRU_OLD_TOLERANCE 20 +/** The minimum amount of non-old blocks when the LRU_old list exists +(that is, when there are more than BUF_LRU_OLD_MIN_LEN blocks). +@see buf_LRU_old_adjust_len */ +#define BUF_LRU_NON_OLD_MIN_LEN 5 +#if BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN +# error "BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN" +#endif + /** The whole LRU list length is divided by this number to determine an initial segment in buf_LRU_get_recent_limit */ @@ -107,6 +116,15 @@ UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_sum; /* @} */ +/** @name Heuristics for detecting index scan @{ */ +/** Reserve this much/BUF_LRU_OLD_RATIO_DIV of the buffer pool for +"old" blocks. Protected by buf_pool_mutex. */ +UNIV_INTERN uint buf_LRU_old_ratio; +/** Move blocks to "new" LRU list only if the first access was at +least this many milliseconds ago. Not protected by any mutex or latch. */ +UNIV_INTERN uint buf_LRU_old_threshold_ms; +/* @} */ + /******************************************************************//** Takes a block out of the LRU list and page hash table. If the block is compressed-only (BUF_BLOCK_ZIP_PAGE), @@ -428,42 +446,6 @@ next_page: } } -/******************************************************************//** -Gets the minimum LRU_position field for the blocks in an initial segment -(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not -guaranteed to be precise, because the ulint_clock may wrap around. -@return the limit; zero if could not determine it */ -UNIV_INTERN -ulint -buf_LRU_get_recent_limit(void) -/*==========================*/ -{ - const buf_page_t* bpage; - ulint len; - ulint limit; - - buf_pool_mutex_enter(); - - len = UT_LIST_GET_LEN(buf_pool->LRU); - - if (len < BUF_LRU_OLD_MIN_LEN) { - /* The LRU list is too short to do read-ahead */ - - buf_pool_mutex_exit(); - - return(0); - } - - bpage = UT_LIST_GET_FIRST(buf_pool->LRU); - - limit = buf_page_get_LRU_position(bpage); - len /= BUF_LRU_INITIAL_RATIO; - - buf_pool_mutex_exit(); - - return(limit > len ? (limit - len) : 0); -} - /********************************************************************//** Insert a compressed block into buf_pool->zip_clean in the LRU order. */ UNIV_INTERN @@ -594,6 +576,7 @@ buf_LRU_free_from_common_LRU_list( bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) { enum buf_lru_free_block_status freed; + unsigned accessed; mutex_t* block_mutex = buf_page_get_mutex(bpage); @@ -601,11 +584,18 @@ buf_LRU_free_from_common_LRU_list( ut_ad(bpage->in_LRU_list); mutex_enter(block_mutex); + accessed = buf_page_is_accessed(bpage); freed = buf_LRU_free_block(bpage, TRUE, NULL); mutex_exit(block_mutex); switch (freed) { case BUF_LRU_FREED: + /* Keep track of pages that are evicted without + ever being accessed. This gives us a measure of + the effectiveness of readahead */ + if (!accessed) { + ++buf_pool->stat.n_ra_pages_evicted; + } return(TRUE); case BUF_LRU_NOT_FREED: @@ -953,8 +943,10 @@ buf_LRU_old_adjust_len(void) ut_a(buf_pool->LRU_old); ut_ad(buf_pool_mutex_own()); -#if 3 * (BUF_LRU_OLD_MIN_LEN / 8) <= BUF_LRU_OLD_TOLERANCE + 5 -# error "3 * (BUF_LRU_OLD_MIN_LEN / 8) <= BUF_LRU_OLD_TOLERANCE + 5" + ut_ad(buf_LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN); + ut_ad(buf_LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX); +#if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5) +# error "BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)" #endif #ifdef UNIV_LRU_DEBUG /* buf_pool->LRU_old must be the first item in the LRU list @@ -966,34 +958,39 @@ buf_LRU_old_adjust_len(void) || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old); #endif /* UNIV_LRU_DEBUG */ + old_len = buf_pool->LRU_old_len; + new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU) + * buf_LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV, + UT_LIST_GET_LEN(buf_pool->LRU) + - (BUF_LRU_OLD_TOLERANCE + + BUF_LRU_NON_OLD_MIN_LEN)); + for (;;) { - old_len = buf_pool->LRU_old_len; - new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8); + buf_page_t* LRU_old = buf_pool->LRU_old; - ut_ad(buf_pool->LRU_old->in_LRU_list); - ut_a(buf_pool->LRU_old); + ut_a(LRU_old); + ut_ad(LRU_old->in_LRU_list); #ifdef UNIV_LRU_DEBUG - ut_a(buf_pool->LRU_old->old); + ut_a(LRU_old->old); #endif /* UNIV_LRU_DEBUG */ /* Update the LRU_old pointer if necessary */ - if (old_len < new_len - BUF_LRU_OLD_TOLERANCE) { + if (old_len + BUF_LRU_OLD_TOLERANCE < new_len) { - buf_pool->LRU_old = UT_LIST_GET_PREV( - LRU, buf_pool->LRU_old); + buf_pool->LRU_old = LRU_old = UT_LIST_GET_PREV( + LRU, LRU_old); #ifdef UNIV_LRU_DEBUG - ut_a(!buf_pool->LRU_old->old); + ut_a(!LRU_old->old); #endif /* UNIV_LRU_DEBUG */ - buf_page_set_old(buf_pool->LRU_old, TRUE); - buf_pool->LRU_old_len++; + buf_page_set_old(LRU_old, TRUE); + old_len = ++buf_pool->LRU_old_len; } else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) { - buf_page_set_old(buf_pool->LRU_old, FALSE); - buf_pool->LRU_old = UT_LIST_GET_NEXT( - LRU, buf_pool->LRU_old); - buf_pool->LRU_old_len--; + buf_page_set_old(LRU_old, FALSE); + buf_pool->LRU_old = UT_LIST_GET_NEXT(LRU, LRU_old); + old_len = --buf_pool->LRU_old_len; } else { return; } @@ -1021,6 +1018,7 @@ buf_LRU_old_init(void) while (bpage != NULL) { ut_ad(bpage->in_LRU_list); + ut_ad(buf_page_in_file(bpage)); buf_page_set_old(bpage, TRUE); bpage = UT_LIST_GET_NEXT(LRU, bpage); } @@ -1075,16 +1073,19 @@ buf_LRU_remove_block( if (UNIV_UNLIKELY(bpage == buf_pool->LRU_old)) { - /* Below: the previous block is guaranteed to exist, because - the LRU_old pointer is only allowed to differ by the - tolerance value from strict 3/8 of the LRU list length. */ + /* Below: the previous block is guaranteed to exist, + because the LRU_old pointer is only allowed to differ + by BUF_LRU_OLD_TOLERANCE from strict + buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV of the LRU + list length. */ + buf_page_t* prev_bpage = UT_LIST_GET_PREV(LRU, bpage); - buf_pool->LRU_old = UT_LIST_GET_PREV(LRU, bpage); - ut_a(buf_pool->LRU_old); + ut_a(prev_bpage); #ifdef UNIV_LRU_DEBUG - ut_a(!buf_pool->LRU_old->old); + ut_a(!prev_bpage->old); #endif /* UNIV_LRU_DEBUG */ - buf_page_set_old(buf_pool->LRU_old, TRUE); + buf_pool->LRU_old = prev_bpage; + buf_page_set_old(prev_bpage, TRUE); buf_pool->LRU_old_len++; } @@ -1149,39 +1150,25 @@ buf_LRU_add_block_to_end_low( /*=========================*/ buf_page_t* bpage) /*!< in: control block */ { - buf_page_t* last_bpage; - ut_ad(buf_pool); ut_ad(bpage); ut_ad(buf_pool_mutex_own()); ut_a(buf_page_in_file(bpage)); - last_bpage = UT_LIST_GET_LAST(buf_pool->LRU); - - if (last_bpage) { - bpage->LRU_position = last_bpage->LRU_position; - } else { - bpage->LRU_position = buf_pool_clock_tic(); - } - ut_ad(!bpage->in_LRU_list); UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage); ut_d(bpage->in_LRU_list = TRUE); buf_page_set_old(bpage, TRUE); - if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) { - - buf_pool->LRU_old_len++; - } - if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) { ut_ad(buf_pool->LRU_old); /* Adjust the length of the old block list if necessary */ + buf_pool->LRU_old_len++; buf_LRU_old_adjust_len(); } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) { @@ -1189,6 +1176,7 @@ buf_LRU_add_block_to_end_low( /* The LRU list is now long enough for LRU_old to become defined: init it */ + buf_pool->LRU_old_len++; buf_LRU_old_init(); } @@ -1222,7 +1210,6 @@ buf_LRU_add_block_low( UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, bpage); - bpage->LRU_position = buf_pool_clock_tic(); bpage->freed_page_clock = buf_pool->freed_page_clock; } else { #ifdef UNIV_LRU_DEBUG @@ -1237,11 +1224,6 @@ buf_LRU_add_block_low( UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old, bpage); buf_pool->LRU_old_len++; - - /* We copy the LRU position field of the previous block - to the new block */ - - bpage->LRU_position = (buf_pool->LRU_old)->LRU_position; } ut_d(bpage->in_LRU_list = TRUE); @@ -1295,6 +1277,9 @@ buf_LRU_make_block_young( /*=====================*/ buf_page_t* bpage) /*!< in: control block */ { + ut_ad(buf_pool_mutex_own()); + buf_pool->stat.n_pages_made_young++; + buf_LRU_remove_block(bpage); buf_LRU_add_block_low(bpage, FALSE); } @@ -1847,6 +1832,48 @@ buf_LRU_block_free_hashed_page( buf_LRU_block_free_non_file_page(block); } +/**********************************************************************//** +Updates buf_LRU_old_ratio. +@return updated old_pct */ +UNIV_INTERN +uint +buf_LRU_old_ratio_update( +/*=====================*/ + uint old_pct,/*!< in: Reserve this percentage of + the buffer pool for "old" blocks. */ + ibool adjust) /*!< in: TRUE=adjust the LRU list; + FALSE=just assign buf_LRU_old_ratio + during the initialization of InnoDB */ +{ + uint ratio; + + ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100; + if (ratio < BUF_LRU_OLD_RATIO_MIN) { + ratio = BUF_LRU_OLD_RATIO_MIN; + } else if (ratio > BUF_LRU_OLD_RATIO_MAX) { + ratio = BUF_LRU_OLD_RATIO_MAX; + } + + if (adjust) { + buf_pool_mutex_enter(); + + if (ratio != buf_LRU_old_ratio) { + buf_LRU_old_ratio = ratio; + + if (UT_LIST_GET_LEN(buf_pool->LRU) + >= BUF_LRU_OLD_MIN_LEN) { + buf_LRU_old_adjust_len(); + } + } + + buf_pool_mutex_exit(); + } else { + buf_LRU_old_ratio = ratio; + } + + return(ratio * 100 / BUF_LRU_OLD_RATIO_DIV); +} + /********************************************************************//** Update the historical stats that we are collecting for LRU eviction policy at the end of each interval. */ @@ -1896,7 +1923,6 @@ buf_LRU_validate(void) buf_block_t* block; ulint old_len; ulint new_len; - ulint LRU_pos; ut_ad(buf_pool); buf_pool_mutex_enter(); @@ -1905,7 +1931,11 @@ buf_LRU_validate(void) ut_a(buf_pool->LRU_old); old_len = buf_pool->LRU_old_len; - new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8); + new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU) + * buf_LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV, + UT_LIST_GET_LEN(buf_pool->LRU) + - (BUF_LRU_OLD_TOLERANCE + + BUF_LRU_NON_OLD_MIN_LEN)); ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE); ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE); } @@ -1943,16 +1973,7 @@ buf_LRU_validate(void) ut_a(buf_pool->LRU_old == bpage); } - LRU_pos = buf_page_get_LRU_position(bpage); - bpage = UT_LIST_GET_NEXT(LRU, bpage); - - if (bpage) { - /* If the following assert fails, it may - not be an error: just the buf_pool clock - has wrapped around */ - ut_a(LRU_pos >= buf_page_get_LRU_position(bpage)); - } } if (buf_pool->LRU_old) { @@ -2000,9 +2021,6 @@ buf_LRU_print(void) ut_ad(buf_pool); buf_pool_mutex_enter(); - fprintf(stderr, "Pool ulint clock %lu\n", - (ulong) buf_pool->ulint_clock); - bpage = UT_LIST_GET_FIRST(buf_pool->LRU); while (bpage != NULL) { @@ -2033,18 +2051,16 @@ buf_LRU_print(void) const byte* frame; case BUF_BLOCK_FILE_PAGE: frame = buf_block_get_frame((buf_block_t*) bpage); - fprintf(stderr, "\nLRU pos %lu type %lu" + fprintf(stderr, "\ntype %lu" " index id %lu\n", - (ulong) buf_page_get_LRU_position(bpage), (ulong) fil_page_get_type(frame), (ulong) ut_dulint_get_low( btr_page_get_index_id(frame))); break; case BUF_BLOCK_ZIP_PAGE: frame = bpage->zip.data; - fprintf(stderr, "\nLRU pos %lu type %lu size %lu" + fprintf(stderr, "\ntype %lu size %lu" " index id %lu\n", - (ulong) buf_page_get_LRU_position(bpage), (ulong) fil_page_get_type(frame), (ulong) buf_page_get_zip_size(bpage), (ulong) ut_dulint_get_low( @@ -2052,8 +2068,7 @@ buf_LRU_print(void) break; default: - fprintf(stderr, "\nLRU pos %lu !state %lu!\n", - (ulong) buf_page_get_LRU_position(bpage), + fprintf(stderr, "\n!state %lu!\n", (ulong) buf_page_get_state(bpage)); break; } diff --git a/storage/innodb_plugin/buf/buf0rea.c b/storage/innodb_plugin/buf/buf0rea.c index 4910f82ccde..dd98ea17eb5 100644 --- a/storage/innodb_plugin/buf/buf0rea.c +++ b/storage/innodb_plugin/buf/buf0rea.c @@ -38,14 +38,6 @@ Created 11/5/1995 Heikki Tuuri #include "srv0start.h" #include "srv0srv.h" -/** The size in blocks of the area where the random read-ahead algorithm counts -the accessed pages when deciding whether to read-ahead */ -#define BUF_READ_AHEAD_RANDOM_AREA BUF_READ_AHEAD_AREA - -/** There must be at least this many pages in buf_pool in the area to start -a random read-ahead */ -#define BUF_READ_AHEAD_RANDOM_THRESHOLD (1 + BUF_READ_AHEAD_RANDOM_AREA / 2) - /** The linear read-ahead area size */ #define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA @@ -62,7 +54,8 @@ flag is cleared and the x-lock released by an i/o-handler thread. @return 1 if a read request was queued, 0 if the page already resided in buf_pool, or if the page is in the doublewrite buffer blocks in which case it is never read into the pool, or if the tablespace does -not exist or is being dropped */ +not exist or is being dropped +@return 1 if read request is issued. 0 if it is not */ static ulint buf_read_page_low( @@ -165,174 +158,13 @@ buf_read_page_low( } /********************************************************************//** -Applies a random read-ahead in buf_pool if there are at least a threshold -value of accessed pages from the random read-ahead area. Does not read any -page, not even the one at the position (space, offset), if the read-ahead -mechanism is not activated. NOTE 1: the calling thread may own latches on -pages: to avoid deadlocks this function must be written such that it cannot -end up waiting for these latches! NOTE 2: the calling thread must want -access to the page given: this rule is set to prevent unintended read-aheads -performed by ibuf routines, a situation which could result in a deadlock if -the OS does not support asynchronous i/o. -@return number of page read requests issued; NOTE that if we read ibuf -pages, it may happen that the page at the given page number does not -get read even if we return a positive value! */ -static -ulint -buf_read_ahead_random( -/*==================*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint offset) /*!< in: page number of a page which the current thread - wants to access */ -{ - ib_int64_t tablespace_version; - ulint recent_blocks = 0; - ulint count; - ulint LRU_recent_limit; - ulint ibuf_mode; - ulint low, high; - ulint err; - ulint i; - ulint buf_read_ahead_random_area; - - /* We have currently disabled random readahead */ - return(0); - - if (srv_startup_is_before_trx_rollback_phase) { - /* No read-ahead to avoid thread deadlocks */ - return(0); - } - - if (ibuf_bitmap_page(zip_size, offset) - || trx_sys_hdr_page(space, offset)) { - - /* If it is an ibuf bitmap page or trx sys hdr, we do - no read-ahead, as that could break the ibuf page access - order */ - - return(0); - } - - /* Remember the tablespace version before we ask te tablespace size - below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we - do not try to read outside the bounds of the tablespace! */ - - tablespace_version = fil_space_get_version(space); - - buf_read_ahead_random_area = BUF_READ_AHEAD_RANDOM_AREA; - - low = (offset / buf_read_ahead_random_area) - * buf_read_ahead_random_area; - high = (offset / buf_read_ahead_random_area + 1) - * buf_read_ahead_random_area; - if (high > fil_space_get_size(space)) { - - high = fil_space_get_size(space); - } - - /* Get the minimum LRU_position field value for an initial segment - of the LRU list, to determine which blocks have recently been added - to the start of the list. */ - - LRU_recent_limit = buf_LRU_get_recent_limit(); - - buf_pool_mutex_enter(); - - if (buf_pool->n_pend_reads - > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) { - buf_pool_mutex_exit(); - - return(0); - } - - /* Count how many blocks in the area have been recently accessed, - that is, reside near the start of the LRU list. */ - - for (i = low; i < high; i++) { - const buf_page_t* bpage = buf_page_hash_get(space, i); - - if (bpage - && buf_page_is_accessed(bpage) - && (buf_page_get_LRU_position(bpage) > LRU_recent_limit)) { - - recent_blocks++; - - if (recent_blocks >= BUF_READ_AHEAD_RANDOM_THRESHOLD) { - - buf_pool_mutex_exit(); - goto read_ahead; - } - } - } - - buf_pool_mutex_exit(); - /* Do nothing */ - return(0); - -read_ahead: - /* Read all the suitable blocks within the area */ - - if (ibuf_inside()) { - ibuf_mode = BUF_READ_IBUF_PAGES_ONLY; - } else { - ibuf_mode = BUF_READ_ANY_PAGE; - } - - count = 0; - - for (i = low; i < high; i++) { - /* It is only sensible to do read-ahead in the non-sync aio - mode: hence FALSE as the first parameter */ - - if (!ibuf_bitmap_page(zip_size, i)) { - count += buf_read_page_low( - &err, FALSE, - ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER, - space, zip_size, FALSE, - tablespace_version, i); - if (err == DB_TABLESPACE_DELETED) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: in random" - " readahead trying to access\n" - "InnoDB: tablespace %lu page %lu,\n" - "InnoDB: but the tablespace does not" - " exist or is just being dropped.\n", - (ulong) space, (ulong) i); - } - } - } - - /* In simulated aio we wake the aio handler threads only after - queuing all aio requests, in native aio the following call does - nothing: */ - - os_aio_simulated_wake_handler_threads(); - -#ifdef UNIV_DEBUG - if (buf_debug_prints && (count > 0)) { - fprintf(stderr, - "Random read-ahead space %lu offset %lu pages %lu\n", - (ulong) space, (ulong) offset, - (ulong) count); - } -#endif /* UNIV_DEBUG */ - - ++srv_read_ahead_rnd; - return(count); -} - -/********************************************************************//** High-level function which reads a page asynchronously from a file to the buffer buf_pool if it is not already there. Sets the io_fix flag and sets an exclusive lock on the buffer frame. The flag is cleared and the x-lock -released by the i/o-handler thread. Does a random read-ahead if it seems -sensible. -@return number of page read requests issued: this can be greater than -1 if read-ahead occurred */ +released by the i/o-handler thread. +@return TRUE if page has been read in, FALSE in case of failure */ UNIV_INTERN -ulint +ibool buf_read_page( /*==========*/ ulint space, /*!< in: space id */ @@ -341,20 +173,17 @@ buf_read_page( { ib_int64_t tablespace_version; ulint count; - ulint count2; ulint err; tablespace_version = fil_space_get_version(space); - count = buf_read_ahead_random(space, zip_size, offset); - /* We do the i/o in the synchronous aio mode to save thread switches: hence TRUE */ - count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space, - zip_size, FALSE, - tablespace_version, offset); - srv_buf_pool_reads+= count2; + count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space, + zip_size, FALSE, + tablespace_version, offset); + srv_buf_pool_reads += count; if (err == DB_TABLESPACE_DELETED) { ut_print_timestamp(stderr); fprintf(stderr, @@ -371,7 +200,7 @@ buf_read_page( /* Increment number of I/O operations used for LRU policy. */ buf_LRU_stat_inc_io(); - return(count + count2); + return(count > 0); } /********************************************************************//** @@ -498,9 +327,17 @@ buf_read_ahead_linear( fail_count++; } else if (pred_bpage) { - int res = (ut_ulint_cmp( - buf_page_get_LRU_position(bpage), - buf_page_get_LRU_position(pred_bpage))); + /* Note that buf_page_is_accessed() returns + the time of the first access. If some blocks + of the extent existed in the buffer pool at + the time of a linear access pattern, the first + access times may be nonmonotonic, even though + the latest access times were linear. The + threshold (srv_read_ahead_factor) should help + a little against this. */ + int res = ut_ulint_cmp( + buf_page_is_accessed(bpage), + buf_page_is_accessed(pred_bpage)); /* Accesses not in the right order */ if (res != 0 && res != asc_or_desc) { fail_count++; @@ -643,7 +480,7 @@ buf_read_ahead_linear( LRU policy decision. */ buf_LRU_stat_inc_io(); - ++srv_read_ahead_seq; + buf_pool->stat.n_ra_pages_read += count; return(count); } diff --git a/storage/innodb_plugin/dict/dict0dict.c b/storage/innodb_plugin/dict/dict0dict.c index d1f0e0ffc19..a8ee593e35c 100644 --- a/storage/innodb_plugin/dict/dict0dict.c +++ b/storage/innodb_plugin/dict/dict0dict.c @@ -82,9 +82,10 @@ static char dict_ibfk[] = "_ibfk_"; /*******************************************************************//** Tries to find column names for the index and sets the col field of the -index. */ +index. +@return TRUE if the column names were found */ static -void +ibool dict_index_find_cols( /*=================*/ dict_table_t* table, /*!< in: table */ @@ -1431,7 +1432,7 @@ add_field_size: /**********************************************************************//** Adds an index to the dictionary cache. -@return DB_SUCCESS or DB_TOO_BIG_RECORD */ +@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */ UNIV_INTERN ulint dict_index_add_to_cache( @@ -1457,7 +1458,10 @@ dict_index_add_to_cache( ut_a(!dict_index_is_clust(index) || UT_LIST_GET_LEN(table->indexes) == 0); - dict_index_find_cols(table, index); + if (!dict_index_find_cols(table, index)) { + + return(DB_CORRUPTION); + } /* Build the cache internal representation of the index, containing also the added system fields */ @@ -1665,9 +1669,10 @@ dict_index_remove_from_cache( /*******************************************************************//** Tries to find column names for the index and sets the col field of the -index. */ +index. +@return TRUE if the column names were found */ static -void +ibool dict_index_find_cols( /*=================*/ dict_table_t* table, /*!< in: table */ @@ -1692,17 +1697,21 @@ dict_index_find_cols( } } +#ifdef UNIV_DEBUG /* It is an error not to find a matching column. */ fputs("InnoDB: Error: no matching column for ", stderr); ut_print_name(stderr, NULL, FALSE, field->name); fputs(" in ", stderr); dict_index_name_print(stderr, NULL, index); fputs("!\n", stderr); - ut_error; +#endif /* UNIV_DEBUG */ + return(FALSE); found: ; } + + return(TRUE); } #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc index 616c227ae0c..d279f0ba4f3 100644 --- a/storage/innodb_plugin/handler/ha_innodb.cc +++ b/storage/innodb_plugin/handler/ha_innodb.cc @@ -72,6 +72,7 @@ with this program; if not, write to the Free Software Foundation, Inc., /* Include necessary InnoDB headers */ extern "C" { #include "univ.i" +#include "buf0lru.h" #include "btr0sea.h" #include "os0file.h" #include "os0thread.h" @@ -152,6 +153,10 @@ static ulong innobase_write_io_threads; static long long innobase_buffer_pool_size, innobase_log_file_size; +/** Percentage of the buffer pool to reserve for 'old' blocks. +Connected to buf_LRU_old_ratio. */ +static uint innobase_old_blocks_pct; + /* The default values for the following char* start-up parameters are determined in innobase_init below: */ @@ -490,10 +495,10 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_buffer_pool_pages_misc, SHOW_LONG}, {"buffer_pool_pages_total", (char*) &export_vars.innodb_buffer_pool_pages_total, SHOW_LONG}, - {"buffer_pool_read_ahead_rnd", - (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG}, - {"buffer_pool_read_ahead_seq", - (char*) &export_vars.innodb_buffer_pool_read_ahead_seq, SHOW_LONG}, + {"buffer_pool_read_ahead", + (char*) &export_vars.innodb_buffer_pool_read_ahead, SHOW_LONG}, + {"buffer_pool_read_ahead_evicted", + (char*) &export_vars.innodb_buffer_pool_read_ahead_evicted, SHOW_LONG}, {"buffer_pool_read_requests", (char*) &export_vars.innodb_buffer_pool_read_requests, SHOW_LONG}, {"buffer_pool_reads", @@ -2204,6 +2209,9 @@ innobase_change_buffering_inited_ok: ut_a(0 == strcmp(my_charset_latin1.name, "latin1_swedish_ci")); srv_latin1_ordering = my_charset_latin1.sort_order; + innobase_old_blocks_pct = buf_LRU_old_ratio_update( + innobase_old_blocks_pct, FALSE); + innobase_commit_concurrency_init_default(); /* Since we in this module access directly the fields of a trx @@ -9610,6 +9618,25 @@ innodb_adaptive_hash_index_update( } } +/****************************************************************//** +Update the system variable innodb_old_blocks_pct using the "saved" +value. This function is registered as a callback with MySQL. */ +static +void +innodb_old_blocks_pct_update( +/*=========================*/ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to + system variable */ + void* var_ptr,/*!< out: where the + formal string goes */ + const void* save) /*!< in: immediate result + from check function */ +{ + innobase_old_blocks_pct = buf_LRU_old_ratio_update( + *static_cast<const uint*>(save), TRUE); +} + /*************************************************************//** Check if it is a valid value of innodb_change_buffering. This function is registered as a callback with MySQL. @@ -9847,7 +9874,7 @@ static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter, NULL, NULL, 500L, 1L, ~0L, 0); static MYSQL_SYSVAR_LONG(file_io_threads, innobase_file_io_threads, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_NOSYSVAR, "Number of file I/O threads in InnoDB.", NULL, NULL, 4, 4, 64, 0); @@ -9886,6 +9913,18 @@ static MYSQL_SYSVAR_LONG(mirrored_log_groups, innobase_mirrored_log_groups, "Number of identical copies of log groups we keep for the database. Currently this should be set to 1.", NULL, NULL, 1, 1, 10, 0); +static MYSQL_SYSVAR_UINT(old_blocks_pct, innobase_old_blocks_pct, + PLUGIN_VAR_RQCMDARG, + "Percentage of the buffer pool to reserve for 'old' blocks.", + NULL, innodb_old_blocks_pct_update, 100 * 3 / 8, 5, 95, 0); + +static MYSQL_SYSVAR_UINT(old_blocks_time, buf_LRU_old_threshold_ms, + PLUGIN_VAR_RQCMDARG, + "Move blocks to the 'new' end of the buffer pool if the first access" + " was at least this many milliseconds ago." + " The timeout is disabled if 0 (the default).", + NULL, NULL, 0, 0, UINT_MAX32, 0); + static MYSQL_SYSVAR_LONG(open_files, innobase_open_files, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "How many files at the maximum InnoDB keeps open at the same time.", @@ -9984,6 +10023,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(adaptive_flushing), MYSQL_SYSVAR(max_purge_lag), MYSQL_SYSVAR(mirrored_log_groups), + MYSQL_SYSVAR(old_blocks_pct), + MYSQL_SYSVAR(old_blocks_time), MYSQL_SYSVAR(open_files), MYSQL_SYSVAR(rollback_on_timeout), MYSQL_SYSVAR(stats_on_metadata), diff --git a/storage/innodb_plugin/include/buf0buf.h b/storage/innodb_plugin/include/buf0buf.h index a24fc4cdbbb..299312e5d10 100644 --- a/storage/innodb_plugin/include/buf0buf.h +++ b/storage/innodb_plugin/include/buf0buf.h @@ -707,15 +707,6 @@ buf_page_belongs_to_unzip_LRU( /*==========================*/ const buf_page_t* bpage) /*!< in: pointer to control block */ __attribute__((pure)); -/*********************************************************************//** -Determine the approximate LRU list position of a block. -@return LRU list position */ -UNIV_INLINE -ulint -buf_page_get_LRU_position( -/*======================*/ - const buf_page_t* bpage) /*!< in: control block */ - __attribute__((pure)); /*********************************************************************//** Gets the mutex of a block. @@ -816,22 +807,22 @@ buf_page_set_old( buf_page_t* bpage, /*!< in/out: control block */ ibool old); /*!< in: old */ /*********************************************************************//** -Determine if a block has been accessed in the buffer pool. -@return TRUE if accessed */ +Determine the time of last access a block in the buffer pool. +@return ut_time_ms() at the time of last access, 0 if not accessed */ UNIV_INLINE -ibool +unsigned buf_page_is_accessed( /*=================*/ const buf_page_t* bpage) /*!< in: control block */ - __attribute__((pure)); + __attribute__((nonnull, pure)); /*********************************************************************//** Flag a block accessed. */ UNIV_INLINE void buf_page_set_accessed( /*==================*/ - buf_page_t* bpage, /*!< in/out: control block */ - ibool accessed); /*!< in: accessed */ + buf_page_t* bpage) /*!< in/out: control block */ + __attribute__((nonnull)); /*********************************************************************//** Gets the buf_block_t handle of a buffered file block if an uncompressed page frame exists, or NULL. @@ -1017,14 +1008,6 @@ buf_block_hash_get( /*===============*/ ulint space, /*!< in: space id */ ulint offset);/*!< in: offset of the page within space */ -/*******************************************************************//** -Increments the pool clock by one and returns its new value. Remember that -in the 32 bit version the clock wraps around at 4 billion! -@return new clock value */ -UNIV_INLINE -ulint -buf_pool_clock_tic(void); -/*====================*/ /*********************************************************************//** Gets the current length of the free list of buffer blocks. @return length of the free list */ @@ -1064,16 +1047,10 @@ struct buf_page_struct{ flushed to disk, this tells the flush_type. @see enum buf_flush */ - unsigned accessed:1; /*!< TRUE if the page has been accessed - while in the buffer pool: read-ahead - may read in pages which have not been - accessed yet; a thread is allowed to - read this for heuristic purposes - without holding any mutex or latch */ unsigned io_fix:2; /*!< type of pending I/O operation; also protected by buf_pool_mutex @see enum buf_io_fix */ - unsigned buf_fix_count:24;/*!< count of how manyfold this block + unsigned buf_fix_count:25;/*!< count of how manyfold this block is currently bufferfixed */ /* @} */ #endif /* !UNIV_HOTBACKUP */ @@ -1152,17 +1129,7 @@ struct buf_page_struct{ #endif /* UNIV_DEBUG */ unsigned old:1; /*!< TRUE if the block is in the old blocks in the LRU list */ - unsigned LRU_position:31;/*!< value which monotonically - decreases (or may stay - constant if old==TRUE) toward - the end of the LRU list, if - buf_pool->ulint_clock has not - wrapped around: NOTE that this - value can only be used in - heuristic algorithms, because - of the possibility of a - wrap-around! */ - unsigned freed_page_clock:32;/*!< the value of + unsigned freed_page_clock:31;/*!< the value of buf_pool->freed_page_clock when this block was the last time put to the head of the @@ -1170,6 +1137,9 @@ struct buf_page_struct{ to read this for heuristic purposes without holding any mutex or latch */ + unsigned access_time:32; /*!< time of first access, or + 0 if the block was never accessed + in the buffer pool */ /* @} */ # ifdef UNIV_DEBUG_FILE_ACCESSES ibool file_page_was_freed; @@ -1314,6 +1284,31 @@ Compute the hash fold value for blocks in buf_pool->zip_hash. */ #define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b)) /* @} */ +/** @brief The buffer pool statistics structure. */ +struct buf_pool_stat_struct{ + ulint n_page_gets; /*!< number of page gets performed; + also successful searches through + the adaptive hash index are + counted as page gets; this field + is NOT protected by the buffer + pool mutex */ + ulint n_pages_read; /*!< number read operations */ + ulint n_pages_written;/*!< number write operations */ + ulint n_pages_created;/*!< number of pages created + in the pool with no read */ + ulint n_ra_pages_read;/*!< number of pages read in + as part of read ahead */ + ulint n_ra_pages_evicted;/*!< number of read ahead + pages that are evicted without + being accessed */ + ulint n_pages_made_young; /*!< number of pages made young, in + calls to buf_LRU_make_block_young() */ + ulint n_pages_not_made_young; /*!< number of pages not made + young because the first access + was not long enough ago, in + buf_page_peek_if_too_old() */ +}; + /** @brief The buffer pool structure. NOTE! The definition appears here only for other modules of this @@ -1338,28 +1333,16 @@ struct buf_pool_struct{ ulint n_pend_reads; /*!< number of pending read operations */ ulint n_pend_unzip; /*!< number of pending decompressions */ - time_t last_printout_time; /*!< when buf_print was last time + time_t last_printout_time; + /*!< when buf_print_io was last time called */ - ulint n_pages_read; /*!< number read operations */ - ulint n_pages_written;/*!< number write operations */ - ulint n_pages_created;/*!< number of pages created - in the pool with no read */ - ulint n_page_gets; /*!< number of page gets performed; - also successful searches through - the adaptive hash index are - counted as page gets; this field - is NOT protected by the buffer - pool mutex */ - ulint n_page_gets_old;/*!< n_page_gets when buf_print was - last time called: used to calculate - hit rate */ - ulint n_pages_read_old;/*!< n_pages_read when buf_print was - last time called */ - ulint n_pages_written_old;/*!< number write operations */ - ulint n_pages_created_old;/*!< number of pages created in - the pool with no read */ + buf_pool_stat_t stat; /*!< current statistics */ + buf_pool_stat_t old_stat; /*!< old statistics */ + /* @} */ + /** @name Page flushing algorithm fields */ + /* @{ */ UT_LIST_BASE_NODE_T(buf_page_t) flush_list; @@ -1375,10 +1358,6 @@ struct buf_pool_struct{ /*!< this is in the set state when there is no flush batch of the given type running */ - ulint ulint_clock; /*!< a sequence number used to count - time. NOTE! This counter wraps - around at 4 billion (if ulint == - 32 bits)! */ ulint freed_page_clock;/*!< a sequence number used to count the number of buffer blocks removed from the end of @@ -1402,9 +1381,11 @@ struct buf_pool_struct{ block list */ UT_LIST_BASE_NODE_T(buf_page_t) LRU; /*!< base node of the LRU list */ - buf_page_t* LRU_old; /*!< pointer to the about 3/8 oldest - blocks in the LRU list; NULL if LRU - length less than BUF_LRU_OLD_MIN_LEN; + buf_page_t* LRU_old; /*!< pointer to the about + buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV + oldest blocks in the LRU list; + NULL if LRU length less than + BUF_LRU_OLD_MIN_LEN; NOTE: when LRU_old != NULL, its length should always equal LRU_old_len */ ulint LRU_old_len; /*!< length of the LRU list from diff --git a/storage/innodb_plugin/include/buf0buf.ic b/storage/innodb_plugin/include/buf0buf.ic index 17064342116..409d339aa16 100644 --- a/storage/innodb_plugin/include/buf0buf.ic +++ b/storage/innodb_plugin/include/buf0buf.ic @@ -72,9 +72,24 @@ buf_page_peek_if_too_old( /*=====================*/ const buf_page_t* bpage) /*!< in: block to make younger */ { - return(buf_pool->freed_page_clock - >= buf_page_get_freed_page_clock(bpage) - + 1 + (buf_pool->curr_size / 4)); + if (buf_LRU_old_threshold_ms && bpage->old) { + unsigned access_time = buf_page_is_accessed(bpage); + + if (access_time && ut_time_ms() - access_time + >= buf_LRU_old_threshold_ms) { + return(TRUE); + } + + buf_pool->stat.n_pages_not_made_young++; + return(FALSE); + } else { + /* FIXME: bpage->freed_page_clock is 31 bits */ + return((buf_pool->freed_page_clock & ~(1 << 31)) + > bpage->freed_page_clock + + (buf_pool->curr_size + * (BUF_LRU_OLD_RATIO_DIV - buf_LRU_old_ratio) + / (BUF_LRU_OLD_RATIO_DIV * 4))); + } } /*********************************************************************//** @@ -118,22 +133,6 @@ buf_pool_get_oldest_modification(void) return(lsn); } - -/*******************************************************************//** -Increments the buf_pool clock by one and returns its new value. Remember -that in the 32 bit version the clock wraps around at 4 billion! -@return new clock value */ -UNIV_INLINE -ulint -buf_pool_clock_tic(void) -/*====================*/ -{ - ut_ad(buf_pool_mutex_own()); - - buf_pool->ulint_clock++; - - return(buf_pool->ulint_clock); -} #endif /* !UNIV_HOTBACKUP */ /*********************************************************************//** @@ -280,21 +279,6 @@ buf_page_belongs_to_unzip_LRU( } /*********************************************************************//** -Determine the approximate LRU list position of a block. -@return LRU list position */ -UNIV_INLINE -ulint -buf_page_get_LRU_position( -/*======================*/ - const buf_page_t* bpage) /*!< in: control block */ -{ - ut_ad(buf_page_in_file(bpage)); - ut_ad(buf_pool_mutex_own()); - - return(bpage->LRU_position); -} - -/*********************************************************************//** Gets the mutex of a block. @return pointer to mutex protecting bpage */ UNIV_INLINE @@ -487,17 +471,17 @@ buf_page_set_old( } /*********************************************************************//** -Determine if a block has been accessed in the buffer pool. -@return TRUE if accessed */ +Determine the time of last access a block in the buffer pool. +@return ut_time_ms() at the time of last access, 0 if not accessed */ UNIV_INLINE -ibool +unsigned buf_page_is_accessed( /*=================*/ const buf_page_t* bpage) /*!< in: control block */ { ut_ad(buf_page_in_file(bpage)); - return(bpage->accessed); + return(bpage->access_time); } /*********************************************************************//** @@ -506,13 +490,15 @@ UNIV_INLINE void buf_page_set_accessed( /*==================*/ - buf_page_t* bpage, /*!< in/out: control block */ - ibool accessed) /*!< in: accessed */ + buf_page_t* bpage) /*!< in/out: control block */ { ut_a(buf_page_in_file(bpage)); - ut_ad(mutex_own(buf_page_get_mutex(bpage))); + ut_ad(buf_pool_mutex_own()); - bpage->accessed = accessed; + if (!bpage->access_time) { + /* Make this the time of the first access. */ + bpage->access_time = ut_time_ms(); + } } /*********************************************************************//** diff --git a/storage/innodb_plugin/include/buf0lru.h b/storage/innodb_plugin/include/buf0lru.h index 463aca0982c..009430af35b 100644 --- a/storage/innodb_plugin/include/buf0lru.h +++ b/storage/innodb_plugin/include/buf0lru.h @@ -69,7 +69,7 @@ These are low-level functions #########################################################################*/ /** Minimum LRU list length for which the LRU_old pointer is defined */ -#define BUF_LRU_OLD_MIN_LEN 80 +#define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */ /** Maximum LRU list search length in buf_flush_LRU_recommendation() */ #define BUF_LRU_FREE_SEARCH_LEN (5 + 2 * BUF_READ_AHEAD_AREA) @@ -84,15 +84,6 @@ void buf_LRU_invalidate_tablespace( /*==========================*/ ulint id); /*!< in: space id */ -/******************************************************************//** -Gets the minimum LRU_position field for the blocks in an initial segment -(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not -guaranteed to be precise, because the ulint_clock may wrap around. -@return the limit; zero if could not determine it */ -UNIV_INTERN -ulint -buf_LRU_get_recent_limit(void); -/*==========================*/ /********************************************************************//** Insert a compressed block into buf_pool->zip_clean in the LRU order. */ UNIV_INTERN @@ -201,6 +192,18 @@ void buf_LRU_make_block_old( /*===================*/ buf_page_t* bpage); /*!< in: control block */ +/**********************************************************************//** +Updates buf_LRU_old_ratio. +@return updated old_pct */ +UNIV_INTERN +uint +buf_LRU_old_ratio_update( +/*=====================*/ + uint old_pct,/*!< in: Reserve this percentage of + the buffer pool for "old" blocks. */ + ibool adjust);/*!< in: TRUE=adjust the LRU list; + FALSE=just assign buf_LRU_old_ratio + during the initialization of InnoDB */ /********************************************************************//** Update the historical stats that we are collecting for LRU eviction policy at the end of each interval. */ @@ -227,6 +230,35 @@ buf_LRU_print(void); /*===============*/ #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */ +/** @name Heuristics for detecting index scan @{ */ +/** Reserve this much/BUF_LRU_OLD_RATIO_DIV of the buffer pool for +"old" blocks. Protected by buf_pool_mutex. */ +extern uint buf_LRU_old_ratio; +/** The denominator of buf_LRU_old_ratio. */ +#define BUF_LRU_OLD_RATIO_DIV 1024 +/** Maximum value of buf_LRU_old_ratio. +@see buf_LRU_old_adjust_len +@see buf_LRU_old_ratio_update */ +#define BUF_LRU_OLD_RATIO_MAX BUF_LRU_OLD_RATIO_DIV +/** Minimum value of buf_LRU_old_ratio. +@see buf_LRU_old_adjust_len +@see buf_LRU_old_ratio_update +The minimum must exceed +(BUF_LRU_OLD_TOLERANCE + 5) * BUF_LRU_OLD_RATIO_DIV / BUF_LRU_OLD_MIN_LEN. */ +#define BUF_LRU_OLD_RATIO_MIN 51 + +#if BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX +# error "BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX" +#endif +#if BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV +# error "BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV" +#endif + +/** Move blocks to "new" LRU list only if the first access was at +least this many milliseconds ago. Not protected by any mutex or latch. */ +extern uint buf_LRU_old_threshold_ms; +/* @} */ + /** @brief Statistics for selecting the LRU list for eviction. These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O diff --git a/storage/innodb_plugin/include/buf0rea.h b/storage/innodb_plugin/include/buf0rea.h index a3b5685bd91..093750623d6 100644 --- a/storage/innodb_plugin/include/buf0rea.h +++ b/storage/innodb_plugin/include/buf0rea.h @@ -33,12 +33,10 @@ Created 11/5/1995 Heikki Tuuri High-level function which reads a page asynchronously from a file to the buffer buf_pool if it is not already there. Sets the io_fix flag and sets an exclusive lock on the buffer frame. The flag is cleared and the x-lock -released by the i/o-handler thread. Does a random read-ahead if it seems -sensible. -@return number of page read requests issued: this can be greater than -1 if read-ahead occurred */ +released by the i/o-handler thread. +@return TRUE if page has been read in, FALSE in case of failure */ UNIV_INTERN -ulint +ibool buf_read_page( /*==========*/ ulint space, /*!< in: space id */ diff --git a/storage/innodb_plugin/include/buf0types.h b/storage/innodb_plugin/include/buf0types.h index e7167d716a0..bfae6477135 100644 --- a/storage/innodb_plugin/include/buf0types.h +++ b/storage/innodb_plugin/include/buf0types.h @@ -34,6 +34,8 @@ typedef struct buf_block_struct buf_block_t; typedef struct buf_chunk_struct buf_chunk_t; /** Buffer pool comprising buf_chunk_t */ typedef struct buf_pool_struct buf_pool_t; +/** Buffer pool statistics struct */ +typedef struct buf_pool_stat_struct buf_pool_stat_t; /** A buffer frame. @see page_t */ typedef byte buf_frame_t; diff --git a/storage/innodb_plugin/include/dict0dict.h b/storage/innodb_plugin/include/dict0dict.h index b2029699e51..d425241a3a2 100644 --- a/storage/innodb_plugin/include/dict0dict.h +++ b/storage/innodb_plugin/include/dict0dict.h @@ -712,7 +712,7 @@ dict_index_find_on_id_low( dulint id); /*!< in: index id */ /**********************************************************************//** Adds an index to the dictionary cache. -@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */ UNIV_INTERN ulint dict_index_add_to_cache( diff --git a/storage/innodb_plugin/include/os0sync.h b/storage/innodb_plugin/include/os0sync.h index 0e0b32e7036..6e52fa1350e 100644 --- a/storage/innodb_plugin/include/os0sync.h +++ b/storage/innodb_plugin/include/os0sync.h @@ -285,7 +285,7 @@ os_fast_mutex_free( /**********************************************************//** Atomic compare-and-swap and increment for InnoDB. */ -#ifdef HAVE_GCC_ATOMIC_BUILTINS +#ifdef HAVE_IB_GCC_ATOMIC_BUILTINS /**********************************************************//** Returns true if swapped, ptr is pointer to target, old_val is value to compare to, new_val is the value to swap in. */ @@ -377,7 +377,7 @@ InterlockedExchange() operates on LONG, and the LONG will be clobbered */ # define os_atomic_test_and_set_byte(ptr, new_val) \ ((byte) InterlockedExchange(ptr, new_val)) -#endif /* HAVE_GCC_ATOMIC_BUILTINS */ +#endif /* HAVE_IB_GCC_ATOMIC_BUILTINS */ #ifndef UNIV_NONINL #include "os0sync.ic" diff --git a/storage/innodb_plugin/include/srv0srv.h b/storage/innodb_plugin/include/srv0srv.h index 499bccfe2b8..23472bd100e 100644 --- a/storage/innodb_plugin/include/srv0srv.h +++ b/storage/innodb_plugin/include/srv0srv.h @@ -315,10 +315,6 @@ extern ulint srv_buf_pool_flushed; /** Number of buffer pool reads that led to the reading of a disk page */ extern ulint srv_buf_pool_reads; -/** Number of sequential read-aheads */ -extern ulint srv_read_ahead_seq; -/** Number of random read-aheads */ -extern ulint srv_read_ahead_rnd; /** Status variables to be passed to MySQL */ typedef struct export_var_struct export_struc; @@ -605,13 +601,13 @@ struct export_var_struct{ #ifdef UNIV_DEBUG ulint innodb_buffer_pool_pages_latched; /*!< Latched pages */ #endif /* UNIV_DEBUG */ - ulint innodb_buffer_pool_read_requests; /*!< buf_pool->n_page_gets */ + ulint innodb_buffer_pool_read_requests; /*!< buf_pool->stat.n_page_gets */ ulint innodb_buffer_pool_reads; /*!< srv_buf_pool_reads */ ulint innodb_buffer_pool_wait_free; /*!< srv_buf_pool_wait_free */ ulint innodb_buffer_pool_pages_flushed; /*!< srv_buf_pool_flushed */ ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */ - ulint innodb_buffer_pool_read_ahead_seq;/*!< srv_read_ahead_seq */ - ulint innodb_buffer_pool_read_ahead_rnd;/*!< srv_read_ahead_rnd */ + ulint innodb_buffer_pool_read_ahead; /*!< srv_read_ahead */ + ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/ ulint innodb_dblwr_pages_written; /*!< srv_dblwr_pages_written */ ulint innodb_dblwr_writes; /*!< srv_dblwr_writes */ ibool innodb_have_atomic_builtins; /*!< HAVE_ATOMIC_BUILTINS */ @@ -623,9 +619,9 @@ struct export_var_struct{ ulint innodb_os_log_pending_writes; /*!< srv_os_log_pending_writes */ ulint innodb_os_log_pending_fsyncs; /*!< fil_n_pending_log_flushes */ ulint innodb_page_size; /*!< UNIV_PAGE_SIZE */ - ulint innodb_pages_created; /*!< buf_pool->n_pages_created */ - ulint innodb_pages_read; /*!< buf_pool->n_pages_read */ - ulint innodb_pages_written; /*!< buf_pool->n_pages_written */ + ulint innodb_pages_created; /*!< buf_pool->stat.n_pages_created */ + ulint innodb_pages_read; /*!< buf_pool->stat.n_pages_read */ + ulint innodb_pages_written; /*!< buf_pool->stat.n_pages_written */ ulint innodb_row_lock_waits; /*!< srv_n_lock_wait_count */ ulint innodb_row_lock_current_waits; /*!< srv_n_lock_wait_current_count */ ib_int64_t innodb_row_lock_time; /*!< srv_n_lock_wait_time diff --git a/storage/innodb_plugin/include/univ.i b/storage/innodb_plugin/include/univ.i index aa01f072b1d..66240d96a11 100644 --- a/storage/innodb_plugin/include/univ.i +++ b/storage/innodb_plugin/include/univ.i @@ -125,11 +125,11 @@ if we are compiling on Windows. */ # include <sched.h> # endif -# if defined(HAVE_GCC_ATOMIC_BUILTINS) || defined(HAVE_SOLARIS_ATOMICS) \ +# if defined(HAVE_IB_GCC_ATOMIC_BUILTINS) || defined(HAVE_SOLARIS_ATOMICS) \ || defined(HAVE_WINDOWS_ATOMICS) /* If atomics are defined we use them in InnoDB mutex implementation */ # define HAVE_ATOMIC_BUILTINS -# endif /* (HAVE_GCC_ATOMIC_BUILTINS) || (HAVE_SOLARIS_ATOMICS) +# endif /* (HAVE_IB_GCC_ATOMIC_BUILTINS) || (HAVE_SOLARIS_ATOMICS) || (HAVE_WINDOWS_ATOMICS) */ /* For InnoDB rw_locks to work with atomics we need the thread_id diff --git a/storage/innodb_plugin/include/ut0ut.h b/storage/innodb_plugin/include/ut0ut.h index 80094321041..12c1cd59166 100644 --- a/storage/innodb_plugin/include/ut0ut.h +++ b/storage/innodb_plugin/include/ut0ut.h @@ -239,6 +239,15 @@ ullint ut_time_us( /*=======*/ ullint* tloc); /*!< out: us since epoch, if non-NULL */ +/**********************************************************//** +Returns the number of milliseconds since some epoch. The +value may wrap around. It should only be used for heuristic +purposes. +@return ms since epoch */ +UNIV_INTERN +uint +ut_time_ms(void); +/*============*/ /**********************************************************//** Returns the difference of two times in seconds. diff --git a/storage/innodb_plugin/plug.in b/storage/innodb_plugin/plug.in index a620f10a0da..230c6ed34df 100644 --- a/storage/innodb_plugin/plug.in +++ b/storage/innodb_plugin/plug.in @@ -63,6 +63,57 @@ MYSQL_PLUGIN_ACTIONS(innodb_plugin, [ ;; esac AC_SUBST(INNODB_DYNAMIC_CFLAGS) + + AC_MSG_CHECKING(whether GCC atomic builtins are available) + AC_TRY_RUN( + [ + int main() + { + long x; + long y; + long res; + char c; + + x = 10; + y = 123; + res = __sync_bool_compare_and_swap(&x, x, y); + if (!res || x != y) { + return(1); + } + + x = 10; + y = 123; + res = __sync_bool_compare_and_swap(&x, x + 1, y); + if (res || x != 10) { + return(1); + } + + x = 10; + y = 123; + res = __sync_add_and_fetch(&x, y); + if (res != 123 + 10 || x != 123 + 10) { + return(1); + } + + c = 10; + res = __sync_lock_test_and_set(&c, 123); + if (res != 10 || c != 123) { + return(1); + } + + return(0); + } + ], + [ + AC_DEFINE([HAVE_IB_GCC_ATOMIC_BUILTINS], [1], + [GCC atomic builtins are available]) + AC_MSG_RESULT(yes) + ], + [ + AC_MSG_RESULT(no) + ] + ) + AC_MSG_CHECKING(whether pthread_t can be used by GCC atomic builtins) AC_TRY_RUN( [ diff --git a/storage/innodb_plugin/row/row0merge.c b/storage/innodb_plugin/row/row0merge.c index 924b7ffba58..9a51221732b 100644 --- a/storage/innodb_plugin/row/row0merge.c +++ b/storage/innodb_plugin/row/row0merge.c @@ -60,9 +60,19 @@ Completed by Sunny Bains and Marko Makela #ifdef UNIV_DEBUG /** Set these in order ot enable debug printout. */ /* @{ */ +/** Log the outcome of each row_merge_cmp() call, comparing records. */ static ibool row_merge_print_cmp; +/** Log each record read from temporary file. */ static ibool row_merge_print_read; +/** Log each record write to temporary file. */ static ibool row_merge_print_write; +/** Log each row_merge_blocks() call, merging two blocks of records to +a bigger one. */ +static ibool row_merge_print_block; +/** Log each block read from temporary file. */ +static ibool row_merge_print_block_read; +/** Log each block read from temporary file. */ +static ibool row_merge_print_block_write; /* @} */ #endif /* UNIV_DEBUG */ @@ -109,8 +119,9 @@ typedef struct row_merge_buf_struct row_merge_buf_t; /** Information about temporary files used in merge sort */ struct merge_file_struct { - int fd; /*!< file descriptor */ - ulint offset; /*!< file offset */ + int fd; /*!< file descriptor */ + ulint offset; /*!< file offset (end of file) */ + ib_uint64_t n_rec; /*!< number of records in the file */ }; /** Information about temporary files used in merge sort */ @@ -682,6 +693,13 @@ row_merge_read( ib_uint64_t ofs = ((ib_uint64_t) offset) * sizeof *buf; ibool success; +#ifdef UNIV_DEBUG + if (row_merge_print_block_read) { + fprintf(stderr, "row_merge_read fd=%d ofs=%lu\n", + fd, (ulong) offset); + } +#endif /* UNIV_DEBUG */ + success = os_file_read_no_error_handling(OS_FILE_FROM_FD(fd), buf, (ulint) (ofs & 0xFFFFFFFF), (ulint) (ofs >> 32), @@ -709,6 +727,13 @@ row_merge_write( ib_uint64_t ofs = ((ib_uint64_t) offset) * sizeof(row_merge_block_t); +#ifdef UNIV_DEBUG + if (row_merge_print_block_write) { + fprintf(stderr, "row_merge_write fd=%d ofs=%lu\n", + fd, (ulong) offset); + } +#endif /* UNIV_DEBUG */ + return(UNIV_LIKELY(os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf, (ulint) (ofs & 0xFFFFFFFF), (ulint) (ofs >> 32), @@ -718,7 +743,7 @@ row_merge_write( /********************************************************************//** Read a merge record. @return pointer to next record, or NULL on I/O error or end of list */ -static +static __attribute__((nonnull)) const byte* row_merge_read_rec( /*===============*/ @@ -1070,7 +1095,7 @@ row_merge_cmp( Reads clustered index of the table and create temporary files containing the index entries for the indexes to be built. @return DB_SUCCESS or error */ -static +static __attribute__((nonnull)) ulint row_merge_read_clustered_index( /*===========================*/ @@ -1233,6 +1258,7 @@ row_merge_read_clustered_index( if (UNIV_LIKELY (row && row_merge_buf_add(buf, row, ext))) { + file->n_rec++; continue; } @@ -1274,14 +1300,19 @@ err_exit: UNIV_MEM_INVALID(block[0], sizeof block[0]); merge_buf[i] = row_merge_buf_empty(buf); - /* Try writing the record again, now that - the buffer has been written out and emptied. */ + if (UNIV_LIKELY(row != NULL)) { + /* Try writing the record again, now + that the buffer has been written out + and emptied. */ - if (UNIV_UNLIKELY - (row && !row_merge_buf_add(buf, row, ext))) { - /* An empty buffer should have enough - room for at least one record. */ - ut_error; + if (UNIV_UNLIKELY + (!row_merge_buf_add(buf, row, ext))) { + /* An empty buffer should have enough + room for at least one record. */ + ut_error; + } + + file->n_rec++; } } @@ -1320,7 +1351,7 @@ func_exit: b2 = row_merge_write_rec(&block[2], &buf[2], b2, \ of->fd, &of->offset, \ mrec##N, offsets##N); \ - if (UNIV_UNLIKELY(!b2)) { \ + if (UNIV_UNLIKELY(!b2 || ++of->n_rec > file->n_rec)) { \ goto corrupt; \ } \ b##N = row_merge_read_rec(&block[N], &buf[N], \ @@ -1336,14 +1367,14 @@ func_exit: } while (0) /*************************************************************//** -Merge two blocks of linked lists on disk and write a bigger block. +Merge two blocks of records on disk and write a bigger block. @return DB_SUCCESS or error code */ static ulint row_merge_blocks( /*=============*/ const dict_index_t* index, /*!< in: index being created */ - merge_file_t* file, /*!< in/out: file containing + const merge_file_t* file, /*!< in: file containing index entries */ row_merge_block_t* block, /*!< in/out: 3 buffers */ ulint* foffs0, /*!< in/out: offset of first @@ -1366,6 +1397,17 @@ row_merge_blocks( ulint* offsets0;/* offsets of mrec0 */ ulint* offsets1;/* offsets of mrec1 */ +#ifdef UNIV_DEBUG + if (row_merge_print_block) { + fprintf(stderr, + "row_merge_blocks fd=%d ofs=%lu + fd=%d ofs=%lu" + " = fd=%d ofs=%lu\n", + file->fd, (ulong) *foffs0, + file->fd, (ulong) *foffs1, + of->fd, (ulong) of->offset); + } +#endif /* UNIV_DEBUG */ + heap = row_merge_heap_create(index, &offsets0, &offsets1); /* Write a record and read the next record. Split the output @@ -1438,16 +1480,87 @@ done1: } /*************************************************************//** +Copy a block of index entries. +@return TRUE on success, FALSE on failure */ +static __attribute__((nonnull)) +ibool +row_merge_blocks_copy( +/*==================*/ + const dict_index_t* index, /*!< in: index being created */ + const merge_file_t* file, /*!< in: input file */ + row_merge_block_t* block, /*!< in/out: 3 buffers */ + ulint* foffs0, /*!< in/out: input file offset */ + merge_file_t* of) /*!< in/out: output file */ +{ + mem_heap_t* heap; /*!< memory heap for offsets0, offsets1 */ + + mrec_buf_t buf[3]; /*!< buffer for handling + split mrec in block[] */ + const byte* b0; /*!< pointer to block[0] */ + byte* b2; /*!< pointer to block[2] */ + const mrec_t* mrec0; /*!< merge rec, points to block[0] */ + ulint* offsets0;/* offsets of mrec0 */ + ulint* offsets1;/* dummy offsets */ + +#ifdef UNIV_DEBUG + if (row_merge_print_block) { + fprintf(stderr, + "row_merge_blocks_copy fd=%d ofs=%lu" + " = fd=%d ofs=%lu\n", + file->fd, (ulong) foffs0, + of->fd, (ulong) of->offset); + } +#endif /* UNIV_DEBUG */ + + heap = row_merge_heap_create(index, &offsets0, &offsets1); + + /* Write a record and read the next record. Split the output + file in two halves, which can be merged on the following pass. */ + + if (!row_merge_read(file->fd, *foffs0, &block[0])) { +corrupt: + mem_heap_free(heap); + return(FALSE); + } + + b0 = block[0]; + b2 = block[2]; + + b0 = row_merge_read_rec(&block[0], &buf[0], b0, index, file->fd, + foffs0, &mrec0, offsets0); + if (UNIV_UNLIKELY(!b0 && mrec0)) { + + goto corrupt; + } + + if (mrec0) { + /* append all mrec0 to output */ + for (;;) { + ROW_MERGE_WRITE_GET_NEXT(0, goto done0); + } + } +done0: + + /* The file offset points to the beginning of the last page + that has been read. Update it to point to the next block. */ + (*foffs0)++; + + mem_heap_free(heap); + return(row_merge_write_eof(&block[2], b2, of->fd, &of->offset) + != NULL); +} + +/*************************************************************//** Merge disk files. @return DB_SUCCESS or error code */ -static +static __attribute__((nonnull)) ulint row_merge( /*======*/ const dict_index_t* index, /*!< in: index being created */ merge_file_t* file, /*!< in/out: file containing index entries */ - ulint half, /*!< in: half the file */ + ulint* half, /*!< in/out: half the file */ row_merge_block_t* block, /*!< in/out: 3 buffers */ int* tmpfd, /*!< in/out: temporary file handle */ TABLE* table) /*!< in/out: MySQL table, for @@ -1458,43 +1571,76 @@ row_merge( ulint foffs1; /*!< second input offset */ ulint error; /*!< error code */ merge_file_t of; /*!< output file */ + const ulint ihalf = *half; + /*!< half the input file */ + ulint ohalf; /*!< half the output file */ UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]); - ut_ad(half > 0); + ut_ad(ihalf > 0); + ut_ad(ihalf < file->offset); of.fd = *tmpfd; of.offset = 0; + of.n_rec = 0; /* Merge blocks to the output file. */ + ohalf = 0; foffs0 = 0; - foffs1 = half; + foffs1 = ihalf; + + for (; foffs0 < ihalf && foffs1 < file->offset; foffs0++, foffs1++) { + ulint ahalf; /*!< arithmetic half the input file */ - for (; foffs0 < half && foffs1 < file->offset; foffs0++, foffs1++) { error = row_merge_blocks(index, file, block, &foffs0, &foffs1, &of, table); if (error != DB_SUCCESS) { return(error); } + + /* Record the offset of the output file when + approximately half the output has been generated. In + this way, the next invocation of row_merge() will + spend most of the time in this loop. The initial + estimate is ohalf==0. */ + ahalf = file->offset / 2; + ut_ad(ohalf <= of.offset); + + /* Improve the estimate until reaching half the input + file size, or we can not get any closer to it. All + comparands should be non-negative when !(ohalf < ahalf) + because ohalf <= of.offset. */ + if (ohalf < ahalf || of.offset - ahalf < ohalf - ahalf) { + ohalf = of.offset; + } } - /* Copy the last block, if there is one. */ - while (foffs0 < half) { - if (!row_merge_read(file->fd, foffs0++, block) - || !row_merge_write(of.fd, of.offset++, block)) { + /* Copy the last blocks, if there are any. */ + + while (foffs0 < ihalf) { + if (!row_merge_blocks_copy(index, file, block, &foffs0, &of)) { return(DB_CORRUPTION); } } + + ut_ad(foffs0 == ihalf); + while (foffs1 < file->offset) { - if (!row_merge_read(file->fd, foffs1++, block) - || !row_merge_write(of.fd, of.offset++, block)) { + if (!row_merge_blocks_copy(index, file, block, &foffs1, &of)) { return(DB_CORRUPTION); } } + ut_ad(foffs1 == file->offset); + + if (UNIV_UNLIKELY(of.n_rec != file->n_rec)) { + return(DB_CORRUPTION); + } + /* Swap file descriptors for the next pass. */ *tmpfd = file->fd; *file = of; + *half = ohalf; UNIV_MEM_INVALID(block[0], 3 * sizeof block[0]); @@ -1517,20 +1663,17 @@ row_merge_sort( reporting erroneous key value if applicable */ { - ulint blksz; /*!< block size */ + ulint half = file->offset / 2; - for (blksz = 1; blksz < file->offset; blksz *= 2) { - ulint half; + do { ulint error; - ut_ad(ut_is_2pow(blksz)); - half = ut_2pow_round((file->offset + (blksz - 1)) / 2, blksz); - error = row_merge(index, file, half, block, tmpfd, table); + error = row_merge(index, file, &half, block, tmpfd, table); if (error != DB_SUCCESS) { return(error); } - } + } while (half < file->offset && half > 0); return(DB_SUCCESS); } @@ -1909,6 +2052,7 @@ row_merge_file_create( { merge_file->fd = innobase_mysql_tmpfile(); merge_file->offset = 0; + merge_file->n_rec = 0; } /*********************************************************************//** diff --git a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c index b41c8acfab4..2f54e9b3734 100644 --- a/storage/innodb_plugin/srv/srv0srv.c +++ b/storage/innodb_plugin/srv/srv0srv.c @@ -292,12 +292,6 @@ UNIV_INTERN ulint srv_buf_pool_flushed = 0; reading of a disk page */ UNIV_INTERN ulint srv_buf_pool_reads = 0; -/** Number of sequential read-aheads */ -UNIV_INTERN ulint srv_read_ahead_seq = 0; - -/** Number of random read-aheads */ -UNIV_INTERN ulint srv_read_ahead_rnd = 0; - /* structure to pass status variables to MySQL */ UNIV_INTERN export_struc export_vars; @@ -464,8 +458,6 @@ static ulint srv_main_background_loops = 0; static ulint srv_main_flush_loops = 0; /* Log writes involving flush. */ static ulint srv_log_writes_and_flush = 0; -/* Log writes not including flush. */ -static ulint srv_log_buffer_writes = 0; /* This is only ever touched by the master thread. It records the time when the last flush of log file has happened. The master @@ -714,9 +706,8 @@ srv_print_master_thread_info( srv_main_1_second_loops, srv_main_sleeps, srv_main_10_second_loops, srv_main_background_loops, srv_main_flush_loops); - fprintf(file, "srv_master_thread log flush and writes: %lu " - " log writes only: %lu\n", - srv_log_writes_and_flush, srv_log_buffer_writes); + fprintf(file, "srv_master_thread log flush and writes: %lu\n", + srv_log_writes_and_flush); } /*********************************************************************//** @@ -1877,14 +1868,16 @@ srv_export_innodb_status(void) export_vars.innodb_data_reads = os_n_file_reads; export_vars.innodb_data_writes = os_n_file_writes; export_vars.innodb_data_written = srv_data_written; - export_vars.innodb_buffer_pool_read_requests = buf_pool->n_page_gets; + export_vars.innodb_buffer_pool_read_requests = buf_pool->stat.n_page_gets; export_vars.innodb_buffer_pool_write_requests = srv_buf_pool_write_requests; export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free; export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed; export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads; - export_vars.innodb_buffer_pool_read_ahead_rnd = srv_read_ahead_rnd; - export_vars.innodb_buffer_pool_read_ahead_seq = srv_read_ahead_seq; + export_vars.innodb_buffer_pool_read_ahead + = buf_pool->stat.n_ra_pages_read; + export_vars.innodb_buffer_pool_read_ahead_evicted + = buf_pool->stat.n_ra_pages_evicted; export_vars.innodb_buffer_pool_pages_data = UT_LIST_GET_LEN(buf_pool->LRU); export_vars.innodb_buffer_pool_pages_dirty @@ -1915,9 +1908,9 @@ srv_export_innodb_status(void) export_vars.innodb_log_writes = srv_log_writes; export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written; export_vars.innodb_dblwr_writes = srv_dblwr_writes; - export_vars.innodb_pages_created = buf_pool->n_pages_created; - export_vars.innodb_pages_read = buf_pool->n_pages_read; - export_vars.innodb_pages_written = buf_pool->n_pages_written; + export_vars.innodb_pages_created = buf_pool->stat.n_pages_created; + export_vars.innodb_pages_read = buf_pool->stat.n_pages_read; + export_vars.innodb_pages_written = buf_pool->stat.n_pages_written; export_vars.innodb_row_lock_waits = srv_n_lock_wait_count; export_vars.innodb_row_lock_current_waits = srv_n_lock_wait_current_count; @@ -2284,12 +2277,6 @@ srv_sync_log_buffer_in_background(void) log_buffer_sync_in_background(TRUE); srv_last_log_flush_time = current_time; srv_log_writes_and_flush++; - } else { - /* Actually we don't need to write logs here. - We are just being extra safe here by forcing - the log buffer to log file. */ - log_buffer_sync_in_background(FALSE); - srv_log_buffer_writes++; } } @@ -2340,8 +2327,8 @@ loop: srv_main_thread_op_info = "reserving kernel mutex"; - n_ios_very_old = log_sys->n_log_ios + buf_pool->n_pages_read - + buf_pool->n_pages_written; + n_ios_very_old = log_sys->n_log_ios + buf_pool->stat.n_pages_read + + buf_pool->stat.n_pages_written; mutex_enter(&kernel_mutex); /* Store the user activity counter at the start of this loop */ @@ -2361,8 +2348,8 @@ loop: skip_sleep = FALSE; for (i = 0; i < 10; i++) { - n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read - + buf_pool->n_pages_written; + n_ios_old = log_sys->n_log_ios + buf_pool->stat.n_pages_read + + buf_pool->stat.n_pages_written; srv_main_thread_op_info = "sleeping"; srv_main_1_second_loops++; @@ -2401,8 +2388,8 @@ loop: n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes; - n_ios = log_sys->n_log_ios + buf_pool->n_pages_read - + buf_pool->n_pages_written; + n_ios = log_sys->n_log_ios + buf_pool->stat.n_pages_read + + buf_pool->stat.n_pages_written; if (n_pend_ios < SRV_PEND_IO_THRESHOLD && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) { srv_main_thread_op_info = "doing insert buffer merge"; @@ -2418,6 +2405,8 @@ loop: /* Try to keep the number of modified pages in the buffer pool under the limit wished by the user */ + srv_main_thread_op_info = + "flushing buffer pool pages"; n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), IB_ULONGLONG_MAX); @@ -2436,6 +2425,8 @@ loop: ulint n_flush = buf_flush_get_desired_flush_rate(); if (n_flush) { + srv_main_thread_op_info = + "flushing buffer pool pages"; n_flush = ut_min(PCT_IO(100), n_flush); n_pages_flushed = buf_flush_batch( @@ -2473,8 +2464,8 @@ loop: are not required, and may be disabled. */ n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes; - n_ios = log_sys->n_log_ios + buf_pool->n_pages_read - + buf_pool->n_pages_written; + n_ios = log_sys->n_log_ios + buf_pool->stat.n_pages_read + + buf_pool->stat.n_pages_written; srv_main_10_second_loops++; if (n_pend_ios < SRV_PEND_IO_THRESHOLD diff --git a/storage/innodb_plugin/srv/srv0start.c b/storage/innodb_plugin/srv/srv0start.c index f7447499b3b..e4278f6b5a8 100644 --- a/storage/innodb_plugin/srv/srv0start.c +++ b/storage/innodb_plugin/srv/srv0start.c @@ -1106,7 +1106,7 @@ innobase_start_or_create_for_mysql(void) "InnoDB: The InnoDB memory heap is disabled\n"); } -#ifdef HAVE_GCC_ATOMIC_BUILTINS +#ifdef HAVE_IB_GCC_ATOMIC_BUILTINS # ifdef INNODB_RW_LOCKS_USE_ATOMICS fprintf(stderr, "InnoDB: Mutexes and rw_locks use GCC atomic builtins.\n"); @@ -1130,10 +1130,10 @@ innobase_start_or_create_for_mysql(void) fprintf(stderr, "InnoDB: Mutexes use Windows interlocked functions.\n"); # endif /* INNODB_RW_LOCKS_USE_ATOMICS */ -#else /* HAVE_GCC_ATOMIC_BUILTINS */ +#else /* HAVE_IB_GCC_ATOMIC_BUILTINS */ fprintf(stderr, "InnoDB: Neither mutexes nor rw_locks use GCC atomic builtins.\n"); -#endif /* HAVE_GCC_ATOMIC_BUILTINS */ +#endif /* HAVE_IB_GCC_ATOMIC_BUILTINS */ /* Since InnoDB does not currently clean up all its internal data structures in MySQL Embedded Server Library server_end(), we diff --git a/storage/innodb_plugin/ut/ut0ut.c b/storage/innodb_plugin/ut/ut0ut.c index e4cc226fbad..a01a0470938 100644 --- a/storage/innodb_plugin/ut/ut0ut.c +++ b/storage/innodb_plugin/ut/ut0ut.c @@ -200,6 +200,23 @@ ut_time_us( } /**********************************************************//** +Returns the number of milliseconds since some epoch. The +value may wrap around. It should only be used for heuristic +purposes. +@return ms since epoch */ +UNIV_INTERN +uint +ut_time_ms(void) +/*============*/ +{ + struct timeval tv; + + ut_gettimeofday(&tv, NULL); + + return((uint) tv.tv_sec * 1000 + tv.tv_usec / 1000); +} + +/**********************************************************//** Returns the difference of two times in seconds. @return time2 - time1 expressed in seconds */ UNIV_INTERN |