From b012588f3aed7eb10dd418213bea3dc5f281df5b Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 20 Nov 2006 12:49:52 -0700 Subject: Backport patch of bugfix for: - Bug #15815: Very poor performance with multiple queries running concurrently - Bug #22868: 'Thread thrashing' with > 50 concurrent conns under an upd-intensive workloadw This is a patch from an e-mail; it is not included in an InnoDB snapshot. --- innobase/buf/buf0buf.c | 141 +++++++++++++++++++++++++++++-------------- innobase/buf/buf0flu.c | 93 +++++++++++++++++++--------- innobase/buf/buf0lru.c | 18 +++++- innobase/include/buf0buf.h | 40 ++++++++---- innobase/include/buf0buf.ic | 21 +++---- innobase/include/sync0arr.h | 13 ++-- innobase/include/sync0rw.h | 1 + innobase/include/sync0rw.ic | 6 +- innobase/include/sync0sync.h | 1 + innobase/os/os0sync.c | 55 ++++++++++++++++- innobase/sync/sync0arr.c | 119 +++++++++++++----------------------- innobase/sync/sync0rw.c | 2 + innobase/sync/sync0sync.c | 8 ++- 13 files changed, 325 insertions(+), 193 deletions(-) diff --git a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c index aea3932eda7..382304451d6 100644 --- a/innobase/buf/buf0buf.c +++ b/innobase/buf/buf0buf.c @@ -433,6 +433,9 @@ buf_block_init( block->check_index_page_at_flush = FALSE; + mutex_create(&block->mutex); + mutex_set_level(&block->mutex, SYNC_BUF_BLOCK); + rw_lock_create(&(block->lock)); ut_ad(rw_lock_validate(&(block->lock))); @@ -599,13 +602,22 @@ buf_block_make_young( /*=================*/ buf_block_t* block) /* in: block to make younger */ { +#ifdef UNIV_SYNC_DEBUG + ut_ad(!mutex_own(&(buf_pool->mutex))); +#endif /* UNIV_SYNC_DEBUG */ + + /* Note that we read freed_page_clock's without holding any mutex: + this is allowed since the result is used only in heuristics */ + if (buf_pool->freed_page_clock >= block->freed_page_clock - + 1 + (buf_pool->curr_size / 1024)) { + + 1 + (buf_pool->curr_size / 4)) { + mutex_enter(&buf_pool->mutex); /* There has been freeing activity in the LRU list: best to move to the head of the LRU list */ buf_LRU_make_block_young(block); + mutex_exit(&buf_pool->mutex); } } @@ -640,12 +652,16 @@ buf_block_free( /*===========*/ buf_block_t* block) /* in, own: block to be freed */ { - ut_ad(block->state != BUF_BLOCK_FILE_PAGE); - mutex_enter(&(buf_pool->mutex)); + mutex_enter(&block->mutex); + + ut_a(block->state != BUF_BLOCK_FILE_PAGE); + buf_LRU_block_free_non_file_page(block); + mutex_exit(&block->mutex); + mutex_exit(&(buf_pool->mutex)); } @@ -864,9 +880,8 @@ buf_page_get_gen( #endif buf_pool->n_page_gets++; loop: - mutex_enter_fast(&(buf_pool->mutex)); - block = NULL; + mutex_enter_fast(&(buf_pool->mutex)); if (guess) { block = buf_block_align(guess); @@ -904,6 +919,10 @@ loop: goto loop; } + mutex_enter(&block->mutex); + + ut_a(block->state == BUF_BLOCK_FILE_PAGE); + must_read = FALSE; if (block->io_fix == BUF_IO_READ) { @@ -911,9 +930,9 @@ loop: must_read = TRUE; if (mode == BUF_GET_IF_IN_POOL) { - /* The page is only being read to buffer */ - mutex_exit(&(buf_pool->mutex)); + mutex_exit(&buf_pool->mutex); + mutex_exit(&block->mutex); return(NULL); } @@ -924,7 +943,7 @@ loop: #else buf_block_buf_fix_inc(block); #endif - buf_block_make_young(block); + mutex_exit(&buf_pool->mutex); /* Check if this is the first access to the page */ @@ -932,10 +951,13 @@ loop: block->accessed = TRUE; + mutex_exit(&block->mutex); + + buf_block_make_young(block); + #ifdef UNIV_DEBUG_FILE_ACCESSES ut_a(block->file_page_was_freed == FALSE); #endif - mutex_exit(&(buf_pool->mutex)); #ifdef UNIV_DEBUG buf_dbg_counter++; @@ -960,13 +982,14 @@ loop: } if (!success) { - mutex_enter(&(buf_pool->mutex)); + mutex_enter(&block->mutex); block->buf_fix_count--; + + mutex_exit(&block->mutex); #ifdef UNIV_SYNC_DEBUG rw_lock_s_unlock(&(block->debug_latch)); #endif - mutex_exit(&(buf_pool->mutex)); return(NULL); } @@ -1032,11 +1055,11 @@ buf_page_optimistic_get_func( block = buf_block_align(guess); - mutex_enter(&(buf_pool->mutex)); + mutex_enter(&block->mutex); if (block->state != BUF_BLOCK_FILE_PAGE) { - mutex_exit(&(buf_pool->mutex)); + mutex_exit(&block->mutex); return(FALSE); } @@ -1046,15 +1069,14 @@ buf_page_optimistic_get_func( #else buf_block_buf_fix_inc(block); #endif - buf_block_make_young(block); - - /* Check if this is the first access to the page */ - accessed = block->accessed; - block->accessed = TRUE; - mutex_exit(&(buf_pool->mutex)); + mutex_exit(&block->mutex); + + buf_block_make_young(block); + + /* Check if this is the first access to the page */ ut_ad(!ibuf_inside() || ibuf_page(block->space, block->offset)); @@ -1069,14 +1091,15 @@ buf_page_optimistic_get_func( } if (!success) { - mutex_enter(&(buf_pool->mutex)); - + mutex_enter(&block->mutex); + block->buf_fix_count--; + + mutex_exit(&block->mutex); + #ifdef UNIV_SYNC_DEBUG rw_lock_s_unlock(&(block->debug_latch)); -#endif - mutex_exit(&(buf_pool->mutex)); - +#endif return(FALSE); } @@ -1090,14 +1113,15 @@ buf_page_optimistic_get_func( rw_lock_x_unlock(&(block->lock)); } - mutex_enter(&(buf_pool->mutex)); - + mutex_enter(&block->mutex); + block->buf_fix_count--; + + mutex_exit(&block->mutex); + #ifdef UNIV_SYNC_DEBUG rw_lock_s_unlock(&(block->debug_latch)); -#endif - mutex_exit(&(buf_pool->mutex)); - +#endif return(FALSE); } @@ -1156,7 +1180,7 @@ buf_page_get_known_nowait( block = buf_block_align(guess); - mutex_enter(&(buf_pool->mutex)); + mutex_enter(&block->mutex); if (block->state == BUF_BLOCK_REMOVE_HASH) { /* Another thread is just freeing the block from the LRU list @@ -1166,7 +1190,7 @@ buf_page_get_known_nowait( we have already removed it from the page address hash table of the buffer pool. */ - mutex_exit(&(buf_pool->mutex)); + mutex_exit(&block->mutex); return(FALSE); } @@ -1176,12 +1200,12 @@ buf_page_get_known_nowait( #else buf_block_buf_fix_inc(block); #endif + mutex_exit(&block->mutex); + if (mode == BUF_MAKE_YOUNG) { buf_block_make_young(block); } - mutex_exit(&(buf_pool->mutex)); - ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD)); if (rw_latch == RW_S_LATCH) { @@ -1195,13 +1219,15 @@ buf_page_get_known_nowait( } if (!success) { - mutex_enter(&(buf_pool->mutex)); + mutex_enter(&block->mutex); block->buf_fix_count--; + + mutex_exit(&block->mutex); + #ifdef UNIV_SYNC_DEBUG rw_lock_s_unlock(&(block->debug_latch)); #endif - mutex_exit(&(buf_pool->mutex)); return(FALSE); } @@ -1247,8 +1273,7 @@ buf_page_init_for_backup_restore( block->offset = offset; block->lock_hash_val = 0; - block->lock_mutex = NULL; - + block->freed_page_clock = 0; block->newest_modification = ut_dulint_zero; @@ -1280,6 +1305,7 @@ buf_page_init( { #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&(buf_pool->mutex))); + ut_ad(mutex_own(&(block->mutex))); #endif /* UNIV_SYNC_DEBUG */ ut_ad(block->state == BUF_BLOCK_READY_FOR_USE); @@ -1293,8 +1319,7 @@ buf_page_init( block->check_index_page_at_flush = FALSE; block->lock_hash_val = lock_rec_hash(space, offset); - block->lock_mutex = NULL; - + /* Insert into the hash table of file pages */ HASH_INSERT(buf_block_t, hash, buf_pool->page_hash, @@ -1362,12 +1387,15 @@ buf_page_init_for_read( ut_ad(block); mutex_enter(&(buf_pool->mutex)); + mutex_enter(&block->mutex); if (NULL != buf_page_hash_get(space, offset)) { /* The page is already in buf_pool, return */ + mutex_exit(&block->mutex); mutex_exit(&(buf_pool->mutex)); + buf_block_free(block); if (mode == BUF_READ_IBUF_PAGES_ONLY) { @@ -1387,6 +1415,7 @@ buf_page_init_for_read( buf_LRU_add_block(block, TRUE); /* TRUE == to old blocks */ block->io_fix = BUF_IO_READ; + buf_pool->n_pend_reads++; /* We set a pass-type x-lock on the frame because then the same @@ -1400,6 +1429,7 @@ buf_page_init_for_read( rw_lock_x_lock_gen(&(block->read_lock), BUF_IO_READ); + mutex_exit(&block->mutex); mutex_exit(&(buf_pool->mutex)); if (mode == BUF_READ_IBUF_PAGES_ONLY) { @@ -1462,6 +1492,8 @@ buf_page_create( block = free_block; + mutex_enter(&block->mutex); + buf_page_init(space, offset, block); /* The block must be put to the LRU list */ @@ -1472,13 +1504,15 @@ buf_page_create( #else buf_block_buf_fix_inc(block); #endif + buf_pool->n_pages_created++; + + mutex_exit(&(buf_pool->mutex)); + mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX); block->accessed = TRUE; - buf_pool->n_pages_created++; - - mutex_exit(&(buf_pool->mutex)); + mutex_exit(&block->mutex); /* Delete possible entries for the page from the insert buffer: such can exist if the page belonged to an index which was dropped */ @@ -1516,6 +1550,12 @@ buf_page_io_complete( ut_ad(block); + /* We do not need protect block->io_fix here by block->mutex to read + it because this is the only function where we can change the value + from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code + ensures that this is the only thread that handles the i/o for this + block. */ + io_type = block->io_fix; if (io_type == BUF_IO_READ) { @@ -1584,11 +1624,12 @@ buf_page_io_complete( } } + mutex_enter(&(buf_pool->mutex)); + mutex_enter(&block->mutex); + #ifdef UNIV_IBUF_DEBUG ut_a(ibuf_count_get(block->space, block->offset) == 0); #endif - mutex_enter(&(buf_pool->mutex)); - /* Because this thread which does the unlocking is not the same that did the locking, we use a pass value != 0 in unlock, which simply removes the newest lock debug record, without checking the thread @@ -1629,6 +1670,7 @@ buf_page_io_complete( } } + mutex_exit(&block->mutex); mutex_exit(&(buf_pool->mutex)); if (buf_debug_prints) { @@ -1688,6 +1730,8 @@ buf_validate(void) block = buf_pool_get_nth_block(buf_pool, i); + mutex_enter(&block->mutex); + if (block->state == BUF_BLOCK_FILE_PAGE) { ut_a(buf_page_hash_get(block->space, @@ -1731,6 +1775,8 @@ buf_validate(void) } else if (block->state == BUF_BLOCK_NOT_USED) { n_free++; } + + mutex_exit(&block->mutex); } if (n_lru + n_free > buf_pool->curr_size) { @@ -2001,16 +2047,21 @@ buf_all_freed(void) block = buf_pool_get_nth_block(buf_pool, i); + mutex_enter(&block->mutex); + if (block->state == BUF_BLOCK_FILE_PAGE) { if (!buf_flush_ready_for_replace(block)) { fprintf(stderr, "Page %lu %lu still fixed or dirty\n", - block->space, block->offset); + (ulong) block->space, + (ulong) block->offset); ut_error; } } + + mutex_exit(&block->mutex); } mutex_exit(&(buf_pool->mutex)); diff --git a/innobase/buf/buf0flu.c b/innobase/buf/buf0flu.c index 9eb8076732d..d36041ae133 100644 --- a/innobase/buf/buf0flu.c +++ b/innobase/buf/buf0flu.c @@ -111,6 +111,7 @@ buf_flush_ready_for_replace( { #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&(buf_pool->mutex))); + ut_ad(mutex_own(&block->mutex)); #endif /* UNIV_SYNC_DEBUG */ ut_a(block->state == BUF_BLOCK_FILE_PAGE); @@ -137,6 +138,7 @@ buf_flush_ready_for_flush( { #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&(buf_pool->mutex))); + ut_ad(mutex_own(&(block->mutex))); #endif /* UNIV_SYNC_DEBUG */ ut_ad(block->state == BUF_BLOCK_FILE_PAGE); @@ -507,10 +509,17 @@ buf_flush_try_page( block = buf_page_hash_get(space, offset); + if (!block) { + mutex_exit(&(buf_pool->mutex)); + return(0); + } + ut_a(block->state == BUF_BLOCK_FILE_PAGE); + mutex_enter(&block->mutex); + if (flush_type == BUF_FLUSH_LIST - && block && buf_flush_ready_for_flush(block, flush_type)) { + && buf_flush_ready_for_flush(block, flush_type)) { block->io_fix = BUF_IO_WRITE; block->flush_type = flush_type; @@ -534,6 +543,7 @@ buf_flush_try_page( locked = TRUE; } + mutex_exit(&block->mutex); mutex_exit(&(buf_pool->mutex)); if (!locked) { @@ -552,8 +562,8 @@ buf_flush_try_page( return(1); - } else if (flush_type == BUF_FLUSH_LRU && block - && buf_flush_ready_for_flush(block, flush_type)) { + } else if (flush_type == BUF_FLUSH_LRU + && buf_flush_ready_for_flush(block, flush_type)) { /* VERY IMPORTANT: Because any thread may call the LRU flush, even when owning @@ -579,14 +589,15 @@ buf_flush_try_page( buf_pool mutex: this ensures that the latch is acquired immediately. */ + mutex_exit(&block->mutex); mutex_exit(&(buf_pool->mutex)); buf_flush_write_block_low(block); return(1); - } else if (flush_type == BUF_FLUSH_SINGLE_PAGE && block - && buf_flush_ready_for_flush(block, flush_type)) { + } else if (flush_type == BUF_FLUSH_SINGLE_PAGE + && buf_flush_ready_for_flush(block, flush_type)) { block->io_fix = BUF_IO_WRITE; block->flush_type = flush_type; @@ -598,6 +609,7 @@ buf_flush_try_page( (buf_pool->n_flush[flush_type])++; + mutex_exit(&block->mutex); mutex_exit(&(buf_pool->mutex)); rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE); @@ -611,11 +623,12 @@ buf_flush_try_page( buf_flush_write_block_low(block); return(1); - } else { - mutex_exit(&(buf_pool->mutex)); + } - return(0); - } + mutex_exit(&block->mutex); + mutex_exit(&(buf_pool->mutex)); + + return(0); } /*************************************************************** @@ -659,34 +672,48 @@ buf_flush_try_neighbors( block = buf_page_hash_get(space, i); - if (block && flush_type == BUF_FLUSH_LRU && i != offset - && !block->old) { + if (!block) { + + continue; + + } else if (flush_type == BUF_FLUSH_LRU && i != offset + && !block->old) { /* We avoid flushing 'non-old' blocks in an LRU flush, because the flushed blocks are soon freed */ continue; - } + } else { + + mutex_enter(&block->mutex); - if (block && buf_flush_ready_for_flush(block, flush_type) - && (i == offset || block->buf_fix_count == 0)) { - /* We only try to flush those neighbors != offset - where the buf fix count is zero, as we then know that - we probably can latch the page without a semaphore - wait. Semaphore waits are expensive because we must - flush the doublewrite buffer before we start - waiting. */ + if (buf_flush_ready_for_flush(block, flush_type) + && (i == offset || block->buf_fix_count == 0)) { + /* We only try to flush those + neighbors != offset where the buf fix count is + zero, as we then know that we probably can + latch the page without a semaphore wait. + Semaphore waits are expensive because we must + flush the doublewrite buffer before we start + waiting. */ - mutex_exit(&(buf_pool->mutex)); + mutex_exit(&block->mutex); + + mutex_exit(&(buf_pool->mutex)); - /* Note: as we release the buf_pool mutex above, in - buf_flush_try_page we cannot be sure the page is still - in a flushable state: therefore we check it again - inside that function. */ + /* Note: as we release the buf_pool mutex + above, in buf_flush_try_page we cannot be sure + the page is still in a flushable state: + therefore we check it again inside that + function. */ - count += buf_flush_try_page(space, i, flush_type); + count += buf_flush_try_page(space, i, + flush_type); - mutex_enter(&(buf_pool->mutex)); + mutex_enter(&(buf_pool->mutex)); + } else { + mutex_exit(&block->mutex); + } } } @@ -780,12 +807,15 @@ buf_flush_batch( while ((block != NULL) && !found) { + mutex_enter(&block->mutex); + if (buf_flush_ready_for_flush(block, flush_type)) { found = TRUE; space = block->space; offset = block->offset; + mutex_exit(&block->mutex); mutex_exit(&(buf_pool->mutex)); old_page_count = page_count; @@ -803,11 +833,14 @@ buf_flush_batch( } else if (flush_type == BUF_FLUSH_LRU) { - block = UT_LIST_GET_PREV(LRU, block); + mutex_exit(&block->mutex); + block = UT_LIST_GET_PREV(LRU, block); } else { ut_ad(flush_type == BUF_FLUSH_LIST); + mutex_exit(&block->mutex); + block = UT_LIST_GET_PREV(flush_list, block); } } @@ -884,10 +917,14 @@ buf_flush_LRU_recommendation(void) + BUF_FLUSH_EXTRA_MARGIN) && (distance < BUF_LRU_FREE_SEARCH_LEN)) { + mutex_enter(&block->mutex); + if (buf_flush_ready_for_replace(block)) { n_replaceable++; } + mutex_exit(&block->mutex); + distance++; block = UT_LIST_GET_PREV(LRU, block); diff --git a/innobase/buf/buf0lru.c b/innobase/buf/buf0lru.c index 0ced7e23abe..0255082a53e 100644 --- a/innobase/buf/buf0lru.c +++ b/innobase/buf/buf0lru.c @@ -123,6 +123,8 @@ buf_LRU_search_and_free_block( while (block != NULL) { + mutex_enter(&block->mutex); + if (buf_flush_ready_for_replace(block)) { if (buf_debug_prints) { @@ -134,20 +136,24 @@ buf_LRU_search_and_free_block( buf_LRU_block_remove_hashed_page(block); mutex_exit(&(buf_pool->mutex)); + mutex_exit(&block->mutex); btr_search_drop_page_hash_index(block->frame); - mutex_enter(&(buf_pool->mutex)); - ut_a(block->buf_fix_count == 0); - buf_LRU_block_free_hashed_page(block); + mutex_enter(&(buf_pool->mutex)); + mutex_enter(&block->mutex); + buf_LRU_block_free_hashed_page(block); freed = TRUE; + mutex_exit(&block->mutex); break; } + mutex_exit(&block->mutex); + block = UT_LIST_GET_PREV(LRU, block); distance++; @@ -274,7 +280,10 @@ loop: block = UT_LIST_GET_FIRST(buf_pool->free); UT_LIST_REMOVE(free, buf_pool->free, block); + + mutex_enter(&block->mutex); block->state = BUF_BLOCK_READY_FOR_USE; + mutex_exit(&block->mutex); mutex_exit(&(buf_pool->mutex)); @@ -636,6 +645,7 @@ buf_LRU_block_free_non_file_page( { #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&(buf_pool->mutex))); + ut_ad(mutex_own(&block->mutex)); #endif /* UNIV_SYNC_DEBUG */ ut_ad(block); @@ -664,6 +674,7 @@ buf_LRU_block_remove_hashed_page( { #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&(buf_pool->mutex))); + ut_ad(mutex_own(&block->mutex)); #endif /* UNIV_SYNC_DEBUG */ ut_ad(block); @@ -697,6 +708,7 @@ buf_LRU_block_free_hashed_page( { #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&(buf_pool->mutex))); + ut_ad(mutex_own(&block->mutex)); #endif /* UNIV_SYNC_DEBUG */ ut_ad(block->state == BUF_BLOCK_REMOVE_HASH); diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h index 5ac9c83a5f9..872f6f3461b 100644 --- a/innobase/include/buf0buf.h +++ b/innobase/include/buf0buf.h @@ -431,8 +431,8 @@ Gets the mutex number protecting the page record lock hash chain in the lock table. */ UNIV_INLINE mutex_t* -buf_frame_get_lock_mutex( -/*=====================*/ +buf_frame_get_mutex( +/*================*/ /* out: mutex */ byte* ptr); /* in: pointer to within a buffer frame */ /*********************************************************************** @@ -654,7 +654,10 @@ struct buf_block_struct{ ulint magic_n; /* magic number to check */ ulint state; /* state of the control block: - BUF_BLOCK_NOT_USED, ... */ + BUF_BLOCK_NOT_USED, ...; changing + this is only allowed when a thread + has BOTH the buffer pool mutex AND + block->mutex locked */ byte* frame; /* pointer to buffer frame which is of size UNIV_PAGE_SIZE, and aligned to an address divisible by @@ -663,8 +666,12 @@ struct buf_block_struct{ ulint offset; /* page number within the space */ ulint lock_hash_val; /* hashed value of the page address in the record lock hash table */ - mutex_t* lock_mutex; /* mutex protecting the chain in the - record lock hash table */ + mutex_t mutex; /* mutex protecting this block: + state (also protected by the buffer + pool mutex), io_fix, buf_fix_count, + and accessed; we introduce this new + mutex in InnoDB-5.1 to relieve + contention on the buffer pool mutex */ rw_lock_t lock; /* read-write lock of the buffer frame */ rw_lock_t read_lock; /* rw-lock reserved when a page read @@ -720,20 +727,27 @@ struct buf_block_struct{ in heuristic algorithms, because of the possibility of a wrap-around! */ ulint freed_page_clock;/* the value of freed_page_clock - buffer pool when this block was - last time put to the head of the - LRU list */ + of the buffer pool when this block was + the last time put to the head of the + LRU list; a thread is allowed to + read this for heuristic purposes + without holding any mutex or latch */ ibool old; /* TRUE if the block is in the old blocks in the LRU list */ ibool accessed; /* TRUE if the page has been accessed while in the buffer pool: read-ahead may read in pages which have not been - accessed yet */ + accessed yet; this is protected by + block->mutex; a thread is allowed to + read this for heuristic purposes + without holding any mutex or latch */ ulint buf_fix_count; /* count of how manyfold this block - is currently bufferfixed */ + is currently bufferfixed; this is + protected by block->mutex */ ulint io_fix; /* if a read is pending to the frame, io_fix is BUF_IO_READ, in the case - of a write BUF_IO_WRITE, otherwise 0 */ + of a write BUF_IO_WRITE, otherwise 0; + this is protected by block->mutex */ /* 4. Optimistic search field */ dulint modify_clock; /* this clock is incremented every @@ -859,7 +873,9 @@ struct buf_pool_struct{ number of buffer blocks removed from the end of the LRU list; NOTE that this counter may wrap around at 4 - billion! */ + billion! A thread is allowed to + read this for heuristic purposes + without holding any mutex or latch */ ulint LRU_flush_ended;/* when an LRU flush ends for a page, this is incremented by one; this is set to zero when a buffer block is diff --git a/innobase/include/buf0buf.ic b/innobase/include/buf0buf.ic index 16deade0901..1b50d7ffb9b 100644 --- a/innobase/include/buf0buf.ic +++ b/innobase/include/buf0buf.ic @@ -357,8 +357,8 @@ Gets the mutex number protecting the page record lock hash chain in the lock table. */ UNIV_INLINE mutex_t* -buf_frame_get_lock_mutex( -/*=====================*/ +buf_frame_get_mutex( +/*================*/ /* out: mutex */ byte* ptr) /* in: pointer to within a buffer frame */ { @@ -366,7 +366,7 @@ buf_frame_get_lock_mutex( block = buf_block_align(ptr); - return(block->lock_mutex); + return(&block->mutex); } /************************************************************************* @@ -523,6 +523,7 @@ buf_block_buf_fix_inc_debug( ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line); ut_ad(ret == TRUE); + ut_ad(mutex_own(&block->mutex)); block->buf_fix_count++; } @@ -623,27 +624,25 @@ buf_page_release( RW_NO_LATCH */ mtr_t* mtr) /* in: mtr */ { - ulint buf_fix_count; - ut_ad(block); - mutex_enter_fast(&(buf_pool->mutex)); - ut_ad(block->state == BUF_BLOCK_FILE_PAGE); ut_ad(block->buf_fix_count > 0); if (rw_latch == RW_X_LATCH && mtr->modifications) { - + mutex_enter(&buf_pool->mutex); buf_flush_note_modification(block, mtr); + mutex_exit(&buf_pool->mutex); } + mutex_enter(&block->mutex); + #ifdef UNIV_SYNC_DEBUG rw_lock_s_unlock(&(block->debug_latch)); #endif - buf_fix_count = block->buf_fix_count; - block->buf_fix_count = buf_fix_count - 1; + block->buf_fix_count--; - mutex_exit(&(buf_pool->mutex)); + mutex_exit(&block->mutex); if (rw_latch == RW_S_LATCH) { rw_lock_s_unlock(&(block->lock)); diff --git a/innobase/include/sync0arr.h b/innobase/include/sync0arr.h index 73496a2ea84..cc0913e4a70 100644 --- a/innobase/include/sync0arr.h +++ b/innobase/include/sync0arr.h @@ -75,17 +75,12 @@ sync_array_free_cell( sync_array_t* arr, /* in: wait array */ ulint index); /* in: index of the cell in array */ /************************************************************************** -Looks for the cells in the wait array which refer -to the wait object specified, -and sets their corresponding events to the signaled state. In this -way releases the threads waiting for the object to contend for the object. -It is possible that no such cell is found, in which case does nothing. */ +Note that one of the wait objects was signalled. */ void -sync_array_signal_object( -/*=====================*/ - sync_array_t* arr, /* in: wait array */ - void* object);/* in: wait object */ +sync_array_object_signalled( +/*========================*/ + sync_array_t* arr); /* in: wait array */ /************************************************************************** If the wakeup algorithm does not work perfectly at semaphore relases, this function will do the waking (see the comment in mutex_exit). This diff --git a/innobase/include/sync0rw.h b/innobase/include/sync0rw.h index d71691b4353..c3a75f6acd1 100644 --- a/innobase/include/sync0rw.h +++ b/innobase/include/sync0rw.h @@ -410,6 +410,7 @@ blocked by readers, a writer may queue for the lock by setting the writer field. Then no new readers are allowed in. */ struct rw_lock_struct { + os_event_t event; /* Used by sync0arr.c for thread queueing */ ulint reader_count; /* Number of readers who have locked this lock in the shared mode */ ulint writer; /* This field is set to RW_LOCK_EX if there diff --git a/innobase/include/sync0rw.ic b/innobase/include/sync0rw.ic index 8fc93f4a9da..a8fb17a552a 100644 --- a/innobase/include/sync0rw.ic +++ b/innobase/include/sync0rw.ic @@ -380,7 +380,8 @@ rw_lock_s_unlock_func( mutex_exit(mutex); if (sg == TRUE) { - sync_array_signal_object(sync_primary_wait_array, lock); + os_event_set(lock->event); + sync_array_object_signalled(sync_primary_wait_array); } ut_ad(rw_lock_validate(lock)); @@ -459,7 +460,8 @@ rw_lock_x_unlock_func( mutex_exit(&(lock->mutex)); if (sg == TRUE) { - sync_array_signal_object(sync_primary_wait_array, lock); + os_event_set(lock->event); + sync_array_object_signalled(sync_primary_wait_array); } ut_ad(rw_lock_validate(lock)); diff --git a/innobase/include/sync0sync.h b/innobase/include/sync0sync.h index 40197cd61bd..707b34ab28e 100644 --- a/innobase/include/sync0sync.h +++ b/innobase/include/sync0sync.h @@ -447,6 +447,7 @@ Do not use its fields directly! The structure used in the spin lock implementation of a mutual exclusion semaphore. */ struct mutex_struct { + os_event_t event; /* Used by sync0arr.c for the wait queue */ ulint lock_word; /* This ulint is the target of the atomic test-and-set instruction in Win32 */ #if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER) diff --git a/innobase/os/os0sync.c b/innobase/os/os0sync.c index 827d68501db..d0d03c2e247 100644 --- a/innobase/os/os0sync.c +++ b/innobase/os/os0sync.c @@ -21,6 +21,7 @@ Created 9/6/1995 Heikki Tuuri /* Type definition for an operating system mutex struct */ struct os_mutex_struct{ + os_event_t event; /* Used by sync0arr.c for queing threads */ void* handle; /* OS handle to mutex */ ulint count; /* we use this counter to check that the same thread does not @@ -35,6 +36,7 @@ struct os_mutex_struct{ /* Mutex protecting counts and the lists of OS mutexes and events */ os_mutex_t os_sync_mutex; ibool os_sync_mutex_inited = FALSE; +ibool os_sync_free_called = FALSE; /* This is incremented by 1 in os_thread_create and decremented by 1 in os_thread_exit */ @@ -50,6 +52,10 @@ ulint os_event_count = 0; ulint os_mutex_count = 0; ulint os_fast_mutex_count = 0; +/* Because a mutex is embedded inside an event and there is an +event embedded inside a mutex, on free, this generates a recursive call. +This version of the free event function doesn't acquire the global lock */ +static void os_event_free_internal(os_event_t event); /************************************************************* Initializes global event and OS 'slow' mutex lists. */ @@ -76,6 +82,7 @@ os_sync_free(void) os_event_t event; os_mutex_t mutex; + os_sync_free_called = TRUE; event = UT_LIST_GET_FIRST(os_event_list); while (event) { @@ -99,6 +106,7 @@ os_sync_free(void) mutex = UT_LIST_GET_FIRST(os_mutex_list); } + os_sync_free_called = FALSE; } /************************************************************* @@ -146,14 +154,21 @@ os_event_create( event->signal_count = 0; #endif /* __WIN__ */ - /* Put to the list of events */ - os_mutex_enter(os_sync_mutex); + /* The os_sync_mutex can be NULL because during startup an event + can be created [ because it's embedded in the mutex/rwlock ] before + this module has been initialized */ + if (os_sync_mutex != NULL) { + os_mutex_enter(os_sync_mutex); + } + /* Put to the list of events */ UT_LIST_ADD_FIRST(os_event_list, os_event_list, event); os_event_count++; - os_mutex_exit(os_sync_mutex); + if (os_sync_mutex != NULL) { + os_mutex_exit(os_sync_mutex); + } return(event); } @@ -255,6 +270,35 @@ os_event_reset( #endif } +/************************************************************** +Frees an event object, without acquiring the global lock. */ +static +void +os_event_free_internal( +/*===================*/ + os_event_t event) /* in: event to free */ +{ +#ifdef __WIN__ + ut_a(event); + + ut_a(CloseHandle(event->handle)); +#else + ut_a(event); + + /* This is to avoid freeing the mutex twice */ + os_fast_mutex_free(&(event->os_mutex)); + + ut_a(0 == pthread_cond_destroy(&(event->cond_var))); +#endif + /* Remove from the list of events */ + + UT_LIST_REMOVE(os_event_list, os_event_list, event); + + os_event_count--; + + ut_free(event); +} + /************************************************************** Frees an event object. */ @@ -456,6 +500,7 @@ os_mutex_create( mutex_str->handle = mutex; mutex_str->count = 0; + mutex_str->event = os_event_create(NULL); if (os_sync_mutex_inited) { /* When creating os_sync_mutex itself we cannot reserve it */ @@ -532,6 +577,10 @@ os_mutex_free( { ut_a(mutex); + if (!os_sync_free_called) { + os_event_free_internal(mutex->event); + } + if (os_sync_mutex_inited) { os_mutex_enter(os_sync_mutex); } diff --git a/innobase/sync/sync0arr.c b/innobase/sync/sync0arr.c index a443d630425..43d3962a55e 100644 --- a/innobase/sync/sync0arr.c +++ b/innobase/sync/sync0arr.c @@ -62,9 +62,6 @@ struct sync_cell_struct { ibool waiting; /* TRUE if the thread has already called sync_array_event_wait on this cell */ - ibool event_set; /* TRUE if the event is set */ - os_event_t event; /* operating system event - semaphore handle */ time_t reservation_time;/* time when the thread reserved the wait cell */ }; @@ -218,10 +215,7 @@ sync_array_create( for (i = 0; i < n_cells; i++) { cell = sync_array_get_nth_cell(arr, i); cell->wait_object = NULL; - - /* Create an operating system event semaphore with no name */ - cell->event = os_event_create(NULL); - cell->event_set = FALSE; /* it is created in reset state */ + cell->waiting = FALSE; } return(arr); @@ -235,19 +229,12 @@ sync_array_free( /*============*/ sync_array_t* arr) /* in, own: sync wait array */ { - ulint i; - sync_cell_t* cell; ulint protection; ut_a(arr->n_reserved == 0); sync_array_validate(arr); - for (i = 0; i < arr->n_cells; i++) { - cell = sync_array_get_nth_cell(arr, i); - os_event_free(cell->event); - } - protection = arr->protection; /* Release the mutex protecting the wait array complex */ @@ -292,28 +279,20 @@ sync_array_validate( sync_array_exit(arr); } -/*********************************************************************** -Puts the cell event in set state. */ -static -void -sync_cell_event_set( -/*================*/ - sync_cell_t* cell) /* in: array cell */ -{ - os_event_set(cell->event); - cell->event_set = TRUE; -} - /*********************************************************************** Puts the cell event in reset state. */ static void sync_cell_event_reset( /*==================*/ - sync_cell_t* cell) /* in: array cell */ + ulint type, /* in: lock type mutex/rw_lock */ + void* object) /* in: the rw_lock/mutex object */ { - os_event_reset(cell->event); - cell->event_set = FALSE; + if (type == SYNC_MUTEX) { + os_event_reset(((mutex_t *) object)->event); + } else { + os_event_reset(((rw_lock_t *) object)->event); + } } /********************************************************************** @@ -346,14 +325,7 @@ sync_array_reserve_cell( if (cell->wait_object == NULL) { - /* Make sure the event is reset */ - if (cell->event_set) { - sync_cell_event_reset(cell); - } - - cell->reservation_time = time(NULL); - cell->thread = os_thread_get_curr_id(); - + cell->waiting = FALSE; cell->wait_object = object; if (type == SYNC_MUTEX) { @@ -363,7 +335,6 @@ sync_array_reserve_cell( } cell->request_type = type; - cell->waiting = FALSE; cell->file = file; cell->line = line; @@ -373,6 +344,13 @@ sync_array_reserve_cell( *index = i; sync_array_exit(arr); + + /* Make sure the event is reset */ + sync_cell_event_reset(type, object); + + cell->reservation_time = time(NULL); + + cell->thread = os_thread_get_curr_id(); return; } @@ -408,7 +386,12 @@ sync_array_wait_event( ut_a(!cell->waiting); ut_ad(os_thread_get_curr_id() == cell->thread); - event = cell->event; + if (cell->request_type == SYNC_MUTEX) { + event = ((mutex_t*) cell->wait_object)->event; + } else { + event = ((rw_lock_t*) cell->wait_object)->event; + } + cell->waiting = TRUE; #ifdef UNIV_SYNC_DEBUG @@ -505,10 +488,6 @@ sync_array_cell_print( if (!cell->waiting) { fputs("wait has ended\n", file); } - - if (cell->event_set) { - fputs("wait is ending\n", file); - } } #ifdef UNIV_SYNC_DEBUG @@ -618,7 +597,7 @@ sync_array_detect_deadlock( depth++; - if (cell->event_set || !cell->waiting) { + if (!cell->waiting) { return(FALSE); /* No deadlock here */ } @@ -797,6 +776,7 @@ sync_array_free_cell( ut_a(cell->wait_object != NULL); + cell->waiting = FALSE; cell->wait_object = NULL; ut_a(arr->n_reserved > 0); @@ -806,44 +786,17 @@ sync_array_free_cell( } /************************************************************************** -Looks for the cells in the wait array which refer to the wait object -specified, and sets their corresponding events to the signaled state. In this -way releases the threads waiting for the object to contend for the object. -It is possible that no such cell is found, in which case does nothing. */ +Increments the signalled count. */ void -sync_array_signal_object( -/*=====================*/ - sync_array_t* arr, /* in: wait array */ - void* object) /* in: wait object */ +sync_array_object_signalled( +/*========================*/ + sync_array_t* arr) /* in: wait array */ { - sync_cell_t* cell; - ulint count; - ulint i; - sync_array_enter(arr); arr->sg_count++; - i = 0; - count = 0; - - while (count < arr->n_reserved) { - - cell = sync_array_get_nth_cell(arr, i); - - if (cell->wait_object != NULL) { - - count++; - if (cell->wait_object == object) { - - sync_cell_event_set(cell); - } - } - - i++; - } - sync_array_exit(arr); } @@ -876,7 +829,17 @@ sync_arr_wake_threads_if_sema_free(void) if (sync_arr_cell_can_wake_up(cell)) { - sync_cell_event_set(cell); + if (cell->request_type == SYNC_MUTEX) { + mutex_t* mutex; + + mutex = cell->wait_object; + os_event_set(mutex->event); + } else { + rw_lock_t* lock; + + lock = cell->wait_object; + os_event_set(lock->event); + } } } @@ -906,7 +869,7 @@ sync_array_print_long_waits(void) cell = sync_array_get_nth_cell(sync_primary_wait_array, i); - if (cell->wait_object != NULL + if (cell->wait_object != NULL && cell->waiting && difftime(time(NULL), cell->reservation_time) > 240) { fputs("InnoDB: Warning: a long semaphore wait:\n", stderr); @@ -914,7 +877,7 @@ sync_array_print_long_waits(void) noticed = TRUE; } - if (cell->wait_object != NULL + if (cell->wait_object != NULL && cell->waiting && difftime(time(NULL), cell->reservation_time) > fatal_timeout) { fatal = TRUE; diff --git a/innobase/sync/sync0rw.c b/innobase/sync/sync0rw.c index e3caa24cb1e..70b3ba11462 100644 --- a/innobase/sync/sync0rw.c +++ b/innobase/sync/sync0rw.c @@ -123,6 +123,7 @@ rw_lock_create_func( lock->last_x_file_name = (char *) "not yet reserved"; lock->last_s_line = 0; lock->last_x_line = 0; + lock->event = os_event_create(NULL); mutex_enter(&rw_lock_list_mutex); @@ -151,6 +152,7 @@ rw_lock_free( mutex_free(rw_lock_get_mutex(lock)); mutex_enter(&rw_lock_list_mutex); + os_event_free(lock->event); UT_LIST_REMOVE(list, rw_lock_list, lock); diff --git a/innobase/sync/sync0sync.c b/innobase/sync/sync0sync.c index 5ee08d83987..59161254343 100644 --- a/innobase/sync/sync0sync.c +++ b/innobase/sync/sync0sync.c @@ -210,6 +210,7 @@ mutex_create_func( os_fast_mutex_init(&(mutex->os_fast_mutex)); mutex->lock_word = 0; #endif + mutex->event = os_event_create(NULL); mutex_set_waiters(mutex, 0); mutex->magic_n = MUTEX_MAGIC_N; #ifdef UNIV_SYNC_DEBUG @@ -260,6 +261,8 @@ mutex_free( mutex_exit(&mutex_list_mutex); } + os_event_free(mutex->event); + #if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER) os_fast_mutex_free(&(mutex->os_fast_mutex)); #endif @@ -481,8 +484,8 @@ mutex_signal_object( /* The memory order of resetting the waiters field and signaling the object is important. See LEMMA 1 above. */ - - sync_array_signal_object(sync_primary_wait_array, mutex); + os_event_set(mutex->event); + sync_array_object_signalled(sync_primary_wait_array); } #ifdef UNIV_SYNC_DEBUG @@ -1030,6 +1033,7 @@ sync_thread_add_level( ut_a(sync_thread_levels_g(array, SYNC_PURGE_SYS)); } else if (level == SYNC_TREE_NODE) { ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE) + || sync_thread_levels_contain(array, SYNC_DICT_OPERATION) || sync_thread_levels_g(array, SYNC_TREE_NODE - 1)); } else if (level == SYNC_TREE_NODE_FROM_HASH) { ut_a(1); -- cgit v1.2.1 From 0152471f5aa966b29f3df802e5b12907ee1165f1 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 3 Jan 2007 12:08:59 +0100 Subject: Raise version number after cloning 4.0.28 --- configure.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.in b/configure.in index 0be3bcf0406..a5b1d4f7277 100644 --- a/configure.in +++ b/configure.in @@ -4,7 +4,7 @@ dnl Process this file with autoconf to produce a configure script. AC_INIT(sql/mysqld.cc) AC_CANONICAL_SYSTEM # The Docs Makefile.am parses this line! -AM_INIT_AUTOMAKE(mysql, 4.0.28) +AM_INIT_AUTOMAKE(mysql, 4.0.29) AM_CONFIG_HEADER(config.h) PROTOCOL_VERSION=10 -- cgit v1.2.1