author     tsmith@quadxeon.mysql.com <>    2006-11-09 05:02:37 +0100
committer  tsmith@quadxeon.mysql.com <>    2006-11-09 05:02:37 +0100
commit     f1e0cf9d285269571e96ac4fc058e7494a69c7c7 (patch)
tree       03d4d71d67b4aaee305c7431903f593ad5229733 /innobase
parent     41117b1226bcb02536bf963d9a18a6140d5ffe1a (diff)
download   mariadb-git-f1e0cf9d285269571e96ac4fc058e7494a69c7c7.tar.gz
This ChangeSet must be null-merged to 5.1. Applied innodb-5.0-ss982, -ss998, -ss1003

Fixes:
- Bug #15815: Very poor performance with multiple queries running concurrently
- Bug #22868: 'Thread thrashing' with > 50 concurrent conns under an update-intensive workload
- Bug #23769: Debug assertion failure with innodb_locks_unsafe_for_binlog
- Bug #24089: Race condition in fil_flush_file_spaces()
Diffstat (limited to 'innobase')
-rw-r--r--  innobase/buf/buf0buf.c         181
-rw-r--r--  innobase/buf/buf0flu.c          92
-rw-r--r--  innobase/buf/buf0lru.c          25
-rw-r--r--  innobase/dict/dict0crea.c       13
-rw-r--r--  innobase/fil/fil0fil.c          44
-rw-r--r--  innobase/include/buf0buf.h      40
-rw-r--r--  innobase/include/buf0buf.ic     19
-rw-r--r--  innobase/include/dict0crea.h     6
-rw-r--r--  innobase/include/sync0arr.h     13
-rw-r--r--  innobase/include/sync0rw.h       1
-rw-r--r--  innobase/include/sync0rw.ic      6
-rw-r--r--  innobase/include/sync0sync.h     1
-rw-r--r--  innobase/os/os0sync.c           55
-rw-r--r--  innobase/row/row0mysql.c         8
-rw-r--r--  innobase/row/row0sel.c           6
-rw-r--r--  innobase/srv/srv0start.c         5
-rw-r--r--  innobase/sync/sync0arr.c       119
-rw-r--r--  innobase/sync/sync0rw.c          2
-rw-r--r--  innobase/sync/sync0sync.c        8
19 files changed, 419 insertions(+), 225 deletions(-)
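
The central change in the buffer pool files below is a split of lock
responsibility: list and hash structure stay under the global buf_pool
mutex, while io_fix, buf_fix_count and accessed move under a new
per-block mutex. A minimal, self-contained sketch of that pattern using
pthreads follows; block_t, pool_mutex and block_fix() are hypothetical
stand-ins for the InnoDB types, not the real API:

#include <pthread.h>

typedef struct {
	pthread_mutex_t mutex;          /* per-block mutex (the new lock) */
	int             buf_fix_count;  /* protected by block->mutex */
	int             io_fix;         /* protected by block->mutex */
} block_t;

static pthread_mutex_t pool_mutex = PTHREAD_MUTEX_INITIALIZER;

static void block_fix(block_t* block)
{
	/* Lock order, as in the patch: pool mutex first, then the
	block mutex; never the other way around. */
	pthread_mutex_lock(&pool_mutex);
	pthread_mutex_lock(&block->mutex);

	block->buf_fix_count++;

	/* The pool mutex can be dropped as soon as no list or hash
	structure is touched; fixing a page no longer serializes all
	threads on one global lock. */
	pthread_mutex_unlock(&pool_mutex);
	pthread_mutex_unlock(&block->mutex);
}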
diff --git a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c
index db09a931c29..f24f1744363 100644
--- a/innobase/buf/buf0buf.c
+++ b/innobase/buf/buf0buf.c
@@ -221,6 +221,9 @@ in the free list to the frames.
5) When we have AWE enabled, we disable adaptive hash indexes.
*/
+/* Value in microseconds */
+static const int WAIT_FOR_READ = 20000;
+
buf_pool_t* buf_pool = NULL; /* The buffer buf_pool of the database */
#ifdef UNIV_DEBUG
@@ -488,6 +491,9 @@ buf_block_init(
block->n_pointers = 0;
+ mutex_create(&block->mutex);
+ mutex_set_level(&block->mutex, SYNC_BUF_BLOCK);
+
rw_lock_create(&(block->lock));
ut_ad(rw_lock_validate(&(block->lock)));
@@ -756,8 +762,15 @@ buf_awe_map_page_to_frame(
bck = UT_LIST_GET_LAST(buf_pool->awe_LRU_free_mapped);
while (bck) {
- if (bck->state == BUF_BLOCK_FILE_PAGE
- && (bck->buf_fix_count != 0 || bck->io_fix != 0)) {
+ ibool skip;
+
+ mutex_enter(&bck->mutex);
+
+ skip = (bck->state == BUF_BLOCK_FILE_PAGE
+ && (bck->buf_fix_count != 0 || bck->io_fix != 0));
+
+ if (skip) {
+ mutex_exit(&bck->mutex);
/* We have to skip this */
bck = UT_LIST_GET_PREV(awe_LRU_free_mapped, bck);
@@ -790,6 +803,8 @@ buf_awe_map_page_to_frame(
buf_pool->n_pages_awe_remapped++;
+ mutex_exit(&bck->mutex);
+
return;
}
}
@@ -828,13 +843,22 @@ buf_block_make_young(
/*=================*/
buf_block_t* block) /* in: block to make younger */
{
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(!mutex_own(&(buf_pool->mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+
+ /* Note that we read freed_page_clock's without holding any mutex:
+ this is allowed since the result is used only in heuristics */
+
if (buf_pool->freed_page_clock >= block->freed_page_clock
- + 1 + (buf_pool->curr_size / 1024)) {
+ + 1 + (buf_pool->curr_size / 4)) {
+ mutex_enter(&buf_pool->mutex);
/* There has been freeing activity in the LRU list:
best to move to the head of the LRU list */
buf_LRU_make_block_young(block);
+ mutex_exit(&buf_pool->mutex);
}
}
@@ -869,12 +893,16 @@ buf_block_free(
/*===========*/
buf_block_t* block) /* in, own: block to be freed */
{
- ut_a(block->state != BUF_BLOCK_FILE_PAGE);
-
mutex_enter(&(buf_pool->mutex));
+ mutex_enter(&block->mutex);
+
+ ut_a(block->state != BUF_BLOCK_FILE_PAGE);
+
buf_LRU_block_free_non_file_page(block);
+ mutex_exit(&block->mutex);
+
mutex_exit(&(buf_pool->mutex));
}
@@ -1093,9 +1121,8 @@ buf_page_get_gen(
#endif
buf_pool->n_page_gets++;
loop:
- mutex_enter_fast(&(buf_pool->mutex));
-
block = NULL;
+ mutex_enter_fast(&(buf_pool->mutex));
if (guess) {
block = buf_block_align(guess);
@@ -1133,6 +1160,8 @@ loop:
goto loop;
}
+ mutex_enter(&block->mutex);
+
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
must_read = FALSE;
@@ -1142,9 +1171,9 @@ loop:
must_read = TRUE;
if (mode == BUF_GET_IF_IN_POOL) {
-
/* The page is only being read to buffer */
- mutex_exit(&(buf_pool->mutex));
+ mutex_exit(&buf_pool->mutex);
+ mutex_exit(&block->mutex);
return(NULL);
}
@@ -1168,7 +1197,7 @@ loop:
#else
buf_block_buf_fix_inc(block);
#endif
- buf_block_make_young(block);
+ mutex_exit(&buf_pool->mutex);
/* Check if this is the first access to the page */
@@ -1176,10 +1205,13 @@ loop:
block->accessed = TRUE;
+ mutex_exit(&block->mutex);
+
+ buf_block_make_young(block);
+
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(block->file_page_was_freed == FALSE);
#endif
- mutex_exit(&(buf_pool->mutex));
#ifdef UNIV_DEBUG
buf_dbg_counter++;
@@ -1204,13 +1236,14 @@ loop:
}
if (!success) {
- mutex_enter(&(buf_pool->mutex));
+ mutex_enter(&block->mutex);
block->buf_fix_count--;
+
+ mutex_exit(&block->mutex);
#ifdef UNIV_SYNC_DEBUG
rw_lock_s_unlock(&(block->debug_latch));
#endif
- mutex_exit(&(buf_pool->mutex));
return(NULL);
}
@@ -1221,18 +1254,16 @@ loop:
completes */
for (;;) {
- mutex_enter(&(buf_pool->mutex));
+ mutex_enter(&block->mutex);
if (block->io_fix == BUF_IO_READ) {
- mutex_exit(&(buf_pool->mutex));
+ mutex_exit(&block->mutex);
- /* Sleep 20 milliseconds */
-
- os_thread_sleep(20000);
+ os_thread_sleep(WAIT_FOR_READ);
} else {
- mutex_exit(&(buf_pool->mutex));
+ mutex_exit(&block->mutex);
break;
}
@@ -1290,15 +1321,15 @@ buf_page_optimistic_get_func(
ut_ad(mtr && block);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
-
- mutex_enter(&(buf_pool->mutex));
/* If AWE is used, block may have a different frame now, e.g., NULL */
-
+
+ mutex_enter(&block->mutex);
+
if (UNIV_UNLIKELY(block->state != BUF_BLOCK_FILE_PAGE)
- || UNIV_UNLIKELY(block->frame != guess)) {
- exit_func:
- mutex_exit(&(buf_pool->mutex));
+ || UNIV_UNLIKELY(block->frame != guess)) {
+
+ mutex_exit(&block->mutex);
return(FALSE);
}
@@ -1308,15 +1339,14 @@ buf_page_optimistic_get_func(
#else
buf_block_buf_fix_inc(block);
#endif
- buf_block_make_young(block);
-
- /* Check if this is the first access to the page */
-
accessed = block->accessed;
-
block->accessed = TRUE;
- mutex_exit(&(buf_pool->mutex));
+ mutex_exit(&block->mutex);
+
+ buf_block_make_young(block);
+
+ /* Check if this is the first access to the page */
ut_ad(!ibuf_inside() || ibuf_page(block->space, block->offset));
@@ -1331,13 +1361,16 @@ buf_page_optimistic_get_func(
}
if (UNIV_UNLIKELY(!success)) {
- mutex_enter(&(buf_pool->mutex));
+ mutex_enter(&block->mutex);
block->buf_fix_count--;
+
+ mutex_exit(&block->mutex);
+
#ifdef UNIV_SYNC_DEBUG
rw_lock_s_unlock(&(block->debug_latch));
#endif
- goto exit_func;
+ return(FALSE);
}
if (UNIV_UNLIKELY(!UT_DULINT_EQ(modify_clock, block->modify_clock))) {
@@ -1350,13 +1383,16 @@ buf_page_optimistic_get_func(
rw_lock_x_unlock(&(block->lock));
}
- mutex_enter(&(buf_pool->mutex));
+ mutex_enter(&block->mutex);
block->buf_fix_count--;
+
+ mutex_exit(&block->mutex);
+
#ifdef UNIV_SYNC_DEBUG
rw_lock_s_unlock(&(block->debug_latch));
#endif
- goto exit_func;
+ return(FALSE);
}
mtr_memo_push(mtr, block, fix_type);
@@ -1413,10 +1449,10 @@ buf_page_get_known_nowait(
ut_ad(mtr);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
- mutex_enter(&(buf_pool->mutex));
-
block = buf_block_align(guess);
+ mutex_enter(&block->mutex);
+
if (block->state == BUF_BLOCK_REMOVE_HASH) {
/* Another thread is just freeing the block from the LRU list
of the buffer pool: do not try to access this page; this
@@ -1425,7 +1461,7 @@ buf_page_get_known_nowait(
we have already removed it from the page address hash table
of the buffer pool. */
- mutex_exit(&(buf_pool->mutex));
+ mutex_exit(&block->mutex);
return(FALSE);
}
@@ -1437,12 +1473,12 @@ buf_page_get_known_nowait(
#else
buf_block_buf_fix_inc(block);
#endif
+ mutex_exit(&block->mutex);
+
if (mode == BUF_MAKE_YOUNG) {
buf_block_make_young(block);
}
- mutex_exit(&(buf_pool->mutex));
-
ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
if (rw_latch == RW_S_LATCH) {
@@ -1456,13 +1492,15 @@ buf_page_get_known_nowait(
}
if (!success) {
- mutex_enter(&(buf_pool->mutex));
+ mutex_enter(&block->mutex);
block->buf_fix_count--;
+
+ mutex_exit(&block->mutex);
+
#ifdef UNIV_SYNC_DEBUG
rw_lock_s_unlock(&(block->debug_latch));
#endif
- mutex_exit(&(buf_pool->mutex));
return(FALSE);
}
@@ -1510,8 +1548,7 @@ buf_page_init_for_backup_restore(
block->offset = offset;
block->lock_hash_val = 0;
- block->lock_mutex = NULL;
-
+
block->freed_page_clock = 0;
block->newest_modification = ut_dulint_zero;
@@ -1543,6 +1580,7 @@ buf_page_init(
{
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&(buf_pool->mutex)));
+ ut_ad(mutex_own(&(block->mutex)));
#endif /* UNIV_SYNC_DEBUG */
ut_a(block->state != BUF_BLOCK_FILE_PAGE);
@@ -1557,8 +1595,7 @@ buf_page_init(
block->index = NULL;
block->lock_hash_val = lock_rec_hash(space, offset);
- block->lock_mutex = NULL;
-
+
/* Insert into the hash table of file pages */
if (buf_page_hash_get(space, offset)) {
@@ -1650,6 +1687,7 @@ buf_page_init_for_read(
ut_a(block);
mutex_enter(&(buf_pool->mutex));
+ mutex_enter(&block->mutex);
if (fil_tablespace_deleted_or_being_deleted_in_mem(space,
tablespace_version)) {
@@ -1662,7 +1700,9 @@ buf_page_init_for_read(
/* The page belongs to a space which has been deleted or is
being deleted, or the page is already in buf_pool, return */
+ mutex_exit(&block->mutex);
mutex_exit(&(buf_pool->mutex));
+
buf_block_free(block);
if (mode == BUF_READ_IBUF_PAGES_ONLY) {
@@ -1682,6 +1722,7 @@ buf_page_init_for_read(
buf_LRU_add_block(block, TRUE); /* TRUE == to old blocks */
block->io_fix = BUF_IO_READ;
+
buf_pool->n_pend_reads++;
/* We set a pass-type x-lock on the frame because then the same
@@ -1693,6 +1734,7 @@ buf_page_init_for_read(
rw_lock_x_lock_gen(&(block->lock), BUF_IO_READ);
+ mutex_exit(&block->mutex);
mutex_exit(&(buf_pool->mutex));
if (mode == BUF_READ_IBUF_PAGES_ONLY) {
@@ -1757,6 +1799,8 @@ buf_page_create(
block = free_block;
+ mutex_enter(&block->mutex);
+
buf_page_init(space, offset, block);
/* The block must be put to the LRU list */
@@ -1767,13 +1811,15 @@ buf_page_create(
#else
buf_block_buf_fix_inc(block);
#endif
+ buf_pool->n_pages_created++;
+
+ mutex_exit(&(buf_pool->mutex));
+
mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
block->accessed = TRUE;
- buf_pool->n_pages_created++;
-
- mutex_exit(&(buf_pool->mutex));
+ mutex_exit(&block->mutex);
/* Delete possible entries for the page from the insert buffer:
such can exist if the page belonged to an index which was dropped */
@@ -1822,6 +1868,12 @@ buf_page_io_complete(
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+ /* We do not need protect block->io_fix here by block->mutex to read
+ it because this is the only function where we can change the value
+ from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
+ ensures that this is the only thread that handles the i/o for this
+ block. */
+
io_type = block->io_fix;
if (io_type == BUF_IO_READ) {
@@ -1890,11 +1942,12 @@ buf_page_io_complete(
}
}
+ mutex_enter(&(buf_pool->mutex));
+ mutex_enter(&block->mutex);
+
#ifdef UNIV_IBUF_DEBUG
ut_a(ibuf_count_get(block->space, block->offset) == 0);
#endif
- mutex_enter(&(buf_pool->mutex));
-
/* Because this thread which does the unlocking is not the same that
did the locking, we use a pass value != 0 in unlock, which simply
removes the newest lock debug record, without checking the thread
@@ -1937,6 +1990,7 @@ buf_page_io_complete(
#endif /* UNIV_DEBUG */
}
+ mutex_exit(&block->mutex);
mutex_exit(&(buf_pool->mutex));
#ifdef UNIV_DEBUG
@@ -1999,6 +2053,8 @@ buf_validate(void)
block = buf_pool_get_nth_block(buf_pool, i);
+ mutex_enter(&block->mutex);
+
if (block->state == BUF_BLOCK_FILE_PAGE) {
ut_a(buf_page_hash_get(block->space,
@@ -2042,6 +2098,8 @@ buf_validate(void)
} else if (block->state == BUF_BLOCK_NOT_USED) {
n_free++;
}
+
+ mutex_exit(&block->mutex);
}
if (n_lru + n_free > buf_pool->curr_size) {
@@ -2185,11 +2243,17 @@ buf_get_latched_pages_number(void)
for (i = 0; i < buf_pool->curr_size; i++) {
- block = buf_pool_get_nth_block(buf_pool, i);
+ block = buf_pool_get_nth_block(buf_pool, i);
+
+ if (block->magic_n == BUF_BLOCK_MAGIC_N) {
+ mutex_enter(&block->mutex);
- if (((block->buf_fix_count != 0) || (block->io_fix != 0)) &&
- block->magic_n == BUF_BLOCK_MAGIC_N )
- fixed_pages_number++;
+ if (block->buf_fix_count != 0 || block->io_fix != 0) {
+ fixed_pages_number++;
+ }
+
+ mutex_exit(&block->mutex);
+ }
}
mutex_exit(&(buf_pool->mutex));
@@ -2354,16 +2418,21 @@ buf_all_freed(void)
block = buf_pool_get_nth_block(buf_pool, i);
+ mutex_enter(&block->mutex);
+
if (block->state == BUF_BLOCK_FILE_PAGE) {
if (!buf_flush_ready_for_replace(block)) {
fprintf(stderr,
"Page %lu %lu still fixed or dirty\n",
- (ulong) block->space, (ulong) block->offset);
+ (ulong) block->space,
+ (ulong) block->offset);
ut_error;
}
}
+
+ mutex_exit(&block->mutex);
}
mutex_exit(&(buf_pool->mutex));
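
The page-wait loop patched above now polls the block mutex rather than
the pool mutex while a read completes. A condensed, hypothetical sketch
of that loop; block_t and wait_for_read() are illustrative, and 20000 µs
is the value the patch names WAIT_FOR_READ:

#include <pthread.h>
#include <unistd.h>

enum { IO_NONE = 0, IO_READ };

typedef struct {
	pthread_mutex_t mutex;
	int             io_fix;    /* IO_READ while a read is pending */
} block_t;

static const int WAIT_FOR_READ = 20000;   /* microseconds */

static void wait_for_read(block_t* block)
{
	for (;;) {
		int pending;

		pthread_mutex_lock(&block->mutex);
		pending = (block->io_fix == IO_READ);
		pthread_mutex_unlock(&block->mutex);

		if (!pending) {
			break;
		}
		/* Sleep with no mutex held, exactly as the patch does. */
		usleep(WAIT_FOR_READ);
	}
}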
diff --git a/innobase/buf/buf0flu.c b/innobase/buf/buf0flu.c
index e39d1ae0a71..fc7b60bf5fb 100644
--- a/innobase/buf/buf0flu.c
+++ b/innobase/buf/buf0flu.c
@@ -114,6 +114,7 @@ buf_flush_ready_for_replace(
{
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&(buf_pool->mutex)));
+ ut_ad(mutex_own(&block->mutex));
#endif /* UNIV_SYNC_DEBUG */
if (block->state != BUF_BLOCK_FILE_PAGE) {
ut_print_timestamp(stderr);
@@ -148,6 +149,7 @@ buf_flush_ready_for_flush(
{
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&(buf_pool->mutex)));
+ ut_ad(mutex_own(&(block->mutex)));
#endif /* UNIV_SYNC_DEBUG */
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
@@ -539,8 +541,15 @@ buf_flush_try_page(
ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
+ if (!block) {
+ mutex_exit(&(buf_pool->mutex));
+ return(0);
+ }
+
+ mutex_enter(&block->mutex);
+
if (flush_type == BUF_FLUSH_LIST
- && block && buf_flush_ready_for_flush(block, flush_type)) {
+ && buf_flush_ready_for_flush(block, flush_type)) {
block->io_fix = BUF_IO_WRITE;
@@ -578,6 +587,7 @@ buf_flush_try_page(
locked = TRUE;
}
+ mutex_exit(&block->mutex);
mutex_exit(&(buf_pool->mutex));
if (!locked) {
@@ -598,8 +608,8 @@ buf_flush_try_page(
return(1);
- } else if (flush_type == BUF_FLUSH_LRU && block
- && buf_flush_ready_for_flush(block, flush_type)) {
+ } else if (flush_type == BUF_FLUSH_LRU
+ && buf_flush_ready_for_flush(block, flush_type)) {
/* VERY IMPORTANT:
Because any thread may call the LRU flush, even when owning
@@ -639,14 +649,15 @@ buf_flush_try_page(
buf_pool mutex: this ensures that the latch is acquired
immediately. */
+ mutex_exit(&block->mutex);
mutex_exit(&(buf_pool->mutex));
buf_flush_write_block_low(block);
return(1);
- } else if (flush_type == BUF_FLUSH_SINGLE_PAGE && block
- && buf_flush_ready_for_flush(block, flush_type)) {
+ } else if (flush_type == BUF_FLUSH_SINGLE_PAGE
+ && buf_flush_ready_for_flush(block, flush_type)) {
block->io_fix = BUF_IO_WRITE;
@@ -672,6 +683,7 @@ buf_flush_try_page(
(buf_pool->n_flush[flush_type])++;
+ mutex_exit(&block->mutex);
mutex_exit(&(buf_pool->mutex));
rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
@@ -688,11 +700,12 @@ buf_flush_try_page(
buf_flush_write_block_low(block);
return(1);
- } else {
- mutex_exit(&(buf_pool->mutex));
+ }
- return(0);
- }
+ mutex_exit(&block->mutex);
+ mutex_exit(&(buf_pool->mutex));
+
+ return(0);
}
/***************************************************************
@@ -737,34 +750,48 @@ buf_flush_try_neighbors(
block = buf_page_hash_get(space, i);
ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
- if (block && flush_type == BUF_FLUSH_LRU && i != offset
- && !block->old) {
+ if (!block) {
+
+ continue;
+
+ } else if (flush_type == BUF_FLUSH_LRU && i != offset
+ && !block->old) {
/* We avoid flushing 'non-old' blocks in an LRU flush,
because the flushed blocks are soon freed */
continue;
- }
+ } else {
+
+ mutex_enter(&block->mutex);
+
+ if (buf_flush_ready_for_flush(block, flush_type)
+ && (i == offset || block->buf_fix_count == 0)) {
+ /* We only try to flush those
+ neighbors != offset where the buf fix count is
+ zero, as we then know that we probably can
+ latch the page without a semaphore wait.
+ Semaphore waits are expensive because we must
+ flush the doublewrite buffer before we start
+ waiting. */
- if (block && buf_flush_ready_for_flush(block, flush_type)
- && (i == offset || block->buf_fix_count == 0)) {
- /* We only try to flush those neighbors != offset
- where the buf fix count is zero, as we then know that
- we probably can latch the page without a semaphore
- wait. Semaphore waits are expensive because we must
- flush the doublewrite buffer before we start
- waiting. */
+ mutex_exit(&block->mutex);
- mutex_exit(&(buf_pool->mutex));
+ mutex_exit(&(buf_pool->mutex));
- /* Note: as we release the buf_pool mutex above, in
- buf_flush_try_page we cannot be sure the page is still
- in a flushable state: therefore we check it again
- inside that function. */
+ /* Note: as we release the buf_pool mutex
+ above, in buf_flush_try_page we cannot be sure
+ the page is still in a flushable state:
+ therefore we check it again inside that
+ function. */
- count += buf_flush_try_page(space, i, flush_type);
+ count += buf_flush_try_page(space, i,
+ flush_type);
- mutex_enter(&(buf_pool->mutex));
+ mutex_enter(&(buf_pool->mutex));
+ } else {
+ mutex_exit(&block->mutex);
+ }
}
}
@@ -858,12 +885,15 @@ buf_flush_batch(
while ((block != NULL) && !found) {
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+ mutex_enter(&block->mutex);
+
if (buf_flush_ready_for_flush(block, flush_type)) {
found = TRUE;
space = block->space;
offset = block->offset;
+ mutex_exit(&block->mutex);
mutex_exit(&(buf_pool->mutex));
old_page_count = page_count;
@@ -881,10 +911,14 @@ buf_flush_batch(
} else if (flush_type == BUF_FLUSH_LRU) {
+ mutex_exit(&block->mutex);
+
block = UT_LIST_GET_PREV(LRU, block);
} else {
ut_ad(flush_type == BUF_FLUSH_LIST);
+ mutex_exit(&block->mutex);
+
block = UT_LIST_GET_PREV(flush_list, block);
}
}
@@ -966,10 +1000,14 @@ buf_flush_LRU_recommendation(void)
+ BUF_FLUSH_EXTRA_MARGIN)
&& (distance < BUF_LRU_FREE_SEARCH_LEN)) {
+ mutex_enter(&block->mutex);
+
if (buf_flush_ready_for_replace(block)) {
n_replaceable++;
}
+ mutex_exit(&block->mutex);
+
distance++;
block = UT_LIST_GET_PREV(LRU, block);
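
As the comment in buf_flush_try_neighbors() notes, once the buf_pool
mutex is released the page may stop being flushable, so the callee must
re-check. A minimal sketch of that release-and-recheck idiom, with
hypothetical ready_for_flush() and write_block() standing in for
buf_flush_ready_for_flush() and the real write path:

#include <pthread.h>

enum { IO_NONE = 0, IO_WRITE };

typedef struct {
	pthread_mutex_t mutex;
	int             io_fix;
	int             dirty;
} block_t;

static pthread_mutex_t pool_mutex = PTHREAD_MUTEX_INITIALIZER;

static int  ready_for_flush(block_t* b) { return b->dirty && b->io_fix == IO_NONE; }
static void write_block(block_t* b)     { (void) b; /* do the I/O */ }

static int try_flush(block_t* block)
{
	pthread_mutex_lock(&pool_mutex);
	pthread_mutex_lock(&block->mutex);

	if (!ready_for_flush(block)) {
		/* The caller dropped its mutexes before calling us, so
		any earlier check is stale; this re-check is the one
		that counts. */
		pthread_mutex_unlock(&block->mutex);
		pthread_mutex_unlock(&pool_mutex);
		return 0;
	}

	block->io_fix = IO_WRITE;        /* claim the block for this flush */

	pthread_mutex_unlock(&block->mutex);
	pthread_mutex_unlock(&pool_mutex);

	write_block(block);              /* I/O with no mutex held */
	return 1;
}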
diff --git a/innobase/buf/buf0lru.c b/innobase/buf/buf0lru.c
index 8b135cc5db3..dfee2add045 100644
--- a/innobase/buf/buf0lru.c
+++ b/innobase/buf/buf0lru.c
@@ -86,6 +86,9 @@ scan_again:
block = UT_LIST_GET_LAST(buf_pool->LRU);
while (block != NULL) {
+
+ mutex_enter(&block->mutex);
+
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
if (block->space == id
@@ -112,6 +115,8 @@ scan_again:
if (block->is_hashed) {
page_no = block->offset;
+ mutex_exit(&block->mutex);
+
mutex_exit(&(buf_pool->mutex));
/* Note that the following call will acquire
@@ -138,6 +143,7 @@ scan_again:
buf_LRU_block_free_hashed_page(block);
}
next_page:
+ mutex_exit(&block->mutex);
block = UT_LIST_GET_PREV(LRU, block);
}
@@ -211,6 +217,9 @@ buf_LRU_search_and_free_block(
while (block != NULL) {
ut_a(block->in_LRU_list);
+
+ mutex_enter(&block->mutex);
+
if (buf_flush_ready_for_replace(block)) {
#ifdef UNIV_DEBUG
@@ -225,6 +234,7 @@ buf_LRU_search_and_free_block(
buf_LRU_block_remove_hashed_page(block);
mutex_exit(&(buf_pool->mutex));
+ mutex_exit(&block->mutex);
/* Remove possible adaptive hash index built on the
page; in the case of AWE the block may not have a
@@ -233,15 +243,21 @@ buf_LRU_search_and_free_block(
if (block->frame) {
btr_search_drop_page_hash_index(block->frame);
}
- mutex_enter(&(buf_pool->mutex));
ut_a(block->buf_fix_count == 0);
+ mutex_enter(&(buf_pool->mutex));
+ mutex_enter(&block->mutex);
+
buf_LRU_block_free_hashed_page(block);
freed = TRUE;
+ mutex_exit(&block->mutex);
break;
}
+
+ mutex_exit(&block->mutex);
+
block = UT_LIST_GET_PREV(LRU, block);
distance++;
@@ -415,8 +431,12 @@ loop:
}
}
+ mutex_enter(&block->mutex);
+
block->state = BUF_BLOCK_READY_FOR_USE;
+ mutex_exit(&block->mutex);
+
mutex_exit(&(buf_pool->mutex));
if (started_monitor) {
@@ -818,6 +838,7 @@ buf_LRU_block_free_non_file_page(
{
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&(buf_pool->mutex)));
+ ut_ad(mutex_own(&block->mutex));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(block);
@@ -857,6 +878,7 @@ buf_LRU_block_remove_hashed_page(
{
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&(buf_pool->mutex)));
+ ut_ad(mutex_own(&block->mutex));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(block);
@@ -914,6 +936,7 @@ buf_LRU_block_free_hashed_page(
{
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&(buf_pool->mutex)));
+ ut_ad(mutex_own(&block->mutex));
#endif /* UNIV_SYNC_DEBUG */
ut_a(block->state == BUF_BLOCK_REMOVE_HASH);
diff --git a/innobase/dict/dict0crea.c b/innobase/dict/dict0crea.c
index c7d6ffd2c22..e20d8b6e83a 100644
--- a/innobase/dict/dict0crea.c
+++ b/innobase/dict/dict0crea.c
@@ -724,8 +724,10 @@ dict_truncate_index_tree(
/* out: new root page number, or
FIL_NULL on failure */
dict_table_t* table, /* in: the table the index belongs to */
- rec_t* rec, /* in: record in the clustered index of
- SYS_INDEXES table */
+ btr_pcur_t* pcur, /* in/out: persistent cursor pointing to
+ record in the clustered index of
+ SYS_INDEXES table. The cursor may be
+ repositioned in this call. */
mtr_t* mtr) /* in: mtr having the latch
on the record page. The mtr may be
committed and restarted in this call. */
@@ -734,6 +736,7 @@ dict_truncate_index_tree(
ulint space;
ulint type;
dulint index_id;
+ rec_t* rec;
byte* ptr;
ulint len;
ulint comp;
@@ -744,6 +747,7 @@ dict_truncate_index_tree(
#endif /* UNIV_SYNC_DEBUG */
ut_a(!dict_sys->sys_indexes->comp);
+ rec = btr_pcur_get_rec(pcur);
ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len);
ut_ad(len == 4);
@@ -809,10 +813,11 @@ dict_truncate_index_tree(
/* We will need to commit the mini-transaction in order to avoid
deadlocks in the btr_create() call, because otherwise we would
be freeing and allocating pages in the same mini-transaction. */
+ btr_pcur_store_position(pcur, mtr);
mtr_commit(mtr);
- /* mtr_commit() will invalidate rec. */
- rec = NULL;
+
mtr_start(mtr);
+ btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
/* Find the index corresponding to this SYS_INDEXES record. */
for (index = UT_LIST_GET_FIRST(table->indexes);
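
The dict_truncate_index_tree() change replaces the raw rec_t* parameter
with a persistent cursor because mtr_commit() releases page latches and
invalidates any record pointer into the page. Condensed from the hunk
above (real InnoDB calls, abbreviated context):

	btr_pcur_store_position(pcur, mtr);  /* remember the logical
					     position, not the pointer */
	mtr_commit(mtr);                     /* latches released; any
					     rec_t* is now invalid */
	mtr_start(mtr);
	btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
	rec = btr_pcur_get_rec(pcur);        /* re-fetch a valid pointer */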
diff --git a/innobase/fil/fil0fil.c b/innobase/fil/fil0fil.c
index 64987294654..65320b57183 100644
--- a/innobase/fil/fil0fil.c
+++ b/innobase/fil/fil0fil.c
@@ -4285,29 +4285,47 @@ fil_flush_file_spaces(
{
fil_system_t* system = fil_system;
fil_space_t* space;
+ ulint* space_ids;
+ ulint n_space_ids;
+ ulint i;
mutex_enter(&(system->mutex));
- space = UT_LIST_GET_FIRST(system->unflushed_spaces);
+ n_space_ids = UT_LIST_GET_LEN(system->unflushed_spaces);
+ if (n_space_ids == 0) {
- while (space) {
- if (space->purpose == purpose && !space->is_being_deleted) {
+ mutex_exit(&system->mutex);
+ return;
+ }
- space->n_pending_flushes++; /* prevent dropping of the
- space while we are
- flushing */
- mutex_exit(&(system->mutex));
+ /* Assemble a list of space ids to flush. Previously, we
+ traversed system->unflushed_spaces and called UT_LIST_GET_NEXT()
+ on a space that was just removed from the list by fil_flush().
+ Thus, the space could be dropped and the memory overwritten. */
+ space_ids = mem_alloc(n_space_ids * sizeof *space_ids);
- fil_flush(space->id);
+ n_space_ids = 0;
- mutex_enter(&(system->mutex));
+ for (space = UT_LIST_GET_FIRST(system->unflushed_spaces);
+ space;
+ space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
+
+ if (space->purpose == purpose && !space->is_being_deleted) {
- space->n_pending_flushes--;
+ space_ids[n_space_ids++] = space->id;
}
- space = UT_LIST_GET_NEXT(unflushed_spaces, space);
}
-
- mutex_exit(&(system->mutex));
+
+ mutex_exit(&system->mutex);
+
+ /* Flush the spaces. It will not hurt to call fil_flush() on
+ a non-existing space id. */
+ for (i = 0; i < n_space_ids; i++) {
+
+ fil_flush(space_ids[i]);
+ }
+
+ mem_free(space_ids);
}
/**********************************************************************
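
The fil_flush_file_spaces() rewrite is the Bug #24089 fix: instead of
walking system->unflushed_spaces while fil_flush() may concurrently
unlink entries from it, the ids are snapshotted under the mutex and the
flushing happens with the mutex released. A self-contained sketch of
the pattern; space_t, the list layout and flush_file_spaces() are
hypothetical stand-ins:

#include <pthread.h>
#include <stdlib.h>

typedef struct space_struct {
	unsigned long        id;
	struct space_struct* next;
} space_t;

static pthread_mutex_t system_mutex = PTHREAD_MUTEX_INITIALIZER;
static space_t*        unflushed_spaces = NULL;

extern void fil_flush(unsigned long space_id);

static void flush_file_spaces(void)
{
	unsigned long* ids;
	size_t         n = 0, i;
	space_t*       s;

	pthread_mutex_lock(&system_mutex);

	for (s = unflushed_spaces; s; s = s->next) {
		n++;
	}
	if (n == 0) {
		pthread_mutex_unlock(&system_mutex);
		return;
	}
	ids = malloc(n * sizeof *ids);
	n = 0;
	for (s = unflushed_spaces; s; s = s->next) {
		ids[n++] = s->id;        /* copy the id, not the pointer */
	}

	pthread_mutex_unlock(&system_mutex);

	/* fil_flush() may unlink entries from the list, but we no
	longer hold an iterator into it, so nothing can be pulled out
	from under us. Flushing an already-dropped id is harmless. */
	for (i = 0; i < n; i++) {
		fil_flush(ids[i]);
	}
	free(ids);
}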
diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h
index fc1d9a64c7f..11e5bb39e63 100644
--- a/innobase/include/buf0buf.h
+++ b/innobase/include/buf0buf.h
@@ -461,8 +461,8 @@ Gets the mutex number protecting the page record lock hash chain in the lock
table. */
UNIV_INLINE
mutex_t*
-buf_frame_get_lock_mutex(
-/*=====================*/
+buf_frame_get_mutex(
+/*================*/
/* out: mutex */
byte* ptr); /* in: pointer to within a buffer frame */
/***********************************************************************
@@ -713,7 +713,10 @@ struct buf_block_struct{
ulint magic_n; /* magic number to check */
ulint state; /* state of the control block:
- BUF_BLOCK_NOT_USED, ... */
+ BUF_BLOCK_NOT_USED, ...; changing
+ this is only allowed when a thread
+ has BOTH the buffer pool mutex AND
+ block->mutex locked */
byte* frame; /* pointer to buffer frame which
is of size UNIV_PAGE_SIZE, and
aligned to an address divisible by
@@ -731,8 +734,12 @@ struct buf_block_struct{
ulint offset; /* page number within the space */
ulint lock_hash_val; /* hashed value of the page address
in the record lock hash table */
- mutex_t* lock_mutex; /* mutex protecting the chain in the
- record lock hash table */
+ mutex_t mutex; /* mutex protecting this block:
+ state (also protected by the buffer
+ pool mutex), io_fix, buf_fix_count,
+ and accessed; we introduce this new
+ mutex in InnoDB-5.1 to relieve
+ contention on the buffer pool mutex */
rw_lock_t lock; /* read-write lock of the buffer
frame */
buf_block_t* hash; /* node used in chaining to the page
@@ -788,20 +795,27 @@ struct buf_block_struct{
in heuristic algorithms, because of
the possibility of a wrap-around! */
ulint freed_page_clock;/* the value of freed_page_clock
- buffer pool when this block was
- last time put to the head of the
- LRU list */
+ of the buffer pool when this block was
+ the last time put to the head of the
+ LRU list; a thread is allowed to
+ read this for heuristic purposes
+ without holding any mutex or latch */
ibool old; /* TRUE if the block is in the old
blocks in the LRU list */
ibool accessed; /* TRUE if the page has been accessed
while in the buffer pool: read-ahead
may read in pages which have not been
- accessed yet */
+ accessed yet; this is protected by
+ block->mutex; a thread is allowed to
+ read this for heuristic purposes
+ without holding any mutex or latch */
ulint buf_fix_count; /* count of how manyfold this block
- is currently bufferfixed */
+ is currently bufferfixed; this is
+ protected by block->mutex */
ulint io_fix; /* if a read is pending to the frame,
io_fix is BUF_IO_READ, in the case
- of a write BUF_IO_WRITE, otherwise 0 */
+ of a write BUF_IO_WRITE, otherwise 0;
+ this is protected by block->mutex */
/* 4. Optimistic search field */
dulint modify_clock; /* this clock is incremented every
@@ -962,7 +976,9 @@ struct buf_pool_struct{
number of buffer blocks removed from
the end of the LRU list; NOTE that
this counter may wrap around at 4
- billion! */
+ billion! A thread is allowed to
+ read this for heuristic purposes
+ without holding any mutex or latch */
ulint LRU_flush_ended;/* when an LRU flush ends for a page,
this is incremented by one; this is
set to zero when a buffer block is
diff --git a/innobase/include/buf0buf.ic b/innobase/include/buf0buf.ic
index af32db10b5f..ddc91b8d66c 100644
--- a/innobase/include/buf0buf.ic
+++ b/innobase/include/buf0buf.ic
@@ -330,8 +330,8 @@ Gets the mutex number protecting the page record lock hash chain in the lock
table. */
UNIV_INLINE
mutex_t*
-buf_frame_get_lock_mutex(
-/*=====================*/
+buf_frame_get_mutex(
+/*================*/
/* out: mutex */
byte* ptr) /* in: pointer to within a buffer frame */
{
@@ -339,7 +339,7 @@ buf_frame_get_lock_mutex(
block = buf_block_align(ptr);
- return(block->lock_mutex);
+ return(&block->mutex);
}
/*************************************************************************
@@ -512,6 +512,7 @@ buf_block_buf_fix_inc_debug(
ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line);
ut_ad(ret == TRUE);
+ ut_ad(mutex_own(&block->mutex));
#endif
block->buf_fix_count++;
}
@@ -524,6 +525,9 @@ buf_block_buf_fix_inc(
/*==================*/
buf_block_t* block) /* in: block to bufferfix */
{
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(&block->mutex));
+#endif
block->buf_fix_count++;
}
#endif /* UNIV_SYNC_DEBUG */
@@ -618,23 +622,24 @@ buf_page_release(
ut_ad(block);
- mutex_enter_fast(&(buf_pool->mutex));
-
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
ut_a(block->buf_fix_count > 0);
if (rw_latch == RW_X_LATCH && mtr->modifications) {
-
+ mutex_enter(&buf_pool->mutex);
buf_flush_note_modification(block, mtr);
+ mutex_exit(&buf_pool->mutex);
}
+ mutex_enter(&block->mutex);
+
#ifdef UNIV_SYNC_DEBUG
rw_lock_s_unlock(&(block->debug_latch));
#endif
buf_fix_count = block->buf_fix_count;
block->buf_fix_count = buf_fix_count - 1;
- mutex_exit(&(buf_pool->mutex));
+ mutex_exit(&block->mutex);
if (rw_latch == RW_S_LATCH) {
rw_lock_s_unlock(&(block->lock));
diff --git a/innobase/include/dict0crea.h b/innobase/include/dict0crea.h
index 5dd571be59c..44acca61c5e 100644
--- a/innobase/include/dict0crea.h
+++ b/innobase/include/dict0crea.h
@@ -62,8 +62,10 @@ dict_truncate_index_tree(
/* out: new root page number, or
FIL_NULL on failure */
dict_table_t* table, /* in: the table the index belongs to */
- rec_t* rec, /* in: record in the clustered index of
- SYS_INDEXES table */
+ btr_pcur_t* pcur, /* in/out: persistent cursor pointing to
+ record in the clustered index of
+ SYS_INDEXES table. The cursor may be
+ repositioned in this call. */
mtr_t* mtr); /* in: mtr having the latch
on the record page. The mtr may be
committed and restarted in this call. */
diff --git a/innobase/include/sync0arr.h b/innobase/include/sync0arr.h
index fecd910683e..ba712d14aad 100644
--- a/innobase/include/sync0arr.h
+++ b/innobase/include/sync0arr.h
@@ -75,17 +75,12 @@ sync_array_free_cell(
sync_array_t* arr, /* in: wait array */
ulint index); /* in: index of the cell in array */
/**************************************************************************
-Looks for the cells in the wait array which refer
-to the wait object specified,
-and sets their corresponding events to the signaled state. In this
-way releases the threads waiting for the object to contend for the object.
-It is possible that no such cell is found, in which case does nothing. */
+Note that one of the wait objects was signalled. */
void
-sync_array_signal_object(
-/*=====================*/
- sync_array_t* arr, /* in: wait array */
- void* object);/* in: wait object */
+sync_array_object_signalled(
+/*========================*/
+ sync_array_t* arr); /* in: wait array */
/**************************************************************************
If the wakeup algorithm does not work perfectly at semaphore relases,
this function will do the waking (see the comment in mutex_exit). This
diff --git a/innobase/include/sync0rw.h b/innobase/include/sync0rw.h
index 911c8ac3f4a..741f9500612 100644
--- a/innobase/include/sync0rw.h
+++ b/innobase/include/sync0rw.h
@@ -411,6 +411,7 @@ blocked by readers, a writer may queue for the lock by setting the writer
field. Then no new readers are allowed in. */
struct rw_lock_struct {
+ os_event_t event; /* Used by sync0arr.c for thread queueing */
ulint reader_count; /* Number of readers who have locked this
lock in the shared mode */
ulint writer; /* This field is set to RW_LOCK_EX if there
diff --git a/innobase/include/sync0rw.ic b/innobase/include/sync0rw.ic
index 9e15475ae53..31a1ea6562a 100644
--- a/innobase/include/sync0rw.ic
+++ b/innobase/include/sync0rw.ic
@@ -382,7 +382,8 @@ rw_lock_s_unlock_func(
mutex_exit(mutex);
if (UNIV_UNLIKELY(sg)) {
- sync_array_signal_object(sync_primary_wait_array, lock);
+ os_event_set(lock->event);
+ sync_array_object_signalled(sync_primary_wait_array);
}
ut_ad(rw_lock_validate(lock));
@@ -462,7 +463,8 @@ rw_lock_x_unlock_func(
mutex_exit(&(lock->mutex));
if (UNIV_UNLIKELY(sg)) {
- sync_array_signal_object(sync_primary_wait_array, lock);
+ os_event_set(lock->event);
+ sync_array_object_signalled(sync_primary_wait_array);
}
ut_ad(rw_lock_validate(lock));
diff --git a/innobase/include/sync0sync.h b/innobase/include/sync0sync.h
index 5955ab9a06a..9893921c5d2 100644
--- a/innobase/include/sync0sync.h
+++ b/innobase/include/sync0sync.h
@@ -453,6 +453,7 @@ Do not use its fields directly! The structure used in the spin lock
implementation of a mutual exclusion semaphore. */
struct mutex_struct {
+ os_event_t event; /* Used by sync0arr.c for the wait queue */
ulint lock_word; /* This ulint is the target of the atomic
test-and-set instruction in Win32 */
#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER)
diff --git a/innobase/os/os0sync.c b/innobase/os/os0sync.c
index 8bafb73baf8..a3204a7b3e8 100644
--- a/innobase/os/os0sync.c
+++ b/innobase/os/os0sync.c
@@ -21,6 +21,7 @@ Created 9/6/1995 Heikki Tuuri
/* Type definition for an operating system mutex struct */
struct os_mutex_struct{
+ os_event_t event; /* Used by sync0arr.c for queing threads */
void* handle; /* OS handle to mutex */
ulint count; /* we use this counter to check
that the same thread does not
@@ -35,6 +36,7 @@ struct os_mutex_struct{
/* Mutex protecting counts and the lists of OS mutexes and events */
os_mutex_t os_sync_mutex;
ibool os_sync_mutex_inited = FALSE;
+ibool os_sync_free_called = FALSE;
/* This is incremented by 1 in os_thread_create and decremented by 1 in
os_thread_exit */
@@ -50,6 +52,10 @@ ulint os_event_count = 0;
ulint os_mutex_count = 0;
ulint os_fast_mutex_count = 0;
+/* Because a mutex is embedded inside an event and there is an
+event embedded inside a mutex, on free, this generates a recursive call.
+This version of the free event function doesn't acquire the global lock */
+static void os_event_free_internal(os_event_t event);
/*************************************************************
Initializes global event and OS 'slow' mutex lists. */
@@ -76,6 +82,7 @@ os_sync_free(void)
os_event_t event;
os_mutex_t mutex;
+ os_sync_free_called = TRUE;
event = UT_LIST_GET_FIRST(os_event_list);
while (event) {
@@ -99,6 +106,7 @@ os_sync_free(void)
mutex = UT_LIST_GET_FIRST(os_mutex_list);
}
+ os_sync_free_called = FALSE;
}
/*************************************************************
@@ -146,14 +154,21 @@ os_event_create(
event->signal_count = 0;
#endif /* __WIN__ */
- /* Put to the list of events */
- os_mutex_enter(os_sync_mutex);
+ /* The os_sync_mutex can be NULL because during startup an event
+ can be created [ because it's embedded in the mutex/rwlock ] before
+ this module has been initialized */
+ if (os_sync_mutex != NULL) {
+ os_mutex_enter(os_sync_mutex);
+ }
+ /* Put to the list of events */
UT_LIST_ADD_FIRST(os_event_list, os_event_list, event);
os_event_count++;
- os_mutex_exit(os_sync_mutex);
+ if (os_sync_mutex != NULL) {
+ os_mutex_exit(os_sync_mutex);
+ }
return(event);
}
@@ -256,6 +271,35 @@ os_event_reset(
}
/**************************************************************
+Frees an event object, without acquiring the global lock. */
+static
+void
+os_event_free_internal(
+/*===================*/
+ os_event_t event) /* in: event to free */
+{
+#ifdef __WIN__
+ ut_a(event);
+
+ ut_a(CloseHandle(event->handle));
+#else
+ ut_a(event);
+
+ /* This is to avoid freeing the mutex twice */
+ os_fast_mutex_free(&(event->os_mutex));
+
+ ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
+#endif
+ /* Remove from the list of events */
+
+ UT_LIST_REMOVE(os_event_list, os_event_list, event);
+
+ os_event_count--;
+
+ ut_free(event);
+}
+
+/**************************************************************
Frees an event object. */
void
@@ -456,6 +500,7 @@ os_mutex_create(
mutex_str->handle = mutex;
mutex_str->count = 0;
+ mutex_str->event = os_event_create(NULL);
if (os_sync_mutex_inited) {
/* When creating os_sync_mutex itself we cannot reserve it */
@@ -532,6 +577,10 @@ os_mutex_free(
{
ut_a(mutex);
+ if (!os_sync_free_called) {
+ os_event_free_internal(mutex->event);
+ }
+
if (os_sync_mutex_inited) {
os_mutex_enter(os_sync_mutex);
}
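
The os0sync.c changes handle a chicken-and-egg problem: every mutex now
embeds an event, and every event registers itself on a global list that
is itself protected by a mutex. Two guards result, sketched below as a
fragment with hypothetical helpers (event_register(), mutex_destroy(),
event_list_add()); os_sync_mutex, os_event_free_internal() and
os_sync_free_called are the names from the patch:

	/* Guard 1: during startup an event may be created before
	os_sync_mutex exists, so registration tolerates a NULL
	global mutex. */
	void event_register(struct event* ev)
	{
		if (os_sync_mutex != NULL) {
			os_mutex_enter(os_sync_mutex);
		}
		event_list_add(ev);
		if (os_sync_mutex != NULL) {
			os_mutex_exit(os_sync_mutex);
		}
	}

	/* Guard 2: during os_sync_free() the lists are being torn
	down, so freeing a mutex must not recursively take the global
	lock just to free its embedded event. */
	void mutex_destroy(struct mutex* m)
	{
		if (!os_sync_free_called) {
			os_event_free_internal(m->event);
		}
		/* ... free the OS mutex itself ... */
	}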
diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c
index 955c7139de7..efbb93ba9f5 100644
--- a/innobase/row/row0mysql.c
+++ b/innobase/row/row0mysql.c
@@ -2920,12 +2920,10 @@ do not allow the TRUNCATE. We also reserve the data dictionary latch. */
goto next_rec;
}
- btr_pcur_store_position(&pcur, &mtr);
+ /* This call may commit and restart mtr
+ and reposition pcur. */
+ root_page_no = dict_truncate_index_tree(table, &pcur, &mtr);
- /* This call may commit and restart mtr. */
- root_page_no = dict_truncate_index_tree(table, rec, &mtr);
-
- btr_pcur_restore_position(BTR_MODIFY_LEAF, &pcur, &mtr);
rec = btr_pcur_get_rec(&pcur);
if (root_page_no != FIL_NULL) {
diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c
index ec56afbb4f5..f8a65e6ff82 100644
--- a/innobase/row/row0sel.c
+++ b/innobase/row/row0sel.c
@@ -1323,6 +1323,12 @@ rec_loop:
ULINT_UNDEFINED, &heap);
if (srv_locks_unsafe_for_binlog) {
+
+ if (page_rec_is_supremum(rec)) {
+
+ goto next_rec;
+ }
+
lock_type = LOCK_REC_NOT_GAP;
} else {
lock_type = LOCK_ORDINARY;
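
The row0sel.c hunk is the Bug #23769 fix. Assuming the usual InnoDB
semantics, the supremum is a page-internal dummy record that can only
carry gap locks, so requesting LOCK_REC_NOT_GAP on it (as the
innodb_locks_unsafe_for_binlog path does) is contradictory and tripped
a debug assertion. The hunk, restated with that rationale as a comment:

	if (srv_locks_unsafe_for_binlog) {
		if (page_rec_is_supremum(rec)) {
			/* The supremum can only be gap-locked, and
			this mode takes no gap locks: skip it. */
			goto next_rec;
		}
		lock_type = LOCK_REC_NOT_GAP;
	} else {
		lock_type = LOCK_ORDINARY;
	}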
diff --git a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c
index 8530f117c9d..b41dcbe44cd 100644
--- a/innobase/srv/srv0start.c
+++ b/innobase/srv/srv0start.c
@@ -1142,10 +1142,7 @@ innobase_start_or_create_for_mysql(void)
#if defined(__NETWARE__)
-/* Create less event semaphores because Win 98/ME had difficulty creating
-40000 event semaphores.
-Comment from Novell, Inc.: also, these just take a lot of memory on
-NetWare. */
+ /* Comment from Novell, Inc.: These take a lot of memory on NetWare.*/
srv_max_n_threads = 1000;
#else
if (srv_pool_size >= 1000 * 1024) {
diff --git a/innobase/sync/sync0arr.c b/innobase/sync/sync0arr.c
index 198ef49ca9f..64f9310bad3 100644
--- a/innobase/sync/sync0arr.c
+++ b/innobase/sync/sync0arr.c
@@ -62,9 +62,6 @@ struct sync_cell_struct {
ibool waiting; /* TRUE if the thread has already
called sync_array_event_wait
on this cell */
- ibool event_set; /* TRUE if the event is set */
- os_event_t event; /* operating system event
- semaphore handle */
time_t reservation_time;/* time when the thread reserved
the wait cell */
};
@@ -218,10 +215,7 @@ sync_array_create(
for (i = 0; i < n_cells; i++) {
cell = sync_array_get_nth_cell(arr, i);
cell->wait_object = NULL;
-
- /* Create an operating system event semaphore with no name */
- cell->event = os_event_create(NULL);
- cell->event_set = FALSE; /* it is created in reset state */
+ cell->waiting = FALSE;
}
return(arr);
@@ -235,19 +229,12 @@ sync_array_free(
/*============*/
sync_array_t* arr) /* in, own: sync wait array */
{
- ulint i;
- sync_cell_t* cell;
ulint protection;
ut_a(arr->n_reserved == 0);
sync_array_validate(arr);
- for (i = 0; i < arr->n_cells; i++) {
- cell = sync_array_get_nth_cell(arr, i);
- os_event_free(cell->event);
- }
-
protection = arr->protection;
/* Release the mutex protecting the wait array complex */
@@ -293,27 +280,19 @@ sync_array_validate(
}
/***********************************************************************
-Puts the cell event in set state. */
-static
-void
-sync_cell_event_set(
-/*================*/
- sync_cell_t* cell) /* in: array cell */
-{
- os_event_set(cell->event);
- cell->event_set = TRUE;
-}
-
-/***********************************************************************
Puts the cell event in reset state. */
static
void
sync_cell_event_reset(
/*==================*/
- sync_cell_t* cell) /* in: array cell */
+ ulint type, /* in: lock type mutex/rw_lock */
+ void* object) /* in: the rw_lock/mutex object */
{
- os_event_reset(cell->event);
- cell->event_set = FALSE;
+ if (type == SYNC_MUTEX) {
+ os_event_reset(((mutex_t *) object)->event);
+ } else {
+ os_event_reset(((rw_lock_t *) object)->event);
+ }
}
/**********************************************************************
@@ -346,14 +325,7 @@ sync_array_reserve_cell(
if (cell->wait_object == NULL) {
- /* Make sure the event is reset */
- if (cell->event_set) {
- sync_cell_event_reset(cell);
- }
-
- cell->reservation_time = time(NULL);
- cell->thread = os_thread_get_curr_id();
-
+ cell->waiting = FALSE;
cell->wait_object = object;
if (type == SYNC_MUTEX) {
@@ -363,7 +335,6 @@ sync_array_reserve_cell(
}
cell->request_type = type;
- cell->waiting = FALSE;
cell->file = file;
cell->line = line;
@@ -373,6 +344,13 @@ sync_array_reserve_cell(
*index = i;
sync_array_exit(arr);
+
+ /* Make sure the event is reset */
+ sync_cell_event_reset(type, object);
+
+ cell->reservation_time = time(NULL);
+
+ cell->thread = os_thread_get_curr_id();
return;
}
@@ -408,7 +386,12 @@ sync_array_wait_event(
ut_a(!cell->waiting);
ut_ad(os_thread_get_curr_id() == cell->thread);
- event = cell->event;
+ if (cell->request_type == SYNC_MUTEX) {
+ event = ((mutex_t*) cell->wait_object)->event;
+ } else {
+ event = ((rw_lock_t*) cell->wait_object)->event;
+ }
+
cell->waiting = TRUE;
#ifdef UNIV_SYNC_DEBUG
@@ -510,10 +493,6 @@ sync_array_cell_print(
if (!cell->waiting) {
fputs("wait has ended\n", file);
}
-
- if (cell->event_set) {
- fputs("wait is ending\n", file);
- }
}
#ifdef UNIV_SYNC_DEBUG
@@ -623,7 +602,7 @@ sync_array_detect_deadlock(
depth++;
- if (cell->event_set || !cell->waiting) {
+ if (!cell->waiting) {
return(FALSE); /* No deadlock here */
}
@@ -802,6 +781,7 @@ sync_array_free_cell(
ut_a(cell->wait_object != NULL);
+ cell->waiting = FALSE;
cell->wait_object = NULL;
ut_a(arr->n_reserved > 0);
@@ -811,44 +791,17 @@ sync_array_free_cell(
}
/**************************************************************************
-Looks for the cells in the wait array which refer to the wait object
-specified, and sets their corresponding events to the signaled state. In this
-way releases the threads waiting for the object to contend for the object.
-It is possible that no such cell is found, in which case does nothing. */
+Increments the signalled count. */
void
-sync_array_signal_object(
-/*=====================*/
- sync_array_t* arr, /* in: wait array */
- void* object) /* in: wait object */
+sync_array_object_signalled(
+/*========================*/
+ sync_array_t* arr) /* in: wait array */
{
- sync_cell_t* cell;
- ulint count;
- ulint i;
-
sync_array_enter(arr);
arr->sg_count++;
- i = 0;
- count = 0;
-
- while (count < arr->n_reserved) {
-
- cell = sync_array_get_nth_cell(arr, i);
-
- if (cell->wait_object != NULL) {
-
- count++;
- if (cell->wait_object == object) {
-
- sync_cell_event_set(cell);
- }
- }
-
- i++;
- }
-
sync_array_exit(arr);
}
@@ -881,7 +834,17 @@ sync_arr_wake_threads_if_sema_free(void)
if (sync_arr_cell_can_wake_up(cell)) {
- sync_cell_event_set(cell);
+ if (cell->request_type == SYNC_MUTEX) {
+ mutex_t* mutex;
+
+ mutex = cell->wait_object;
+ os_event_set(mutex->event);
+ } else {
+ rw_lock_t* lock;
+
+ lock = cell->wait_object;
+ os_event_set(lock->event);
+ }
}
}
@@ -911,7 +874,7 @@ sync_array_print_long_waits(void)
cell = sync_array_get_nth_cell(sync_primary_wait_array, i);
- if (cell->wait_object != NULL
+ if (cell->wait_object != NULL && cell->waiting
&& difftime(time(NULL), cell->reservation_time) > 240) {
fputs("InnoDB: Warning: a long semaphore wait:\n",
stderr);
@@ -919,7 +882,7 @@ sync_array_print_long_waits(void)
noticed = TRUE;
}
- if (cell->wait_object != NULL
+ if (cell->wait_object != NULL && cell->waiting
&& difftime(time(NULL), cell->reservation_time)
> fatal_timeout) {
fatal = TRUE;
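
The sync0arr.c rewrite above moves the wakeup event out of the wait-array
cell and into the lock object itself: a release signals exactly its own
waiters in O(1) instead of scanning every cell under the array mutex, as
the removed sync_array_signal_object() did. A simplified sketch, with
the event emulated by a condition variable and all names illustrative
(the real sg_count is incremented under the array mutex, not atomically):

#include <pthread.h>

typedef struct {
	pthread_mutex_t m;
	pthread_cond_t  cond;
	int             signalled;
} event_t;

typedef struct {
	volatile int lock_word;
	event_t      event;        /* embedded, as in mutex_struct and
				   rw_lock_struct after this patch */
} lock_t;

static unsigned long sg_count;     /* sync_array_object_signalled() now
				   only bumps this counter */

static void event_set(event_t* ev)
{
	pthread_mutex_lock(&ev->m);
	ev->signalled = 1;
	pthread_cond_broadcast(&ev->cond);  /* wake this lock's waiters */
	pthread_mutex_unlock(&ev->m);
}

static void lock_release(lock_t* lock)
{
	lock->lock_word = 0;        /* release the lock word */
	event_set(&lock->event);    /* O(1): no wait-array scan */
	__sync_fetch_and_add(&sg_count, 1);
}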
diff --git a/innobase/sync/sync0rw.c b/innobase/sync/sync0rw.c
index 973b46fdd50..050de73db9e 100644
--- a/innobase/sync/sync0rw.c
+++ b/innobase/sync/sync0rw.c
@@ -128,6 +128,7 @@ rw_lock_create_func(
lock->last_x_file_name = "not yet reserved";
lock->last_s_line = 0;
lock->last_x_line = 0;
+ lock->event = os_event_create(NULL);
mutex_enter(&rw_lock_list_mutex);
@@ -163,6 +164,7 @@ rw_lock_free(
mutex_free(rw_lock_get_mutex(lock));
mutex_enter(&rw_lock_list_mutex);
+ os_event_free(lock->event);
if (UT_LIST_GET_PREV(list, lock)) {
ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N);
diff --git a/innobase/sync/sync0sync.c b/innobase/sync/sync0sync.c
index 43249f4b96f..95bf83dce79 100644
--- a/innobase/sync/sync0sync.c
+++ b/innobase/sync/sync0sync.c
@@ -212,6 +212,7 @@ mutex_create_func(
os_fast_mutex_init(&(mutex->os_fast_mutex));
mutex->lock_word = 0;
#endif
+ mutex->event = os_event_create(NULL);
mutex_set_waiters(mutex, 0);
mutex->magic_n = MUTEX_MAGIC_N;
#ifdef UNIV_SYNC_DEBUG
@@ -288,6 +289,8 @@ mutex_free(
mutex_exit(&mutex_list_mutex);
}
+ os_event_free(mutex->event);
+
#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER)
os_fast_mutex_free(&(mutex->os_fast_mutex));
#endif
@@ -564,8 +567,8 @@ mutex_signal_object(
/* The memory order of resetting the waiters field and
signaling the object is important. See LEMMA 1 above. */
-
- sync_array_signal_object(sync_primary_wait_array, mutex);
+ os_event_set(mutex->event);
+ sync_array_object_signalled(sync_primary_wait_array);
}
#ifdef UNIV_SYNC_DEBUG
@@ -1114,6 +1117,7 @@ sync_thread_add_level(
ut_a(sync_thread_levels_g(array, SYNC_PURGE_SYS));
} else if (level == SYNC_TREE_NODE) {
ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE)
+ || sync_thread_levels_contain(array, SYNC_DICT_OPERATION)
|| sync_thread_levels_g(array, SYNC_TREE_NODE - 1));
} else if (level == SYNC_TREE_NODE_FROM_HASH) {
ut_a(1);