summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@oracle.com>2016-09-02 17:28:54 +0300
committerMarko Mäkelä <marko.makela@mariadb.com>2017-04-26 23:03:27 +0300
commite63ead68bf4ef14f836181c834aa010d471abe9c (patch)
treec056b2143146e6af508097a29c1fe32790c1f4d0
parenta6adf567fd64b5566948ca029f68c613b6b703cc (diff)
downloadmariadb-git-e63ead68bf4ef14f836181c834aa010d471abe9c.tar.gz
Bug#24346574 PAGE CLEANER THREAD, ASSERT BLOCK->N_POINTERS == 0
btr_search_drop_page_hash_index(): Do not return before ensuring that block->index=NULL, even if !btr_search_enabled. We would typically still skip acquiring the AHI latch when the AHI is disabled, because block->index would already be NULL. Only if the AHI is in the process of being disabled, we would wait for the AHI latch and then notice that block->index=NULL and return. The above bug was a regression caused in MySQL 5.7.9 by the fix of Bug#21407023: DISABLING AHI SHOULD AVOID TAKING AHI LATCH The rest of this patch improves diagnostics by adding assertions. assert_block_ahi_valid(): A debug predicate for checking that block->n_pointers!=0 implies block->index!=NULL. assert_block_ahi_empty(): A debug predicate for checking that block->n_pointers==0. buf_block_init(): Instead of assigning block->n_pointers=0, assert_block_ahi_empty(block). buf_pool_clear_hash_index(): Clarify comments, and assign block->n_pointers=0 before assigning block->index=NULL. The wrong ordering could make block->n_pointers appear incorrect in debug assertions. This bug was introduced in MySQL 5.1.52 by Bug#13006367 62487: INNODB TAKES 3 MINUTES TO CLEAN UP THE ADAPTIVE HASH INDEX AT SHUTDOWN i_s_innodb_buffer_page_get_info(): Add a comment that the IS_HASHED column in the INFORMATION_SCHEMA views INNODB_BUFFER_POOL_PAGE and INNODB_BUFFER_PAGE_LRU may show false positives (there may be no pointers after all.) ha_insert_for_fold_func(), ha_delete_hash_node(), ha_search_and_update_if_found_func(): Use atomics for updating buf_block_t::n_pointers. While buf_block_t::index is always protected by btr_search_x_lock(index), in ha_insert_for_fold_func() the n_pointers-- may belong to another dict_index_t whose btr_search_latches[] we are not holding. RB: 13879 Reviewed-by: Jimmy Yang <jimmy.yang@oracle.com>
-rw-r--r--storage/innobase/btr/btr0cur.cc2
-rw-r--r--storage/innobase/btr/btr0sea.cc37
-rw-r--r--storage/innobase/buf/buf0buf.cc27
-rw-r--r--storage/innobase/buf/buf0lru.cc57
-rw-r--r--storage/innobase/ha/ha0ha.cc29
-rw-r--r--storage/innobase/handler/i_s.cc4
-rw-r--r--storage/innobase/ibuf/ibuf0ibuf.cc4
-rw-r--r--storage/innobase/include/buf0buf.h49
-rw-r--r--storage/innobase/include/buf0buf.ic1
9 files changed, 143 insertions, 67 deletions
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index 39d6508adf3..773b03775be 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -3683,6 +3683,8 @@ btr_cur_update_in_place(
btr_search_x_lock(index);
}
+
+ assert_block_ahi_valid(block);
#endif /* BTR_CUR_HASH_ADAPT */
row_upd_rec_in_place(rec, index, offsets, update, page_zip);
diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc
index f7a430591ac..3ae9e95819a 100644
--- a/storage/innobase/btr/btr0sea.cc
+++ b/storage/innobase/btr/btr0sea.cc
@@ -611,6 +611,7 @@ btr_search_update_hash_ref(
|| rw_lock_own(&(block->lock), RW_LOCK_X));
ut_ad(page_align(btr_cur_get_rec(cursor))
== buf_block_get_frame(block));
+ assert_block_ahi_valid(block);
index = block->index;
@@ -1122,14 +1123,13 @@ btr_search_drop_page_hash_index(buf_block_t* block)
rw_lock_t* latch;
btr_search_t* info;
- if (!btr_search_enabled) {
- return;
- }
-
retry:
/* Do a dirty check on block->index, return if the block is
not in the adaptive hash index. */
index = block->index;
+ /* This debug check uses a dirty read that could theoretically cause
+ false positives while buf_pool_clear_hash_index() is executing. */
+ assert_block_ahi_valid(block);
if (index == NULL) {
return;
@@ -1156,6 +1156,7 @@ retry:
ut_ad(!btr_search_own_any(RW_LOCK_X));
rw_lock_s_lock(latch);
+ assert_block_ahi_valid(block);
if (block->index == NULL) {
rw_lock_s_unlock(latch);
@@ -1172,6 +1173,7 @@ retry:
#ifdef MYSQL_INDEX_DISABLE_AHI
ut_ad(!index->disable_ahi);
#endif
+ ut_ad(btr_search_enabled);
ut_ad(block->page.id.space() == index->space);
ut_a(index_id == index->id);
@@ -1290,23 +1292,8 @@ next_rec:
MONITOR_INC_VALUE(MONITOR_ADAPTIVE_HASH_ROW_REMOVED, n_cached);
cleanup:
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- if (UNIV_UNLIKELY(block->n_pointers)) {
- /* Corruption */
- ib::error() << "Corruption of adaptive hash index."
- << " After dropping, the hash index to a page of "
- << index->name
- << ", still " << block->n_pointers
- << " hash nodes remain.";
- rw_lock_x_unlock(latch);
-
- ut_ad(btr_search_validate());
- } else {
- rw_lock_x_unlock(latch);
- }
-#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ assert_block_ahi_valid(block);
rw_lock_x_unlock(latch);
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
ut_free(folds);
}
@@ -1533,6 +1520,7 @@ btr_search_build_page_hash_index(
have to take care not to increment the counter in that
case. */
if (!block->index) {
+ assert_block_ahi_empty(block);
index->search_info->ref_count++;
}
@@ -1551,6 +1539,7 @@ btr_search_build_page_hash_index(
MONITOR_INC(MONITOR_ADAPTIVE_HASH_PAGE_ADDED);
MONITOR_INC_VALUE(MONITOR_ADAPTIVE_HASH_ROW_ADDED, n_cached);
exit_func:
+ assert_block_ahi_valid(block);
btr_search_x_unlock(index);
ut_free(folds);
@@ -1590,6 +1579,8 @@ btr_search_move_or_delete_hash_entries(
ut_a(!block->index || block->index == index);
ut_a(!(new_block->index || block->index)
|| !dict_index_is_ibuf(index));
+ assert_block_ahi_valid(block);
+ assert_block_ahi_valid(new_block);
if (new_block->index) {
@@ -1650,6 +1641,7 @@ btr_search_update_hash_on_delete(btr_cur_t* cursor)
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X));
+ assert_block_ahi_valid(block);
index = block->index;
if (!index) {
@@ -1674,6 +1666,7 @@ btr_search_update_hash_on_delete(btr_cur_t* cursor)
}
btr_search_x_lock(index);
+ assert_block_ahi_valid(block);
if (block->index) {
ut_a(block->index == index);
@@ -1684,6 +1677,8 @@ btr_search_update_hash_on_delete(btr_cur_t* cursor)
MONITOR_INC(
MONITOR_ADAPTIVE_HASH_ROW_REMOVE_NOT_FOUND);
}
+
+ assert_block_ahi_valid(block);
}
btr_search_x_unlock(index);
@@ -1747,6 +1742,7 @@ btr_search_update_hash_node_on_insert(btr_cur_t* cursor)
}
func_exit:
+ assert_block_ahi_valid(block);
btr_search_x_unlock(index);
} else {
btr_search_x_unlock(index);
@@ -1791,6 +1787,7 @@ btr_search_update_hash_on_insert(btr_cur_t* cursor)
block = btr_cur_get_block(cursor);
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X));
+ assert_block_ahi_valid(block);
index = block->index;
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index d2902580924..cc567ec73c8 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -1496,6 +1496,10 @@ buf_block_init(
{
UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);
+ /* This function should only be executed at database startup or by
+ buf_pool_resize(). Either way, adaptive hash index must not exist. */
+ assert_block_ahi_empty(block);
+
block->frame = frame;
block->page.buf_pool_index = buf_pool_index(buf_pool);
@@ -1526,11 +1530,6 @@ buf_block_init(
ut_d(block->in_unzip_LRU_list = FALSE);
ut_d(block->in_withdraw_list = FALSE);
-#ifdef BTR_CUR_HASH_ADAPT
-# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- block->n_pointers = 0;
-# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-#endif /* BTR_CUR_HASH_ADAPT */
page_zip_des_init(&block->page.zip);
mutex_create(LATCH_ID_BUF_BLOCK_MUTEX, &block->mutex);
@@ -2244,6 +2243,10 @@ buf_page_realloc(
/* set other flags of buf_block_t */
#ifdef BTR_CUR_HASH_ADAPT
+ /* This code should only be executed by buf_pool_resize(),
+ while the adaptive hash index is disabled. */
+ assert_block_ahi_empty(block);
+ assert_block_ahi_empty(new_block);
ut_ad(!block->index);
new_block->index = NULL;
new_block->n_hash_helps = 0;
@@ -3210,20 +3213,23 @@ buf_pool_clear_hash_index()
for (; i--; block++) {
dict_index_t* index = block->index;
+ assert_block_ahi_valid(block);
/* We can set block->index = NULL
- when we have an x-latch on search latch;
- see the comment in buf0buf.h */
+ and block->n_pointers = 0
+ when btr_search_own_all(RW_LOCK_X);
+ see the comments in buf0buf.h */
if (!index) {
- /* Not hashed */
continue;
}
- block->index = NULL;
+ ut_ad(buf_block_get_state(block)
+ == BUF_BLOCK_FILE_PAGE);
# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
block->n_pointers = 0;
# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ block->index = NULL;
}
}
}
@@ -3915,6 +3921,9 @@ buf_block_init_low(
{
block->skip_flush_check = false;
#ifdef BTR_CUR_HASH_ADAPT
+ /* No adaptive hash index entries may point to a previously
+ unused (and now freshly allocated) block. */
+ assert_block_ahi_empty(block);
block->index = NULL;
block->n_hash_helps = 0;
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index c492ec60494..10a8561d38d 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -295,21 +295,29 @@ next_page:
continue;
}
- mutex_enter(&((buf_block_t*) bpage)->mutex);
+ buf_block_t* block = reinterpret_cast<buf_block_t*>(bpage);
- {
- bool skip = bpage->buf_fix_count > 0
- || !((buf_block_t*) bpage)->index;
+ mutex_enter(&block->mutex);
- mutex_exit(&((buf_block_t*) bpage)->mutex);
+ /* This debug check uses a dirty read that could
+ theoretically cause false positives while
+ buf_pool_clear_hash_index() is executing.
+ (Other conflicting access paths to the adaptive hash
+ index should not be possible, because when a
+ tablespace is being discarded or dropped, there must
+ be no concurrect access to the contained tables.) */
+ assert_block_ahi_valid(block);
- if (skip) {
- /* Skip this block, because there are
- no adaptive hash index entries
- pointing to it, or because we cannot
- drop them due to the buffer-fix. */
- goto next_page;
- }
+ bool skip = bpage->buf_fix_count > 0 || !block->index;
+
+ mutex_exit(&block->mutex);
+
+ if (skip) {
+ /* Skip this block, because there are
+ no adaptive hash index entries
+ pointing to it, or because we cannot
+ drop them due to the buffer-fix. */
+ goto next_page;
}
/* Store the page number so that we can drop the hash
@@ -800,6 +808,17 @@ scan_again:
bpage->id, bpage->size);
goto scan_again;
+ } else {
+ /* This debug check uses a dirty read that could
+ theoretically cause false positives while
+ buf_pool_clear_hash_index() is executing,
+ if the writes to block->index=NULL and
+ block->n_pointers=0 are reordered.
+ (Other conflicting access paths to the adaptive hash
+ index should not be possible, because when a
+ tablespace is being discarded or dropped, there must
+ be no concurrect access to the contained tables.) */
+ assert_block_ahi_empty((buf_block_t*) bpage);
}
#endif /* BTR_CUR_HASH_ADAPT */
@@ -1156,6 +1175,9 @@ buf_LRU_get_free_only(
|| !buf_block_will_withdrawn(buf_pool, block)) {
/* found valid free block */
buf_page_mutex_enter(block);
+ /* No adaptive hash index entries may point to
+ a free block. */
+ assert_block_ahi_empty(block);
buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
@@ -2030,17 +2052,10 @@ buf_LRU_block_free_non_file_page(
case BUF_BLOCK_READY_FOR_USE:
break;
default:
- ib::error() << "Block:" << block
- << " incorrect state:" << buf_get_state_name(block)
- << " in buf_LRU_block_free_non_file_page";
- return; /* Continue */
+ ut_error;
}
-#ifdef BTR_CUR_HASH_ADAPT
-# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- ut_a(block->n_pointers == 0);
-# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-#endif /* BTR_CUR_HASH_ADAPT */
+ assert_block_ahi_empty(block);
ut_ad(!block->page.in_free_list);
ut_ad(!block->page.in_flush_list);
ut_ad(!block->page.in_LRU_list);
diff --git a/storage/innobase/ha/ha0ha.cc b/storage/innobase/ha/ha0ha.cc
index 5822bd6755c..f620db6f62e 100644
--- a/storage/innobase/ha/ha0ha.cc
+++ b/storage/innobase/ha/ha0ha.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -189,6 +190,12 @@ ha_clear(
}
#ifdef BTR_CUR_HASH_ADAPT
+# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+/** Maximum number of records in a page */
+static const lint MAX_N_POINTERS
+ = UNIV_PAGE_SIZE_MAX / REC_N_NEW_EXTRA_BYTES;
+# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+
/*************************************************************//**
Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node
@@ -235,9 +242,11 @@ ha_insert_for_fold_func(
buf_block_t* prev_block = prev_node->block;
ut_a(prev_block->frame
== page_align(prev_node->data));
- ut_a(prev_block->n_pointers > 0);
- prev_block->n_pointers--;
- block->n_pointers++;
+ ut_a(my_atomic_addlint(
+ &prev_block->n_pointers, -1)
+ < MAX_N_POINTERS);
+ ut_a(my_atomic_addlint(&block->n_pointers, 1)
+ < MAX_N_POINTERS);
}
prev_node->block = block;
@@ -268,7 +277,8 @@ ha_insert_for_fold_func(
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
if (table->adaptive) {
- block->n_pointers++;
+ ut_a(my_atomic_addlint(&block->n_pointers, 1)
+ < MAX_N_POINTERS);
}
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
@@ -329,8 +339,8 @@ ha_delete_hash_node(
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
if (table->adaptive) {
ut_a(del_node->block->frame = page_align(del_node->data));
- ut_a(del_node->block->n_pointers > 0);
- del_node->block->n_pointers--;
+ ut_a(my_atomic_addlint(&del_node->block->n_pointers, -1)
+ < MAX_N_POINTERS);
}
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
@@ -372,9 +382,10 @@ ha_search_and_update_if_found_func(
if (node) {
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
if (table->adaptive) {
- ut_a(node->block->n_pointers > 0);
- node->block->n_pointers--;
- new_block->n_pointers++;
+ ut_a(my_atomic_addlint(&node->block->n_pointers, -1)
+ < MAX_N_POINTERS);
+ ut_a(my_atomic_addlint(&new_block->n_pointers, 1)
+ < MAX_N_POINTERS);
}
node->block = new_block;
diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
index 65197e402aa..3d764ef6e7a 100644
--- a/storage/innobase/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
@@ -5149,6 +5149,10 @@ i_s_innodb_buffer_page_get_info(
block = reinterpret_cast<const buf_block_t*>(bpage);
frame = block->frame;
#ifdef BTR_CUR_HASH_ADAPT
+ /* Note: this may be a false positive, that
+ is, block->index will not always be set to
+ NULL when the last adaptive hash index
+ reference is dropped. */
page_info->hashed = (block->index != NULL);
#endif /* BTR_CUR_HASH_ADAPT */
} else {
diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc
index 7a4dc610694..c7ffecd00c1 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.cc
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
@@ -3953,7 +3953,11 @@ ibuf_insert_to_index_page(
ut_ad(ibuf_inside(mtr));
ut_ad(dtuple_check_typed(entry));
#ifdef BTR_CUR_HASH_ADAPT
+ /* A change buffer merge must occur before users are granted
+ any access to the page. No adaptive hash index entries may
+ point to a freshly read page. */
ut_ad(!block->index);
+ assert_block_ahi_empty(block);
#endif /* BTR_CUR_HASH_ADAPT */
ut_ad(mtr->is_named_space(block->page.id.space()));
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index c0b635ab8c9..aaadd544315 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -1836,23 +1836,53 @@ struct buf_block_t{
/* @} */
/** @name Hash search fields
- These 5 fields may only be modified when we have
- an x-latch on search system AND
- - we are holding an s-latch or x-latch on buf_block_t::lock or
- - we know that buf_block_t::buf_fix_count == 0.
+ These 5 fields may only be modified when:
+ we are holding the appropriate x-latch in btr_search_latches[], and
+ one of the following holds:
+ (1) the block state is BUF_BLOCK_FILE_PAGE, and
+ we are holding an s-latch or x-latch on buf_block_t::lock, or
+ (2) buf_block_t::buf_fix_count == 0, or
+ (3) the block state is BUF_BLOCK_REMOVE_HASH.
An exception to this is when we init or create a page
in the buffer pool in buf0buf.cc.
- Another exception is that assigning block->index = NULL
- is allowed whenever holding an x-latch on search system. */
+ Another exception for buf_pool_clear_hash_index() is that
+ assigning block->index = NULL (and block->n_pointers = 0)
+ is allowed whenever btr_search_own_all(RW_LOCK_X).
+
+ Another exception is that ha_insert_for_fold_func() may
+ decrement n_pointers without holding the appropriate latch
+ in btr_search_latches[]. Thus, n_pointers must be
+ protected by atomic memory access.
+
+ This implies that the fields may be read without race
+ condition whenever any of the following hold:
+ - the btr_search_latches[] s-latch or x-latch is being held, or
+ - the block state is not BUF_BLOCK_FILE_PAGE or BUF_BLOCK_REMOVE_HASH,
+ and holding some latch prevents the state from changing to that.
+
+ Some use of assert_block_ahi_empty() or assert_block_ahi_valid()
+ is prone to race conditions while buf_pool_clear_hash_index() is
+ executing (the adaptive hash index is being disabled). Such use
+ is explicitly commented. */
/* @{ */
# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
ulint n_pointers; /*!< used in debugging: the number of
pointers in the adaptive hash index
- pointing to this frame */
+ pointing to this frame;
+ protected by atomic memory access
+ or btr_search_own_all(). */
+# define assert_block_ahi_empty(block) \
+ ut_a(my_atomic_addlint(&(block)->n_pointers, 0) == 0)
+# define assert_block_ahi_valid(block) \
+ ut_a((block)->index \
+ || my_atomic_addlint(&(block)->n_pointers, 0) == 0)
+# else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+# define assert_block_ahi_empty(block) /* nothing */
+# define assert_block_ahi_valid(block) /* nothing */
# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
unsigned curr_n_fields:10;/*!< prefix length for hash indexing:
number of full fields */
@@ -1868,11 +1898,14 @@ struct buf_block_t{
complete, though: there may
have been hash collisions,
record deletions, etc. */
+ /* @} */
+#else /* BTR_CUR_HASH_ADAPT */
+# define assert_block_ahi_empty(block) /* nothing */
+# define assert_block_ahi_valid(block) /* nothing */
#endif /* BTR_CUR_HASH_ADAPT */
bool skip_flush_check;
/*!< Skip check in buf_dblwr_check_block
during bulk load, protected by lock.*/
- /* @} */
# ifdef UNIV_DEBUG
/** @name Debug fields */
/* @{ */
diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
index 429c7fd3ba6..f22dcc48a01 100644
--- a/storage/innobase/include/buf0buf.ic
+++ b/storage/innobase/include/buf0buf.ic
@@ -920,6 +920,7 @@ buf_block_modify_clock_inc(
RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
}
#endif /* UNIV_DEBUG */
+ assert_block_ahi_valid(block);
block->modify_clock++;
}