diff options
author | Marko Mäkelä <marko.makela@oracle.com> | 2016-09-02 17:28:54 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2017-04-26 23:03:27 +0300 |
commit | e63ead68bf4ef14f836181c834aa010d471abe9c (patch) | |
tree | c056b2143146e6af508097a29c1fe32790c1f4d0 | |
parent | a6adf567fd64b5566948ca029f68c613b6b703cc (diff) | |
download | mariadb-git-e63ead68bf4ef14f836181c834aa010d471abe9c.tar.gz |
Bug#24346574 PAGE CLEANER THREAD, ASSERT BLOCK->N_POINTERS == 0
btr_search_drop_page_hash_index(): Do not return before ensuring
that block->index=NULL, even if !btr_search_enabled. We would
typically still skip acquiring the AHI latch when the AHI is
disabled, because block->index would already be NULL. Only if the AHI
is in the process of being disabled, we would wait for the AHI latch
and then notice that block->index=NULL and return.
The above bug was a regression caused in MySQL 5.7.9 by the fix of
Bug#21407023: DISABLING AHI SHOULD AVOID TAKING AHI LATCH
The rest of this patch improves diagnostics by adding assertions.
assert_block_ahi_valid(): A debug predicate for checking that
block->n_pointers!=0 implies block->index!=NULL.
assert_block_ahi_empty(): A debug predicate for checking that
block->n_pointers==0.
buf_block_init(): Instead of assigning block->n_pointers=0,
assert_block_ahi_empty(block).
buf_pool_clear_hash_index(): Clarify comments, and assign
block->n_pointers=0 before assigning block->index=NULL.
The wrong ordering could make block->n_pointers appear incorrect in
debug assertions. This bug was introduced in MySQL 5.1.52 by
Bug#13006367 62487: INNODB TAKES 3 MINUTES TO CLEAN UP THE
ADAPTIVE HASH INDEX AT SHUTDOWN
i_s_innodb_buffer_page_get_info(): Add a comment that
the IS_HASHED column in the INFORMATION_SCHEMA views
INNODB_BUFFER_POOL_PAGE and INNODB_BUFFER_PAGE_LRU may
show false positives (there may be no pointers after all.)
ha_insert_for_fold_func(), ha_delete_hash_node(),
ha_search_and_update_if_found_func(): Use atomics for
updating buf_block_t::n_pointers. While buf_block_t::index is
always protected by btr_search_x_lock(index), in
ha_insert_for_fold_func() the n_pointers-- may belong to
another dict_index_t whose btr_search_latches[] we are not holding.
RB: 13879
Reviewed-by: Jimmy Yang <jimmy.yang@oracle.com>
-rw-r--r-- | storage/innobase/btr/btr0cur.cc | 2 | ||||
-rw-r--r-- | storage/innobase/btr/btr0sea.cc | 37 | ||||
-rw-r--r-- | storage/innobase/buf/buf0buf.cc | 27 | ||||
-rw-r--r-- | storage/innobase/buf/buf0lru.cc | 57 | ||||
-rw-r--r-- | storage/innobase/ha/ha0ha.cc | 29 | ||||
-rw-r--r-- | storage/innobase/handler/i_s.cc | 4 | ||||
-rw-r--r-- | storage/innobase/ibuf/ibuf0ibuf.cc | 4 | ||||
-rw-r--r-- | storage/innobase/include/buf0buf.h | 49 | ||||
-rw-r--r-- | storage/innobase/include/buf0buf.ic | 1 |
9 files changed, 143 insertions, 67 deletions
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 39d6508adf3..773b03775be 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -3683,6 +3683,8 @@ btr_cur_update_in_place( btr_search_x_lock(index); } + + assert_block_ahi_valid(block); #endif /* BTR_CUR_HASH_ADAPT */ row_upd_rec_in_place(rec, index, offsets, update, page_zip); diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index f7a430591ac..3ae9e95819a 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -611,6 +611,7 @@ btr_search_update_hash_ref( || rw_lock_own(&(block->lock), RW_LOCK_X)); ut_ad(page_align(btr_cur_get_rec(cursor)) == buf_block_get_frame(block)); + assert_block_ahi_valid(block); index = block->index; @@ -1122,14 +1123,13 @@ btr_search_drop_page_hash_index(buf_block_t* block) rw_lock_t* latch; btr_search_t* info; - if (!btr_search_enabled) { - return; - } - retry: /* Do a dirty check on block->index, return if the block is not in the adaptive hash index. */ index = block->index; + /* This debug check uses a dirty read that could theoretically cause + false positives while buf_pool_clear_hash_index() is executing. */ + assert_block_ahi_valid(block); if (index == NULL) { return; @@ -1156,6 +1156,7 @@ retry: ut_ad(!btr_search_own_any(RW_LOCK_X)); rw_lock_s_lock(latch); + assert_block_ahi_valid(block); if (block->index == NULL) { rw_lock_s_unlock(latch); @@ -1172,6 +1173,7 @@ retry: #ifdef MYSQL_INDEX_DISABLE_AHI ut_ad(!index->disable_ahi); #endif + ut_ad(btr_search_enabled); ut_ad(block->page.id.space() == index->space); ut_a(index_id == index->id); @@ -1290,23 +1292,8 @@ next_rec: MONITOR_INC_VALUE(MONITOR_ADAPTIVE_HASH_ROW_REMOVED, n_cached); cleanup: -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - if (UNIV_UNLIKELY(block->n_pointers)) { - /* Corruption */ - ib::error() << "Corruption of adaptive hash index." 
- << " After dropping, the hash index to a page of " - << index->name - << ", still " << block->n_pointers - << " hash nodes remain."; - rw_lock_x_unlock(latch); - - ut_ad(btr_search_validate()); - } else { - rw_lock_x_unlock(latch); - } -#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + assert_block_ahi_valid(block); rw_lock_x_unlock(latch); -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ ut_free(folds); } @@ -1533,6 +1520,7 @@ btr_search_build_page_hash_index( have to take care not to increment the counter in that case. */ if (!block->index) { + assert_block_ahi_empty(block); index->search_info->ref_count++; } @@ -1551,6 +1539,7 @@ btr_search_build_page_hash_index( MONITOR_INC(MONITOR_ADAPTIVE_HASH_PAGE_ADDED); MONITOR_INC_VALUE(MONITOR_ADAPTIVE_HASH_ROW_ADDED, n_cached); exit_func: + assert_block_ahi_valid(block); btr_search_x_unlock(index); ut_free(folds); @@ -1590,6 +1579,8 @@ btr_search_move_or_delete_hash_entries( ut_a(!block->index || block->index == index); ut_a(!(new_block->index || block->index) || !dict_index_is_ibuf(index)); + assert_block_ahi_valid(block); + assert_block_ahi_valid(new_block); if (new_block->index) { @@ -1650,6 +1641,7 @@ btr_search_update_hash_on_delete(btr_cur_t* cursor) ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X)); + assert_block_ahi_valid(block); index = block->index; if (!index) { @@ -1674,6 +1666,7 @@ btr_search_update_hash_on_delete(btr_cur_t* cursor) } btr_search_x_lock(index); + assert_block_ahi_valid(block); if (block->index) { ut_a(block->index == index); @@ -1684,6 +1677,8 @@ btr_search_update_hash_on_delete(btr_cur_t* cursor) MONITOR_INC( MONITOR_ADAPTIVE_HASH_ROW_REMOVE_NOT_FOUND); } + + assert_block_ahi_valid(block); } btr_search_x_unlock(index); @@ -1747,6 +1742,7 @@ btr_search_update_hash_node_on_insert(btr_cur_t* cursor) } func_exit: + assert_block_ahi_valid(block); btr_search_x_unlock(index); } else { btr_search_x_unlock(index); @@ -1791,6 +1787,7 @@ btr_search_update_hash_on_insert(btr_cur_t* cursor) block = 
btr_cur_get_block(cursor); ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X)); + assert_block_ahi_valid(block); index = block->index; diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index d2902580924..cc567ec73c8 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -1496,6 +1496,10 @@ buf_block_init( { UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE); + /* This function should only be executed at database startup or by + buf_pool_resize(). Either way, adaptive hash index must not exist. */ + assert_block_ahi_empty(block); + block->frame = frame; block->page.buf_pool_index = buf_pool_index(buf_pool); @@ -1526,11 +1530,6 @@ buf_block_init( ut_d(block->in_unzip_LRU_list = FALSE); ut_d(block->in_withdraw_list = FALSE); -#ifdef BTR_CUR_HASH_ADAPT -# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - block->n_pointers = 0; -# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ -#endif /* BTR_CUR_HASH_ADAPT */ page_zip_des_init(&block->page.zip); mutex_create(LATCH_ID_BUF_BLOCK_MUTEX, &block->mutex); @@ -2244,6 +2243,10 @@ buf_page_realloc( /* set other flags of buf_block_t */ #ifdef BTR_CUR_HASH_ADAPT + /* This code should only be executed by buf_pool_resize(), + while the adaptive hash index is disabled. 
*/ + assert_block_ahi_empty(block); + assert_block_ahi_empty(new_block); ut_ad(!block->index); new_block->index = NULL; new_block->n_hash_helps = 0; @@ -3210,20 +3213,23 @@ buf_pool_clear_hash_index() for (; i--; block++) { dict_index_t* index = block->index; + assert_block_ahi_valid(block); /* We can set block->index = NULL - when we have an x-latch on search latch; - see the comment in buf0buf.h */ + and block->n_pointers = 0 + when btr_search_own_all(RW_LOCK_X); + see the comments in buf0buf.h */ if (!index) { - /* Not hashed */ continue; } - block->index = NULL; + ut_ad(buf_block_get_state(block) + == BUF_BLOCK_FILE_PAGE); # if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG block->n_pointers = 0; # endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + block->index = NULL; } } } @@ -3915,6 +3921,9 @@ buf_block_init_low( { block->skip_flush_check = false; #ifdef BTR_CUR_HASH_ADAPT + /* No adaptive hash index entries may point to a previously + unused (and now freshly allocated) block. */ + assert_block_ahi_empty(block); block->index = NULL; block->n_hash_helps = 0; diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc index c492ec60494..10a8561d38d 100644 --- a/storage/innobase/buf/buf0lru.cc +++ b/storage/innobase/buf/buf0lru.cc @@ -295,21 +295,29 @@ next_page: continue; } - mutex_enter(&((buf_block_t*) bpage)->mutex); + buf_block_t* block = reinterpret_cast<buf_block_t*>(bpage); - { - bool skip = bpage->buf_fix_count > 0 - || !((buf_block_t*) bpage)->index; + mutex_enter(&block->mutex); - mutex_exit(&((buf_block_t*) bpage)->mutex); + /* This debug check uses a dirty read that could + theoretically cause false positives while + buf_pool_clear_hash_index() is executing. + (Other conflicting access paths to the adaptive hash + index should not be possible, because when a + tablespace is being discarded or dropped, there must + be no concurrent access to the contained tables.) 
*/ + assert_block_ahi_valid(block); - if (skip) { - /* Skip this block, because there are - no adaptive hash index entries - pointing to it, or because we cannot - drop them due to the buffer-fix. */ - goto next_page; - } + bool skip = bpage->buf_fix_count > 0 || !block->index; + + mutex_exit(&block->mutex); + + if (skip) { + /* Skip this block, because there are + no adaptive hash index entries + pointing to it, or because we cannot + drop them due to the buffer-fix. */ + goto next_page; } /* Store the page number so that we can drop the hash @@ -800,6 +808,17 @@ scan_again: bpage->id, bpage->size); goto scan_again; + } else { + /* This debug check uses a dirty read that could + theoretically cause false positives while + buf_pool_clear_hash_index() is executing, + if the writes to block->index=NULL and + block->n_pointers=0 are reordered. + (Other conflicting access paths to the adaptive hash + index should not be possible, because when a + tablespace is being discarded or dropped, there must + be no concurrent access to the contained tables.) */ + assert_block_ahi_empty((buf_block_t*) bpage); } #endif /* BTR_CUR_HASH_ADAPT */ @@ -1156,6 +1175,9 @@ buf_LRU_get_free_only( || !buf_block_will_withdrawn(buf_pool, block)) { /* found valid free block */ buf_page_mutex_enter(block); + /* No adaptive hash index entries may point to + a free block. 
*/ + assert_block_ahi_empty(block); buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE); UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE); @@ -2030,17 +2052,10 @@ buf_LRU_block_free_non_file_page( case BUF_BLOCK_READY_FOR_USE: break; default: - ib::error() << "Block:" << block - << " incorrect state:" << buf_get_state_name(block) - << " in buf_LRU_block_free_non_file_page"; - return; /* Continue */ + ut_error; } -#ifdef BTR_CUR_HASH_ADAPT -# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - ut_a(block->n_pointers == 0); -# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ -#endif /* BTR_CUR_HASH_ADAPT */ + assert_block_ahi_empty(block); ut_ad(!block->page.in_free_list); ut_ad(!block->page.in_flush_list); ut_ad(!block->page.in_LRU_list); diff --git a/storage/innobase/ha/ha0ha.cc b/storage/innobase/ha/ha0ha.cc index 5822bd6755c..f620db6f62e 100644 --- a/storage/innobase/ha/ha0ha.cc +++ b/storage/innobase/ha/ha0ha.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -189,6 +190,12 @@ ha_clear( } #ifdef BTR_CUR_HASH_ADAPT +# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG +/** Maximum number of records in a page */ +static const lint MAX_N_POINTERS + = UNIV_PAGE_SIZE_MAX / REC_N_NEW_EXTRA_BYTES; +# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + /*************************************************************//** Inserts an entry into a hash table. 
If an entry with the same fold number is found, its node is updated to point to the new data, and no new node @@ -235,9 +242,11 @@ ha_insert_for_fold_func( buf_block_t* prev_block = prev_node->block; ut_a(prev_block->frame == page_align(prev_node->data)); - ut_a(prev_block->n_pointers > 0); - prev_block->n_pointers--; - block->n_pointers++; + ut_a(my_atomic_addlint( + &prev_block->n_pointers, -1) + < MAX_N_POINTERS); + ut_a(my_atomic_addlint(&block->n_pointers, 1) + < MAX_N_POINTERS); } prev_node->block = block; @@ -268,7 +277,8 @@ ha_insert_for_fold_func( #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG if (table->adaptive) { - block->n_pointers++; + ut_a(my_atomic_addlint(&block->n_pointers, 1) + < MAX_N_POINTERS); } #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ @@ -329,8 +339,8 @@ ha_delete_hash_node( #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG if (table->adaptive) { ut_a(del_node->block->frame = page_align(del_node->data)); - ut_a(del_node->block->n_pointers > 0); - del_node->block->n_pointers--; + ut_a(my_atomic_addlint(&del_node->block->n_pointers, -1) + < MAX_N_POINTERS); } #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ @@ -372,9 +382,10 @@ ha_search_and_update_if_found_func( if (node) { #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG if (table->adaptive) { - ut_a(node->block->n_pointers > 0); - node->block->n_pointers--; - new_block->n_pointers++; + ut_a(my_atomic_addlint(&node->block->n_pointers, -1) + < MAX_N_POINTERS); + ut_a(my_atomic_addlint(&new_block->n_pointers, 1) + < MAX_N_POINTERS); } node->block = new_block; diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc index 65197e402aa..3d764ef6e7a 100644 --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ -5149,6 +5149,10 @@ i_s_innodb_buffer_page_get_info( block = reinterpret_cast<const buf_block_t*>(bpage); frame = block->frame; #ifdef BTR_CUR_HASH_ADAPT + /* Note: this may be a false positive, that + is, block->index will not always be set to + NULL 
when the last adaptive hash index + reference is dropped. */ page_info->hashed = (block->index != NULL); #endif /* BTR_CUR_HASH_ADAPT */ } else { diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 7a4dc610694..c7ffecd00c1 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -3953,7 +3953,11 @@ ibuf_insert_to_index_page( ut_ad(ibuf_inside(mtr)); ut_ad(dtuple_check_typed(entry)); #ifdef BTR_CUR_HASH_ADAPT + /* A change buffer merge must occur before users are granted + any access to the page. No adaptive hash index entries may + point to a freshly read page. */ ut_ad(!block->index); + assert_block_ahi_empty(block); #endif /* BTR_CUR_HASH_ADAPT */ ut_ad(mtr->is_named_space(block->page.id.space())); diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index c0b635ab8c9..aaadd544315 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -1836,23 +1836,53 @@ struct buf_block_t{ /* @} */ /** @name Hash search fields - These 5 fields may only be modified when we have - an x-latch on search system AND - - we are holding an s-latch or x-latch on buf_block_t::lock or - - we know that buf_block_t::buf_fix_count == 0. + These 5 fields may only be modified when: + we are holding the appropriate x-latch in btr_search_latches[], and + one of the following holds: + (1) the block state is BUF_BLOCK_FILE_PAGE, and + we are holding an s-latch or x-latch on buf_block_t::lock, or + (2) buf_block_t::buf_fix_count == 0, or + (3) the block state is BUF_BLOCK_REMOVE_HASH. An exception to this is when we init or create a page in the buffer pool in buf0buf.cc. - Another exception is that assigning block->index = NULL - is allowed whenever holding an x-latch on search system. */ + Another exception for buf_pool_clear_hash_index() is that + assigning block->index = NULL (and block->n_pointers = 0) + is allowed whenever btr_search_own_all(RW_LOCK_X). 
+ + Another exception is that ha_insert_for_fold_func() may + decrement n_pointers without holding the appropriate latch + in btr_search_latches[]. Thus, n_pointers must be + protected by atomic memory access. + + This implies that the fields may be read without race + condition whenever any of the following hold: + - the btr_search_latches[] s-latch or x-latch is being held, or + - the block state is not BUF_BLOCK_FILE_PAGE or BUF_BLOCK_REMOVE_HASH, + and holding some latch prevents the state from changing to that. + + Some use of assert_block_ahi_empty() or assert_block_ahi_valid() + is prone to race conditions while buf_pool_clear_hash_index() is + executing (the adaptive hash index is being disabled). Such use + is explicitly commented. */ /* @{ */ # if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG ulint n_pointers; /*!< used in debugging: the number of pointers in the adaptive hash index - pointing to this frame */ + pointing to this frame; + protected by atomic memory access + or btr_search_own_all(). */ +# define assert_block_ahi_empty(block) \ + ut_a(my_atomic_addlint(&(block)->n_pointers, 0) == 0) +# define assert_block_ahi_valid(block) \ + ut_a((block)->index \ + || my_atomic_addlint(&(block)->n_pointers, 0) == 0) +# else /* UNIV_AHI_DEBUG || UNIV_DEBUG */ +# define assert_block_ahi_empty(block) /* nothing */ +# define assert_block_ahi_valid(block) /* nothing */ # endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ unsigned curr_n_fields:10;/*!< prefix length for hash indexing: number of full fields */ @@ -1868,11 +1898,14 @@ struct buf_block_t{ complete, though: there may have been hash collisions, record deletions, etc. 
*/ + /* @} */ +#else /* BTR_CUR_HASH_ADAPT */ +# define assert_block_ahi_empty(block) /* nothing */ +# define assert_block_ahi_valid(block) /* nothing */ #endif /* BTR_CUR_HASH_ADAPT */ bool skip_flush_check; /*!< Skip check in buf_dblwr_check_block during bulk load, protected by lock.*/ - /* @} */ # ifdef UNIV_DEBUG /** @name Debug fields */ /* @{ */ diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic index 429c7fd3ba6..f22dcc48a01 100644 --- a/storage/innobase/include/buf0buf.ic +++ b/storage/innobase/include/buf0buf.ic @@ -920,6 +920,7 @@ buf_block_modify_clock_inc( RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX)); } #endif /* UNIV_DEBUG */ + assert_block_ahi_valid(block); block->modify_clock++; } |