diff options
field | value | date |
---|---|---|
author | Marko Mäkelä <marko.makela@oracle.com> | 2010-08-10 13:22:48 +0300 |
committer | Marko Mäkelä <marko.makela@oracle.com> | 2010-08-10 13:22:48 +0300 |
commit | 271e6ae34117db9475da877beb5ec2a0c7495872 (patch) | |
tree | 8169a7e668160badd28493f250e732ed06ede72b /storage | |
parent | a4c5cf7ca9efcf386600c5da5f049dcab9e86046 (diff) | |
download | mariadb-git-271e6ae34117db9475da877beb5ec2a0c7495872.tar.gz |
Bug#54914: InnoDB: performance drop with innodb_change_buffering=all

Reduce ibuf_mutex and ibuf_pessimistic_insert_mutex contention further.
Protect ibuf->empty by the insert buffer root page latch, not ibuf_mutex.
ibuf_tree_root_get(): Assert that ibuf_mutex is owned by the
caller. Assert that the stamped page number is correct. Assert that
ibuf->empty agrees with the root page.
ibuf_size_update(): Do not update ibuf->empty.
ibuf_init_at_db_start(): Update ibuf->empty while holding the root page latch.
ibuf_add_free_page(): Return TRUE/FALSE instead of DB_SUCCESS/DB_STRONG_FAIL.
ibuf_remove_free_page(): Release ibuf_pessimistic_insert_mutex as
early as possible.
ibuf_contract_ext(): Rely on a dirty read of ibuf->empty, unless the
server is being shut down. Never acquire ibuf_mutex. Eliminate n_stored.
ibuf_contract_after_insert(): Never acquire ibuf_mutex. Perform dirty
reads of ibuf->size and ibuf->max_size.
ibuf_insert_low(): Only acquire ibuf_mutex for mode==BTR_MODIFY_TREE.
Perform dirty reads of ibuf->size and ibuf->max_size. Update
ibuf->empty while holding the root page latch.
ibuf_delete_rec(): Update ibuf->empty while holding the root page latch.
ibuf_is_empty(): Release ibuf_mutex earlier.
Diffstat (limited to 'storage')
-rw-r--r-- | storage/innobase/ibuf/ibuf0ibuf.c | 250 |
-rw-r--r-- | storage/innobase/include/ibuf0ibuf.ic | 11 |
2 files changed, 134 insertions, 127 deletions
diff --git a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c index 1d162f82b93..a048de0e884 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.c +++ b/storage/innobase/ibuf/ibuf0ibuf.c @@ -55,6 +55,7 @@ Created 7/19/1997 Heikki Tuuri #include "lock0lock.h" #include "log0recv.h" #include "que0que.h" +#include "srv0start.h" /* srv_shutdown_state */ /* STRUCTURE OF AN INSERT BUFFER RECORD @@ -395,8 +396,10 @@ ibuf_tree_root_get( mtr_t* mtr) /*!< in: mtr */ { buf_block_t* block; + page_t* root; ut_ad(ibuf_inside()); + ut_ad(mutex_own(&ibuf_mutex)); mtr_x_lock(dict_index_get_lock(ibuf->index), mtr); @@ -405,7 +408,13 @@ ibuf_tree_root_get( buf_block_dbg_add_level(block, SYNC_TREE_NODE); - return(buf_block_get_frame(block)); + root = buf_block_get_frame(block); + + ut_ad(page_get_space_id(root) == IBUF_SPACE_ID); + ut_ad(page_get_page_no(root) == FSP_IBUF_TREE_ROOT_PAGE_NO); + ut_ad(ibuf->empty == (page_get_n_recs(root) == 0)); + + return(root); } #ifdef UNIV_IBUF_COUNT_DEBUG @@ -482,8 +491,6 @@ ibuf_size_update( /* the '1 +' is the ibuf header page */ ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len); - - ibuf->empty = page_get_n_recs(root) == 0; } /******************************************************************//** @@ -554,6 +561,7 @@ ibuf_init_at_db_start(void) ibuf_size_update(root, &mtr); mutex_exit(&ibuf_mutex); + ibuf->empty = (page_get_n_recs(root) == 0); mtr_commit(&mtr); ibuf_exit(); @@ -2025,9 +2033,9 @@ ibuf_data_too_much_free(void) /*********************************************************************//** Allocates a new page from the ibuf file segment and adds it to the free list. 
-@return DB_SUCCESS, or DB_STRONG_FAIL if no space left */ +@return TRUE on success, FALSE if no space left */ static -ulint +ibool ibuf_add_free_page(void) /*====================*/ { @@ -2063,10 +2071,10 @@ ibuf_add_free_page(void) header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP, &mtr); - if (page_no == FIL_NULL) { + if (UNIV_UNLIKELY(page_no == FIL_NULL)) { mtr_commit(&mtr); - return(DB_STRONG_FAIL); + return(FALSE); } { @@ -2113,7 +2121,7 @@ ibuf_add_free_page(void) ibuf_exit(); - return(DB_SUCCESS); + return(TRUE); } /*********************************************************************//** @@ -2143,20 +2151,17 @@ ibuf_remove_free_page(void) header_page = ibuf_header_page_get(&mtr); /* Prevent pessimistic inserts to insert buffer trees for a while */ - mutex_enter(&ibuf_pessimistic_insert_mutex); - ibuf_enter(); - + mutex_enter(&ibuf_pessimistic_insert_mutex); mutex_enter(&ibuf_mutex); if (!ibuf_data_too_much_free()) { mutex_exit(&ibuf_mutex); + mutex_exit(&ibuf_pessimistic_insert_mutex); ibuf_exit(); - mutex_exit(&ibuf_pessimistic_insert_mutex); - mtr_commit(&mtr); return; @@ -2218,11 +2223,11 @@ ibuf_remove_free_page(void) flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr); + mutex_exit(&ibuf_pessimistic_insert_mutex); + ibuf->seg_size--; ibuf->free_list_len--; - mutex_exit(&ibuf_pessimistic_insert_mutex); - /* Set the bit indicating that this page is no more an ibuf tree page (level 2 page) */ @@ -2484,17 +2489,19 @@ ibuf_contract_ext( ulint page_nos[IBUF_MAX_N_PAGES_MERGED]; ulint space_ids[IBUF_MAX_N_PAGES_MERGED]; ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED]; - ulint n_stored; ulint sum_sizes; mtr_t mtr; *n_pages = 0; ut_ad(!ibuf_inside()); - mutex_enter(&ibuf_mutex); + /* We perform a dirty read of ibuf->empty, without latching + the insert buffer root page. We trust this dirty read except + when a slow shutdown is being executed. 
During a slow + shutdown, the insert buffer merge must be completed. */ - if (ibuf->empty) { - mutex_exit(&ibuf_mutex); + if (UNIV_UNLIKELY(ibuf->empty) + && UNIV_LIKELY(!srv_shutdown_state)) { ibuf_is_empty: #if 0 /* TODO */ @@ -2523,18 +2530,18 @@ ibuf_is_empty: position within the leaf */ btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, &pcur, &mtr); - mutex_exit(&ibuf_mutex); ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index)); if (page_get_n_recs(btr_pcur_get_page(&pcur)) == 0) { - /* When the ibuf tree is emptied completely, the last record - is removed using an optimistic delete and ibuf_size_update - is not called, causing ibuf->empty to remain FALSE. If we do - not reset it to TRUE here then database shutdown will hang - in the loop in ibuf_contract_for_n_pages. */ - - ibuf->empty = TRUE; + /* If a B-tree page is empty, it must be the root page + and the whole B-tree must be empty. InnoDB does not + allow empty B-tree pages other than the root. */ + ut_ad(ibuf->empty); + ut_ad(page_get_space_id(btr_pcur_get_page(&pcur)) + == IBUF_SPACE_ID); + ut_ad(page_get_page_no(btr_pcur_get_page(&pcur)) + == FSP_IBUF_TREE_ROOT_PAGE_NO); ibuf_exit(); @@ -2546,10 +2553,10 @@ ibuf_is_empty: sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur), space_ids, space_versions, - page_nos, &n_stored); + page_nos, n_pages); #if 0 /* defined UNIV_IBUF_DEBUG */ fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n", - sync, n_stored, sum_sizes); + sync, *n_pages, sum_sizes); #endif ibuf_exit(); @@ -2557,8 +2564,7 @@ ibuf_is_empty: btr_pcur_close(&pcur); buf_read_ibuf_merge_pages(sync, space_ids, space_versions, page_nos, - n_stored); - *n_pages = n_stored; + *n_pages); return(sum_sizes + 1); } @@ -2628,33 +2634,33 @@ ibuf_contract_after_insert( ibool sync; ulint sum_sizes; ulint size; - - mutex_enter(&ibuf_mutex); - - if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) { - mutex_exit(&ibuf_mutex); - + ulint max_size; + + /* Perform 
dirty reads of ibuf->size and ibuf->max_size, to + reduce ibuf_mutex contention. ibuf->max_size remains constant + after ibuf_init_at_db_start(), but ibuf->size should be + protected by ibuf_mutex. Given that ibuf->size fits in a + machine word, this should be OK; at worst we are doing some + excessive ibuf_contract() or occasionally skipping a + ibuf_contract(). */ + size = ibuf->size; + max_size = ibuf->max_size; + + if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) { return; } - sync = FALSE; - - if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_ON_INSERT_SYNC) { - - sync = TRUE; - } - - mutex_exit(&ibuf_mutex); + sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC); /* Contract at least entry_size many bytes */ sum_sizes = 0; size = 1; - while ((size > 0) && (sum_sizes < entry_size)) { + do { size = ibuf_contract(sync); sum_sizes += size; - } + } while (size > 0 && sum_sizes < entry_size); } /*********************************************************************//** @@ -3272,7 +3278,7 @@ ibuf_set_entry_counter( /*********************************************************************//** Buffer an operation in the insert/delete buffer, instead of doing it directly to the disk page, if this is possible. -@return DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */ +@return DB_SUCCESS, DB_STRONG_FAIL or other error */ static ulint ibuf_insert_low( @@ -3302,6 +3308,7 @@ ibuf_insert_low( rec_t* ins_rec; ibool old_bit_value; page_t* bitmap_page; + buf_block_t* block; page_t* root; ulint err; ibool do_merge; @@ -3311,7 +3318,6 @@ ibuf_insert_low( ulint n_stored; mtr_t mtr; mtr_t bitmap_mtr; - ibool too_big; ut_a(!dict_index_is_clust(index)); ut_ad(dtuple_check_typed(entry)); @@ -3323,11 +3329,14 @@ ibuf_insert_low( do_merge = FALSE; - mutex_enter(&ibuf_mutex); - too_big = ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT; - mutex_exit(&ibuf_mutex); - - if (too_big) { + /* Perform dirty reads of ibuf->size and ibuf->max_size, to + reduce ibuf_mutex contention. 
ibuf->max_size remains constant + after ibuf_init_at_db_start(), but ibuf->size should be + protected by ibuf_mutex. Given that ibuf->size fits in a + machine word, this should be OK; at worst we are doing some + excessive ibuf_contract() or occasionally skipping a + ibuf_contract(). */ + if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) { /* Insert buffer is now too big, contract it but do not try to insert */ @@ -3361,10 +3370,8 @@ ibuf_insert_low( if (mode == BTR_MODIFY_TREE) { for (;;) { - mutex_enter(&ibuf_pessimistic_insert_mutex); - ibuf_enter(); - + mutex_enter(&ibuf_pessimistic_insert_mutex); mutex_enter(&ibuf_mutex); if (UNIV_LIKELY(ibuf_data_enough_free_for_insert())) { @@ -3373,17 +3380,13 @@ ibuf_insert_low( } mutex_exit(&ibuf_mutex); - - ibuf_exit(); - mutex_exit(&ibuf_pessimistic_insert_mutex); + ibuf_exit(); - err = ibuf_add_free_page(); - - if (UNIV_UNLIKELY(err == DB_STRONG_FAIL)) { + if (UNIV_UNLIKELY(!ibuf_add_free_page())) { mem_heap_free(heap); - return(err); + return(DB_STRONG_FAIL); } } } else { @@ -3423,9 +3426,14 @@ ibuf_insert_low( before mtr_commit(&mtr). We must not mtr_commit(&mtr) until after the IBUF_OP_DELETE has been buffered. 
*/ - err = DB_STRONG_FAIL; +fail_exit: + if (mode == BTR_MODIFY_TREE) { + mutex_exit(&ibuf_mutex); + mutex_exit(&ibuf_pessimistic_insert_mutex); + } - goto function_exit; + err = DB_STRONG_FAIL; + goto func_exit; } /* After this point, the page could still be loaded to the @@ -3471,9 +3479,7 @@ ibuf_insert_low( space_ids, space_versions, page_nos, &n_stored); - err = DB_STRONG_FAIL; - - goto function_exit; + goto fail_exit; } } @@ -3484,11 +3490,9 @@ ibuf_insert_low( && !ibuf_set_entry_counter(ibuf_entry, space, page_no, &pcur, mode == BTR_MODIFY_PREV, &mtr)) { bitmap_fail: - err = DB_STRONG_FAIL; - mtr_commit(&bitmap_mtr); - goto function_exit; + goto fail_exit; } /* Set the bitmap bit denoting that the insert buffer contains @@ -3512,10 +3516,19 @@ bitmap_fail: err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor, ibuf_entry, &ins_rec, &dummy_big_rec, 0, thr, &mtr); - if (err == DB_SUCCESS && op != IBUF_OP_DELETE) { - /* Update the page max trx id field */ - page_update_max_trx_id(btr_cur_get_block(cursor), NULL, - thr_get_trx(thr)->id, &mtr); + block = btr_cur_get_block(cursor); + ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID); + + /* If this is the root page, update ibuf->empty. 
*/ + if (UNIV_UNLIKELY(buf_block_get_page_no(block) + == FSP_IBUF_TREE_ROOT_PAGE_NO)) { + const page_t* root = buf_block_get_frame(block); + + ut_ad(page_get_space_id(root) == IBUF_SPACE_ID); + ut_ad(page_get_page_no(root) + == FSP_IBUF_TREE_ROOT_PAGE_NO); + + ibuf->empty = (page_get_n_recs(root) == 0); } } else { ut_ad(mode == BTR_MODIFY_TREE); @@ -3532,16 +3545,22 @@ bitmap_fail: cursor, ibuf_entry, &ins_rec, &dummy_big_rec, 0, thr, &mtr); - if (err == DB_SUCCESS && op != IBUF_OP_DELETE) { - /* Update the page max trx id field */ - page_update_max_trx_id(btr_cur_get_block(cursor), NULL, - thr_get_trx(thr)->id, &mtr); - } - + mutex_exit(&ibuf_pessimistic_insert_mutex); ibuf_size_update(root, &mtr); + mutex_exit(&ibuf_mutex); + ibuf->empty = (page_get_n_recs(root) == 0); + + block = btr_cur_get_block(cursor); + ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID); + } + + if (err == DB_SUCCESS && op != IBUF_OP_DELETE) { + /* Update the page max trx id field */ + page_update_max_trx_id(block, NULL, + thr_get_trx(thr)->id, &mtr); } -function_exit: +func_exit: #ifdef UNIV_IBUF_COUNT_DEBUG if (err == DB_SUCCESS) { fprintf(stderr, @@ -3553,11 +3572,6 @@ function_exit: ibuf_count_get(space, page_no) + 1); } #endif - if (mode == BTR_MODIFY_TREE) { - - mutex_exit(&ibuf_mutex); - mutex_exit(&ibuf_pessimistic_insert_mutex); - } mtr_commit(&mtr); btr_pcur_close(&pcur); @@ -3565,16 +3579,8 @@ function_exit: mem_heap_free(heap); - if (err == DB_SUCCESS) { - mutex_enter(&ibuf_mutex); - - ibuf->empty = FALSE; - - mutex_exit(&ibuf_mutex); - - if (mode == BTR_MODIFY_TREE) { - ibuf_contract_after_insert(entry_size); - } + if (err == DB_SUCCESS && mode == BTR_MODIFY_TREE) { + ibuf_contract_after_insert(entry_size); } if (do_merge) { @@ -4081,6 +4087,22 @@ ibuf_delete_rec( success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr); if (success) { + if (UNIV_UNLIKELY(!page_get_n_recs(btr_pcur_get_page(pcur)))) { + /* If a B-tree page is empty, it must be the root page + and the 
whole B-tree must be empty. InnoDB does not + allow empty B-tree pages other than the root. */ + root = btr_pcur_get_page(pcur); + + ut_ad(page_get_space_id(root) == IBUF_SPACE_ID); + ut_ad(page_get_page_no(root) + == FSP_IBUF_TREE_ROOT_PAGE_NO); + + /* ibuf->empty is protected by the root page latch. + Before the deletion, it had to be FALSE. */ + ut_ad(!ibuf->empty); + ibuf->empty = TRUE; + } + #ifdef UNIV_IBUF_COUNT_DEBUG fprintf(stderr, "Decrementing ibuf count of space %lu page %lu\n" @@ -4108,6 +4130,7 @@ ibuf_delete_rec( if (!ibuf_restore_pos(space, page_no, search_tuple, BTR_MODIFY_TREE, pcur, mtr)) { + mutex_exit(&ibuf_mutex); goto func_exit; } @@ -4121,10 +4144,12 @@ ibuf_delete_rec( ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1); #endif ibuf_size_update(root, mtr); + mutex_exit(&ibuf_mutex); + + ibuf->empty = (page_get_n_recs(root) == 0); btr_pcur_commit_specify_mtr(pcur, mtr); func_exit: - mutex_exit(&ibuf_mutex); btr_pcur_close(pcur); return(TRUE); @@ -4642,37 +4667,18 @@ ibuf_is_empty(void) mtr_t mtr; ibuf_enter(); - - mutex_enter(&ibuf_mutex); - mtr_start(&mtr); + mutex_enter(&ibuf_mutex); root = ibuf_tree_root_get(&mtr); - - if (page_get_n_recs(root) == 0) { - - is_empty = TRUE; - - if (ibuf->empty == FALSE) { - fprintf(stderr, - "InnoDB: Warning: insert buffer tree is empty" - " but the data struct does not\n" - "InnoDB: know it. 
This condition is legal" - " if the master thread has not yet\n" - "InnoDB: run to completion.\n"); - } - } else { - ut_a(ibuf->empty == FALSE); - - is_empty = FALSE; - } - mutex_exit(&ibuf_mutex); + is_empty = (page_get_n_recs(root) == 0); mtr_commit(&mtr); - ibuf_exit(); + ut_a(is_empty == ibuf->empty); + return(is_empty); } diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic index aee27cf9739..e3fa6e3e929 100644 --- a/storage/innobase/include/ibuf0ibuf.ic +++ b/storage/innobase/include/ibuf0ibuf.ic @@ -46,11 +46,12 @@ struct ibuf_struct{ ulint seg_size; /*!< allocated pages of the file segment containing ibuf header and tree */ - ibool empty; /*!< after an insert to the ibuf tree - is performed, this is set to FALSE, - and if a contract operation finds - the tree empty, this is set to - TRUE */ + ibool empty; /*!< Protected by the page + latch of the root page of the + insert buffer tree + (FSP_IBUF_TREE_ROOT_PAGE_NO). TRUE + if and only if the insert + buffer tree is empty. */ ulint free_list_len; /*!< length of the free list */ ulint height; /*!< tree height */ dict_index_t* index; /*!< insert buffer index */ |