summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
author: Marko Mäkelä <marko.makela@oracle.com> 2010-08-10 13:22:48 +0300
committer: Marko Mäkelä <marko.makela@oracle.com> 2010-08-10 13:22:48 +0300
commit: 271e6ae34117db9475da877beb5ec2a0c7495872 (patch)
tree: 8169a7e668160badd28493f250e732ed06ede72b /storage
parent: a4c5cf7ca9efcf386600c5da5f049dcab9e86046 (diff)
download: mariadb-git-271e6ae34117db9475da877beb5ec2a0c7495872.tar.gz
Bug#54914: InnoDB: performance drop with innodb_change_buffering=all
Reduce ibuf_mutex and ibuf_pessimistic_insert_mutex contention further.

Protect ibuf->empty by the insert buffer root page latch, not ibuf_mutex.

ibuf_tree_root_get(): Assert that ibuf_mutex is owned by the caller. Assert that the stamped page number is correct. Assert that ibuf->empty agrees with the root page.

ibuf_size_update(): Do not update ibuf->empty.

ibuf_init_at_db_start(): Update ibuf->empty while holding the root page latch.

ibuf_add_free_page(): Return TRUE/FALSE instead of DB_SUCCESS/DB_STRONG_FAIL.

ibuf_remove_free_page(): Release ibuf_pessimistic_insert_mutex as early as possible.

ibuf_contract_ext(): Rely on a dirty read of ibuf->empty, unless the server is being shut down. Never acquire ibuf_mutex. Eliminate n_stored.

ibuf_contract_after_insert(): Never acquire ibuf_mutex. Perform dirty reads of ibuf->size and ibuf->max_size.

ibuf_insert_low(): Only acquire ibuf_mutex for mode==BTR_MODIFY_TREE. Perform dirty reads of ibuf->size and ibuf->max_size. Update ibuf->empty while holding the root page latch.

ibuf_delete_rec(): Update ibuf->empty while holding the root page latch.

ibuf_is_empty(): Release ibuf_mutex earlier.
Diffstat (limited to 'storage')
-rw-r--r-- storage/innobase/ibuf/ibuf0ibuf.c | 250
-rw-r--r-- storage/innobase/include/ibuf0ibuf.ic | 11
2 files changed, 134 insertions, 127 deletions
diff --git a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
index 1d162f82b93..a048de0e884 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.c
+++ b/storage/innobase/ibuf/ibuf0ibuf.c
@@ -55,6 +55,7 @@ Created 7/19/1997 Heikki Tuuri
#include "lock0lock.h"
#include "log0recv.h"
#include "que0que.h"
+#include "srv0start.h" /* srv_shutdown_state */
/* STRUCTURE OF AN INSERT BUFFER RECORD
@@ -395,8 +396,10 @@ ibuf_tree_root_get(
mtr_t* mtr) /*!< in: mtr */
{
buf_block_t* block;
+ page_t* root;
ut_ad(ibuf_inside());
+ ut_ad(mutex_own(&ibuf_mutex));
mtr_x_lock(dict_index_get_lock(ibuf->index), mtr);
@@ -405,7 +408,13 @@ ibuf_tree_root_get(
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
- return(buf_block_get_frame(block));
+ root = buf_block_get_frame(block);
+
+ ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
+ ut_ad(page_get_page_no(root) == FSP_IBUF_TREE_ROOT_PAGE_NO);
+ ut_ad(ibuf->empty == (page_get_n_recs(root) == 0));
+
+ return(root);
}
#ifdef UNIV_IBUF_COUNT_DEBUG
@@ -482,8 +491,6 @@ ibuf_size_update(
/* the '1 +' is the ibuf header page */
ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len);
-
- ibuf->empty = page_get_n_recs(root) == 0;
}
/******************************************************************//**
@@ -554,6 +561,7 @@ ibuf_init_at_db_start(void)
ibuf_size_update(root, &mtr);
mutex_exit(&ibuf_mutex);
+ ibuf->empty = (page_get_n_recs(root) == 0);
mtr_commit(&mtr);
ibuf_exit();
@@ -2025,9 +2033,9 @@ ibuf_data_too_much_free(void)
/*********************************************************************//**
Allocates a new page from the ibuf file segment and adds it to the free
list.
-@return DB_SUCCESS, or DB_STRONG_FAIL if no space left */
+@return TRUE on success, FALSE if no space left */
static
-ulint
+ibool
ibuf_add_free_page(void)
/*====================*/
{
@@ -2063,10 +2071,10 @@ ibuf_add_free_page(void)
header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP,
&mtr);
- if (page_no == FIL_NULL) {
+ if (UNIV_UNLIKELY(page_no == FIL_NULL)) {
mtr_commit(&mtr);
- return(DB_STRONG_FAIL);
+ return(FALSE);
}
{
@@ -2113,7 +2121,7 @@ ibuf_add_free_page(void)
ibuf_exit();
- return(DB_SUCCESS);
+ return(TRUE);
}
/*********************************************************************//**
@@ -2143,20 +2151,17 @@ ibuf_remove_free_page(void)
header_page = ibuf_header_page_get(&mtr);
/* Prevent pessimistic inserts to insert buffer trees for a while */
- mutex_enter(&ibuf_pessimistic_insert_mutex);
-
ibuf_enter();
-
+ mutex_enter(&ibuf_pessimistic_insert_mutex);
mutex_enter(&ibuf_mutex);
if (!ibuf_data_too_much_free()) {
mutex_exit(&ibuf_mutex);
+ mutex_exit(&ibuf_pessimistic_insert_mutex);
ibuf_exit();
- mutex_exit(&ibuf_pessimistic_insert_mutex);
-
mtr_commit(&mtr);
return;
@@ -2218,11 +2223,11 @@ ibuf_remove_free_page(void)
flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
+ mutex_exit(&ibuf_pessimistic_insert_mutex);
+
ibuf->seg_size--;
ibuf->free_list_len--;
- mutex_exit(&ibuf_pessimistic_insert_mutex);
-
/* Set the bit indicating that this page is no more an ibuf tree page
(level 2 page) */
@@ -2484,17 +2489,19 @@ ibuf_contract_ext(
ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
- ulint n_stored;
ulint sum_sizes;
mtr_t mtr;
*n_pages = 0;
ut_ad(!ibuf_inside());
- mutex_enter(&ibuf_mutex);
+ /* We perform a dirty read of ibuf->empty, without latching
+ the insert buffer root page. We trust this dirty read except
+ when a slow shutdown is being executed. During a slow
+ shutdown, the insert buffer merge must be completed. */
- if (ibuf->empty) {
- mutex_exit(&ibuf_mutex);
+ if (UNIV_UNLIKELY(ibuf->empty)
+ && UNIV_LIKELY(!srv_shutdown_state)) {
ibuf_is_empty:
#if 0 /* TODO */
@@ -2523,18 +2530,18 @@ ibuf_is_empty:
position within the leaf */
btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, &pcur, &mtr);
- mutex_exit(&ibuf_mutex);
ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
if (page_get_n_recs(btr_pcur_get_page(&pcur)) == 0) {
- /* When the ibuf tree is emptied completely, the last record
- is removed using an optimistic delete and ibuf_size_update
- is not called, causing ibuf->empty to remain FALSE. If we do
- not reset it to TRUE here then database shutdown will hang
- in the loop in ibuf_contract_for_n_pages. */
-
- ibuf->empty = TRUE;
+ /* If a B-tree page is empty, it must be the root page
+ and the whole B-tree must be empty. InnoDB does not
+ allow empty B-tree pages other than the root. */
+ ut_ad(ibuf->empty);
+ ut_ad(page_get_space_id(btr_pcur_get_page(&pcur))
+ == IBUF_SPACE_ID);
+ ut_ad(page_get_page_no(btr_pcur_get_page(&pcur))
+ == FSP_IBUF_TREE_ROOT_PAGE_NO);
ibuf_exit();
@@ -2546,10 +2553,10 @@ ibuf_is_empty:
sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur),
space_ids, space_versions,
- page_nos, &n_stored);
+ page_nos, n_pages);
#if 0 /* defined UNIV_IBUF_DEBUG */
fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n",
- sync, n_stored, sum_sizes);
+ sync, *n_pages, sum_sizes);
#endif
ibuf_exit();
@@ -2557,8 +2564,7 @@ ibuf_is_empty:
btr_pcur_close(&pcur);
buf_read_ibuf_merge_pages(sync, space_ids, space_versions, page_nos,
- n_stored);
- *n_pages = n_stored;
+ *n_pages);
return(sum_sizes + 1);
}
@@ -2628,33 +2634,33 @@ ibuf_contract_after_insert(
ibool sync;
ulint sum_sizes;
ulint size;
-
- mutex_enter(&ibuf_mutex);
-
- if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
- mutex_exit(&ibuf_mutex);
-
+ ulint max_size;
+
+ /* Perform dirty reads of ibuf->size and ibuf->max_size, to
+ reduce ibuf_mutex contention. ibuf->max_size remains constant
+ after ibuf_init_at_db_start(), but ibuf->size should be
+ protected by ibuf_mutex. Given that ibuf->size fits in a
+ machine word, this should be OK; at worst we are doing some
+ excessive ibuf_contract() or occasionally skipping a
+ ibuf_contract(). */
+ size = ibuf->size;
+ max_size = ibuf->max_size;
+
+ if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
return;
}
- sync = FALSE;
-
- if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_ON_INSERT_SYNC) {
-
- sync = TRUE;
- }
-
- mutex_exit(&ibuf_mutex);
+ sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
/* Contract at least entry_size many bytes */
sum_sizes = 0;
size = 1;
- while ((size > 0) && (sum_sizes < entry_size)) {
+ do {
size = ibuf_contract(sync);
sum_sizes += size;
- }
+ } while (size > 0 && sum_sizes < entry_size);
}
/*********************************************************************//**
@@ -3272,7 +3278,7 @@ ibuf_set_entry_counter(
/*********************************************************************//**
Buffer an operation in the insert/delete buffer, instead of doing it
directly to the disk page, if this is possible.
-@return DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */
+@return DB_SUCCESS, DB_STRONG_FAIL or other error */
static
ulint
ibuf_insert_low(
@@ -3302,6 +3308,7 @@ ibuf_insert_low(
rec_t* ins_rec;
ibool old_bit_value;
page_t* bitmap_page;
+ buf_block_t* block;
page_t* root;
ulint err;
ibool do_merge;
@@ -3311,7 +3318,6 @@ ibuf_insert_low(
ulint n_stored;
mtr_t mtr;
mtr_t bitmap_mtr;
- ibool too_big;
ut_a(!dict_index_is_clust(index));
ut_ad(dtuple_check_typed(entry));
@@ -3323,11 +3329,14 @@ ibuf_insert_low(
do_merge = FALSE;
- mutex_enter(&ibuf_mutex);
- too_big = ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT;
- mutex_exit(&ibuf_mutex);
-
- if (too_big) {
+ /* Perform dirty reads of ibuf->size and ibuf->max_size, to
+ reduce ibuf_mutex contention. ibuf->max_size remains constant
+ after ibuf_init_at_db_start(), but ibuf->size should be
+ protected by ibuf_mutex. Given that ibuf->size fits in a
+ machine word, this should be OK; at worst we are doing some
+ excessive ibuf_contract() or occasionally skipping a
+ ibuf_contract(). */
+ if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) {
/* Insert buffer is now too big, contract it but do not try
to insert */
@@ -3361,10 +3370,8 @@ ibuf_insert_low(
if (mode == BTR_MODIFY_TREE) {
for (;;) {
- mutex_enter(&ibuf_pessimistic_insert_mutex);
-
ibuf_enter();
-
+ mutex_enter(&ibuf_pessimistic_insert_mutex);
mutex_enter(&ibuf_mutex);
if (UNIV_LIKELY(ibuf_data_enough_free_for_insert())) {
@@ -3373,17 +3380,13 @@ ibuf_insert_low(
}
mutex_exit(&ibuf_mutex);
-
- ibuf_exit();
-
mutex_exit(&ibuf_pessimistic_insert_mutex);
+ ibuf_exit();
- err = ibuf_add_free_page();
-
- if (UNIV_UNLIKELY(err == DB_STRONG_FAIL)) {
+ if (UNIV_UNLIKELY(!ibuf_add_free_page())) {
mem_heap_free(heap);
- return(err);
+ return(DB_STRONG_FAIL);
}
}
} else {
@@ -3423,9 +3426,14 @@ ibuf_insert_low(
before mtr_commit(&mtr). We must not mtr_commit(&mtr)
until after the IBUF_OP_DELETE has been buffered. */
- err = DB_STRONG_FAIL;
+fail_exit:
+ if (mode == BTR_MODIFY_TREE) {
+ mutex_exit(&ibuf_mutex);
+ mutex_exit(&ibuf_pessimistic_insert_mutex);
+ }
- goto function_exit;
+ err = DB_STRONG_FAIL;
+ goto func_exit;
}
/* After this point, the page could still be loaded to the
@@ -3471,9 +3479,7 @@ ibuf_insert_low(
space_ids, space_versions,
page_nos, &n_stored);
- err = DB_STRONG_FAIL;
-
- goto function_exit;
+ goto fail_exit;
}
}
@@ -3484,11 +3490,9 @@ ibuf_insert_low(
&& !ibuf_set_entry_counter(ibuf_entry, space, page_no, &pcur,
mode == BTR_MODIFY_PREV, &mtr)) {
bitmap_fail:
- err = DB_STRONG_FAIL;
-
mtr_commit(&bitmap_mtr);
- goto function_exit;
+ goto fail_exit;
}
/* Set the bitmap bit denoting that the insert buffer contains
@@ -3512,10 +3516,19 @@ bitmap_fail:
err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
ibuf_entry, &ins_rec,
&dummy_big_rec, 0, thr, &mtr);
- if (err == DB_SUCCESS && op != IBUF_OP_DELETE) {
- /* Update the page max trx id field */
- page_update_max_trx_id(btr_cur_get_block(cursor), NULL,
- thr_get_trx(thr)->id, &mtr);
+ block = btr_cur_get_block(cursor);
+ ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
+
+ /* If this is the root page, update ibuf->empty. */
+ if (UNIV_UNLIKELY(buf_block_get_page_no(block)
+ == FSP_IBUF_TREE_ROOT_PAGE_NO)) {
+ const page_t* root = buf_block_get_frame(block);
+
+ ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
+ ut_ad(page_get_page_no(root)
+ == FSP_IBUF_TREE_ROOT_PAGE_NO);
+
+ ibuf->empty = (page_get_n_recs(root) == 0);
}
} else {
ut_ad(mode == BTR_MODIFY_TREE);
@@ -3532,16 +3545,22 @@ bitmap_fail:
cursor,
ibuf_entry, &ins_rec,
&dummy_big_rec, 0, thr, &mtr);
- if (err == DB_SUCCESS && op != IBUF_OP_DELETE) {
- /* Update the page max trx id field */
- page_update_max_trx_id(btr_cur_get_block(cursor), NULL,
- thr_get_trx(thr)->id, &mtr);
- }
-
+ mutex_exit(&ibuf_pessimistic_insert_mutex);
ibuf_size_update(root, &mtr);
+ mutex_exit(&ibuf_mutex);
+ ibuf->empty = (page_get_n_recs(root) == 0);
+
+ block = btr_cur_get_block(cursor);
+ ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
+ }
+
+ if (err == DB_SUCCESS && op != IBUF_OP_DELETE) {
+ /* Update the page max trx id field */
+ page_update_max_trx_id(block, NULL,
+ thr_get_trx(thr)->id, &mtr);
}
-function_exit:
+func_exit:
#ifdef UNIV_IBUF_COUNT_DEBUG
if (err == DB_SUCCESS) {
fprintf(stderr,
@@ -3553,11 +3572,6 @@ function_exit:
ibuf_count_get(space, page_no) + 1);
}
#endif
- if (mode == BTR_MODIFY_TREE) {
-
- mutex_exit(&ibuf_mutex);
- mutex_exit(&ibuf_pessimistic_insert_mutex);
- }
mtr_commit(&mtr);
btr_pcur_close(&pcur);
@@ -3565,16 +3579,8 @@ function_exit:
mem_heap_free(heap);
- if (err == DB_SUCCESS) {
- mutex_enter(&ibuf_mutex);
-
- ibuf->empty = FALSE;
-
- mutex_exit(&ibuf_mutex);
-
- if (mode == BTR_MODIFY_TREE) {
- ibuf_contract_after_insert(entry_size);
- }
+ if (err == DB_SUCCESS && mode == BTR_MODIFY_TREE) {
+ ibuf_contract_after_insert(entry_size);
}
if (do_merge) {
@@ -4081,6 +4087,22 @@ ibuf_delete_rec(
success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
if (success) {
+ if (UNIV_UNLIKELY(!page_get_n_recs(btr_pcur_get_page(pcur)))) {
+ /* If a B-tree page is empty, it must be the root page
+ and the whole B-tree must be empty. InnoDB does not
+ allow empty B-tree pages other than the root. */
+ root = btr_pcur_get_page(pcur);
+
+ ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
+ ut_ad(page_get_page_no(root)
+ == FSP_IBUF_TREE_ROOT_PAGE_NO);
+
+ /* ibuf->empty is protected by the root page latch.
+ Before the deletion, it had to be FALSE. */
+ ut_ad(!ibuf->empty);
+ ibuf->empty = TRUE;
+ }
+
#ifdef UNIV_IBUF_COUNT_DEBUG
fprintf(stderr,
"Decrementing ibuf count of space %lu page %lu\n"
@@ -4108,6 +4130,7 @@ ibuf_delete_rec(
if (!ibuf_restore_pos(space, page_no, search_tuple,
BTR_MODIFY_TREE, pcur, mtr)) {
+ mutex_exit(&ibuf_mutex);
goto func_exit;
}
@@ -4121,10 +4144,12 @@ ibuf_delete_rec(
ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1);
#endif
ibuf_size_update(root, mtr);
+ mutex_exit(&ibuf_mutex);
+
+ ibuf->empty = (page_get_n_recs(root) == 0);
btr_pcur_commit_specify_mtr(pcur, mtr);
func_exit:
- mutex_exit(&ibuf_mutex);
btr_pcur_close(pcur);
return(TRUE);
@@ -4642,37 +4667,18 @@ ibuf_is_empty(void)
mtr_t mtr;
ibuf_enter();
-
- mutex_enter(&ibuf_mutex);
-
mtr_start(&mtr);
+ mutex_enter(&ibuf_mutex);
root = ibuf_tree_root_get(&mtr);
-
- if (page_get_n_recs(root) == 0) {
-
- is_empty = TRUE;
-
- if (ibuf->empty == FALSE) {
- fprintf(stderr,
- "InnoDB: Warning: insert buffer tree is empty"
- " but the data struct does not\n"
- "InnoDB: know it. This condition is legal"
- " if the master thread has not yet\n"
- "InnoDB: run to completion.\n");
- }
- } else {
- ut_a(ibuf->empty == FALSE);
-
- is_empty = FALSE;
- }
-
mutex_exit(&ibuf_mutex);
+ is_empty = (page_get_n_recs(root) == 0);
mtr_commit(&mtr);
-
ibuf_exit();
+ ut_a(is_empty == ibuf->empty);
+
return(is_empty);
}
diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic
index aee27cf9739..e3fa6e3e929 100644
--- a/storage/innobase/include/ibuf0ibuf.ic
+++ b/storage/innobase/include/ibuf0ibuf.ic
@@ -46,11 +46,12 @@ struct ibuf_struct{
ulint seg_size; /*!< allocated pages of the file
segment containing ibuf header and
tree */
- ibool empty; /*!< after an insert to the ibuf tree
- is performed, this is set to FALSE,
- and if a contract operation finds
- the tree empty, this is set to
- TRUE */
+ ibool empty; /*!< Protected by the page
+ latch of the root page of the
+ insert buffer tree
+ (FSP_IBUF_TREE_ROOT_PAGE_NO). TRUE
+ if and only if the insert
+ buffer tree is empty. */
ulint free_list_len; /*!< length of the free list */
ulint height; /*!< tree height */
dict_index_t* index; /*!< insert buffer index */