From f2096478d5750b983f9a9cc4691d20e152dafd4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 16 Mar 2023 15:52:42 +0200 Subject: MDEV-29835 InnoDB hang on B-tree split or merge This is a follow-up to commit de4030e4d49805a7ded5c0bfee01cc3fd7623522 (MDEV-30400), which fixed some hangs related to B-tree split or merge. btr_root_block_get(): Use and update the root page guess. This is just a minor performance optimization, not affecting correctness. btr_validate_level(): Remove the parameter "lockout", and always acquire an exclusive dict_index_t::lock in CHECK TABLE without QUICK. This is needed in order to avoid latching order violation in btr_page_get_father_node_ptr_for_validate(). btr_cur_need_opposite_intention(): Return true in case btr_cur_compress_recommendation() would hold later during the mini-transaction, or if a page underflow or overflow is possible. If we return true, our caller will escalate to acquiring an exclusive dict_index_t::lock, to prevent a latching order violation and deadlock during btr_compress() or btr_page_split_and_insert(). btr_cur_t::search_leaf(), btr_cur_t::open_leaf(): Also invoke btr_cur_need_opposite_intention() on the leaf page. btr_cur_t::open_leaf(): When escalating to exclusive index locking, acquire exclusive latches on all pages as well. innobase_instant_try(): Return an error code if the root page cannot be retrieved. In addition to the normal stress testing with Random Query Generator (RQG) this has been tested with ./mtr --mysqld=--loose-innodb-limit-optimistic-insert-debug=2 but with the injection in btr_cur_optimistic_insert() for non-leaf pages adjusted so that it would use the value 3. (Otherwise, infinite page splits could occur in some mtr tests.) 
Tested by: Matthias Leich --- storage/innobase/btr/btr0btr.cc | 141 ++++++++------------------ storage/innobase/btr/btr0cur.cc | 160 +++++++++++++++++++----------- storage/innobase/handler/handler0alter.cc | 1 + storage/innobase/include/btr0btr.h | 2 +- storage/innobase/include/btr0types.h | 3 + storage/innobase/include/mtr0mtr.h | 3 + storage/innobase/row/row0log.cc | 6 +- 7 files changed, 156 insertions(+), 160 deletions(-) diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 7fd851f7b0e..1b69f4c7170 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -254,7 +254,7 @@ Gets the root node of a tree and x- or s-latches it. buf_block_t* btr_root_block_get( /*===============*/ - const dict_index_t* index, /*!< in: index tree */ + dict_index_t* index, /*!< in: index tree */ rw_lock_type_t mode, /*!< in: either RW_S_LATCH or RW_X_LATCH */ mtr_t* mtr, /*!< in: mtr */ @@ -266,11 +266,31 @@ btr_root_block_get( return nullptr; } - buf_block_t *block = btr_block_get(*index, index->page, mode, false, mtr, - err); - if (block) + buf_block_t *block; +#ifndef BTR_CUR_ADAPT + static constexpr buf_block_t *guess= nullptr; +#else + buf_block_t *&guess= btr_search_get_info(index)->root_guess; + guess= +#endif + block= + buf_page_get_gen(page_id_t{index->table->space->id, index->page}, + index->table->space->zip_size(), mode, guess, BUF_GET, + mtr, err, false); + ut_ad(!block == (*err != DB_SUCCESS)); + + if (UNIV_LIKELY(block != nullptr)) { - if (index->is_ibuf()); + if (!!page_is_comp(block->page.frame) != index->table->not_redundant() || + btr_page_get_index_id(block->page.frame) != index->id || + !fil_page_index_page_check(block->page.frame) || + index->is_spatial() != + (fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE)) + { + *err= DB_PAGE_CORRUPTED; + block= nullptr; + } + else if (index->is_ibuf()); else if (!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF, *block, *index->table->space) || 
!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP, @@ -280,6 +300,9 @@ btr_root_block_get( block= nullptr; } } + else if (*err == DB_DECRYPTION_FAILED) + btr_decryption_failed(*index); + return block; } @@ -290,7 +313,7 @@ static page_t* btr_root_get( /*=========*/ - const dict_index_t* index, /*!< in: index tree */ + dict_index_t* index, /*!< in: index tree */ mtr_t* mtr, /*!< in: mtr */ dberr_t* err) /*!< out: error code */ { @@ -502,9 +525,7 @@ btr_block_reget(mtr_t *mtr, const dict_index_t &index, return block; } -#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. */ ut_ad(mtr->memo_contains_flagged(&index.lock, MTR_MEMO_X_LOCK)); -#endif return btr_block_get(index, id.page_no(), rw_latch, true, mtr, err); } @@ -773,9 +794,7 @@ btr_page_get_father_node_ptr_for_validate( const uint32_t page_no = btr_cur_get_block(cursor)->page.id().page_no(); dict_index_t* index = btr_cur_get_index(cursor); ut_ad(!dict_index_is_spatial(index)); - - ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); + ut_ad(mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK)); ut_ad(dict_index_get_page(index) != page_no); const auto level = btr_page_get_level(btr_cur_get_page(cursor)); @@ -793,10 +812,6 @@ btr_page_get_father_node_ptr_for_validate( } const rec_t* node_ptr = btr_cur_get_rec(cursor); -#if 0 /* MDEV-29835 FIXME */ - ut_ad(!btr_cur_get_block(cursor)->page.lock.not_recursive() - || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK)); -#endif offsets = rec_get_offsets(node_ptr, index, offsets, 0, ULINT_UNDEFINED, &heap); @@ -2456,11 +2471,10 @@ btr_insert_on_non_leaf_level( } ut_ad(cursor.flag == BTR_CUR_BINARY); -#if 0 /* MDEV-29835 FIXME */ - ut_ad(!btr_cur_get_block(&cursor)->page.lock.not_recursive() + ut_ad(btr_cur_get_block(&cursor) + != mtr->at_savepoint(mtr->get_savepoint() - 1) || index->is_spatial() || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK)); -#endif if (UNIV_LIKELY(err == DB_SUCCESS)) { err = 
btr_cur_optimistic_insert(flags, @@ -2568,10 +2582,8 @@ btr_attach_half_pages( prev_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); #if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ if (!prev_block) { -# if 0 /* MDEV-29835 FIXME */ ut_ad(mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK)); -# endif prev_block = btr_block_get(*index, prev_page_no, RW_X_LATCH, !level, mtr); } @@ -2582,10 +2594,8 @@ btr_attach_half_pages( next_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); #if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ if (!next_block) { -# if 0 /* MDEV-29835 FIXME */ ut_ad(mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK)); -# endif next_block = btr_block_get(*index, next_page_no, RW_X_LATCH, !level, mtr); } @@ -3397,9 +3407,7 @@ dberr_t btr_level_list_remove(const buf_block_t& block, #if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ if (!prev) { -# if 0 /* MDEV-29835 FIXME */ ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK)); -# endif prev= btr_block_get(index, id.page_no(), RW_X_LATCH, page_is_leaf(block.page.frame), mtr, &err); if (UNIV_UNLIKELY(!prev)) @@ -3415,9 +3423,7 @@ dberr_t btr_level_list_remove(const buf_block_t& block, #if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ if (!next) { -# if 0 /* MDEV-29835 FIXME */ ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK)); -# endif next= btr_block_get(index, id.page_no(), RW_X_LATCH, page_is_leaf(block.page.frame), mtr, &err); if (UNIV_UNLIKELY(!next)) @@ -4291,7 +4297,7 @@ btr_discard_page( if (UNIV_UNLIKELY(!merge_block)) { return err; } -#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. */ +#if 1 /* MDEV-29835 FIXME: Acquire the page latch upfront. */ ut_ad(!memcmp_aligned<4>(merge_block->page.frame + FIL_PAGE_NEXT, block->page.frame + FIL_PAGE_OFFSET, @@ -4317,7 +4323,7 @@ btr_discard_page( if (UNIV_UNLIKELY(!merge_block)) { return err; } -#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. 
*/ +#if 1 /* MDEV-29835 FIXME: Acquire the page latch upfront. */ ut_ad(!memcmp_aligned<4>(merge_block->page.frame + FIL_PAGE_PREV, block->page.frame + FIL_PAGE_OFFSET, @@ -4898,8 +4904,7 @@ btr_validate_level( /*===============*/ dict_index_t* index, /*!< in: index tree */ const trx_t* trx, /*!< in: transaction or NULL */ - ulint level, /*!< in: level number */ - bool lockout)/*!< in: true if X-latch index is intended */ + ulint level) /*!< in: level number */ { buf_block_t* block; page_t* page; @@ -4918,18 +4923,10 @@ btr_validate_level( #ifdef UNIV_ZIP_DEBUG page_zip_des_t* page_zip; #endif /* UNIV_ZIP_DEBUG */ - ulint savepoint = 0; - uint32_t parent_page_no = FIL_NULL; - uint32_t parent_right_page_no = FIL_NULL; - bool rightmost_child = false; mtr.start(); - if (lockout) { - mtr_x_lock_index(index, &mtr); - } else { - mtr_sx_lock_index(index, &mtr); - } + mtr_x_lock_index(index, &mtr); dberr_t err; block = btr_root_block_get(index, RW_SX_LATCH, &mtr, &err); @@ -5025,11 +5022,7 @@ func_exit: mem_heap_empty(heap); offsets = offsets2 = NULL; - if (lockout) { - mtr_x_lock_index(index, &mtr); - } else { - mtr_sx_lock_index(index, &mtr); - } + mtr_x_lock_index(index, &mtr); page = block->page.frame; @@ -5073,7 +5066,6 @@ func_exit: if (right_page_no != FIL_NULL) { const rec_t* right_rec; - savepoint = mtr.get_savepoint(); right_block = btr_block_get(*index, right_page_no, RW_SX_LATCH, !level, &mtr, &err); @@ -5177,11 +5169,6 @@ broken_links: father_page = btr_cur_get_page(&node_cur); node_ptr = btr_cur_get_rec(&node_cur); - parent_page_no = page_get_page_no(father_page); - parent_right_page_no = btr_page_get_next(father_page); - rightmost_child = page_rec_is_supremum( - page_rec_get_next(node_ptr)); - rec = page_rec_get_prev(page_get_supremum_rec(page)); if (rec) { btr_cur_position(index, rec, block, &node_cur); @@ -5263,37 +5250,6 @@ broken_links: } } else if (const rec_t* right_node_ptr = page_rec_get_next(node_ptr)) { - if (!lockout && rightmost_child) { - - /* 
To obey latch order of tree blocks, - we should release the right_block once to - obtain lock of the uncle block. */ - ut_ad(right_block - == mtr.at_savepoint(savepoint)); - mtr.rollback_to_savepoint(savepoint, - savepoint + 1); - - if (parent_right_page_no != FIL_NULL) { - btr_block_get(*index, - parent_right_page_no, - RW_SX_LATCH, false, - &mtr); - } - - right_block = btr_block_get(*index, - right_page_no, - RW_SX_LATCH, - !level, &mtr, - &err); - if (!right_block) { - btr_validate_report1(index, level, - block); - fputs("InnoDB: broken FIL_PAGE_NEXT" - " link\n", stderr); - goto invalid_page; - } - } - btr_cur_position( index, page_get_infimum_rec(right_block->page.frame), @@ -5365,20 +5321,6 @@ node_ptr_fails: mtr.start(); - if (!lockout) { - if (rightmost_child) { - if (parent_right_page_no != FIL_NULL) { - btr_block_get(*index, - parent_right_page_no, - RW_SX_LATCH, false, - &mtr); - } - } else if (parent_page_no != FIL_NULL) { - btr_block_get(*index, parent_page_no, - RW_SX_LATCH, false, &mtr); - } - } - block = btr_block_get(*index, right_page_no, RW_SX_LATCH, !level, &mtr, &err); goto loop; @@ -5396,21 +5338,16 @@ btr_validate_index( dict_index_t* index, /*!< in: index */ const trx_t* trx) /*!< in: transaction or NULL */ { - const bool lockout= index->is_spatial(); - mtr_t mtr; mtr.start(); - if (lockout) - mtr_x_lock_index(index, &mtr); - else - mtr_sx_lock_index(index, &mtr); + mtr_x_lock_index(index, &mtr); dberr_t err; if (page_t *root= btr_root_get(index, &mtr, &err)) for (auto level= btr_page_get_level(root);; level--) { - if (dberr_t err_level= btr_validate_level(index, trx, level, lockout)) + if (dberr_t err_level= btr_validate_level(index, trx, level)) err= err_level; if (!level) break; diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 62c7d44d286..27ed631099d 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -748,29 +748,34 @@ btr_cur_will_modify_tree( /** Detects whether the 
modifying record might need a opposite modification to the intention. -@param[in] page page -@param[in] lock_intention lock intention for the tree operation -@param[in] rec record (current node_ptr) +@param page page +@param lock_intention lock intention for the tree operation +@param node_ptr_max_size the maximum size of a node pointer +@param compress_limit BTR_CUR_PAGE_COMPRESS_LIMIT(index) +@param rec record (current node_ptr) @return true if tree modification is needed */ -static -bool -btr_cur_need_opposite_intention( - const page_t* page, - btr_intention_t lock_intention, - const rec_t* rec) +static bool btr_cur_need_opposite_intention(const page_t *page, + btr_intention_t lock_intention, + ulint node_ptr_max_size, + ulint compress_limit, + const rec_t *rec) { - switch (lock_intention) { - case BTR_INTENTION_DELETE: - return (page_has_prev(page) && page_rec_is_first(rec, page)) || - (page_has_next(page) && page_rec_is_last(rec, page)); - case BTR_INTENTION_INSERT: - return page_has_next(page) && page_rec_is_last(rec, page); - case BTR_INTENTION_BOTH: - return(false); - } - - MY_ASSERT_UNREACHABLE(); - return(false); + if (lock_intention != BTR_INTENTION_INSERT) + { + /* We compensate also for btr_cur_compress_recommendation() */ + if (!page_has_siblings(page) || + page_rec_is_first(rec, page) || page_rec_is_last(rec, page) || + page_get_data_size(page) < node_ptr_max_size + compress_limit) + return true; + if (lock_intention == BTR_INTENTION_DELETE) + return false; + } + else if (page_has_next(page) && page_rec_is_last(rec, page)) + return true; + LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page), return true); + const ulint max_size= page_get_max_insert_size_after_reorganize(page, 2); + return max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT + node_ptr_max_size || + max_size < node_ptr_max_size * 2; } /** @@ -1038,7 +1043,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, const ulint savepoint= mtr->get_savepoint(); - ulint 
node_ptr_max_size= 0; + ulint node_ptr_max_size= 0, compress_limit= 0; rw_lock_type_t rw_latch= RW_S_LATCH; switch (latch_mode) { @@ -1050,13 +1055,19 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, ut_ad(mtr->memo_contains_flagged(&index()->lock, MTR_MEMO_X_LOCK)); break; } - if (lock_intention == BTR_INTENTION_DELETE && buf_pool.n_pend_reads && - trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH) - /* Most delete-intended operations are due to the purge of history. - Prioritize them when the history list is growing huge. */ - mtr_x_lock_index(index(), mtr); - else - mtr_sx_lock_index(index(), mtr); + if (lock_intention == BTR_INTENTION_DELETE) + { + compress_limit= BTR_CUR_PAGE_COMPRESS_LIMIT(index()); + if (buf_pool.n_pend_reads && + trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH) + { + /* Most delete-intended operations are due to the purge of history. + Prioritize them when the history list is growing huge. */ + mtr_x_lock_index(index(), mtr); + break; + } + } + mtr_sx_lock_index(index(), mtr); break; #ifdef UNIV_DEBUG case BTR_CONT_MODIFY_TREE: @@ -1331,6 +1342,10 @@ release_tree: !btr_block_get(*index(), btr_page_get_next(block->page.frame), RW_X_LATCH, false, mtr, &err)) goto func_exit; + if (btr_cur_need_opposite_intention(block->page.frame, lock_intention, + node_ptr_max_size, compress_limit, + page_cur.rec)) + goto need_opposite_intention; } reached_latched_leaf: @@ -1384,6 +1399,7 @@ release_tree: break; case BTR_MODIFY_TREE: if (btr_cur_need_opposite_intention(block->page.frame, lock_intention, + node_ptr_max_size, compress_limit, page_cur.rec)) /* If the rec is the first or last in the page for pessimistic delete intention, it might cause node_ptr insert for the upper @@ -1536,6 +1552,17 @@ release_tree: goto search_loop; } +ATTRIBUTE_COLD void mtr_t::index_lock_upgrade() +{ + auto &slot= m_memo[get_savepoint() - 1]; + if (slot.type == MTR_MEMO_X_LOCK) + return; + ut_ad(slot.type == 
MTR_MEMO_SX_LOCK); + index_lock *lock= static_cast<index_lock*>(slot.object); + lock->u_x_upgrade(SRW_LOCK_CALL); + slot.type= MTR_MEMO_X_LOCK; +} + ATTRIBUTE_COLD dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, mtr_t *mtr) @@ -1554,8 +1581,7 @@ dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple, ut_ad(block->page.id().page_no() == index()->page); block->page.fix(); mtr->rollback_to_savepoint(1); - ut_ad(mtr->memo_contains_flagged(&index()->lock, - MTR_MEMO_SX_LOCK | MTR_MEMO_X_LOCK)); + mtr->index_lock_upgrade(); const page_cur_mode_t page_mode{btr_cur_nonleaf_mode(mode)}; @@ -1785,7 +1811,6 @@ search_loop: dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, btr_latch_mode latch_mode, mtr_t *mtr) { - btr_intention_t lock_intention; ulint n_blocks= 0; mem_heap_t *heap= nullptr; rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; @@ -1797,7 +1822,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED; latch_mode= btr_latch_mode(latch_mode & ~BTR_ALREADY_S_LATCHED); - lock_intention= btr_cur_get_and_clear_intention(&latch_mode); + btr_intention_t lock_intention= btr_cur_get_and_clear_intention(&latch_mode); /* Store the position of the tree latch we push to mtr so that we know how to release it when we have latched the leaf node */ @@ -1805,7 +1830,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, auto savepoint= mtr->get_savepoint(); rw_lock_type_t upper_rw_latch= RW_X_LATCH; - ulint node_ptr_max_size= 0; + ulint node_ptr_max_size= 0, compress_limit= 0; if (latch_mode == BTR_MODIFY_TREE) { @@ -1814,12 +1839,18 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, and read IO bandwidth should be prioritized for them, when the history list is growing huge. 
*/ savepoint++; - if (lock_intention == BTR_INTENTION_DELETE - && buf_pool.n_pend_reads - && trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH) - mtr_x_lock_index(index, mtr); - else - mtr_sx_lock_index(index, mtr); + if (lock_intention == BTR_INTENTION_DELETE) + { + compress_limit= BTR_CUR_PAGE_COMPRESS_LIMIT(index); + + if (buf_pool.n_pend_reads && + trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH) + { + mtr_x_lock_index(index, mtr); + goto index_locked; + } + } + mtr_sx_lock_index(index, mtr); } else { @@ -1840,9 +1871,11 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, } } +index_locked: ut_ad(savepoint == mtr->get_savepoint()); - const rw_lock_type_t root_leaf_rw_latch= rw_lock_type_t(latch_mode & ~12); + const rw_lock_type_t root_leaf_rw_latch= + rw_lock_type_t(latch_mode & (RW_S_LATCH | RW_X_LATCH)); page_cur.index = index; @@ -1913,15 +1946,28 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, !btr_block_get(*index, btr_page_get_next(block->page.frame), RW_X_LATCH, false, mtr, &err)) break; + + if (!index->lock.have_x() && + btr_cur_need_opposite_intention(block->page.frame, + lock_intention, + node_ptr_max_size, + compress_limit, page_cur.rec)) + goto need_opposite_intention; } else { if (rw_latch == RW_NO_LATCH) mtr->upgrade_buffer_fix(leaf_savepoint - 1, - rw_lock_type_t(latch_mode)); - /* Release index->lock if needed, and the non-leaf pages. */ - mtr->rollback_to_savepoint(savepoint - !latch_by_caller, - leaf_savepoint - 1); + rw_lock_type_t(latch_mode & + (RW_X_LATCH | RW_S_LATCH))); + if (latch_mode != BTR_CONT_MODIFY_TREE) + { + ut_ad(latch_mode == BTR_MODIFY_LEAF || + latch_mode == BTR_SEARCH_LEAF); + /* Release index->lock if needed, and the non-leaf pages. 
*/ + mtr->rollback_to_savepoint(savepoint - !latch_by_caller, + leaf_savepoint - 1); + } } break; } @@ -1943,22 +1989,25 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, : !page_cur_move_to_prev(&page_cur)) goto corrupted; - const rec_t *node_ptr= page_cur.rec; - offsets= rec_get_offsets(node_ptr, index, offsets, 0, ULINT_UNDEFINED, + offsets= rec_get_offsets(page_cur.rec, index, offsets, 0, ULINT_UNDEFINED, &heap); ut_ad(latch_mode != BTR_MODIFY_TREE || upper_rw_latch == RW_X_LATCH); if (latch_mode != BTR_MODIFY_TREE); - else if (btr_cur_need_opposite_intention(block->page.frame, - lock_intention, node_ptr)) + else if (btr_cur_need_opposite_intention(block->page.frame, lock_intention, + node_ptr_max_size, compress_limit, + page_cur.rec)) { + need_opposite_intention: /* If the rec is the first or last in the page for pessimistic delete intention, it might cause node_ptr insert for the upper level. We should change the intention and retry. */ mtr->rollback_to_savepoint(savepoint); - lock_intention= BTR_INTENTION_BOTH; + mtr->index_lock_upgrade(); + /* X-latch all pages from now on */ + latch_mode= BTR_CONT_MODIFY_TREE; page= index->page; height= ULINT_UNDEFINED; n_blocks= 0; @@ -1967,7 +2016,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, else { if (!btr_cur_will_modify_tree(index, block->page.frame, - lock_intention, node_ptr, + lock_intention, page_cur.rec, node_ptr_max_size, zip_size, mtr)) { ut_ad(n_blocks); @@ -1997,7 +2046,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, } /* Go to the child node */ - page= btr_node_ptr_get_child_page_no(node_ptr, offsets); + page= btr_node_ptr_get_child_page_no(page_cur.rec, offsets); n_blocks++; } @@ -2307,8 +2356,7 @@ convert_big_rec: return(DB_TOO_BIG_RECORD); } - LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page), - goto fail); + LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page), goto fail); if (block->page.zip.data && leaf && (page_get_data_size(page) + rec_size @@ 
-2322,7 +2370,7 @@ fail: /* prefetch siblings of the leaf for the pessimistic operation, if the page is leaf. */ - if (page_is_leaf(page)) { + if (leaf) { btr_cur_prefetch_siblings(block, index); } fail_err: @@ -2391,7 +2439,7 @@ fail_err: #ifdef UNIV_DEBUG if (!(flags & BTR_CREATE_FLAG) - && index->is_primary() && page_is_leaf(page)) { + && leaf && index->is_primary()) { const dfield_t* trx_id = dtuple_get_nth_field( entry, dict_col_get_clust_pos( dict_table_get_sys_col(index->table, diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 9e9c0a17a39..6a8986d76d2 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -6104,6 +6104,7 @@ func_exit: id, MTR_MEMO_PAGE_SX_FIX); if (UNIV_UNLIKELY(!root)) { + err = DB_CORRUPTION; goto func_exit; } diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index a1cc10b05db..a56598d3620 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -445,7 +445,7 @@ Gets the root node of a tree and x- or s-latches it. buf_block_t* btr_root_block_get( /*===============*/ - const dict_index_t* index, /*!< in: index tree */ + dict_index_t* index, /*!< in: index tree */ rw_lock_type_t mode, /*!< in: either RW_S_LATCH or RW_X_LATCH */ mtr_t* mtr, /*!< in: mtr */ diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h index 912c022c64f..fc829e7857a 100644 --- a/storage/innobase/include/btr0types.h +++ b/storage/innobase/include/btr0types.h @@ -103,6 +103,9 @@ enum btr_latch_mode { dict_index_t::lock is being held in non-exclusive mode. */ BTR_MODIFY_LEAF_ALREADY_LATCHED = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED, + /** Attempt to modify records in an x-latched tree. */ + BTR_MODIFY_TREE_ALREADY_LATCHED = BTR_MODIFY_TREE + | BTR_ALREADY_S_LATCHED, /** U-latch root and X-latch a leaf page, assuming that dict_index_t::lock is being held in U mode. 
*/ BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED = BTR_MODIFY_ROOT_AND_LEAF diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index f3fe1841b2e..60e01abe18d 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -344,6 +344,9 @@ public: /** Upgrade U locks on a block to X */ void page_lock_upgrade(const buf_block_t &block); + /** Upgrade index U lock to X */ + ATTRIBUTE_COLD void index_lock_upgrade(); + /** Check if we are holding tablespace latch @param space tablespace to search for @return whether space.latch is being held */ diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index 0743dc2bb50..b21ff2b9f86 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -3078,6 +3078,9 @@ row_log_apply_op_low( mtr_start(&mtr); index->set_modified(mtr); cursor.page_cur.index = index; + if (has_index_lock) { + mtr_x_lock_index(index, &mtr); + } /* We perform the pessimistic variant of the operations if we already hold index->lock exclusively. First, search the @@ -3085,7 +3088,8 @@ row_log_apply_op_low( depending on when the row in the clustered index was scanned. */ *error = cursor.search_leaf(entry, PAGE_CUR_LE, has_index_lock - ? BTR_MODIFY_TREE : BTR_MODIFY_LEAF, &mtr); + ? BTR_MODIFY_TREE_ALREADY_LATCHED + : BTR_MODIFY_LEAF, &mtr); if (UNIV_UNLIKELY(*error != DB_SUCCESS)) { goto func_exit; } -- cgit v1.2.1