diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2023-01-24 14:52:25 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2023-01-24 14:52:25 +0200 |
commit | fa543a0f621fcf19e31c7d044f2b6c4f0836cd5a (patch) | |
tree | 3290abf543bfb0cb2d6e6977f55198e4d632b42d /storage | |
parent | a5b30158d4d8718f8a21b53ac21e3e33aa11b85e (diff) | |
parent | cea50896d2ea0d18924d92d62a7ec1607d55e509 (diff) | |
download | mariadb-git-fa543a0f621fcf19e31c7d044f2b6c4f0836cd5a.tar.gz |
Merge 10.7 into 10.8
Diffstat (limited to 'storage')
45 files changed, 2974 insertions, 2628 deletions
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index 49b363b76d3..e422ef47b89 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -332,12 +332,14 @@ SET(INNOBASE_SOURCES include/row0upd.inl include/row0vers.h include/rw_lock.h + include/small_vector.h include/srv0mon.h include/srv0mon.inl include/srv0srv.h include/srv0start.h include/srw_lock.h include/sux_lock.h + include/transactional_lock_guard.h include/trx0i_s.h include/trx0purge.h include/trx0rec.h diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 73d88596743..d12c395aa17 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -2,7 +2,7 @@ Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2014, 2022, MariaDB Corporation. +Copyright (c) 2014, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -460,6 +460,54 @@ btr_page_create( } } +buf_block_t * +mtr_t::get_already_latched(const page_id_t id, mtr_memo_type_t type) const +{ + ut_ad(is_active()); + ut_ad(type == MTR_MEMO_PAGE_X_FIX || type == MTR_MEMO_PAGE_SX_FIX || + type == MTR_MEMO_PAGE_S_FIX); + for (ulint i= 0; i < m_memo.size(); i++) + { + const mtr_memo_slot_t &slot= m_memo[i]; + const auto slot_type= mtr_memo_type_t(slot.type & ~MTR_MEMO_MODIFY); + if (slot_type == MTR_MEMO_PAGE_X_FIX || slot_type == type) + { + buf_block_t *block= static_cast<buf_block_t*>(slot.object); + if (block->page.id() == id) + return block; + } + } + return nullptr; +} + +/** Fetch an index root page that was already latched in the +mini-transaction. */ +static buf_block_t *btr_get_latched_root(const dict_index_t &index, mtr_t *mtr) +{ + return mtr->get_already_latched(page_id_t{index.table->space_id, index.page}, + MTR_MEMO_PAGE_SX_FIX); +} + +/** Fetch an index page that should have been already latched in the +mini-transaction. */ +static buf_block_t * +btr_block_reget(mtr_t *mtr, const dict_index_t &index, + const page_id_t id, rw_lock_type_t rw_latch, + dberr_t *err) +{ + if (buf_block_t *block= + mtr->get_already_latched(id, mtr_memo_type_t(rw_latch))) + { + *err= DB_SUCCESS; + return block; + } + +#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. */ + ut_ad(mtr->memo_contains_flagged(&index.lock, MTR_MEMO_X_LOCK)); +#endif + return btr_block_get(index, id.page_no(), rw_latch, true, mtr, err); +} + /**************************************************************//** Allocates a new file page to be used in an ibuf tree. Takes the page from the free list of the tree, which must contain pages! @@ -472,18 +520,16 @@ btr_page_alloc_for_ibuf( mtr_t* mtr, /*!< in: mtr */ dberr_t* err) /*!< out: error code */ { - buf_block_t *root= btr_root_block_get(index, RW_SX_LATCH, mtr, err); + buf_block_t *root= btr_get_latched_root(*index, mtr); if (UNIV_UNLIKELY(!root)) return root; - buf_block_t *new_block= - buf_page_get_gen(page_id_t(index->table->space_id, + buf_page_get_gen(page_id_t(IBUF_SPACE_ID, mach_read_from_4(PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST + FLST_FIRST + FIL_ADDR_PAGE + root->page.frame)), - index->table->space->zip_size(), RW_X_LATCH, nullptr, - BUF_GET, mtr, err); + 0, RW_X_LATCH, nullptr, BUF_GET, mtr, err); if (new_block) *err= flst_remove(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, new_block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr); @@ -523,11 +569,11 @@ btr_page_alloc_low( #ifdef BTR_CUR_HASH_ADAPT ut_ad(!root->index || !root->index->freed()); #endif - mtr->release_block_at_savepoint(savepoint, root); + mtr->rollback_to_savepoint(savepoint); } else { - mtr->u_lock_register(savepoint); + mtr->lock_register(savepoint, MTR_MEMO_PAGE_SX_FIX); root->page.lock.u_lock(); #ifdef BTR_CUR_HASH_ADAPT btr_search_drop_page_hash_index(root, true); @@ -579,15 +625,12 @@ btr_page_free_for_ibuf( mtr_t* mtr) /*!< in: mtr */ { ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); - - dberr_t err; - if (buf_block_t *root= btr_root_block_get(index, RW_SX_LATCH, mtr, &err)) - { - err= flst_add_first(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, + buf_block_t *root= btr_get_latched_root(*index, mtr); + dberr_t err= + flst_add_first(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr); - ut_d(if (err == DB_SUCCESS) - flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr)); - } + ut_d(if (err == DB_SUCCESS) + flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr)); return err; } @@ -637,11 +680,11 @@ dberr_t btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr, #ifdef BTR_CUR_HASH_ADAPT ut_ad(!root->index || !root->index->freed()); #endif - mtr->release_block_at_savepoint(savepoint, root); + mtr->rollback_to_savepoint(savepoint); } else { - mtr->u_lock_register(savepoint); + mtr->lock_register(savepoint, MTR_MEMO_PAGE_SX_FIX); root->page.lock.u_lock(); #ifdef BTR_CUR_HASH_ADAPT btr_search_drop_page_hash_index(root, true); @@ -712,35 +755,27 @@ btr_node_ptr_get_child( mtr, err); } -MY_ATTRIBUTE((nonnull(2,3,5), warn_unused_result)) +MY_ATTRIBUTE((nonnull(2,3,4), warn_unused_result)) /************************************************************//** Returns the upper level node pointer to a page. It is assumed that mtr holds an sx-latch on the tree. @return rec_get_offsets() of the node pointer record */ static rec_offs* -btr_page_get_father_node_ptr_func( -/*==============================*/ +btr_page_get_father_node_ptr_for_validate( rec_offs* offsets,/*!< in: work area for the return value */ mem_heap_t* heap, /*!< in: memory heap to use */ btr_cur_t* cursor, /*!< in: cursor pointing to user record, out: cursor on node pointer record, its page x-latched */ - btr_latch_mode latch_mode,/*!< in: BTR_CONT_MODIFY_TREE - or BTR_CONT_SEARCH_TREE */ mtr_t* mtr) /*!< in: mtr */ { - ut_ad(latch_mode == BTR_CONT_MODIFY_TREE - || latch_mode == BTR_CONT_SEARCH_TREE); - const uint32_t page_no = btr_cur_get_block(cursor)->page.id().page_no(); dict_index_t* index = btr_cur_get_index(cursor); ut_ad(!dict_index_is_spatial(index)); - ut_ad(srv_read_only_mode - || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - + ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); ut_ad(dict_index_get_page(index) != page_no); const auto level = btr_page_get_level(btr_cur_get_page(cursor)); @@ -752,12 +787,16 @@ btr_page_get_father_node_ptr_func( dict_index_build_node_ptr(index, user_rec, 0, heap, level), - PAGE_CUR_LE, latch_mode, + RW_S_LATCH, cursor, mtr) != DB_SUCCESS) { return nullptr; } const rec_t* node_ptr = btr_cur_get_rec(cursor); +#if 0 /* MDEV-29835 FIXME */ + ut_ad(!btr_cur_get_block(cursor)->page.lock.not_recursive() + || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK)); +#endif offsets = rec_get_offsets(node_ptr, index, offsets, 0, ULINT_UNDEFINED, &heap); @@ -769,13 +808,65 @@ btr_page_get_father_node_ptr_func( return(offsets); } -#define btr_page_get_father_node_ptr(of,heap,cur,mtr) \ - btr_page_get_father_node_ptr_func( \ - of,heap,cur,BTR_CONT_MODIFY_TREE,mtr) +MY_ATTRIBUTE((nonnull(2,3,4), warn_unused_result)) +/** Return the node pointer to a page. +@param offsets work area for the return value +@param heap memory heap +@param cursor in: child page; out: node pointer to it +@param mtr mini-transaction +@return rec_get_offsets() of the node pointer record +@retval nullptr if the parent page had not been latched in mtr */ +static rec_offs *btr_page_get_parent(rec_offs *offsets, mem_heap_t *heap, + btr_cur_t *cursor, mtr_t *mtr) +{ + const uint32_t page_no= cursor->block()->page.id().page_no(); + const dict_index_t *index= cursor->index(); + ut_ad(!index->is_spatial()); + ut_ad(index->page != page_no); + + uint32_t p= index->page; + auto level= btr_page_get_level(cursor->block()->page.frame); + const dtuple_t *tuple= + dict_index_build_node_ptr(index, btr_cur_get_rec(cursor), 0, heap, level); + level++; + + ulint i; + for (i= 0; i < mtr->get_savepoint(); i++) + if (buf_block_t *block= mtr->block_at_savepoint(i)) + if (block->page.id().page_no() == p) + { + ut_ad(block->page.lock.have_u_or_x() || + (!block->page.lock.have_s() && index->lock.have_x())); + ulint up_match= 0, low_match= 0; + cursor->page_cur.block= block; + if (page_cur_search_with_match(tuple, PAGE_CUR_LE, &up_match, + &low_match, &cursor->page_cur, + nullptr)) + return nullptr; + offsets= rec_get_offsets(cursor->page_cur.rec, index, offsets, 0, + ULINT_UNDEFINED, &heap); + p= btr_node_ptr_get_child_page_no(cursor->page_cur.rec, offsets); + if (p != page_no) + { + if (btr_page_get_level(block->page.frame) == level) + return nullptr; + i= 0; // MDEV-29835 FIXME: require all pages to be latched in order! + continue; + } + ut_ad(block->page.lock.have_u_or_x()); + if (block->page.lock.have_u_not_x()) + { + /* btr_cur_t::search_leaf(BTR_MODIFY_TREE) only U-latches the + root page initially. */ + ut_ad(block->page.id().page_no() == index->page); + block->page.lock.u_x_upgrade(); + mtr->page_lock_upgrade(*block); + } + return offsets; + } -#define btr_page_get_father_node_ptr_for_validate(of,heap,cur,mtr) \ - btr_page_get_father_node_ptr_func( \ - of,heap,cur,BTR_CONT_SEARCH_TREE,mtr) + return nullptr; +} /************************************************************//** Returns the upper level node pointer to a page. It is assumed that mtr holds @@ -796,7 +887,7 @@ btr_page_get_father_block( if (UNIV_UNLIKELY(!rec)) return nullptr; cursor->page_cur.rec= rec; - return btr_page_get_father_node_ptr(offsets, heap, cursor, mtr); + return btr_page_get_parent(offsets, heap, cursor, mtr); } /** Seek to the parent page of a B-tree page. @@ -811,7 +902,7 @@ bool btr_page_get_father(mtr_t* mtr, btr_cur_t* cursor) return false; cursor->page_cur.rec= rec; mem_heap_t *heap= mem_heap_create(100); - const bool got= btr_page_get_father_node_ptr(nullptr, heap, cursor, mtr); + const bool got= btr_page_get_parent(nullptr, heap, cursor, mtr); mem_heap_free(heap); return got; } @@ -1718,48 +1809,43 @@ void btr_set_instant(buf_block_t* root, const dict_index_t& index, mtr_t* mtr) /** Reset the table to the canonical format on ROLLBACK of instant ALTER TABLE. @param[in] index clustered index with instant ALTER TABLE @param[in] all whether to reset FIL_PAGE_TYPE as well -@param[in,out] mtr mini-transaction -@return error code */ +@param[in,out] mtr mini-transaction */ ATTRIBUTE_COLD -dberr_t btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr) +void btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr) { ut_ad(!index.table->is_temporary()); ut_ad(index.is_primary()); - dberr_t err; - if (buf_block_t *root= btr_root_block_get(&index, RW_SX_LATCH, mtr, &err)) + buf_block_t *root= btr_get_latched_root(index, mtr); + byte *page_type= root->page.frame + FIL_PAGE_TYPE; + if (all) { - byte *page_type= root->page.frame + FIL_PAGE_TYPE; - if (all) - { - ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT || - mach_read_from_2(page_type) == FIL_PAGE_INDEX); - mtr->write<2,mtr_t::MAYBE_NOP>(*root, page_type, FIL_PAGE_INDEX); - byte *instant= PAGE_INSTANT + PAGE_HEADER + root->page.frame; - mtr->write<2,mtr_t::MAYBE_NOP>(*root, instant, - page_ptr_get_direction(instant + 1)); - } - else - ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT); - static const byte supremuminfimum[8 + 8] = "supremuminfimum"; - uint16_t infimum, supremum; - if (page_is_comp(root->page.frame)) - { - infimum= PAGE_NEW_INFIMUM; - supremum= PAGE_NEW_SUPREMUM; - } - else - { - infimum= PAGE_OLD_INFIMUM; - supremum= PAGE_OLD_SUPREMUM; - } - ut_ad(!memcmp(&root->page.frame[infimum], supremuminfimum + 8, 8) == - !memcmp(&root->page.frame[supremum], supremuminfimum, 8)); - mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->page.frame[infimum], - supremuminfimum + 8, 8); - mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->page.frame[supremum], - supremuminfimum, 8); + ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT || + mach_read_from_2(page_type) == FIL_PAGE_INDEX); + mtr->write<2,mtr_t::MAYBE_NOP>(*root, page_type, FIL_PAGE_INDEX); + byte *instant= PAGE_INSTANT + PAGE_HEADER + root->page.frame; + mtr->write<2,mtr_t::MAYBE_NOP>(*root, instant, + page_ptr_get_direction(instant + 1)); } - return err; + else + ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT); + static const byte supremuminfimum[8 + 8] = "supremuminfimum"; + uint16_t infimum, supremum; + if (page_is_comp(root->page.frame)) + { + infimum= PAGE_NEW_INFIMUM; + supremum= PAGE_NEW_SUPREMUM; + } + else + { + infimum= PAGE_OLD_INFIMUM; + supremum= PAGE_OLD_SUPREMUM; + } + ut_ad(!memcmp(&root->page.frame[infimum], supremuminfimum + 8, 8) == + !memcmp(&root->page.frame[supremum], supremuminfimum, 8)); + mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->page.frame[infimum], + supremuminfimum + 8, 8); + mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->page.frame[supremum], + supremuminfimum, 8); } /*************************************************************//** @@ -1856,11 +1942,6 @@ btr_root_raise_and_insert( } /* Copy the records from root to the new page one by one. */ - dberr_t e; - if (!err) { - err = &e; - } - if (0 #ifdef UNIV_ZIP_COPY || new_page_zip @@ -2004,21 +2085,15 @@ btr_root_raise_and_insert( page_cursor->block = new_block; page_cursor->index = index; - if (tuple) { - ut_ad(dtuple_check_typed(tuple)); - /* Reposition the cursor to the child node */ - ulint low_match = 0, up_match = 0; + ut_ad(dtuple_check_typed(tuple)); + /* Reposition the cursor to the child node */ + ulint low_match = 0, up_match = 0; - if (page_cur_search_with_match(tuple, PAGE_CUR_LE, - &up_match, &low_match, - page_cursor, nullptr)) { - if (err) { - *err = DB_CORRUPTION; - } - return nullptr; - } - } else { - page_cursor->rec = page_get_infimum_rec(new_block->page.frame); + if (page_cur_search_with_match(tuple, PAGE_CUR_LE, + &up_match, &low_match, + page_cursor, nullptr)) { + *err = DB_CORRUPTION; + return nullptr; } /* Split the child and insert tuple */ @@ -2237,6 +2312,7 @@ func_exit: return(rec); } +#ifdef UNIV_DEBUG /*************************************************************//** Returns TRUE if the insert fits on the appropriate half-page with the chosen split_rec. @@ -2335,6 +2411,7 @@ got_rec: return(false); } +#endif /*******************************************************//** Inserts a data tuple to a tree on a non-leaf level. It is assumed @@ -2357,25 +2434,34 @@ btr_insert_on_non_leaf_level( rtr_info_t rtr_info; ut_ad(level > 0); - auto mode = PAGE_CUR_LE; + + flags |= BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG + | BTR_NO_UNDO_LOG_FLAG; + cursor.page_cur.index = index; + + dberr_t err; if (index->is_spatial()) { - mode = PAGE_CUR_RTREE_INSERT; /* For spatial index, initialize structures to track its parents etc. */ rtr_init_rtr_info(&rtr_info, false, &cursor, index, false); rtr_info_update_btr(&cursor, &rtr_info); + err = rtr_search_to_nth_level(level, tuple, + PAGE_CUR_RTREE_INSERT, + BTR_CONT_MODIFY_TREE, + &cursor, mtr); + } else { + err = btr_cur_search_to_nth_level(level, tuple, RW_X_LATCH, + &cursor, mtr); } - flags |= BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG - | BTR_NO_UNDO_LOG_FLAG; - cursor.page_cur.index = index; - - dberr_t err = btr_cur_search_to_nth_level(level, tuple, mode, - BTR_CONT_MODIFY_TREE, - &cursor, mtr); ut_ad(cursor.flag == BTR_CUR_BINARY); +#if 0 /* MDEV-29835 FIXME */ + ut_ad(!btr_cur_get_block(&cursor)->page.lock.not_recursive() + || index->is_spatial() + || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK)); +#endif if (UNIV_LIKELY(err == DB_SUCCESS)) { err = btr_cur_optimistic_insert(flags, @@ -2471,6 +2557,7 @@ btr_attach_half_pages( /* Get the level of the split pages */ const ulint level = btr_page_get_level(block->page.frame); ut_ad(level == btr_page_get_level(new_block->page.frame)); + page_id_t id{block->page.id()}; /* Get the previous and next pages of page */ const uint32_t prev_page_no = btr_page_get_prev(block->page.frame); @@ -2478,12 +2565,32 @@ btr_attach_half_pages( /* for consistency, both blocks should be locked, before change */ if (prev_page_no != FIL_NULL && direction == FSP_DOWN) { - prev_block = btr_block_get(*index, prev_page_no, RW_X_LATCH, - !level, mtr); + id.set_page_no(prev_page_no); + prev_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); +#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ + if (!prev_block) { +# if 0 /* MDEV-29835 FIXME */ + ut_ad(mtr->memo_contains(index->lock, + MTR_MEMO_X_LOCK)); +# endif + prev_block = btr_block_get(*index, prev_page_no, + RW_X_LATCH, !level, mtr); + } +#endif } if (next_page_no != FIL_NULL && direction != FSP_DOWN) { - next_block = btr_block_get(*index, next_page_no, RW_X_LATCH, - !level, mtr); + id.set_page_no(next_page_no); + next_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); +#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ + if (!next_block) { +# if 0 /* MDEV-29835 FIXME */ + ut_ad(mtr->memo_contains(index->lock, + MTR_MEMO_X_LOCK)); +# endif + next_block = btr_block_get(*index, next_page_no, + RW_X_LATCH, !level, mtr); + } +#endif } /* Build the node pointer (= node key and page address) for the upper @@ -3020,6 +3127,7 @@ insert_empty: return nullptr; } +#ifdef UNIV_DEBUG /* If the split is made on the leaf level and the insert will fit on the appropriate half-page, we may release the tree x-latch. We can then move the records after releasing the tree latch, @@ -3027,21 +3135,21 @@ insert_empty: const bool insert_will_fit = !new_page_zip && btr_page_insert_fits(cursor, split_rec, offsets, tuple, n_ext, heap); +#endif if (!split_rec && !insert_left) { UT_DELETE_ARRAY(buf); buf = NULL; } - if (!srv_read_only_mode - && insert_will_fit +#if 0 // FIXME: this used to be a no-op, and may cause trouble if enabled + if (insert_will_fit && page_is_leaf(page) && !dict_index_is_online_ddl(cursor->index())) { -#if 0 // FIXME: this used to be a no-op, and may cause trouble if enabled mtr->release(cursor->index()->lock); -#endif /* NOTE: We cannot release root block latch here, because it has segment header and already modified in most of cases.*/ } +#endif /* 5. Move then the records to the new page */ if (direction == FSP_DOWN) { @@ -3273,52 +3381,58 @@ func_exit: dberr_t btr_level_list_remove(const buf_block_t& block, const dict_index_t& index, mtr_t* mtr) { - ut_ad(mtr->memo_contains_flagged(&block, MTR_MEMO_PAGE_X_FIX)); - ut_ad(block.zip_size() == index.table->space->zip_size()); - ut_ad(index.table->space->id == block.page.id().space()); - /* Get the previous and next page numbers of page */ - - const page_t* page = block.page.frame; - const uint32_t prev_page_no = btr_page_get_prev(page); - const uint32_t next_page_no = btr_page_get_next(page); - - /* Update page links of the level */ - dberr_t err; + ut_ad(mtr->memo_contains_flagged(&block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(block.zip_size() == index.table->space->zip_size()); + ut_ad(index.table->space->id == block.page.id().space()); + /* Get the previous and next page numbers of page */ + const uint32_t prev_page_no= btr_page_get_prev(block.page.frame); + const uint32_t next_page_no= btr_page_get_next(block.page.frame); + page_id_t id{block.page.id()}; + buf_block_t *prev= nullptr, *next; + dberr_t err; - if (prev_page_no != FIL_NULL) { - buf_block_t* prev_block = btr_block_get( - index, prev_page_no, RW_X_LATCH, page_is_leaf(page), - mtr, &err); - if (UNIV_UNLIKELY(!prev_block)) { - return err; - } - if (UNIV_UNLIKELY(memcmp_aligned<4>(prev_block->page.frame - + FIL_PAGE_NEXT, - page + FIL_PAGE_OFFSET, - 4))) { - return DB_CORRUPTION; - } - btr_page_set_next(prev_block, next_page_no, mtr); - } + /* Update page links of the level */ + if (prev_page_no != FIL_NULL) + { + id.set_page_no(prev_page_no); + prev= mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); +#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ + if (!prev) + { +# if 0 /* MDEV-29835 FIXME */ + ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK)); +# endif + prev= btr_block_get(index, id.page_no(), RW_X_LATCH, + page_is_leaf(block.page.frame), mtr, &err); + if (UNIV_UNLIKELY(!prev)) + return err; + } +#endif + } - if (next_page_no != FIL_NULL) { - buf_block_t* next_block = btr_block_get( - index, next_page_no, RW_X_LATCH, page_is_leaf(page), - mtr, &err); + if (next_page_no != FIL_NULL) + { + id.set_page_no(next_page_no); + next= mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); +#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ + if (!next) + { +# if 0 /* MDEV-29835 FIXME */ + ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK)); +# endif + next= btr_block_get(index, id.page_no(), RW_X_LATCH, + page_is_leaf(block.page.frame), mtr, &err); + if (UNIV_UNLIKELY(!next)) + return err; + } +#endif + btr_page_set_prev(next, prev_page_no, mtr); + } - if (UNIV_UNLIKELY(!next_block)) { - return err; - } - if (UNIV_UNLIKELY(memcmp_aligned<4>(next_block->page.frame - + FIL_PAGE_PREV, - page + FIL_PAGE_OFFSET, - 4))) { - return DB_CORRUPTION; - } - btr_page_set_prev(next_block, prev_page_no, mtr); - } + if (prev) + btr_page_set_next(prev, next_page_no, mtr); - return DB_SUCCESS; + return DB_SUCCESS; } /*************************************************************//** @@ -4168,23 +4282,30 @@ btr_discard_page( const uint32_t left_page_no = btr_page_get_prev(block->page.frame); const uint32_t right_page_no = btr_page_get_next(block->page.frame); + page_id_t merge_page_id{block->page.id()}; ut_d(bool parent_is_different = false); + dberr_t err; if (left_page_no != FIL_NULL) { - dberr_t err; - merge_block = btr_block_get(*index, left_page_no, RW_X_LATCH, - true, mtr, &err); + merge_page_id.set_page_no(left_page_no); + merge_block = btr_block_reget(mtr, *index, merge_page_id, + RW_X_LATCH, &err); if (UNIV_UNLIKELY(!merge_block)) { return err; } - +#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. */ + ut_ad(!memcmp_aligned<4>(merge_block->page.frame + + FIL_PAGE_NEXT, + block->page.frame + FIL_PAGE_OFFSET, + 4)); +#else if (UNIV_UNLIKELY(memcmp_aligned<4>(merge_block->page.frame + FIL_PAGE_NEXT, block->page.frame + FIL_PAGE_OFFSET, 4))) { return DB_CORRUPTION; } - +#endif ut_d(parent_is_different = (page_rec_get_next( page_get_infimum_rec( @@ -4192,19 +4313,25 @@ btr_discard_page( &parent_cursor))) == btr_cur_get_rec(&parent_cursor))); } else if (right_page_no != FIL_NULL) { - dberr_t err; - merge_block = btr_block_get(*index, right_page_no, RW_X_LATCH, - true, mtr, &err); + merge_page_id.set_page_no(right_page_no); + merge_block = btr_block_reget(mtr, *index, merge_page_id, + RW_X_LATCH, &err); if (UNIV_UNLIKELY(!merge_block)) { return err; } +#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. */ + ut_ad(!memcmp_aligned<4>(merge_block->page.frame + + FIL_PAGE_PREV, + block->page.frame + FIL_PAGE_OFFSET, + 4)); +#else if (UNIV_UNLIKELY(memcmp_aligned<4>(merge_block->page.frame + FIL_PAGE_PREV, block->page.frame + FIL_PAGE_OFFSET, 4))) { return DB_CORRUPTION; } - +#endif ut_d(parent_is_different = page_rec_is_supremum( page_rec_get_next(btr_cur_get_rec(&parent_cursor)))); if (page_is_leaf(merge_block->page.frame)) { @@ -4246,13 +4373,10 @@ btr_discard_page( } #ifdef UNIV_ZIP_DEBUG - { - page_zip_des_t* merge_page_zip - = buf_block_get_page_zip(merge_block); - ut_a(!merge_page_zip - || page_zip_validate(merge_page_zip, - merge_block->page.frame, index)); - } + if (page_zip_des_t* merge_page_zip + = buf_block_get_page_zip(merge_block)); + ut_a(page_zip_validate(merge_page_zip, + merge_block->page.frame, index)); #endif /* UNIV_ZIP_DEBUG */ if (index->has_locking()) { @@ -4271,7 +4395,7 @@ btr_discard_page( } /* Free the file page */ - dberr_t err = btr_page_free(index, block, mtr); + err = btr_page_free(index, block, mtr); if (err == DB_SUCCESS) { /* btr_check_node_ptr() needs parent block latched. @@ -4464,6 +4588,8 @@ btr_check_node_ptr( offsets = btr_page_get_father_block(NULL, heap, mtr, &cursor); } + ut_ad(offsets); + if (page_is_leaf(page)) { goto func_exit; @@ -4796,19 +4922,16 @@ btr_validate_level( page_zip_des_t* page_zip; #endif /* UNIV_ZIP_DEBUG */ ulint savepoint = 0; - ulint savepoint2 = 0; uint32_t parent_page_no = FIL_NULL; uint32_t parent_right_page_no = FIL_NULL; bool rightmost_child = false; mtr.start(); - if (!srv_read_only_mode) { - if (lockout) { - mtr_x_lock_index(index, &mtr); - } else { - mtr_sx_lock_index(index, &mtr); - } + if (lockout) { + mtr_x_lock_index(index, &mtr); + } else { + mtr_sx_lock_index(index, &mtr); } dberr_t err; @@ -4856,7 +4979,6 @@ corrupted: offsets = rec_get_offsets(node_ptr, index, offsets, 0, ULINT_UNDEFINED, &heap); - savepoint2 = mtr_set_savepoint(&mtr); block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr, &err); if (!block) { @@ -4877,10 +4999,8 @@ corrupted: /* To obey latch order of tree blocks, we should release the right_block once to obtain lock of the uncle block. */ - mtr_release_block_at_savepoint( - &mtr, savepoint2, block); + mtr.release_last_page(); - savepoint2 = mtr_set_savepoint(&mtr); block = btr_block_get(*index, left_page_no, RW_SX_LATCH, false, &mtr, &err); @@ -4908,12 +5028,10 @@ func_exit: mem_heap_empty(heap); offsets = offsets2 = NULL; - if (!srv_read_only_mode) { - if (lockout) { - mtr_x_lock_index(index, &mtr); - } else { - mtr_sx_lock_index(index, &mtr); - } + if (lockout) { + mtr_x_lock_index(index, &mtr); + } else { + mtr_sx_lock_index(index, &mtr); } page = block->page.frame; @@ -4958,7 +5076,7 @@ func_exit: if (right_page_no != FIL_NULL) { const rec_t* right_rec; - savepoint = mtr_set_savepoint(&mtr); + savepoint = mtr.get_savepoint(); right_block = btr_block_get(*index, right_page_no, RW_SX_LATCH, !level, &mtr, &err); @@ -5152,8 +5270,10 @@ broken_links: /* To obey latch order of tree blocks, we should release the right_block once to obtain lock of the uncle block. */ - mtr_release_block_at_savepoint( - &mtr, savepoint, right_block); + ut_ad(right_block + == mtr.at_savepoint(savepoint)); + mtr.rollback_to_savepoint(savepoint, + savepoint + 1); if (parent_right_page_no != FIL_NULL) { btr_block_get(*index, diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 2f237bb5957..67b8a68930a 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -3,7 +3,7 @@ Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2015, 2022, MariaDB Corporation. +Copyright (c) 2015, 2023, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -103,14 +103,14 @@ throughput clearly from about 100000. */ #define BTR_CUR_FINE_HISTORY_LENGTH 100000 #ifdef BTR_CUR_HASH_ADAPT -/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */ +/** Number of searches down the B-tree in btr_cur_t::search_leaf(). */ ib_counter_t<ulint, ib_counter_element_t> btr_cur_n_non_sea; /** Old value of btr_cur_n_non_sea. Copied by srv_refresh_innodb_monitor_stats(). Referenced by srv_printf_innodb_monitor(). */ ulint btr_cur_n_non_sea_old; /** Number of successful adaptive hash index lookups in -btr_cur_search_to_nth_level(). */ +btr_cur_t::search_leaf(). */ ib_counter_t<ulint, ib_counter_element_t> btr_cur_n_sea; /** Old value of btr_cur_n_sea. Copied by srv_refresh_innodb_monitor_stats(). Referenced by @@ -187,167 +187,6 @@ btr_rec_free_externally_stored_fields( /*==================== B-TREE SEARCH =========================*/ -/** Latches the leaf page or pages requested. -@param[in] block leaf page where the search converged -@param[in] latch_mode BTR_SEARCH_LEAF, ... -@param[in] cursor cursor -@param[in] mtr mini-transaction -@param[out] latch_leaves latched blocks and savepoints */ -void -btr_cur_latch_leaves( - buf_block_t* block, - btr_latch_mode latch_mode, - btr_cur_t* cursor, - mtr_t* mtr, - btr_latch_leaves_t* latch_leaves) -{ - compile_time_assert(int(MTR_MEMO_PAGE_S_FIX) == int(RW_S_LATCH)); - compile_time_assert(int(MTR_MEMO_PAGE_X_FIX) == int(RW_X_LATCH)); - compile_time_assert(int(MTR_MEMO_PAGE_SX_FIX) == int(RW_SX_LATCH)); - ut_ad(block->page.id().space() == cursor->index()->table->space->id); - ut_ad(block->page.in_file()); - ut_ad(srv_read_only_mode - || mtr->memo_contains_flagged(&cursor->index()->lock, - MTR_MEMO_S_LOCK - | MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - auto rtr_info = cursor->rtr_info; - if (UNIV_LIKELY_NULL(rtr_info) && !cursor->index()->is_spatial()) { - rtr_info = nullptr; - } - - const rw_lock_type_t mode = rw_lock_type_t( - latch_mode & (RW_X_LATCH | RW_S_LATCH)); - static_assert(ulint{RW_S_LATCH} == ulint{BTR_SEARCH_LEAF}, ""); - static_assert(ulint{RW_X_LATCH} == ulint{BTR_MODIFY_LEAF}, ""); - static_assert(BTR_SEARCH_LEAF & BTR_SEARCH_TREE, ""); - - switch (latch_mode) { - default: - break; - uint32_t left_page_no; - uint32_t right_page_no; - ulint save; - case BTR_SEARCH_LEAF: - case BTR_MODIFY_LEAF: - case BTR_SEARCH_TREE: - if (UNIV_LIKELY_NULL(rtr_info)) { - rtr_info->tree_savepoints[RTR_MAX_LEVELS] - = mtr->get_savepoint(); - } -latch_block: - if (latch_leaves) { - latch_leaves->savepoints[1] = mtr->get_savepoint(); - latch_leaves->blocks[1] = block; - } - block->page.fix(); - mtr->page_lock(block, mode); - if (UNIV_LIKELY_NULL(rtr_info)) { - rtr_info->tree_blocks[RTR_MAX_LEVELS] = block; - } - return; - case BTR_MODIFY_TREE: - /* It is exclusive for other operations which calls - btr_page_set_prev() */ - ut_ad(mtr->memo_contains_flagged(&cursor->index()->lock, - MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - save = mtr->get_savepoint(); - /* x-latch also siblings from left to right */ - left_page_no = btr_page_get_prev(block->page.frame); - - if (left_page_no != FIL_NULL) { - buf_block_t *b = btr_block_get( - *cursor->index(), left_page_no, RW_X_LATCH, - true, mtr); - - if (latch_leaves) { - latch_leaves->savepoints[0] = save; - latch_leaves->blocks[0] = b; - } - - if (UNIV_LIKELY_NULL(rtr_info)) { - rtr_info->tree_savepoints[RTR_MAX_LEVELS] - = save; - rtr_info->tree_blocks[RTR_MAX_LEVELS] = b; - } - - save = mtr->get_savepoint(); - } - - if (latch_leaves) { - latch_leaves->savepoints[1] = mtr->get_savepoint(); - latch_leaves->blocks[1] = block; - } - - block->page.fix(); - block->page.lock.x_lock(); - - mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); -#ifdef BTR_CUR_HASH_ADAPT - ut_ad(!btr_search_check_marked_free_index(block)); -#endif - - if (UNIV_LIKELY_NULL(rtr_info)) { - rtr_info->tree_savepoints[RTR_MAX_LEVELS + 1] = save; - rtr_info->tree_blocks[RTR_MAX_LEVELS + 1] = block; - } - - right_page_no = btr_page_get_next(block->page.frame); - - if (right_page_no != FIL_NULL) { - save = mtr->get_savepoint(); - - buf_block_t* b = btr_block_get( - *cursor->index(), right_page_no, RW_X_LATCH, - true, mtr); - if (latch_leaves) { - latch_leaves->savepoints[2] = save; - latch_leaves->blocks[2] = b; - } - - if (UNIV_LIKELY_NULL(rtr_info)) { - rtr_info->tree_savepoints[RTR_MAX_LEVELS + 2] - = save; - rtr_info->tree_blocks[RTR_MAX_LEVELS + 2] = b; - } - } - - return; - - case BTR_SEARCH_PREV: - case BTR_MODIFY_PREV: - ut_ad(!rtr_info); - static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, ""); - static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, ""); - static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV) - == (RW_S_LATCH ^ RW_X_LATCH), ""); - - /* Because we are holding index->lock, no page splits - or merges may run concurrently, and we may read - FIL_PAGE_PREV from a buffer-fixed, unlatched page. */ - left_page_no = btr_page_get_prev(block->page.frame); - - if (left_page_no != FIL_NULL) { - save = mtr->get_savepoint(); - cursor->left_block = btr_block_get( - *cursor->index(), left_page_no, - mode, true, mtr); - if (latch_leaves) { - latch_leaves->savepoints[0] = save; - latch_leaves->blocks[0] = cursor->left_block; - } - } - - goto latch_block; - case BTR_CONT_MODIFY_TREE: - ut_ad(cursor->index()->is_spatial()); - return; - } - - MY_ASSERT_UNREACHABLE(); -} - /** Load the instant ALTER TABLE metadata from the clustered index when loading a table definition. @param[in,out] index clustered index definition @@ -729,98 +568,6 @@ bool btr_cur_instant_root_init(dict_index_t* index, const page_t* page) return index->n_core_null_bytes > 128; } -/** Optimistically latches the leaf page or pages requested. -@param[in] block guessed buffer block -@param[in] modify_clock modify clock value -@param[in,out] latch_mode BTR_SEARCH_LEAF, ... -@param[in,out] cursor cursor -@param[in] mtr mini-transaction -@return true if success */ -TRANSACTIONAL_TARGET -bool -btr_cur_optimistic_latch_leaves( - buf_block_t* block, - ib_uint64_t modify_clock, - btr_latch_mode* latch_mode, - btr_cur_t* cursor, - mtr_t* mtr) -{ - ut_ad(block->page.buf_fix_count()); - ut_ad(block->page.in_file()); - ut_ad(block->page.frame); - - switch (*latch_mode) { - default: - MY_ASSERT_UNREACHABLE(); - return(false); - case BTR_SEARCH_LEAF: - case BTR_MODIFY_LEAF: - return(buf_page_optimistic_get(*latch_mode, block, - modify_clock, mtr)); - case BTR_SEARCH_PREV: /* btr_pcur_move_backward_from_page() */ - case BTR_MODIFY_PREV: /* Ditto, or ibuf_insert() */ - uint32_t curr_page_no, left_page_no; - { - transactional_shared_lock_guard<block_lock> g{ - block->page.lock}; - if (block->modify_clock != modify_clock) { - return false; - } - curr_page_no = block->page.id().page_no(); - left_page_no = btr_page_get_prev(block->page.frame); - } - - static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, ""); - static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, ""); - static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV) - == (RW_S_LATCH ^ RW_X_LATCH), ""); - - const rw_lock_type_t mode = rw_lock_type_t( - *latch_mode & (RW_X_LATCH | RW_S_LATCH)); - - if (left_page_no != FIL_NULL) { - cursor->left_block = buf_page_get_gen( - page_id_t(cursor->index()->table->space_id, - left_page_no), - cursor->index()->table->space->zip_size(), - mode, nullptr, BUF_GET_POSSIBLY_FREED, mtr); - - if (cursor->left_block - && btr_page_get_next( - cursor->left_block->page.frame) - != curr_page_no) { -release_left_block: - mtr->release_last_page(); - return false; - } - } else { - cursor->left_block = nullptr; - } - - if (buf_page_optimistic_get(mode, block, modify_clock, mtr)) { - if (btr_page_get_prev(block->page.frame) - == left_page_no) { - /* block was already buffer-fixed while - entering the function and - buf_page_optimistic_get() buffer-fixes - it again. */ - ut_ad(2 <= block->page.buf_fix_count()); - *latch_mode = btr_latch_mode(mode); - return(true); - } - - mtr->release_last_page(); - } - - ut_ad(block->page.buf_fix_count()); - if (cursor->left_block) { - goto release_left_block; - } - } - - return false; -} - /** Gets intention in btr_intention_t from latch_mode, and cleares the intention at the latch_mode. @@ -848,38 +595,6 @@ btr_intention_t btr_cur_get_and_clear_intention(btr_latch_mode *latch_mode) return(intention); } -/** -Gets the desired latch type for the root leaf (root page is root leaf) -at the latch mode. -@param latch_mode in: BTR_SEARCH_LEAF, ... -@return latch type */ -static -rw_lock_type_t -btr_cur_latch_for_root_leaf( - ulint latch_mode) -{ - switch (latch_mode) { - case BTR_SEARCH_LEAF: - case BTR_SEARCH_TREE: - case BTR_SEARCH_PREV: - return(RW_S_LATCH); - case BTR_MODIFY_LEAF: - case BTR_MODIFY_TREE: - case BTR_MODIFY_PREV: - return(RW_X_LATCH); - case BTR_CONT_MODIFY_TREE: - case BTR_CONT_SEARCH_TREE: - /* A root page should be latched already, - and don't need to be latched here. - fall through (RW_NO_LATCH) */ - case BTR_NO_LATCHES: - return(RW_NO_LATCH); - } - - MY_ASSERT_UNREACHABLE(); - return(RW_NO_LATCH); /* avoid compiler warnings */ -} - /** @return whether the distance between two records is at most the specified value */ static bool @@ -1197,1221 +912,879 @@ static ulint btr_node_ptr_max_size(const dict_index_t* index) return rec_max_size; } -/********************************************************************//** -Searches an index tree and positions a tree cursor on a given level. -NOTE: n_fields_cmp in tuple must be set so that it cannot be compared -to node pointer page number fields on the upper levels of the tree! -Note that if mode is PAGE_CUR_LE, which is used in inserts, then -cursor->up_match and cursor->low_match both will have sensible values. -If mode is PAGE_CUR_GE, then up_match will a have a sensible value. - -If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the -search tuple should be performed in the B-tree. InnoDB does an insert -immediately after the cursor. Thus, the cursor may end up on a user record, -or on a page infimum record. -@param level the tree level of search -@param tuple data tuple; NOTE: n_fields_cmp in tuple must be set so that - it cannot get compared to the node ptr page number field! -@param mode PAGE_CUR_L, ...; NOTE that if the search is made using a - unique prefix of a record, mode should be PAGE_CUR_LE, not - PAGE_CUR_GE, as the latter may end up on the previous page of - the record! Inserts should always be made using PAGE_CUR_LE - to search the position! -@param latch_mode BTR_SEARCH_LEAF, ..., ORed with at most one of BTR_INSERT, - BTR_DELETE_MARK, or BTR_DELETE; - cursor->left_block is used to store a pointer to the left - neighbor page -@param cursor tree cursor; the cursor page is s- or x-latched, but see also - above! -@param mtr mini-transaction -@param autoinc PAGE_ROOT_AUTO_INC to be written (0 if none) -@return DB_SUCCESS on success or error code otherwise */ -TRANSACTIONAL_TARGET -dberr_t btr_cur_search_to_nth_level(ulint level, - const dtuple_t *tuple, - page_cur_mode_t mode, - btr_latch_mode latch_mode, - btr_cur_t *cursor, mtr_t *mtr, - ib_uint64_t autoinc) +/** @return a B-tree search mode suitable for non-leaf pages +@param mode leaf page search mode */ +static inline page_cur_mode_t btr_cur_nonleaf_mode(page_cur_mode_t mode) { - page_t* page = NULL; /* remove warning */ - buf_block_t* block; - buf_block_t* guess; - ulint height; - ulint up_match; - ulint up_bytes; - ulint low_match; - ulint low_bytes; - ulint rw_latch; - page_cur_mode_t page_mode; - page_cur_mode_t search_mode = PAGE_CUR_UNSUPP; - ulint buf_mode; - ulint node_ptr_max_size = srv_page_size / 2; - page_cur_t* page_cursor; - btr_op_t btr_op; - ulint root_height = 0; /* remove warning */ - - btr_intention_t lock_intention; - buf_block_t* tree_blocks[BTR_MAX_LEVELS]; - ulint tree_savepoints[BTR_MAX_LEVELS]; - ulint n_blocks = 0; - ulint n_releases = 0; - bool detected_same_key_root = false; - - ulint leftmost_from_level = 0; - buf_block_t** prev_tree_blocks = NULL; - ulint* prev_tree_savepoints = NULL; - ulint prev_n_blocks = 0; - ulint prev_n_releases = 0; - bool need_path = true; - bool rtree_parent_modified = false; - bool mbr_adj = false; - bool found = false; - dict_index_t * const index = cursor->index(); - - DBUG_ENTER("btr_cur_search_to_nth_level"); - -#ifdef BTR_CUR_ADAPT - btr_search_t* info; -#endif /* BTR_CUR_ADAPT */ - mem_heap_t* heap = NULL; - rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; - rec_offs* offsets = offsets_; - rec_offs offsets2_[REC_OFFS_NORMAL_SIZE]; - rec_offs* offsets2 = offsets2_; - rec_offs_init(offsets_); - rec_offs_init(offsets2_); - /* Currently, PAGE_CUR_LE is the only search mode used for searches - ending to upper levels */ - - ut_ad(level == 0 || mode == PAGE_CUR_LE - || RTREE_SEARCH_MODE(mode)); - ut_ad(dict_index_check_search_tuple(index, tuple)); - ut_ad(!dict_index_is_ibuf(index) || ibuf_inside(mtr)); - ut_ad(dtuple_check_typed(tuple)); - ut_ad(!(index->type & DICT_FTS)); - ut_ad(index->page != FIL_NULL); - - MEM_UNDEFINED(&cursor->up_match, sizeof cursor->up_match); - MEM_UNDEFINED(&cursor->up_bytes, sizeof cursor->up_bytes); - MEM_UNDEFINED(&cursor->low_match, sizeof cursor->low_match); - MEM_UNDEFINED(&cursor->low_bytes, sizeof cursor->low_bytes); -#ifdef UNIV_DEBUG - cursor->up_match = ULINT_UNDEFINED; - cursor->low_match = ULINT_UNDEFINED; -#endif /* UNIV_DEBUG */ - - const bool latch_by_caller = latch_mode & BTR_ALREADY_S_LATCHED; - - ut_ad(!latch_by_caller - || srv_read_only_mode - || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_S_LOCK - | MTR_MEMO_SX_LOCK)); - - /* These flags are mutually exclusive, they are lumped together - with the latch mode for historical reasons. It's possible for - none of the flags to be set. */ - switch (UNIV_EXPECT(latch_mode & BTR_DELETE, 0)) { - default: - btr_op = BTR_NO_OP; - break; - case BTR_INSERT: - btr_op = (latch_mode & BTR_IGNORE_SEC_UNIQUE) - ? BTR_INSERT_IGNORE_UNIQUE_OP - : BTR_INSERT_OP; - break; - case BTR_DELETE: - btr_op = BTR_DELETE_OP; - ut_a(cursor->purge_node); - break; - case BTR_DELETE_MARK: - btr_op = BTR_DELMARK_OP; - break; - } + if (mode > PAGE_CUR_GE) + { + ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE); + return mode; + } + if (mode == PAGE_CUR_GE) + return PAGE_CUR_L; + ut_ad(mode == PAGE_CUR_G); + return PAGE_CUR_LE; +} - /* Operations on the insert buffer tree cannot be buffered. */ - ut_ad(btr_op == BTR_NO_OP || !dict_index_is_ibuf(index)); - /* Operations on the clustered index cannot be buffered. */ - ut_ad(btr_op == BTR_NO_OP || !dict_index_is_clust(index)); - /* Operations on the temporary table(indexes) cannot be buffered. */ - ut_ad(btr_op == BTR_NO_OP || !index->table->is_temporary()); - /* Operation on the spatial index cannot be buffered. */ - ut_ad(btr_op == BTR_NO_OP || !dict_index_is_spatial(index)); +dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, + btr_latch_mode latch_mode, mtr_t *mtr) +{ + ut_ad(index()->is_btree() || index()->is_ibuf()); + ut_ad(!index()->is_ibuf() || ibuf_inside(mtr)); - lock_intention = btr_cur_get_and_clear_intention(&latch_mode); + buf_block_t *guess; + btr_op_t btr_op; + btr_intention_t lock_intention; + bool detected_same_key_root= false; - /* Turn the flags unrelated to the latch mode off. */ - latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); + mem_heap_t* heap = NULL; + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs* offsets = offsets_; + rec_offs offsets2_[REC_OFFS_NORMAL_SIZE]; + rec_offs* offsets2 = offsets2_; + rec_offs_init(offsets_); + rec_offs_init(offsets2_); + + ut_ad(dict_index_check_search_tuple(index(), tuple)); + ut_ad(dtuple_check_typed(tuple)); + ut_ad(index()->page != FIL_NULL); + + MEM_UNDEFINED(&up_match, sizeof up_match); + MEM_UNDEFINED(&up_bytes, sizeof up_bytes); + MEM_UNDEFINED(&low_match, sizeof low_match); + MEM_UNDEFINED(&low_bytes, sizeof low_bytes); + ut_d(up_match= ULINT_UNDEFINED); + ut_d(low_match= ULINT_UNDEFINED); + + ut_ad(!(latch_mode & BTR_ALREADY_S_LATCHED) || + mtr->memo_contains_flagged(&index()->lock, + MTR_MEMO_S_LOCK | MTR_MEMO_SX_LOCK | + MTR_MEMO_X_LOCK)); + + /* These flags are mutually exclusive, they are lumped together + with the latch mode for historical reasons. It's possible for + none of the flags to be set. */ + switch (UNIV_EXPECT(latch_mode & BTR_DELETE, 0)) { + default: + btr_op= BTR_NO_OP; + break; + case BTR_INSERT: + btr_op= (latch_mode & BTR_IGNORE_SEC_UNIQUE) + ? BTR_INSERT_IGNORE_UNIQUE_OP + : BTR_INSERT_OP; + break; + case BTR_DELETE: + btr_op= BTR_DELETE_OP; + ut_a(purge_node); + break; + case BTR_DELETE_MARK: + btr_op= BTR_DELMARK_OP; + break; + } - ut_ad(!latch_by_caller - || latch_mode == BTR_SEARCH_LEAF - || latch_mode == BTR_SEARCH_TREE - || latch_mode == BTR_MODIFY_LEAF); + /* Operations on the insert buffer tree cannot be buffered. */ + ut_ad(btr_op == BTR_NO_OP || !index()->is_ibuf()); + /* Operations on the clustered index cannot be buffered. */ + ut_ad(btr_op == BTR_NO_OP || !index()->is_clust()); + /* Operations on the temporary table(indexes) cannot be buffered. */ + ut_ad(btr_op == BTR_NO_OP || !index()->table->is_temporary()); - ut_ad(autoinc == 0 || dict_index_is_clust(index)); - ut_ad(autoinc == 0 - || latch_mode == BTR_MODIFY_TREE - || latch_mode == BTR_MODIFY_LEAF); - ut_ad(autoinc == 0 || level == 0); + const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED; + lock_intention= btr_cur_get_and_clear_intention(&latch_mode); + latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); - cursor->flag = BTR_CUR_BINARY; + ut_ad(!latch_by_caller + || latch_mode == BTR_SEARCH_LEAF + || latch_mode == BTR_MODIFY_LEAF + || latch_mode == BTR_MODIFY_TREE + || latch_mode == BTR_MODIFY_ROOT_AND_LEAF); + flag= BTR_CUR_BINARY; #ifndef BTR_CUR_ADAPT - guess = NULL; + guess= nullptr; #else - info = btr_search_get_info(index); - guess = info->root_guess; - -#ifdef BTR_CUR_HASH_ADAPT + btr_search_t *info= btr_search_get_info(index()); + guess= info->root_guess; + +# ifdef BTR_CUR_HASH_ADAPT +# ifdef UNIV_SEARCH_PERF_STAT + info->n_searches++; +# endif + /* We do a dirty read of btr_search_enabled below, + and btr_search_guess_on_hash() will have to check it again. */ + if (!btr_search_enabled); + else if (btr_search_guess_on_hash(index(), info, tuple, mode, + latch_mode, this, mtr)) + { + /* Search using the hash index succeeded */ + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + ++btr_cur_n_sea; -# ifdef UNIV_SEARCH_PERF_STAT - info->n_searches++; + return DB_SUCCESS; + } + else + ++btr_cur_n_non_sea; # endif - /* We do a dirty read of btr_search_enabled below, - and btr_search_guess_on_hash() will have to check it again. */ - if (!btr_search_enabled) { - } else if (autoinc == 0 - && latch_mode <= BTR_MODIFY_LEAF -# ifdef PAGE_CUR_LE_OR_EXTENDS - && mode != PAGE_CUR_LE_OR_EXTENDS -# endif /* PAGE_CUR_LE_OR_EXTENDS */ - && info->last_hash_succ - && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG) - && index->is_btree() && !index->table->is_temporary() - && btr_search_guess_on_hash(index, info, tuple, mode, - latch_mode, cursor, mtr)) { - - /* Search using the hash index succeeded */ - - ut_ad(cursor->up_match != ULINT_UNDEFINED - || mode != PAGE_CUR_GE); - ut_ad(cursor->up_match != ULINT_UNDEFINED - || mode != PAGE_CUR_LE); - ut_ad(cursor->low_match != ULINT_UNDEFINED - || mode != PAGE_CUR_LE); - ++btr_cur_n_sea; - - DBUG_RETURN(DB_SUCCESS); - } else { - ++btr_cur_n_non_sea; - } -# endif /* BTR_CUR_HASH_ADAPT */ -#endif /* BTR_CUR_ADAPT */ - - /* If the hash search did not succeed, do binary search down the - tree */ - - /* Store the position of the tree latch we push to mtr so that we - know how to release it when we have latched leaf node(s) */ - - ulint savepoint = mtr_set_savepoint(mtr); - - rw_lock_type_t upper_rw_latch; - - switch (latch_mode) { - case BTR_MODIFY_TREE: - /* Most of delete-intended operations are purging. - Free blocks and read IO bandwidth should be prior - for them, when the history list is glowing huge. */ - if (lock_intention == BTR_INTENTION_DELETE - && buf_pool.n_pend_reads - && trx_sys.history_size_approx() - > BTR_CUR_FINE_HISTORY_LENGTH) { -x_latch_index: - mtr_x_lock_index(index, mtr); - } else if (index->is_spatial() - && lock_intention <= BTR_INTENTION_BOTH) { - /* X lock the if there is possibility of - pessimistic delete on spatial index. As we could - lock upward for the tree */ - goto x_latch_index; - } else { - mtr_sx_lock_index(index, mtr); - } - upper_rw_latch = RW_X_LATCH; - break; - case BTR_CONT_MODIFY_TREE: - ut_ad(srv_read_only_mode - || mtr->memo_contains_flagged(&index->lock, - MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - if (index->is_spatial()) { - /* If we are about to locate parent page for split - and/or merge operation for R-Tree index, X latch - the parent */ - upper_rw_latch = RW_X_LATCH; - break; - } - /* fall through */ - case BTR_CONT_SEARCH_TREE: - /* Do nothing */ - ut_ad(srv_read_only_mode - || mtr->memo_contains_flagged(&index->lock, - MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - upper_rw_latch = RW_NO_LATCH; - break; - default: - if (!srv_read_only_mode) { - if (!latch_by_caller) { - ut_ad(latch_mode != BTR_SEARCH_TREE); - mtr_s_lock_index(index, mtr); - } - upper_rw_latch = RW_S_LATCH; - } else { - upper_rw_latch = RW_NO_LATCH; - } - } - const rw_lock_type_t root_leaf_rw_latch = btr_cur_latch_for_root_leaf( - latch_mode); - - page_cursor = btr_cur_get_page_cur(cursor); - page_cursor->index = index; - - const ulint zip_size = index->table->space->zip_size(); - - /* Start with the root page. */ - page_id_t page_id(index->table->space_id, index->page); - - if (root_leaf_rw_latch == RW_X_LATCH) { - node_ptr_max_size = btr_node_ptr_max_size(index); - } - - up_match = 0; - up_bytes = 0; - low_match = 0; - low_bytes = 0; - - height = ULINT_UNDEFINED; - - /* We use these modified search modes on non-leaf levels of the - B-tree. These let us end up in the right B-tree leaf. In that leaf - we use the original search mode. */ - - switch (mode) { - case PAGE_CUR_GE: - page_mode = PAGE_CUR_L; - break; - case PAGE_CUR_G: - page_mode = PAGE_CUR_LE; - break; - default: -#ifdef PAGE_CUR_LE_OR_EXTENDS - ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE - || RTREE_SEARCH_MODE(mode) - || mode == PAGE_CUR_LE_OR_EXTENDS); -#else /* PAGE_CUR_LE_OR_EXTENDS */ - ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE - || RTREE_SEARCH_MODE(mode)); -#endif /* PAGE_CUR_LE_OR_EXTENDS */ - page_mode = mode; - break; - } - - /* Loop and search until we arrive at the desired level */ - btr_latch_leaves_t latch_leaves = {{NULL, NULL, NULL}, {0, 0, 0}}; - -search_loop: - buf_mode = BUF_GET; - rw_latch = RW_NO_LATCH; - rtree_parent_modified = false; - - if (height != 0) { - /* We are about to fetch the root or a non-leaf page. */ - if ((latch_mode != BTR_MODIFY_TREE || height == level) - && !prev_tree_blocks) { - /* If doesn't have SX or X latch of index, - each pages should be latched before reading. */ - if (height == ULINT_UNDEFINED - && upper_rw_latch == RW_S_LATCH - && autoinc) { - /* needs sx-latch of root page - for writing PAGE_ROOT_AUTO_INC */ - rw_latch = RW_SX_LATCH; - } else { - rw_latch = upper_rw_latch; - } - } - } else if (latch_mode <= BTR_MODIFY_LEAF) { - rw_latch = latch_mode; - - if (btr_op != BTR_NO_OP - && ibuf_should_try(index, btr_op != BTR_INSERT_OP)) { - - /* Try to buffer the operation if the leaf - page is not in the buffer pool. */ - - buf_mode = btr_op == BTR_DELETE_OP - ? BUF_GET_IF_IN_POOL_OR_WATCH - : BUF_GET_IF_IN_POOL; - } - } - -retry_page_get: - ut_ad(n_blocks < BTR_MAX_LEVELS); - tree_savepoints[n_blocks] = mtr_set_savepoint(mtr); - dberr_t err; - block = buf_page_get_gen(page_id, zip_size, rw_latch, guess, - buf_mode, mtr, &err, - height == 0 && !index->is_clust()); - if (!block) { - switch (err) { - case DB_SUCCESS: - /* change buffering */ - break; - case DB_DECRYPTION_FAILED: - btr_decryption_failed(*index); - /* fall through */ - default: - goto func_exit; - } - - /* This must be a search to perform an insert/delete - mark/ delete; try using the insert/delete buffer */ - - ut_ad(height == 0); - ut_ad(cursor->thr); - - switch (btr_op) { - default: - MY_ASSERT_UNREACHABLE(); - break; - case BTR_INSERT_OP: - case BTR_INSERT_IGNORE_UNIQUE_OP: - ut_ad(buf_mode == BUF_GET_IF_IN_POOL); - - if (ibuf_insert(IBUF_OP_INSERT, tuple, index, - page_id, zip_size, cursor->thr)) { - - cursor->flag = BTR_CUR_INSERT_TO_IBUF; - - goto func_exit; - } - break; - - case BTR_DELMARK_OP: - ut_ad(buf_mode == BUF_GET_IF_IN_POOL); - - if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple, - index, page_id, zip_size, - cursor->thr)) { - - cursor->flag = BTR_CUR_DEL_MARK_IBUF; - - goto func_exit; - } - - break; - - case BTR_DELETE_OP: - ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH); - ut_ad(index->is_btree()); - auto& chain = buf_pool.page_hash.cell_get( - page_id.fold()); - - if (!row_purge_poss_sec(cursor->purge_node, - index, tuple)) { - - /* The record cannot be purged yet. */ - cursor->flag = BTR_CUR_DELETE_REF; - } else if (ibuf_insert(IBUF_OP_DELETE, tuple, - index, page_id, zip_size, - cursor->thr)) { +#endif - /* The purge was buffered. */ - cursor->flag = BTR_CUR_DELETE_IBUF; - } else { - /* The purge could not be buffered. */ - buf_pool.watch_unset(page_id, chain); - break; - } + /* If the hash search did not succeed, do binary search down the + tree */ - buf_pool.watch_unset(page_id, chain); - goto func_exit; - } + /* Store the position of the tree latch we push to mtr so that we + know how to release it when we have latched leaf node(s) */ - /* Insert to the insert/delete buffer did not succeed, we - must read the page from disk. */ + const ulint savepoint= mtr->get_savepoint(); - buf_mode = BUF_GET; + ulint node_ptr_max_size= 0; + rw_lock_type_t rw_latch= RW_S_LATCH; - goto retry_page_get; - } + switch (latch_mode) { + case BTR_MODIFY_TREE: + rw_latch= RW_X_LATCH; + node_ptr_max_size= btr_node_ptr_max_size(index()); + if (latch_by_caller) + { + ut_ad(mtr->memo_contains_flagged(&index()->lock, MTR_MEMO_X_LOCK)); + break; + } + if (lock_intention == BTR_INTENTION_DELETE && buf_pool.n_pend_reads && + trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH) + /* Most delete-intended operations are due to the purge of history. + Prioritize them when the history list is growing huge. */ + mtr_x_lock_index(index(), mtr); + else + mtr_sx_lock_index(index(), mtr); + break; +#ifdef UNIV_DEBUG + case BTR_CONT_MODIFY_TREE: + ut_ad("invalid mode" == 0); + break; +#endif + case BTR_MODIFY_ROOT_AND_LEAF: + rw_latch= RW_SX_LATCH; + /* fall through */ + default: + if (!latch_by_caller) + mtr_s_lock_index(index(), mtr); + } - tree_blocks[n_blocks] = block; + const ulint zip_size= index()->table->space->zip_size(); - if (height && prev_tree_blocks) { - /* also latch left sibling */ - ut_ad(rw_latch == RW_NO_LATCH); + /* Start with the root page. */ + page_id_t page_id(index()->table->space_id, index()->page); - rw_latch = upper_rw_latch; + const page_cur_mode_t page_mode= btr_cur_nonleaf_mode(mode); + ulint height= ULINT_UNDEFINED; + up_match= 0; + up_bytes= 0; + low_match= 0; + low_bytes= 0; + ulint buf_mode= BUF_GET; + search_loop: + dberr_t err; + auto block_savepoint= mtr->get_savepoint(); + buf_block_t *block= + buf_page_get_gen(page_id, zip_size, rw_latch, guess, buf_mode, mtr, + &err, height == 0 && !index()->is_clust()); + if (!block) + { + switch (err) { + case DB_DECRYPTION_FAILED: + btr_decryption_failed(*index()); + /* fall through */ + default: + func_exit: + if (UNIV_LIKELY_NULL(heap)) + mem_heap_free(heap); + return err; + case DB_SUCCESS: + /* This must be a search to perform an insert, delete mark, or delete; + try using the change buffer */ + ut_ad(height == 0); + ut_ad(thr); + break; + } - /* Because we are holding index->lock, no page splits - or merges may run concurrently, and we may read - FIL_PAGE_PREV from a buffer-fixed, unlatched page. */ - uint32_t left_page_no = btr_page_get_prev(block->page.frame); + switch (btr_op) { + default: + MY_ASSERT_UNREACHABLE(); + break; + case BTR_INSERT_OP: + case BTR_INSERT_IGNORE_UNIQUE_OP: + ut_ad(buf_mode == BUF_GET_IF_IN_POOL); - if (left_page_no != FIL_NULL) { - ut_ad(prev_n_blocks < leftmost_from_level); + if (ibuf_insert(IBUF_OP_INSERT, tuple, index(), page_id, zip_size, thr)) + { + flag= BTR_CUR_INSERT_TO_IBUF; + goto func_exit; + } + break; - prev_tree_savepoints[prev_n_blocks] - = mtr_set_savepoint(mtr); - buf_block_t* get_block = buf_page_get_gen( - page_id_t(page_id.space(), left_page_no), - zip_size, rw_latch, NULL, buf_mode, - mtr, &err); - if (!get_block) { - if (err == DB_DECRYPTION_FAILED) { - btr_decryption_failed(*index); - } - goto func_exit; - } + case BTR_DELMARK_OP: + ut_ad(buf_mode == BUF_GET_IF_IN_POOL); - prev_tree_blocks[prev_n_blocks++] = get_block; - /* BTR_MODIFY_TREE doesn't update prev/next_page_no, - without their parent page's lock. So, not needed to - retry here, because we have the parent page's lock. */ - } + if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple, + index(), page_id, zip_size, thr)) + { + flag = BTR_CUR_DEL_MARK_IBUF; + goto func_exit; + } - mtr->s_lock_register(tree_savepoints[n_blocks]); - block->page.lock.s_lock(); - } + break; - page = buf_block_get_frame(block); + case BTR_DELETE_OP: + ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH); + auto& chain = buf_pool.page_hash.cell_get(page_id.fold()); + + if (!row_purge_poss_sec(purge_node, index(), tuple)) + /* The record cannot be purged yet. */ + flag= BTR_CUR_DELETE_REF; + else if (ibuf_insert(IBUF_OP_DELETE, tuple, index(), + page_id, zip_size, thr)) + /* The purge was buffered. */ + flag= BTR_CUR_DELETE_IBUF; + else + { + /* The purge could not be buffered. */ + buf_pool.watch_unset(page_id, chain); + break; + } - if (height == ULINT_UNDEFINED - && page_is_leaf(page) - && rw_latch != RW_NO_LATCH - && rw_latch != root_leaf_rw_latch) { - /* The root page is also a leaf page (root_leaf). - We should reacquire the page, because the root page - is latched differently from leaf pages. */ - ut_ad(root_leaf_rw_latch != RW_NO_LATCH); - ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_SX_LATCH); - ut_ad(rw_latch == RW_S_LATCH || autoinc); - ut_ad(!autoinc || root_leaf_rw_latch == RW_X_LATCH); + buf_pool.watch_unset(page_id, chain); + goto func_exit; + } - ut_ad(n_blocks == 0); - mtr_release_block_at_savepoint( - mtr, tree_savepoints[n_blocks], - tree_blocks[n_blocks]); + /* Change buffering did not succeed, we must read the page. */ + buf_mode= BUF_GET; + goto search_loop; + } - upper_rw_latch = root_leaf_rw_latch; - goto search_loop; - } + if (!!page_is_comp(block->page.frame) != index()->table->not_redundant() || + btr_page_get_index_id(block->page.frame) != index()->id || + fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE || + !fil_page_index_page_check(block->page.frame)) + { + corrupted: + ut_ad("corrupted" == 0); // FIXME: remove this + err= DB_CORRUPTION; + goto func_exit; + } + page_cur.block= block; + ut_ad(block == mtr->at_savepoint(block_savepoint)); #ifdef UNIV_ZIP_DEBUG - if (rw_latch != RW_NO_LATCH) { - const page_zip_des_t* page_zip - = buf_block_get_page_zip(block); - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); - } + if (rw_latch == RW_NO_LATCH); + else if (const page_zip_des_t *page_zip= buf_block_get_page_zip(block)) + ut_a(page_zip_validate(page_zip, block->page.frame, index())); #endif /* UNIV_ZIP_DEBUG */ + const uint32_t page_level= btr_page_get_level(block->page.frame); - ut_ad(fil_page_index_page_check(page)); - ut_ad(index->id == btr_page_get_index_id(page)); - - if (height == ULINT_UNDEFINED) { - /* We are in the root node */ - - height = btr_page_get_level(page); - root_height = height; - cursor->tree_height = root_height + 1; - - if (dict_index_is_spatial(index)) { - ut_ad(cursor->rtr_info); - - /* If SSN in memory is not initialized, fetch - it from root page */ - if (!rtr_get_current_ssn_id(index)) { - /* FIXME: do this in dict_load_table_one() */ - index->set_ssn(page_get_ssn_id(page) + 1); - } - - /* Save the MBR */ - cursor->rtr_info->thr = cursor->thr; - rtr_get_mbr_from_tuple(tuple, &cursor->rtr_info->mbr); - } - + if (height == ULINT_UNDEFINED) + { + /* We are in the B-tree index root page. */ #ifdef BTR_CUR_ADAPT - info->root_guess = block; + info->root_guess= block; #endif - } - - if (height == 0) { - if (rw_latch == RW_NO_LATCH) { - btr_cur_latch_leaves(block, latch_mode, cursor, mtr, - &latch_leaves); - } - - switch (latch_mode) { - case BTR_MODIFY_TREE: - case BTR_CONT_MODIFY_TREE: - case BTR_CONT_SEARCH_TREE: - break; - default: - if (!latch_by_caller - && !srv_read_only_mode) { - /* Release the tree s-latch */ - mtr_release_s_latch_at_savepoint( - mtr, savepoint, - &index->lock); - } - - /* release upper blocks */ - if (prev_tree_blocks) { - ut_ad(!autoinc); - for (; - prev_n_releases < prev_n_blocks; - prev_n_releases++) { - mtr_release_block_at_savepoint( - mtr, - prev_tree_savepoints[ - prev_n_releases], - prev_tree_blocks[ - prev_n_releases]); - } - } - - for (; n_releases < n_blocks; n_releases++) { - if (n_releases == 0 - && (autoinc)) { - /* keep the root page latch */ - ut_ad(mtr->memo_contains_flagged( - tree_blocks[n_releases], - MTR_MEMO_PAGE_SX_FIX - | MTR_MEMO_PAGE_X_FIX)); - continue; - } - - mtr_release_block_at_savepoint( - mtr, tree_savepoints[n_releases], - tree_blocks[n_releases]); - } - } - - page_mode = mode; - } - - if (dict_index_is_spatial(index)) { - /* Remember the page search mode */ - search_mode = page_mode; - - /* Some adjustment on search mode, when the - page search mode is PAGE_CUR_RTREE_LOCATE - or PAGE_CUR_RTREE_INSERT, as we are searching - with MBRs. When it is not the target level, we - should search all sub-trees that "CONTAIN" the - search range/MBR. When it is at the target - level, the search becomes PAGE_CUR_LE */ - if (page_mode == PAGE_CUR_RTREE_LOCATE - && level == height) { - if (level == 0) { - page_mode = PAGE_CUR_LE; - } else { - page_mode = PAGE_CUR_RTREE_GET_FATHER; - } - } + height= page_level; + tree_height= height + 1; - if (page_mode == PAGE_CUR_RTREE_INSERT) { - page_mode = (level == height) - ? PAGE_CUR_LE - : PAGE_CUR_RTREE_INSERT; - - ut_ad(!page_is_leaf(page) || page_mode == PAGE_CUR_LE); - } - - /* "need_path" indicates if we need to tracking the parent - pages, if it is not spatial comparison, then no need to - track it */ - if (page_mode < PAGE_CUR_CONTAIN) { - need_path = false; - } - - up_match = 0; - low_match = 0; - - if (latch_mode == BTR_MODIFY_TREE - || latch_mode == BTR_CONT_MODIFY_TREE - || latch_mode == BTR_CONT_SEARCH_TREE) { - /* Tree are locked, no need for Page Lock to protect - the "path" */ - cursor->rtr_info->need_page_lock = false; - } + if (!height) + { + /* The root page is also a leaf page. + We may have to reacquire the page latch in a different mode. */ + switch (rw_latch) { + case RW_S_LATCH: + if ((latch_mode & ~12) != RW_S_LATCH) + { + ut_ad(rw_lock_type_t(latch_mode & ~12) == RW_X_LATCH); + goto relatch_x; } + if (latch_mode != BTR_MODIFY_PREV) + { + if (!latch_by_caller) + /* Release the tree s-latch */ + mtr->rollback_to_savepoint(savepoint, savepoint + 1); + goto reached_latched_leaf; + } + /* fall through */ + case RW_SX_LATCH: + ut_ad(rw_latch == RW_S_LATCH || + latch_mode == BTR_MODIFY_ROOT_AND_LEAF); + relatch_x: + mtr->rollback_to_savepoint(block_savepoint); + height= ULINT_UNDEFINED; + rw_latch= RW_X_LATCH; + goto search_loop; + case RW_X_LATCH: + if (latch_mode == BTR_MODIFY_TREE) + goto reached_index_root_and_leaf; + goto reached_root_and_leaf; + case RW_NO_LATCH: + ut_ad(mtr->memo_contains_flagged(&index()->lock, MTR_MEMO_X_LOCK)); + } + goto reached_leaf; + } + } + else if (UNIV_UNLIKELY(height != page_level)) + goto corrupted; + else + switch (latch_mode) { + case BTR_MODIFY_TREE: + break; + case BTR_MODIFY_ROOT_AND_LEAF: + ut_ad((mtr->at_savepoint(block_savepoint - 1)->page.id().page_no() == + index()->page) == (tree_height <= height + 2)); + if (tree_height <= height + 2) + /* Retain the root page latch. */ + break; + goto release_parent_page; + default: + if (rw_latch == RW_NO_LATCH) + { + ut_ad(!height); + break; + } + release_parent_page: + ut_ad(block_savepoint > savepoint); + mtr->rollback_to_savepoint(block_savepoint - 1, block_savepoint); + block_savepoint--; + } - page_cursor->block = block; - - if (dict_index_is_spatial(index) && page_mode >= PAGE_CUR_CONTAIN) { - ut_ad(need_path); - found = rtr_cur_search_with_match( - block, index, tuple, page_mode, page_cursor, - cursor->rtr_info); + if (!height) + { + reached_leaf: + /* We reached the leaf level. */ + ut_ad(block == mtr->at_savepoint(block_savepoint)); - /* Need to use BTR_MODIFY_TREE to do the MBR adjustment */ - if (search_mode == PAGE_CUR_RTREE_INSERT - && cursor->rtr_info->mbr_adj) { - static_assert(BTR_MODIFY_TREE - == (8 | BTR_MODIFY_LEAF), ""); + if (latch_mode == BTR_MODIFY_ROOT_AND_LEAF) + { + reached_root_and_leaf: + if (!latch_by_caller) + mtr->rollback_to_savepoint(savepoint, savepoint + 1); + reached_index_root_and_leaf: + ut_ad(rw_latch == RW_X_LATCH); +#ifdef BTR_CUR_HASH_ADAPT + btr_search_drop_page_hash_index(block, true); +#endif + if (page_cur_search_with_match(tuple, mode, &up_match, &low_match, + &page_cur, nullptr)) + goto corrupted; + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + goto func_exit; + } - if (!(latch_mode & 8)) { - /* Parent MBR needs updated, should retry - with BTR_MODIFY_TREE */ - goto func_exit; - } + switch (latch_mode) { + case BTR_SEARCH_PREV: + case BTR_MODIFY_PREV: + static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, ""); + static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, ""); + ut_ad(!latch_by_caller); - rtree_parent_modified = true; - cursor->rtr_info->mbr_adj = false; - mbr_adj = true; - } + if (rw_latch == RW_NO_LATCH) + { + /* latch also siblings from left to right */ + rw_latch= rw_lock_type_t(latch_mode & (RW_X_LATCH | RW_S_LATCH)); + if (page_has_prev(block->page.frame) && + !btr_block_get(*index(), btr_page_get_prev(block->page.frame), + rw_latch, false, mtr, &err)) + goto func_exit; + mtr->upgrade_buffer_fix(block_savepoint, rw_latch); + if (page_has_next(block->page.frame) && + !btr_block_get(*index(), btr_page_get_next(block->page.frame), + rw_latch, false, mtr, &err)) + goto func_exit; + } + goto release_tree; + case BTR_SEARCH_LEAF: + case BTR_MODIFY_LEAF: + if (rw_latch == RW_NO_LATCH) + { + ut_ad(index()->is_ibuf()); + mtr->upgrade_buffer_fix(block_savepoint, rw_lock_type_t(latch_mode)); + } + if (!latch_by_caller) + { +release_tree: + /* Release the tree s-latch */ + block_savepoint--; + mtr->rollback_to_savepoint(savepoint, savepoint + 1); + } + /* release upper blocks */ + if (savepoint < block_savepoint) + mtr->rollback_to_savepoint(savepoint, block_savepoint); + break; + default: + ut_ad(latch_mode == BTR_MODIFY_TREE); + ut_ad(rw_latch == RW_NO_LATCH); + /* x-latch also siblings from left to right */ + if (page_has_prev(block->page.frame) && + !btr_block_get(*index(), btr_page_get_prev(block->page.frame), + RW_X_LATCH, false, mtr, &err)) + goto func_exit; + mtr->upgrade_buffer_fix(block_savepoint, RW_X_LATCH); + if (page_has_next(block->page.frame) && + !btr_block_get(*index(), btr_page_get_next(block->page.frame), + RW_X_LATCH, false, mtr, &err)) + goto func_exit; + } - if (found && page_mode == PAGE_CUR_RTREE_GET_FATHER) { - cursor->low_match = - DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1; - } + reached_latched_leaf: #ifdef BTR_CUR_HASH_ADAPT - } else if (height == 0 && btr_search_enabled - && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG) - && index->is_btree()) { - /* The adaptive hash index is only used when searching - for leaf pages (height==0), but not in r-trees. - We only need the byte prefix comparison for the purpose - of updating the adaptive hash index. */ - if (page_cur_search_with_match_bytes( - tuple, page_mode, &up_match, &up_bytes, - &low_match, &low_bytes, page_cursor)) { - err = DB_CORRUPTION; - goto func_exit; - } + if (btr_search_enabled && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG)) + { + if (page_cur_search_with_match_bytes(tuple, mode, + &up_match, &up_bytes, + &low_match, &low_bytes, &page_cur)) + goto corrupted; + } + else #endif /* BTR_CUR_HASH_ADAPT */ - } else { - /* Search for complete index fields. */ - up_bytes = low_bytes = 0; - if (page_cur_search_with_match( - tuple, page_mode, &up_match, - &low_match, page_cursor, - need_path ? cursor->rtr_info : nullptr)) { - err = DB_CORRUPTION; - goto func_exit; - } - } - - /* If this is the desired level, leave the loop */ - - ut_ad(height == btr_page_get_level(page_cur_get_page(page_cursor))); - - /* Add Predicate lock if it is serializable isolation - and only if it is in the search case */ - if (dict_index_is_spatial(index) - && cursor->rtr_info->need_prdt_lock - && mode != PAGE_CUR_RTREE_INSERT - && mode != PAGE_CUR_RTREE_LOCATE - && mode >= PAGE_CUR_CONTAIN) { - lock_prdt_t prdt; - - { - trx_t* trx = thr_get_trx(cursor->thr); - TMLockTrxGuard g{TMLockTrxArgs(*trx)}; - lock_init_prdt_from_mbr( - &prdt, &cursor->rtr_info->mbr, mode, - trx->lock.lock_heap); - } - - if (rw_latch == RW_NO_LATCH && height != 0) { - block->page.lock.s_lock(); - } - - lock_prdt_lock(block, &prdt, index, LOCK_S, - LOCK_PREDICATE, cursor->thr); - - if (rw_latch == RW_NO_LATCH && height != 0) { - block->page.lock.s_unlock(); - } - } - - if (level != height) { - - const rec_t* node_ptr; - ut_ad(height > 0); - - height--; - guess = NULL; - - node_ptr = page_cur_get_rec(page_cursor); - - offsets = rec_get_offsets(node_ptr, index, offsets, 0, - ULINT_UNDEFINED, &heap); - - /* If the rec is the first or last in the page for - pessimistic delete intention, it might cause node_ptr insert - for the upper level. We should change the intention and retry. - */ - if (latch_mode == BTR_MODIFY_TREE - && btr_cur_need_opposite_intention( - page, lock_intention, node_ptr)) { - -need_opposite_intention: - ut_ad(upper_rw_latch == RW_X_LATCH); - - if (n_releases > 0) { - /* release root block */ - mtr_release_block_at_savepoint( - mtr, tree_savepoints[0], - tree_blocks[0]); - } - - /* release all blocks */ - for (; n_releases <= n_blocks; n_releases++) { - mtr_release_block_at_savepoint( - mtr, tree_savepoints[n_releases], - tree_blocks[n_releases]); - } - - lock_intention = BTR_INTENTION_BOTH; - - page_id.set_page_no(index->page); - up_match = 0; - low_match = 0; - height = ULINT_UNDEFINED; + if (page_cur_search_with_match(tuple, mode, &up_match, &low_match, + &page_cur, nullptr)) + goto corrupted; - n_blocks = 0; - n_releases = 0; + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); - goto search_loop; - } +#ifdef BTR_CUR_HASH_ADAPT + /* We do a dirty read of btr_search_enabled here. We will + properly check btr_search_enabled again in + btr_search_build_page_hash_index() before building a page hash + index, while holding search latch. */ + if (!btr_search_enabled); + else if (tuple->info_bits & REC_INFO_MIN_REC_FLAG) + /* This may be a search tuple for btr_pcur_t::restore_position(). */ + ut_ad(tuple->is_metadata() || + (tuple->is_metadata(tuple->info_bits ^ REC_STATUS_INSTANT))); + else if (index()->table->is_temporary()); + else if (!rec_is_metadata(page_cur.rec, *index())) + btr_search_info_update(index(), this); +#endif /* BTR_CUR_HASH_ADAPT */ - if (dict_index_is_spatial(index)) { - if (page_rec_is_supremum(node_ptr)) { - cursor->low_match = 0; - cursor->up_match = 0; - goto func_exit; - } + goto func_exit; + } - /* If we are doing insertion or record locating, - remember the tree nodes we visited */ - if (page_mode == PAGE_CUR_RTREE_INSERT - || (search_mode == PAGE_CUR_RTREE_LOCATE - && (latch_mode != BTR_MODIFY_LEAF))) { - bool add_latch = false; - - if (latch_mode == BTR_MODIFY_TREE - && rw_latch == RW_NO_LATCH) { - ut_ad(mtr->memo_contains_flagged( - &index->lock, MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - block->page.lock.s_lock(); - add_latch = true; - } + guess= nullptr; + if (page_cur_search_with_match(tuple, page_mode, &up_match, &low_match, + &page_cur, nullptr)) + goto corrupted; + offsets= rec_get_offsets(page_cur.rec, index(), offsets, 0, ULINT_UNDEFINED, + &heap); - /* Store the parent cursor location */ -#ifdef UNIV_DEBUG - ulint num_stored = rtr_store_parent_path( - block, cursor, latch_mode, - height + 1, mtr); -#else - rtr_store_parent_path( - block, cursor, latch_mode, - height + 1, mtr); -#endif + ut_ad(block == mtr->at_savepoint(block_savepoint)); - if (page_mode == PAGE_CUR_RTREE_INSERT) { - btr_pcur_t* r_cursor = - rtr_get_parent_cursor( - cursor, height + 1, - true); - /* If it is insertion, there should - be only one parent for each level - traverse */ -#ifdef UNIV_DEBUG - ut_ad(num_stored == 1); -#endif - - node_ptr = btr_pcur_get_rec(r_cursor); + switch (latch_mode) { + default: + break; + case BTR_MODIFY_TREE: + if (btr_cur_need_opposite_intention(block->page.frame, lock_intention, + page_cur.rec)) + /* If the rec is the first or last in the page for pessimistic + delete intention, it might cause node_ptr insert for the upper + level. We should change the intention and retry. */ + need_opposite_intention: + return pessimistic_search_leaf(tuple, mode, mtr); - } + if (detected_same_key_root || lock_intention != BTR_INTENTION_BOTH || + index()->is_unique() || + (up_match <= rec_offs_n_fields(offsets) && + low_match <= rec_offs_n_fields(offsets))) + break; - if (add_latch) { - block->page.lock.s_unlock(); - } + /* If the first or the last record of the page or the same key + value to the first record or last record, then another page might + be chosen when BTR_CONT_MODIFY_TREE. So, the parent page should + not released to avoiding deadlock with blocking the another search + with the same key value. */ + const rec_t *first= + page_rec_get_next_const(page_get_infimum_rec(block->page.frame)); + ulint matched_fields; - ut_ad(!page_rec_is_supremum(node_ptr)); - } + if (UNIV_UNLIKELY(!first)) + goto corrupted; + if (page_cur.rec == first || + page_rec_is_last(page_cur.rec, block->page.frame)) + { + same_key_root: + detected_same_key_root= true; + break; + } - ut_ad(page_mode == search_mode - || (page_mode == PAGE_CUR_WITHIN - && search_mode == PAGE_CUR_RTREE_LOCATE)); + matched_fields= 0; + offsets2= rec_get_offsets(first, index(), offsets2, 0, ULINT_UNDEFINED, + &heap); + cmp_rec_rec(page_cur.rec, first, offsets, offsets2, index(), false, + &matched_fields); + if (matched_fields >= rec_offs_n_fields(offsets) - 1) + goto same_key_root; + if (const rec_t* last= + page_rec_get_prev_const(page_get_supremum_rec(block->page.frame))) + { + matched_fields= 0; + offsets2= rec_get_offsets(last, index(), offsets2, 0, ULINT_UNDEFINED, + &heap); + cmp_rec_rec(page_cur.rec, last, offsets, offsets2, index(), false, + &matched_fields); + if (matched_fields >= rec_offs_n_fields(offsets) - 1) + goto same_key_root; + } + else + goto corrupted; - page_mode = search_mode; - } + /* Release the non-root parent page unless it may need to be modified. */ + if (tree_height > height + 1 && + !btr_cur_will_modify_tree(index(), block->page.frame, lock_intention, + page_cur.rec, node_ptr_max_size, + zip_size, mtr)) + { + mtr->rollback_to_savepoint(block_savepoint - 1, block_savepoint); + block_savepoint--; + } + } - /* If the first or the last record of the page - or the same key value to the first record or last record, - the another page might be chosen when BTR_CONT_MODIFY_TREE. - So, the parent page should not released to avoiding deadlock - with blocking the another search with the same key value. */ - if (!detected_same_key_root - && lock_intention == BTR_INTENTION_BOTH - && !dict_index_is_unique(index) - && latch_mode == BTR_MODIFY_TREE - && (up_match >= rec_offs_n_fields(offsets) - 1 - || low_match >= rec_offs_n_fields(offsets) - 1)) { - const rec_t* first_rec = page_rec_get_next_const( - page_get_infimum_rec(page)); - ulint matched_fields; + /* Go to the child node */ + page_id.set_page_no(btr_node_ptr_get_child_page_no(page_cur.rec, offsets)); - ut_ad(upper_rw_latch == RW_X_LATCH); + if (!--height) + { + /* We are about to access the leaf level. */ - if (UNIV_UNLIKELY(!first_rec)) { - corrupted: - err = DB_CORRUPTION; - goto func_exit; - } - if (node_ptr == first_rec - || page_rec_is_last(node_ptr, page)) { - detected_same_key_root = true; - } else { - matched_fields = 0; - - offsets2 = rec_get_offsets( - first_rec, index, offsets2, - 0, ULINT_UNDEFINED, &heap); - cmp_rec_rec(node_ptr, first_rec, - offsets, offsets2, index, false, - &matched_fields); - - if (matched_fields - >= rec_offs_n_fields(offsets) - 1) { - detected_same_key_root = true; - } else if (const rec_t* last_rec - = page_rec_get_prev_const( - page_get_supremum_rec( - page))) { - matched_fields = 0; - - offsets2 = rec_get_offsets( - last_rec, index, offsets2, - 0, ULINT_UNDEFINED, &heap); - cmp_rec_rec( - node_ptr, last_rec, - offsets, offsets2, index, - false, &matched_fields); - if (matched_fields - >= rec_offs_n_fields(offsets) - 1) { - detected_same_key_root = true; - } - } else { - goto corrupted; - } - } - } + switch (latch_mode) { + case BTR_MODIFY_ROOT_AND_LEAF: + rw_latch= RW_X_LATCH; + break; + case BTR_MODIFY_PREV: /* ibuf_insert() or btr_pcur_move_to_prev() */ + case BTR_SEARCH_PREV: /* btr_pcur_move_to_prev() */ + ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_X_LATCH); - /* If the page might cause modify_tree, - we should not release the parent page's lock. */ - if (!detected_same_key_root - && latch_mode == BTR_MODIFY_TREE - && !btr_cur_will_modify_tree( - index, page, lock_intention, node_ptr, - node_ptr_max_size, zip_size, mtr) - && !rtree_parent_modified) { - ut_ad(upper_rw_latch == RW_X_LATCH); - ut_ad(n_releases <= n_blocks); - - /* we can release upper blocks */ - for (; n_releases < n_blocks; n_releases++) { - if (n_releases == 0) { - /* we should not release root page - to pin to same block. */ - continue; - } + if (page_has_prev(block->page.frame) && + page_rec_is_first(page_cur.rec, block->page.frame)) + { + ut_ad(block_savepoint + 1 == mtr->get_savepoint()); + /* Latch the previous page if the node pointer is the leftmost + of the current page. */ + buf_block_t *left= btr_block_get(*index(), + btr_page_get_prev(block->page.frame), + RW_NO_LATCH, false, mtr, &err); + if (UNIV_UNLIKELY(!left)) + goto func_exit; + ut_ad(block_savepoint + 2 == mtr->get_savepoint()); + if (UNIV_LIKELY(left->page.lock.s_lock_try())) + mtr->lock_register(block_savepoint + 1, MTR_MEMO_PAGE_S_FIX); + else + { + if (rw_latch == RW_S_LATCH) + block->page.lock.s_unlock(); + else + block->page.lock.x_unlock(); + mtr->upgrade_buffer_fix(block_savepoint + 1, RW_S_LATCH); + mtr->lock_register(block_savepoint, MTR_MEMO_BUF_FIX); + mtr->upgrade_buffer_fix(block_savepoint, RW_S_LATCH); + /* While our latch on the level-2 page prevents splits or + merges of this level-1 block, other threads may have + modified it due to splitting or merging some level-0 (leaf) + pages underneath it. Thus, we must search again. */ + if (page_cur_search_with_match(tuple, page_mode, + &up_match, &low_match, + &page_cur, nullptr)) + goto corrupted; + offsets= rec_get_offsets(page_cur.rec, index(), offsets, 0, + ULINT_UNDEFINED, &heap); + page_id.set_page_no(btr_node_ptr_get_child_page_no(page_cur.rec, + offsets)); + } + } + goto leaf_with_no_latch; + case BTR_MODIFY_LEAF: + case BTR_SEARCH_LEAF: + if (index()->is_ibuf()) + goto leaf_with_no_latch; + rw_latch= rw_lock_type_t(latch_mode); + if (btr_op != BTR_NO_OP && + ibuf_should_try(index(), btr_op != BTR_INSERT_OP)) + /* Try to buffer the operation if the leaf page + is not in the buffer pool. */ + buf_mode= btr_op == BTR_DELETE_OP + ? BUF_GET_IF_IN_POOL_OR_WATCH + : BUF_GET_IF_IN_POOL; + break; + case BTR_MODIFY_TREE: + ut_ad(rw_latch == RW_X_LATCH); - /* release unused blocks to unpin */ - mtr_release_block_at_savepoint( - mtr, tree_savepoints[n_releases], - tree_blocks[n_releases]); - } - } + if (lock_intention == BTR_INTENTION_INSERT && + page_has_next(block->page.frame) && + page_rec_is_last(page_cur.rec, block->page.frame)) + { + /* btr_insert_into_right_sibling() might cause deleting node_ptr + at upper level */ + mtr->rollback_to_savepoint(block_savepoint); + goto need_opposite_intention; + } + /* fall through */ + default: + leaf_with_no_latch: + rw_latch= RW_NO_LATCH; + } + } - if (height == level - && latch_mode == BTR_MODIFY_TREE) { - ut_ad(upper_rw_latch == RW_X_LATCH); - /* we should sx-latch root page, if released already. - It contains seg_header. */ - if (n_releases > 0) { - mtr->sx_latch_at_savepoint( - tree_savepoints[0], - tree_blocks[0]); - } + goto search_loop; +} - /* x-latch the branch blocks not released yet. */ - for (ulint i = n_releases; i <= n_blocks; i++) { - mtr->x_latch_at_savepoint( - tree_savepoints[i], - tree_blocks[i]); - } - } +ATTRIBUTE_COLD +dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple, + page_cur_mode_t mode, mtr_t *mtr) +{ + ut_ad(index()->is_btree() || index()->is_ibuf()); + ut_ad(!index()->is_ibuf() || ibuf_inside(mtr)); - /* We should consider prev_page of parent page, if the node_ptr - is the leftmost of the page. because BTR_SEARCH_PREV and - BTR_MODIFY_PREV latches prev_page of the leaf page. */ - if ((latch_mode == BTR_SEARCH_PREV - || latch_mode == BTR_MODIFY_PREV) - && !prev_tree_blocks) { - /* block should be latched for consistent - btr_page_get_prev() */ - ut_ad(mtr->memo_contains_flagged( - block, MTR_MEMO_PAGE_S_FIX - | MTR_MEMO_PAGE_X_FIX)); + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs* offsets = offsets_; + rec_offs_init(offsets_); - if (page_has_prev(page) - && page_rec_is_first(node_ptr, page)) { + ut_ad(flag == BTR_CUR_BINARY); + ut_ad(dict_index_check_search_tuple(index(), tuple)); + ut_ad(dtuple_check_typed(tuple)); + buf_block_t *block= mtr->at_savepoint(1); + ut_ad(block->page.id().page_no() == index()->page); + block->page.fix(); + mtr->rollback_to_savepoint(1); + ut_ad(mtr->memo_contains_flagged(&index()->lock, + MTR_MEMO_SX_LOCK | MTR_MEMO_X_LOCK)); + + const page_cur_mode_t page_mode{btr_cur_nonleaf_mode(mode)}; + + mtr->page_lock(block, RW_X_LATCH); + + up_match= 0; + up_bytes= 0; + low_match= 0; + low_bytes= 0; + ulint height= btr_page_get_level(block->page.frame); + tree_height= height + 1; + mem_heap_t *heap= nullptr; - if (leftmost_from_level == 0) { - leftmost_from_level = height + 1; - } - } else { - leftmost_from_level = 0; - } + search_loop: + dberr_t err; + page_cur.block= block; - if (height == 0 && leftmost_from_level > 0) { - /* should retry to get also prev_page - from level==leftmost_from_level. */ - prev_tree_blocks = static_cast<buf_block_t**>( - ut_malloc_nokey(sizeof(buf_block_t*) - * leftmost_from_level)); - - prev_tree_savepoints = static_cast<ulint*>( - ut_malloc_nokey(sizeof(ulint) - * leftmost_from_level)); - - /* back to the level (leftmost_from_level+1) */ - ulint idx = n_blocks - - (leftmost_from_level - 1); - - page_id.set_page_no( - tree_blocks[idx]->page.id().page_no()); - - for (ulint i = n_blocks - - (leftmost_from_level - 1); - i <= n_blocks; i++) { - mtr_release_block_at_savepoint( - mtr, tree_savepoints[i], - tree_blocks[i]); - } + if (UNIV_UNLIKELY(!height)) + { + if (page_cur_search_with_match(tuple, mode, &up_match, &low_match, + &page_cur, nullptr)) + corrupted: + err= DB_CORRUPTION; + else + { + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); - n_blocks -= (leftmost_from_level - 1); - height = leftmost_from_level; - ut_ad(n_releases == 0); - - /* replay up_match, low_match */ - up_match = 0; - low_match = 0; - rtr_info_t* rtr_info = need_path - ? cursor->rtr_info : NULL; - - for (ulint i = 0; i < n_blocks; i++) { - page_cursor->block = tree_blocks[i]; - if (page_cur_search_with_match( - tuple, - page_mode, &up_match, - &low_match, page_cursor, - rtr_info)) { - err = DB_CORRUPTION; - goto func_exit; - } - } +#ifdef BTR_CUR_HASH_ADAPT + /* We do a dirty read of btr_search_enabled here. We will + properly check btr_search_enabled again in + btr_search_build_page_hash_index() before building a page hash + index, while holding search latch. */ + if (!btr_search_enabled); + else if (tuple->info_bits & REC_INFO_MIN_REC_FLAG) + /* This may be a search tuple for btr_pcur_t::restore_position(). */ + ut_ad(tuple->is_metadata() || + (tuple->is_metadata(tuple->info_bits ^ REC_STATUS_INSTANT))); + else if (index()->table->is_temporary()); + else if (!rec_is_metadata(page_cur.rec, *index())) + btr_search_info_update(index(), this); +#endif /* BTR_CUR_HASH_ADAPT */ + err= DB_SUCCESS; + } - goto search_loop; - } - } + func_exit: + if (UNIV_LIKELY_NULL(heap)) + mem_heap_free(heap); + return err; + } - /* Go to the child node */ - page_id.set_page_no( - btr_node_ptr_get_child_page_no(node_ptr, offsets)); + if (page_cur_search_with_match(tuple, page_mode, &up_match, &low_match, + &page_cur, nullptr)) + goto corrupted; - n_blocks++; + page_id_t page_id{block->page.id()}; - if (UNIV_UNLIKELY(height == 0 && dict_index_is_ibuf(index))) { - /* We're doing a search on an ibuf tree and we're one - level above the leaf page. */ + offsets= rec_get_offsets(page_cur.rec, index(), offsets, 0, ULINT_UNDEFINED, + &heap); + /* Go to the child node */ + page_id.set_page_no(btr_node_ptr_get_child_page_no(page_cur.rec, offsets)); - ut_ad(level == 0); + const auto block_savepoint= mtr->get_savepoint(); + block= + buf_page_get_gen(page_id, block->zip_size(), RW_NO_LATCH, nullptr, BUF_GET, + mtr, &err, !--height && !index()->is_clust()); - buf_mode = BUF_GET; - rw_latch = RW_NO_LATCH; - goto retry_page_get; - } + if (!block) + { + if (err == DB_DECRYPTION_FAILED) + btr_decryption_failed(*index()); + goto func_exit; + } - if (dict_index_is_spatial(index) - && page_mode >= PAGE_CUR_CONTAIN - && page_mode != PAGE_CUR_RTREE_INSERT) { - ut_ad(need_path); - rtr_node_path_t* path = - cursor->rtr_info->path; - - if (!path->empty() && found) { - ut_ad(path->back().page_no - == page_id.page_no()); - path->pop_back(); -#ifdef UNIV_DEBUG - if (page_mode == PAGE_CUR_RTREE_LOCATE - && (latch_mode != BTR_MODIFY_LEAF)) { - btr_pcur_t* cur - = cursor->rtr_info->parent_path->back( - ).cursor; - rec_t* my_node_ptr - = btr_pcur_get_rec(cur); - - offsets = rec_get_offsets( - my_node_ptr, index, offsets, - 0, ULINT_UNDEFINED, &heap); - - ulint my_page_no - = btr_node_ptr_get_child_page_no( - my_node_ptr, offsets); - - ut_ad(page_id.page_no() == my_page_no); - } -#endif - } - } + if (!!page_is_comp(block->page.frame) != index()->table->not_redundant() || + btr_page_get_index_id(block->page.frame) != index()->id || + fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE || + !fil_page_index_page_check(block->page.frame)) + goto corrupted; - goto search_loop; - } else if (!dict_index_is_spatial(index) - && latch_mode == BTR_MODIFY_TREE - && lock_intention == BTR_INTENTION_INSERT - && page_has_next(page) - && page_rec_is_last(page_cur_get_rec(page_cursor), page)) { - - /* btr_insert_into_right_sibling() might cause - deleting node_ptr at upper level */ - - guess = NULL; - - if (height == 0) { - /* release the leaf pages if latched */ - for (uint i = 0; i < 3; i++) { - if (latch_leaves.blocks[i] != NULL) { - mtr_release_block_at_savepoint( - mtr, latch_leaves.savepoints[i], - latch_leaves.blocks[i]); - latch_leaves.blocks[i] = NULL; - } - } - } + if (height != btr_page_get_level(block->page.frame)) + goto corrupted; - goto need_opposite_intention; - } + if (page_has_prev(block->page.frame) && + !btr_block_get(*index(), btr_page_get_prev(block->page.frame), + RW_X_LATCH, false, mtr, &err)) + goto func_exit; + mtr->upgrade_buffer_fix(block_savepoint, RW_X_LATCH); +#ifdef UNIV_ZIP_DEBUG + const page_zip_des_t *page_zip= buf_block_get_page_zip(block); + ut_a(!page_zip || page_zip_validate(page_zip, page, index())); +#endif /* UNIV_ZIP_DEBUG */ + if (page_has_next(block->page.frame) && + !btr_block_get(*index(), btr_page_get_next(block->page.frame), + RW_X_LATCH, false, mtr, &err)) + goto func_exit; + goto search_loop; +} - if (level != 0) { - ut_ad(!autoinc); +/********************************************************************//** +Searches an index tree and positions a tree cursor on a given non-leaf level. +NOTE: n_fields_cmp in tuple must be set so that it cannot be compared +to node pointer page number fields on the upper levels of the tree! +cursor->up_match and cursor->low_match both will have sensible values. +Cursor is left at the place where an insert of the +search tuple should be performed in the B-tree. InnoDB does an insert +immediately after the cursor. Thus, the cursor may end up on a user record, +or on a page infimum record. +@param level the tree level of search +@param tuple data tuple; NOTE: n_fields_cmp in tuple must be set so that + it cannot get compared to the node ptr page number field! +@param latch RW_S_LATCH or RW_X_LATCH +@param cursor tree cursor; the cursor page is s- or x-latched, but see also + above! +@param mtr mini-transaction +@return DB_SUCCESS on success or error code otherwise */ +TRANSACTIONAL_TARGET +dberr_t btr_cur_search_to_nth_level(ulint level, + const dtuple_t *tuple, + rw_lock_type_t rw_latch, + btr_cur_t *cursor, mtr_t *mtr) +{ + dict_index_t *const index= cursor->index(); - if (upper_rw_latch == RW_NO_LATCH) { - ut_ad(latch_mode == BTR_CONT_MODIFY_TREE - || latch_mode == BTR_CONT_SEARCH_TREE); - btr_block_get( - *index, page_id.page_no(), - latch_mode == BTR_CONT_MODIFY_TREE - ? RW_X_LATCH : RW_SX_LATCH, false, mtr, &err); - } else { - ut_ad(mtr->memo_contains_flagged(block, - upper_rw_latch)); - - if (latch_by_caller) { - ut_ad(latch_mode == BTR_SEARCH_TREE); - /* to exclude modifying tree operations - should sx-latch the index. */ - ut_ad(mtr->memo_contains(index->lock, - MTR_MEMO_SX_LOCK)); - /* because has sx-latch of index, - can release upper blocks. */ - for (; n_releases < n_blocks; n_releases++) { - mtr_release_block_at_savepoint( - mtr, - tree_savepoints[n_releases], - tree_blocks[n_releases]); - } - } - } + ut_ad(index->is_btree() || index->is_ibuf()); + mem_heap_t *heap= nullptr; + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs *offsets= offsets_; + rec_offs_init(offsets_); + ut_ad(level); + ut_ad(dict_index_check_search_tuple(index, tuple)); + ut_ad(index->is_ibuf() ? ibuf_inside(mtr) : index->is_btree()); + ut_ad(dtuple_check_typed(tuple)); + ut_ad(index->page != FIL_NULL); + + MEM_UNDEFINED(&cursor->up_bytes, sizeof cursor->up_bytes); + MEM_UNDEFINED(&cursor->low_bytes, sizeof cursor->low_bytes); + cursor->up_match= 0; + cursor->low_match= 0; + cursor->flag= BTR_CUR_BINARY; - if (page_mode <= PAGE_CUR_LE) { - cursor->low_match = low_match; - cursor->up_match = up_match; - } - } else { - cursor->low_match = low_match; - cursor->low_bytes = low_bytes; - cursor->up_match = up_match; - cursor->up_bytes = up_bytes; +#ifndef BTR_CUR_ADAPT + buf_block_t *block= nullptr; +#else + btr_search_t *info= btr_search_get_info(index); + buf_block_t *block= info->root_guess; +#endif /* BTR_CUR_ADAPT */ - if (autoinc) { - page_set_autoinc(tree_blocks[0], autoinc, mtr, false); - } + ut_ad(mtr->memo_contains_flagged(&index->lock, + MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); -#ifdef BTR_CUR_HASH_ADAPT - /* We do a dirty read of btr_search_enabled here. We - will properly check btr_search_enabled again in - btr_search_build_page_hash_index() before building a - page hash index, while holding search latch. */ - if (!btr_search_enabled) { - } else if (tuple->info_bits & REC_INFO_MIN_REC_FLAG) { - /* This may be a search tuple for - btr_pcur_t::restore_position(). */ - ut_ad(tuple->is_metadata() - || (tuple->is_metadata(tuple->info_bits - ^ REC_STATUS_INSTANT))); - } else if (index->is_spatial()) { - } else if (index->table->is_temporary()) { - } else if (rec_is_metadata(btr_cur_get_rec(cursor), *index)) { - /* Only user records belong in the adaptive - hash index. */ - } else { - btr_search_info_update(index, cursor); - } -#endif /* BTR_CUR_HASH_ADAPT */ - ut_ad(cursor->up_match != ULINT_UNDEFINED - || mode != PAGE_CUR_GE); - ut_ad(cursor->up_match != ULINT_UNDEFINED - || mode != PAGE_CUR_LE); - ut_ad(cursor->low_match != ULINT_UNDEFINED - || mode != PAGE_CUR_LE); - } - - /* For spatial index, remember what blocks are still latched */ - if (dict_index_is_spatial(index) - && (latch_mode == BTR_MODIFY_TREE - || latch_mode == BTR_MODIFY_LEAF)) { - for (ulint i = 0; i < n_releases; i++) { - cursor->rtr_info->tree_blocks[i] = NULL; - cursor->rtr_info->tree_savepoints[i] = 0; - } + const ulint zip_size= index->table->space->zip_size(); - for (ulint i = n_releases; i <= n_blocks; i++) { - cursor->rtr_info->tree_blocks[i] = tree_blocks[i]; - cursor->rtr_info->tree_savepoints[i] = tree_savepoints[i]; - } - } + /* Start with the root page. */ + page_id_t page_id(index->table->space_id, index->page); + ulint height= ULINT_UNDEFINED; -func_exit: +search_loop: + dberr_t err= DB_SUCCESS; + if (buf_block_t *b= + mtr->get_already_latched(page_id, mtr_memo_type_t(rw_latch))) + block= b; + else if (!(block= buf_page_get_gen(page_id, zip_size, rw_latch, + block, BUF_GET, mtr, &err))) + { + if (err == DB_DECRYPTION_FAILED) + btr_decryption_failed(*index); + goto func_exit; + } - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } +#ifdef UNIV_ZIP_DEBUG + if (const page_zip_des_t *page_zip= buf_block_get_page_zip(block)) + ut_a(page_zip_validate(page_zip, block->page.frame, index)); +#endif /* UNIV_ZIP_DEBUG */ - ut_free(prev_tree_blocks); - ut_free(prev_tree_savepoints); + if (!!page_is_comp(block->page.frame) != index->table->not_redundant() || + btr_page_get_index_id(block->page.frame) != index->id || + fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE || + !fil_page_index_page_check(block->page.frame)) + { + corrupted: + err= DB_CORRUPTION; + func_exit: + if (UNIV_LIKELY_NULL(heap)) + mem_heap_free(heap); + return err; + } - if (mbr_adj) { - /* remember that we will need to adjust parent MBR */ - cursor->rtr_info->mbr_adj = true; - } + const uint32_t page_level= btr_page_get_level(block->page.frame); - DBUG_RETURN(err); + if (height == ULINT_UNDEFINED) + { + /* We are in the root node */ + height= page_level; + if (!height) + goto corrupted; + cursor->tree_height= height + 1; + } + else if (height != ulint{page_level}) + goto corrupted; + + cursor->page_cur.block= block; + + /* Search for complete index fields. */ + if (page_cur_search_with_match(tuple, PAGE_CUR_LE, &cursor->up_match, + &cursor->low_match, &cursor->page_cur, + nullptr)) + goto corrupted; + + /* If this is the desired level, leave the loop */ + if (level == height) + goto func_exit; + + ut_ad(height > level); + height--; + + offsets = rec_get_offsets(cursor->page_cur.rec, index, offsets, 0, + ULINT_UNDEFINED, &heap); + /* Go to the child node */ + page_id.set_page_no(btr_node_ptr_get_child_page_no(cursor->page_cur.rec, + offsets)); + block= nullptr; + goto search_loop; } dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, btr_latch_mode latch_mode, mtr_t *mtr) { - ulint node_ptr_max_size= srv_page_size / 2; btr_intention_t lock_intention; ulint n_blocks= 0; mem_heap_t *heap= nullptr; @@ -2422,29 +1795,21 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, rec_offs_init(offsets_); const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED; - latch_mode = btr_latch_mode(latch_mode & ~BTR_ALREADY_S_LATCHED); + latch_mode= btr_latch_mode(latch_mode & ~BTR_ALREADY_S_LATCHED); lock_intention= btr_cur_get_and_clear_intention(&latch_mode); - /* This function doesn't need to lock left page of the leaf page */ - if (latch_mode == BTR_SEARCH_PREV) - latch_mode= BTR_SEARCH_LEAF; - else if (latch_mode == BTR_MODIFY_PREV) - latch_mode= BTR_MODIFY_LEAF; - /* Store the position of the tree latch we push to mtr so that we know how to release it when we have latched the leaf node */ auto savepoint= mtr->get_savepoint(); rw_lock_type_t upper_rw_latch= RW_X_LATCH; + ulint node_ptr_max_size= 0; - switch (latch_mode) { - case BTR_CONT_MODIFY_TREE: - case BTR_CONT_SEARCH_TREE: - abort(); - break; - case BTR_MODIFY_TREE: + if (latch_mode == BTR_MODIFY_TREE) + { + node_ptr_max_size= btr_node_ptr_max_size(index); /* Most of delete-intended operations are purging. Free blocks and read IO bandwidth should be prioritized for them, when the history list is growing huge. */ @@ -2455,32 +1820,35 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, mtr_x_lock_index(index, mtr); else mtr_sx_lock_index(index, mtr); - break; - default: + } + else + { + static_assert(int{BTR_CONT_MODIFY_TREE} == (12 | BTR_MODIFY_LEAF), ""); + ut_ad(!(latch_mode & 8)); + /* This function doesn't need to lock left page of the leaf page */ + static_assert(int{BTR_SEARCH_PREV} == (4 | BTR_SEARCH_LEAF), ""); + static_assert(int{BTR_MODIFY_PREV} == (4 | BTR_MODIFY_LEAF), ""); + latch_mode= btr_latch_mode(latch_mode & ~4); ut_ad(!latch_by_caller || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_SX_LOCK | MTR_MEMO_S_LOCK)); upper_rw_latch= RW_S_LATCH; - if (latch_by_caller) - break; - ut_ad(latch_mode != BTR_SEARCH_TREE); - savepoint++; - mtr_s_lock_index(index, mtr); + if (!latch_by_caller) + { + savepoint++; + mtr_s_lock_index(index, mtr); + } } ut_ad(savepoint == mtr->get_savepoint()); - const rw_lock_type_t root_leaf_rw_latch= - btr_cur_latch_for_root_leaf(latch_mode); + const rw_lock_type_t root_leaf_rw_latch= rw_lock_type_t(latch_mode & ~12); page_cur.index = index; uint32_t page= index->page; const auto zip_size= index->table->space->zip_size(); - if (root_leaf_rw_latch == RW_X_LATCH) - node_ptr_max_size= btr_node_ptr_max_size(index); - for (ulint height= ULINT_UNDEFINED;;) { ut_ad(n_blocks < BTR_MAX_LEVELS); @@ -2529,16 +1897,27 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, reached_leaf: const auto leaf_savepoint= mtr->get_savepoint(); ut_ad(leaf_savepoint); + ut_ad(block == mtr->at_savepoint(leaf_savepoint - 1)); - if (rw_latch == RW_NO_LATCH) - btr_cur_latch_leaves(block, latch_mode, this, mtr); - - switch (latch_mode) { - case BTR_MODIFY_TREE: - case BTR_CONT_MODIFY_TREE: - case BTR_CONT_SEARCH_TREE: - break; - default: + if (latch_mode == BTR_MODIFY_TREE) + { + ut_ad(rw_latch == RW_NO_LATCH); + /* x-latch also siblings from left to right */ + if (page_has_prev(block->page.frame) && + !btr_block_get(*index, btr_page_get_prev(block->page.frame), + RW_X_LATCH, false, mtr, &err)) + break; + mtr->upgrade_buffer_fix(leaf_savepoint - 1, RW_X_LATCH); + if (page_has_next(block->page.frame) && + !btr_block_get(*index, btr_page_get_next(block->page.frame), + RW_X_LATCH, false, mtr, &err)) + break; + } + else + { + if (rw_latch == RW_NO_LATCH) + mtr->upgrade_buffer_fix(leaf_savepoint - 1, + rw_lock_type_t(latch_mode)); /* Release index->lock if needed, and the non-leaf pages. */ mtr->rollback_to_savepoint(savepoint - !latch_by_caller, leaf_savepoint - 1); @@ -4667,16 +4046,15 @@ btr_cur_pessimistic_update( } } - if (!srv_read_only_mode - && !big_rec_vec +#if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled + if (!big_rec_vec && page_is_leaf(block->page.frame) && !dict_index_is_online_ddl(index)) { -#if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled mtr->release(index->lock); -#endif /* NOTE: We cannot release root block latch here, because it has segment header and already modified in most of cases.*/ } +#endif err = DB_SUCCESS; goto return_after_reservations; @@ -5418,15 +4796,14 @@ return_after_reservations: err_exit: mem_heap_free(heap); - if (!srv_read_only_mode - && page_is_leaf(page) - && !dict_index_is_online_ddl(index)) { #if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled + if (page_is_leaf(page) + && !dict_index_is_online_ddl(index)) { mtr->release(index->lock); -#endif /* NOTE: We cannot release root block latch here, because it has segment header and already modified in most of cases.*/ } +#endif index->table->space->release_free_extents(n_reserved); return(ret); @@ -5543,16 +4920,18 @@ public: buf_block_t *parent_block= m_block; ulint parent_savepoint= m_savepoint; - m_savepoint= mtr_set_savepoint(&mtr); m_block= btr_block_get(*index(), m_page_id.page_no(), RW_S_LATCH, !level, &mtr, nullptr); + if (!m_block) + return false; if (parent_block && parent_block != right_parent) - mtr_release_block_at_savepoint(&mtr, parent_savepoint, parent_block); + mtr.rollback_to_savepoint(parent_savepoint, parent_savepoint + 1); - return m_block && - (level == ULINT_UNDEFINED || - btr_page_get_level(buf_block_get_frame(m_block)) == level); + m_savepoint= mtr.get_savepoint() - 1; + + return level == ULINT_UNDEFINED || + btr_page_get_level(m_block->page.frame) == level; } /** Sets page mode for leaves */ @@ -5759,14 +5138,18 @@ static ha_rows btr_estimate_n_rows_in_range_on_level( buf_block_t *prev_block= block; ulint prev_savepoint= savepoint; - savepoint= mtr_set_savepoint(&mtr); + savepoint= mtr.get_savepoint(); /* Fetch the page. */ block= btr_block_get(*index, page_id.page_no(), RW_S_LATCH, !level, &mtr, nullptr); if (prev_block) - mtr_release_block_at_savepoint(&mtr, prev_savepoint, prev_block); + { + mtr.rollback_to_savepoint(prev_savepoint, prev_savepoint + 1); + if (block) + savepoint--; + } if (!block || btr_page_get_level(buf_block_get_frame(block)) != level) goto inexact; @@ -5795,14 +5178,20 @@ static ha_rows btr_estimate_n_rows_in_range_on_level( } while (page_id.page_no() != right_page_no); if (block) - mtr_release_block_at_savepoint(&mtr, savepoint, block); + { + ut_ad(block == mtr.at_savepoint(savepoint)); + mtr.rollback_to_savepoint(savepoint, savepoint + 1); + } return (n_rows); inexact: if (block) - mtr_release_block_at_savepoint(&mtr, savepoint, block); + { + ut_ad(block == mtr.at_savepoint(savepoint)); + mtr.rollback_to_savepoint(savepoint, savepoint + 1); + } is_n_rows_exact= false; @@ -5861,9 +5250,7 @@ ha_rows btr_estimate_n_rows_in_range(dict_index_t *index, mtr.start(); - /* Store the position of the tree latch we push to mtr so that we - know how to release it when we have latched leaf node(s) */ - ulint savepoint= mtr_set_savepoint(&mtr); + ut_ad(mtr.get_savepoint() == 0); mtr_s_lock_index(index, &mtr); ha_rows table_n_rows= dict_table_get_n_rows(index->table); @@ -5918,10 +5305,10 @@ search_loop: } if (height == 0) - /* There is no need to unlach non-leaf pages here as they must already be + /* There is no need to release non-leaf pages here as they must already be unlatched in btr_est_cur_t::fetch_child(). Try to search on pages after - index->lock unlatching to decrease contention. */ - mtr_release_s_latch_at_savepoint(&mtr, savepoint, &index->lock); + releasing the index latch, to decrease contention. */ + mtr.rollback_to_savepoint(0, 1); /* There is no need to search on left page if divergence_height != ULINT_UNDEFINED, as it was already searched before @@ -6367,16 +5754,21 @@ struct btr_blob_log_check_t { DEBUG_SYNC_C("blob_write_middle"); - log_free_check(); - - DEBUG_SYNC_C("blob_write_middle_after_check"); - const mtr_log_t log_mode = m_mtr->get_log_mode(); m_mtr->start(); m_mtr->set_log_mode(log_mode); index->set_modified(*m_mtr); + log_free_check(); + + DEBUG_SYNC_C("blob_write_middle_after_check"); + if (UNIV_UNLIKELY(page_no != FIL_NULL)) { + dberr_t err; + if (UNIV_LIKELY(index->page != page_no)) { + ut_a(btr_root_block_get(index, RW_SX_LATCH, + m_mtr, &err)); + } m_pcur->btr_cur.page_cur.block = btr_block_get( *index, page_no, RW_X_LATCH, false, m_mtr); /* The page should not be evicted or corrupted while @@ -6389,7 +5781,7 @@ struct btr_blob_log_check_t { ut_ad(m_pcur->rel_pos == BTR_PCUR_ON); mtr_sx_lock_index(index, m_mtr); ut_a(m_pcur->restore_position( - BTR_MODIFY_LEAF_ALREADY_LATCHED, + BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED, m_mtr) == btr_pcur_t::SAME_ALL); } @@ -6556,6 +5948,10 @@ btr_store_big_rec_extern_fields( page_zip = buf_block_get_page_zip(rec_block); } + ut_ad(btr_mtr->get_already_latched( + page_id_t{index->table->space_id, index->page}, + MTR_MEMO_PAGE_SX_FIX)); + mtr.start(); index->set_modified(mtr); mtr.set_log_mode_sub(*btr_mtr); diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc index 76b173359da..642db0e9f1c 100644 --- a/storage/innobase/btr/btr0defragment.cc +++ b/storage/innobase/btr/btr0defragment.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (C) 2012, 2014 Facebook, Inc. All Rights Reserved. -Copyright (C) 2014, 2022, MariaDB Corporation. +Copyright (C) 2014, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -280,6 +280,70 @@ btr_defragment_calc_n_recs_for_size( return n_recs; } +MY_ATTRIBUTE((nonnull(2,3,4), warn_unused_result)) +/************************************************************//** +Returns the upper level node pointer to a page. It is assumed that mtr holds +an sx-latch on the tree. +@return rec_get_offsets() of the node pointer record */ +static +rec_offs* +btr_page_search_father_node_ptr( + rec_offs* offsets,/*!< in: work area for the return value */ + mem_heap_t* heap, /*!< in: memory heap to use */ + btr_cur_t* cursor, /*!< in: cursor pointing to user record, + out: cursor on node pointer record, + its page x-latched */ + mtr_t* mtr) /*!< in: mtr */ +{ + const uint32_t page_no = btr_cur_get_block(cursor)->page.id().page_no(); + dict_index_t* index = btr_cur_get_index(cursor); + ut_ad(!index->is_spatial()); + + ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); + ut_ad(dict_index_get_page(index) != page_no); + + const auto level = btr_page_get_level(btr_cur_get_page(cursor)); + + const rec_t* user_rec = btr_cur_get_rec(cursor); + ut_a(page_rec_is_user_rec(user_rec)); + + if (btr_cur_search_to_nth_level(level + 1, + dict_index_build_node_ptr(index, + user_rec, 0, + heap, level), + RW_X_LATCH, + cursor, mtr) != DB_SUCCESS) { + return nullptr; + } + + const rec_t* node_ptr = btr_cur_get_rec(cursor); + ut_ad(!btr_cur_get_block(cursor)->page.lock.not_recursive() + || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK)); + + offsets = rec_get_offsets(node_ptr, index, offsets, 0, + ULINT_UNDEFINED, &heap); + + if (btr_node_ptr_get_child_page_no(node_ptr, offsets) != page_no) { + offsets = nullptr; + } + + return(offsets); +} + +static bool btr_page_search_father(mtr_t *mtr, btr_cur_t *cursor) +{ + rec_t *rec= + page_rec_get_next(page_get_infimum_rec(cursor->block()->page.frame)); + if (UNIV_UNLIKELY(!rec)) + return false; + cursor->page_cur.rec= rec; + mem_heap_t *heap= mem_heap_create(100); + const bool got= btr_page_search_father_node_ptr(nullptr, heap, cursor, mtr); + mem_heap_free(heap); + return got; +} + /*************************************************************//** Merge as many records from the from_block to the to_block. Delete the from_block if all records are successfully merged to to_block. @@ -408,7 +472,7 @@ btr_defragment_merge_pages( parent.page_cur.index = index; parent.page_cur.block = from_block; - if (!btr_page_get_father(mtr, &parent)) { + if (!btr_page_search_father(mtr, &parent)) { to_block = nullptr; } else if (n_recs_to_move == n_recs) { /* The whole page is merged with the previous page, @@ -699,10 +763,9 @@ processed: acquire index->lock X-latch. This entitles us to acquire page latches in any order for the index. */ mtr_x_lock_index(index, &mtr); - /* This will acquire index->lock U latch, which is allowed - when we are already holding the X-latch. */ if (buf_block_t *last_block = - item->pcur->restore_position(BTR_MODIFY_TREE, &mtr) + item->pcur->restore_position( + BTR_PURGE_TREE_ALREADY_LATCHED, &mtr) == btr_pcur_t::CORRUPTED ? nullptr : btr_defragment_n_pages(btr_pcur_get_block(item->pcur), diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc index 46b8d487850..1dd26f8c467 100644 --- a/storage/innobase/btr/btr0pcur.cc +++ b/storage/innobase/btr/btr0pcur.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2022, MariaDB Corporation. +Copyright (c) 2016, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -212,24 +212,98 @@ btr_pcur_copy_stored_position( pcur_receive->old_n_fields = pcur_donate->old_n_fields; } +/** Optimistically latches the leaf page or pages requested. +@param[in] block guessed buffer block +@param[in,out] pcur cursor +@param[in,out] latch_mode BTR_SEARCH_LEAF, ... +@param[in,out] mtr mini-transaction +@return true if success */ +TRANSACTIONAL_TARGET +static bool btr_pcur_optimistic_latch_leaves(buf_block_t *block, + btr_pcur_t *pcur, + btr_latch_mode *latch_mode, + mtr_t *mtr) +{ + ut_ad(block->page.buf_fix_count()); + ut_ad(block->page.in_file()); + ut_ad(block->page.frame); + + static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, ""); + static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, ""); + static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV) == + (RW_S_LATCH ^ RW_X_LATCH), ""); + + const rw_lock_type_t mode= + rw_lock_type_t(*latch_mode & (RW_X_LATCH | RW_S_LATCH)); + + switch (*latch_mode) { + default: + ut_ad(*latch_mode == BTR_SEARCH_LEAF || *latch_mode == BTR_MODIFY_LEAF); + return buf_page_optimistic_get(mode, block, pcur->modify_clock, mtr); + case BTR_SEARCH_PREV: + case BTR_MODIFY_PREV: + page_id_t id{0}; + uint32_t left_page_no; + ulint zip_size; + buf_block_t *left_block= nullptr; + { + transactional_shared_lock_guard<block_lock> g{block->page.lock}; + if (block->modify_clock != pcur->modify_clock) + return false; + id= block->page.id(); + zip_size= block->zip_size(); + left_page_no= btr_page_get_prev(block->page.frame); + } + + if (left_page_no != FIL_NULL) + { + left_block= + buf_page_get_gen(page_id_t(id.space(), left_page_no), zip_size, + mode, nullptr, BUF_GET_POSSIBLY_FREED, mtr); + + if (left_block && + btr_page_get_next(left_block->page.frame) != id.page_no()) + { +release_left_block: + mtr->release_last_page(); + return false; + } + } + + if (buf_page_optimistic_get(mode, block, pcur->modify_clock, mtr)) + { + if (btr_page_get_prev(block->page.frame) == left_page_no) + { + /* block was already buffer-fixed while entering the function and + buf_page_optimistic_get() buffer-fixes it again. */ + ut_ad(2 <= block->page.buf_fix_count()); + *latch_mode= btr_latch_mode(mode); + return true; + } + + mtr->release_last_page(); + } + + ut_ad(block->page.buf_fix_count()); + if (left_block) + goto release_left_block; + return false; + } +} + /** Structure acts as functor to do the latching of leaf pages. It returns true if latching of leaf pages succeeded and false otherwise. */ struct optimistic_latch_leaves { btr_pcur_t *const cursor; - btr_latch_mode *latch_mode; + btr_latch_mode *const latch_mode; mtr_t *const mtr; - optimistic_latch_leaves(btr_pcur_t *cursor, btr_latch_mode *latch_mode, - mtr_t *mtr) - : cursor(cursor), latch_mode(latch_mode), mtr(mtr) {} - - bool operator() (buf_block_t *hint) const + bool operator()(buf_block_t *hint) const { - return hint && btr_cur_optimistic_latch_leaves( - hint, cursor->modify_clock, latch_mode, - btr_pcur_get_btr_cur(cursor), mtr); + return hint && + btr_pcur_optimistic_latch_leaves(hint, cursor, latch_mode, mtr); } }; @@ -246,8 +320,8 @@ record GREATER than the user record which was the predecessor of the supremum. (4) cursor was positioned before the first or after the last in an empty tree: restores to before first or after the last in the tree. -@param restore_latch_mode BTR_SEARCH_LEAF, ... -@param mtr mtr +@param latch_mode BTR_SEARCH_LEAF, ... +@param mtr mini-transaction @return btr_pcur_t::SAME_ALL cursor position on user rec and points on the record with the same field values as in the stored record, btr_pcur_t::SAME_UNIQ cursor position is on user rec and points on the @@ -301,10 +375,9 @@ btr_pcur_t::restore_position(btr_latch_mode restore_latch_mode, mtr_t *mtr) case BTR_SEARCH_PREV: case BTR_MODIFY_PREV: /* Try optimistic restoration. */ - if (block_when_stored.run_with_hint( - optimistic_latch_leaves(this, &restore_latch_mode, - mtr))) { + optimistic_latch_leaves{this, &restore_latch_mode, + mtr})) { pos_state = BTR_PCUR_IS_POSITIONED; latch_mode = restore_latch_mode; @@ -465,18 +538,9 @@ btr_pcur_move_to_next_page( return DB_CORRUPTION; } - ulint mode = cursor->latch_mode; - switch (mode) { - case BTR_SEARCH_TREE: - mode = BTR_SEARCH_LEAF; - break; - case BTR_MODIFY_TREE: - mode = BTR_MODIFY_LEAF; - } - dberr_t err; buf_block_t* next_block = btr_block_get( - *cursor->index(), next_page_no, mode, + *cursor->index(), next_page_no, cursor->latch_mode & ~12, page_is_leaf(page), mtr, &err); if (UNIV_UNLIKELY(!next_block)) { @@ -538,26 +602,42 @@ btr_pcur_move_backward_from_page( return true; } - buf_block_t* release_block = nullptr; - - if (!page_has_prev(btr_pcur_get_page(cursor))) { - } else if (btr_pcur_is_before_first_on_page(cursor)) { - release_block = btr_pcur_get_block(cursor); - page_cur_set_after_last(cursor->btr_cur.left_block, - btr_pcur_get_page_cur(cursor)); - } else { - /* The repositioned cursor did not end on an infimum - record on a page. Cursor repositioning acquired a latch - also on the previous page, but we do not need the latch: - release it. */ - release_block = cursor->btr_cur.left_block; + buf_block_t* block = btr_pcur_get_block(cursor); + + if (page_has_prev(block->page.frame)) { + buf_block_t* left_block + = mtr->at_savepoint(mtr->get_savepoint() - 1); + const page_t* const left = left_block->page.frame; + if (memcmp_aligned<4>(left + FIL_PAGE_NEXT, + block->page.frame + + FIL_PAGE_OFFSET, 4)) { + /* This should be the right sibling page, or + if there is none, the current block. */ + ut_ad(left_block == block + || !memcmp_aligned<4>(left + FIL_PAGE_PREV, + block->page.frame + + FIL_PAGE_OFFSET, 4)); + /* The previous one must be the left sibling. */ + left_block + = mtr->at_savepoint(mtr->get_savepoint() - 2); + ut_ad(!memcmp_aligned<4>(left_block->page.frame + + FIL_PAGE_NEXT, + block->page.frame + + FIL_PAGE_OFFSET, 4)); + } + if (btr_pcur_is_before_first_on_page(cursor)) { + page_cur_set_after_last(left_block, + &cursor->btr_cur.page_cur); + /* Release the right sibling. */ + } else { + /* Release the left sibling. */ + block = left_block; + } + mtr->release(*block); } cursor->latch_mode = latch_mode; cursor->old_rec = nullptr; - if (release_block) { - mtr->release(*release_block); - } return false; } diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index c61cdd9f604..300276ff3a6 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -1057,26 +1057,24 @@ btr_search_guess_on_hash( index_id_t index_id; ut_ad(mtr->is_active()); + ut_ad(index->is_btree() || index->is_ibuf()); - if (!btr_search_enabled) { + /* Note that, for efficiency, the struct info may not be protected by + any latch here! */ + + if (latch_mode > BTR_MODIFY_LEAF + || !info->last_hash_succ || !info->n_hash_potential + || (tuple->info_bits & REC_INFO_MIN_REC_FLAG)) { return false; } - ut_ad(!index->is_ibuf()); + ut_ad(index->is_btree()); + ut_ad(!index->table->is_temporary()); + ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF); compile_time_assert(ulint{BTR_SEARCH_LEAF} == ulint{RW_S_LATCH}); compile_time_assert(ulint{BTR_MODIFY_LEAF} == ulint{RW_X_LATCH}); - /* Not supported for spatial index */ - ut_ad(!dict_index_is_spatial(index)); - - /* Note that, for efficiency, the struct info may not be protected by - any latch here! */ - - if (info->n_hash_potential == 0) { - return false; - } - cursor->n_fields = info->n_fields; cursor->n_bytes = info->n_bytes; diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 9b8e843eab7..f87888d90da 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -2689,6 +2689,18 @@ re_evict: && mode != BUF_GET_IF_IN_POOL_OR_WATCH) { } else if (!ibuf_debug || recv_recovery_is_on()) { } else if (fil_space_t* space = fil_space_t::get(page_id.space())) { + for (ulint i = 0; i < mtr->get_savepoint(); i++) { + if (buf_block_t* b = mtr->block_at_savepoint(i)) { + if (b->page.oldest_modification() > 2 + && b->page.lock.have_any()) { + /* We are holding a dirty page latch + that would hang buf_flush_sync(). */ + space->release(); + goto re_evict_fail; + } + } + } + /* Try to evict the block from the buffer pool, to use the insert buffer (change buffer) as much as possible. */ @@ -2730,9 +2742,9 @@ re_evict: /* Failed to evict the page; change it directly */ } +re_evict_fail: #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - ut_ad(state > buf_page_t::FREED); if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED)) { goto ignore_block; } @@ -2788,8 +2800,7 @@ ibuf_merge_corrupted: } if (rw_latch == RW_X_LATCH) { - mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); - goto got_latch; + goto get_latch_valid; } else { block->page.lock.x_unlock(); goto get_latch; @@ -2797,12 +2808,10 @@ ibuf_merge_corrupted: } else { get_latch: switch (rw_latch) { - mtr_memo_type_t fix_type; case RW_NO_LATCH: mtr->memo_push(block, MTR_MEMO_BUF_FIX); return block; case RW_S_LATCH: - fix_type = MTR_MEMO_PAGE_S_FIX; block->page.lock.s_lock(); ut_ad(!block->page.is_read_fixed()); if (UNIV_UNLIKELY(block->page.id() != page_id)) { @@ -2811,13 +2820,12 @@ get_latch: goto page_id_mismatch; } get_latch_valid: - mtr->memo_push(block, fix_type); + mtr->memo_push(block, mtr_memo_type_t(rw_latch)); #ifdef BTR_CUR_HASH_ADAPT btr_search_drop_page_hash_index(block, true); #endif /* BTR_CUR_HASH_ADAPT */ break; case RW_SX_LATCH: - fix_type = MTR_MEMO_PAGE_SX_FIX; block->page.lock.u_lock(); ut_ad(!block->page.is_io_fixed()); if (UNIV_UNLIKELY(block->page.id() != page_id)) { @@ -2827,7 +2835,6 @@ get_latch_valid: goto get_latch_valid; default: ut_ad(rw_latch == RW_X_LATCH); - fix_type = MTR_MEMO_PAGE_X_FIX; if (block->page.lock.x_lock_upgraded()) { ut_ad(block->page.id() == page_id); block->unfix(); @@ -2840,7 +2847,6 @@ get_latch_valid: goto get_latch_valid; } -got_latch: ut_ad(page_id_t(page_get_space_id(block->page.frame), page_get_page_no(block->page.frame)) == page_id); @@ -3029,8 +3035,7 @@ bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block, ut_ad(!block->page.is_read_fixed()); block->page.set_accessed(); buf_page_make_young_if_needed(&block->page); - mtr->memo_push(block, rw_latch == RW_S_LATCH - ? MTR_MEMO_PAGE_S_FIX : MTR_MEMO_PAGE_X_FIX); + mtr->memo_push(block, mtr_memo_type_t(rw_latch)); } ut_d(if (!(++buf_dbg_counter % 5771)) buf_pool.validate()); diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc index e2afe17f892..cce5f2f24d0 100644 --- a/storage/innobase/dict/dict0crea.cc +++ b/storage/innobase/dict/dict0crea.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -725,7 +725,7 @@ dict_build_field_def_step( } /***************************************************************//** -Creates an index tree for the index if it is not a member of a cluster. +Creates an index tree for the index. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t @@ -758,9 +758,8 @@ dict_create_index_tree_step( pcur.btr_cur.page_cur.index = UT_LIST_GET_FIRST(dict_sys.sys_indexes->indexes); - dberr_t err = - btr_pcur_open(search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF, - &pcur, 0, &mtr); + dberr_t err = btr_pcur_open(search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF, + &pcur, &mtr); if (err != DB_SUCCESS) { func_exit: @@ -771,10 +770,25 @@ func_exit: btr_pcur_move_to_next_user_rec(&pcur, &mtr); if (UNIV_UNLIKELY(btr_pcur_is_after_last_on_page(&pcur))) { +corrupted: err = DB_CORRUPTION; goto func_exit; } + ulint len; + byte* data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur), + DICT_FLD__SYS_INDEXES__ID, + &len); + if (UNIV_UNLIKELY(len != 8 || mach_read_from_8(data) != index->id)) { + goto corrupted; + } + + data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur), + DICT_FLD__SYS_INDEXES__PAGE_NO, &len); + if (len != 4) { + goto corrupted; + } + if (index->is_readable()) { index->set_modified(mtr); @@ -787,11 +801,6 @@ func_exit: err = DB_OUT_OF_FILE_SPACE; ); } - ulint len; - byte* data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur), - DICT_FLD__SYS_INDEXES__PAGE_NO, - &len); - ut_ad(len == 4); mtr.write<4,mtr_t::MAYBE_NOP>(*btr_pcur_get_block(&pcur), data, node->page_no); goto func_exit; diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index e90dff03e16..d2fa8555e43 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -2,7 +2,7 @@ Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2022, MariaDB Corporation. +Copyright (c) 2013, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -4149,8 +4149,7 @@ void dict_set_corrupted(dict_index_t *index, const char *ctx) dict_index_copy_types(tuple, sys_index, 2); cursor.page_cur.index = sys_index; - if (btr_cur_search_to_nth_level(0, tuple, PAGE_CUR_LE, - BTR_MODIFY_LEAF, &cursor, &mtr) + if (cursor.search_leaf(tuple, PAGE_CUR_LE, BTR_MODIFY_LEAF, &mtr) != DB_SUCCESS) { goto fail; } @@ -4225,8 +4224,7 @@ dict_index_set_merge_threshold( dict_index_copy_types(tuple, sys_index, 2); cursor.page_cur.index = sys_index; - if (btr_cur_search_to_nth_level(0, tuple, PAGE_CUR_GE, - BTR_MODIFY_LEAF, &cursor, &mtr) + if (cursor.search_leaf(tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &mtr) != DB_SUCCESS) { goto func_exit; } diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc index 1ee10ec8232..bd3bd71544a 100644 --- a/storage/innobase/dict/dict0load.cc +++ b/storage/innobase/dict/dict0load.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2022, MariaDB Corporation. +Copyright (c) 2016, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1319,7 +1319,7 @@ static dberr_t dict_load_columns(dict_table_t *table, unsigned use_uncommitted, dict_index_copy_types(&tuple, sys_index, 1); pcur.btr_cur.page_cur.index = sys_index; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { goto func_exit; @@ -1450,7 +1450,7 @@ dict_load_virtual_col(dict_table_t *table, bool uncommitted, ulint nth_v_col) dict_index_copy_types(&tuple, sys_virtual_index, 2); pcur.btr_cur.page_cur.index = sys_virtual_index; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { goto func_exit; @@ -1690,8 +1690,7 @@ static dberr_t dict_load_fields(dict_index_t *index, bool uncommitted, dict_index_copy_types(&tuple, sys_index, 1); pcur.btr_cur.page_cur.index = sys_index; - dberr_t error = btr_pcur_open_on_user_rec(&tuple, - PAGE_CUR_GE, BTR_SEARCH_LEAF, + dberr_t error = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (error != DB_SUCCESS) { goto func_exit; @@ -1949,8 +1948,7 @@ dberr_t dict_load_indexes(dict_table_t *table, bool uncommitted, dict_index_copy_types(&tuple, sys_index, 1); pcur.btr_cur.page_cur.index = sys_index; - dberr_t error = btr_pcur_open_on_user_rec(&tuple, - PAGE_CUR_GE, BTR_SEARCH_LEAF, + dberr_t error = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (error != DB_SUCCESS) { goto func_exit; @@ -2347,7 +2345,7 @@ static dict_table_t *dict_load_table_one(const span<const char> &name, bool uncommitted = false; reload: mtr.start(); - dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS || !btr_pcur_is_on_user_rec(&pcur)) { @@ -2605,8 +2603,7 @@ dict_load_table_on_id( dict_table_t* table = nullptr; - if (btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr) + if (btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr) == DB_SUCCESS && btr_pcur_is_on_user_rec(&pcur)) { /*---------------------------------------------------*/ @@ -2712,7 +2709,7 @@ static dberr_t dict_load_foreign_cols(dict_foreign_t *foreign, trx_id_t trx_id) pcur.btr_cur.page_cur.index = sys_index; mem_heap_t* heap = nullptr; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { goto func_exit; @@ -2889,7 +2886,7 @@ dict_load_foreign( mtr.start(); mem_heap_t* heap = nullptr; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { goto err_exit; @@ -3100,7 +3097,7 @@ start_load: dict_index_copy_types(&tuple, sec_index, 1); pcur.btr_cur.page_cur.index = sec_index; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { DBUG_RETURN(err); diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index 06d30515229..7f453b1d8e0 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2009, 2019, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2022, MariaDB Corporation. +Copyright (c) 2015, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1697,7 +1697,7 @@ static dberr_t page_cur_open_level(page_cur_t *page_cur, ulint level, static dberr_t btr_pcur_open_level(btr_pcur_t *pcur, ulint level, mtr_t *mtr, dict_index_t *index) { - pcur->latch_mode= BTR_SEARCH_TREE; + pcur->latch_mode= BTR_SEARCH_LEAF; pcur->search_mode= PAGE_CUR_G; pcur->pos_state= BTR_PCUR_IS_POSITIONED; pcur->btr_cur.page_cur.index= index; diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 481a2dbce53..e482abeb848 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1429,7 +1429,7 @@ inline void mtr_t::log_file_op(mfile_type_t type, uint32_t space_id, ut_ad(strchr(path, '/')); ut_ad(!strcmp(&path[strlen(path) - strlen(DOT_IBD)], DOT_IBD)); - flag_modified(); + m_modifications= true; if (!is_logged()) return; m_last= nullptr; diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index 09583e157b7..d63febf01f1 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -122,15 +122,22 @@ MY_ATTRIBUTE((nonnull, warn_unused_result)) static buf_block_t *fsp_get_header(const fil_space_t *space, mtr_t *mtr, dberr_t *err) { - buf_block_t *block= buf_page_get_gen(page_id_t(space->id, 0), - space->zip_size(), RW_SX_LATCH, - nullptr, BUF_GET_POSSIBLY_FREED, - mtr, err); - if (block && space->id != mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + - block->page.frame)) + const page_id_t id{space->id, 0}; + buf_block_t *block= mtr->get_already_latched(id, MTR_MEMO_PAGE_SX_FIX); + if (block) + *err= DB_SUCCESS; + else { - *err= DB_CORRUPTION; - block= nullptr; + block= buf_page_get_gen(id, space->zip_size(), RW_SX_LATCH, + nullptr, BUF_GET_POSSIBLY_FREED, + mtr, err); + if (block && + space->id != mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + + block->page.frame)) + { + *err= DB_CORRUPTION; + block= nullptr; + } } return block; } diff --git a/storage/innobase/gis/gis0sea.cc b/storage/innobase/gis/gis0sea.cc index 207d49abeba..8ca8681bce9 100644 --- a/storage/innobase/gis/gis0sea.cc +++ b/storage/innobase/gis/gis0sea.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2016, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -44,7 +44,6 @@ Created 2014/01/16 Jimmy Yang static bool rtr_cur_restore_position( - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ btr_cur_t* cursor, /*!< in: detached persistent cursor */ ulint level, /*!< in: index level */ mtr_t* mtr); /*!< in: mtr */ @@ -74,6 +73,70 @@ rtr_adjust_parent_path( } } +/** Latches the leaf page or pages requested. +@param[in] block_savepoint leaf page where the search converged +@param[in] latch_mode BTR_SEARCH_LEAF, ... +@param[in] cursor cursor +@param[in] mtr mini-transaction */ +static void +rtr_latch_leaves( + ulint block_savepoint, + btr_latch_mode latch_mode, + btr_cur_t* cursor, + mtr_t* mtr) +{ + compile_time_assert(int(MTR_MEMO_PAGE_S_FIX) == int(RW_S_LATCH)); + compile_time_assert(int(MTR_MEMO_PAGE_X_FIX) == int(RW_X_LATCH)); + compile_time_assert(int(MTR_MEMO_PAGE_SX_FIX) == int(RW_SX_LATCH)); + + buf_block_t* block = mtr->at_savepoint(block_savepoint); + + ut_ad(block->page.id().space() == cursor->index()->table->space->id); + ut_ad(block->page.in_file()); + ut_ad(mtr->memo_contains_flagged(&cursor->index()->lock, + MTR_MEMO_S_LOCK + | MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); + + switch (latch_mode) { + uint32_t left_page_no; + uint32_t right_page_no; + default: + ut_ad(latch_mode == BTR_CONT_MODIFY_TREE); + break; + case BTR_MODIFY_TREE: + /* It is exclusive for other operations which calls + btr_page_set_prev() */ + ut_ad(mtr->memo_contains_flagged(&cursor->index()->lock, + MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); + /* x-latch also siblings from left to right */ + left_page_no = btr_page_get_prev(block->page.frame); + + if (left_page_no != FIL_NULL) { + btr_block_get(*cursor->index(), left_page_no, RW_X_LATCH, + true, mtr); + } + + mtr->upgrade_buffer_fix(block_savepoint, RW_X_LATCH); + + right_page_no = btr_page_get_next(block->page.frame); + + if (right_page_no != FIL_NULL) { + btr_block_get(*cursor->index(), right_page_no, + RW_X_LATCH, true, mtr); + } + break; + case BTR_SEARCH_LEAF: + case BTR_MODIFY_LEAF: + rw_lock_type_t mode = + rw_lock_type_t(latch_mode & (RW_X_LATCH | RW_S_LATCH)); + static_assert(int{RW_S_LATCH} == int{BTR_SEARCH_LEAF}, ""); + static_assert(int{RW_X_LATCH} == int{BTR_MODIFY_LEAF}, ""); + mtr->upgrade_buffer_fix(block_savepoint, mode); + } +} + /*************************************************************//** Find the next matching record. This function is used by search or record locating during index delete/update. @@ -135,6 +198,7 @@ rtr_pcur_getnext_from_path( && (my_latch_mode | 4) == BTR_CONT_MODIFY_TREE; if (!index_locked) { + ut_ad(mtr->is_empty()); mtr_s_lock_index(index, mtr); } else { ut_ad(mtr->memo_contains_flagged(&index->lock, @@ -154,14 +218,12 @@ rtr_pcur_getnext_from_path( node_seq_t path_ssn; const page_t* page; rw_lock_type_t rw_latch; - ulint tree_idx; mysql_mutex_lock(&rtr_info->rtr_path_mutex); next_rec = rtr_info->path->back(); rtr_info->path->pop_back(); level = next_rec.level; path_ssn = next_rec.seq_no; - tree_idx = btr_cur->tree_height - level - 1; /* Maintain the parent path info as well, if needed */ if (need_parent && !skip_parent && !new_split) { @@ -223,37 +285,15 @@ rtr_pcur_getnext_from_path( rw_latch = RW_X_LATCH; } - /* Release previous locked blocks */ - if (my_latch_mode != BTR_SEARCH_LEAF) { - for (ulint idx = 0; idx < btr_cur->tree_height; - idx++) { - if (rtr_info->tree_blocks[idx]) { - mtr_release_block_at_savepoint( - mtr, - rtr_info->tree_savepoints[idx], - rtr_info->tree_blocks[idx]); - rtr_info->tree_blocks[idx] = NULL; - } - } - for (ulint idx = RTR_MAX_LEVELS; idx < RTR_MAX_LEVELS + 3; - idx++) { - if (rtr_info->tree_blocks[idx]) { - mtr_release_block_at_savepoint( - mtr, - rtr_info->tree_savepoints[idx], - rtr_info->tree_blocks[idx]); - rtr_info->tree_blocks[idx] = NULL; - } - } + if (my_latch_mode == BTR_MODIFY_LEAF) { + mtr->rollback_to_savepoint(1); } - /* set up savepoint to record any locks to be taken */ - rtr_info->tree_savepoints[tree_idx] = mtr_set_savepoint(mtr); - ut_ad((my_latch_mode | 4) == BTR_CONT_MODIFY_TREE || !page_is_leaf(btr_cur_get_page(btr_cur)) || !btr_cur->page_cur.block->page.lock.have_any()); + const auto block_savepoint = mtr->get_savepoint(); block = buf_page_get_gen( page_id_t(index->table->space_id, next_rec.page_no), zip_size, @@ -264,8 +304,6 @@ rtr_pcur_getnext_from_path( break; } - rtr_info->tree_blocks[tree_idx] = block; - page = buf_block_get_frame(block); page_ssn = page_get_ssn_id(page); @@ -396,24 +434,23 @@ rtr_pcur_getnext_from_path( if (found) { if (level == target_level) { - page_cur_t* r_cur;; + ut_ad(block + == mtr->at_savepoint(block_savepoint)); if (my_latch_mode == BTR_MODIFY_TREE && level == 0) { ut_ad(rw_latch == RW_NO_LATCH); - btr_cur_latch_leaves( - block, + rtr_latch_leaves( + block_savepoint, BTR_MODIFY_TREE, btr_cur, mtr); } - r_cur = btr_cur_get_page_cur(btr_cur); - page_cur_position( page_cur_get_rec(page_cursor), page_cur_get_block(page_cursor), - r_cur); + btr_cur_get_page_cur(btr_cur)); btr_cur->low_match = level != 0 ? DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1 @@ -425,13 +462,7 @@ rtr_pcur_getnext_from_path( last node just located */ skip_parent = true; } else { - /* Release latch on the current page */ - ut_ad(rtr_info->tree_blocks[tree_idx]); - - mtr_release_block_at_savepoint( - mtr, rtr_info->tree_savepoints[tree_idx], - rtr_info->tree_blocks[tree_idx]); - rtr_info->tree_blocks[tree_idx] = NULL; + mtr->release_last_page(); } } while (!rtr_info->path->empty()); @@ -509,50 +540,524 @@ static void rtr_compare_cursor_rec(const rec_t *rec, dict_index_t *index, } #endif +TRANSACTIONAL_TARGET +dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple, + page_cur_mode_t mode, + btr_latch_mode latch_mode, + btr_cur_t *cur, mtr_t *mtr) +{ + page_cur_mode_t page_mode; + page_cur_mode_t search_mode= PAGE_CUR_UNSUPP; + + bool mbr_adj= false; + bool found= false; + dict_index_t *const index= cur->index(); + + mem_heap_t *heap= nullptr; + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs *offsets= offsets_; + rec_offs_init(offsets_); + ut_ad(level == 0 || mode == PAGE_CUR_LE || RTREE_SEARCH_MODE(mode)); + ut_ad(dict_index_check_search_tuple(index, tuple)); + ut_ad(dtuple_check_typed(tuple)); + ut_ad(index->is_spatial()); + ut_ad(index->page != FIL_NULL); + + MEM_UNDEFINED(&cur->up_match, sizeof cur->up_match); + MEM_UNDEFINED(&cur->up_bytes, sizeof cur->up_bytes); + MEM_UNDEFINED(&cur->low_match, sizeof cur->low_match); + MEM_UNDEFINED(&cur->low_bytes, sizeof cur->low_bytes); + ut_d(cur->up_match= ULINT_UNDEFINED); + ut_d(cur->low_match= ULINT_UNDEFINED); + + const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED; + + ut_ad(!latch_by_caller + || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_S_LOCK + | MTR_MEMO_SX_LOCK)); + latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); + + ut_ad(!latch_by_caller || latch_mode == BTR_SEARCH_LEAF || + latch_mode == BTR_MODIFY_LEAF); + + cur->flag= BTR_CUR_BINARY; + +#ifndef BTR_CUR_ADAPT + buf_block_t *guess= nullptr; +#else + btr_search_t *const info= btr_search_get_info(index); + buf_block_t *guess= info->root_guess; +#endif + + /* Store the position of the tree latch we push to mtr so that we + know how to release it when we have latched leaf node(s) */ + + const ulint savepoint= mtr->get_savepoint(); + + rw_lock_type_t upper_rw_latch, root_leaf_rw_latch= RW_NO_LATCH; + + switch (latch_mode) { + case BTR_MODIFY_TREE: + mtr_x_lock_index(index, mtr); + upper_rw_latch= root_leaf_rw_latch= RW_X_LATCH; + break; + case BTR_CONT_MODIFY_TREE: + ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK | + MTR_MEMO_SX_LOCK)); + upper_rw_latch= RW_X_LATCH; + break; + default: + ut_ad(latch_mode != BTR_MODIFY_PREV); + ut_ad(latch_mode != BTR_SEARCH_PREV); + if (!latch_by_caller) + mtr_s_lock_index(index, mtr); + upper_rw_latch= root_leaf_rw_latch= RW_S_LATCH; + if (latch_mode == BTR_MODIFY_LEAF) + root_leaf_rw_latch= RW_X_LATCH; + } + + auto root_savepoint= mtr->get_savepoint(); + const ulint zip_size= index->table->space->zip_size(); + + /* Start with the root page. */ + page_id_t page_id(index->table->space_id, index->page); + + ulint up_match= 0, up_bytes= 0, low_match= 0, low_bytes= 0; + ulint height= ULINT_UNDEFINED; + + /* We use these modified search modes on non-leaf levels of the + B-tree. These let us end up in the right B-tree leaf. In that leaf + we use the original search mode. */ + + switch (mode) { + case PAGE_CUR_GE: + page_mode= PAGE_CUR_L; + break; + case PAGE_CUR_G: + page_mode= PAGE_CUR_LE; + break; + default: +#ifdef PAGE_CUR_LE_OR_EXTENDS + ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE + || RTREE_SEARCH_MODE(mode) + || mode == PAGE_CUR_LE_OR_EXTENDS); +#else /* PAGE_CUR_LE_OR_EXTENDS */ + ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE + || RTREE_SEARCH_MODE(mode)); +#endif /* PAGE_CUR_LE_OR_EXTENDS */ + page_mode= mode; + break; + } + + search_loop: + auto buf_mode= BUF_GET; + ulint rw_latch= RW_NO_LATCH; + + if (height) + { + /* We are about to fetch the root or a non-leaf page. */ + if (latch_mode != BTR_MODIFY_TREE || height == level) + /* If doesn't have SX or X latch of index, + each page should be latched before reading. */ + rw_latch= upper_rw_latch; + } + else if (latch_mode <= BTR_MODIFY_LEAF) + rw_latch= latch_mode; + + dberr_t err; + auto block_savepoint= mtr->get_savepoint(); + buf_block_t *block= buf_page_get_gen(page_id, zip_size, rw_latch, guess, + buf_mode, mtr, &err, false); + if (!block) + { + if (err == DB_DECRYPTION_FAILED) + btr_decryption_failed(*index); + func_exit: + if (UNIV_LIKELY_NULL(heap)) + mem_heap_free(heap); + + if (mbr_adj) + /* remember that we will need to adjust parent MBR */ + cur->rtr_info->mbr_adj= true; + + return err; + } + + const page_t *page= buf_block_get_frame(block); +#ifdef UNIV_ZIP_DEBUG + if (rw_latch != RW_NO_LATCH) { + const page_zip_des_t *page_zip= buf_block_get_page_zip(block); + ut_a(!page_zip || page_zip_validate(page_zip, page, index)); + } +#endif /* UNIV_ZIP_DEBUG */ + + ut_ad(fil_page_index_page_check(page)); + ut_ad(index->id == btr_page_get_index_id(page)); + + if (height != ULINT_UNDEFINED); + else if (page_is_leaf(page) && + rw_latch != RW_NO_LATCH && rw_latch != root_leaf_rw_latch) + { + /* The root page is also a leaf page (root_leaf). + We should reacquire the page, because the root page + is latched differently from leaf pages. */ + ut_ad(root_leaf_rw_latch != RW_NO_LATCH); + ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_SX_LATCH); + + ut_ad(block == mtr->at_savepoint(block_savepoint)); + mtr->rollback_to_savepoint(block_savepoint); + + upper_rw_latch= root_leaf_rw_latch; + goto search_loop; + } + else + { + /* We are in the root node */ + + height= btr_page_get_level(page); + cur->tree_height= height + 1; + + ut_ad(cur->rtr_info); + + /* If SSN in memory is not initialized, fetch it from root page */ + if (!rtr_get_current_ssn_id(index)) + /* FIXME: do this in dict_load_table_one() */ + index->set_ssn(page_get_ssn_id(page) + 1); + + /* Save the MBR */ + cur->rtr_info->thr= cur->thr; + rtr_get_mbr_from_tuple(tuple, &cur->rtr_info->mbr); + +#ifdef BTR_CUR_ADAPT + info->root_guess= block; +#endif + } + + if (height == 0) { + if (rw_latch == RW_NO_LATCH) + { + ut_ad(block == mtr->at_savepoint(block_savepoint)); + rtr_latch_leaves(block_savepoint, latch_mode, cur, mtr); + } + + switch (latch_mode) { + case BTR_MODIFY_TREE: + case BTR_CONT_MODIFY_TREE: + break; + default: + if (!latch_by_caller) + { + /* Release the tree s-latch */ + mtr->rollback_to_savepoint(savepoint, + savepoint + 1); + block_savepoint--; + root_savepoint--; + } + /* release upper blocks */ + if (savepoint < block_savepoint) + mtr->rollback_to_savepoint(savepoint, block_savepoint); + } + + page_mode= mode; + } + + /* Remember the page search mode */ + search_mode= page_mode; + + /* Some adjustment on search mode, when the page search mode is + PAGE_CUR_RTREE_LOCATE or PAGE_CUR_RTREE_INSERT, as we are searching + with MBRs. When it is not the target level, we should search all + sub-trees that "CONTAIN" the search range/MBR. When it is at the + target level, the search becomes PAGE_CUR_LE */ + + if (page_mode == PAGE_CUR_RTREE_INSERT) + { + page_mode= (level == height) + ? PAGE_CUR_LE + : PAGE_CUR_RTREE_INSERT; + + ut_ad(!page_is_leaf(page) || page_mode == PAGE_CUR_LE); + } + else if (page_mode == PAGE_CUR_RTREE_LOCATE && level == height) + page_mode= level == 0 ? PAGE_CUR_LE : PAGE_CUR_RTREE_GET_FATHER; + + up_match= 0; + low_match= 0; + + if (latch_mode == BTR_MODIFY_TREE || latch_mode == BTR_CONT_MODIFY_TREE) + /* Tree are locked, no need for Page Lock to protect the "path" */ + cur->rtr_info->need_page_lock= false; + + cur->page_cur.block= block; + + if (page_mode >= PAGE_CUR_CONTAIN) + { + found= rtr_cur_search_with_match(block, index, tuple, page_mode, + &cur->page_cur, cur->rtr_info); + + /* Need to use BTR_MODIFY_TREE to do the MBR adjustment */ + if (search_mode == PAGE_CUR_RTREE_INSERT && cur->rtr_info->mbr_adj) { + static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), ""); + + if (!(latch_mode & 8)) + /* Parent MBR needs updated, should retry with BTR_MODIFY_TREE */ + goto func_exit; + + cur->rtr_info->mbr_adj= false; + mbr_adj= true; + } + + if (found && page_mode == PAGE_CUR_RTREE_GET_FATHER) + cur->low_match= DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1; + } + else + { + /* Search for complete index fields. */ + up_bytes= low_bytes= 0; + if (page_cur_search_with_match(tuple, page_mode, &up_match, + &low_match, &cur->page_cur, nullptr)) { + err= DB_CORRUPTION; + goto func_exit; + } + } + + /* If this is the desired level, leave the loop */ + + ut_ad(height == btr_page_get_level(btr_cur_get_page(cur))); + + /* Add Predicate lock if it is serializable isolation + and only if it is in the search case */ + if (mode >= PAGE_CUR_CONTAIN && mode != PAGE_CUR_RTREE_INSERT && + mode != PAGE_CUR_RTREE_LOCATE && cur->rtr_info->need_prdt_lock) + { + lock_prdt_t prdt; + + { + trx_t* trx= thr_get_trx(cur->thr); + TMLockTrxGuard g{TMLockTrxArgs(*trx)}; + lock_init_prdt_from_mbr(&prdt, &cur->rtr_info->mbr, mode, + trx->lock.lock_heap); + } + + if (rw_latch == RW_NO_LATCH && height != 0) + block->page.lock.s_lock(); + + lock_prdt_lock(block, &prdt, index, LOCK_S, LOCK_PREDICATE, cur->thr); + + if (rw_latch == RW_NO_LATCH && height != 0) + block->page.lock.s_unlock(); + } + + if (level != height) + { + ut_ad(height > 0); + + height--; + guess= nullptr; + + const rec_t *node_ptr= btr_cur_get_rec(cur); + + offsets= rec_get_offsets(node_ptr, index, offsets, 0, + ULINT_UNDEFINED, &heap); + + if (page_rec_is_supremum(node_ptr)) + { + cur->low_match= 0; + cur->up_match= 0; + goto func_exit; + } + + /* If we are doing insertion or record locating, + remember the tree nodes we visited */ + if (page_mode == PAGE_CUR_RTREE_INSERT || + (search_mode == PAGE_CUR_RTREE_LOCATE && + latch_mode != BTR_MODIFY_LEAF)) + { + const bool add_latch= latch_mode == BTR_MODIFY_TREE && + rw_latch == RW_NO_LATCH; + + if (add_latch) + { + ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK | + MTR_MEMO_SX_LOCK)); + block->page.lock.s_lock(); + } + + /* Store the parent cursor location */ + ut_d(auto num_stored=) + rtr_store_parent_path(block, cur, latch_mode, height + 1, mtr); + + if (page_mode == PAGE_CUR_RTREE_INSERT) + { + btr_pcur_t *r_cursor= rtr_get_parent_cursor(cur, height + 1, true); + /* If it is insertion, there should be only one parent for + each level traverse */ + ut_ad(num_stored == 1); + node_ptr= btr_pcur_get_rec(r_cursor); + } + + if (add_latch) + block->page.lock.s_unlock(); + + ut_ad(!page_rec_is_supremum(node_ptr)); + } + + ut_ad(page_mode == search_mode || + (page_mode == PAGE_CUR_WITHIN && + search_mode == PAGE_CUR_RTREE_LOCATE)); + page_mode= search_mode; + + if (height == level && latch_mode == BTR_MODIFY_TREE) + { + ut_ad(upper_rw_latch == RW_X_LATCH); + for (auto i= root_savepoint, n= mtr->get_savepoint(); i < n; i++) + mtr->upgrade_buffer_fix(i, RW_X_LATCH); + } + + /* Go to the child node */ + page_id.set_page_no(btr_node_ptr_get_child_page_no(node_ptr, offsets)); + + if (page_mode >= PAGE_CUR_CONTAIN && page_mode != PAGE_CUR_RTREE_INSERT) + { + rtr_node_path_t *path= cur->rtr_info->path; + + if (found && !path->empty()) + { + ut_ad(path->back().page_no == page_id.page_no()); + path->pop_back(); +#ifdef UNIV_DEBUG + if (page_mode == PAGE_CUR_RTREE_LOCATE && + latch_mode != BTR_MODIFY_LEAF) + { + btr_pcur_t* pcur= cur->rtr_info->parent_path->back().cursor; + rec_t *my_node_ptr= btr_pcur_get_rec(pcur); + + offsets= rec_get_offsets(my_node_ptr, index, offsets, + 0, ULINT_UNDEFINED, &heap); + + ut_ad(page_id.page_no() == + btr_node_ptr_get_child_page_no(my_node_ptr, offsets)); + } +#endif + } + } + + goto search_loop; + } + + if (level) + { + if (upper_rw_latch == RW_NO_LATCH) + { + ut_ad(latch_mode == BTR_CONT_MODIFY_TREE); + btr_block_get(*index, page_id.page_no(), RW_X_LATCH, false, mtr, &err); + } + else + { + ut_ad(mtr->memo_contains_flagged(block, upper_rw_latch)); + ut_ad(!latch_by_caller); + } + + if (page_mode <= PAGE_CUR_LE) + { + cur->low_match= low_match; + cur->up_match= up_match; + } + } + else + { + cur->low_match= low_match; + cur->low_bytes= low_bytes; + cur->up_match= up_match; + cur->up_bytes= up_bytes; + + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + } + + goto func_exit; +} + +dberr_t rtr_search_leaf(btr_cur_t *cur, const dtuple_t *tuple, + btr_latch_mode latch_mode, + mtr_t *mtr, page_cur_mode_t mode) +{ + return rtr_search_to_nth_level(0, tuple, mode, latch_mode, cur, mtr); +} + +/** Search for a spatial index leaf page record. +@param pcur cursor +@param tuple search tuple +@param mode search mode +@param mtr mini-transaction */ +dberr_t rtr_search_leaf(btr_pcur_t *pcur, const dtuple_t *tuple, + page_cur_mode_t mode, mtr_t *mtr) +{ +#ifdef UNIV_DEBUG + switch (mode) { + case PAGE_CUR_CONTAIN: + case PAGE_CUR_INTERSECT: + case PAGE_CUR_WITHIN: + case PAGE_CUR_DISJOINT: + case PAGE_CUR_MBR_EQUAL: + break; + default: + ut_ad("invalid mode" == 0); + } +#endif + pcur->latch_mode= BTR_SEARCH_LEAF; + pcur->search_mode= mode; + pcur->pos_state= BTR_PCUR_IS_POSITIONED; + pcur->trx_if_known= nullptr; + return rtr_search_leaf(&pcur->btr_cur, tuple, BTR_SEARCH_LEAF, mtr, mode); +} + /**************************************************************//** Initializes and opens a persistent cursor to an index tree. It should be -closed with btr_pcur_close. Mainly called by row_search_index_entry() */ -bool -rtr_pcur_open( - dict_index_t* index, /*!< in: index */ +closed with btr_pcur_close. */ +bool rtr_search( const dtuple_t* tuple, /*!< in: tuple on which search done */ - btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ + btr_latch_mode latch_mode,/*!< in: BTR_MODIFY_LEAF, ... */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ mtr_t* mtr) /*!< in: mtr */ { static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), ""); ut_ad(latch_mode & BTR_MODIFY_LEAF); + ut_ad(!(latch_mode & BTR_ALREADY_S_LATCHED)); + ut_ad(mtr->is_empty()); /* Initialize the cursor */ btr_pcur_init(cursor); cursor->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); - cursor->search_mode = PAGE_CUR_RTREE_LOCATE; - cursor->trx_if_known = NULL; + cursor->search_mode = PAGE_CUR_RTREE_LOCATE; + cursor->trx_if_known = nullptr; + + if (latch_mode & 8) { + mtr_x_lock_index(cursor->index(), mtr); + } else { + latch_mode + = btr_latch_mode(latch_mode | BTR_ALREADY_S_LATCHED); + mtr_sx_lock_index(cursor->index(), mtr); + } /* Search with the tree cursor */ btr_cur_t* btr_cursor = btr_pcur_get_btr_cur(cursor); - btr_cursor->page_cur.index = index; - btr_cursor->rtr_info = rtr_create_rtr_info(false, false, - btr_cursor, index); + btr_cursor->rtr_info + = rtr_create_rtr_info(false, false, + btr_cursor, cursor->index()); - /* Purge will SX lock the tree instead of take Page Locks */ if (btr_cursor->thr) { btr_cursor->rtr_info->need_page_lock = true; btr_cursor->rtr_info->thr = btr_cursor->thr; } - if ((latch_mode & 8) && index->lock.have_u_not_x()) { - index->lock.u_x_upgrade(SRW_LOCK_CALL); - mtr->lock_upgrade(index->lock); - } - - if (btr_cur_search_to_nth_level(0, tuple, PAGE_CUR_RTREE_LOCATE, - latch_mode, - btr_cursor, mtr) != DB_SUCCESS) { + if (rtr_search_leaf(btr_cursor, tuple, latch_mode, mtr) + != DB_SUCCESS) { return true; } @@ -560,7 +1065,8 @@ rtr_pcur_open( const rec_t* rec = btr_pcur_get_rec(cursor); - const bool d= rec_get_deleted_flag(rec, index->table->not_redundant()); + const bool d= rec_get_deleted_flag( + rec, cursor->index()->table->not_redundant()); if (page_rec_is_infimum(rec) || btr_pcur_get_low_match(cursor) != dtuple_get_n_fields(tuple) @@ -571,26 +1077,12 @@ rtr_pcur_open( btr_cursor->rtr_info->fd_del = true; btr_cursor->low_match = 0; } - /* Did not find matched row in first dive. Release - latched block if any before search more pages */ - if (!(latch_mode & 8)) { - ulint tree_idx = btr_cursor->tree_height - 1; - rtr_info_t* rtr_info = btr_cursor->rtr_info; - - if (rtr_info->tree_blocks[tree_idx]) { - mtr_release_block_at_savepoint( - mtr, - rtr_info->tree_savepoints[tree_idx], - rtr_info->tree_blocks[tree_idx]); - rtr_info->tree_blocks[tree_idx] = NULL; - } - } + + mtr->rollback_to_savepoint(1); if (!rtr_pcur_getnext_from_path(tuple, PAGE_CUR_RTREE_LOCATE, btr_cursor, 0, latch_mode, - latch_mode - & (8 | BTR_ALREADY_S_LATCHED), - mtr)) { + true, mtr)) { return true; } @@ -598,6 +1090,10 @@ rtr_pcur_open( == dtuple_get_n_fields(tuple)); } + if (!(latch_mode & 8)) { + mtr->rollback_to_savepoint(0, 1); + } + return false; } @@ -641,8 +1137,7 @@ static const rec_t* rtr_get_father_node( if (sea_cur && sea_cur->tree_height > level) { ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); - if (rtr_cur_restore_position(BTR_CONT_MODIFY_TREE, sea_cur, - level, mtr)) { + if (rtr_cur_restore_position(sea_cur, level, mtr)) { btr_pcur_t* r_cursor = rtr_get_parent_cursor( sea_cur, level, false); @@ -668,9 +1163,8 @@ static const rec_t* rtr_get_father_node( btr_cur->rtr_info = rtr_create_rtr_info(false, false, btr_cur, index); - if (btr_cur_search_to_nth_level(level, tuple, - PAGE_CUR_RTREE_LOCATE, - BTR_CONT_MODIFY_TREE, btr_cur, mtr) + if (rtr_search_to_nth_level(level, tuple, PAGE_CUR_RTREE_LOCATE, + BTR_CONT_MODIFY_TREE, btr_cur, mtr) != DB_SUCCESS) { } else if (sea_cur && sea_cur->tree_height == level) { rec = btr_cur_get_rec(btr_cur); @@ -729,9 +1223,8 @@ rtr_page_get_father_node_ptr( page_no = btr_cur_get_block(cursor)->page.id().page_no(); index = btr_cur_get_index(cursor); - ut_ad(srv_read_only_mode - || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); + ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); ut_ad(dict_index_get_page(index) != page_no); @@ -879,32 +1372,10 @@ rtr_init_rtr_info( if (!reinit) { /* Reset all members. */ - rtr_info->path = NULL; - rtr_info->parent_path = NULL; - rtr_info->matches = NULL; - + memset(rtr_info, 0, sizeof *rtr_info); + static_assert(PAGE_CUR_UNSUPP == 0, "compatibility"); mysql_mutex_init(rtr_path_mutex_key, &rtr_info->rtr_path_mutex, nullptr); - - memset(rtr_info->tree_blocks, 0x0, - sizeof(rtr_info->tree_blocks)); - memset(rtr_info->tree_savepoints, 0x0, - sizeof(rtr_info->tree_savepoints)); - rtr_info->mbr.xmin = 0.0; - rtr_info->mbr.xmax = 0.0; - rtr_info->mbr.ymin = 0.0; - rtr_info->mbr.ymax = 0.0; - rtr_info->thr = NULL; - rtr_info->heap = NULL; - rtr_info->cursor = NULL; - rtr_info->index = NULL; - rtr_info->need_prdt_lock = false; - rtr_info->need_page_lock = false; - rtr_info->allocated = false; - rtr_info->mbr_adj = false; - rtr_info->fd_del = false; - rtr_info->search_tuple = NULL; - rtr_info->search_mode = PAGE_CUR_UNSUPP; } ut_ad(!rtr_info->matches || rtr_info->matches->matched_recs->empty()); @@ -1130,7 +1601,6 @@ struct optimistic_get static bool rtr_cur_restore_position( - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ btr_cur_t* btr_cur, /*!< in: detached persistent cursor */ ulint level, /*!< in: index level */ mtr_t* mtr) /*!< in: mtr */ @@ -1158,8 +1628,6 @@ rtr_cur_restore_position( r_cursor->modify_clock = 100; ); - ut_ad(latch_mode == BTR_CONT_MODIFY_TREE); - if (r_cursor->block_when_stored.run_with_hint( optimistic_get(r_cursor, mtr))) { ut_ad(r_cursor->pos_state == BTR_PCUR_IS_POSITIONED); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index a1fcab0a53b..713404798e0 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1543,8 +1543,7 @@ static void innodb_drop_database(handlerton*, char *path) mtr_t mtr; mtr.start(); pcur.btr_cur.page_cur.index = sys_index; - err= btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); + err= btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) goto err_exit; @@ -7977,6 +7976,7 @@ report_error: #ifdef WITH_WSREP if (!error_result && trx->is_wsrep() + && !trx->is_bulk_insert() && wsrep_thd_is_local(m_user_thd) && !wsrep_thd_ignore_table(m_user_thd) && !wsrep_consistency_check(m_user_thd) @@ -10080,6 +10080,8 @@ wsrep_append_key( (shared, exclusive, semi...) */ ) { + ut_ad(!trx->is_bulk_insert()); + DBUG_ENTER("wsrep_append_key"); DBUG_PRINT("enter", ("thd: %lu trx: %lld", thd_get_thread_id(thd), diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 91c1ff53d2d..4cd1505e0d8 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -6097,7 +6097,8 @@ func_exit: que_thr_t* thr = pars_complete_graph_for_exec( NULL, trx, ctx->heap, NULL); - const bool is_root = block->page.id().page_no() == index->page; + page_id_t id{block->page.id()}; + const bool is_root = id.page_no() == index->page; if (rec_is_metadata(rec, *index)) { ut_ad(page_rec_is_user_rec(rec)); @@ -6114,8 +6115,10 @@ func_exit: } /* Ensure that the root page is in the correct format. */ - buf_block_t* root = btr_root_block_get(index, RW_X_LATCH, - &mtr, &err); + id.set_page_no(index->page); + buf_block_t* root = mtr.get_already_latched( + id, MTR_MEMO_PAGE_SX_FIX); + if (UNIV_UNLIKELY(!root)) { goto func_exit; } @@ -11293,7 +11296,8 @@ err_index: } DBUG_EXECUTE_IF("stats_lock_fail", - error = DB_LOCK_WAIT_TIMEOUT;); + error = DB_LOCK_WAIT_TIMEOUT; + trx_rollback_for_mysql(trx);); if (error == DB_SUCCESS) { error = lock_sys_tables(trx); @@ -11311,6 +11315,18 @@ err_index: if (fts_exist) { purge_sys.resume_FTS(); } + + if (trx->state == TRX_STATE_NOT_STARTED) { + /* Transaction may have been rolled back + due to a lock wait timeout, deadlock, + or a KILL statement. So restart the + transaction to remove the newly created + table or index stubs from data dictionary + and table cache in + rollback_inplace_alter_table() */ + trx_start_for_ddl(trx); + } + DBUG_RETURN(true); } diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 66e330a0592..e55835425be 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2022, MariaDB Corporation. +Copyright (c) 2016, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -2298,7 +2298,7 @@ loop: btr_pcur_t pcur; pcur.btr_cur.page_cur.index= ibuf.index; ibuf_mtr_start(&mtr); - if (btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &pcur, 0, &mtr)) + if (btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &pcur, &mtr)) goto func_exit; if (!btr_pcur_is_on_user_rec(&pcur)) { @@ -2494,8 +2494,8 @@ ibuf_merge_space( /* Position the cursor on the first matching record. */ pcur.btr_cur.page_cur.index = ibuf.index; - dberr_t err = btr_pcur_open(&tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, 0, &mtr); + dberr_t err = btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, + &pcur, &mtr); ut_ad(err != DB_SUCCESS || page_validate(btr_pcur_get_page(&pcur), ibuf.index)); @@ -3239,7 +3239,7 @@ ibuf_insert_low( ibuf_mtr_start(&mtr); pcur.btr_cur.page_cur.index = ibuf.index; - err = btr_pcur_open(ibuf_entry, PAGE_CUR_LE, mode, &pcur, 0, &mtr); + err = btr_pcur_open(ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr); if (err != DB_SUCCESS) { func_exit: ibuf_mtr_commit(&mtr); @@ -3956,8 +3956,6 @@ ibuf_restore_pos( position is to be restored */ mtr_t* mtr) /*!< in/out: mini-transaction */ { - ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_PURGE_TREE); - if (UNIV_LIKELY(pcur->restore_position(mode, mtr) == btr_pcur_t::SAME_ALL)) { return true; @@ -4038,12 +4036,11 @@ bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur, ibuf_mtr_start(mtr); mysql_mutex_lock(&ibuf_mutex); + mtr_x_lock_index(ibuf.index, mtr); - if (!ibuf_restore_pos(page_id, search_tuple, BTR_PURGE_TREE, - pcur, mtr)) { - + if (!ibuf_restore_pos(page_id, search_tuple, + BTR_PURGE_TREE_ALREADY_LATCHED, pcur, mtr)) { mysql_mutex_unlock(&ibuf_mutex); - ut_ad(mtr->has_committed()); goto func_exit; } @@ -4054,13 +4051,10 @@ bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur, ut_a(err == DB_SUCCESS); ibuf_size_update(ibuf_root->page.frame); - mysql_mutex_unlock(&ibuf_mutex); - ibuf.empty = page_is_empty(ibuf_root->page.frame); - } else { - mysql_mutex_unlock(&ibuf_mutex); } + mysql_mutex_unlock(&ibuf_mutex); ibuf_btr_pcur_commit_specify_mtr(pcur, mtr); func_exit: @@ -4238,7 +4232,7 @@ loop: /* Position pcur in the insert buffer at the first entry for this index page */ - if (btr_pcur_open_on_user_rec(search_tuple, PAGE_CUR_GE, + if (btr_pcur_open_on_user_rec(search_tuple, BTR_MODIFY_LEAF, &pcur, &mtr) != DB_SUCCESS) { err = DB_CORRUPTION; @@ -4455,7 +4449,7 @@ loop: /* Position pcur in the insert buffer at the first entry for the space */ - if (btr_pcur_open_on_user_rec(&search_tuple, PAGE_CUR_GE, + if (btr_pcur_open_on_user_rec(&search_tuple, BTR_MODIFY_LEAF, &pcur, &mtr) != DB_SUCCESS) { goto leave_loop; diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index a2aa46b62da..a1cc10b05db 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -2,7 +2,7 @@ Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2014, 2022, MariaDB Corporation. +Copyright (c) 2014, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -211,13 +211,12 @@ btr_write_autoinc(dict_index_t* index, ib_uint64_t autoinc, bool reset = false) @param[in,out] mtr mini-transaction */ void btr_set_instant(buf_block_t* root, const dict_index_t& index, mtr_t* mtr); -ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result)) +ATTRIBUTE_COLD __attribute__((nonnull)) /** Reset the table to the canonical format on ROLLBACK of instant ALTER TABLE. @param[in] index clustered index with instant ALTER TABLE @param[in] all whether to reset FIL_PAGE_TYPE as well -@param[in,out] mtr mini-transaction -@return error code */ -dberr_t btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr); +@param[in,out] mtr mini-transaction */ +void btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr); /*************************************************************//** Makes tree one level higher by splitting the root, and inserts @@ -241,7 +240,7 @@ btr_root_raise_and_insert( ulint n_ext, /*!< in: number of externally stored columns */ mtr_t* mtr, /*!< in: mtr */ dberr_t* err) /*!< out: error code */ - MY_ATTRIBUTE((warn_unused_result)); + MY_ATTRIBUTE((nonnull, warn_unused_result)); /*************************************************************//** Reorganizes an index page. diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index 49bc8a4ff1b..f6abc9f5e52 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -63,12 +63,6 @@ enum { BTR_KEEP_IBUF_BITMAP = 32 }; -/* btr_cur_latch_leaves() returns latched blocks and savepoints. */ -struct btr_latch_leaves_t { - buf_block_t* blocks[3]; - ulint savepoints[3]; -}; - #include "que0types.h" #include "row0types.h" @@ -126,51 +120,28 @@ bool btr_cur_instant_root_init(dict_index_t* index, const page_t* page) ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result)); -/** Optimistically latches the leaf page or pages requested. -@param[in] block guessed buffer block -@param[in] modify_clock modify clock value -@param[in,out] latch_mode BTR_SEARCH_LEAF, ... -@param[in,out] cursor cursor -@param[in] mtr mini-transaction -@return true if success */ -bool -btr_cur_optimistic_latch_leaves( - buf_block_t* block, - ib_uint64_t modify_clock, - btr_latch_mode* latch_mode, - btr_cur_t* cursor, - mtr_t* mtr); - MY_ATTRIBUTE((warn_unused_result)) -/** Searches an index tree and positions a tree cursor on a given level. +/********************************************************************//** +Searches an index tree and positions a tree cursor on a given non-leaf level. NOTE: n_fields_cmp in tuple must be set so that it cannot be compared to node pointer page number fields on the upper levels of the tree! -Note that if mode is PAGE_CUR_LE, which is used in inserts, then cursor->up_match and cursor->low_match both will have sensible values. -If mode is PAGE_CUR_GE, then up_match will a have a sensible value. +Cursor is left at the place where an insert of the +search tuple should be performed in the B-tree. InnoDB does an insert +immediately after the cursor. Thus, the cursor may end up on a user record, +or on a page infimum record. @param level the tree level of search @param tuple data tuple; NOTE: n_fields_cmp in tuple must be set so that it cannot get compared to the node ptr page number field! -@param mode PAGE_CUR_L, ...; NOTE that if the search is made using a - unique prefix of a record, mode should be PAGE_CUR_LE, not - PAGE_CUR_GE, as the latter may end up on the previous page of - the record! Inserts should always be made using PAGE_CUR_LE - to search the position! -@param latch_mode BTR_SEARCH_LEAF, ..., ORed with at most one of BTR_INSERT, - BTR_DELETE_MARK, or BTR_DELETE; - cursor->left_block is used to store a pointer to the left - neighbor page +@param latch RW_S_LATCH or RW_X_LATCH @param cursor tree cursor; the cursor page is s- or x-latched, but see also above! @param mtr mini-transaction -@param autoinc PAGE_ROOT_AUTO_INC to be written (0 if none) @return DB_SUCCESS on success or error code otherwise */ dberr_t btr_cur_search_to_nth_level(ulint level, const dtuple_t *tuple, - page_cur_mode_t mode, - btr_latch_mode latch_mode, - btr_cur_t *cursor, mtr_t *mtr, - ib_uint64_t autoinc= 0); + rw_lock_type_t rw_latch, + btr_cur_t *cursor, mtr_t *mtr); /*************************************************************//** Tries to perform an insert to a page in an index tree, next to cursor. @@ -653,20 +624,6 @@ btr_rec_copy_externally_stored_field( ulint* len, mem_heap_t* heap); -/** Latches the leaf page or pages requested. -@param[in] block leaf page where the search converged -@param[in] latch_mode BTR_SEARCH_LEAF, ... -@param[in] cursor cursor -@param[in,out] mtr mini-transaction -@param[out] latch_leaves latched blocks and savepoints */ -void -btr_cur_latch_leaves( - buf_block_t* block, - btr_latch_mode latch_mode, - btr_cur_t* cursor, - mtr_t* mtr, - btr_latch_leaves_t* latch_leaves = nullptr); - /*######################################################################*/ /** In the pessimistic delete, if the page data size drops below this @@ -727,21 +684,16 @@ to know struct size! */ struct btr_cur_t { page_cur_t page_cur; /*!< page cursor */ purge_node_t* purge_node; /*!< purge node, for BTR_DELETE */ - buf_block_t* left_block; /*!< this field is used to store - a pointer to the left neighbor - page, in the cases - BTR_SEARCH_PREV and - BTR_MODIFY_PREV */ /*------------------------------*/ que_thr_t* thr; /*!< this field is only used - when btr_cur_search_to_nth_level + when search_leaf() is called for an index entry insertion: the calling query thread is passed here to be used in the insert buffer */ /*------------------------------*/ /** The following fields are used in - btr_cur_search_to_nth_level to pass information: */ + search_leaf() to pass information: */ /* @{ */ enum btr_cur_method flag; /*!< Search method used */ ulint tree_height; /*!< Tree height if the search is done @@ -750,8 +702,7 @@ struct btr_cur_t { ulint up_match; /*!< If the search mode was PAGE_CUR_LE, the number of matched fields to the the first user record to the right of - the cursor record after - btr_cur_search_to_nth_level; + the cursor record after search_leaf(); for the mode PAGE_CUR_GE, the matched fields to the first user record AT THE CURSOR or to the right of it; @@ -768,8 +719,7 @@ struct btr_cur_t { ulint low_match; /*!< if search mode was PAGE_CUR_LE, the number of matched fields to the first user record AT THE CURSOR or - to the left of it after - btr_cur_search_to_nth_level; + to the left of it after search_leaf(); NOT defined for PAGE_CUR_GE or any other search modes; see also the NOTE in up_match! */ @@ -803,6 +753,24 @@ struct btr_cur_t { dberr_t open_leaf(bool first, dict_index_t *index, btr_latch_mode latch_mode, mtr_t *mtr); + /** Search the leaf page record corresponding to a key. + @param tuple key to search for, with correct n_fields_cmp + @param mode search mode; PAGE_CUR_LE for unique prefix or for inserting + @param latch_mode latch mode + @param mtr mini-transaction + @return error code */ + dberr_t search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, + btr_latch_mode latch_mode, mtr_t *mtr); + + /** Search the leaf page record corresponding to a key, exclusively latching + all sibling pages on the way. + @param tuple key to search for, with correct n_fields_cmp + @param mode search mode; PAGE_CUR_LE for unique prefix or for inserting + @param mtr mini-transaction + @return error code */ + dberr_t pessimistic_search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, + mtr_t *mtr); + /** Open the cursor at a random leaf page record. @param offsets temporary memory for rec_get_offsets() @param heap memory heap for rec_get_offsets() @@ -862,14 +830,14 @@ inherited external field. */ #define BTR_EXTERN_INHERITED_FLAG 64U #ifdef BTR_CUR_HASH_ADAPT -/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */ +/** Number of searches down the B-tree in btr_cur_t::search_leaf(). */ extern ib_counter_t<ulint, ib_counter_element_t> btr_cur_n_non_sea; /** Old value of btr_cur_n_non_sea. Copied by srv_refresh_innodb_monitor_stats(). Referenced by srv_printf_innodb_monitor(). */ extern ulint btr_cur_n_non_sea_old; /** Number of successful adaptive hash index lookups in -btr_cur_search_to_nth_level(). */ +btr_cur_t::search_leaf(). */ extern ib_counter_t<ulint, ib_counter_element_t> btr_cur_n_sea; /** Old value of btr_cur_n_sea. Copied by srv_refresh_innodb_monitor_stats(). Referenced by diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h index cd8eacdc212..c66a3bfa329 100644 --- a/storage/innobase/include/btr0pcur.h +++ b/storage/innobase/include/btr0pcur.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -70,24 +70,6 @@ btr_pcur_init( /*==========*/ btr_pcur_t* pcur); /*!< in: persistent cursor */ -/**************************************************************//** -Initializes and opens a persistent cursor to an index tree. */ -inline -dberr_t -btr_pcur_open( - const dtuple_t* tuple, /*!< in: tuple on which search done */ - page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...; - NOTE that if the search is made using a unique - prefix of a record, mode should be - PAGE_CUR_LE, not PAGE_CUR_GE, as the latter - may end up on the previous page from the - record! */ - btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ - ib_uint64_t autoinc,/*!< in: PAGE_ROOT_AUTO_INC to be written - (0 if none) */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); /** Opens an persistent cursor to an index tree without initializing the cursor. @param tuple tuple on which search done @@ -100,8 +82,7 @@ cursor. @param mtr mini-transaction @return DB_SUCCESS on success or error code otherwise. */ inline -dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, - page_cur_mode_t mode, +dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, page_cur_mode_t mode, btr_latch_mode latch_mode, btr_pcur_t *cursor, mtr_t *mtr); @@ -356,7 +337,7 @@ struct btr_pcur_t /** the modify clock value of the buffer block when the cursor position was stored */ ib_uint64_t modify_clock= 0; - /** btr_pcur_store_position() and btr_pcur_restore_position() state. */ + /** btr_pcur_store_position() and restore_position() state. */ enum pcur_pos_t pos_state= BTR_PCUR_NOT_POSITIONED; page_cur_mode_t search_mode= PAGE_CUR_UNSUPP; /** the transaction, if we know it; otherwise this field is not defined; @@ -383,8 +364,8 @@ struct btr_pcur_t supremum. (4) cursor was positioned before the first or after the last in an empty tree: restores to before first or after the last in the tree. - @param restore_latch_mode BTR_SEARCH_LEAF, ... - @param mtr mtr + @param latch_mode BTR_SEARCH_LEAF, ... + @param mtr mini-transaction @retval SAME_ALL cursor position on user rec and points on the record with the same field values as in the stored record, @retval SAME_UNIQ cursor position is on user rec and points on the @@ -409,8 +390,7 @@ struct btr_pcur_t pos_state= BTR_PCUR_IS_POSITIONED; old_rec= nullptr; - return btr_cur.open_leaf(first, index, - BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode), mtr); + return btr_cur.open_leaf(first, index, this->latch_mode, mtr); } }; @@ -433,6 +413,24 @@ inline rec_t *btr_pcur_get_rec(const btr_pcur_t *cursor) return cursor->btr_cur.page_cur.rec; } +/**************************************************************//** +Initializes and opens a persistent cursor to an index tree. */ +inline +dberr_t +btr_pcur_open( + const dtuple_t* tuple, /*!< in: tuple on which search done */ + page_cur_mode_t mode, /*!< in: PAGE_CUR_LE, ... */ + btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ + btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ + mtr_t* mtr) /*!< in: mtr */ +{ + cursor->latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); + cursor->search_mode= mode; + cursor->pos_state= BTR_PCUR_IS_POSITIONED; + cursor->trx_if_known= nullptr; + return cursor->btr_cur.search_leaf(tuple, mode, latch_mode, mtr); +} + /** Open a cursor on the first user record satisfying the search condition; in case of no match, after the last index record. */ MY_ATTRIBUTE((nonnull, warn_unused_result)) @@ -440,16 +438,15 @@ inline dberr_t btr_pcur_open_on_user_rec( const dtuple_t* tuple, /*!< in: tuple on which search done */ - page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ... */ btr_latch_mode latch_mode, /*!< in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ mtr_t* mtr) /*!< in: mtr */ { - ut_ad(mode == PAGE_CUR_GE || mode == PAGE_CUR_G); ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF); - if (dberr_t err= btr_pcur_open(tuple, mode, latch_mode, cursor, 0, mtr)) + if (dberr_t err= + btr_pcur_open(tuple, PAGE_CUR_GE, latch_mode, cursor, mtr)) return err; if (!btr_pcur_is_after_last_on_page(cursor) || btr_pcur_is_after_last_in_tree(cursor)) diff --git a/storage/innobase/include/btr0pcur.inl b/storage/innobase/include/btr0pcur.inl index 551f8f20fca..b827d70dc47 100644 --- a/storage/innobase/include/btr0pcur.inl +++ b/storage/innobase/include/btr0pcur.inl @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2022, MariaDB Corporation. +Copyright (c) 2015, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -299,38 +299,10 @@ btr_pcur_init( pcur->btr_cur.rtr_info = NULL; } -/**************************************************************//** -Initializes and opens a persistent cursor to an index tree. */ -inline -dberr_t -btr_pcur_open( - const dtuple_t* tuple, /*!< in: tuple on which search done */ - page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...; - NOTE that if the search is made using a unique - prefix of a record, mode should be - PAGE_CUR_LE, not PAGE_CUR_GE, as the latter - may end up on the previous page from the - record! */ - btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ - ib_uint64_t autoinc,/*!< in: PAGE_ROOT_AUTO_INC to be written - (0 if none) */ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(!cursor->index()->is_spatial()); - cursor->latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); - cursor->search_mode= mode; - cursor->pos_state= BTR_PCUR_IS_POSITIONED; - cursor->trx_if_known= nullptr; - return btr_cur_search_to_nth_level(0, tuple, mode, latch_mode, - btr_pcur_get_btr_cur(cursor), - mtr, autoinc); -} - /** Opens an persistent cursor to an index tree without initializing the cursor. @param tuple tuple on which search done -@param mode PAGE_CUR_L, ...; NOTE that if the search is made using a +@param mode search mode; NOTE that if the search is made using a unique prefix of a record, mode should be PAGE_CUR_LE, not PAGE_CUR_GE, as the latter may end up on the previous page of the record! @@ -339,8 +311,7 @@ cursor. @param mtr mini-transaction @return DB_SUCCESS on success or error code otherwise. */ inline -dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, - page_cur_mode_t mode, +dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, page_cur_mode_t mode, btr_latch_mode latch_mode, btr_pcur_t *cursor, mtr_t *mtr) { @@ -348,10 +319,7 @@ dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, cursor->search_mode= mode; cursor->pos_state= BTR_PCUR_IS_POSITIONED; cursor->trx_if_known= nullptr; - - /* Search with the tree cursor */ - return btr_cur_search_to_nth_level(0, tuple, mode, latch_mode, - btr_pcur_get_btr_cur(cursor), mtr); + return cursor->btr_cur.search_leaf(tuple, mode, latch_mode, mtr); } /**************************************************************//** diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h index 6118bfbc128..912c022c64f 100644 --- a/storage/innobase/include/btr0types.h +++ b/storage/innobase/include/btr0types.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2022, MariaDB Corporation. +Copyright (c) 2018, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -55,25 +55,26 @@ in the index record. */ #define BTR_EXTERN_LOCAL_STORED_MAX_SIZE \ (BTR_EXTERN_FIELD_REF_SIZE * 2) -/** Latching modes for btr_cur_search_to_nth_level(). */ +/** Latching modes for btr_cur_t::search_leaf(). */ enum btr_latch_mode { /** Search a record on a leaf page and S-latch it. */ BTR_SEARCH_LEAF = RW_S_LATCH, /** (Prepare to) modify a record on a leaf page and X-latch it. */ BTR_MODIFY_LEAF = RW_X_LATCH, + /** U-latch root and X-latch a leaf page */ + BTR_MODIFY_ROOT_AND_LEAF = RW_SX_LATCH, /** Obtain no latches. */ BTR_NO_LATCHES = RW_NO_LATCH, - /** Search the previous record. */ + /** Search the previous record. + Used in btr_pcur_move_backward_from_page(). */ BTR_SEARCH_PREV = 4 | BTR_SEARCH_LEAF, - /** Modify the previous record. */ + /** Modify the previous record. + Used in btr_pcur_move_backward_from_page() and ibuf_insert(). */ BTR_MODIFY_PREV = 4 | BTR_MODIFY_LEAF, - /** Start searching the entire B-tree. */ - BTR_SEARCH_TREE = 8 | BTR_SEARCH_LEAF, - /** Start modifying1 the entire B-tree. */ + /** Start modifying the entire B-tree. */ BTR_MODIFY_TREE = 8 | BTR_MODIFY_LEAF, - /** Continue searching the entire B-tree. */ - BTR_CONT_SEARCH_TREE = 4 | BTR_SEARCH_TREE, - /** Continue modifying the entire B-tree. */ + /** Continue modifying the entire R-tree. + Only used by rtr_search_to_nth_level(). */ BTR_CONT_MODIFY_TREE = 4 | BTR_MODIFY_TREE, /* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually @@ -98,14 +99,14 @@ enum btr_latch_mode { dict_index_t::lock S-latch is being held. */ BTR_SEARCH_LEAF_ALREADY_S_LATCHED = BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED, - /** Search the entire index tree, assuming that the - dict_index_t::lock S-latch is being held. */ - BTR_SEARCH_TREE_ALREADY_S_LATCHED = BTR_SEARCH_TREE - | BTR_ALREADY_S_LATCHED, /** Search and X-latch a leaf page, assuming that the dict_index_t::lock is being held in non-exclusive mode. */ BTR_MODIFY_LEAF_ALREADY_LATCHED = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED, + /** U-latch root and X-latch a leaf page, assuming that + dict_index_t::lock is being held in U mode. */ + BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED = BTR_MODIFY_ROOT_AND_LEAF + | BTR_ALREADY_S_LATCHED, /** Attempt to delete-mark a secondary index record. */ BTR_DELETE_MARK_LEAF = BTR_MODIFY_LEAF | BTR_DELETE_MARK, @@ -132,6 +133,9 @@ enum btr_latch_mode { /** Attempt to delete a record in the tree. */ BTR_PURGE_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, + /** Attempt to delete a record in an x-latched tree. */ + BTR_PURGE_TREE_ALREADY_LATCHED = BTR_PURGE_TREE + | BTR_ALREADY_S_LATCHED, /** Attempt to insert a record into the tree. */ BTR_INSERT_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_INSERT, diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 24571483d05..5eb245b5d95 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -787,7 +787,7 @@ public: { ut_ad(fsp_is_system_temporary(id().space())); ut_ad(in_file()); - ut_ad(!oldest_modification() || oldest_modification() == 2); + ut_ad((oldest_modification() | 2) == 2); oldest_modification_= 2; } diff --git a/storage/innobase/include/gis0rtree.h b/storage/innobase/include/gis0rtree.h index 777f2432c93..b07261ce042 100644 --- a/storage/innobase/include/gis0rtree.h +++ b/storage/innobase/include/gis0rtree.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -59,6 +59,44 @@ Created 2013/03/27 Jimmy Yang and Allen Lai /* Geometry data header */ #define GEO_DATA_HEADER_SIZE 4 + +/** Search for a spatial index leaf page record. +@param cur cursor +@param tuple search tuple +@param latch_mode latching mode +@param mtr mini-transaction +@param mode search mode */ +dberr_t rtr_search_leaf(btr_cur_t *cur, const dtuple_t *tuple, + btr_latch_mode latch_mode, mtr_t *mtr, + page_cur_mode_t mode= PAGE_CUR_RTREE_LOCATE) + MY_ATTRIBUTE((nonnull, warn_unused_result)); + +/** Search for inserting a spatial index leaf page record. +@param cur cursor +@param tuple search tuple +@param latch_mode latching mode +@param mtr mini-transaction */ +inline dberr_t rtr_insert_leaf(btr_cur_t *cur, const dtuple_t *tuple, + btr_latch_mode latch_mode, mtr_t *mtr) +{ + return rtr_search_leaf(cur, tuple, latch_mode, mtr, PAGE_CUR_RTREE_INSERT); +} + +/** Search for a spatial index leaf page record. +@param pcur cursor +@param tuple search tuple +@param mode search mode +@param mtr mini-transaction */ +dberr_t rtr_search_leaf(btr_pcur_t *pcur, const dtuple_t *tuple, + page_cur_mode_t mode, mtr_t *mtr) + MY_ATTRIBUTE((nonnull, warn_unused_result)); + +dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple, + page_cur_mode_t mode, + btr_latch_mode latch_mode, + btr_cur_t *cur, mtr_t *mtr) + MY_ATTRIBUTE((nonnull, warn_unused_result)); + /**********************************************************************//** Builds a Rtree node pointer out of a physical record and a page number. @return own: node pointer */ @@ -295,11 +333,9 @@ rtr_store_parent_path( /**************************************************************//** Initializes and opens a persistent cursor to an index tree. It should be closed with btr_pcur_close. */ -bool -rtr_pcur_open( - dict_index_t* index, /*!< in: index */ +bool rtr_search( const dtuple_t* tuple, /*!< in: tuple on which search done */ - btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ + btr_latch_mode latch_mode,/*!< in: BTR_MODIFY_LEAF, ... */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ mtr_t* mtr) /*!< in: mtr */ MY_ATTRIBUTE((warn_unused_result)); diff --git a/storage/innobase/include/gis0type.h b/storage/innobase/include/gis0type.h index 4fccfdb6c26..d6a4ef67a38 100644 --- a/storage/innobase/include/gis0type.h +++ b/storage/innobase/include/gis0type.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2020, MariaDB Corporation. +Copyright (c) 2018, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -105,12 +105,6 @@ typedef struct rtr_info{ matched_rec_t* matches;/*!< struct holding matching leaf records */ mysql_mutex_t rtr_path_mutex; /*!< mutex protect the "path" vector */ - buf_block_t* tree_blocks[RTR_MAX_LEVELS + RTR_LEAF_LATCH_NUM]; - /*!< tracking pages that would be locked - at leaf level, for future free */ - ulint tree_savepoints[RTR_MAX_LEVELS + RTR_LEAF_LATCH_NUM]; - /*!< savepoint used to release latches/blocks - on each level and leaf level */ rtr_mbr_t mbr; /*!< the search MBR */ que_thr_t* thr; /*!< the search thread */ mem_heap_t* heap; /*!< memory heap */ diff --git a/storage/innobase/include/ibuf0ibuf.inl b/storage/innobase/include/ibuf0ibuf.inl index 12aa1ca6837..003bf22a047 100644 --- a/storage/innobase/include/ibuf0ibuf.inl +++ b/storage/innobase/include/ibuf0ibuf.inl @@ -100,8 +100,8 @@ ibuf_should_try( a secondary index when we decide */ { - if (!innodb_change_buffering || !ibuf.max_size || index->is_clust() || - index->is_spatial()) + if (index->type & (DICT_CLUSTERED | DICT_IBUF | DICT_SPATIAL) || + !innodb_change_buffering || !ibuf.max_size) return false; if (!ignore_sec_unique && index->is_unique()) return false; diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h index ca194f905b5..e2419309764 100644 --- a/storage/innobase/include/mtr0log.h +++ b/storage/innobase/include/mtr0log.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2019, 2022, MariaDB Corporation. +Copyright (c) 2019, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -402,7 +402,8 @@ inline byte *mtr_t::log_write(const page_id_t id, const buf_page_t *bpage, ut_ad(have_offset || offset == 0); ut_ad(offset + len <= srv_page_size); static_assert(MIN_4BYTE >= UNIV_PAGE_SIZE_MAX, "consistency"); - + ut_ad(type == FREE_PAGE || type == OPTION || (type == EXTENDED && !bpage) || + memo_contains_flagged(bpage, MTR_MEMO_MODIFY)); size_t max_len; if (!have_len) max_len= 1 + 5 + 5; @@ -512,33 +513,6 @@ inline void mtr_t::memcpy(const buf_block_t &b, void *dest, const void *str, memcpy(b, ut_align_offset(d, srv_page_size), len); } -/** Initialize an entire page. -@param[in,out] b buffer page */ -inline void mtr_t::init(buf_block_t *b) -{ - const page_id_t id{b->page.id()}; - ut_ad(is_named_space(id.space())); - ut_ad(!m_freed_pages == !m_freed_space); - - if (UNIV_LIKELY_NULL(m_freed_space) && - m_freed_space->id == id.space() && - m_freed_pages->remove_if_exists(b->page.id().page_no()) && - m_freed_pages->empty()) - { - delete m_freed_pages; - m_freed_pages= nullptr; - m_freed_space= nullptr; - } - - b->page.set_reinit(b->page.state() & buf_page_t::LRU_MASK); - - if (!is_logged()) - return; - - m_log.close(log_write<INIT_PAGE>(b->page.id(), &b->page)); - m_last_offset= FIL_PAGE_TYPE; -} - /** Write an EXTENDED log record. @param block buffer pool page @param type extended record subtype; @see mrec_ext_t */ diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index abc1f65e692..299f658e98a 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2022, MariaDB Corporation. +Copyright (c) 2013, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -29,7 +29,7 @@ Created 11/26/1995 Heikki Tuuri #include "fil0fil.h" #include "dyn0buf.h" #include "buf0buf.h" -#include <vector> +#include "small_vector.h" /** Start a mini-transaction. */ #define mtr_start(m) (m)->start() @@ -37,15 +37,6 @@ Created 11/26/1995 Heikki Tuuri /** Commit a mini-transaction. */ #define mtr_commit(m) (m)->commit() -/** Set and return a savepoint in mtr. -@return savepoint */ -#define mtr_set_savepoint(m) (m)->get_savepoint() - -/** Release the (index tree) s-latch stored in an mtr memo after a -savepoint. */ -#define mtr_release_s_latch_at_savepoint(m, s, l) \ - (m)->release_s_latch_at_savepoint((s), (l)) - /** Change the logging mode of a mini-transaction. @return old mode */ #define mtr_set_log_mode(m, d) (m)->set_log_mode((d)) @@ -60,13 +51,10 @@ savepoint. */ # define mtr_sx_lock_index(i,m) (m)->u_lock(&(i)->lock) #endif -#define mtr_release_block_at_savepoint(m, s, b) \ - (m)->release_block_at_savepoint((s), (b)) - /** Mini-transaction memo stack slot. */ struct mtr_memo_slot_t { - /** pointer to the object, or nullptr if released */ + /** pointer to the object */ void *object; /** type of the stored object */ mtr_memo_type_t type; @@ -77,6 +65,9 @@ struct mtr_memo_slot_t /** Mini-transaction handle and buffer */ struct mtr_t { + mtr_t(); + ~mtr_t(); + /** Start a mini-transaction. */ void start(); @@ -91,11 +82,11 @@ struct mtr_t { /** Release latches of unmodified buffer pages. @param begin first slot to release */ void rollback_to_savepoint(ulint begin) - { rollback_to_savepoint(begin, m_memo->size()); } + { rollback_to_savepoint(begin, m_memo.size()); } /** Release the last acquired buffer page latch. */ void release_last_page() - { auto s= m_memo->size(); rollback_to_savepoint(s - 1, s); } + { auto s= m_memo.size(); rollback_to_savepoint(s - 1, s); } /** Commit a mini-transaction that is shrinking a tablespace. @param space tablespace that is being shrunk */ @@ -120,86 +111,39 @@ struct mtr_t { ulint get_savepoint() const { ut_ad(is_active()); - return m_memo ? m_memo->size() : 0; + return m_memo.size(); } - /** Release the (index tree) s-latch stored in an mtr memo after a savepoint. - @param savepoint value returned by get_savepoint() - @param lock index latch to release */ - void release_s_latch_at_savepoint(ulint savepoint, index_lock *lock) + /** Get the block at a savepoint */ + buf_block_t *at_savepoint(ulint savepoint) const { ut_ad(is_active()); - mtr_memo_slot_t &slot= m_memo->at(savepoint); - ut_ad(slot.object == lock); - ut_ad(slot.type == MTR_MEMO_S_LOCK); - slot.object= nullptr; - lock->s_unlock(); - } - /** Release the block in an mtr memo after a savepoint. */ - void release_block_at_savepoint(ulint savepoint, buf_block_t *block) - { - ut_ad(is_active()); - mtr_memo_slot_t &slot= m_memo->at(savepoint); - ut_ad(slot.object == block); - ut_ad(!(slot.type & MTR_MEMO_MODIFY)); - slot.object= nullptr; - block->page.unfix(); - - switch (slot.type) { - case MTR_MEMO_PAGE_S_FIX: - block->page.lock.s_unlock(); - break; - case MTR_MEMO_PAGE_SX_FIX: - case MTR_MEMO_PAGE_X_FIX: - block->page.lock.u_or_x_unlock(slot.type == MTR_MEMO_PAGE_SX_FIX); - break; - default: - break; - } - } - - /** @return if we are about to make a clean buffer block dirty */ - static bool is_block_dirtied(const buf_page_t &b) - { - ut_ad(b.in_file()); - ut_ad(b.frame); - ut_ad(b.buf_fix_count()); - return b.oldest_modification() <= 1 && b.id().space() < SRV_TMP_SPACE_ID; + const mtr_memo_slot_t &slot= m_memo[savepoint]; + ut_ad(slot.type < MTR_MEMO_S_LOCK); + ut_ad(slot.object); + return static_cast<buf_block_t*>(slot.object); } - /** X-latch a not yet latched block after a savepoint. */ - void x_latch_at_savepoint(ulint savepoint, buf_block_t *block) + /** Try to get a block at a savepoint. + @param savepoint the savepoint right before the block was acquired + @return the block at the savepoint + @retval nullptr if no buffer block was registered at that savepoint */ + buf_block_t *block_at_savepoint(ulint savepoint) const { ut_ad(is_active()); - ut_ad(!memo_contains_flagged(block, MTR_MEMO_PAGE_S_FIX | - MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); - mtr_memo_slot_t &slot= m_memo->at(savepoint); - ut_ad(slot.object == block); - ut_ad(slot.type == MTR_MEMO_BUF_FIX); - slot.type= MTR_MEMO_PAGE_X_FIX; - block->page.lock.x_lock(); - ut_ad(!block->page.is_io_fixed()); - - if (!m_made_dirty) - m_made_dirty= is_block_dirtied(block->page); + const mtr_memo_slot_t &slot= m_memo[savepoint]; + return slot.type < MTR_MEMO_S_LOCK + ? static_cast<buf_block_t*>(slot.object) + : nullptr; } - /** U-latch a not yet latched block after a savepoint. */ - void sx_latch_at_savepoint(ulint savepoint, buf_block_t *block) - { - ut_ad(is_active()); - ut_ad(!memo_contains_flagged(block, MTR_MEMO_PAGE_S_FIX | - MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); - mtr_memo_slot_t &slot= m_memo->at(savepoint); - ut_ad(slot.object == block); - ut_ad(slot.type == MTR_MEMO_BUF_FIX); - slot.type= MTR_MEMO_PAGE_SX_FIX; - block->page.lock.u_lock(); - ut_ad(!block->page.is_io_fixed()); - - if (!m_made_dirty) - m_made_dirty= is_block_dirtied(block->page); - } + /** Retrieve a page that has already been latched. + @param id page identifier + @param type page latch type + @return block + @retval nullptr if the block had not been latched yet */ + buf_block_t *get_already_latched(const page_id_t id, mtr_memo_type_t type) + const; /** @return the logging mode */ mtr_log_t get_log_mode() const @@ -358,23 +302,17 @@ struct mtr_t { void release(const index_lock &lock) { release(&lock); } /** Release a latch to an unmodified page. */ void release(const buf_block_t &block) { release(&block); } - - /** Note that the mini-transaction will modify data. */ - void flag_modified() { m_modifications = true; } private: /** Release an unmodified object. */ void release(const void *object); +public: /** Mark the given latched page as modified. @param block page that will be modified */ - void modify(const buf_block_t& block); -public: - /** Note that the mini-transaction will modify a block. */ - void set_modified(const buf_block_t &block) - { flag_modified(); if (m_log_mode != MTR_LOG_NONE) modify(block); } + void set_modified(const buf_block_t &block); /** Set the state to not-modified. This will not log the changes. This is only used during redo log apply, to avoid logging the changes. */ - void discard_modifications() { m_modifications = false; } + void discard_modifications() { m_modifications= false; } /** Get the LSN of commit(). @return the commit LSN @@ -403,28 +341,17 @@ public: @param rw_latch latch to acquire */ void upgrade_buffer_fix(ulint savepoint, rw_lock_type_t rw_latch); - /** Register a page latch on a buffer-fixed block was buffer-fixed. - @param latch latch type */ - void u_lock_register(ulint savepoint) + /** Register a change to the page latch state. */ + void lock_register(ulint savepoint, mtr_memo_type_t type) { - mtr_memo_slot_t &slot= m_memo->at(savepoint); - ut_ad(slot.type == MTR_MEMO_BUF_FIX); - slot.type= MTR_MEMO_PAGE_SX_FIX; - } - - /** Register a page latch on a buffer-fixed block was buffer-fixed. - @param latch latch type */ - void s_lock_register(ulint savepoint) - { - mtr_memo_slot_t &slot= m_memo->at(savepoint); - ut_ad(slot.type == MTR_MEMO_BUF_FIX); - slot.type= MTR_MEMO_PAGE_S_FIX; + mtr_memo_slot_t &slot= m_memo[savepoint]; + ut_ad(slot.type <= MTR_MEMO_BUF_FIX); + ut_ad(type <= MTR_MEMO_BUF_FIX); + slot.type= type; } /** Upgrade U locks on a block to X */ void page_lock_upgrade(const buf_block_t &block); - /** Upgrade U lock to X */ - void lock_upgrade(const index_lock &lock); /** Check if we are holding tablespace latch @param space tablespace to search for @@ -454,31 +381,66 @@ public: @retval nullptr if not found */ buf_block_t *memo_contains_page_flagged(const byte *ptr, ulint flags) const; - /** @return true if mini-transaction contains modifications. */ + /** @return whether this mini-transaction modifies persistent data */ bool has_modifications() const { return m_modifications; } #endif /* UNIV_DEBUG */ - /** Push an object to an mtr memo stack. - @param object object + /** Push a buffer page to an the memo. + @param block buffer block @param type object type: MTR_MEMO_S_LOCK, ... */ - void memo_push(void *object, mtr_memo_type_t type) __attribute__((nonnull)) + void memo_push(buf_block_t *block, mtr_memo_type_t type) + __attribute__((nonnull)) { ut_ad(is_active()); - /* If this mtr has U or X latched a clean page then we set - the m_made_dirty flag. This tells us if we need to - grab log_sys.flush_order_mutex at mtr_t::commit() so that we - can insert the dirtied page into the buf_pool.flush_list. - - FIXME: Do this only when the MTR_MEMO_MODIFY flag is set! */ - if (!m_made_dirty && - (type & (MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX))) - m_made_dirty= - is_block_dirtied(*static_cast<const buf_page_t*>(object)); - - if (!m_memo) - m_memo= new std::vector<mtr_memo_slot_t>(1, {object, type}); + ut_ad(type <= MTR_MEMO_PAGE_SX_MODIFY); + ut_ad(block->page.buf_fix_count()); + ut_ad(block->page.in_file()); +#ifdef UNIV_DEBUG + switch (type) { + case MTR_MEMO_PAGE_S_FIX: + ut_ad(block->page.lock.have_s()); + break; + case MTR_MEMO_PAGE_X_FIX: case MTR_MEMO_PAGE_X_MODIFY: + ut_ad(block->page.lock.have_x()); + break; + case MTR_MEMO_PAGE_SX_FIX: case MTR_MEMO_PAGE_SX_MODIFY: + ut_ad(block->page.lock.have_u_or_x()); + break; + case MTR_MEMO_BUF_FIX: + break; + case MTR_MEMO_MODIFY: + case MTR_MEMO_S_LOCK: case MTR_MEMO_X_LOCK: case MTR_MEMO_SX_LOCK: + case MTR_MEMO_SPACE_X_LOCK: case MTR_MEMO_SPACE_S_LOCK: + ut_ad("invalid type" == 0); + } +#endif + if (!(type & MTR_MEMO_MODIFY)); + else if (block->page.id().space() >= SRV_TMP_SPACE_ID) + { + block->page.set_temp_modified(); + type= mtr_memo_type_t(type & ~MTR_MEMO_MODIFY); + } else - m_memo->emplace_back(mtr_memo_slot_t{object, type}); + { + m_modifications= true; + if (!m_made_dirty) + /* If we are going to modify a previously clean persistent page, + we must set m_made_dirty, so that commit() will acquire + log_sys.flush_order_mutex and insert the block into + buf_pool.flush_list. */ + m_made_dirty= block->page.oldest_modification() <= 1; + } + m_memo.emplace_back(mtr_memo_slot_t{block, type}); + } + + /** Push an index lock or tablespace latch to the memo. + @param object index lock or tablespace latch + @param type object type: MTR_MEMO_S_LOCK, ... */ + void memo_push(void *object, mtr_memo_type_t type) __attribute__((nonnull)) + { + ut_ad(is_active()); + ut_ad(type >= MTR_MEMO_S_LOCK); + m_memo.emplace_back(mtr_memo_slot_t{object, type}); } /** @return the size of the log is empty */ @@ -783,7 +745,7 @@ private: /** specifies which operations should be logged; default MTR_LOG_ALL */ uint16_t m_log_mode:2; - /** whether at least one buffer pool page was written to */ + /** whether at least one persistent page was written to */ uint16_t m_modifications:1; /** whether at least one previously clean buffer pool page was written to */ @@ -809,7 +771,7 @@ private: #endif /* UNIV_DEBUG */ /** acquired dict_index_t::lock, fil_space_t::latch, buf_block_t */ - std::vector<mtr_memo_slot_t> *m_memo= nullptr; + small_vector<mtr_memo_slot_t, 16> m_memo; /** mini-transaction log */ mtr_buf_t m_log; diff --git a/storage/innobase/include/small_vector.h b/storage/innobase/include/small_vector.h new file mode 100644 index 00000000000..d28a36184b8 --- /dev/null +++ b/storage/innobase/include/small_vector.h @@ -0,0 +1,100 @@ +/***************************************************************************** + +Copyright (c) 2023, MariaDB Corporation. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +#pragma once +/* A normally small vector, inspired by llvm::SmallVector */ +#include "my_global.h" +#include <iterator> +#include <memory> + +class small_vector_base +{ +protected: + typedef uint32_t Size_T; + void *BeginX; + Size_T Size= 0, Capacity; + small_vector_base()= delete; + small_vector_base(void *small, size_t small_size) + : BeginX(small), Capacity(Size_T(small_size)) {} + ATTRIBUTE_COLD void grow_by_1(void *small, size_t element_size); +public: + size_t size() const { return Size; } + size_t capacity() const { return Capacity; } + bool empty() const { return !Size; } + void clear() { Size= 0; } +protected: + void set_size(size_t N) { Size= Size_T(N); } +}; + +template <typename T, unsigned N> +class small_vector : public small_vector_base +{ + /** The fixed storage allocation */ + T small[N]; + + using small_vector_base::set_size; + + void grow_if_needed() + { + if (unlikely(size() >= capacity())) + grow_by_1(small, sizeof *small); + } + +public: + small_vector() : small_vector_base(small, N) + { + TRASH_ALLOC(small, sizeof small); + } + ~small_vector() + { + if (small != begin()) + my_free(begin()); + MEM_MAKE_ADDRESSABLE(small, sizeof small); + } + + using iterator= T *; + using const_iterator= const T *; + using reverse_iterator= std::reverse_iterator<iterator>; + using reference= T &; + using const_reference= const T&; + + iterator begin() { return static_cast<iterator>(BeginX); } + const_iterator begin() const { return static_cast<const_iterator>(BeginX); } + iterator end() { return begin() + size(); } + const_iterator end() const { return begin() + size(); } + + reverse_iterator rbegin() { return reverse_iterator(end()); } + reverse_iterator rend() { return reverse_iterator(begin()); } + + reference operator[](size_t i) { assert(i < size()); return begin()[i]; } + const_reference operator[](size_t i) const + { return const_cast<small_vector&>(*this)[i]; } + + void erase(const_iterator S, const_iterator E) + { + set_size(std::move(const_cast<iterator>(E), end(), + const_cast<iterator>(S)) - begin()); + } + + void emplace_back(T &&arg) + { + grow_if_needed(); + ::new (end()) T(arg); + set_size(size() + 1); + } +}; diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 037bbcd6c05..860462906b6 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -37,6 +37,8 @@ Created 11/26/1995 Heikki Tuuri void mtr_memo_slot_t::release() const { + ut_ad(object); + switch (type) { case MTR_MEMO_S_LOCK: static_cast<index_lock*>(object)->s_unlock(); @@ -153,10 +155,13 @@ inline void buf_pool_t::insert_into_flush_list(buf_page_t *prev, block->page.set_oldest_modification(lsn); } +mtr_t::mtr_t()= default; +mtr_t::~mtr_t()= default; + /** Start a mini-transaction. */ void mtr_t::start() { - ut_ad(!m_memo); + ut_ad(m_memo.empty()); ut_ad(!m_freed_pages); ut_ad(!m_freed_space); MEM_UNDEFINED(this, sizeof *this); @@ -188,7 +193,7 @@ void mtr_t::start() inline void mtr_t::release_resources() { ut_ad(is_active()); - ut_ad(!m_memo); + ut_ad(m_memo.empty()); m_log.erase(); ut_d(m_commit= true); } @@ -243,15 +248,13 @@ void mtr_t::release_unlogged() { ut_ad(m_log_mode == MTR_LOG_NO_REDO); ut_ad(m_log.size() == 0); - ut_ad(m_memo); process_freed_pages(); - for (auto it= m_memo->rbegin(); it != m_memo->rend(); it++) + for (auto it= m_memo.rbegin(); it != m_memo.rend(); it++) { mtr_memo_slot_t &slot= *it; - if (!slot.object) - continue; + ut_ad(slot.object); switch (slot.type) { case MTR_MEMO_S_LOCK: static_cast<index_lock*>(slot.object)->s_unlock(); @@ -278,10 +281,8 @@ void mtr_t::release_unlogged() { ut_ad(slot.type == MTR_MEMO_PAGE_X_MODIFY || slot.type == MTR_MEMO_PAGE_SX_MODIFY); - if (UNIV_LIKELY(block->page.id() >= end_page_id)) - block->page.set_temp_modified(); - else - insert_imported(block); + ut_ad(block->page.id() < end_page_id); + insert_imported(block); } switch (slot.type) { @@ -300,23 +301,14 @@ void mtr_t::release_unlogged() } } - delete m_memo; - m_memo= nullptr; + m_memo.clear(); } void mtr_t::release() { - if (m_memo) - { - for (auto it= m_memo->rbegin(); it != m_memo->rend(); it++) - { - mtr_memo_slot_t &slot= *it; - if (slot.object) - slot.release(); - } - delete m_memo; - m_memo= nullptr; - } + for (auto it= m_memo.rbegin(); it != m_memo.rend(); it++) + it->release(); + m_memo.clear(); } /** Commit a mini-transaction. */ @@ -344,19 +336,18 @@ void mtr_t::commit() if (m_made_dirty) { - ut_ad(m_memo); size_t modified= 0; - auto it= m_memo->rbegin(); + auto it= m_memo.rbegin(); mysql_mutex_lock(&buf_pool.flush_list_mutex); buf_page_t *const prev= buf_pool.prepare_insert_into_flush_list(lsns.first); - while (it != m_memo->rend()) + while (it != m_memo.rend()) { const mtr_memo_slot_t &slot= *it++; - if (slot.object && slot.type & MTR_MEMO_MODIFY) + if (slot.type & MTR_MEMO_MODIFY) { ut_ad(slot.type == MTR_MEMO_PAGE_X_MODIFY || slot.type == MTR_MEMO_PAGE_SX_MODIFY); @@ -401,72 +392,67 @@ void mtr_t::commit() else log_sys.latch.rd_unlock(); - if (m_memo) - { - size_t modified= 0; + size_t modified= 0; - for (auto it= m_memo->rbegin(); it != m_memo->rend(); ) - { - const mtr_memo_slot_t &slot= *it++; - if (!slot.object) - continue; - switch (slot.type) { - case MTR_MEMO_S_LOCK: - static_cast<index_lock*>(slot.object)->s_unlock(); - break; - case MTR_MEMO_SPACE_X_LOCK: - static_cast<fil_space_t*>(slot.object)->set_committed_size(); - static_cast<fil_space_t*>(slot.object)->x_unlock(); - break; - case MTR_MEMO_SPACE_S_LOCK: - static_cast<fil_space_t*>(slot.object)->s_unlock(); - break; - case MTR_MEMO_X_LOCK: - case MTR_MEMO_SX_LOCK: - static_cast<index_lock*>(slot.object)-> - u_or_x_unlock(slot.type == MTR_MEMO_SX_LOCK); - break; - default: - buf_page_t *bpage= static_cast<buf_page_t*>(slot.object); - const auto s= bpage->unfix(); - if (slot.type & MTR_MEMO_MODIFY) + for (auto it= m_memo.rbegin(); it != m_memo.rend(); ) + { + const mtr_memo_slot_t &slot= *it++; + ut_ad(slot.object); + switch (slot.type) { + case MTR_MEMO_S_LOCK: + static_cast<index_lock*>(slot.object)->s_unlock(); + break; + case MTR_MEMO_SPACE_X_LOCK: + static_cast<fil_space_t*>(slot.object)->set_committed_size(); + static_cast<fil_space_t*>(slot.object)->x_unlock(); + break; + case MTR_MEMO_SPACE_S_LOCK: + static_cast<fil_space_t*>(slot.object)->s_unlock(); + break; + case MTR_MEMO_X_LOCK: + case MTR_MEMO_SX_LOCK: + static_cast<index_lock*>(slot.object)-> + u_or_x_unlock(slot.type == MTR_MEMO_SX_LOCK); + break; + default: + buf_page_t *bpage= static_cast<buf_page_t*>(slot.object); + const auto s= bpage->unfix(); + if (slot.type & MTR_MEMO_MODIFY) + { + ut_ad(slot.type == MTR_MEMO_PAGE_X_MODIFY || + slot.type == MTR_MEMO_PAGE_SX_MODIFY); + ut_ad(bpage->oldest_modification() > 1); + ut_ad(bpage->oldest_modification() < m_commit_lsn); + ut_ad(bpage->id() < end_page_id); + ut_ad(s >= buf_page_t::FREED); + ut_ad(s < buf_page_t::READ_FIX); + ut_ad(mach_read_from_8(bpage->frame + FIL_PAGE_LSN) <= + m_commit_lsn); + if (s >= buf_page_t::UNFIXED) { - ut_ad(slot.type == MTR_MEMO_PAGE_X_MODIFY || - slot.type == MTR_MEMO_PAGE_SX_MODIFY); - ut_ad(bpage->oldest_modification() > 1); - ut_ad(bpage->oldest_modification() < m_commit_lsn); - ut_ad(bpage->id() < end_page_id); - ut_ad(s >= buf_page_t::FREED); - ut_ad(s < buf_page_t::READ_FIX); - ut_ad(mach_read_from_8(bpage->frame + FIL_PAGE_LSN) <= - m_commit_lsn); - if (s >= buf_page_t::UNFIXED) - { - mach_write_to_8(bpage->frame + FIL_PAGE_LSN, m_commit_lsn); - if (UNIV_LIKELY_NULL(bpage->zip.data)) - memcpy_aligned<8>(FIL_PAGE_LSN + bpage->zip.data, - FIL_PAGE_LSN + bpage->frame, 8); - } - modified++; - } - switch (auto latch= slot.type & ~MTR_MEMO_MODIFY) { - case MTR_MEMO_PAGE_S_FIX: - bpage->lock.s_unlock(); - continue; - case MTR_MEMO_PAGE_SX_FIX: - case MTR_MEMO_PAGE_X_FIX: - bpage->lock.u_or_x_unlock(latch == MTR_MEMO_PAGE_SX_FIX); - continue; - default: - ut_ad(latch == MTR_MEMO_BUF_FIX); + mach_write_to_8(bpage->frame + FIL_PAGE_LSN, m_commit_lsn); + if (UNIV_LIKELY_NULL(bpage->zip.data)) + memcpy_aligned<8>(FIL_PAGE_LSN + bpage->zip.data, + FIL_PAGE_LSN + bpage->frame, 8); } + modified++; + } + switch (auto latch= slot.type & ~MTR_MEMO_MODIFY) { + case MTR_MEMO_PAGE_S_FIX: + bpage->lock.s_unlock(); + continue; + case MTR_MEMO_PAGE_SX_FIX: + case MTR_MEMO_PAGE_X_FIX: + bpage->lock.u_or_x_unlock(latch == MTR_MEMO_PAGE_SX_FIX); + continue; + default: + ut_ad(latch == MTR_MEMO_BUF_FIX); } } - - buf_pool.add_flush_list_requests(modified); - delete m_memo; - m_memo= nullptr; } + + buf_pool.add_flush_list_requests(modified); + m_memo.clear(); } if (UNIV_UNLIKELY(lsns.second != PAGE_FLUSH_NO)) @@ -481,16 +467,14 @@ func_exit: void mtr_t::rollback_to_savepoint(ulint begin, ulint end) { - ut_ad(m_memo); - ut_ad(end <= m_memo->size()); + ut_ad(end <= m_memo.size()); ut_ad(begin <= end); ulint s= end; while (s-- > begin) { - const mtr_memo_slot_t &slot= (*m_memo)[s]; - if (!slot.object) - continue; + const mtr_memo_slot_t &slot= m_memo[s]; + ut_ad(slot.object); /* This is intended for releasing latches on indexes or unmodified buffer pool pages. */ ut_ad(slot.type <= MTR_MEMO_SX_LOCK); @@ -498,7 +482,7 @@ void mtr_t::rollback_to_savepoint(ulint begin, ulint end) slot.release(); } - m_memo->erase(m_memo->begin() + begin, m_memo->begin() + end); + m_memo.erase(m_memo.begin() + begin, m_memo.begin() + end); } /** Commit a mini-transaction that is shrinking a tablespace. @@ -510,9 +494,10 @@ void mtr_t::commit_shrink(fil_space_t &space) ut_ad(!high_level_read_only); ut_ad(m_modifications); ut_ad(m_made_dirty); - ut_ad(m_memo); + ut_ad(!m_memo.empty()); ut_ad(!recv_recovery_is_on()); ut_ad(m_log_mode == MTR_LOG_ALL); + ut_ad(!m_freed_pages); ut_ad(UT_LIST_GET_LEN(space.chain) == 1); log_write_and_flush_prepare(); @@ -531,22 +516,21 @@ void mtr_t::commit_shrink(fil_space_t &space) os_file_truncate(space.chain.start->name, space.chain.start->handle, os_offset_t{space.size} << srv_page_size_shift, true); - ut_ad(!m_freed_pages || m_freed_space == &space); - process_freed_pages(); + space.clear_freed_ranges(); const page_id_t high{space.id, space.size}; size_t modified= 0; - auto it= m_memo->rbegin(); + auto it= m_memo.rbegin(); mysql_mutex_lock(&buf_pool.flush_list_mutex); buf_page_t *const prev= buf_pool.prepare_insert_into_flush_list(start_lsn); - while (it != m_memo->rend()) + while (it != m_memo.rend()) { mtr_memo_slot_t &slot= *it++; - if (!slot.object); - else if (slot.type == MTR_MEMO_SPACE_X_LOCK) + ut_ad(slot.object); + if (slot.type == MTR_MEMO_SPACE_X_LOCK) ut_ad(high.space() == static_cast<fil_space_t*>(slot.object)->id); else { @@ -727,7 +711,7 @@ lsn_t mtr_t::commit_files(lsn_t checkpoint_lsn) ut_ad(!is_inside_ibuf()); ut_ad(m_log_mode == MTR_LOG_ALL); ut_ad(!m_made_dirty); - ut_ad(!m_memo); + ut_ad(m_memo.empty()); ut_ad(!srv_read_only_mode); ut_ad(!m_freed_space); ut_ad(!m_freed_pages); @@ -837,19 +821,18 @@ void mtr_t::x_lock_space(fil_space_t *space) void mtr_t::release(const void *object) { ut_ad(is_active()); - ut_ad(m_memo); auto it= - std::find_if(m_memo->begin(), m_memo->end(), + std::find_if(m_memo.begin(), m_memo.end(), [object](const mtr_memo_slot_t& slot) { return slot.object == object; }); - ut_ad(it != m_memo->end()); + ut_ad(it != m_memo.end()); ut_ad(!(it->type & MTR_MEMO_MODIFY)); it->release(); - m_memo->erase(it); - ut_ad(std::find_if(m_memo->begin(), m_memo->end(), + m_memo.erase(it, it + 1); + ut_ad(std::find_if(m_memo.begin(), m_memo.end(), [object](const mtr_memo_slot_t& slot) - { return slot.object == &object; }) == m_memo->end()); + { return slot.object == &object; }) == m_memo.end()); } static time_t log_close_warn_time; @@ -1028,11 +1011,11 @@ std::pair<lsn_t,mtr_t::page_flush_ahead> mtr_t::do_write() #ifndef DBUG_OFF do { - if (!m_memo || m_log_mode != MTR_LOG_ALL) + if (m_log_mode != MTR_LOG_ALL) continue; DBUG_EXECUTE_IF("skip_page_checksum", continue;); - for (const mtr_memo_slot_t& slot : *m_memo) + for (const mtr_memo_slot_t& slot : m_memo) if (slot.type & MTR_MEMO_MODIFY) { const buf_page_t &b= *static_cast<const buf_page_t*>(slot.object); @@ -1168,12 +1151,9 @@ mtr_t::finish_write(size_t len) bool mtr_t::have_x_latch(const buf_block_t &block) const { - if (!m_memo) - return false; - ut_d(const mtr_memo_slot_t *found= nullptr); - for (const mtr_memo_slot_t &slot : *m_memo) + for (const mtr_memo_slot_t &slot : m_memo) { if (slot.object != &block) continue; @@ -1193,16 +1173,13 @@ bool mtr_t::have_x_latch(const buf_block_t &block) const bool mtr_t::have_u_or_x_latch(const buf_block_t &block) const { - if (m_memo) + for (const mtr_memo_slot_t &slot : m_memo) { - for (const mtr_memo_slot_t &slot : *m_memo) + if (slot.object == &block && + slot.type & (MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)) { - if (slot.object == &block && - slot.type & (MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)) - { - ut_ad(block.page.lock.have_u_or_x()); - return true; - } + ut_ad(block.page.lock.have_u_or_x()); + return true; } } return false; @@ -1214,18 +1191,15 @@ bool mtr_t::have_u_or_x_latch(const buf_block_t &block) const @return whether space.latch is being held */ bool mtr_t::memo_contains(const fil_space_t& space, bool shared) const { - if (m_memo) - { - const mtr_memo_type_t type= shared - ? MTR_MEMO_SPACE_S_LOCK : MTR_MEMO_SPACE_X_LOCK; + const mtr_memo_type_t type= shared + ? MTR_MEMO_SPACE_S_LOCK : MTR_MEMO_SPACE_X_LOCK; - for (const mtr_memo_slot_t &slot : *m_memo) + for (const mtr_memo_slot_t &slot : m_memo) + { + if (slot.object == &space && slot.type == type) { - if (slot.object == &space && slot.type == type) - { - ut_ad(shared || space.is_owner()); - return true; - } + ut_ad(shared || space.is_owner()); + return true; } } @@ -1235,9 +1209,8 @@ bool mtr_t::memo_contains(const fil_space_t& space, bool shared) const void mtr_t::page_lock_upgrade(const buf_block_t &block) { ut_ad(block.page.lock.have_x()); - ut_ad(m_memo); - for (mtr_memo_slot_t &slot : *m_memo) + for (mtr_memo_slot_t &slot : m_memo) if (slot.object == &block && slot.type & MTR_MEMO_PAGE_SX_FIX) slot.type= mtr_memo_type_t(slot.type ^ (MTR_MEMO_PAGE_SX_FIX | MTR_MEMO_PAGE_X_FIX)); @@ -1247,16 +1220,6 @@ void mtr_t::page_lock_upgrade(const buf_block_t &block) #endif /* BTR_CUR_HASH_ADAPT */ } -void mtr_t::lock_upgrade(const index_lock &lock) -{ - ut_ad(lock.have_x()); - ut_ad(m_memo); - - for (mtr_memo_slot_t &slot : *m_memo) - if (slot.object == &lock && slot.type == MTR_MEMO_SX_LOCK) - slot.type= MTR_MEMO_X_LOCK; -} - /** Latch a buffer pool block. @param block block to be latched @param rw_latch RW_S_LATCH, RW_SX_LATCH, RW_X_LATCH, RW_NO_LATCH */ @@ -1305,27 +1268,29 @@ done: void mtr_t::upgrade_buffer_fix(ulint savepoint, rw_lock_type_t rw_latch) { ut_ad(is_active()); - ut_ad(m_memo); - ut_ad(savepoint < m_memo->size()); - - mtr_memo_slot_t &slot= (*m_memo)[savepoint]; + mtr_memo_slot_t &slot= m_memo[savepoint]; ut_ad(slot.type == MTR_MEMO_BUF_FIX); buf_block_t *block= static_cast<buf_block_t*>(slot.object); ut_d(const auto state= block->page.state()); ut_ad(state > buf_page_t::UNFIXED); ut_ad(state > buf_page_t::WRITE_FIX || state < buf_page_t::READ_FIX); + static_assert(int{MTR_MEMO_PAGE_S_FIX} == int{RW_S_LATCH}, ""); + static_assert(int{MTR_MEMO_PAGE_X_FIX} == int{RW_X_LATCH}, ""); + static_assert(int{MTR_MEMO_PAGE_SX_FIX} == int{RW_SX_LATCH}, ""); + slot.type= mtr_memo_type_t(rw_latch); switch (rw_latch) { default: ut_ad("invalid state" == 0); break; + case RW_S_LATCH: + block->page.lock.s_lock(); + break; case RW_SX_LATCH: - slot.type= MTR_MEMO_PAGE_SX_FIX; block->page.lock.u_lock(); ut_ad(!block->page.is_io_fixed()); break; case RW_X_LATCH: - slot.type= MTR_MEMO_PAGE_X_FIX; block->page.lock.x_lock(); ut_ad(!block->page.is_io_fixed()); } @@ -1347,27 +1312,24 @@ bool mtr_t::memo_contains(const index_lock &lock, mtr_memo_type_t type) const ut_ad(type == MTR_MEMO_X_LOCK || type == MTR_MEMO_S_LOCK || type == MTR_MEMO_SX_LOCK); - if (m_memo) + for (const mtr_memo_slot_t &slot : m_memo) { - for (const mtr_memo_slot_t &slot : *m_memo) + if (slot.object == &lock && slot.type == type) { - if (slot.object == &lock && slot.type == type) - { - switch (type) { - case MTR_MEMO_X_LOCK: - ut_ad(lock.have_x()); - break; - case MTR_MEMO_SX_LOCK: - ut_ad(lock.have_u_or_x()); - break; - case MTR_MEMO_S_LOCK: - ut_ad(lock.have_s()); - break; - default: - break; - } - return true; + switch (type) { + case MTR_MEMO_X_LOCK: + ut_ad(lock.have_x()); + break; + case MTR_MEMO_SX_LOCK: + ut_ad(lock.have_u_or_x()); + break; + case MTR_MEMO_S_LOCK: + ut_ad(lock.have_s()); + break; + default: + break; } + return true; } } @@ -1395,7 +1357,7 @@ bool mtr_t::memo_contains_flagged(const void *object, ulint flags) const MTR_MEMO_MODIFY)) == !!(flags & (MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK | MTR_MEMO_S_LOCK))); - for (const mtr_memo_slot_t &slot : *m_memo) + for (const mtr_memo_slot_t &slot : m_memo) { if (object != slot.object) continue; @@ -1430,9 +1392,10 @@ buf_block_t* mtr_t::memo_contains_page_flagged(const byte *ptr, ulint flags) { ptr= page_align(ptr); - for (const mtr_memo_slot_t &slot : *m_memo) + for (const mtr_memo_slot_t &slot : m_memo) { - if (!slot.object || !(flags & slot.type)) + ut_ad(slot.object); + if (!(flags & slot.type)) continue; buf_page_t *bpage= static_cast<buf_page_t*>(slot.object); @@ -1453,35 +1416,84 @@ buf_block_t* mtr_t::memo_contains_page_flagged(const byte *ptr, ulint flags) /** Mark the given latched page as modified. @param block page that will be modified */ -void mtr_t::modify(const buf_block_t &block) +void mtr_t::set_modified(const buf_block_t &block) { - if (UNIV_UNLIKELY(!m_memo)) + if (block.page.id().space() >= SRV_TMP_SPACE_ID) { - /* This must be PageConverter::update_page() in IMPORT TABLESPACE. */ - ut_ad(!block.page.in_LRU_list); + const_cast<buf_block_t&>(block).page.set_temp_modified(); return; } - mtr_memo_slot_t *found= nullptr; + m_modifications= true; + + if (UNIV_UNLIKELY(m_log_mode == MTR_LOG_NONE)) + return; - for (mtr_memo_slot_t &slot : *m_memo) + for (mtr_memo_slot_t &slot : m_memo) { if (slot.object == &block && slot.type & (MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)) { - found= &slot; - break; + if (slot.type & MTR_MEMO_MODIFY) + ut_ad(m_made_dirty || block.page.oldest_modification() > 1); + else + { + slot.type= static_cast<mtr_memo_type_t>(slot.type | MTR_MEMO_MODIFY); + if (!m_made_dirty) + m_made_dirty= block.page.oldest_modification() <= 1; + } + return; } } - if (UNIV_UNLIKELY(!found)) + /* This must be PageConverter::update_page() in IMPORT TABLESPACE. */ + ut_ad(m_memo.empty()); + ut_ad(!block.page.in_LRU_list); +} + +void mtr_t::init(buf_block_t *b) +{ + const page_id_t id{b->page.id()}; + ut_ad(is_named_space(id.space())); + ut_ad(!m_freed_pages == !m_freed_space); + ut_ad(memo_contains_flagged(b, MTR_MEMO_PAGE_X_FIX)); + + if (id.space() >= SRV_TMP_SPACE_ID) + b->page.set_temp_modified(); + else { - ut_ad("modifying an unlatched page" == 0); - return; + for (mtr_memo_slot_t &slot : m_memo) + { + if (slot.object == b && slot.type & MTR_MEMO_PAGE_X_FIX) + { + slot.type= MTR_MEMO_PAGE_X_MODIFY; + m_modifications= true; + if (!m_made_dirty) + m_made_dirty= b->page.oldest_modification() <= 1; + goto found; + } + } + ut_ad("block not X-latched" == 0); + } + + found: + if (UNIV_LIKELY_NULL(m_freed_space) && + m_freed_space->id == id.space() && + m_freed_pages->remove_if_exists(id.page_no()) && + m_freed_pages->empty()) + { + delete m_freed_pages; + m_freed_pages= nullptr; + m_freed_space= nullptr; } - found->type= static_cast<mtr_memo_type_t>(found->type | MTR_MEMO_MODIFY); - if (!m_made_dirty) - m_made_dirty= is_block_dirtied(block.page); + + b->page.set_reinit(b->page.state() & buf_page_t::LRU_MASK); + + if (!is_logged()) + return; + + m_log.close(log_write<INIT_PAGE>(id, &b->page)); + m_last_offset= FIL_PAGE_TYPE; } /** Free a page. @@ -1494,24 +1506,26 @@ void mtr_t::free(const fil_space_t &space, uint32_t offset) if (is_logged()) { - ut_ad(m_memo); buf_block_t *freed= nullptr; const page_id_t id{space.id, offset}; - for (auto it= m_memo->rbegin(); it != m_memo->rend(); it++) + for (auto it= m_memo.end(); it != m_memo.begin(); ) { + it--; + next: mtr_memo_slot_t &slot= *it; buf_block_t *block= static_cast<buf_block_t*>(slot.object); - if (!block); - else if (block == freed) + ut_ad(block); + if (block == freed) { if (slot.type & (MTR_MEMO_PAGE_SX_FIX | MTR_MEMO_PAGE_X_FIX)) slot.type= MTR_MEMO_PAGE_X_FIX; else { ut_ad(slot.type == MTR_MEMO_BUF_FIX); - slot.object= nullptr; block->page.unfix(); + m_memo.erase(it, it + 1); + goto next; } } else if (slot.type & (MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX) && @@ -1525,7 +1539,17 @@ void mtr_t::free(const fil_space_t &space, uint32_t offset) ut_d(bool upgraded=) block->page.lock.x_lock_upgraded(); ut_ad(upgraded); } - slot.type= MTR_MEMO_PAGE_X_MODIFY; + if (id.space() >= SRV_TMP_SPACE_ID) + { + block->page.set_temp_modified(); + slot.type= MTR_MEMO_PAGE_X_FIX; + } + else + { + slot.type= MTR_MEMO_PAGE_X_MODIFY; + if (!m_made_dirty) + m_made_dirty= block->page.oldest_modification() <= 1; + } #ifdef BTR_CUR_HASH_ADAPT if (block->index) btr_search_drop_page_hash_index(block, false); @@ -1534,8 +1558,22 @@ void mtr_t::free(const fil_space_t &space, uint32_t offset) } } - if (freed && !m_made_dirty) - m_made_dirty= is_block_dirtied(freed->page); m_log.close(log_write<FREE_PAGE>(id, nullptr)); } } + +void small_vector_base::grow_by_1(void *small, size_t element_size) +{ + const size_t cap= Capacity*= 2, s= cap * element_size; + void *new_begin; + if (BeginX == small) + { + new_begin= my_malloc(PSI_NOT_INSTRUMENTED, s, MYF(0)); + memcpy(new_begin, BeginX, size() * element_size); + TRASH_FREE(small, size() * element_size); + } + else + new_begin= my_realloc(PSI_NOT_INSTRUMENTED, BeginX, s, MYF(0)); + + BeginX= new_begin; +} diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index c4ee55e79b3..028f73db9c9 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2022, MariaDB Corporation. +Copyright (c) 2015, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1617,6 +1617,9 @@ inline dberr_t IndexPurge::purge_pessimistic_delete() noexcept dberr_t IndexPurge::purge() noexcept { btr_pcur_store_position(&m_pcur, &m_mtr); + m_mtr.commit(); + m_mtr.start(); + m_mtr.set_log_mode(MTR_LOG_NO_REDO); dberr_t err= purge_pessimistic_delete(); m_mtr.start(); diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index de58e3896b0..3b21b0315cd 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2022, MariaDB Corporation. +Copyright (c) 2016, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -314,8 +314,10 @@ row_ins_clust_index_entry_by_modify( } if (mode != BTR_MODIFY_TREE) { - ut_ad((mode & ulint(~BTR_ALREADY_S_LATCHED)) - == BTR_MODIFY_LEAF); + ut_ad(mode == BTR_MODIFY_LEAF + || mode == BTR_MODIFY_LEAF_ALREADY_LATCHED + || mode == BTR_MODIFY_ROOT_AND_LEAF + || mode == BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED); /* Try optimistic updating of the record, keeping changes within the page */ @@ -1621,8 +1623,7 @@ row_ins_check_foreign_constraint( dtuple_set_n_fields_cmp(entry, foreign->n_fields); pcur.btr_cur.page_cur.index = check_index; - err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, 0, - &mtr); + err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); if (UNIV_UNLIKELY(err != DB_SUCCESS)) { goto end_scan; } @@ -2119,7 +2120,7 @@ row_ins_scan_sec_index_for_duplicate( pcur.btr_cur.page_cur.index = index; trx_t* const trx = thr_get_trx(thr); dberr_t err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, - &pcur, 0, mtr); + &pcur, mtr); if (err != DB_SUCCESS) { goto end_scan; } @@ -2543,8 +2544,8 @@ row_ins_index_entry_big_rec( index->set_modified(mtr); } - dberr_t error = btr_pcur_open(entry, PAGE_CUR_LE, - BTR_MODIFY_TREE, &pcur, 0, &mtr); + dberr_t error = btr_pcur_open(entry, PAGE_CUR_LE, BTR_MODIFY_TREE, + &pcur, &mtr); if (error != DB_SUCCESS) { return error; } @@ -2577,6 +2578,42 @@ but GCC 4.8.5 does not support pop_options. */ # pragma GCC optimize ("O0") #endif +#ifdef WITH_WSREP +/** Start bulk insert operation for Galera by appending +table-level exclusive key for bulk insert. +@param trx transaction +@param index index +@retval false on success +@retval true on failure */ +ATTRIBUTE_COLD static bool row_ins_wsrep_start_bulk(trx_t *trx, const dict_index_t &index) +{ + char db_buf[NAME_LEN + 1]; + char tbl_buf[NAME_LEN + 1]; + ulint db_buf_len, tbl_buf_len; + + if (!index.table->parse_name(db_buf, tbl_buf, &db_buf_len, &tbl_buf_len)) + { + WSREP_ERROR("Parse_name for bulk insert failed: %s", + wsrep_thd_query(trx->mysql_thd)); + trx->error_state = DB_ROLLBACK; + return true; + } + + /* Append table-level exclusive key for bulk insert. */ + const int rcode = wsrep_thd_append_table_key(trx->mysql_thd, db_buf, + tbl_buf, WSREP_SERVICE_KEY_EXCLUSIVE); + if (rcode) + { + WSREP_ERROR("Appending table key for bulk insert failed: %s, %d", + wsrep_thd_query(trx->mysql_thd), rcode); + trx->error_state = DB_ROLLBACK; + return true; + } + + return false; +} +#endif + /***************************************************************//** Tries to insert an entry into a clustered index, ignoring foreign key constraints. If a record with the same unique key is found, the other @@ -2602,11 +2639,10 @@ row_ins_clust_index_entry_low( que_thr_t* thr) /*!< in: query thread */ { btr_pcur_t pcur; - btr_cur_t* cursor; dberr_t err = DB_SUCCESS; big_rec_t* big_rec = NULL; mtr_t mtr; - ib_uint64_t auto_inc = 0; + uint64_t auto_inc = 0; mem_heap_t* offsets_heap = NULL; rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; rec_offs* offsets = offsets_; @@ -2622,7 +2658,7 @@ row_ins_clust_index_entry_low( ut_ad(!n_uniq || n_uniq == dict_index_get_n_unique(index)); ut_ad(!trx->in_rollback); - mtr_start(&mtr); + mtr.start(); if (index->table->is_temporary()) { /* Disable REDO logging as the lifetime of temp-tables is @@ -2662,6 +2698,13 @@ row_ins_clust_index_entry_low( dfield->type.mtype, dfield->type.prtype & DATA_UNSIGNED); + if (auto_inc + && mode != BTR_MODIFY_TREE) { + mode = btr_latch_mode( + BTR_MODIFY_ROOT_AND_LEAF + ^ BTR_MODIFY_LEAF + ^ mode); + } } } } @@ -2671,20 +2714,26 @@ row_ins_clust_index_entry_low( the function will return in both low_match and up_match of the cursor sensible values */ pcur.btr_cur.page_cur.index = index; - err = btr_pcur_open(entry, PAGE_CUR_LE, mode, &pcur, auto_inc, &mtr); + err = btr_pcur_open(entry, PAGE_CUR_LE, mode, &pcur, &mtr); if (err != DB_SUCCESS) { index->table->file_unreadable = true; -commit_exit: +err_exit: mtr.commit(); goto func_exit; } - cursor = btr_pcur_get_btr_cur(&pcur); - cursor->thr = thr; + if (auto_inc) { + buf_block_t* root + = mtr.at_savepoint(mode != BTR_MODIFY_ROOT_AND_LEAF); + ut_ad(index->page == root->page.id().page_no()); + page_set_autoinc(root, auto_inc, &mtr, false); + } + + btr_pcur_get_btr_cur(&pcur)->thr = thr; #ifdef UNIV_DEBUG { - page_t* page = btr_cur_get_page(cursor); + page_t* page = btr_pcur_get_page(&pcur); rec_t* first_rec = page_rec_get_next( page_get_infimum_rec(page)); @@ -2693,7 +2742,7 @@ commit_exit: } #endif /* UNIV_DEBUG */ - block = btr_cur_get_block(cursor); + block = btr_pcur_get_block(&pcur); DBUG_EXECUTE_IF("row_ins_row_level", goto skip_bulk_insert;); @@ -2707,7 +2756,7 @@ commit_exit: && !index->table->n_rec_locks && !index->table->is_active_ddl() && !index->table->has_spatial_index() - && !trx->is_wsrep() /* FIXME: MDEV-24623 */ + && !index->table->versioned() && !thd_is_slave(trx->mysql_thd) /* FIXME: MDEV-24622 */) { DEBUG_SYNC_C("empty_root_page_insert"); @@ -2719,7 +2768,7 @@ commit_exit: if (err != DB_SUCCESS) { trx->error_state = err; trx->bulk_insert = false; - goto commit_exit; + goto err_exit; } if (index->table->n_rec_locks) { @@ -2728,6 +2777,16 @@ avoid_bulk: goto skip_bulk_insert; } +#ifdef WITH_WSREP + if (trx->is_wsrep()) + { + if (!wsrep_thd_is_local_transaction(trx->mysql_thd)) + goto skip_bulk_insert; + if (row_ins_wsrep_start_bulk(trx, *index)) + goto err_exit; + } +#endif /* WITH_WSREP */ + #ifdef BTR_CUR_HASH_ADAPT if (btr_search_enabled) { btr_search_x_lock_all(); @@ -2751,7 +2810,7 @@ avoid_bulk: goto avoid_bulk; } - goto commit_exit; + goto err_exit; } } @@ -2762,7 +2821,7 @@ skip_bulk_insert: ut_ad(index->is_instant()); ut_ad(!dict_index_is_online_ddl(index)); - const rec_t* rec = btr_cur_get_rec(cursor); + const rec_t* rec = btr_pcur_get_rec(&pcur); if (rec_get_info_bits(rec, page_rec_is_comp(rec)) & REC_INFO_MIN_REC_FLAG) { @@ -2771,16 +2830,17 @@ skip_bulk_insert: goto err_exit; } - ut_ad(!row_ins_must_modify_rec(cursor)); + ut_ad(!row_ins_must_modify_rec(&pcur.btr_cur)); goto do_insert; } - if (rec_is_metadata(btr_cur_get_rec(cursor), *index)) { + if (rec_is_metadata(btr_pcur_get_rec(&pcur), *index)) { goto do_insert; } if (n_uniq - && (cursor->up_match >= n_uniq || cursor->low_match >= n_uniq)) { + && (pcur.btr_cur.up_match >= n_uniq + || pcur.btr_cur.low_match >= n_uniq)) { if (flags == (BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG @@ -2788,7 +2848,7 @@ skip_bulk_insert: /* Set no locks when applying log in online table rebuild. Only check for duplicates. */ err = row_ins_duplicate_error_in_clust_online( - n_uniq, entry, cursor, + n_uniq, entry, &pcur.btr_cur, &offsets, &offsets_heap); switch (err) { @@ -2799,26 +2859,24 @@ skip_bulk_insert: /* fall through */ case DB_SUCCESS_LOCKED_REC: case DB_DUPLICATE_KEY: - trx->error_info = cursor->index(); + trx->error_info = index; } } else { /* Note that the following may return also DB_LOCK_WAIT */ err = row_ins_duplicate_error_in_clust( - flags, cursor, entry, thr); + flags, &pcur.btr_cur, entry, thr); } if (err != DB_SUCCESS) { -err_exit: - mtr_commit(&mtr); - goto func_exit; + goto err_exit; } } /* Note: Allowing duplicates would qualify for modification of an existing record as the new entry is exactly same as old entry. */ - if (row_ins_must_modify_rec(cursor)) { + if (row_ins_must_modify_rec(&pcur.btr_cur)) { /* There is already an index entry with a long enough common prefix, we must convert the insert into a modify of an existing record */ @@ -2836,10 +2894,13 @@ do_insert: rec_t* insert_rec; if (mode != BTR_MODIFY_TREE) { - ut_ad(mode == BTR_MODIFY_LEAF || - mode == BTR_MODIFY_LEAF_ALREADY_LATCHED); + ut_ad(mode == BTR_MODIFY_LEAF + || mode == BTR_MODIFY_LEAF_ALREADY_LATCHED + || mode == BTR_MODIFY_ROOT_AND_LEAF + || mode + == BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED); err = btr_cur_optimistic_insert( - flags, cursor, &offsets, &offsets_heap, + flags, &pcur.btr_cur, &offsets, &offsets_heap, entry, &insert_rec, &big_rec, n_ext, thr, &mtr); } else { @@ -2848,17 +2909,15 @@ do_insert: goto err_exit; } - DEBUG_SYNC_C("before_insert_pessimitic_row_ins_clust"); - err = btr_cur_optimistic_insert( - flags, cursor, + flags, &pcur.btr_cur, &offsets, &offsets_heap, entry, &insert_rec, &big_rec, n_ext, thr, &mtr); if (err == DB_FAIL) { err = btr_cur_pessimistic_insert( - flags, cursor, + flags, &pcur.btr_cur, &offsets, &offsets_heap, entry, &insert_rec, &big_rec, n_ext, thr, &mtr); @@ -2970,9 +3029,7 @@ row_ins_sec_index_entry_low( rtr_init_rtr_info(&rtr_info, false, &cursor, index, false); rtr_info_update_btr(&cursor, &rtr_info); - err = btr_cur_search_to_nth_level(0, entry, - PAGE_CUR_RTREE_INSERT, - search_mode, &cursor, &mtr); + err = rtr_insert_leaf(&cursor, entry, search_mode, &mtr); if (err == DB_SUCCESS && search_mode == BTR_MODIFY_LEAF && rtr_info.mbr_adj) { @@ -2988,9 +3045,8 @@ row_ins_sec_index_entry_low( } else { index->set_modified(mtr); } - err = btr_cur_search_to_nth_level( - 0, entry, PAGE_CUR_RTREE_INSERT, - search_mode, &cursor, &mtr); + err = rtr_insert_leaf(&cursor, entry, + search_mode, &mtr); } DBUG_EXECUTE_IF( @@ -3006,8 +3062,8 @@ row_ins_sec_index_entry_low( : BTR_INSERT)); } - err = btr_cur_search_to_nth_level(0, entry, PAGE_CUR_LE, - search_mode, &cursor, &mtr); + err = cursor.search_leaf(entry, PAGE_CUR_LE, search_mode, + &mtr); } if (err != DB_SUCCESS) { @@ -3083,12 +3139,12 @@ row_ins_sec_index_entry_low( prevent any insertion of a duplicate by another transaction. Let us now reposition the cursor and continue the insertion (bypassing the change buffer). */ - err = btr_cur_search_to_nth_level( - 0, entry, PAGE_CUR_LE, + err = cursor.search_leaf( + entry, PAGE_CUR_LE, btr_latch_mode(search_mode & ~(BTR_INSERT | BTR_IGNORE_SEC_UNIQUE)), - &cursor, &mtr); + &mtr); if (err != DB_SUCCESS) { goto func_exit; } diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index 64b4d02fa18..60eda9b14a1 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2011, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1696,8 +1696,8 @@ err_exit: mtr->start(); index->set_modified(*mtr); pcur->btr_cur.page_cur.index = index; - error = btr_pcur_open(entry, PAGE_CUR_LE, - BTR_PURGE_TREE, pcur, 0, mtr); + error = btr_pcur_open(entry, PAGE_CUR_LE, BTR_PURGE_TREE, pcur, + mtr); if (error) { goto err_exit; } @@ -1780,8 +1780,8 @@ row_log_table_apply_delete( mtr_start(&mtr); index->set_modified(mtr); - dberr_t err = btr_pcur_open(old_pk, PAGE_CUR_LE, - BTR_PURGE_TREE, &pcur, 0, &mtr); + dberr_t err = btr_pcur_open(old_pk, PAGE_CUR_LE, BTR_PURGE_TREE, &pcur, + &mtr); if (err != DB_SUCCESS) { goto all_done; } @@ -1917,8 +1917,8 @@ row_log_table_apply_update( mtr.start(); index->set_modified(mtr); - error = btr_pcur_open(old_pk, PAGE_CUR_LE, - BTR_MODIFY_TREE, &pcur, 0, &mtr); + error = btr_pcur_open(old_pk, PAGE_CUR_LE, BTR_MODIFY_TREE, &pcur, + &mtr); if (error != DB_SUCCESS) { func_exit: mtr.commit(); @@ -3084,11 +3084,8 @@ row_log_apply_op_low( record. The operation may already have been performed, depending on when the row in the clustered index was scanned. */ - *error = btr_cur_search_to_nth_level(0, entry, PAGE_CUR_LE, - has_index_lock - ? BTR_MODIFY_TREE - : BTR_MODIFY_LEAF, - &cursor, &mtr); + *error = cursor.search_leaf(entry, PAGE_CUR_LE, has_index_lock + ? BTR_MODIFY_TREE : BTR_MODIFY_LEAF, &mtr); if (UNIV_UNLIKELY(*error != DB_SUCCESS)) { goto func_exit; } @@ -3138,9 +3135,9 @@ row_log_apply_op_low( mtr_commit(&mtr); mtr_start(&mtr); index->set_modified(mtr); - *error = btr_cur_search_to_nth_level( - 0, entry, PAGE_CUR_LE, - BTR_MODIFY_TREE, &cursor, &mtr); + *error = cursor.search_leaf(entry, PAGE_CUR_LE, + BTR_MODIFY_TREE, + &mtr); if (UNIV_UNLIKELY(*error != DB_SUCCESS)) { goto func_exit; } @@ -3242,9 +3239,9 @@ insert_the_rec: mtr_commit(&mtr); mtr_start(&mtr); index->set_modified(mtr); - *error = btr_cur_search_to_nth_level( - 0, entry, PAGE_CUR_LE, - BTR_MODIFY_TREE, &cursor, &mtr); + *error = cursor.search_leaf(entry, PAGE_CUR_LE, + BTR_MODIFY_TREE, + &mtr); if (*error != DB_SUCCESS) { break; } diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index 723b4b1d387..5601a786555 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -150,9 +150,8 @@ public: false); rtr_info_update_btr(&ins_cur, &rtr_info); - error = btr_cur_search_to_nth_level( - 0, dtuple, PAGE_CUR_RTREE_INSERT, - BTR_MODIFY_LEAF, &ins_cur, &mtr); + error = rtr_insert_leaf(&ins_cur, dtuple, + BTR_MODIFY_LEAF, &mtr); /* It need to update MBR in parent entry, so change search mode to BTR_MODIFY_TREE */ @@ -164,10 +163,8 @@ public: rtr_info_update_btr(&ins_cur, &rtr_info); mtr.start(); index->set_modified(mtr); - error = btr_cur_search_to_nth_level( - 0, dtuple, - PAGE_CUR_RTREE_INSERT, - BTR_MODIFY_TREE, &ins_cur, &mtr); + error = rtr_insert_leaf(&ins_cur, dtuple, + BTR_MODIFY_TREE, &mtr); } if (error == DB_SUCCESS) { @@ -189,11 +186,8 @@ public: &ins_cur, index, false); rtr_info_update_btr(&ins_cur, &rtr_info); - error = btr_cur_search_to_nth_level( - 0, dtuple, - PAGE_CUR_RTREE_INSERT, - BTR_MODIFY_TREE, - &ins_cur, &mtr); + error = rtr_insert_leaf(&ins_cur, dtuple, + BTR_MODIFY_TREE, &mtr); if (error == DB_SUCCESS) { error = btr_cur_pessimistic_insert( diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index df042f66521..65d26e0a733 100644 --- a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -104,7 +104,7 @@ bool row_purge_remove_clust_if_poss_low( /*===============================*/ purge_node_t* node, /*!< in/out: row purge node */ - btr_latch_mode mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ + btr_latch_mode mode) /*!< in: BTR_MODIFY_LEAF or BTR_PURGE_TREE */ { dict_index_t* index = dict_table_get_first_index(node->table); table_id_t table_id = 0; @@ -342,17 +342,20 @@ row_purge_remove_sec_if_poss_tree( ibool success = TRUE; dberr_t err; mtr_t mtr; - enum row_search_result search_result; log_free_check(); mtr.start(); index->set_modified(mtr); pcur.btr_cur.page_cur.index = index; - search_result = row_search_index_entry(entry, BTR_PURGE_TREE, - &pcur, &mtr); + if (index->is_spatial()) { + if (!rtr_search(entry, BTR_PURGE_TREE, &pcur, &mtr)) { + goto found; + } + goto func_exit; + } - switch (search_result) { + switch (row_search_index_entry(entry, BTR_PURGE_TREE, &pcur, &mtr)) { case ROW_NOT_FOUND: /* Not found. This is a legitimate condition. In a rollback, InnoDB will remove secondary recs that would @@ -381,6 +384,7 @@ row_purge_remove_sec_if_poss_tree( which cannot be purged yet, requires its existence. If some requires, we should do nothing. */ +found: if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, true)) { /* Remove the index record, which should have been @@ -439,8 +443,6 @@ row_purge_remove_sec_if_poss_leaf( { mtr_t mtr; btr_pcur_t pcur; - enum btr_latch_mode mode; - enum row_search_result search_result; bool success = true; log_free_check(); @@ -449,31 +451,27 @@ row_purge_remove_sec_if_poss_leaf( mtr.start(); index->set_modified(mtr); - /* Change buffering is disabled for spatial index and - virtual index. */ - mode = (index->type & (DICT_SPATIAL | DICT_VIRTUAL)) - ? BTR_MODIFY_LEAF : BTR_PURGE_LEAF; pcur.btr_cur.page_cur.index = index; /* Set the purge node for the call to row_purge_poss_sec(). */ pcur.btr_cur.purge_node = node; if (index->is_spatial()) { pcur.btr_cur.thr = NULL; - index->lock.u_lock(SRW_LOCK_CALL); - search_result = row_search_index_entry( - entry, mode, &pcur, &mtr); - index->lock.u_unlock(); - } else { - /* Set the query thread, so that ibuf_insert_low() will be - able to invoke thd_get_trx(). */ - pcur.btr_cur.thr = static_cast<que_thr_t*>( - que_node_get_parent(node)); - search_result = row_search_index_entry( - entry, mode, &pcur, &mtr); + if (!rtr_search(entry, BTR_MODIFY_LEAF, &pcur, &mtr)) { + goto found; + } + goto func_exit; } - switch (search_result) { + /* Set the query thread, so that ibuf_insert_low() will be + able to invoke thd_get_trx(). */ + pcur.btr_cur.thr = static_cast<que_thr_t*>(que_node_get_parent(node)); + + switch (row_search_index_entry(entry, index->has_virtual() + ? BTR_MODIFY_LEAF : BTR_PURGE_LEAF, + &pcur, &mtr)) { case ROW_FOUND: +found: /* Before attempting to purge a record, check if it is safe to do so. */ if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, false)) { diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc index b998d27d836..4a00b2a430e 100644 --- a/storage/innobase/row/row0row.cc +++ b/storage/innobase/row/row0row.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2022, MariaDB Corporation. +Copyright (c) 2018, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1216,7 +1216,7 @@ row_search_on_row_ref( & REC_INFO_MIN_REC_FLAG; } else { ut_a(ref->n_fields == index->n_uniq); - if (btr_pcur_open(ref, PAGE_CUR_LE, mode, pcur, 0, mtr) + if (btr_pcur_open(ref, PAGE_CUR_LE, mode, pcur, mtr) != DB_SUCCESS) { return false; } @@ -1278,21 +1278,13 @@ row_search_index_entry( ut_ad(dtuple_check_typed(entry)); - if (pcur->index()->is_spatial()) { - if (rtr_pcur_open(pcur->index(), entry, mode, pcur, mtr)) { - return ROW_NOT_FOUND; - } - } else { - if (btr_pcur_open(entry, PAGE_CUR_LE, mode, pcur, 0, mtr) - != DB_SUCCESS) { - return ROW_NOT_FOUND; - } + if (btr_pcur_open(entry, PAGE_CUR_LE, mode, pcur, mtr) != DB_SUCCESS) { + return ROW_NOT_FOUND; } switch (btr_pcur_get_btr_cur(pcur)->flag) { case BTR_CUR_DELETE_REF: ut_ad(!(~mode & BTR_DELETE)); - ut_ad(!pcur->index()->is_spatial()); return(ROW_NOT_DELETED_REF); case BTR_CUR_DEL_MARK_IBUF: diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index 23100a96ebd..716e5351446 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -2,7 +2,7 @@ Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. -Copyright (c) 2015, 2022, MariaDB Corporation. +Copyright (c) 2015, 2023, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -4775,7 +4775,7 @@ wait_table_again: pcur->btr_cur.thr = thr; pcur->old_rec = nullptr; - if (dict_index_is_spatial(index)) { + if (index->is_spatial()) { if (!prebuilt->rtr_info) { prebuilt->rtr_info = rtr_create_rtr_info( set_also_gap_locks, true, @@ -4791,10 +4791,13 @@ wait_table_again: prebuilt->rtr_info->search_tuple = search_tuple; prebuilt->rtr_info->search_mode = mode; } - } - err = btr_pcur_open_with_no_init(search_tuple, mode, - BTR_SEARCH_LEAF, pcur, &mtr); + err = rtr_search_leaf(pcur, search_tuple, mode, &mtr); + } else { + err = btr_pcur_open_with_no_init(search_tuple, mode, + BTR_SEARCH_LEAF, + pcur, &mtr); + } if (err != DB_SUCCESS) { page_corrupted: @@ -5771,8 +5774,7 @@ next_rec_after_check: if (spatial_search) { /* No need to do store restore for R-tree */ - mtr.commit(); - mtr.start(); + mtr.rollback_to_savepoint(0); } else if (mtr_extra_clust_savepoint) { /* We must release any clustered index latches if we are moving to the next non-clustered diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc index 6567019a33d..50196e78092 100644 --- a/storage/innobase/row/row0uins.cc +++ b/storage/innobase/row/row0uins.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -233,7 +233,7 @@ func_exit: if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_INSERT_METADATA) { /* When rolling back the very first instant ADD COLUMN operation, reset the root page to the basic state. */ - err = btr_reset_instant(*index, true, &mtr); + btr_reset_instant(*index, true, &mtr); } btr_pcur_commit_specify_mtr(&node->pcur, &mtr); @@ -268,21 +268,32 @@ row_undo_ins_remove_sec_low( pcur.btr_cur.page_cur.index = index; row_mtr_start(&mtr, index, !modify_leaf); - if (modify_leaf) { - mode = BTR_MODIFY_LEAF_ALREADY_LATCHED; - mtr_s_lock_index(index, &mtr); - } else { - ut_ad(mode == BTR_PURGE_TREE); - mtr_sx_lock_index(index, &mtr); - } - if (index->is_spatial()) { mode = modify_leaf - ? btr_latch_mode(BTR_MODIFY_LEAF_ALREADY_LATCHED + ? btr_latch_mode(BTR_MODIFY_LEAF | BTR_RTREE_DELETE_MARK | BTR_RTREE_UNDO_INS) : btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS); btr_pcur_get_btr_cur(&pcur)->thr = thr; + if (rtr_search(entry, mode, &pcur, &mtr)) { + goto func_exit; + } + + if (rec_get_deleted_flag( + btr_pcur_get_rec(&pcur), + dict_table_is_comp(index->table))) { + ib::error() << "Record found in index " << index->name + << " is deleted marked on insert rollback."; + ut_ad(0); + } + goto found; + } else if (modify_leaf) { + mode = BTR_MODIFY_LEAF_ALREADY_LATCHED; + mtr_s_lock_index(index, &mtr); + } else { + ut_ad(mode == BTR_PURGE_TREE); + mode = BTR_PURGE_TREE_ALREADY_LATCHED; + mtr_x_lock_index(index, &mtr); } switch (row_search_index_entry(entry, mode, &pcur, &mtr)) { @@ -295,15 +306,7 @@ row_undo_ins_remove_sec_low( case ROW_NOT_FOUND: break; case ROW_FOUND: - if (dict_index_is_spatial(index) - && rec_get_deleted_flag( - btr_pcur_get_rec(&pcur), - dict_table_is_comp(index->table))) { - ib::error() << "Record found in index " << index->name - << " is deleted marked on insert rollback."; - ut_ad(0); - } - + found: btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur); if (modify_leaf) { @@ -318,6 +321,7 @@ row_undo_ins_remove_sec_low( } } +func_exit: btr_pcur_close(&pcur); mtr_commit(&mtr); diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc index 2d04dca4003..50e15e03cc9 100644 --- a/storage/innobase/row/row0umod.cc +++ b/storage/innobase/row/row0umod.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -133,8 +133,7 @@ row_undo_mod_clust_low( && node->ref == &trx_undo_metadata && btr_cur_get_index(btr_cur)->table->instant && node->update->info_bits == REC_INFO_METADATA_ADD) { - err = btr_reset_instant(*btr_cur_get_index(btr_cur), - false, mtr); + btr_reset_instant(*btr_cur->index(), false, mtr); } } @@ -490,7 +489,6 @@ row_undo_mod_del_mark_or_remove_sec_low( dberr_t err = DB_SUCCESS; mtr_t mtr; mtr_t mtr_vers; - row_search_result search_result; const bool modify_leaf = mode == BTR_MODIFY_LEAF; row_mtr_start(&mtr, index, !modify_leaf); @@ -505,6 +503,11 @@ row_undo_mod_del_mark_or_remove_sec_low( | BTR_RTREE_UNDO_INS) : btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS); btr_cur->thr = thr; + if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, &mtr))) { + goto found; + } else { + goto func_exit; + } } else if (!index->is_committed()) { /* The index->online_status may change if the index is or was being created online, but not committed yet. It @@ -514,7 +517,8 @@ row_undo_mod_del_mark_or_remove_sec_low( mtr_s_lock_index(index, &mtr); } else { ut_ad(mode == BTR_PURGE_TREE); - mtr_sx_lock_index(index, &mtr); + mode = BTR_PURGE_TREE_ALREADY_LATCHED; + mtr_x_lock_index(index, &mtr); } } else { /* For secondary indexes, @@ -523,9 +527,8 @@ row_undo_mod_del_mark_or_remove_sec_low( ut_ad(!dict_index_is_online_ddl(index)); } - search_result = row_search_index_entry(entry, mode, &pcur, &mtr); - - switch (UNIV_EXPECT(search_result, ROW_FOUND)) { + switch (UNIV_EXPECT(row_search_index_entry(entry, mode, &pcur, &mtr), + ROW_FOUND)) { case ROW_NOT_FOUND: /* In crash recovery, the secondary index record may be missing if the UPDATE did not have time to insert @@ -547,6 +550,7 @@ row_undo_mod_del_mark_or_remove_sec_low( ut_error; } +found: /* We should remove the index record if no prior version of the row, which cannot be purged yet, requires its existence. If some requires, we should delete mark the record. */ @@ -665,13 +669,12 @@ row_undo_mod_del_unmark_sec_and_undo_update( trx_t* trx = thr_get_trx(thr); const ulint flags = BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG; - row_search_result search_result; const auto orig_mode = mode; pcur.btr_cur.page_cur.index = index; ut_ad(trx->id != 0); - if (dict_index_is_spatial(index)) { + if (index->is_spatial()) { /* FIXME: Currently we do a 2-pass search for the undo due to avoid undel-mark a wrong rec in rolling back in partial update. Later, we could log some info in @@ -686,9 +689,22 @@ try_again: btr_cur->thr = thr; - search_result = row_search_index_entry(entry, mode, &pcur, &mtr); + if (index->is_spatial()) { + if (!rtr_search(entry, mode, &pcur, &mtr)) { + goto found; + } - switch (search_result) { + if (mode != orig_mode && btr_cur->rtr_info->fd_del) { + mode = orig_mode; + btr_pcur_close(&pcur); + mtr.commit(); + goto try_again; + } + + goto not_found; + } + + switch (row_search_index_entry(entry, mode, &pcur, &mtr)) { mem_heap_t* heap; mem_heap_t* offsets_heap; rec_offs* offsets; @@ -699,17 +715,7 @@ try_again: flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */ ut_error; case ROW_NOT_FOUND: - /* For spatial index, if first search didn't find an - undel-marked rec, try to find a del-marked rec. */ - if (dict_index_is_spatial(index) && btr_cur->rtr_info->fd_del) { - if (mode != orig_mode) { - mode = orig_mode; - btr_pcur_close(&pcur); - mtr_commit(&mtr); - goto try_again; - } - } - +not_found: if (btr_cur->up_match >= dict_index_get_n_unique(index) || btr_cur->low_match >= dict_index_get_n_unique(index)) { ib::warn() << "Record in index " << index->name @@ -767,6 +773,7 @@ try_again: break; case ROW_FOUND: +found: btr_rec_set_deleted<false>(btr_cur_get_block(btr_cur), btr_cur_get_rec(btr_cur), &mtr); heap = mem_heap_create( diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc index a3f940adff5..fe88fce58a2 100644 --- a/storage/innobase/row/row0upd.cc +++ b/storage/innobase/row/row0upd.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2022, MariaDB Corporation. +Copyright (c) 2015, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1832,12 +1832,10 @@ row_upd_sec_index_entry( que_thr_t* thr) /*!< in: query thread */ { mtr_t mtr; - const rec_t* rec; btr_pcur_t pcur; mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; - btr_cur_t* btr_cur; dberr_t err = DB_SUCCESS; trx_t* trx = thr_get_trx(thr); btr_latch_mode mode; @@ -1876,10 +1874,6 @@ row_upd_sec_index_entry( case SRV_TMP_SPACE_ID: mtr.set_log_mode(MTR_LOG_NO_REDO); flags = BTR_NO_LOCKING_FLAG; - if (index->is_spatial()) { - mode = btr_latch_mode(BTR_MODIFY_LEAF - | BTR_RTREE_DELETE_MARK); - } break; default: index->set_modified(mtr); @@ -1888,26 +1882,35 @@ row_upd_sec_index_entry( flags = index->table->no_rollback() ? BTR_NO_ROLLBACK : 0; /* We can only buffer delete-mark operations if there are no foreign key constraints referring to the index. */ - mode = index->is_spatial() - ? btr_latch_mode(BTR_MODIFY_LEAF - | BTR_RTREE_DELETE_MARK) - : referenced - ? BTR_MODIFY_LEAF : BTR_DELETE_MARK_LEAF; + if (!referenced) { + mode = BTR_DELETE_MARK_LEAF; + } break; } /* Set the query thread, so that ibuf_insert_low() will be able to invoke thd_get_trx(). */ - btr_pcur_get_btr_cur(&pcur)->thr = thr; + pcur.btr_cur.thr = thr; pcur.btr_cur.page_cur.index = index; - search_result = row_search_index_entry(entry, mode, &pcur, &mtr); + if (index->is_spatial()) { + mode = btr_latch_mode(BTR_MODIFY_LEAF | BTR_RTREE_DELETE_MARK); + if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, &mtr))) { + goto found; + } - btr_cur = btr_pcur_get_btr_cur(&pcur); + if (pcur.btr_cur.rtr_info->fd_del) { + /* We found the record, but a delete marked */ + goto close; + } - rec = btr_cur_get_rec(btr_cur); + goto not_found; + } + + search_result = row_search_index_entry(entry, mode, &pcur, &mtr); switch (search_result) { + const rec_t* rec; case ROW_NOT_DELETED_REF: /* should only occur for BTR_DELETE */ ut_error; break; @@ -1916,11 +1919,8 @@ row_upd_sec_index_entry( break; case ROW_NOT_FOUND: - if (dict_index_is_spatial(index) && btr_cur->rtr_info->fd_del) { - /* We found the record, but a delete marked */ - break; - } - +not_found: + rec = btr_pcur_get_rec(&pcur); ib::error() << "Record in index " << index->name << " of table " << index->table->name @@ -1934,7 +1934,9 @@ row_upd_sec_index_entry( #endif /* UNIV_DEBUG */ break; case ROW_FOUND: +found: ut_ad(err == DB_SUCCESS); + rec = btr_pcur_get_rec(&pcur); /* Delete mark the old index record; it can already be delete marked if we return after a lock wait in @@ -1943,14 +1945,14 @@ row_upd_sec_index_entry( rec, dict_table_is_comp(index->table))) { err = lock_sec_rec_modify_check_and_lock( flags, - btr_cur_get_block(btr_cur), - btr_cur_get_rec(btr_cur), index, thr, &mtr); + btr_pcur_get_block(&pcur), + btr_pcur_get_rec(&pcur), index, thr, &mtr); if (err != DB_SUCCESS) { break; } - btr_rec_set_deleted<true>(btr_cur_get_block(btr_cur), - btr_cur_get_rec(btr_cur), + btr_rec_set_deleted<true>(btr_pcur_get_block(&pcur), + btr_pcur_get_rec(&pcur), &mtr); #ifdef WITH_WSREP if (!referenced && foreign @@ -2009,6 +2011,7 @@ row_upd_sec_index_entry( } } +close: btr_pcur_close(&pcur); mtr_commit(&mtr); diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index 22782cc0d1e..a1d569571c8 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -424,7 +424,6 @@ static dberr_t trx_purge_free_segment(trx_rseg_t *rseg, fil_addr_t hdr_addr) block->fix(); mtr.commit(); mtr.start(); - mtr.flag_modified(); rseg->latch.wr_lock(SRW_LOCK_CALL); rseg_hdr->page.lock.x_lock(); block->page.lock.x_lock(); diff --git a/storage/spider/ha_spider.cc b/storage/spider/ha_spider.cc index 48297a4ae07..9504b5a6c26 100644 --- a/storage/spider/ha_spider.cc +++ b/storage/spider/ha_spider.cc @@ -1210,10 +1210,8 @@ int ha_spider::reset() #endif result_list.direct_distinct = FALSE; store_error_num = 0; - if ( - wide_handler && - wide_handler->sql_command != SQLCOM_END - ) { + if (wide_handler) + { wide_handler->sql_command = SQLCOM_END; wide_handler->between_flg = FALSE; wide_handler->idx_bitmap_is_set = FALSE; diff --git a/storage/spider/mysql-test/spider/bugfix/r/mdev_30191.result b/storage/spider/mysql-test/spider/bugfix/r/mdev_30191.result new file mode 100644 index 00000000000..941f210cbbe --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/r/mdev_30191.result @@ -0,0 +1,44 @@ +# +# MDEV-30191 SIGSEGV & heap-use-after-free in spider_db_print_item_type, SIGABRT in __cxa_pure_virtual/spider_db_print_item_type, Got error 128 "Out of memory in engine", 56/112 memory not freed, and Assertion `fixed()' failed in Item_sp_variable::val_str on SP call +# +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 +connection child2_1; +CREATE DATABASE auto_test_remote; +USE auto_test_remote; +CREATE TABLE tbl_a (c INT); +connection master_1; +CREATE DATABASE auto_test_local; +USE auto_test_local; +CREATE TABLE tbl_a ( +c INT +) ENGINE=Spider DEFAULT CHARSET=utf8 COMMENT='table "tbl_a", srv "s_2_1"'; +CREATE TABLE tbl_b (c INT); +CREATE PROCEDURE sp() BEGIN +DECLARE v1 DATE; +WHILE EXISTS (SELECT 1 FROM tbl_a WHERE c>v1 AND c<=v1) DO +SELECT 1; +END WHILE; +WHILE EXISTS (SELECT 1 +FROM tbl_a +WHERE c<v1 AND EXISTS (SELECT 1 +FROM tbl_b +WHERE tbl_a.c=tbl_b.c)) DO +SELECT 1; +END WHILE; +END $$ +CALL sp(); +connection master_1; +DROP DATABASE IF EXISTS auto_test_local; +connection child2_1; +DROP DATABASE IF EXISTS auto_test_remote; +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_30191.cnf b/storage/spider/mysql-test/spider/bugfix/t/mdev_30191.cnf new file mode 100644 index 00000000000..05dfd8a0bce --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_30191.cnf @@ -0,0 +1,3 @@ +!include include/default_mysqld.cnf +!include ../my_1_1.cnf +!include ../my_2_1.cnf diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_30191.test b/storage/spider/mysql-test/spider/bugfix/t/mdev_30191.test new file mode 100644 index 00000000000..8d19a3515cf --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_30191.test @@ -0,0 +1,51 @@ +--echo # +--echo # MDEV-30191 SIGSEGV & heap-use-after-free in spider_db_print_item_type, SIGABRT in __cxa_pure_virtual/spider_db_print_item_type, Got error 128 "Out of memory in engine", 56/112 memory not freed, and Assertion `fixed()' failed in Item_sp_variable::val_str on SP call +--echo # + +--disable_query_log +--disable_result_log +--source ../../t/test_init.inc +--enable_result_log +--enable_query_log + +--connection child2_1 +CREATE DATABASE auto_test_remote; +USE auto_test_remote; +CREATE TABLE tbl_a (c INT); + +--connection master_1 +CREATE DATABASE auto_test_local; +USE auto_test_local; + +eval CREATE TABLE tbl_a ( + c INT +) $MASTER_1_ENGINE $MASTER_1_CHARSET COMMENT='table "tbl_a", srv "s_2_1"'; +CREATE TABLE tbl_b (c INT); + +--delimiter $$ +CREATE PROCEDURE sp() BEGIN + DECLARE v1 DATE; + WHILE EXISTS (SELECT 1 FROM tbl_a WHERE c>v1 AND c<=v1) DO + SELECT 1; + END WHILE; + WHILE EXISTS (SELECT 1 + FROM tbl_a + WHERE c<v1 AND EXISTS (SELECT 1 + FROM tbl_b + WHERE tbl_a.c=tbl_b.c)) DO + SELECT 1; + END WHILE; +END $$ +--delimiter ; +CALL sp(); + +--connection master_1 +DROP DATABASE IF EXISTS auto_test_local; +--connection child2_1 +DROP DATABASE IF EXISTS auto_test_remote; + +--disable_query_log +--disable_result_log +--source ../t/test_deinit.inc +--enable_query_log +--enable_result_log |