diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2022-08-26 15:18:11 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2022-08-26 15:18:11 +0300 |
commit | 8ab50a3a68c6e8a63b8785ba9c66c7dc5a001b88 (patch) | |
tree | d99705e7ce125e4c5eea1657fa8b403cc43833e0 | |
parent | 0fbcb0a2b87d8807b85fec85507074bcda3d4da9 (diff) | |
download | mariadb-git-bb-10.6-MDEV-28800.tar.gz |
MDEV-28800 WIP: Avoid crashes on memory allocation failure (branch bb-10.6-MDEV-28800)
FIXME: Allocate locks upfront for page split or reorganize,
so that the operation can gracefully fail before any irreversible
persistent changes are performed. This affects lock_move_reorganize_page(),
lock_move_rec_list_end(), lock_move_rec_list_start(),
btr_root_raise_and_insert(), btr_insert_into_right_sibling(),
btr_page_split_and_insert().
buf_block_alloc(): Remove. This was an alias of
buf_LRU_get_free_block(false). Let us call that function directly.
buf_LRU_get_free_block(), buf_buddy_alloc_low(), buf_buddy_alloc():
If there is no free block in the buffer pool, return nullptr.
recv_sys_t::recover_low(), recv_sys_t::recover(): Return an error code,
which may be DB_OUT_OF_MEMORY.
lock_rec_create_low(): Return nullptr if the lock table is full.
This will be the only caller of buf_pool.running_out().
btr_search_check_free_space_in_heap(): Replaced with
btr_search_lock_and_alloc().
28 files changed, 617 insertions, 474 deletions
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 772ac99a5d5..901beb12461 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -1300,7 +1300,9 @@ static dberr_t btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, btr_search_drop_page_hash_index(block); - buf_block_t *old= buf_block_alloc(); + buf_block_t *old= buf_LRU_get_free_block(false); + if (UNIV_UNLIKELY(!old)) + return DB_OUT_OF_MEMORY; /* Copy the old page to temporary space */ memcpy_aligned<UNIV_PAGE_SIZE_MIN>(old->page.frame, block->page.frame, srv_page_size); @@ -1411,7 +1413,7 @@ static dberr_t btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, PAGE_DATA - (PAGE_MAX_TRX_ID + PAGE_HEADER))); if (index->has_locking()) - lock_move_reorganize_page(block, old); + err= lock_move_reorganize_page(block, old); /* Write log for the changes, if needed. */ if (log_mode == MTR_LOG_ALL) @@ -1538,7 +1540,7 @@ static dberr_t btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, MONITOR_INC(MONITOR_INDEX_REORG_ATTEMPTS); MONITOR_INC(MONITOR_INDEX_REORG_SUCCESSFUL); - return DB_SUCCESS; + return err; } /*************************************************************//** @@ -1895,17 +1897,20 @@ btr_root_raise_and_insert( /* Update the lock table and possible hash index. 
*/ if (index->has_locking()) { - lock_move_rec_list_end( - new_block, root, - page_get_infimum_rec(root->page.frame)); - } - - /* Move any existing predicate locks */ - if (dict_index_is_spatial(index)) { - lock_prdt_rec_move(new_block, root_id); - } else { btr_search_move_or_delete_hash_entries( new_block, root); + *err = lock_move_rec_list_end( + new_block, root, + page_get_infimum_rec(root->page.frame)); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { + return nullptr; + } + if (index->is_spatial()) { + *err = lock_prdt_rec_move(new_block, root_id); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { + return nullptr; + } + } } } @@ -1947,7 +1952,10 @@ btr_root_raise_and_insert( root page: we cannot discard the lock structs on the root page */ if (index->has_locking()) { - lock_update_root_raise(*new_block, root_id); + *err = lock_update_root_raise(*new_block, root_id); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { + return nullptr; + } } /* Create a memory heap where the node pointer is stored */ @@ -2657,8 +2665,9 @@ btr_insert_into_right_sibling( max_size = page_get_max_insert_size_after_reorganize(next_page, 1); /* Extends gap lock for the next page */ - if (is_leaf && cursor->index->has_locking()) { - lock_update_node_pointer(block, next_block); + if (is_leaf && cursor->index->has_locking() + && lock_update_node_pointer(block, next_block) != DB_SUCCESS) { + return nullptr; } rec = page_cur_tuple_insert( @@ -3077,14 +3086,16 @@ insert_empty: /* Update the lock table and possible hash index. */ if (cursor->index->has_locking()) { - lock_move_rec_list_start( + btr_search_move_or_delete_hash_entries( + new_block, block); + *err = lock_move_rec_list_start( new_block, block, move_limit, new_page + PAGE_NEW_INFIMUM); + if (*err != DB_SUCCESS) { + return nullptr; + } } - btr_search_move_or_delete_hash_entries( - new_block, block); - /* Delete the records from the source page. 
*/ page_delete_rec_list_start(move_limit, block, @@ -3095,7 +3106,7 @@ insert_empty: right_block = block; if (cursor->index->has_locking()) { - lock_update_split_left(right_block, left_block); + *err = lock_update_split_left(right_block, left_block); } } else { /* fputs("Split right\n", stderr); */ @@ -3126,13 +3137,15 @@ insert_empty: /* Update the lock table and possible hash index. */ if (cursor->index->has_locking()) { - lock_move_rec_list_end(new_block, block, - move_limit); + btr_search_move_or_delete_hash_entries( + new_block, block); + *err = lock_move_rec_list_end(new_block, block, + move_limit); + if (*err != DB_FAIL) { + return nullptr; + } } - btr_search_move_or_delete_hash_entries( - new_block, block); - /* Delete the records from the source page. */ *err = page_delete_rec_list_end(move_limit, block, @@ -3148,7 +3161,11 @@ insert_empty: right_block = new_block; if (cursor->index->has_locking()) { - lock_update_split_right(right_block, left_block); + *err = lock_update_split_right(right_block, + left_block); + if (*err != DB_SUCCESS) { + return nullptr; + } } } @@ -3462,16 +3479,20 @@ btr_lift_page_up( /* Update the lock table and possible hash index. 
*/ if (index->has_locking()) { - lock_move_rec_list_end(father_block, block, - page_get_infimum_rec(page)); - } - - /* Also update the predicate locks */ - if (dict_index_is_spatial(index)) { - lock_prdt_rec_move(father_block, block->page.id()); - } else { btr_search_move_or_delete_hash_entries( father_block, block); + *err = lock_move_rec_list_end( + father_block, block, page + PAGE_NEW_INFIMUM); + if (*err != DB_SUCCESS) { + return nullptr; + } + if (index->is_spatial()) { + *err = lock_prdt_rec_move(father_block, + block->page.id()); + if (*err != DB_SUCCESS) { + return nullptr; + } + } } } @@ -3481,7 +3502,10 @@ btr_lift_page_up( if (index->is_spatial()) { lock_sys.prdt_page_free_from_discard(id); } else { - lock_update_copy_and_discard(*father_block, id); + *err = lock_update_copy_and_discard(*father_block, id); + if (*err != DB_SUCCESS) { + return nullptr; + } } } @@ -3737,8 +3761,11 @@ cannot_merge: goto err_exit; } if (index->has_locking()) { - lock_update_merge_left( + err = lock_update_merge_left( *merge_block, orig_pred, id); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + goto err_exit; + } } } @@ -3893,7 +3920,9 @@ cannot_merge: &cursor2, BTR_CREATE_FLAG, false, mtr); - ut_a(err == DB_SUCCESS); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + goto err_exit; + } if (!compressed) { btr_cur_compress_if_useful(&cursor2, false, @@ -3901,8 +3930,11 @@ cannot_merge: } if (index->has_locking()) { - lock_update_merge_right( + err = lock_update_merge_right( merge_block, orig_succ, block); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + goto err_exit; + } } } } diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index e9b4e2937b9..f40523d1e76 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -1962,14 +1962,20 @@ retry_page_get: && mode != PAGE_CUR_RTREE_INSERT && mode != PAGE_CUR_RTREE_LOCATE && mode >= PAGE_CUR_CONTAIN) { - lock_prdt_t prdt; + lock_prdt_t prdt; + prdt.op = mode; { trx_t* trx = 
thr_get_trx(cursor->thr); - TMLockTrxGuard g{TMLockTrxArgs(*trx)}; - lock_init_prdt_from_mbr( - &prdt, &cursor->rtr_info->mbr, mode, - trx->lock.lock_heap); + trx->mutex_lock(); + prdt.data = mem_heap_dup(trx->lock.lock_heap, + &cursor->rtr_info->mbr, + sizeof cursor->rtr_info->mbr); + trx->mutex_unlock(); + } + + if (UNIV_UNLIKELY(!prdt.data)) { + return DB_LOCK_TABLE_FULL; } if (rw_latch == RW_NO_LATCH && height != 0) { @@ -3179,7 +3185,6 @@ btr_cur_ins_lock_and_undo( if (!(flags & BTR_NO_LOCKING_FLAG)) { const unsigned type = index->type; if (UNIV_UNLIKELY(type & DICT_SPATIAL)) { - lock_prdt_t prdt; rtr_mbr_t mbr; rtr_get_mbr_from_tuple(entry, &mbr); @@ -3187,7 +3192,7 @@ btr_cur_ins_lock_and_undo( /* Use on stack MBR variable to test if a lock is needed. If so, the predicate (MBR) will be allocated from lock heap in lock_prdt_insert_check_and_lock() */ - lock_init_prdt_from_mbr(&prdt, &mbr, 0, nullptr); + lock_prdt_t prdt{static_cast<void*>(&mbr), 0}; if (dberr_t err = lock_prdt_insert_check_and_lock( rec, btr_cur_get_block(cursor), @@ -3552,7 +3557,10 @@ fail_err: if (!(flags & BTR_NO_LOCKING_FLAG) && inherit) { - lock_update_insert(block, *rec); + err = lock_update_insert(block, *rec); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + goto fail_err; + } } if (leaf @@ -3727,6 +3735,8 @@ btr_cur_pessimistic_insert( } } + err = DB_SUCCESS; + if (!page_is_leaf(btr_cur_get_page(cursor))) { ut_ad(!big_rec_vec); } else { @@ -3744,11 +3754,11 @@ btr_cur_pessimistic_insert( #endif /* BTR_CUR_HASH_ADAPT */ if (inherit && !(flags & BTR_NO_LOCKING_FLAG)) { - lock_update_insert(btr_cur_get_block(cursor), *rec); + err = lock_update_insert(btr_cur_get_block(cursor), + *rec); } } - err = DB_SUCCESS; func_exit: index->table->space->release_free_extents(n_reserved); *big_rec = big_rec_vec; diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc index 0c1959bb554..a83343dcb1a 100644 --- a/storage/innobase/btr/btr0defragment.cc +++ 
b/storage/innobase/btr/btr0defragment.cc @@ -410,10 +410,11 @@ btr_defragment_merge_pages( } else if (n_recs_to_move == n_recs) { /* The whole page is merged with the previous page, free it. */ - lock_update_merge_left(*to_block, orig_pred, - from_block->page.id()); btr_search_drop_page_hash_index(from_block); - if (btr_level_list_remove(*from_block, *index, mtr) + if (lock_update_merge_left(*to_block, orig_pred, + from_block->page.id()) + != DB_SUCCESS + || btr_level_list_remove(*from_block, *index, mtr) != DB_SUCCESS || btr_cur_node_ptr_delete(&parent, mtr) != DB_SUCCESS || btr_page_free(index, from_block, mtr) != DB_SUCCESS) { @@ -430,11 +431,12 @@ btr_defragment_merge_pages( dtuple_t* node_ptr; page_delete_rec_list_start(rec, from_block, index, mtr); - lock_update_split_and_merge(to_block, - orig_pred, - from_block); // FIXME: reuse the node_ptr! - if (btr_cur_node_ptr_delete(&parent, mtr) + if (lock_update_split_and_merge(to_block, + orig_pred, + from_block) + != DB_SUCCESS + || btr_cur_node_ptr_delete(&parent, mtr) != DB_SUCCESS) { return nullptr; } diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index 5b35c7f4f97..e75bd9e5951 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -162,35 +162,6 @@ btr_search_get_n_fields( return(btr_search_get_n_fields(cursor->n_fields, cursor->n_bytes)); } -/** This function should be called before reserving any btr search mutex, if -the intended operation might add nodes to the search system hash table. -Because of the latching order, once we have reserved the btr search system -latch, we cannot allocate a free frame from the buffer pool. Checks that -there is a free buffer frame allocated for hash table heap in the btr search -system. If not, allocates a free frames for the heap. This check makes it -probable that, when have reserved the btr search system latch and we need to -allocate a new node to the hash table, it will succeed. 
However, the check -will not guarantee success. -@param[in] index index handler */ -static void btr_search_check_free_space_in_heap(const dict_index_t *index) -{ - /* Note that we peek the value of heap->free_block without reserving - the latch: this is ok, because we will not guarantee that there will - be enough free space in the hash table. */ - - buf_block_t *block= buf_block_alloc(); - auto part= btr_search_sys.get_part(*index); - - part->latch.wr_lock(SRW_LOCK_CALL); - - if (!btr_search_enabled || part->heap->free_block) - buf_block_free(block); - else - part->heap->free_block= block; - - part->latch.wr_unlock(); -} - /** Set index->ref_count = 0 on all indexes of a table. @param[in,out] table table handler */ static void btr_search_disable_ref_count(dict_table_t *table) @@ -706,6 +677,7 @@ btr_search_update_hash_ref( if (index != cursor->index) { ut_ad(index->id == cursor->index->id); +drop_ahi: btr_search_drop_page_hash_index(block); return; } @@ -714,6 +686,11 @@ btr_search_update_hash_ref( ut_ad(index == cursor->index); ut_ad(!dict_index_is_ibuf(index)); auto part = btr_search_sys.get_part(*index); + + buf_block_t *ahi_block = buf_LRU_get_free_block(false); + if (UNIV_UNLIKELY(!ahi_block)) { + goto drop_ahi; + } part->latch.wr_lock(SRW_LOCK_CALL); ut_ad(!block->index || block->index == index); @@ -722,6 +699,12 @@ btr_search_update_hash_ref( && (block->curr_n_bytes == info->n_bytes) && (block->curr_left_side == info->left_side) && btr_search_enabled) { + if (part->heap->free_block) { + buf_block_free(ahi_block); + } else { + part->heap->free_block = ahi_block; + } + mem_heap_t* heap = NULL; rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; rec_offs_init(offsets_); @@ -746,6 +729,8 @@ btr_search_update_hash_ref( ha_insert_for_fold(&part->table, part->heap, fold, block, rec); MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED); + } else { + buf_block_free(ahi_block); } func_exit: @@ -1668,12 +1653,22 @@ btr_search_build_page_hash_index( fold = next_fold; } - 
btr_search_check_free_space_in_heap(index); + if (buf_block_t *ahi_block = buf_LRU_get_free_block(false)) { + ahi_latch->wr_lock(SRW_LOCK_CALL); - ahi_latch->wr_lock(SRW_LOCK_CALL); + if (!btr_search_enabled) { + buf_block_free(ahi_block); + goto exit_func; + } - if (!btr_search_enabled) { - goto exit_func; + auto part = btr_search_sys.get_part(*index); + if (part->heap->free_block) { + buf_block_free(ahi_block); + } else { + part->heap->free_block = ahi_block; + } + } else { + goto exit_func_after_unlock; } /* This counter is decremented every time we drop page @@ -1710,7 +1705,7 @@ btr_search_build_page_hash_index( exit_func: assert_block_ahi_valid(block); ahi_latch->wr_unlock(); - +exit_func_after_unlock: ut_free(folds); ut_free(recs); if (UNIV_LIKELY_NULL(heap)) { @@ -1723,8 +1718,8 @@ exit_func: @param[in,out] cursor cursor which was just positioned */ void btr_search_info_update_slow(btr_search_t *info, btr_cur_t *cursor) { - srw_spin_lock* ahi_latch = &btr_search_sys.get_part(*cursor->index) - ->latch; + auto part = btr_search_sys.get_part(*cursor->index); + srw_spin_lock* ahi_latch = &part->latch; buf_block_t* block = btr_cur_get_block(cursor); /* NOTE that the following two function calls do NOT protect @@ -1736,14 +1731,7 @@ void btr_search_info_update_slow(btr_search_t *info, btr_cur_t *cursor) bool build_index = btr_search_update_block_hash_info(info, block); - if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) { - - btr_search_check_free_space_in_heap(cursor->index); - } - if (cursor->flag == BTR_CUR_HASH_FAIL) { - /* Update the hash node reference, if appropriate */ - #ifdef UNIV_SEARCH_PERF_STAT btr_search_n_hash_fail++; #endif /* UNIV_SEARCH_PERF_STAT */ @@ -1985,6 +1973,32 @@ func_exit: } } +/** Allocate memory and acquire ahi_latch. 
+@param ahi_latch the adaptive hash index partition latch +@param index B-tree to maintain the adaptive hash index on +@return the partititon +@retval nullptr if the AHI is disabled or we run out of memory */ +static btr_search_sys_t::partition * +btr_search_lock_and_alloc(srw_spin_lock *ahi_latch, const dict_index_t &index) +{ + buf_block_t *ahi_block= buf_LRU_get_free_block(false); + if (!ahi_block) + return nullptr; + ahi_latch->wr_lock(SRW_LOCK_CALL); + if (!btr_search_enabled) + { + buf_block_free(ahi_block); + return nullptr; + } + + auto part= btr_search_sys.get_part(index); + if (part->heap->free_block) + buf_block_free(ahi_block); + else + part->heap->free_block= ahi_block; + return part; +} + /** Updates the page hash index when a single record is inserted on a page. @param[in,out] cursor cursor which was positioned to the place to insert using btr_cur_search_..., @@ -2029,7 +2043,6 @@ void btr_search_update_hash_on_insert(btr_cur_t *cursor, } ut_ad(block->page.id().space() == index->table->space_id); - btr_search_check_free_space_in_heap(index); rec = btr_cur_get_rec(cursor); @@ -2042,7 +2055,6 @@ drop: return; } - ut_a(index == cursor->index); ut_ad(!dict_index_is_ibuf(index)); n_fields = block->curr_n_fields; @@ -2069,7 +2081,6 @@ drop: /* We must not look up "part" before acquiring ahi_latch. 
*/ btr_search_sys_t::partition* part= nullptr; - bool locked = false; if (!page_rec_is_infimum(rec) && !rec_is_metadata(rec, *index)) { offsets = rec_get_offsets( @@ -2078,14 +2089,10 @@ drop: fold = rec_fold(rec, offsets, n_fields, n_bytes, index->id); } else { if (left_side) { - locked = true; - ahi_latch->wr_lock(SRW_LOCK_CALL); - - if (!btr_search_enabled || !block->index) { + part = btr_search_lock_and_alloc(ahi_latch, *index); + if (!part || !block->index) { goto function_exit; } - - part = btr_search_sys.get_part(*index); ha_insert_for_fold(&part->table, part->heap, ins_fold, block, ins_rec); MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED); @@ -2096,15 +2103,12 @@ drop: if (fold != ins_fold) { - if (!locked) { - locked = true; + if (!part) { ahi_latch->wr_lock(SRW_LOCK_CALL); - - if (!btr_search_enabled || !block->index) { + part = btr_search_lock_and_alloc(ahi_latch, *index); + if (!part || !block->index) { goto function_exit; } - - part = btr_search_sys.get_part(*index); } if (!left_side) { @@ -2121,15 +2125,12 @@ check_next_rec: if (page_rec_is_supremum(next_rec)) { if (!left_side) { - if (!locked) { - locked = true; - ahi_latch->wr_lock(SRW_LOCK_CALL); - - if (!btr_search_enabled || !block->index) { + if (!part) { + part = btr_search_lock_and_alloc(ahi_latch, + *index); + if (!part || !block->index) { goto function_exit; } - - part = btr_search_sys.get_part(*index); } ha_insert_for_fold(&part->table, part->heap, @@ -2141,15 +2142,11 @@ check_next_rec: } if (ins_fold != next_fold) { - if (!locked) { - locked = true; - ahi_latch->wr_lock(SRW_LOCK_CALL); - - if (!btr_search_enabled || !block->index) { + if (!part) { + part = btr_search_lock_and_alloc(ahi_latch, *index); + if (!part || !block->index) { goto function_exit; } - - part = btr_search_sys.get_part(*index); } if (!left_side) { @@ -2166,7 +2163,7 @@ function_exit: if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } - if (locked) { + if (part) { ahi_latch->wr_unlock(); } } diff --git 
a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc index 85a698bc875..da3373ea1ee 100644 --- a/storage/innobase/buf/buf0buddy.cc +++ b/storage/innobase/buf/buf0buddy.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2021, MariaDB Corporation. +Copyright (c) 2018, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -429,7 +429,8 @@ buf_buddy_alloc_from(void* buf, ulint i, ulint j) /** Allocate a ROW_FORMAT=COMPRESSED block. @param i index of buf_pool.zip_free[] or BUF_BUDDY_SIZES @param lru assigned to true if buf_pool.mutex was temporarily released -@return allocated block, never NULL */ +@return allocated block +@retval nullptr on failure */ byte *buf_buddy_alloc_low(ulint i, bool *lru) { buf_block_t* block; @@ -455,6 +456,10 @@ byte *buf_buddy_alloc_low(ulint i, bool *lru) /* Try replacing an uncompressed page in the buffer pool. 
*/ block = buf_LRU_get_free_block(true); + if (UNIV_UNLIKELY(!block)) { + return nullptr; + } + if (lru) { *lru = true; } diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index eaf0f955a1f..1f722dfa451 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -2587,6 +2587,14 @@ ignore_block: } buf_block_t *new_block = buf_LRU_get_free_block(false); + if (UNIV_UNLIKELY(!new_block)) { + block->page.lock.x_unlock(); + if (err) { + *err = DB_OUT_OF_MEMORY; + } + return nullptr; + } + buf_block_init_low(new_block); wait_for_unfix: @@ -2891,15 +2899,10 @@ buf_page_get_gen( dberr_t* err, bool allow_ibuf_merge) { - if (buf_block_t *block= recv_sys.recover(page_id)) + if (buf_block_t *block= recv_sys.recover(page_id, err)) { if (UNIV_UNLIKELY(block == reinterpret_cast<buf_block_t*>(-1))) - { - corrupted: - if (err) - *err= DB_CORRUPTION; return nullptr; - } /* Recovery is a special case; we fix() before acquiring lock. */ auto s= block->page.fix(); ut_ad(s >= buf_page_t::FREED); @@ -2915,7 +2918,9 @@ buf_page_get_gen( got_freed_page: ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL); block->page.unfix(); - goto corrupted; + if (err) + *err= DB_CORRUPTION; + return nullptr; } else if (must_merge && fil_page_get_type(block->page.frame) == FIL_PAGE_INDEX && diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc index c01b0e4ee66..91278879611 100644 --- a/storage/innobase/buf/buf0lru.cc +++ b/storage/innobase/buf/buf0lru.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -326,17 +326,6 @@ static void buf_LRU_check_size_of_non_data_objects() const auto s= UT_LIST_GET_LEN(buf_pool.free) + UT_LIST_GET_LEN(buf_pool.LRU); - if (s < buf_pool.curr_size / 20) - ib::fatal() << "Over 95 percent of the buffer pool is" - " occupied by lock heaps" -#ifdef BTR_CUR_HASH_ADAPT - " or the adaptive hash index" -#endif /* BTR_CUR_HASH_ADAPT */ - "! Check that your transactions do not set too many" - " row locks, or review if innodb_buffer_pool_size=" - << (buf_pool.curr_size >> (20U - srv_page_size_shift)) - << "M could be bigger."; - if (s < buf_pool.curr_size / 3) { if (!buf_lru_switched_on_innodb_mon && srv_monitor_timer) @@ -389,7 +378,8 @@ we put it to free list to be used. * scan LRU list even if buf_pool.try_LRU_scan is not set @param have_mutex whether buf_pool.mutex is already being held -@return the free control block, in state BUF_BLOCK_MEMORY */ +@return the free control block, in state BUF_BLOCK_MEMORY +@retval nullptr if no block is available (buf_pool.mutex will be released) */ buf_block_t *buf_LRU_get_free_block(bool have_mutex) { ulint n_iterations = 0; @@ -452,8 +442,10 @@ not_found: #endif mysql_mutex_unlock(&buf_pool.mutex); - if (n_iterations > 20 && !buf_lru_free_blocks_error_printed - && srv_buf_pool_old_size == srv_buf_pool_size) { + if (n_iterations <= 20) { + } else if (buf_lru_free_blocks_error_printed) { + return nullptr; + } else if (srv_buf_pool_old_size == srv_buf_pool_size) { ib::warn() << "Difficult to find free blocks in the buffer pool" " (" << n_iterations << " search iterations)! " diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc index 2f15fa62796..9eb5edbb300 100644 --- a/storage/innobase/buf/buf0rea.cc +++ b/storage/innobase/buf/buf0rea.cc @@ -85,17 +85,20 @@ then this function does nothing. 
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock on the buffer frame. The io-handler must take care that the flag is cleared and the lock released later. -@param[in] mode BUF_READ_IBUF_PAGES_ONLY, ... -@param[in] page_id page id -@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 -@param[in] unzip whether the uncompressed page is - requested (for ROW_FORMAT=COMPRESSED) +@param mode BUF_READ_IBUF_PAGES_ONLY, ... +@param page_id page identifier +@param zip_size ROW_FORMAT=COMPRESSED page size, or 0 +@param unzip whether the uncompressed page is requested + (for ROW_FORMAT=COMPRESSED) +@param err error code @return pointer to the block -@retval NULL in case of an error */ +@retval nullptr in case of an error */ TRANSACTIONAL_TARGET -static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, - ulint zip_size, bool unzip) +static buf_page_t *buf_page_init_for_read(ulint mode, const page_id_t page_id, + ulint zip_size, bool unzip, + dberr_t *err) { + ut_ad(*err == DB_SUCCESS); mtr_t mtr; if (mode == BUF_READ_IBUF_PAGES_ONLY) @@ -113,22 +116,29 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, else ut_ad(mode == BUF_READ_ANY_PAGE); + buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold()); + buf_page_t *hash_page; + buf_page_t *bpage= nullptr; buf_block_t *block= nullptr; if (!zip_size || unzip || recv_recovery_is_on()) { block= buf_LRU_get_free_block(false); + if (UNIV_UNLIKELY(!block)) + { + *err= DB_OUT_OF_MEMORY; + goto func_exit_no_mutex; + } + block->initialise(page_id, zip_size, buf_page_t::READ_FIX); /* x_unlock() will be invoked in buf_page_t::read_complete() by the io-handler thread. 
*/ block->page.lock.x_lock(true); } - buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold()); - mysql_mutex_lock(&buf_pool.mutex); - buf_page_t *hash_page= buf_pool.page_hash.get(page_id, chain); + hash_page= buf_pool.page_hash.get(page_id, chain); if (hash_page && !buf_pool.watch_is_sentinel(*hash_page)) { /* The page is already in the buffer pool. */ @@ -185,6 +195,11 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, uninitialized data. */ bool lru= false; void *data= buf_buddy_alloc(zip_size, &lru); + if (UNIV_UNLIKELY(!data)) + { + *err= DB_OUT_OF_MEMORY; + goto func_exit; + } /* If buf_buddy_alloc() allocated storage from the LRU list, it released and reacquired buf_pool.mutex. Thus, we must @@ -245,9 +260,7 @@ buffer buf_pool if it is not already there, in which case does nothing. Sets the io_fix flag and sets an exclusive lock on the buffer frame. The flag is cleared and the x-lock released by an i/o-handler thread. -@param[out] err DB_SUCCESS or DB_TABLESPACE_DELETED - if we are trying - to read from a non-existent tablespace +@param[out] err DB_SUCCESS, DB_TABLESPACE_DELETED, DB_OUT_OF_MEMORY @param[in,out] space tablespace @param[in] sync true if synchronous aio is desired @param[in] mode BUF_READ_IBUF_PAGES_ONLY, ..., @@ -297,7 +310,7 @@ nothing_read: or is being dropped; if we succeed in initing the page in the buffer pool for read, then DISCARD cannot proceed until the read has completed */ - bpage = buf_page_init_for_read(mode, page_id, zip_size, unzip); + bpage = buf_page_init_for_read(mode, page_id, zip_size, unzip, err); if (bpage == NULL) { goto nothing_read; diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index 153a0e88c63..43311af9141 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -504,6 +504,10 @@ dberr_t fsp_header_init(fil_space_t *space, uint32_t size, mtr_t *mtr) buf_block_t *free_block = buf_LRU_get_free_block(false); + 
if (UNIV_UNLIKELY(!free_block)) { + return DB_OUT_OF_MEMORY; + } + mtr->x_lock_space(space); buf_block_t* block = buf_page_create(space, 0, zip_size, mtr, @@ -837,8 +841,11 @@ fsp_fill_free_list( if (i) { buf_block_t *f= buf_LRU_get_free_block(false); + if (UNIV_UNLIKELY(!f)) + return DB_OUT_OF_MEMORY; + buf_block_t *block= buf_page_create(space, static_cast<uint32_t>(i), - zip_size, mtr, f); + zip_size, mtr, f); if (UNIV_UNLIKELY(block != f)) buf_pool.free_block(f); fsp_init_file_page(space, block, mtr); @@ -849,6 +856,8 @@ fsp_fill_free_list( if (space->purpose != FIL_TYPE_TEMPORARY) { buf_block_t *f= buf_LRU_get_free_block(false); + if (UNIV_UNLIKELY(!f)) + return DB_OUT_OF_MEMORY; buf_block_t *block= buf_page_create(space, static_cast<uint32_t>(i + FSP_IBUF_BITMAP_OFFSET), @@ -1018,15 +1027,18 @@ fsp_alloc_from_free_frag(buf_block_t *header, buf_block_t *xdes, xdes_t *descr, return DB_SUCCESS; } +MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Gets a buffer block for an allocated page. 
@param[in,out] space tablespace @param[in] offset page number of the allocated page @param[in,out] mtr mini-transaction +@param[out] err error code @return block, initialized */ static -buf_block_t* -fsp_page_create(fil_space_t *space, page_no_t offset, mtr_t *mtr) +buf_block_t *fsp_page_create(fil_space_t *space, page_no_t offset, mtr_t *mtr, + dberr_t *err) { + ut_ad(*err == DB_SUCCESS); buf_block_t *block, *free_block; if (UNIV_UNLIKELY(space->is_being_truncated)) @@ -1051,6 +1063,12 @@ fsp_page_create(fil_space_t *space, page_no_t offset, mtr_t *mtr) } free_block= buf_LRU_get_free_block(false); + if (UNIV_UNLIKELY(!free_block)) + { + *err= DB_OUT_OF_MEMORY; + return nullptr; + } + got_free_block: block= buf_page_create(space, static_cast<uint32_t>(offset), space->zip_size(), mtr, free_block); @@ -1165,7 +1183,7 @@ buf_block_t *fsp_alloc_free_page(fil_space_t *space, uint32_t hint, *err= fsp_alloc_from_free_frag(block, xdes, descr, free, mtr); if (UNIV_UNLIKELY(*err != DB_SUCCESS)) goto corrupted; - return fsp_page_create(space, page_no, init_mtr); + return fsp_page_create(space, page_no, init_mtr, err); } MY_ATTRIBUTE((nonnull, warn_unused_result)) @@ -2220,7 +2238,7 @@ got_hinted_page: } } - return fsp_page_create(space, ret_page, init_mtr); + return fsp_page_create(space, ret_page, init_mtr, err); } /**********************************************************************//** diff --git a/storage/innobase/gis/gis0rtree.cc b/storage/innobase/gis/gis0rtree.cc index 59ad44ec093..a784ccd4f41 100644 --- a/storage/innobase/gis/gis0rtree.cc +++ b/storage/innobase/gis/gis0rtree.cc @@ -829,7 +829,10 @@ rtr_split_page_move_rec_list( } /* Update the lock table */ - lock_rtr_move_rec_list(new_block, block, rec_move, moved); + if (dberr_t err = lock_rtr_move_rec_list(new_block, block, + rec_move, moved)) { + return err; + } /* Delete recs in second group from the old page. 
*/ for (cur_split_node = node_array; @@ -1053,7 +1056,11 @@ corrupted: } /* Update the lock table */ - lock_rtr_move_rec_list(new_block, block, rec_move, moved); + *err = lock_rtr_move_rec_list(new_block, block, rec_move, + moved); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { + return nullptr; + } const ulint n_core = page_level ? 0 : cursor->index->n_core_fields; diff --git a/storage/innobase/gis/gis0sea.cc b/storage/innobase/gis/gis0sea.cc index 9a378d531d9..fd443de6e82 100644 --- a/storage/innobase/gis/gis0sea.cc +++ b/storage/innobase/gis/gis0sea.cc @@ -376,15 +376,22 @@ rtr_pcur_getnext_from_path( && mode != PAGE_CUR_RTREE_LOCATE && mode >= PAGE_CUR_CONTAIN && btr_cur->rtr_info->need_prdt_lock) { - lock_prdt_t prdt; + lock_prdt_t prdt; + prdt.op = mode; - trx_t* trx = thr_get_trx( - btr_cur->rtr_info->thr); { - TMLockTrxGuard g{TMLockTrxArgs(*trx)}; - lock_init_prdt_from_mbr( - &prdt, &btr_cur->rtr_info->mbr, - mode, trx->lock.lock_heap); + trx_t* trx = thr_get_trx( + btr_cur->rtr_info->thr); + trx->mutex_lock(); + prdt.data = mem_heap_dup( + trx->lock.lock_heap, + &btr_cur->rtr_info->mbr, + sizeof btr_cur->rtr_info->mbr); + trx->mutex_unlock(); + } + + if (UNIV_UNLIKELY(!prdt.data)) { + return false; // FIXME: return an error } if (rw_latch == RW_NO_LATCH) { diff --git a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h index bb9994203d6..0bc2ce590e6 100644 --- a/storage/innobase/include/buf0buddy.h +++ b/storage/innobase/include/buf0buddy.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2020, MariaDB Corporation. +Copyright (c) 2018, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -52,13 +52,15 @@ buf_buddy_get_slot(ulint size) /** Allocate a ROW_FORMAT=COMPRESSED block. @param i index of buf_pool.zip_free[] or BUF_BUDDY_SIZES @param lru assigned to true if buf_pool.mutex was temporarily released -@return allocated block, never NULL */ +@return allocated block +@retval nullptr on failure */ byte *buf_buddy_alloc_low(ulint i, bool *lru) MY_ATTRIBUTE((malloc)); /** Allocate a ROW_FORMAT=COMPRESSED block. @param size compressed page size in bytes @param lru assigned to true if buf_pool.mutex was temporarily released -@return allocated block, never NULL */ +@return allocated block +@retval nullptr on failure */ inline byte *buf_buddy_alloc(ulint size, bool *lru= nullptr) { return buf_buddy_alloc_low(buf_buddy_get_slot(size), lru); diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 87236415150..0741519457e 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -145,9 +145,6 @@ operator<<( #ifndef UNIV_INNOCHECKSUM # define buf_pool_get_curr_size() srv_buf_pool_curr_size -/** Allocate a buffer block. -@return own: the allocated block, state()==MEMORY */ -inline buf_block_t *buf_block_alloc(); /********************************************************************//** Frees a buffer block which does not contain a file page. */ UNIV_INLINE diff --git a/storage/innobase/include/buf0buf.inl b/storage/innobase/include/buf0buf.inl index 3c4da98f83b..4b885ec891b 100644 --- a/storage/innobase/include/buf0buf.inl +++ b/storage/innobase/include/buf0buf.inl @@ -2,7 +2,7 @@ Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. -Copyright (c) 2014, 2021, MariaDB Corporation. +Copyright (c) 2014, 2022, MariaDB Corporation. 
Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -86,13 +86,6 @@ inline bool buf_page_peek_if_too_old(const buf_page_t *bpage) } } -/** Allocate a buffer block. -@return own: the allocated block, in state BUF_BLOCK_MEMORY */ -inline buf_block_t *buf_block_alloc() -{ - return buf_LRU_get_free_block(false); -} - /********************************************************************//** Frees a buffer block which does not contain a file page. */ UNIV_INLINE diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h index aec08e77f54..e94959ca7e9 100644 --- a/storage/innobase/include/buf0lru.h +++ b/storage/innobase/include/buf0lru.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -84,8 +84,9 @@ we put it to free list to be used. 
* scan LRU list even if buf_pool.try_LRU_scan is not set @param have_mutex whether buf_pool.mutex is already being held -@return the free control block, in state BUF_BLOCK_MEMORY */ -buf_block_t* buf_LRU_get_free_block(bool have_mutex) +@return the free control block, in state BUF_BLOCK_MEMORY +@retval nullptr if no block is available (buf_pool.mutex will be released) */ +buf_block_t *buf_LRU_get_free_block(bool have_mutex) MY_ATTRIBUTE((malloc,warn_unused_result)); /** @return whether the unzip_LRU list should be used for evicting a victim diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index b67a1011f6b..3c047090555 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -66,32 +66,35 @@ after an aborted CREATE INDEX operation. @param index a stale index on which ADD INDEX operation was aborted */ ATTRIBUTE_COLD void lock_discard_for_index(const dict_index_t &index); +MY_ATTRIBUTE((nonnull, warn_unused_result)) /*************************************************************//** Updates the lock table when we have reorganized a page. NOTE: we copy also the locks set on the infimum of the page; the infimum may carry locks if an update of a record is occurring on the page, and its locks were temporarily stored on the infimum. */ -void +dberr_t lock_move_reorganize_page( /*======================*/ const buf_block_t* block, /*!< in: old index page, now reorganized */ const buf_block_t* oblock);/*!< in: copy of the old, not reorganized page */ +MY_ATTRIBUTE((nonnull, warn_unused_result)) /*************************************************************//** Moves the explicit locks on user records to another page if a record list end is moved to another page. 
*/ -void +dberr_t lock_move_rec_list_end( /*===================*/ const buf_block_t* new_block, /*!< in: index page to move to */ const buf_block_t* block, /*!< in: index page */ const rec_t* rec); /*!< in: record on page: this is the first record moved */ +MY_ATTRIBUTE((nonnull, warn_unused_result)) /*************************************************************//** Moves the explicit locks on user records to another page if a record list start is moved to another page. */ -void +dberr_t lock_move_rec_list_start( /*=====================*/ const buf_block_t* new_block, /*!< in: index page to move to */ @@ -104,16 +107,18 @@ lock_move_rec_list_start( record on new_page before the records were copied */ +MY_ATTRIBUTE((nonnull, warn_unused_result)) /*************************************************************//** Updates the lock table when a page is split to the right. */ -void +dberr_t lock_update_split_right( /*====================*/ const buf_block_t* right_block, /*!< in: right page */ const buf_block_t* left_block); /*!< in: left page */ +MY_ATTRIBUTE((nonnull, warn_unused_result)) /*************************************************************//** Updates the lock table when a page is merged to the right. */ -void +dberr_t lock_update_merge_right( /*====================*/ const buf_block_t* right_block, /*!< in: right page to @@ -125,18 +130,22 @@ lock_update_merge_right( const buf_block_t* left_block); /*!< in: merged index page which will be discarded */ +MY_ATTRIBUTE((warn_unused_result)) /** Update locks when the root page is copied to another in btr_root_raise_and_insert(). Note that we leave lock structs on the root page, even though they do not make sense on other than leaf pages: the reason is that in a pessimistic update the infimum record of the root page will act as a dummy carrier of the locks of the record to be updated. 
*/ -void lock_update_root_raise(const buf_block_t &block, const page_id_t root); +dberr_t lock_update_root_raise(const buf_block_t &block, const page_id_t root); +MY_ATTRIBUTE((warn_unused_result)) /** Update the lock table when a page is copied to another. @param new_block the target page @param old old page (not index root page) */ -void lock_update_copy_and_discard(const buf_block_t &new_block, page_id_t old); +dberr_t lock_update_copy_and_discard(const buf_block_t &new_block, + page_id_t old); +MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Update gap locks between the last record of the left_block and the first record of the right_block when a record is about to be inserted at the start of the right_block, even though it should "naturally" be @@ -169,25 +178,28 @@ insertion, and there's no correctness requirement to avoid waking them up too soon. @param left_block left page @param right_block right page */ -void lock_update_node_pointer(const buf_block_t *left_block, - const buf_block_t *right_block); +dberr_t lock_update_node_pointer(const buf_block_t *left_block, + const buf_block_t *right_block); +MY_ATTRIBUTE((nonnull, warn_unused_result)) /*************************************************************//** Updates the lock table when a page is split to the left. */ -void +dberr_t lock_update_split_left( /*===================*/ const buf_block_t* right_block, /*!< in: right page */ const buf_block_t* left_block); /*!< in: left page */ +MY_ATTRIBUTE((warn_unused_result)) /** Update the lock table when a page is merged to the left. 
@param left left page @param orig_pred original predecessor of supremum on the left page before merge @param right merged, to-be-discarded right page */ -void lock_update_merge_left(const buf_block_t& left, const rec_t *orig_pred, - const page_id_t right); +dberr_t lock_update_merge_left(const buf_block_t& left, const rec_t *orig_pred, + const page_id_t right); +MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Update the locks when a page is split and merged to two pages, in defragmentation. */ -void lock_update_split_and_merge( +dberr_t lock_update_split_and_merge( const buf_block_t* left_block, /*!< in: left page to which merged */ const rec_t* orig_pred, /*!< in: original predecessor of supremum on the left page before merge*/ @@ -219,9 +231,10 @@ lock_update_discard( which will inherit the locks */ const buf_block_t* block); /*!< in: index page which will be discarded */ +MY_ATTRIBUTE((nonnull, warn_unused_result)) /*************************************************************//** Updates the lock table when a new user record is inserted. */ -void +dberr_t lock_update_insert( /*===============*/ const buf_block_t* block, /*!< in: buffer block containing rec */ @@ -1223,6 +1236,7 @@ lock_rec_create_low( trx_t* trx, bool holds_trx_mutex); +MY_ATTRIBUTE((nonnull(1,4,6,7), warn_unused_result)) /** Enqueue a waiting request for a lock which cannot be granted immediately. Check for deadlocks. @param[in] c_lock conflicting lock @@ -1240,7 +1254,8 @@ Check for deadlocks. 
@param[in,out] thr query thread @param[in] prdt minimum bounding box (spatial index) @retval DB_LOCK_WAIT if the waiting lock was enqueued -@retval DB_DEADLOCK if this transaction was chosen as the victim */ +@retval DB_DEADLOCK if this transaction was chosen as the victim +@retval DB_LOCK_TABLE_FULL when running out of memory */ dberr_t lock_rec_enqueue_waiting( lock_t* c_lock, @@ -1251,10 +1266,11 @@ lock_rec_enqueue_waiting( dict_index_t* index, que_thr_t* thr, lock_prdt_t* prdt); +MY_ATTRIBUTE((nonnull, warn_unused_result)) /*************************************************************//** Moves the explicit locks on user records to another page if a record list start is moved to another page. */ -void +dberr_t lock_rtr_move_rec_list( /*===================*/ const buf_block_t* new_block, /*!< in: index page to diff --git a/storage/innobase/include/lock0prdt.h b/storage/innobase/include/lock0prdt.h index db8e33922c4..62f41ee93cb 100644 --- a/storage/innobase/include/lock0prdt.h +++ b/storage/innobase/include/lock0prdt.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2021, MariaDB Corporation. +Copyright (c) 2018, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -31,7 +31,7 @@ Created 9/7/2013 Jimmy Yang /* Predicate lock data */ typedef struct lock_prdt { void* data; /* Predicate data */ - uint16 op; /* Predicate operator */ + ulint op; /* Predicate operator */ } lock_prdt_t; /*********************************************************************//** @@ -63,16 +63,6 @@ lock_place_prdt_page_lock( que_thr_t* thr); /*!< in: query thread */ /*********************************************************************//** -Initiate a Predicate lock from a MBR */ -void -lock_init_prdt_from_mbr( -/*====================*/ - lock_prdt_t* prdt, /*!< in/out: predicate to initialized */ - rtr_mbr_t* mbr, /*!< in: Minimum Bounding Rectangle */ - ulint mode, /*!< in: Search mode */ - mem_heap_t* heap); /*!< in: heap for allocating memory */ - -/*********************************************************************//** Get predicate lock's minimum bounding box @return the minimum bounding box*/ lock_prdt_t* @@ -173,10 +163,11 @@ prdt_get_mbr_from_prdt( #endif +MY_ATTRIBUTE((nonnull, warn_unused_result)) /*************************************************************//** Moves the locks of a record to another record and resets the lock bits of the donating record. */ -void +dberr_t lock_prdt_rec_move( /*===============*/ const buf_block_t* receiver, /*!< in: buffer block containing diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index b1c09cfa2bc..bf484bc059b 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -304,10 +304,11 @@ private: mtr_t &mtr, buf_block_t *b); /** Attempt to initialize a page based on redo log records. 
@param page_id page identifier + @param err error code @return the recovered block @retval nullptr if the page cannot be initialized based on log records - @retval -1 if the page cannot be recovered due to corruption */ - buf_block_t *recover_low(const page_id_t page_id); + @retval -1 if the page cannot be recovered; *err will be set */ + buf_block_t *recover_low(const page_id_t page_id, dberr_t *err); /** All found log files (multiple ones are possible if we are upgrading from before MariaDB Server 10.5.1) */ @@ -407,12 +408,13 @@ public: /** Attempt to initialize a page based on redo log records. @param page_id page identifier + @param err error code @return the recovered block @retval nullptr if the page cannot be initialized based on log records - @retval -1 if the page cannot be recovered due to corruption */ - buf_block_t *recover(const page_id_t page_id) + @retval -1 if the page cannot be recovered; *err will be set */ + buf_block_t *recover(const page_id_t page_id, dberr_t *err) { - return UNIV_UNLIKELY(recovery_on) ? recover_low(page_id) : nullptr; + return UNIV_UNLIKELY(recovery_on) ? recover_low(page_id, err) : nullptr; } /** Try to recover a tablespace that was not readable earlier diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 1c11efafc7a..4a0049c0f3f 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -1185,6 +1185,11 @@ lock_rec_create_low( } } + if (UT_LIST_GET_LEN(trx->lock.trx_locks) > 10000 + && buf_pool.running_out()) { + return nullptr; + } + if (!holds_trx_mutex) { trx->mutex_lock(); } @@ -1296,6 +1301,10 @@ lock_rec_enqueue_waiting( c_lock, type_mode | LOCK_WAIT, id, page, heap_no, index, trx, true); + if (UNIV_UNLIKELY(!lock)) { + return DB_LOCK_TABLE_FULL; + } + if (prdt && type_mode & LOCK_PREDICATE) { lock_prdt_set_prdt(lock, prdt); } @@ -1355,12 +1364,14 @@ which does NOT check for deadlocks or lock compatibility! 
@param[in] heap_no heap number of the record @param[in] index index of record @param[in,out] trx transaction -@param[in] caller_owns_trx_mutex TRUE if caller owns the transaction mutex */ -TRANSACTIONAL_TARGET -static void lock_rec_add_to_queue(unsigned type_mode, hash_cell_t &cell, - const page_id_t id, const page_t *page, - ulint heap_no, dict_index_t *index, - trx_t *trx, bool caller_owns_trx_mutex) +@param[in] caller_owns_trx_mutex TRUE if caller owns the transaction mutex +@return the lock +@retval nullptr if we ran out of memory */ +TRANSACTIONAL_TARGET MY_ATTRIBUTE((warn_unused_result)) +static lock_t *lock_rec_add_to_queue(unsigned type_mode, hash_cell_t &cell, + const page_id_t id, const page_t *page, + ulint heap_no, dict_index_t *index, + trx_t *trx, bool caller_owns_trx_mutex) { ut_d(lock_sys.hash_get(type_mode).assert_locked(id)); ut_ad(xtest() || caller_owns_trx_mutex == trx->mutex_is_owner()); @@ -1444,7 +1455,7 @@ static void lock_rec_add_to_queue(unsigned type_mode, hash_cell_t &cell, if (caller_owns_trx_mutex) { trx->mutex_lock(); } - return; + return lock; } } @@ -1453,9 +1464,9 @@ create: because we should be moving an existing waiting lock request. */ ut_ad(!(type_mode & LOCK_WAIT) || trx->lock.wait_trx); - lock_rec_create_low(nullptr, - type_mode, id, page, heap_no, index, trx, - caller_owns_trx_mutex); + return lock_rec_create_low(nullptr, + type_mode, id, page, heap_no, index, trx, + caller_owns_trx_mutex); } /*********************************************************************//** @@ -1542,9 +1553,10 @@ lock_rec_lock( else if (!impl) { /* Set the requested lock on the record. */ - lock_rec_add_to_queue(mode, g.cell(), id, block->page.frame, heap_no, - index, trx, true); - err= DB_SUCCESS_LOCKED_REC; + err= lock_rec_add_to_queue(mode, g.cell(), id, block->page.frame, + heap_no, index, trx, true) + ? 
DB_SUCCESS_LOCKED_REC + : DB_LOCK_TABLE_FULL; } } } @@ -1565,9 +1577,9 @@ lock_rec_lock( } /* Simplified and faster path for the most common cases */ - if (!impl) - lock_rec_create_low(nullptr, mode, id, block->page.frame, heap_no, index, - trx, false); + if (!impl && !lock_rec_create_low(nullptr, mode, id, block->page.frame, + heap_no, index, trx, false)) + return DB_LOCK_TABLE_FULL; return DB_SUCCESS_LOCKED_REC; } @@ -2124,7 +2136,7 @@ of another record as gap type locks, but does not reset the lock bits of the other record. Also waiting lock requests on rec are inherited as GRANTED gap locks. */ static -void +dberr_t lock_rec_inherit_to_gap( /*====================*/ hash_cell_t& heir_cell, /*!< heir hash table cell */ @@ -2141,34 +2153,36 @@ lock_rec_inherit_to_gap( ulint heap_no) /*!< in: heap_no of the donating record */ { - /* At READ UNCOMMITTED or READ COMMITTED isolation level, - we do not want locks set - by an UPDATE or a DELETE to be inherited as gap type locks. But we - DO want S-locks/X-locks(taken for replace) set by a consistency - constraint to be inherited also then. */ + /* At READ UNCOMMITTED or READ COMMITTED isolation level, + we do not want locks set + by an UPDATE or a DELETE to be inherited as gap type locks. + But we DO want S-locks/X-locks(taken for replace) set by a consistency + constraint to be inherited also then. */ - for (lock_t* lock= lock_sys_t::get_first(donor_cell, donor, heap_no); - lock; - lock = lock_rec_get_next(heap_no, lock)) { - trx_t* lock_trx = lock->trx; - if (!lock->is_insert_intention() - && (lock_trx->isolation_level > TRX_ISO_READ_COMMITTED - || lock->mode() != - (lock_trx->duplicates ? 
LOCK_S : LOCK_X))) { - lock_rec_add_to_queue(LOCK_GAP | lock->mode(), - heir_cell, heir, heir_page, - heir_heap_no, - lock->index, lock_trx, false); - } - } + for (lock_t *lock= lock_sys_t::get_first(donor_cell, donor, heap_no); + lock; + lock= lock_rec_get_next(heap_no, lock)) + { + trx_t* lock_trx= lock->trx; + if (!lock->is_insert_intention() && + (lock_trx->isolation_level > TRX_ISO_READ_COMMITTED || + lock->mode() != (lock_trx->duplicates ? LOCK_S : LOCK_X))) + if (!lock_rec_add_to_queue(LOCK_GAP | lock->mode(), + heir_cell, heir, heir_page, + heir_heap_no, + lock->index, lock_trx, false)) + return DB_LOCK_TABLE_FULL; + } + return DB_SUCCESS; } +MY_ATTRIBUTE((nonnull, warn_unused_result)) /*************************************************************//** Makes a record to inherit the gap locks (except LOCK_INSERT_INTENTION type) of another record as gap type locks, but does not reset the lock bits of the other record. Also waiting lock requests are inherited as GRANTED gap locks. */ static -void +dberr_t lock_rec_inherit_to_gap_if_gap_lock( /*================================*/ const buf_block_t* block, /*!< in: buffer block */ @@ -2187,9 +2201,11 @@ lock_rec_inherit_to_gap_if_gap_lock( if (!lock->is_insert_intention() && (heap_no == PAGE_HEAP_NO_SUPREMUM || !lock->is_record_not_gap()) && !lock_table_has(lock->trx, lock->index->table, LOCK_X)) - lock_rec_add_to_queue(LOCK_GAP | lock->mode(), - g.cell(), id, block->page.frame, - heir_heap_no, lock->index, lock->trx, false); + if (!lock_rec_add_to_queue(LOCK_GAP | lock->mode(), g.cell(), id, + block->page.frame, heir_heap_no, lock->index, + lock->trx, false)) + return DB_LOCK_TABLE_FULL; + return DB_SUCCESS; } /*************************************************************//** @@ -2197,7 +2213,7 @@ Moves the locks of a record to another record and resets the lock bits of the donating record. 
*/ TRANSACTIONAL_TARGET static -void +dberr_t lock_rec_move( hash_cell_t& receiver_cell, /*!< in: hash table cell */ const buf_block_t& receiver, /*!< in: buffer block containing @@ -2216,6 +2232,8 @@ lock_rec_move( ut_ad(!lock_sys_t::get_first(receiver_cell, receiver_id, receiver_heap_no)); + dberr_t err = DB_SUCCESS; + for (lock_t *lock = lock_sys_t::get_first(donator_cell, donator_id, donator_heap_no); lock != NULL; @@ -2233,15 +2251,19 @@ lock_rec_move( /* Note that we FIRST reset the bit, and then set the lock: the function works also if donator_id == receiver_id */ - lock_rec_add_to_queue(type_mode, receiver_cell, - receiver_id, receiver.page.frame, - receiver_heap_no, - lock->index, lock_trx, true); + if (!lock_rec_add_to_queue(type_mode, receiver_cell, + receiver_id, receiver.page.frame, + receiver_heap_no, + lock->index, lock_trx, true)) { + err = DB_LOCK_TABLE_FULL; + } lock_trx->mutex_unlock(); } ut_ad(!lock_sys_t::get_first(donator_cell, donator_id, donator_heap_no)); + + return err; } /** Move all the granted locks to the front of the given lock list. @@ -2283,7 +2305,7 @@ also the locks set on the infimum of the page; the infimum may carry locks if an update of a record is occurring on the page, and its locks were temporarily stored on the infimum. */ TRANSACTIONAL_TARGET -void +dberr_t lock_move_reorganize_page( /*======================*/ const buf_block_t* block, /*!< in: old index page, now @@ -2292,6 +2314,7 @@ lock_move_reorganize_page( reorganized page */ { mem_heap_t *heap; + dberr_t err= DB_SUCCESS; { UT_LIST_BASE_NODE_T(lock_t) old_locks; @@ -2302,7 +2325,7 @@ lock_move_reorganize_page( { TMLockGuard g{lock_sys.rec_hash, id}; if (!lock_sys_t::get_first(g.cell(), id)) - return; + return DB_SUCCESS; } /* We will modify arbitrary trx->lock.trx_locks. 
@@ -2317,7 +2340,7 @@ lock_move_reorganize_page( lock_t *lock= lock_sys_t::get_first(cell, id); if (!lock) - return; + return DB_SUCCESS; heap= mem_heap_create(256); @@ -2390,21 +2413,23 @@ lock_move_reorganize_page( } trx_t *lock_trx= lock->trx; - lock_trx->mutex_lock(); + lock_trx->mutex_lock(); - /* Clear the bit in old_lock. */ - if (old_heap_no < lock->un_member.rec_lock.n_bits && + /* Clear the bit in old_lock. */ + if (old_heap_no < lock->un_member.rec_lock.n_bits && lock_rec_reset_nth_bit(lock, old_heap_no)) { ut_ad(!page_rec_is_metadata(orec)); /* NOTE that the old lock bitmap could be too small for the new heap number! */ - lock_rec_add_to_queue(lock->type_mode, cell, id, block->page.frame, - new_heap_no, lock->index, lock_trx, true); + if (!lock_rec_add_to_queue(lock->type_mode, cell, id, + block->page.frame, new_heap_no, + lock->index, lock_trx, true)) + err= DB_LOCK_TABLE_FULL; } - lock_trx->mutex_unlock(); + lock_trx->mutex_unlock(); if (!rec1 || !rec2) { @@ -2428,13 +2453,15 @@ lock_move_reorganize_page( space->release(); } #endif + + return err; } /*************************************************************//** Moves the explicit locks on user records to another page if a record list end is moved to another page. */ TRANSACTIONAL_TARGET -void +dberr_t lock_move_rec_list_end( /*===================*/ const buf_block_t* new_block, /*!< in: index page to move to */ @@ -2449,6 +2476,7 @@ lock_move_rec_list_end( const page_id_t id{block->page.id()}; const page_id_t new_id{new_block->page.id()}; + dberr_t err= DB_SUCCESS; { /* This would likely be too large for a memory transaction. 
*/ LockMultiGuard g{lock_sys.rec_hash, id, new_id}; @@ -2483,7 +2511,7 @@ lock_move_rec_list_end( if (UNIV_UNLIKELY(!rec1 || !rec2)) { ut_ad("corrupted page" == 0); - return; + return DB_CORRUPTION; } /* Copy lock requests on user records to new page and @@ -2527,7 +2555,7 @@ lock_move_rec_list_end( if (UNIV_UNLIKELY(!rec2)) { ut_ad("corrupted page" == 0); - return; + return DB_CORRUPTION; } trx_t *lock_trx= lock->trx; @@ -2544,9 +2572,11 @@ lock_move_rec_list_end( lock->type_mode&= ~LOCK_WAIT; } - lock_rec_add_to_queue(type_mode, g.cell2(), new_id, - new_block->page.frame, - rec2_heap_no, lock->index, lock_trx, true); + if (!lock_rec_add_to_queue(type_mode, g.cell2(), new_id, + new_block->page.frame, + rec2_heap_no, lock->index, lock_trx, + true)) + err= DB_LOCK_TABLE_FULL; } lock_trx->mutex_unlock(); @@ -2563,13 +2593,14 @@ lock_move_rec_list_end( space->release(); } #endif + return err; } /*************************************************************//** Moves the explicit locks on user records to another page if a record list start is moved to another page. */ TRANSACTIONAL_TARGET -void +dberr_t lock_move_rec_list_start( /*=====================*/ const buf_block_t* new_block, /*!< in: index page to @@ -2592,6 +2623,7 @@ lock_move_rec_list_start( ut_ad(!page_rec_is_metadata(rec)); const page_id_t id{block->page.id()}; const page_id_t new_id{new_block->page.id()}; + dberr_t err= DB_SUCCESS; { /* This would likely be too large for a memory transaction. 
*/ @@ -2625,7 +2657,7 @@ lock_move_rec_list_start( if (UNIV_UNLIKELY(!rec1 || !rec2)) { ut_ad("corrupted page" == 0); - return; + return DB_CORRUPTION; } ut_ad(page_rec_is_metadata(rec1) == page_rec_is_metadata(rec2)); @@ -2667,9 +2699,11 @@ lock_move_rec_list_start( lock->type_mode&= ~LOCK_WAIT; } - lock_rec_add_to_queue(type_mode, g.cell2(), new_id, - new_block->page.frame, - rec2_heap_no, lock->index, lock_trx, true); + if (!lock_rec_add_to_queue(type_mode, g.cell2(), new_id, + new_block->page.frame, + rec2_heap_no, lock->index, lock_trx, + true)) + err= DB_LOCK_TABLE_FULL; } lock_trx->mutex_unlock(); @@ -2686,13 +2720,14 @@ lock_move_rec_list_start( #ifdef UNIV_DEBUG_LOCK_VALIDATE ut_ad(lock_rec_validate_page(block)); #endif + return err; } /*************************************************************//** Moves the explicit locks on user records to another page if a record list start is moved to another page. */ TRANSACTIONAL_TARGET -void +dberr_t lock_rtr_move_rec_list( /*===================*/ const buf_block_t* new_block, /*!< in: index page to @@ -2703,7 +2738,7 @@ lock_rtr_move_rec_list( ulint num_move) /*!< in: num of rec to move */ { if (!num_move) - return; + return DB_SUCCESS; const ulint comp= page_rec_is_comp(rec_move[0].old_rec); @@ -2712,6 +2747,7 @@ lock_rtr_move_rec_list( ut_ad(comp == page_rec_is_comp(rec_move[0].new_rec)); const page_id_t id{block->page.id()}; const page_id_t new_id{new_block->page.id()}; + dberr_t err= DB_SUCCESS; { /* This would likely be too large for a memory transaction. 
*/ @@ -2762,9 +2798,11 @@ lock_rtr_move_rec_list( lock->type_mode&= ~LOCK_WAIT; } - lock_rec_add_to_queue(type_mode, g.cell2(), new_id, - new_block->page.frame, - rec2_heap_no, lock->index, lock_trx, true); + if (!lock_rec_add_to_queue(type_mode, g.cell2(), new_id, + new_block->page.frame, + rec2_heap_no, lock->index, + lock_trx, true)) + err= DB_LOCK_TABLE_FULL; rec_move[moved].moved= true; } @@ -2777,10 +2815,11 @@ lock_rtr_move_rec_list( #ifdef UNIV_DEBUG_LOCK_VALIDATE ut_ad(lock_rec_validate_page(block)); #endif + return err; } /*************************************************************//** Updates the lock table when a page is split to the right. */ -void +dberr_t lock_update_split_right( /*====================*/ const buf_block_t* right_block, /*!< in: right page */ @@ -2796,25 +2835,28 @@ lock_update_split_right( /* Move the locks on the supremum of the left page to the supremum of the right page */ - lock_rec_move(g.cell2(), *right_block, r, g.cell1(), l, - PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); + if (dberr_t err= lock_rec_move(g.cell2(), *right_block, r, g.cell1(), l, + PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM)) + return err; /* Inherit the locks to the supremum of left page from the successor of the infimum on right page */ - lock_rec_inherit_to_gap(g.cell1(), l, g.cell2(), r, left_block->page.frame, - PAGE_HEAP_NO_SUPREMUM, h); + return lock_rec_inherit_to_gap(g.cell1(), l, g.cell2(), r, + left_block->page.frame, + PAGE_HEAP_NO_SUPREMUM, h); } -void lock_update_node_pointer(const buf_block_t *left_block, - const buf_block_t *right_block) +dberr_t lock_update_node_pointer(const buf_block_t *left_block, + const buf_block_t *right_block) { const ulint h= lock_get_min_heap_no(right_block); const page_id_t l{left_block->page.id()}; const page_id_t r{right_block->page.id()}; LockMultiGuard g{lock_sys.rec_hash, l, r}; - lock_rec_inherit_to_gap(g.cell2(), r, g.cell1(), l, right_block->page.frame, - h, PAGE_HEAP_NO_SUPREMUM); + return 
lock_rec_inherit_to_gap(g.cell2(), r, g.cell1(), l, + right_block->page.frame, + h, PAGE_HEAP_NO_SUPREMUM); } #ifdef UNIV_DEBUG @@ -2838,7 +2880,7 @@ static void lock_assert_no_spatial(const page_id_t id) /*************************************************************//** Updates the lock table when a page is merged to the right. */ -void +dberr_t lock_update_merge_right( /*====================*/ const buf_block_t* right_block, /*!< in: right page to @@ -2861,9 +2903,11 @@ lock_update_merge_right( /* Inherit the locks from the supremum of the left page to the original successor of infimum on the right page, to which the left page was merged */ - lock_rec_inherit_to_gap(g.cell2(), r, g.cell1(), l, right_block->page.frame, - page_rec_get_heap_no(orig_succ), - PAGE_HEAP_NO_SUPREMUM); + if (dberr_t err= lock_rec_inherit_to_gap(g.cell2(), r, g.cell1(), l, + right_block->page.frame, + page_rec_get_heap_no(orig_succ), + PAGE_HEAP_NO_SUPREMUM)) + return err; /* Reset the locks on the supremum of the left page, releasing waiting transactions */ @@ -2871,6 +2915,7 @@ lock_update_merge_right( lock_rec_free_all_from_discard_page(l, g.cell1(), lock_sys.rec_hash); ut_d(lock_assert_no_spatial(l)); + return DB_SUCCESS; } /** Update locks when the root page is copied to another in @@ -2879,33 +2924,36 @@ root page, even though they do not make sense on other than leaf pages: the reason is that in a pessimistic update the infimum record of the root page will act as a dummy carrier of the locks of the record to be updated. */ -void lock_update_root_raise(const buf_block_t &block, const page_id_t root) +dberr_t lock_update_root_raise(const buf_block_t &block, const page_id_t root) { const page_id_t id{block.page.id()}; /* This would likely be too large for a memory transaction. 
*/ LockMultiGuard g{lock_sys.rec_hash, id, root}; /* Move the locks on the supremum of the root to the supremum of block */ - lock_rec_move(g.cell1(), block, id, g.cell2(), root, - PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); + return lock_rec_move(g.cell1(), block, id, g.cell2(), root, + PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); } /** Update the lock table when a page is copied to another. @param new_block the target page @param old old page (not index root page) */ -void lock_update_copy_and_discard(const buf_block_t &new_block, page_id_t old) +dberr_t lock_update_copy_and_discard(const buf_block_t &new_block, + page_id_t old) { const page_id_t id{new_block.page.id()}; /* This would likely be too large for a memory transaction. */ LockMultiGuard g{lock_sys.rec_hash, id, old}; /* Move the locks on the supremum of the old page to the supremum of new */ - lock_rec_move(g.cell1(), new_block, id, g.cell2(), old, - PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); + if (dberr_t err= lock_rec_move(g.cell1(), new_block, id, g.cell2(), old, + PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM)) + return err; lock_rec_free_all_from_discard_page(old, g.cell2(), lock_sys.rec_hash); + return DB_SUCCESS; } /*************************************************************//** Updates the lock table when a page is split to the left. */ -void +dberr_t lock_update_split_left( /*===================*/ const buf_block_t* right_block, /*!< in: right page */ @@ -2917,16 +2965,17 @@ lock_update_split_left( LockMultiGuard g{lock_sys.rec_hash, l, r}; /* Inherit the locks to the supremum of the left page from the successor of the infimum on the right page */ - lock_rec_inherit_to_gap(g.cell1(), l, g.cell2(), r, left_block->page.frame, - PAGE_HEAP_NO_SUPREMUM, h); + return lock_rec_inherit_to_gap(g.cell1(), l, g.cell2(), r, + left_block->page.frame, PAGE_HEAP_NO_SUPREMUM, + h); } /** Update the lock table when a page is merged to the left. 
@param left left page @param orig_pred original predecessor of supremum on the left page before merge @param right merged, to-be-discarded right page */ -void lock_update_merge_left(const buf_block_t& left, const rec_t *orig_pred, - const page_id_t right) +dberr_t lock_update_merge_left(const buf_block_t& left, const rec_t *orig_pred, + const page_id_t right) { ut_ad(left.page.frame == page_align(orig_pred)); @@ -2935,7 +2984,7 @@ void lock_update_merge_left(const buf_block_t& left, const rec_t *orig_pred, if (UNIV_UNLIKELY(!left_next_rec)) { ut_ad("corrupted page" == 0); - return; + return DB_CORRUPTION; } /* This would likely be too large for a memory transaction. */ @@ -2944,9 +2993,11 @@ void lock_update_merge_left(const buf_block_t& left, const rec_t *orig_pred, { /* Inherit the locks on the supremum of the left page to the first record which was moved from the right page */ - lock_rec_inherit_to_gap(g.cell1(), l, g.cell1(), l, left.page.frame, - page_rec_get_heap_no(left_next_rec), - PAGE_HEAP_NO_SUPREMUM); + if (dberr_t err= + lock_rec_inherit_to_gap(g.cell1(), l, g.cell1(), l, left.page.frame, + page_rec_get_heap_no(left_next_rec), + PAGE_HEAP_NO_SUPREMUM)) + return err; /* Reset the locks on the supremum of the left page, releasing waiting transactions */ @@ -2955,13 +3006,16 @@ void lock_update_merge_left(const buf_block_t& left, const rec_t *orig_pred, /* Move the locks from the supremum of right page to the supremum of the left page */ - lock_rec_move(g.cell1(), left, l, g.cell2(), right, - PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); + if (dberr_t err= + lock_rec_move(g.cell1(), left, l, g.cell2(), right, + PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM)) + return err; lock_rec_free_all_from_discard_page(right, g.cell2(), lock_sys.rec_hash); /* there should exist no page lock on the right page, otherwise, it will be blocked from merge */ ut_d(lock_assert_no_spatial(right)); + return DB_SUCCESS; } 
/*************************************************************//** @@ -3069,7 +3123,7 @@ lock_update_discard( /*************************************************************//** Updates the lock table when a new user record is inserted. */ -void +dberr_t lock_update_insert( /*===============*/ const buf_block_t* block, /*!< in: buffer block containing rec */ @@ -3088,19 +3142,19 @@ lock_update_insert( receiver_heap_no = rec_get_heap_no_new(rec); rec = page_rec_get_next_low(rec, TRUE); if (UNIV_UNLIKELY(!rec)) { - return; + return DB_CORRUPTION; } donator_heap_no = rec_get_heap_no_new(rec); } else { receiver_heap_no = rec_get_heap_no_old(rec); rec = page_rec_get_next_low(rec, FALSE); if (UNIV_UNLIKELY(!rec)) { - return; + return DB_CORRUPTION; } donator_heap_no = rec_get_heap_no_old(rec); } - lock_rec_inherit_to_gap_if_gap_lock( + return lock_rec_inherit_to_gap_if_gap_lock( block, receiver_heap_no, donator_heap_no); } @@ -5100,13 +5154,14 @@ lock_rec_insert_check_and_lock( return err; } +MY_ATTRIBUTE((nonnull, warn_unused_result)) /*********************************************************************//** Creates an explicit record lock for a running transaction that currently only has an implicit lock on the record. The transaction instance must have a reference count > 0 so that it can't be committed and freed before this function has completed. 
*/ static -void +dberr_t lock_rec_convert_impl_to_expl_for_trx( /*==================================*/ const page_id_t id, /*!< in: page identifier */ @@ -5120,6 +5175,7 @@ lock_rec_convert_impl_to_expl_for_trx( ut_ad(!rec_is_metadata(rec, *index)); DEBUG_SYNC_C("before_lock_rec_convert_impl_to_expl_for_trx"); + dberr_t err= DB_SUCCESS; { LockGuard g{lock_sys.rec_hash, id}; trx->mutex_lock(); @@ -5128,14 +5184,16 @@ lock_rec_convert_impl_to_expl_for_trx( if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY) && !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, g.cell(), id, heap_no, trx)) - lock_rec_add_to_queue(LOCK_X | LOCK_REC_NOT_GAP, g.cell(), id, - page_align(rec), heap_no, index, trx, true); + if (!lock_rec_add_to_queue(LOCK_X | LOCK_REC_NOT_GAP, g.cell(), id, + page_align(rec), heap_no, index, trx, true)) + err= DB_LOCK_TABLE_FULL; } trx->mutex_unlock(); trx->release_reference(); DEBUG_SYNC_C("after_lock_rec_convert_impl_to_expl_for_trx"); + return err; } @@ -5210,6 +5268,7 @@ static void lock_rec_other_trx_holds_expl(trx_t *caller_trx, trx_t *trx, #endif /* UNIV_DEBUG */ +MY_ATTRIBUTE((nonnull, warn_unused_result)) /** If an implicit x-lock exists on a record, convert it to an explicit one. Often, this is called by a transaction that is about to enter a lock wait @@ -5226,9 +5285,11 @@ should be created. 
@param[in] rec record on the leaf page @param[in] index the index of the record @param[in] offsets rec_get_offsets(rec,index) -@return whether caller_trx already holds an exclusive lock on rec */ +@return error code +@retval DB_SUCCESS_LOCKED_REC if caller_trx already held rec exclusively +@retval DB_LOCK_TABLE_FULL if a lock cannot be created */ static -bool +dberr_t lock_rec_convert_impl_to_expl( trx_t* caller_trx, page_id_t id, @@ -5251,10 +5312,10 @@ lock_rec_convert_impl_to_expl( trx_id = lock_clust_rec_some_has_impl(rec, index, offsets); if (trx_id == 0) { - return false; + return DB_SUCCESS; } if (UNIV_UNLIKELY(trx_id == caller_trx->id)) { - return true; + return DB_SUCCESS_LOCKED_REC; } trx = trx_sys.find(caller_trx, trx_id); @@ -5265,26 +5326,24 @@ lock_rec_convert_impl_to_expl( offsets); if (trx == caller_trx) { trx->release_reference(); - return true; + return DB_SUCCESS_LOCKED_REC; } ut_d(lock_rec_other_trx_holds_expl(caller_trx, trx, rec, id)); } if (trx) { - ulint heap_no = page_rec_get_heap_no(rec); - ut_ad(trx->is_referenced()); /* If the transaction is still active and has no explicit x-lock set on the record, set one for it. trx cannot be committed until the ref count is zero. */ - lock_rec_convert_impl_to_expl_for_trx( - id, rec, index, trx, heap_no); + return lock_rec_convert_impl_to_expl_for_trx( + id, rec, index, trx, page_rec_get_heap_no(rec)); } - return false; + return DB_SUCCESS; } /*********************************************************************//** @@ -5323,10 +5382,17 @@ lock_clust_rec_modify_check_and_lock( /* If a transaction has no explicit x-lock set on the record, set one for it */ - if (lock_rec_convert_impl_to_expl(thr_get_trx(thr), block->page.id(), - rec, index, offsets)) { + err = lock_rec_convert_impl_to_expl(thr_get_trx(thr), block->page.id(), + rec, index, offsets); + switch (err) { + case DB_SUCCESS: + break; + case DB_SUCCESS_LOCKED_REC: /* We already hold an implicit exclusive lock. 
*/ - return DB_SUCCESS; + err = DB_SUCCESS; + /* fall through */ + default: + return err; } err = lock_rec_lock(true, LOCK_X | LOCK_REC_NOT_GAP, @@ -5570,11 +5636,22 @@ lock_clust_rec_read_check_and_lock( trx_t *trx = thr_get_trx(thr); if (!lock_table_has(trx, index->table, LOCK_X) - && heap_no != PAGE_HEAP_NO_SUPREMUM - && lock_rec_convert_impl_to_expl(trx, id, rec, index, offsets) - && gap_mode == LOCK_REC_NOT_GAP) { - /* We already hold an implicit exclusive lock. */ - return DB_SUCCESS; + && heap_no != PAGE_HEAP_NO_SUPREMUM) { + switch (dberr_t err = lock_rec_convert_impl_to_expl(trx, id, + rec, index, + offsets)) { + case DB_SUCCESS: + break; + case DB_SUCCESS_LOCKED_REC: + if (gap_mode != LOCK_REC_NOT_GAP) { + break; + } + /* We already hold an implicit exclusive lock. */ + err = DB_SUCCESS; + /* fall through */ + default: + return err; + } } dberr_t err = lock_rec_lock(false, gap_mode | mode, @@ -6389,7 +6466,7 @@ void lock_sys_t::deadlock_check() /** Update the locks when a page is split and merged to two pages, in defragmentation. 
*/ -void lock_update_split_and_merge( +dberr_t lock_update_split_and_merge( const buf_block_t* left_block, /*!< in: left page to which merged */ const rec_t* orig_pred, /*!< in: original predecessor of supremum on the left page before merge*/ @@ -6405,7 +6482,7 @@ void lock_update_split_and_merge( if (UNIV_UNLIKELY(!left_next_rec)) { ut_ad("corrupted page" == 0); - return; + return DB_CORRUPTION; } ut_ad(!page_rec_is_metadata(left_next_rec)); @@ -6414,9 +6491,12 @@ void lock_update_split_and_merge( /* Inherit the locks on the supremum of the left page to the first record which was moved from the right page */ - lock_rec_inherit_to_gap(g.cell1(), l, g.cell1(), l, left_block->page.frame, - page_rec_get_heap_no(left_next_rec), - PAGE_HEAP_NO_SUPREMUM); + if (dberr_t err= + lock_rec_inherit_to_gap(g.cell1(), l, g.cell1(), l, + left_block->page.frame, + page_rec_get_heap_no(left_next_rec), + PAGE_HEAP_NO_SUPREMUM)) + return err; /* Reset the locks on the supremum of the left page, releasing waiting transactions */ @@ -6424,7 +6504,8 @@ void lock_update_split_and_merge( /* Inherit the locks to the supremum of the left page from the successor of the infimum on the right page */ - lock_rec_inherit_to_gap(g.cell1(), l, g.cell2(), r, left_block->page.frame, - PAGE_HEAP_NO_SUPREMUM, - lock_get_min_heap_no(right_block)); + return lock_rec_inherit_to_gap(g.cell1(), l, g.cell2(), r, + left_block->page.frame, + PAGE_HEAP_NO_SUPREMUM, + lock_get_min_heap_no(right_block)); } diff --git a/storage/innobase/lock/lock0prdt.cc b/storage/innobase/lock/lock0prdt.cc index 5a12d97411f..acf0a4be574 100644 --- a/storage/innobase/lock/lock0prdt.cc +++ b/storage/innobase/lock/lock0prdt.cc @@ -530,11 +530,13 @@ lock_prdt_insert_check_and_lock( if (c_lock) { rtr_mbr_t *mbr= prdt_get_mbr_from_prdt(prdt); + prdt->op= 0; trx->mutex_lock(); - /* Allocate MBR on the lock heap */ - lock_init_prdt_from_mbr(prdt, mbr, 0, trx->lock.lock_heap); - err= lock_rec_enqueue_waiting(c_lock, mode, id, 
block->page.frame, - PRDT_HEAPNO, index, thr, prdt); + prdt->data= mem_heap_dup(trx->lock.lock_heap, mbr, sizeof *mbr); + err= UNIV_UNLIKELY(!prdt->data) + ? DB_LOCK_TABLE_FULL + : lock_rec_enqueue_waiting(c_lock, mode, id, block->page.frame, + PRDT_HEAPNO, index, thr, prdt); trx->mutex_unlock(); } } @@ -669,27 +671,6 @@ lock_prdt_update_split( } /*********************************************************************//** -Initiate a Predicate Lock from a MBR */ -void -lock_init_prdt_from_mbr( -/*====================*/ - lock_prdt_t* prdt, /*!< in/out: predicate to initialized */ - rtr_mbr_t* mbr, /*!< in: Minimum Bounding Rectangle */ - ulint mode, /*!< in: Search mode */ - mem_heap_t* heap) /*!< in: heap for allocating memory */ -{ - memset(prdt, 0, sizeof(*prdt)); - - if (heap != NULL) { - prdt->data = mem_heap_dup(heap, mbr, sizeof *mbr); - } else { - prdt->data = static_cast<void*>(mbr); - } - - prdt->op = static_cast<uint16>(mode); -} - -/*********************************************************************//** Acquire a predicate lock on a block @return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */ dberr_t @@ -850,32 +831,33 @@ bool lock_test_prdt_page_lock(const trx_t *trx, const page_id_t page_id) /*************************************************************//** Moves the locks of a page to another page and resets the lock bits of the donating records. 
*/ -void +dberr_t lock_prdt_rec_move( /*===============*/ const buf_block_t* receiver, /*!< in: buffer block containing the receiving record */ const page_id_t donator) /*!< in: target page */ { - LockMultiGuard g{lock_sys.prdt_hash, receiver->page.id(), donator}; - - for (lock_t *lock = lock_sys_t::get_first(g.cell2(), donator, - PRDT_HEAPNO); - lock; - lock = lock_rec_get_next(PRDT_HEAPNO, lock)) { + dberr_t err= DB_SUCCESS; + LockMultiGuard g{lock_sys.prdt_hash, receiver->page.id(), donator}; - const auto type_mode = lock->type_mode; - lock_prdt_t* lock_prdt = lock_get_prdt_from_lock(lock); + for (lock_t *lock= lock_sys_t::get_first(g.cell2(), donator, PRDT_HEAPNO); + lock; lock= lock_rec_get_next(PRDT_HEAPNO, lock)) + { + const auto type_mode= lock->type_mode; + lock_prdt_t *lock_prdt= lock_get_prdt_from_lock(lock); - lock_rec_reset_nth_bit(lock, PRDT_HEAPNO); - if (type_mode & LOCK_WAIT) { - ut_ad(lock->trx->lock.wait_lock == lock); - lock->type_mode &= ~LOCK_WAIT; - } - lock_prdt_add_to_queue( - type_mode, receiver, lock->index, lock->trx, - lock_prdt, false); - } + lock_rec_reset_nth_bit(lock, PRDT_HEAPNO); + if (type_mode & LOCK_WAIT) + { + ut_ad(lock->trx->lock.wait_lock == lock); + lock->type_mode&= ~LOCK_WAIT; + } + if (!lock_prdt_add_to_queue(type_mode, receiver, lock->index, lock->trx, + lock_prdt, false)) + err= DB_LOCK_TABLE_FULL; + } + return err; } /** Remove locks on a discarded SPATIAL INDEX page. 
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index fc2d50da62c..37791df48cb 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -749,6 +749,8 @@ static struct retry: bool fail= false; buf_block_t *free_block= buf_LRU_get_free_block(false); + if (UNIV_UNLIKELY(!free_block)) + return true; mysql_mutex_lock(&recv_sys.mutex); for (auto d= defers.begin(); d != defers.end(); ) @@ -1478,7 +1480,8 @@ inline void *recv_sys_t::alloc(size_t len) if (UNIV_UNLIKELY(!block)) { create_block: - block= buf_block_alloc(); + block= buf_LRU_get_free_block(false); + ut_a(block); block->page.access_time= 1U << 16 | ut_calc_align<uint16_t>(static_cast<uint16_t>(len), ALIGNMENT); static_assert(ut_is_2pow(ALIGNMENT), "ALIGNMENT must be a power of 2"); @@ -3140,11 +3143,18 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id, /** Attempt to initialize a page based on redo log records. @param page_id page identifier +@param err error code @return recovered block -@retval nullptr if the page cannot be initialized based on log records */ -buf_block_t *recv_sys_t::recover_low(const page_id_t page_id) +@retval -1 if the page cannot be recovered; *err will be set */ +buf_block_t *recv_sys_t::recover_low(const page_id_t page_id, dberr_t *err) { buf_block_t *free_block= buf_LRU_get_free_block(false); + if (UNIV_UNLIKELY(!free_block)) + { + if (err) + *err= DB_OUT_OF_MEMORY; + return reinterpret_cast<buf_block_t*>(-1); + } buf_block_t *block= nullptr; mysql_mutex_lock(&mutex); @@ -3154,8 +3164,13 @@ buf_block_t *recv_sys_t::recover_low(const page_id_t page_id) { mtr_t mtr; block= recover_low(page_id, p, mtr, free_block); - ut_ad(!block || block == reinterpret_cast<buf_block_t*>(-1) || - block == free_block); + if (block == reinterpret_cast<buf_block_t*>(-1)) + { + if (err) + *err= DB_CORRUPTION; + } + else + ut_ad(!block || block == free_block); } mysql_mutex_unlock(&mutex); diff --git 
a/storage/innobase/mem/mem0mem.cc b/storage/innobase/mem/mem0mem.cc index 5e8587bfea6..9ed50e61c2d 100644 --- a/storage/innobase/mem/mem0mem.cc +++ b/storage/innobase/mem/mem0mem.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -285,13 +285,12 @@ mem_heap_create_block_func( buf_block = static_cast<buf_block_t*>(heap->free_block); heap->free_block = NULL; - - if (UNIV_UNLIKELY(!buf_block)) { - - return(NULL); - } } else { - buf_block = buf_block_alloc(); + buf_block = buf_LRU_get_free_block(false); + } + + if (UNIV_UNLIKELY(!buf_block)) { + return NULL; } block = (mem_block_t*) buf_block->page.frame; diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc index 28b63dc4209..6d0657aa8e8 100644 --- a/storage/innobase/page/page0page.cc +++ b/storage/innobase/page/page0page.cc @@ -654,21 +654,24 @@ err_exit: } /* Update the lock table and possible hash index */ + if (UNIV_LIKELY_NULL(heap) && UNIV_LIKELY(!rec_move)) { + mem_heap_free(heap); + } if (!index->has_locking()) { + return ret; } else if (UNIV_LIKELY_NULL(rec_move)) { - lock_rtr_move_rec_list(new_block, block, rec_move, num_moved); + *err = lock_rtr_move_rec_list(new_block, block, rec_move, + num_moved); + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } } else { - lock_move_rec_list_end(new_block, block, rec); + btr_search_move_or_delete_hash_entries(new_block, block); + *err = lock_move_rec_list_end(new_block, block, rec); } - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - btr_search_move_or_delete_hash_entries(new_block, block); - - return(ret); + return *err == DB_SUCCESS ? 
ret : nullptr; } /*************************************************************//** @@ -843,22 +846,22 @@ zip_reorganize: } /* Update the lock table and possible hash index */ - - if (!index->has_locking()) { - } else if (dict_index_is_spatial(index)) { - lock_rtr_move_rec_list(new_block, block, rec_move, num_moved); - } else { - lock_move_rec_list_start(new_block, block, rec, ret); + if (UNIV_LIKELY_NULL(heap) && !index->is_spatial()) { + mem_heap_free(heap); } - if (heap) { + if (!index->has_locking()) { + *err = DB_SUCCESS; + return ret; + } else if (index->is_spatial()) { + *err = lock_rtr_move_rec_list(new_block, block, rec_move, num_moved); mem_heap_free(heap); + } else { + btr_search_move_or_delete_hash_entries(new_block, block); + *err = lock_move_rec_list_start(new_block, block, rec, ret); } - btr_search_move_or_delete_hash_entries(new_block, block); - - *err = DB_SUCCESS; - return(ret); + return *err == DB_SUCCESS ? ret : nullptr; } /*************************************************************//** diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc index 7b603bb876b..91721ccda01 100644 --- a/storage/innobase/page/page0zip.cc +++ b/storage/innobase/page/page0zip.cc @@ -4406,10 +4406,14 @@ page_zip_reorganize( MEM_CHECK_DEFINED(buf_block_get_page_zip(block)->data, page_zip_get_size(buf_block_get_page_zip(block))); + temp_block = buf_LRU_get_free_block(false); + if (UNIV_UNLIKELY(!temp_block)) { + return DB_OUT_OF_MEMORY; + } + /* Disable logging */ mtr_log_t log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); - temp_block = buf_block_alloc(); btr_search_drop_page_hash_index(block); temp_page = temp_block->page.frame; @@ -4481,7 +4485,7 @@ page_zip_reorganize( err = DB_FAIL; } else { - lock_move_reorganize_page(block, temp_block); + err = lock_move_reorganize_page(block, temp_block); } buf_block_free(temp_block); diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index b22ff862759..dfabb64556d 100644 
--- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -241,11 +241,6 @@ row_ins_sec_index_entry_by_modify( } } else { ut_a(mode == BTR_MODIFY_TREE); - if (buf_pool.running_out()) { - - return(DB_LOCK_TABLE_FULL); - } - err = btr_cur_pessimistic_update( flags | BTR_KEEP_SYS_FLAG, cursor, offsets, &offsets_heap, @@ -331,10 +326,6 @@ row_ins_clust_index_entry_by_modify( break; } } else { - if (buf_pool.running_out()) { - return DB_LOCK_TABLE_FULL; - } - big_rec_t* big_rec = NULL; err = btr_cur_pessimistic_update( @@ -2754,11 +2745,6 @@ do_insert: entry, &insert_rec, &big_rec, n_ext, thr, &mtr); } else { - if (buf_pool.running_out()) { - err = DB_LOCK_TABLE_FULL; - goto err_exit; - } - DEBUG_SYNC_C("before_insert_pessimitic_row_ins_clust"); err = btr_cur_optimistic_insert( @@ -3040,11 +3026,6 @@ row_ins_sec_index_entry_low( } } else { ut_ad(mode == BTR_MODIFY_TREE); - if (buf_pool.running_out()) { - err = DB_LOCK_TABLE_FULL; - goto func_exit; - } - err = btr_cur_optimistic_insert( flags, &cursor, &offsets, &offsets_heap, diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 0c9ad03cc93..3c8e3472e6e 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -694,6 +694,7 @@ handle_new_error: case DB_DEADLOCK: case DB_LOCK_TABLE_FULL: + case DB_OUT_OF_MEMORY: rollback: /* Roll back the whole transaction; this resolution was added to version 3.23.43 */ diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index ec4d09115f6..cc3d2618603 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -1319,19 +1319,11 @@ sel_set_rec_lock( que_thr_t* thr, /*!< in: query thread */ mtr_t* mtr) /*!< in: mtr */ { - trx_t* trx; dberr_t err = DB_SUCCESS; const buf_block_t* block; block = btr_pcur_get_block(pcur); - trx = thr_get_trx(thr); - - if (UT_LIST_GET_LEN(trx->lock.trx_locks) > 10000 - && buf_pool.running_out()) { - return 
DB_LOCK_TABLE_FULL; - } - if (dict_index_is_clust(index)) { err = lock_clust_rec_read_check_and_lock( 0, block, rec, index, offsets, diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc index a2eacaf8e12..4c838f1009c 100644 --- a/storage/innobase/row/row0upd.cc +++ b/storage/innobase/row/row0upd.cc @@ -2416,11 +2416,6 @@ row_upd_clust_rec( goto func_exit; } - if (buf_pool.running_out()) { - err = DB_LOCK_TABLE_FULL; - goto func_exit; - } - /* We may have to modify the tree structure: do a pessimistic descent down the index tree */ |