diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2021-11-16 19:55:06 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2021-11-18 17:47:19 +0200 |
commit | aaef2e1d8c843d1e40b1ce0c5199c3abb3c5da28 (patch) | |
tree | e1cf32baf32aecdeafe8a3ad259b3d1f3a631ffc /storage/innobase | |
parent | db915f738709a1988420e73522f5a3e6515d17e9 (diff) | |
download | mariadb-git-aaef2e1d8c843d1e40b1ce0c5199c3abb3c5da28.tar.gz |
MDEV-27058: Reduce the size of buf_block_t and buf_page_t
buf_page_t::frame: Moved from buf_block_t::frame.
All 'thin' buf_page_t describing compressed-only ROW_FORMAT=COMPRESSED
pages will have frame=nullptr, while all 'fat' buf_block_t
will have a non-null frame pointing to aligned innodb_page_size bytes.
This eliminates the need for separate states for
BUF_BLOCK_FILE_PAGE and BUF_BLOCK_ZIP_PAGE.
buf_page_t::lock: Moved from buf_block_t::lock. That is, all block
descriptors will have a page latch. The IO_PIN state that was used
for discarding or creating the uncompressed page frame of a
ROW_FORMAT=COMPRESSED block is replaced by a combination of read-fix
and page X-latch.
page_zip_des_t::fix: Replaces the state_, buf_fix_count_, io_fix_ and status
fields of buf_page_t with a single std::atomic<uint32_t>. All modifications
will use store(), fetch_add() or fetch_sub(). This space was previously
wasted due to alignment on 64-bit systems. We will use the following encoding
that combines a state (partly read-fix or write-fix) and a buffer-fix
count:
buf_page_t::NOT_USED=0 (previously BUF_BLOCK_NOT_USED)
buf_page_t::MEMORY=1 (previously BUF_BLOCK_MEMORY)
buf_page_t::REMOVE_HASH=2 (previously BUF_BLOCK_REMOVE_HASH)
buf_page_t::FREED=3 + fix: pages marked as freed in the file
buf_page_t::UNFIXED=1U<<29 + fix: normal pages
buf_page_t::IBUF_EXIST=2U<<29 + fix: normal pages; may need ibuf merge
buf_page_t::REINIT=3U<<29 + fix: reinitialized pages (skip doublewrite)
buf_page_t::READ_FIX=4U<<29 + fix: read-fixed pages (also X-latched)
buf_page_t::WRITE_FIX=5U<<29 + fix: write-fixed pages (also U-latched)
buf_page_t::WRITE_FIX_IBUF=6U<<29 + fix: write-fixed; may have ibuf
buf_page_t::WRITE_FIX_REINIT=7U<<29 + fix: write-fixed (no doublewrite)
buf_page_t::write_complete(): Change WRITE_FIX or WRITE_FIX_REINIT to
UNFIXED, and WRITE_FIX_IBUF to IBUF_EXIST, before releasing the U-latch.
buf_page_t::read_complete(): Renamed from buf_page_read_complete().
Change READ_FIX to UNFIXED or IBUF_EXIST, before releasing the X-latch.
buf_page_t::can_relocate(): If the page latch is being held or waited for,
or the block is buffer-fixed or io-fixed, return false. (The condition
on the page latch is new.)
Outside buf_page_get_gen(), buf_page_get_low() and buf_page_free(), we
will acquire the page latch before fix(), and unfix() before unlocking.
buf_page_t::flush(): Replaces buf_flush_page(). Optimizes the
handling of FREED pages.
buf_pool_t::release_freed_page(): Assume that buf_pool.mutex is held
by the caller.
buf_page_t::is_read_fixed(), buf_page_t::is_write_fixed(): New predicates.
buf_page_get_low(): Ignore guesses that are read-fixed because they
may not yet be registered in buf_pool.page_hash and buf_pool.LRU.
buf_page_optimistic_get(): Acquire latch before buffer-fixing.
buf_page_make_young(): Leave read-fixed blocks alone, because they
might not be registered in buf_pool.LRU yet.
recv_sys_t::recover_deferred(), recv_sys_t::recover_low():
Possibly fix MDEV-26326, by holding a page X-latch instead of
only buffer-fixing the page.
Diffstat (limited to 'storage/innobase')
79 files changed, 2818 insertions, 3296 deletions
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index 8819ead882c..5c138d41f58 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -95,7 +95,6 @@ SET(INNOBASE_SOURCES include/buf0dblwr.h include/buf0dump.h include/buf0flu.h - include/buf0flu.ic include/buf0lru.h include/buf0rea.h include/buf0types.h diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index e02df95b641..743a2f1fbfa 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -403,7 +403,7 @@ btr_page_create( { ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); byte *index_id= my_assume_aligned<2>(PAGE_HEADER + PAGE_INDEX_ID + - block->frame); + block->page.frame); if (UNIV_LIKELY_NULL(page_zip)) { @@ -417,16 +417,17 @@ btr_page_create( { static_assert(((FIL_PAGE_INDEX & 0xff00) | byte(FIL_PAGE_RTREE)) == FIL_PAGE_RTREE, "compatibility"); - mtr->write<1>(*block, FIL_PAGE_TYPE + 1 + block->frame, + mtr->write<1>(*block, FIL_PAGE_TYPE + 1 + block->page.frame, byte(FIL_PAGE_RTREE)); - if (mach_read_from_8(block->frame + FIL_RTREE_SPLIT_SEQ_NUM)) + if (mach_read_from_8(block->page.frame + FIL_RTREE_SPLIT_SEQ_NUM)) mtr->memset(block, FIL_RTREE_SPLIT_SEQ_NUM, 8, 0); } /* Set the level of the new index page */ mtr->write<2,mtr_t::MAYBE_NOP>(*block, my_assume_aligned<2>(PAGE_HEADER + PAGE_LEVEL + - block->frame), level); + block->page.frame), + level); mtr->write<8,mtr_t::MAYBE_NOP>(*block, index_id, index->id); } } @@ -448,7 +449,7 @@ btr_page_alloc_for_ibuf( fil_addr_t node_addr = flst_get_first(PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST - + root->frame); + + root->page.frame); ut_a(node_addr.page != FIL_NULL); new_block = buf_page_get( @@ -564,7 +565,7 @@ void btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr, #ifdef BTR_CUR_HASH_ADAPT if (block->index && !block->index->freed()) { ut_ad(!blob); - ut_ad(page_is_leaf(block->frame)); + 
ut_ad(page_is_leaf(block->page.frame)); } #endif const page_id_t id(block->page.id()); @@ -587,7 +588,8 @@ void btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr, The page will be freed, so previous changes to it by this mini-transaction should not matter. */ page_t* root = btr_root_get(index, mtr); - fseg_header_t* seg_header = &root[blob || page_is_leaf(block->frame) + fseg_header_t* seg_header = &root[blob + || page_is_leaf(block->page.frame) ? PAGE_HEADER + PAGE_BTR_SEG_LEAF : PAGE_HEADER + PAGE_BTR_SEG_TOP]; fil_space_t* space= index->table->space; @@ -813,13 +815,15 @@ static void btr_free_root(buf_block_t *block, mtr_t *mtr) btr_search_drop_page_hash_index(block); #ifdef UNIV_BTR_DEBUG - ut_a(btr_root_fseg_validate(PAGE_HEADER + PAGE_BTR_SEG_TOP + block->frame, - block->page.id().space())); + ut_a(btr_root_fseg_validate(PAGE_HEADER + PAGE_BTR_SEG_TOP + + block->page.frame, + block->page.id().space())); #endif /* UNIV_BTR_DEBUG */ /* Free the entire segment in small steps. */ ut_d(mtr->freeing_tree()); - while (!fseg_free_step(PAGE_HEADER + PAGE_BTR_SEG_TOP + block->frame, mtr)); + while (!fseg_free_step(PAGE_HEADER + PAGE_BTR_SEG_TOP + + block->page.frame, mtr)); } MY_ATTRIBUTE((warn_unused_result)) @@ -841,14 +845,14 @@ buf_block_t *btr_free_root_check(const page_id_t page_id, ulint zip_size, nullptr, BUF_GET_POSSIBLY_FREED, mtr); if (!block); - else if (block->page.status == buf_page_t::FREED) + else if (block->page.is_freed()) block= nullptr; - else if (fil_page_index_page_check(block->frame) && - index_id == btr_page_get_index_id(block->frame)) + else if (fil_page_index_page_check(block->page.frame) && + index_id == btr_page_get_index_id(block->page.frame)) /* This should be a root page. It should not be possible to reassign the same index_id for some other index in the tablespace. 
*/ - ut_ad(!page_has_siblings(block->frame)); + ut_ad(!page_has_siblings(block->page.frame)); else block= nullptr; @@ -864,7 +868,7 @@ static void btr_root_page_init(buf_block_t *block, index_id_t index_id, dict_index_t *index, mtr_t *mtr) { constexpr uint16_t field= PAGE_HEADER + PAGE_INDEX_ID; - byte *page_index_id= my_assume_aligned<2>(field + block->frame); + byte *page_index_id= my_assume_aligned<2>(field + block->page.frame); /* Create a new index page on the allocated segment page */ if (UNIV_LIKELY_NULL(block->page.zip.data)) @@ -880,14 +884,14 @@ static void btr_root_page_init(buf_block_t *block, index_id_t index_id, { static_assert(((FIL_PAGE_INDEX & 0xff00) | byte(FIL_PAGE_RTREE)) == FIL_PAGE_RTREE, "compatibility"); - mtr->write<1>(*block, FIL_PAGE_TYPE + 1 + block->frame, + mtr->write<1>(*block, FIL_PAGE_TYPE + 1 + block->page.frame, byte(FIL_PAGE_RTREE)); - if (mach_read_from_8(block->frame + FIL_RTREE_SPLIT_SEQ_NUM)) + if (mach_read_from_8(block->page.frame + FIL_RTREE_SPLIT_SEQ_NUM)) mtr->memset(block, FIL_RTREE_SPLIT_SEQ_NUM, 8, 0); } /* Set the level of the new index page */ mtr->write<2,mtr_t::MAYBE_NOP>( - *block, PAGE_HEADER + PAGE_LEVEL + block->frame, 0U); + *block, PAGE_HEADER + PAGE_LEVEL + block->page.frame, 0U); mtr->write<8,mtr_t::MAYBE_NOP>(*block, page_index_id, index_id); } } @@ -962,7 +966,7 @@ btr_create( } } - ut_ad(!page_has_siblings(block->frame)); + ut_ad(!page_has_siblings(block->page.frame)); btr_root_page_init(block, index_id, index, mtr); @@ -982,7 +986,7 @@ btr_create( allowed size fit on the root page: this fact is needed to ensure correctness of split algorithms */ - ut_ad(page_get_max_insert_size(block->frame, 2) + ut_ad(page_get_max_insert_size(block->page.frame, 2) > 2 * BTR_PAGE_MAX_REC_SIZE); return(block->page.id().page_no()); @@ -1004,15 +1008,15 @@ btr_free_but_not_root( { mtr_t mtr; - ut_ad(fil_page_index_page_check(block->frame)); - ut_ad(!page_has_siblings(block->frame)); + 
ut_ad(fil_page_index_page_check(block->page.frame)); + ut_ad(!page_has_siblings(block->page.frame)); leaf_loop: mtr_start(&mtr); ut_d(mtr.freeing_tree()); mtr_set_log_mode(&mtr, log_mode); mtr.set_named_space_id(block->page.id().space()); - page_t* root = block->frame; + page_t* root = block->page.frame; if (!root) { mtr_commit(&mtr); @@ -1046,7 +1050,7 @@ top_loop: mtr_set_log_mode(&mtr, log_mode); mtr.set_named_space_id(block->page.id().space()); - root = block->frame; + root = block->page.frame; #ifdef UNIV_BTR_DEBUG ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP @@ -1164,7 +1168,7 @@ btr_read_autoinc(dict_index_t* index) page_id_t(index->table->space_id, index->page), index->table->space->zip_size(), RW_S_LATCH, &mtr)) { - autoinc = page_get_autoinc(block->frame); + autoinc = page_get_autoinc(block->page.frame); } else { autoinc = 0; } @@ -1197,9 +1201,10 @@ btr_read_autoinc_with_fallback(const dict_table_t* table, unsigned col_no) index->table->space->zip_size(), RW_S_LATCH, &mtr); - ib_uint64_t autoinc = block ? page_get_autoinc(block->frame) : 0; + ib_uint64_t autoinc = block + ? 
page_get_autoinc(block->page.frame) : 0; const bool retry = block && autoinc == 0 - && !page_is_empty(block->frame); + && !page_is_empty(block->page.frame); mtr.commit(); if (retry) { @@ -1259,15 +1264,16 @@ static void btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); ut_ad(!is_buf_block_get_page_zip(block)); btr_assert_not_corrupted(block, index); - ut_ad(fil_page_index_page_check(block->frame)); + ut_ad(fil_page_index_page_check(block->page.frame)); ut_ad(index->is_dummy || block->page.id().space() == index->table->space->id); ut_ad(index->is_dummy || block->page.id().page_no() != index->page || - !page_has_siblings(block->frame)); + !page_has_siblings(block->page.frame)); buf_block_t *old= buf_block_alloc(); /* Copy the old page to temporary space */ - memcpy_aligned<UNIV_PAGE_SIZE_MIN>(old->frame, block->frame, srv_page_size); + memcpy_aligned<UNIV_PAGE_SIZE_MIN>(old->page.frame, block->page.frame, + srv_page_size); btr_search_drop_page_hash_index(block); @@ -1276,7 +1282,7 @@ static void btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, page_create(block, mtr, index->table->not_redundant()); if (index->is_spatial()) - block->frame[FIL_PAGE_TYPE + 1]= byte(FIL_PAGE_RTREE); + block->page.frame[FIL_PAGE_TYPE + 1]= byte(FIL_PAGE_RTREE); static_assert(((FIL_PAGE_INDEX & 0xff00) | byte(FIL_PAGE_RTREE)) == FIL_PAGE_RTREE, "compatibility"); @@ -1284,19 +1290,20 @@ static void btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, /* Copy the records from the temporary space to the recreated page; do not copy the lock bits yet */ - page_copy_rec_list_end_no_locks(block, old, page_get_infimum_rec(old->frame), + page_copy_rec_list_end_no_locks(block, old, + page_get_infimum_rec(old->page.frame), index, mtr); /* Copy the PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC. 
*/ - ut_ad(!page_get_max_trx_id(block->frame)); - memcpy_aligned<8>(PAGE_MAX_TRX_ID + PAGE_HEADER + block->frame, - PAGE_MAX_TRX_ID + PAGE_HEADER + old->frame, 8); + ut_ad(!page_get_max_trx_id(block->page.frame)); + memcpy_aligned<8>(PAGE_MAX_TRX_ID + PAGE_HEADER + block->page.frame, + PAGE_MAX_TRX_ID + PAGE_HEADER + old->page.frame, 8); #ifdef UNIV_DEBUG - if (page_get_max_trx_id(block->frame)) + if (page_get_max_trx_id(block->page.frame)) /* PAGE_MAX_TRX_ID must be zero on non-leaf pages other than clustered index root pages. */ ut_ad(dict_index_is_sec_or_ibuf(index) - ? page_is_leaf(block->frame) + ? page_is_leaf(block->page.frame) : block->page.id().page_no() == index->page); else /* PAGE_MAX_TRX_ID is unused in clustered index pages (other than @@ -1304,14 +1311,16 @@ static void btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, pages, and in temporary tables. It was always zero-initialized in page_create(). PAGE_MAX_TRX_ID must be nonzero on dict_index_is_sec_or_ibuf() leaf pages. 
*/ - ut_ad(index->table->is_temporary() || !page_is_leaf(block->frame) || + ut_ad(index->table->is_temporary() || !page_is_leaf(block->page.frame) || !dict_index_is_sec_or_ibuf(index)); #endif - const uint16_t data_size1= page_get_data_size(old->frame); - const uint16_t data_size2= page_get_data_size(block->frame); - const ulint max1= page_get_max_insert_size_after_reorganize(old->frame, 1); - const ulint max2= page_get_max_insert_size_after_reorganize(block->frame, 1); + const uint16_t data_size1= page_get_data_size(old->page.frame); + const uint16_t data_size2= page_get_data_size(block->page.frame); + const ulint max1= + page_get_max_insert_size_after_reorganize(old->page.frame, 1); + const ulint max2= + page_get_max_insert_size_after_reorganize(block->page.frame, 1); if (UNIV_UNLIKELY(data_size1 != data_size2 || max1 != max2)) ib::fatal() << "Page old data size " << data_size1 @@ -1321,39 +1330,39 @@ static void btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, /* Restore the cursor position. */ if (pos) - cursor->rec = page_rec_get_nth(block->frame, pos); + cursor->rec = page_rec_get_nth(block->page.frame, pos); else - ut_ad(cursor->rec == page_get_infimum_rec(block->frame)); + ut_ad(cursor->rec == page_get_infimum_rec(block->page.frame)); if (block->page.id().page_no() == index->page && - fil_page_get_type(old->frame) == FIL_PAGE_TYPE_INSTANT) + fil_page_get_type(old->page.frame) == FIL_PAGE_TYPE_INSTANT) { /* Preserve the PAGE_INSTANT information. 
*/ ut_ad(index->is_instant()); - memcpy_aligned<2>(FIL_PAGE_TYPE + block->frame, - FIL_PAGE_TYPE + old->frame, 2); - memcpy_aligned<2>(PAGE_HEADER + PAGE_INSTANT + block->frame, - PAGE_HEADER + PAGE_INSTANT + old->frame, 2); + memcpy_aligned<2>(FIL_PAGE_TYPE + block->page.frame, + FIL_PAGE_TYPE + old->page.frame, 2); + memcpy_aligned<2>(PAGE_HEADER + PAGE_INSTANT + block->page.frame, + PAGE_HEADER + PAGE_INSTANT + old->page.frame, 2); if (!index->table->instant); - else if (page_is_comp(block->frame)) + else if (page_is_comp(block->page.frame)) { - memcpy(PAGE_NEW_INFIMUM + block->frame, - PAGE_NEW_INFIMUM + old->frame, 8); - memcpy(PAGE_NEW_SUPREMUM + block->frame, - PAGE_NEW_SUPREMUM + old->frame, 8); + memcpy(PAGE_NEW_INFIMUM + block->page.frame, + PAGE_NEW_INFIMUM + old->page.frame, 8); + memcpy(PAGE_NEW_SUPREMUM + block->page.frame, + PAGE_NEW_SUPREMUM + old->page.frame, 8); } else { - memcpy(PAGE_OLD_INFIMUM + block->frame, - PAGE_OLD_INFIMUM + old->frame, 8); - memcpy(PAGE_OLD_SUPREMUM + block->frame, - PAGE_OLD_SUPREMUM + old->frame, 8); + memcpy(PAGE_OLD_INFIMUM + block->page.frame, + PAGE_OLD_INFIMUM + old->page.frame, 8); + memcpy(PAGE_OLD_SUPREMUM + block->page.frame, + PAGE_OLD_SUPREMUM + old->page.frame, 8); } } - ut_ad(!memcmp(old->frame, block->frame, PAGE_HEADER)); - ut_ad(!memcmp(old->frame + PAGE_MAX_TRX_ID + PAGE_HEADER, - block->frame + PAGE_MAX_TRX_ID + PAGE_HEADER, + ut_ad(!memcmp(old->page.frame, block->page.frame, PAGE_HEADER)); + ut_ad(!memcmp(old->page.frame + PAGE_MAX_TRX_ID + PAGE_HEADER, + block->page.frame + PAGE_MAX_TRX_ID + PAGE_HEADER, PAGE_DATA - (PAGE_MAX_TRX_ID + PAGE_HEADER))); if (!dict_table_is_locking_disabled(index->table)) @@ -1367,9 +1376,9 @@ static void btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, ulint a, e; for (a= PAGE_HEADER, e= PAGE_MAX_TRX_ID + PAGE_HEADER; a < e; a++) { - if (old->frame[a] == block->frame[a]) + if (old->page.frame[a] == block->page.frame[a]) continue; - while (--e, 
old->frame[e] == block->frame[e]); + while (--e, old->page.frame[e] == block->page.frame[e]); e++; ut_ad(a < e); /* Write log for the changed page header fields. */ @@ -1377,88 +1386,92 @@ static void btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, break; } - const uint16_t top= page_header_get_offs(block->frame, PAGE_HEAP_TOP); + const uint16_t top= page_header_get_offs(block->page.frame, PAGE_HEAP_TOP); - if (page_is_comp(block->frame)) + if (page_is_comp(block->page.frame)) { /* info_bits=0, n_owned=1, heap_no=0, status */ - ut_ad(!memcmp(PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES + block->frame, - PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES + old->frame, 3)); + ut_ad(!memcmp(PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES + + block->page.frame, + PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES + + old->page.frame, 3)); /* If the 'next' pointer of the infimum record has changed, log it. */ a= PAGE_NEW_INFIMUM - 2; e= a + 2; - if (block->frame[a] == old->frame[a]) + if (block->page.frame[a] == old->page.frame[a]) a++; - if (--e, block->frame[e] != old->frame[e]) + if (--e, block->page.frame[e] != old->page.frame[e]) e++; if (ulint len= e - a) mtr->memcpy(*block, a, len); /* The infimum record itself must not change. */ - ut_ad(!memcmp(PAGE_NEW_INFIMUM + block->frame, - PAGE_NEW_INFIMUM + old->frame, 8)); + ut_ad(!memcmp(PAGE_NEW_INFIMUM + block->page.frame, + PAGE_NEW_INFIMUM + old->page.frame, 8)); /* Log any change of the n_owned of the supremum record. */ a= PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES; - if (block->frame[a] != old->frame[a]) + if (block->page.frame[a] != old->page.frame[a]) mtr->memcpy(*block, a, 1); /* The rest of the supremum record must not change. */ - ut_ad(!memcmp(&block->frame[a + 1], &old->frame[a + 1], + ut_ad(!memcmp(&block->page.frame[a + 1], &old->page.frame[a + 1], PAGE_NEW_SUPREMUM_END - PAGE_NEW_SUPREMUM + REC_N_NEW_EXTRA_BYTES - 1)); /* Log the differences in the payload. 
*/ for (a= PAGE_NEW_SUPREMUM_END, e= top; a < e; a++) { - if (old->frame[a] == block->frame[a]) + if (old->page.frame[a] == block->page.frame[a]) continue; - while (--e, old->frame[e] == block->frame[e]); + while (--e, old->page.frame[e] == block->page.frame[e]); e++; ut_ad(a < e); - /* TODO: write MEMMOVE records to minimize this further! */ + /* TODO: write MEMMOVE records to minimize this further! */ mtr->memcpy(*block, a, e - a); - break; + break; } } else { /* info_bits=0, n_owned=1, heap_no=0, number of fields, 1-byte format */ - ut_ad(!memcmp(PAGE_OLD_INFIMUM - REC_N_OLD_EXTRA_BYTES + block->frame, - PAGE_OLD_INFIMUM - REC_N_OLD_EXTRA_BYTES + old->frame, 4)); + ut_ad(!memcmp(PAGE_OLD_INFIMUM - REC_N_OLD_EXTRA_BYTES + + block->page.frame, + PAGE_OLD_INFIMUM - REC_N_OLD_EXTRA_BYTES + + old->page.frame, 4)); /* If the 'next' pointer of the infimum record has changed, log it. */ a= PAGE_OLD_INFIMUM - 2; e= a + 2; - if (block->frame[a] == old->frame[a]) + if (block->page.frame[a] == old->page.frame[a]) a++; - if (--e, block->frame[e] != old->frame[e]) + if (--e, block->page.frame[e] != old->page.frame[e]) e++; if (ulint len= e - a) mtr->memcpy(*block, a, len); /* The infimum record itself must not change. */ - ut_ad(!memcmp(PAGE_OLD_INFIMUM + block->frame, - PAGE_OLD_INFIMUM + old->frame, 8)); + ut_ad(!memcmp(PAGE_OLD_INFIMUM + block->page.frame, + PAGE_OLD_INFIMUM + old->page.frame, 8)); /* Log any change of the n_owned of the supremum record. */ a= PAGE_OLD_SUPREMUM - REC_N_OLD_EXTRA_BYTES; - if (block->frame[a] != old->frame[a]) + if (block->page.frame[a] != old->page.frame[a]) mtr->memcpy(*block, a, 1); - ut_ad(!memcmp(&block->frame[a + 1], &old->frame[a + 1], + ut_ad(!memcmp(&block->page.frame[a + 1], &old->page.frame[a + 1], PAGE_OLD_SUPREMUM_END - PAGE_OLD_SUPREMUM + REC_N_OLD_EXTRA_BYTES - 1)); /* Log the differences in the payload. 
*/ for (a= PAGE_OLD_SUPREMUM_END, e= top; a < e; a++) { - if (old->frame[a] == block->frame[a]) + if (old->page.frame[a] == block->page.frame[a]) continue; - while (--e, old->frame[e] == block->frame[e]); + while (--e, old->page.frame[e] == block->page.frame[e]); e++; ut_ad(a < e); - /* TODO: write MEMMOVE records to minimize this further! */ + /* TODO: write MEMMOVE records to minimize this further! */ mtr->memcpy(*block, a, e - a); - break; + break; } } e= srv_page_size - PAGE_DIR; - a= e - PAGE_DIR_SLOT_SIZE * page_dir_get_n_slots(block->frame); + a= e - PAGE_DIR_SLOT_SIZE * page_dir_get_n_slots(block->page.frame); /* Zero out the payload area. */ mtr->memset(*block, top, a - top, 0); @@ -1466,9 +1479,9 @@ static void btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, /* Log changes to the page directory. */ for (; a < e; a++) { - if (old->frame[a] == block->frame[a]) + if (old->page.frame[a] == block->page.frame[a]) continue; - while (--e, old->frame[e] == block->frame[e]); + while (--e, old->page.frame[e] == block->page.frame[e]); e++; ut_ad(a < e); /* Write log for the changed page directory slots. */ @@ -1542,10 +1555,10 @@ btr_page_reorganize( return false; } if (pos) { - cursor->rec = page_rec_get_nth(cursor->block->frame, pos); + cursor->rec = page_rec_get_nth(cursor->block->page.frame, pos); } else { ut_ad(cursor->rec == page_get_infimum_rec( - cursor->block->frame)); + cursor->block->page.frame)); } return true; @@ -1570,7 +1583,8 @@ btr_page_empty( ut_ad(!index->is_dummy); ut_ad(index->table->space->id == block->page.id().space()); #ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, block->frame, index)); + ut_a(!page_zip + || page_zip_validate(page_zip, block->page.frame, index)); #endif /* UNIV_ZIP_DEBUG */ btr_search_drop_page_hash_index(block); @@ -1583,7 +1597,7 @@ btr_page_empty( const ib_uint64_t autoinc = dict_index_is_clust(index) && index->page == block->page.id().page_no() - ? 
page_get_autoinc(block->frame) + ? page_get_autoinc(block->page.frame) : 0; if (page_zip) { @@ -1594,19 +1608,20 @@ btr_page_empty( static_assert(((FIL_PAGE_INDEX & 0xff00) | byte(FIL_PAGE_RTREE)) == FIL_PAGE_RTREE, "compatibility"); - mtr->write<1>(*block, FIL_PAGE_TYPE + 1 + block->frame, + mtr->write<1>(*block, FIL_PAGE_TYPE + 1 + + block->page.frame, byte(FIL_PAGE_RTREE)); - if (mach_read_from_8(block->frame + if (mach_read_from_8(block->page.frame + FIL_RTREE_SPLIT_SEQ_NUM)) { mtr->memset(block, FIL_RTREE_SPLIT_SEQ_NUM, 8, 0); } } mtr->write<2,mtr_t::MAYBE_NOP>(*block, PAGE_HEADER + PAGE_LEVEL - + block->frame, level); + + block->page.frame, level); if (autoinc) { mtr->write<8>(*block, PAGE_HEADER + PAGE_MAX_TRX_ID - + block->frame, autoinc); + + block->page.frame, autoinc); } } } @@ -1620,19 +1635,20 @@ void btr_set_instant(buf_block_t* root, const dict_index_t& index, mtr_t* mtr) ut_ad(index.n_core_fields > 0); ut_ad(index.n_core_fields < REC_MAX_N_FIELDS); ut_ad(index.is_instant()); - ut_ad(fil_page_get_type(root->frame) == FIL_PAGE_TYPE_INSTANT - || fil_page_get_type(root->frame) == FIL_PAGE_INDEX); - ut_ad(!page_has_siblings(root->frame)); + ut_ad(fil_page_get_type(root->page.frame) == FIL_PAGE_TYPE_INSTANT + || fil_page_get_type(root->page.frame) == FIL_PAGE_INDEX); + ut_ad(!page_has_siblings(root->page.frame)); ut_ad(root->page.id().page_no() == index.page); - rec_t* infimum = page_get_infimum_rec(root->frame); - rec_t* supremum = page_get_supremum_rec(root->frame); - byte* page_type = root->frame + FIL_PAGE_TYPE; - uint16_t i = page_header_get_field(root->frame, PAGE_INSTANT); + rec_t* infimum = page_get_infimum_rec(root->page.frame); + rec_t* supremum = page_get_supremum_rec(root->page.frame); + byte* page_type = root->page.frame + FIL_PAGE_TYPE; + uint16_t i = page_header_get_field(root->page.frame, PAGE_INSTANT); switch (mach_read_from_2(page_type)) { case FIL_PAGE_TYPE_INSTANT: - ut_ad(page_get_instant(root->frame) == index.n_core_fields); + 
ut_ad(page_get_instant(root->page.frame) + == index.n_core_fields); if (memcmp(infimum, "infimum", 8) || memcmp(supremum, "supremum", 8)) { ut_ad(index.table->instant); @@ -1649,21 +1665,21 @@ void btr_set_instant(buf_block_t* root, const dict_index_t& index, mtr_t* mtr) ut_ad("wrong page type" == 0); /* fall through */ case FIL_PAGE_INDEX: - ut_ad(!page_is_comp(root->frame) - || !page_get_instant(root->frame)); + ut_ad(!page_is_comp(root->page.frame) + || !page_get_instant(root->page.frame)); ut_ad(!memcmp(infimum, "infimum", 8)); ut_ad(!memcmp(supremum, "supremum", 8)); mtr->write<2>(*root, page_type, FIL_PAGE_TYPE_INSTANT); ut_ad(i <= PAGE_NO_DIRECTION); i |= static_cast<uint16_t>(index.n_core_fields << 3); - mtr->write<2>(*root, PAGE_HEADER + PAGE_INSTANT + root->frame, - i); + mtr->write<2>(*root, PAGE_HEADER + PAGE_INSTANT + + root->page.frame, i); break; } if (index.table->instant) { - mtr->memset(root, infimum - root->frame, 8, 0); - mtr->memset(root, supremum - root->frame, 7, 0); + mtr->memset(root, infimum - root->page.frame, 8, 0); + mtr->memset(root, supremum - root->page.frame, 7, 0); mtr->write<1,mtr_t::MAYBE_NOP>(*root, &supremum[7], index.n_core_null_bytes); } @@ -1680,13 +1696,13 @@ void btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr) ut_ad(index.is_primary()); if (buf_block_t *root = btr_root_block_get(&index, RW_SX_LATCH, mtr)) { - byte *page_type= root->frame + FIL_PAGE_TYPE; + byte *page_type= root->page.frame + FIL_PAGE_TYPE; if (all) { ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT || mach_read_from_2(page_type) == FIL_PAGE_INDEX); mtr->write<2,mtr_t::MAYBE_NOP>(*root, page_type, FIL_PAGE_INDEX); - byte *instant= PAGE_INSTANT + PAGE_HEADER + root->frame; + byte *instant= PAGE_INSTANT + PAGE_HEADER + root->page.frame; mtr->write<2,mtr_t::MAYBE_NOP>(*root, instant, page_ptr_get_direction(instant + 1)); } @@ -1694,7 +1710,7 @@ void btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr) 
ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT); static const byte supremuminfimum[8 + 8] = "supremuminfimum"; uint16_t infimum, supremum; - if (page_is_comp(root->frame)) + if (page_is_comp(root->page.frame)) { infimum= PAGE_NEW_INFIMUM; supremum= PAGE_NEW_SUPREMUM; @@ -1704,11 +1720,11 @@ void btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr) infimum= PAGE_OLD_INFIMUM; supremum= PAGE_OLD_SUPREMUM; } - ut_ad(!memcmp(&root->frame[infimum], supremuminfimum + 8, 8) == - !memcmp(&root->frame[supremum], supremuminfimum, 8)); - mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->frame[infimum], + ut_ad(!memcmp(&root->page.frame[infimum], supremuminfimum + 8, 8) == + !memcmp(&root->page.frame[supremum], supremuminfimum, 8)); + mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->page.frame[infimum], supremuminfimum + 8, 8); - mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->frame[supremum], + mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->page.frame[supremum], supremuminfimum, 8); } } @@ -1748,12 +1764,12 @@ btr_root_raise_and_insert( root = btr_cur_get_block(cursor); root_page_zip = buf_block_get_page_zip(root); - ut_ad(!page_is_empty(root->frame)); + ut_ad(!page_is_empty(root->page.frame)); index = btr_cur_get_index(cursor); ut_ad(index->n_core_null_bytes <= UT_BITS_IN_BYTES(index->n_nullable)); #ifdef UNIV_ZIP_DEBUG - ut_a(!root_page_zip || page_zip_validate(root_page_zip, root->frame, - index)); + ut_a(!root_page_zip + || page_zip_validate(root_page_zip, root->page.frame, index)); #endif /* UNIV_ZIP_DEBUG */ const page_id_t root_id{root->page.id()}; @@ -1762,9 +1778,9 @@ btr_root_raise_and_insert( ulint space = index->table->space_id; ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF - + root->frame, space)); + + root->page.frame, space)); ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP - + root->frame, space)); + + root->page.frame, space)); } ut_a(dict_index_get_page(index) == root_id.page_no()); @@ -1777,7 +1793,7 @@ 
btr_root_raise_and_insert( moving the root records to the new page, emptying the root, putting a node pointer to the new page, and then splitting the new page. */ - level = btr_page_get_level(root->frame); + level = btr_page_get_level(root->page.frame); new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr, mtr); @@ -1792,11 +1808,12 @@ btr_root_raise_and_insert( == page_zip_get_size(root_page_zip)); btr_page_create(new_block, new_page_zip, index, level, mtr); - if (page_has_siblings(new_block->frame)) { + if (page_has_siblings(new_block->page.frame)) { compile_time_assert(FIL_PAGE_NEXT == FIL_PAGE_PREV + 4); compile_time_assert(FIL_NULL == 0xffffffff); static_assert(FIL_PAGE_PREV % 8 == 0, "alignment"); - memset_aligned<8>(new_block->frame + FIL_PAGE_PREV, 0xff, 8); + memset_aligned<8>(new_block->page.frame + FIL_PAGE_PREV, + 0xff, 8); mtr->memset(new_block, FIL_PAGE_PREV, 8, 0xff); if (UNIV_LIKELY_NULL(new_page_zip)) { memset_aligned<8>(new_page_zip->data + FIL_PAGE_PREV, @@ -1811,17 +1828,17 @@ btr_root_raise_and_insert( || new_page_zip #endif /* UNIV_ZIP_COPY */ || !page_copy_rec_list_end(new_block, root, - page_get_infimum_rec(root->frame), + page_get_infimum_rec(root->page.frame), index, mtr)) { ut_a(new_page_zip); /* Copy the page byte for byte. */ - page_zip_copy_recs(new_block, - root_page_zip, root->frame, index, mtr); + page_zip_copy_recs(new_block, root_page_zip, + root->page.frame, index, mtr); /* Update the lock table and possible hash index. */ lock_move_rec_list_end(new_block, root, - page_get_infimum_rec(root->frame)); + page_get_infimum_rec(root->page.frame)); /* Move any existing predicate locks */ if (dict_index_is_spatial(index)) { @@ -1840,7 +1857,7 @@ btr_root_raise_and_insert( longer is a leaf page. (Older versions of InnoDB did set PAGE_MAX_TRX_ID on all secondary index pages.) 
*/ byte* p = my_assume_aligned<8>( - PAGE_HEADER + PAGE_MAX_TRX_ID + root->frame); + PAGE_HEADER + PAGE_MAX_TRX_ID + root->page.frame); if (mach_read_from_8(p)) { mtr->memset(root, max_trx_id, 8, 0); if (UNIV_LIKELY_NULL(root->page.zip.data)) { @@ -1853,7 +1870,7 @@ btr_root_raise_and_insert( root page; on other clustered index pages, we want to reserve the field PAGE_MAX_TRX_ID for future use. */ byte* p = my_assume_aligned<8>( - PAGE_HEADER + PAGE_MAX_TRX_ID + new_block->frame); + PAGE_HEADER + PAGE_MAX_TRX_ID + new_block->page.frame); if (mach_read_from_8(p)) { mtr->memset(new_block, max_trx_id, 8, 0); if (UNIV_LIKELY_NULL(new_block->page.zip.data)) { @@ -1878,7 +1895,7 @@ btr_root_raise_and_insert( *heap = mem_heap_create(1000); } - rec = page_rec_get_next(page_get_infimum_rec(new_block->frame)); + rec = page_rec_get_next(page_get_infimum_rec(new_block->page.frame)); new_page_no = new_block->page.id().page_no(); /* Build the node pointer (= node key and page address) for the @@ -1903,14 +1920,14 @@ btr_root_raise_and_insert( /* Rebuild the root page to get free space */ btr_page_empty(root, root_page_zip, index, level + 1, mtr); /* btr_page_empty() is supposed to zero-initialize the field. 
*/ - ut_ad(!page_get_instant(root->frame)); + ut_ad(!page_get_instant(root->page.frame)); if (index->is_instant()) { ut_ad(!root_page_zip); btr_set_instant(root, *index, mtr); } - ut_ad(!page_has_siblings(root->frame)); + ut_ad(!page_has_siblings(root->page.frame)); page_cursor = btr_cur_get_page_cur(cursor); @@ -2395,12 +2412,12 @@ btr_attach_half_pages( } /* Get the level of the split pages */ - const ulint level = btr_page_get_level(buf_block_get_frame(block)); - ut_ad(level == btr_page_get_level(buf_block_get_frame(new_block))); + const ulint level = btr_page_get_level(block->page.frame); + ut_ad(level == btr_page_get_level(new_block->page.frame)); /* Get the previous and next pages of page */ - const uint32_t prev_page_no = btr_page_get_prev(block->frame); - const uint32_t next_page_no = btr_page_get_next(block->frame); + const uint32_t prev_page_no = btr_page_get_prev(block->page.frame); + const uint32_t next_page_no = btr_page_get_next(block->page.frame); /* for consistency, both blocks should be locked, before change */ if (prev_page_no != FIL_NULL && direction == FSP_DOWN) { @@ -2432,9 +2449,9 @@ btr_attach_half_pages( if (prev_block) { #ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(prev_block->frame) - == page_is_comp(block->frame)); - ut_a(btr_page_get_next(prev_block->frame) + ut_a(page_is_comp(prev_block->page.frame) + == page_is_comp(block->page.frame)); + ut_a(btr_page_get_next(prev_block->page.frame) == block->page.id().page_no()); #endif /* UNIV_BTR_DEBUG */ btr_page_set_next(prev_block, lower_block->page.id().page_no(), @@ -2443,9 +2460,9 @@ btr_attach_half_pages( if (next_block) { #ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(next_block->frame) - == page_is_comp(block->frame)); - ut_a(btr_page_get_prev(next_block->frame) + ut_a(page_is_comp(next_block->page.frame) + == page_is_comp(block->page.frame)); + ut_a(btr_page_get_prev(next_block->page.frame) == block->page.id().page_no()); #endif /* UNIV_BTR_DEBUG */ btr_page_set_prev(next_block, 
upper_block->page.id().page_no(), @@ -2454,11 +2471,13 @@ btr_attach_half_pages( if (direction == FSP_DOWN) { ut_ad(lower_block == new_block); - ut_ad(btr_page_get_next(upper_block->frame) == next_page_no); + ut_ad(btr_page_get_next(upper_block->page.frame) + == next_page_no); btr_page_set_prev(lower_block, prev_page_no, mtr); } else { ut_ad(upper_block == new_block); - ut_ad(btr_page_get_prev(lower_block->frame) == prev_page_no); + ut_ad(btr_page_get_prev(lower_block->page.frame) + == prev_page_no); btr_page_set_next(upper_block, next_page_no, mtr); } @@ -2490,10 +2509,10 @@ btr_page_tuple_smaller( page_cur_move_to_next(&pcur); first_rec = page_cur_get_rec(&pcur); - *offsets = rec_get_offsets( - first_rec, cursor->index, *offsets, - page_is_leaf(block->frame) ? cursor->index->n_core_fields : 0, - n_uniq, heap); + *offsets = rec_get_offsets(first_rec, cursor->index, *offsets, + page_is_leaf(block->page.frame) + ? cursor->index->n_core_fields : 0, + n_uniq, heap); return(cmp_dtuple_rec(tuple, first_rec, *offsets) < 0); } @@ -3053,7 +3072,7 @@ dberr_t btr_level_list_remove(const buf_block_t& block, ut_ad(index.table->space->id == block.page.id().space()); /* Get the previous and next page numbers of page */ - const page_t* page = block.frame; + const page_t* page = block.page.frame; const uint32_t prev_page_no = btr_page_get_prev(page); const uint32_t next_page_no = btr_page_get_next(page); @@ -3064,10 +3083,11 @@ dberr_t btr_level_list_remove(const buf_block_t& block, index, prev_page_no, RW_X_LATCH, page_is_leaf(page), mtr); #ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(prev_block->frame) == page_is_comp(page)); + ut_a(page_is_comp(prev_block->page.frame) + == page_is_comp(page)); static_assert(FIL_PAGE_NEXT % 4 == 0, "alignment"); static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment"); - ut_a(!memcmp_aligned<4>(prev_block->frame + FIL_PAGE_NEXT, + ut_a(!memcmp_aligned<4>(prev_block->page.frame + FIL_PAGE_NEXT, page + FIL_PAGE_OFFSET, 4)); #endif /* UNIV_BTR_DEBUG */ @@ 
-3083,10 +3103,11 @@ dberr_t btr_level_list_remove(const buf_block_t& block, return DB_ERROR; } #ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(next_block->frame) == page_is_comp(page)); + ut_a(page_is_comp(next_block->page.frame) + == page_is_comp(page)); static_assert(FIL_PAGE_PREV % 4 == 0, "alignment"); static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment"); - ut_a(!memcmp_aligned<4>(next_block->frame + FIL_PAGE_PREV, + ut_a(!memcmp_aligned<4>(next_block->page.frame + FIL_PAGE_PREV, page + FIL_PAGE_OFFSET, 4)); #endif /* UNIV_BTR_DEBUG */ @@ -3203,7 +3224,7 @@ btr_lift_page_up( /* Make the father empty */ btr_page_empty(father_block, father_page_zip, index, page_level, mtr); /* btr_page_empty() is supposed to zero-initialize the field. */ - ut_ad(!page_get_instant(father_block->frame)); + ut_ad(!page_get_instant(father_block->page.frame)); if (index->is_instant() && father_block->page.id().page_no() == root_page_no) { @@ -3256,7 +3277,8 @@ btr_lift_page_up( /* Go upward to root page, decrementing levels by one. */ for (i = lift_father_up ? 1 : 0; i < n_blocks; i++, page_level++) { - ut_ad(btr_page_get_level(blocks[i]->frame) == page_level + 1); + ut_ad(btr_page_get_level(blocks[i]->page.frame) + == page_level + 1); btr_page_set_level(blocks[i], page_level, mtr); } @@ -3272,7 +3294,7 @@ btr_lift_page_up( && !index->table->is_temporary()) { ibuf_reset_free_bits(father_block); } - ut_ad(page_validate(father_block->frame, index)); + ut_ad(page_validate(father_block->page.frame, index)); ut_ad(btr_check_node_ptr(index, father_block, mtr)); return(lift_father_up ? block_orig : father_block); @@ -3448,7 +3470,7 @@ retry: offsets2 = rec_get_offsets( btr_cur_get_rec(&cursor2), index, NULL, - page_is_leaf(cursor2.page_cur.block->frame) + page_is_leaf(btr_cur_get_page(&cursor2)) ? 
index->n_fields : 0, ULINT_UNDEFINED, &heap); @@ -3627,7 +3649,7 @@ retry: offsets2 = rec_get_offsets( btr_cur_get_rec(&cursor2), index, NULL, - page_is_leaf(cursor2.page_cur.block->frame) + page_is_leaf(btr_cur_get_page(&cursor2)) ? index->n_fields : 0, ULINT_UNDEFINED, &heap); @@ -3747,7 +3769,7 @@ func_exit: ut_ad(nth_rec > 0); btr_cur_position( index, - page_rec_get_nth(merge_block->frame, nth_rec), + page_rec_get_nth(merge_block->page.frame, nth_rec), merge_block, cursor); } @@ -3758,7 +3780,7 @@ func_exit: err_exit: /* We play it safe and reset the free bits. */ if (merge_block && merge_block->zip_size() - && page_is_leaf(merge_block->frame) + && page_is_leaf(merge_block->page.frame) && !dict_index_is_clust(index)) { ibuf_reset_free_bits(merge_block); @@ -3787,8 +3809,9 @@ btr_discard_only_page_on_level( ut_ad(!index->is_dummy); /* Save the PAGE_MAX_TRX_ID from the leaf page. */ - const trx_id_t max_trx_id = page_get_max_trx_id(block->frame); - const rec_t* r = page_rec_get_next(page_get_infimum_rec(block->frame)); + const trx_id_t max_trx_id = page_get_max_trx_id(block->page.frame); + const rec_t* r = page_rec_get_next( + page_get_infimum_rec(block->page.frame)); ut_ad(rec_is_metadata(r, *index) == index->is_instant()); while (block->page.id().page_no() != dict_index_get_page(index)) { @@ -3827,7 +3850,7 @@ btr_discard_only_page_on_level( /* block is the root page, which must be empty, except for the node pointer to the (now discarded) block(s). */ - ut_ad(!page_has_siblings(block->frame)); + ut_ad(!page_has_siblings(block->page.frame)); #ifdef UNIV_BTR_DEBUG if (!dict_index_is_ibuf(index)) { @@ -3861,7 +3884,7 @@ btr_discard_only_page_on_level( btr_page_empty(block, buf_block_get_page_zip(block), index, 0, mtr); ut_ad(page_is_leaf(buf_block_get_frame(block))); /* btr_page_empty() is supposed to zero-initialize the field. 
*/ - ut_ad(!page_get_instant(block->frame)); + ut_ad(!page_get_instant(block->page.frame)); if (index->is_primary()) { if (rec) { @@ -3923,15 +3946,15 @@ btr_discard_page( /* Decide the page which will inherit the locks */ - const uint32_t left_page_no = btr_page_get_prev(block->frame); - const uint32_t right_page_no = btr_page_get_next(block->frame); + const uint32_t left_page_no = btr_page_get_prev(block->page.frame); + const uint32_t right_page_no = btr_page_get_next(block->page.frame); ut_d(bool parent_is_different = false); if (left_page_no != FIL_NULL) { merge_block = btr_block_get(*index, left_page_no, RW_X_LATCH, true, mtr); #ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(merge_block->frame) + ut_a(btr_page_get_next(merge_block->page.frame) == block->page.id().page_no()); #endif /* UNIV_BTR_DEBUG */ ut_d(parent_is_different = @@ -3944,14 +3967,14 @@ btr_discard_page( merge_block = btr_block_get(*index, right_page_no, RW_X_LATCH, true, mtr); #ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_prev(merge_block->frame) + ut_a(btr_page_get_prev(merge_block->page.frame) == block->page.id().page_no()); #endif /* UNIV_BTR_DEBUG */ ut_d(parent_is_different = page_rec_is_supremum( page_rec_get_next(btr_cur_get_rec(&parent_cursor)))); - if (!page_is_leaf(merge_block->frame)) { + if (!page_is_leaf(merge_block->page.frame)) { rec_t* node_ptr = page_rec_get_next( - page_get_infimum_rec(merge_block->frame)); + page_get_infimum_rec(merge_block->page.frame)); ut_ad(page_rec_is_user_rec(node_ptr)); /* We have to mark the leftmost node pointer as the predefined minimum record. 
*/ @@ -3964,9 +3987,12 @@ btr_discard_page( return; } - ut_a(page_is_comp(merge_block->frame) == page_is_comp(block->frame)); - ut_ad(!memcmp_aligned<2>(&merge_block->frame[PAGE_HEADER + PAGE_LEVEL], - &block->frame[PAGE_HEADER + PAGE_LEVEL], 2)); + ut_a(page_is_comp(merge_block->page.frame) + == page_is_comp(block->page.frame)); + ut_ad(!memcmp_aligned<2>(&merge_block->page.frame + [PAGE_HEADER + PAGE_LEVEL], + &block->page.frame + [PAGE_HEADER + PAGE_LEVEL], 2)); btr_search_drop_page_hash_index(block); if (dict_index_is_spatial(index)) { @@ -3983,8 +4009,8 @@ btr_discard_page( page_zip_des_t* merge_page_zip = buf_block_get_page_zip(merge_block); ut_a(!merge_page_zip - || page_zip_validate(merge_page_zip, merge_block->frame, - index)); + || page_zip_validate(merge_page_zip, + merge_block->page.frame, index)); } #endif /* UNIV_ZIP_DEBUG */ diff --git a/storage/innobase/btr/btr0bulk.cc b/storage/innobase/btr/btr0bulk.cc index 046291158a8..263a2f92003 100644 --- a/storage/innobase/btr/btr0bulk.cc +++ b/storage/innobase/btr/btr0bulk.cc @@ -494,8 +494,8 @@ inline void PageBulk::finishPage() inline bool PageBulk::needs_finish() const { - ut_ad(page_align(m_cur_rec) == m_block->frame); - ut_ad(m_page == m_block->frame); + ut_ad(page_align(m_cur_rec) == m_block->page.frame); + ut_ad(m_page == m_block->page.frame); if (!m_page[PAGE_HEADER + PAGE_DIRECTION_B]) return true; ulint heap_no, n_heap= page_header_get_field(m_page, PAGE_N_HEAP); @@ -839,7 +839,7 @@ PageBulk::release() finish(); /* We fix the block because we will re-pin it soon. */ - buf_block_buf_fix_inc(m_block); + m_block->page.fix(); /* No other threads can modify this block. 
*/ m_modify_clock = buf_block_get_modify_clock(m_block); @@ -873,11 +873,11 @@ PageBulk::latch() ut_ad(m_block != NULL); } - buf_block_buf_fix_dec(m_block); + ut_d(const auto buf_fix_count =) m_block->page.unfix(); - ut_ad(m_block->page.buf_fix_count()); - - ut_ad(m_cur_rec > m_page && m_cur_rec < m_heap_top); + ut_ad(buf_fix_count); + ut_ad(m_cur_rec > m_page); + ut_ad(m_cur_rec < m_heap_top); return (m_err); } @@ -1208,7 +1208,7 @@ BtrBulk::finish(dberr_t err) last_block = btr_block_get(*m_index, last_page_no, RW_X_LATCH, false, &mtr); first_rec = page_rec_get_next( - page_get_infimum_rec(last_block->frame)); + page_get_infimum_rec(last_block->page.frame)); ut_ad(page_rec_is_user_rec(first_rec)); /* Copy last page to root page. */ diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 1c9ae6d2047..9d509637f44 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -246,8 +246,8 @@ btr_cur_latch_leaves( true, mtr); latch_leaves.blocks[1] = get_block; #ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(get_block->frame) - == page_is_comp(block->frame)); + ut_a(page_is_comp(get_block->page.frame) + == page_is_comp(block->page.frame)); #endif /* UNIV_BTR_DEBUG */ if (spatial) { cursor->rtr_info->tree_blocks[RTR_MAX_LEVELS] @@ -262,7 +262,7 @@ btr_cur_latch_leaves( MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); /* x-latch also siblings from left to right */ - left_page_no = btr_page_get_prev(block->frame); + left_page_no = btr_page_get_prev(block->page.frame); if (left_page_no != FIL_NULL) { @@ -295,15 +295,8 @@ btr_cur_latch_leaves( latch_leaves.blocks[1] = get_block; #ifdef UNIV_BTR_DEBUG - /* Sanity check only after both the blocks are latched. 
*/ - if (latch_leaves.blocks[0] != NULL) { - ut_a(page_is_comp(latch_leaves.blocks[0]->frame) - == page_is_comp(block->frame)); - ut_a(btr_page_get_next(latch_leaves.blocks[0]->frame) - == block->page.id().page_no()); - } - ut_a(page_is_comp(get_block->frame) - == page_is_comp(block->frame)); + ut_a(page_is_comp(get_block->page.frame) + == page_is_comp(block->page.frame)); #endif /* UNIV_BTR_DEBUG */ if (spatial) { @@ -311,7 +304,7 @@ btr_cur_latch_leaves( = get_block; } - right_page_no = btr_page_get_next(block->frame); + right_page_no = btr_page_get_next(block->page.frame); if (right_page_no != FIL_NULL) { if (spatial) { @@ -326,9 +319,9 @@ btr_cur_latch_leaves( latch_leaves.blocks[2] = get_block; #ifdef UNIV_BTR_DEBUG if (get_block) { - ut_a(page_is_comp(get_block->frame) - == page_is_comp(block->frame)); - ut_a(btr_page_get_prev(get_block->frame) + ut_a(page_is_comp(get_block->page.frame) + == page_is_comp(block->page.frame)); + ut_a(btr_page_get_prev(get_block->page.frame) == block->page.id().page_no()); } #endif /* UNIV_BTR_DEBUG */ @@ -346,7 +339,7 @@ btr_cur_latch_leaves( /* Because we are holding index->lock, no page splits or merges may run concurrently, and we may read FIL_PAGE_PREV from a buffer-fixed, unlatched page. 
*/ - left_page_no = btr_page_get_prev(block->frame); + left_page_no = btr_page_get_prev(block->page.frame); if (left_page_no != FIL_NULL) { latch_leaves.savepoints[0] = mtr_set_savepoint(mtr); @@ -356,9 +349,9 @@ btr_cur_latch_leaves( latch_leaves.blocks[0] = get_block; cursor->left_block = get_block; #ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(get_block->frame) - == page_is_comp(block->frame)); - ut_a(btr_page_get_next(get_block->frame) + ut_a(page_is_comp(get_block->page.frame) + == page_is_comp(block->page.frame)); + ut_a(btr_page_get_next(get_block->page.frame) == block->page.id().page_no()); #endif /* UNIV_BTR_DEBUG */ } @@ -369,8 +362,8 @@ btr_cur_latch_leaves( true, mtr); latch_leaves.blocks[1] = get_block; #ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(get_block->frame) - == page_is_comp(block->frame)); + ut_a(page_is_comp(get_block->page.frame) + == page_is_comp(block->page.frame)); #endif /* UNIV_BTR_DEBUG */ return(latch_leaves); case BTR_CONT_MODIFY_TREE: @@ -407,13 +400,13 @@ unreadable: buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, mtr); - if (!root || btr_cur_instant_root_init(index, root->frame)) { + if (!root || btr_cur_instant_root_init(index, root->page.frame)) { goto unreadable; } ut_ad(index->n_core_null_bytes != dict_index_t::NO_CORE_NULL_BYTES); - if (fil_page_get_type(root->frame) == FIL_PAGE_INDEX) { + if (fil_page_get_type(root->page.frame) == FIL_PAGE_INDEX) { ut_ad(!index->is_instant()); return DB_SUCCESS; } @@ -431,7 +424,7 @@ unreadable: } ut_ad(page_cur_is_before_first(&cur.page_cur)); - ut_ad(page_is_leaf(cur.page_cur.block->frame)); + ut_ad(page_is_leaf(cur.page_cur.block->page.frame)); page_cur_move_to_next(&cur.page_cur); @@ -556,20 +549,22 @@ incompatible: page_id_t(space->id, mach_read_from_4(ptr + BTR_EXTERN_PAGE_NO)), 0, RW_S_LATCH, mtr); - if (fil_page_get_type(block->frame) != FIL_PAGE_TYPE_BLOB - || mach_read_from_4(&block->frame[FIL_PAGE_DATA - + BTR_BLOB_HDR_NEXT_PAGE_NO]) + if (fil_page_get_type(block->page.frame) 
!= FIL_PAGE_TYPE_BLOB + || mach_read_from_4(&block->page.frame + [FIL_PAGE_DATA + + BTR_BLOB_HDR_NEXT_PAGE_NO]) != FIL_NULL - || mach_read_from_4(&block->frame[FIL_PAGE_DATA - + BTR_BLOB_HDR_PART_LEN]) + || mach_read_from_4(&block->page.frame + [FIL_PAGE_DATA + + BTR_BLOB_HDR_PART_LEN]) != len) { goto incompatible; } /* The unused part of the BLOB page should be zero-filled. */ - for (const byte* b = block->frame + for (const byte* b = block->page.frame + (FIL_PAGE_DATA + BTR_BLOB_HDR_SIZE) + len, - * const end = block->frame + srv_page_size + * const end = block->page.frame + srv_page_size - BTR_EXTERN_LEN; b < end; ) { if (*b++) { @@ -578,8 +573,8 @@ incompatible: } if (index->table->deserialise_columns( - &block->frame[FIL_PAGE_DATA + BTR_BLOB_HDR_SIZE], - len)) { + &block->page.frame + [FIL_PAGE_DATA + BTR_BLOB_HDR_SIZE], len)) { goto incompatible; } @@ -768,7 +763,8 @@ btr_cur_optimistic_latch_leaves( mtr_t* mtr) { ut_ad(block->page.buf_fix_count()); - ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE); + ut_ad(block->page.in_file()); + ut_ad(block->page.frame); switch (*latch_mode) { default: @@ -783,12 +779,12 @@ btr_cur_optimistic_latch_leaves( uint32_t curr_page_no, left_page_no; { transactional_shared_lock_guard<block_lock> g{ - block->lock}; + block->page.lock}; if (block->modify_clock != modify_clock) { return false; } curr_page_no = block->page.id().page_no(); - left_page_no = btr_page_get_prev(block->frame); + left_page_no = btr_page_get_prev(block->page.frame); } const rw_lock_type_t mode = *latch_mode == BTR_SEARCH_PREV @@ -805,12 +801,10 @@ btr_cur_optimistic_latch_leaves( if (!cursor->left_block) { cursor->index->table->file_unreadable = true; - } - - if (cursor->left_block->page.status - == buf_page_t::FREED - || btr_page_get_next(cursor->left_block->frame) - != curr_page_no) { + } else if (cursor->left_block->page.is_freed() + || btr_page_get_next( + cursor->left_block->page.frame) + != curr_page_no) { /* release the left block */ 
btr_leaf_page_release( cursor->left_block, mode, mtr); @@ -821,7 +815,8 @@ btr_cur_optimistic_latch_leaves( } if (buf_page_optimistic_get(mode, block, modify_clock, mtr)) { - if (btr_page_get_prev(block->frame) == left_page_no) { + if (btr_page_get_prev(block->page.frame) + == left_page_no) { /* block was already buffer-fixed while entering the function and buf_page_optimistic_get() buffer-fixes @@ -1713,7 +1708,7 @@ retry_page_get: /* Because we are holding index->lock, no page splits or merges may run concurrently, and we may read FIL_PAGE_PREV from a buffer-fixed, unlatched page. */ - uint32_t left_page_no = btr_page_get_prev(block->frame); + uint32_t left_page_no = btr_page_get_prev(block->page.frame); if (left_page_no != FIL_NULL) { ut_ad(prev_n_blocks < leftmost_from_level); @@ -2010,14 +2005,14 @@ retry_page_get: } if (rw_latch == RW_NO_LATCH && height != 0) { - block->lock.s_lock(); + block->page.lock.s_lock(); } lock_prdt_lock(block, &prdt, index, LOCK_S, LOCK_PREDICATE, cursor->thr); if (rw_latch == RW_NO_LATCH && height != 0) { - block->lock.s_unlock(); + block->page.lock.s_unlock(); } } @@ -2091,7 +2086,7 @@ need_opposite_intention: ut_ad(mtr->memo_contains_flagged( &index->lock, MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); - block->lock.s_lock(); + block->page.lock.s_lock(); add_latch = true; } @@ -2123,7 +2118,7 @@ need_opposite_intention: } if (add_latch) { - block->lock.s_unlock(); + block->page.lock.s_unlock(); } ut_ad(!page_rec_is_supremum(node_ptr)); @@ -3321,12 +3316,12 @@ Prefetch siblings of the leaf for the pessimistic operation. 
static void btr_cur_prefetch_siblings(const buf_block_t *block, const dict_index_t *index) { - ut_ad(page_is_leaf(block->frame)); + ut_ad(page_is_leaf(block->page.frame)); if (index->is_ibuf()) return; - const page_t *page= block->frame; + const page_t *page= block->page.frame; uint32_t prev= mach_read_from_4(my_assume_aligned<4>(page + FIL_PAGE_PREV)); uint32_t next= mach_read_from_4(my_assume_aligned<4>(page + FIL_PAGE_NEXT)); @@ -4255,7 +4250,7 @@ btr_cur_update_in_place( roll_ptr_t roll_ptr = 0; ulint was_delete_marked; - ut_ad(page_is_leaf(cursor->page_cur.block->frame)); + ut_ad(page_is_leaf(cursor->page_cur.block->page.frame)); rec = btr_cur_get_rec(cursor); index = cursor->index; ut_ad(rec_offs_validate(rec, index, offsets)); @@ -4425,15 +4420,15 @@ static void btr_cur_trim_alter_metadata(dtuple_t* entry, page_id_t(index->table->space->id, mach_read_from_4(ptr + BTR_EXTERN_PAGE_NO)), 0, RW_S_LATCH, &mtr); - ut_ad(fil_page_get_type(block->frame) == FIL_PAGE_TYPE_BLOB); - ut_ad(mach_read_from_4(&block->frame[FIL_PAGE_DATA - + BTR_BLOB_HDR_NEXT_PAGE_NO]) + ut_ad(fil_page_get_type(block->page.frame) == FIL_PAGE_TYPE_BLOB); + ut_ad(mach_read_from_4(&block->page.frame + [FIL_PAGE_DATA + BTR_BLOB_HDR_NEXT_PAGE_NO]) == FIL_NULL); - ut_ad(mach_read_from_4(&block->frame[FIL_PAGE_DATA - + BTR_BLOB_HDR_PART_LEN]) + ut_ad(mach_read_from_4(&block->page.frame + [FIL_PAGE_DATA + BTR_BLOB_HDR_PART_LEN]) == mach_read_from_4(ptr + BTR_EXTERN_LEN + 4)); n_fields = mach_read_from_4( - &block->frame[FIL_PAGE_DATA + BTR_BLOB_HDR_SIZE]) + &block->page.frame[FIL_PAGE_DATA + BTR_BLOB_HDR_SIZE]) + index->first_user_field(); /* Rollback should not increase the number of fields. 
*/ ut_ad(n_fields <= index->n_fields); @@ -4839,7 +4834,7 @@ btr_cur_pess_upd_restore_supremum( prev_block = buf_page_get_with_no_latch(prev_id, block->zip_size(), mtr); #ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(prev_block->frame) + ut_a(btr_page_get_next(prev_block->page.frame) == block->page.id().page_no()); #endif /* UNIV_BTR_DEBUG */ @@ -4910,7 +4905,8 @@ btr_cur_pessimistic_update( MTR_MEMO_SX_LOCK)); ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); #ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, block->frame, index)); + ut_a(!page_zip + || page_zip_validate(page_zip, block->page.frame, index)); #endif /* UNIV_ZIP_DEBUG */ ut_ad(!page_zip || !index->table->is_temporary()); /* The insert buffer tree should never be updated in place. */ @@ -4943,7 +4939,7 @@ btr_cur_pessimistic_update( if (page_zip && optim_err != DB_ZIP_OVERFLOW && !dict_index_is_clust(index) - && page_is_leaf(block->frame)) { + && page_is_leaf(block->page.frame)) { ut_ad(!index->table->is_temporary()); ibuf_update_free_bits_zip(block, mtr); } @@ -4990,7 +4986,7 @@ btr_cur_pessimistic_update( /* We have to set appropriate extern storage bits in the new record to be inserted: we have to remember which fields were such */ - ut_ad(!page_is_comp(block->frame) || !rec_get_node_ptr_flag(rec)); + ut_ad(!page_is_comp(block->page.frame) || !rec_get_node_ptr_flag(rec)); ut_ad(rec_offs_validate(rec, index, *offsets)); if ((flags & BTR_NO_UNDO_LOG_FLAG) @@ -5016,7 +5012,7 @@ btr_cur_pessimistic_update( if (page_zip_rec_needs_ext( rec_get_converted_size(index, new_entry, n_ext), - page_is_comp(block->frame), + page_is_comp(block->page.frame), dict_index_get_n_fields(index), block->zip_size()) || (UNIV_UNLIKELY(update->is_alter_metadata()) @@ -5032,7 +5028,7 @@ btr_cur_pessimistic_update( BTR_KEEP_IBUF_BITMAP. 
*/ #ifdef UNIV_ZIP_DEBUG ut_a(!page_zip - || page_zip_validate(page_zip, block->frame, + || page_zip_validate(page_zip, block->page.frame, index)); #endif /* UNIV_ZIP_DEBUG */ index->table->space->release_free_extents(n_reserved); @@ -5040,7 +5036,7 @@ btr_cur_pessimistic_update( goto err_exit; } - ut_ad(page_is_leaf(block->frame)); + ut_ad(page_is_leaf(block->page.frame)); ut_ad(dict_index_is_clust(index)); ut_ad(flags & BTR_KEEP_POS_FLAG); } @@ -5076,8 +5072,9 @@ btr_cur_pessimistic_update( } const ulint max_ins_size = page_zip - ? 0 : page_get_max_insert_size_after_reorganize(block->frame, - 1); + ? 0 + : page_get_max_insert_size_after_reorganize(block->page.frame, + 1); if (UNIV_UNLIKELY(is_metadata)) { ut_ad(new_entry->is_metadata()); @@ -5105,7 +5102,8 @@ btr_cur_pessimistic_update( } #ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, block->frame, index)); + ut_a(!page_zip + || page_zip_validate(page_zip, block->page.frame, index)); #endif /* UNIV_ZIP_DEBUG */ page_cursor = btr_cur_get_page_cur(cursor); @@ -5151,7 +5149,7 @@ btr_cur_pessimistic_update( } bool adjust = big_rec_vec && (flags & BTR_KEEP_POS_FLAG); - ut_ad(!adjust || page_is_leaf(block->frame)); + ut_ad(!adjust || page_is_leaf(block->page.frame)); if (btr_cur_compress_if_useful(cursor, adjust, mtr)) { if (adjust) { @@ -5159,7 +5157,7 @@ btr_cur_pessimistic_update( true, *offsets); } } else if (!dict_index_is_clust(index) - && page_is_leaf(block->frame)) { + && page_is_leaf(block->page.frame)) { /* Update the free bits in the insert buffer. This is the same block which was skipped by BTR_KEEP_IBUF_BITMAP. */ @@ -5174,7 +5172,7 @@ btr_cur_pessimistic_update( if (!srv_read_only_mode && !big_rec_vec - && page_is_leaf(block->frame) + && page_is_leaf(block->page.frame) && !dict_index_is_online_ddl(index)) { mtr_memo_release(mtr, &index->lock, @@ -5199,13 +5197,13 @@ btr_cur_pessimistic_update( BTR_KEEP_IBUF_BITMAP. 
*/ if (!dict_index_is_clust(index) && !index->table->is_temporary() - && page_is_leaf(block->frame)) { + && page_is_leaf(block->page.frame)) { ibuf_reset_free_bits(block); } } if (big_rec_vec != NULL) { - ut_ad(page_is_leaf(block->frame)); + ut_ad(page_is_leaf(block->page.frame)); ut_ad(dict_index_is_clust(index)); ut_ad(flags & BTR_KEEP_POS_FLAG); @@ -5260,8 +5258,8 @@ btr_cur_pessimistic_update( /* The new inserted record owns its possible externally stored fields */ #ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, block->frame, - index)); + ut_a(!page_zip + || page_zip_validate(page_zip, block->page.frame, index)); #endif /* UNIV_ZIP_DEBUG */ btr_cur_unmark_extern_fields(btr_cur_get_block(cursor), rec, index, *offsets, mtr); @@ -5511,7 +5509,7 @@ btr_cur_optimistic_delete_func( } if (UNIV_UNLIKELY(block->page.id().page_no() == cursor->index->page - && page_get_n_recs(block->frame) == 1 + && page_get_n_recs(block->page.frame) == 1 + (cursor->index->is_instant() && !rec_is_metadata(rec, *cursor->index)) && !cursor->index->must_avoid_clear_instant_add())) { @@ -5523,7 +5521,7 @@ btr_cur_optimistic_delete_func( table becomes empty, clean up the whole page. 
*/ dict_index_t* index = cursor->index; const rec_t* first_rec = page_rec_get_next_const( - page_get_infimum_rec(block->frame)); + page_get_infimum_rec(block->page.frame)); ut_ad(!index->is_instant() || rec_is_metadata(first_rec, *index)); const bool is_metadata = rec_is_metadata(rec, *index); @@ -5974,9 +5972,9 @@ btr_cur_add_path_info( const buf_block_t* block = btr_cur_get_block(cursor); slot->nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor)); - slot->n_recs = page_get_n_recs(block->frame); + slot->n_recs = page_get_n_recs(block->page.frame); slot->page_no = block->page.id().page_no(); - slot->page_level = btr_page_get_level(block->frame); + slot->page_level = btr_page_get_level(block->page.frame); } /*******************************************************************//** @@ -6796,7 +6794,7 @@ struct btr_blob_log_check_t { m_op(op) { ut_ad(rec_offs_validate(*m_rec, m_pcur->index(), m_offsets)); - ut_ad((*m_block)->frame == page_align(*m_rec)); + ut_ad((*m_block)->page.frame == page_align(*m_rec)); ut_ad(*m_rec == btr_pcur_get_rec(m_pcur)); } @@ -6811,7 +6809,7 @@ struct btr_blob_log_check_t { if (UNIV_UNLIKELY(m_op == BTR_STORE_INSERT_BULK)) { offs = page_offset(*m_rec); page_no = (*m_block)->page.id().page_no(); - buf_block_buf_fix_inc(*m_block); + (*m_block)->page.fix(); ut_ad(page_no != FIL_NULL); } else { btr_pcur_store_position(m_pcur, m_mtr); @@ -6833,10 +6831,10 @@ struct btr_blob_log_check_t { m_pcur->btr_cur.page_cur.block = btr_block_get( *index, page_no, RW_X_LATCH, false, m_mtr); m_pcur->btr_cur.page_cur.rec - = m_pcur->btr_cur.page_cur.block->frame + = m_pcur->btr_cur.page_cur.block->page.frame + offs; - buf_block_buf_fix_dec(m_pcur->btr_cur.page_cur.block); + m_pcur->btr_cur.page_cur.block->page.unfix(); } else { ut_ad(m_pcur->rel_pos == BTR_PCUR_ON); bool ret = btr_pcur_restore_position( @@ -7048,42 +7046,43 @@ btr_store_big_rec_extern_fields( if (page_zip) { mtr.write<4>(*prev_block, - prev_block->frame + prev_block->page.frame + 
FIL_PAGE_NEXT, page_no); memcpy_aligned<4>( buf_block_get_page_zip( prev_block) ->data + FIL_PAGE_NEXT, - prev_block->frame + prev_block->page.frame + FIL_PAGE_NEXT, 4); } else { mtr.write<4>(*prev_block, BTR_BLOB_HDR_NEXT_PAGE_NO + FIL_PAGE_DATA - + prev_block->frame, + + prev_block->page.frame, page_no); } } else if (dict_index_is_online_ddl(index)) { row_log_table_blob_alloc(index, page_no); } - ut_ad(!page_has_siblings(block->frame)); - ut_ad(!fil_page_get_type(block->frame)); + ut_ad(!page_has_siblings(block->page.frame)); + ut_ad(!fil_page_get_type(block->page.frame)); if (page_zip) { int err; page_zip_des_t* blob_page_zip; mtr.write<1>(*block, - FIL_PAGE_TYPE + 1 + block->frame, + FIL_PAGE_TYPE + 1 + + block->page.frame, prev_page_no == FIL_NULL ? FIL_PAGE_TYPE_ZBLOB : FIL_PAGE_TYPE_ZBLOB2); block->page.zip.data[FIL_PAGE_TYPE + 1] - = block->frame[FIL_PAGE_TYPE + 1]; + = block->page.frame[FIL_PAGE_TYPE + 1]; - c_stream.next_out = block->frame + c_stream.next_out = block->page.frame + FIL_PAGE_DATA; c_stream.avail_out = static_cast<uInt>( payload_size_zip); @@ -7105,7 +7104,7 @@ btr_store_big_rec_extern_fields( ut_ad(blob_page_zip); ut_ad(page_zip_get_size(blob_page_zip) == page_zip_get_size(page_zip)); - memcpy(blob_page_zip->data, block->frame, + memcpy(blob_page_zip->data, block->page.frame, page_zip_get_size(page_zip)); if (err == Z_OK && prev_page_no != FIL_NULL) { @@ -7158,7 +7157,7 @@ next_zip_page: } } else { mtr.write<1>(*block, FIL_PAGE_TYPE + 1 - + block->frame, + + block->page.frame, FIL_PAGE_TYPE_BLOB); if (extern_len > payload_size) { @@ -7170,13 +7169,14 @@ next_zip_page: mtr.memcpy<mtr_t::MAYBE_NOP>( *block, FIL_PAGE_DATA + BTR_BLOB_HDR_SIZE - + block->frame, + + block->page.frame, static_cast<const byte*> (big_rec_vec->fields[i].data) + big_rec_vec->fields[i].len - extern_len, store_len); mtr.write<4>(*block, BTR_BLOB_HDR_PART_LEN - + FIL_PAGE_DATA + block->frame, + + FIL_PAGE_DATA + + block->page.frame, store_len); 
compile_time_assert(FIL_NULL == 0xffffffff); mtr.memset(block, BTR_BLOB_HDR_NEXT_PAGE_NO @@ -7260,7 +7260,7 @@ func_exit: @param[in] read true=read, false=purge */ static void btr_check_blob_fil_page_type(const buf_block_t& block, bool read) { - uint16_t type= fil_page_get_type(block.frame); + uint16_t type= fil_page_get_type(block.page.frame); if (UNIV_LIKELY(type == FIL_PAGE_TYPE_BLOB)) return; @@ -7711,11 +7711,13 @@ inflate_error: } end_of_blob: - buf_page_release_zip(bpage); + bpage->lock.s_unlock(); + bpage->unfix(); goto func_exit; } - buf_page_release_zip(bpage); + bpage->lock.s_unlock(); + bpage->unfix(); /* On other BLOB pages except the first the BLOB header always is at the page header: */ diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc index 19f6edc7ec7..f8b5d42bb57 100644 --- a/storage/innobase/btr/btr0defragment.cc +++ b/storage/innobase/btr/btr0defragment.cc @@ -476,7 +476,7 @@ btr_defragment_n_pages( /* It doesn't make sense to call this function with n_pages = 1. 
*/ ut_ad(n_pages > 1); - if (!page_is_leaf(block->frame)) { + if (!page_is_leaf(block->page.frame)) { return NULL; } diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc index e425ed7820e..ac91f87c8e1 100644 --- a/storage/innobase/btr/btr0pcur.cc +++ b/storage/innobase/btr/btr0pcur.cc @@ -112,8 +112,9 @@ btr_pcur_store_position( page_cursor = btr_pcur_get_page_cur(cursor); rec = page_cur_get_rec(page_cursor); - offs = rec - block->frame; - ut_ad(block->page.id().page_no() == page_get_page_no(block->frame)); + offs = rec - block->page.frame; + ut_ad(block->page.id().page_no() + == page_get_page_no(block->page.frame)); ut_ad(block->page.buf_fix_count()); /* For spatial index, when we do positioning on parent buffer if necessary, it might not hold latches, but the @@ -126,13 +127,13 @@ btr_pcur_store_position( cursor->old_stored = true; - if (page_is_empty(block->frame)) { + if (page_is_empty(block->page.frame)) { /* It must be an empty index tree; NOTE that in this case we do not store the modify_clock, but always do a search if we restore the cursor position */ - ut_a(!page_has_siblings(block->frame)); - ut_ad(page_is_leaf(block->frame)); + ut_a(!page_has_siblings(block->page.frame)); + ut_ad(page_is_leaf(block->page.frame)); ut_ad(block->page.id().page_no() == index->page); if (page_rec_is_supremum_low(offs)) { @@ -159,9 +160,9 @@ before_first: #endif ut_ad(index->is_instant() || block->page.id().page_no() != index->page); - ut_ad(page_get_n_recs(block->frame) == 1); - ut_ad(page_is_leaf(block->frame)); - ut_ad(!page_has_prev(block->frame)); + ut_ad(page_get_n_recs(block->page.frame) == 1); + ut_ad(page_is_leaf(block->page.frame)); + ut_ad(!page_has_prev(block->page.frame)); cursor->rel_pos = BTR_PCUR_AFTER_LAST_IN_TREE; return; } @@ -171,7 +172,7 @@ before_first: rec = page_rec_get_next(rec); if (rec_is_metadata(rec, *index)) { - ut_ad(!page_has_prev(block->frame)); + ut_ad(!page_has_prev(block->page.frame)); rec = 
page_rec_get_next(rec); if (page_rec_is_supremum(rec)) { goto before_first; diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index a59a54676ed..bc26397d106 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -409,14 +409,10 @@ static bool btr_search_update_block_hash_info(btr_search_t* info, buf_block_t* block) { - ut_ad(block->lock.have_x() || block->lock.have_s()); + ut_ad(block->page.lock.have_x() || block->page.lock.have_s()); info->last_hash_succ = FALSE; - ut_d(auto state= block->page.state()); - ut_ad(state == BUF_BLOCK_NOT_USED - || state == BUF_BLOCK_FILE_PAGE - || state == BUF_BLOCK_MEMORY - || state == BUF_BLOCK_REMOVE_HASH); + ut_ad(buf_pool.is_uncompressed(block)); ut_ad(info->magic_n == BTR_SEARCH_MAGIC_N); if ((block->n_hash_helps > 0) @@ -444,13 +440,13 @@ btr_search_update_block_hash_info(btr_search_t* info, buf_block_t* block) block->left_side = info->left_side; } - if ((block->n_hash_helps > page_get_n_recs(block->frame) + if ((block->n_hash_helps > page_get_n_recs(block->page.frame) / BTR_SEARCH_PAGE_BUILD_LIMIT) && (info->n_hash_potential >= BTR_SEARCH_BUILD_LIMIT)) { if ((!block->index) || (block->n_hash_helps - > 2U * page_get_n_recs(block->frame)) + > 2U * page_get_n_recs(block->page.frame)) || (block->n_fields != block->curr_n_fields) || (block->n_bytes != block->curr_n_bytes) || (block->left_side != block->curr_left_side)) { @@ -489,7 +485,7 @@ static bool ha_insert_for_fold(hash_table_t *table, mem_heap_t* heap, const rec_t *data) { #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - ut_a(block->frame == page_align(data)); + ut_a(block->page.frame == page_align(data)); #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ ut_ad(btr_search_enabled); @@ -502,7 +498,7 @@ static bool ha_insert_for_fold(hash_table_t *table, mem_heap_t* heap, { #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG buf_block_t *prev_block= prev->block; - ut_a(prev_block->frame == page_align(prev->data)); + 
ut_a(prev_block->page.frame == page_align(prev->data)); ut_a(prev_block->n_pointers-- < MAX_N_POINTERS); ut_a(block->n_pointers++ < MAX_N_POINTERS); @@ -550,7 +546,7 @@ static void ha_delete_hash_node(hash_table_t *table, mem_heap_t *heap, { ut_ad(btr_search_enabled); #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - ut_a(del_node->block->frame == page_align(del_node->data)); + ut_a(del_node->block->page.frame == page_align(del_node->data)); ut_a(del_node->block->n_pointers-- < MAX_N_POINTERS); #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ @@ -650,7 +646,7 @@ static bool ha_search_and_update_if_found(hash_table_t *table, ulint fold, const rec_t *new_data) { #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - ut_a(new_block->frame == page_align(new_data)); + ut_a(new_block->page.frame == page_align(new_data)); #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ if (!btr_search_enabled) @@ -697,9 +693,9 @@ btr_search_update_hash_ref( { ut_ad(cursor->flag == BTR_CUR_HASH_FAIL); - ut_ad(block->lock.have_x() || block->lock.have_s()); - ut_ad(page_align(btr_cur_get_rec(cursor)) == block->frame); - ut_ad(page_is_leaf(block->frame)); + ut_ad(block->page.lock.have_x() || block->page.lock.have_s()); + ut_ad(page_align(btr_cur_get_rec(cursor)) == block->page.frame); + ut_ad(page_is_leaf(block->page.frame)); assert_block_ahi_valid(block); dict_index_t* index = block->index; @@ -935,15 +931,15 @@ inline void buf_pool_t::clear_hash_index() continue; } - ut_d(buf_page_state state= block->page.state()); + ut_d(const auto s= block->page.state()); /* Another thread may have set the state to - BUF_BLOCK_REMOVE_HASH in buf_LRU_block_remove_hashed(). + REMOVE_HASH in buf_LRU_block_remove_hashed(). The state change in buf_pool_t::realloc() is not observable here, because in that case we would have !block->index. In the end, the entire adaptive hash index will be removed. 
*/ - ut_ad(state == BUF_BLOCK_FILE_PAGE || state == BUF_BLOCK_REMOVE_HASH); + ut_ad(s >= buf_page_t::UNFIXED || s == buf_page_t::REMOVE_HASH); # if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG block->n_pointers= 0; # endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ @@ -974,18 +970,19 @@ inline buf_block_t* buf_pool_t::block_from_ahi(const byte *ptr) const ? chunk_map->rbegin()->second : (--it)->second; - const size_t offs= size_t(ptr - chunk->blocks->frame) >> srv_page_size_shift; + const size_t offs= size_t(ptr - chunk->blocks->page.frame) >> + srv_page_size_shift; ut_a(offs < chunk->size); buf_block_t *block= &chunk->blocks[offs]; /* buf_pool_t::chunk_t::init() invokes buf_block_init() so that - block[n].frame == block->frame + n * srv_page_size. Check it. */ - ut_ad(block->frame == page_align(ptr)); + block[n].frame == block->page.frame + n * srv_page_size. Check it. */ + ut_ad(block->page.frame == page_align(ptr)); /* Read the state of the block without holding hash_lock. - A state transition from BUF_BLOCK_FILE_PAGE to - BUF_BLOCK_REMOVE_HASH is possible during this execution. */ - ut_d(const buf_page_state state = block->page.state()); - ut_ad(state == BUF_BLOCK_FILE_PAGE || state == BUF_BLOCK_REMOVE_HASH); + A state transition to REMOVE_HASH is possible during + this execution. */ + ut_ad(block->page.state() >= buf_page_t::REMOVE_HASH); + return block; } @@ -1093,48 +1090,47 @@ fail: if (!ahi_latch) { buf_pool_t::hash_chain& chain = buf_pool.page_hash.cell_get( block->page.id().fold()); - bool fail; + bool fail, got_latch; { transactional_shared_lock_guard<page_hash_latch> g{ buf_pool.page_hash.lock_get(chain)}; - switch (block->page.state()) { - case BUF_BLOCK_REMOVE_HASH: + const auto state = block->page.state(); + if (state == buf_page_t::REMOVE_HASH) { /* Another thread is just freeing the block from the LRU list of the buffer pool: do not try to access this page. 
*/ goto fail; - case BUF_BLOCK_FILE_PAGE: - break; - default: + } + if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED)) { #ifndef NO_ELISION xend(); #endif ut_error; } - block->fix(); fail = index != block->index && index_id == block->index->id; + got_latch = (latch_mode == BTR_SEARCH_LEAF) + ? block->page.lock.s_lock_try() + : block->page.lock.x_lock_try(); } ut_a(!fail || block->index->freed()); - block->page.set_accessed(); + if (!got_latch) { + goto fail; + } + block->page.fix(); + block->page.set_accessed(); buf_page_make_young_if_needed(&block->page); mtr_memo_type_t fix_type; if (latch_mode == BTR_SEARCH_LEAF) { - if (!block->lock.s_lock_try()) { -got_no_latch: - buf_block_buf_fix_dec(block); - goto fail; - } fix_type = MTR_MEMO_PAGE_S_FIX; + ut_ad(!block->page.is_read_fixed()); } else { - if (!block->lock.x_lock_try()) { - goto got_no_latch; - } fix_type = MTR_MEMO_PAGE_X_FIX; + ut_ad(!block->page.is_io_fixed()); } mtr->memo_push(block, fix_type); @@ -1146,16 +1142,15 @@ got_no_latch: goto fail_and_release_page; } - DBUG_ASSERT(block->page.status != buf_page_t::FREED); + DBUG_ASSERT(!block->page.is_freed()); } else if (UNIV_UNLIKELY(index != block->index && index_id == block->index->id)) { ut_a(block->index->freed()); goto fail_and_release_page; } - if (block->page.state() != BUF_BLOCK_FILE_PAGE) { - - ut_ad(block->page.state() == BUF_BLOCK_REMOVE_HASH); + if (!block->page.in_file()) { + ut_ad(block->page.state() == buf_page_t::REMOVE_HASH); fail_and_release_page: if (!ahi_latch) { @@ -1177,7 +1172,7 @@ fail_and_release_page: is positioned on. We cannot look at the next of the previous record to determine if our guess for the cursor position is right. 
*/ - if (index_id != btr_page_get_index_id(block->frame) + if (index_id != btr_page_get_index_id(block->page.frame) || !btr_search_check_guess(cursor, !!ahi_latch, tuple, mode)) { goto fail_and_release_page; } @@ -1268,17 +1263,21 @@ retry: return; } - ut_ad(!block->page.buf_fix_count() - || block->page.state() == BUF_BLOCK_REMOVE_HASH - || block->lock.have_any()); - ut_ad(page_is_leaf(block->frame)); + ut_d(const auto state = block->page.state()); + ut_ad(state == buf_page_t::REMOVE_HASH + || state >= buf_page_t::UNFIXED); + ut_ad(state == buf_page_t::REMOVE_HASH + || !(~buf_page_t::LRU_MASK & state) + || block->page.lock.have_any()); + ut_ad(state < buf_page_t::READ_FIX || state >= buf_page_t::WRITE_FIX); + ut_ad(page_is_leaf(block->page.frame)); /* We must not dereference block->index here, because it could be freed if (!index->table->get_ref_count() && !dict_sys.frozen()). Determine the ahi_slot based on the block contents. */ const index_id_t index_id - = btr_page_get_index_id(block->frame); + = btr_page_get_index_id(block->page.frame); auto part = btr_search_sys.get_part(index_id, block->page.id().space()); @@ -1322,7 +1321,7 @@ retry: ut_a(n_fields > 0 || n_bytes > 0); - page = block->frame; + page = block->page.frame; n_recs = page_get_n_recs(page); /* Calculate and cache fold values into an array for fast deletion @@ -1441,7 +1440,7 @@ void btr_search_drop_page_hash_when_freed(const page_id_t page_id) if (block) { /* If AHI is still valid, page can't be in free state. AHI is dropped when page is freed. 
*/ - DBUG_ASSERT(block->page.status != buf_page_t::FREED); + DBUG_ASSERT(!block->page.is_freed()); if (block->index) { /* In all our callers, the table handle should @@ -1499,9 +1498,9 @@ btr_search_build_page_hash_index( ut_ad(index); ut_ad(block->page.id().space() == index->table->space_id); ut_ad(!dict_index_is_ibuf(index)); - ut_ad(page_is_leaf(block->frame)); + ut_ad(page_is_leaf(block->page.frame)); - ut_ad(block->lock.have_x() || block->lock.have_s()); + ut_ad(block->page.lock.have_x() || block->page.lock.have_s()); ut_ad(block->page.id().page_no() >= 3); ahi_latch->rd_lock(SRW_LOCK_CALL); @@ -1722,8 +1721,8 @@ btr_search_move_or_delete_hash_entries( buf_block_t* new_block, buf_block_t* block) { - ut_ad(block->lock.have_x()); - ut_ad(new_block->lock.have_x()); + ut_ad(block->page.lock.have_x()); + ut_ad(new_block->page.lock.have_x()); if (!btr_search_enabled) { return; @@ -1804,7 +1803,7 @@ void btr_search_update_hash_on_delete(btr_cur_t *cursor) block = btr_cur_get_block(cursor); - ut_ad(block->lock.have_x()); + ut_ad(block->page.lock.have_x()); assert_block_ahi_valid(block); index = block->index; @@ -1879,7 +1878,7 @@ void btr_search_update_hash_node_on_insert(btr_cur_t *cursor, block = btr_cur_get_block(cursor); - ut_ad(block->lock.have_x()); + ut_ad(block->page.lock.have_x()); index = block->index; @@ -1962,7 +1961,7 @@ void btr_search_update_hash_on_insert(btr_cur_t *cursor, block = btr_cur_get_block(cursor); - ut_ad(block->lock.have_x()); + ut_ad(block->page.lock.have_x()); assert_block_ahi_valid(block); index = block->index; @@ -2210,9 +2209,7 @@ btr_search_hash_table_validate(ulint hash_table_id) = buf_pool.block_from_ahi((byte*) node->data); index_id_t page_index_id; - if (UNIV_LIKELY(block->page.state() - == BUF_BLOCK_FILE_PAGE)) { - + if (UNIV_LIKELY(block->page.in_file())) { /* The space and offset are only valid for file blocks. 
It is possible that the block is being freed @@ -2233,13 +2230,15 @@ btr_search_hash_table_validate(ulint hash_table_id) the block from buf_pool.page_hash by calling buf_LRU_block_remove_hashed_page(). Then it invokes btr_search_drop_page_hash_index(). */ - ut_a(block->page.state() == BUF_BLOCK_REMOVE_HASH); + ut_a(block->page.state() == buf_page_t::REMOVE_HASH); state_ok: ut_ad(!dict_index_is_ibuf(block->index)); ut_ad(block->page.id().space() == block->index->table->space_id); - page_index_id = btr_page_get_index_id(block->frame); + const page_t* page = block->page.frame; + + page_index_id = btr_page_get_index_id(page); offsets = rec_get_offsets( node->data, block->index, offsets, @@ -2255,8 +2254,6 @@ state_ok: page_index_id); if (node->fold != fold) { - const page_t* page = block->frame; - ok = FALSE; ib::error() << "Error in an adaptive hash" diff --git a/storage/innobase/buf/buf0block_hint.cc b/storage/innobase/buf/buf0block_hint.cc index 00c968511b3..6bd01faa279 100644 --- a/storage/innobase/buf/buf0block_hint.cc +++ b/storage/innobase/buf/buf0block_hint.cc @@ -43,16 +43,15 @@ void Block_hint::buffer_fix_block_if_still_valid() different page, and that slice of buf_pool.page_hash could be protected by another hash_lock that we are not holding.) - Finally, assuming that we have correct hash bucket latched, we must - validate m_block->state() to ensure that the block is not being freed. */ + Finally, we must ensure that the block is not being freed. 
*/ if (m_block) { auto &cell= buf_pool.page_hash.cell_get(m_page_id.fold()); transactional_shared_lock_guard<page_hash_latch> g {buf_pool.page_hash.lock_get(cell)}; if (buf_pool.is_uncompressed(m_block) && m_page_id == m_block->page.id() && - m_block->page.state() == BUF_BLOCK_FILE_PAGE) - m_block->fix(); + m_block->page.frame && m_block->page.in_file()) + m_block->page.fix(); else clear(); } diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc index 125dac5333e..3d476fbac77 100644 --- a/storage/innobase/buf/buf0buddy.cc +++ b/storage/innobase/buf/buf0buddy.cc @@ -354,11 +354,11 @@ buf_buddy_block_free(void* buf) ut_a(!ut_align_offset(buf, srv_page_size)); HASH_SEARCH(hash, &buf_pool.zip_hash, fold, buf_page_t*, bpage, - ut_ad(bpage->state() == BUF_BLOCK_MEMORY + ut_ad(bpage->state() == buf_page_t::MEMORY && bpage->in_zip_hash), - ((buf_block_t*) bpage)->frame == buf); + bpage->frame == buf); ut_a(bpage); - ut_a(bpage->state() == BUF_BLOCK_MEMORY); + ut_a(bpage->state() == buf_page_t::MEMORY); ut_ad(bpage->in_zip_hash); ut_d(bpage->in_zip_hash = false); HASH_DELETE(buf_page_t, hash, &buf_pool.zip_hash, fold, bpage); @@ -383,10 +383,10 @@ buf_buddy_block_register( buf_block_t* block) /*!< in: buffer frame to allocate */ { const ulint fold = BUF_POOL_ZIP_FOLD(block); - ut_ad(block->page.state() == BUF_BLOCK_MEMORY); + ut_ad(block->page.state() == buf_page_t::MEMORY); - ut_a(block->frame); - ut_a(!ut_align_offset(block->frame, srv_page_size)); + ut_a(block->page.frame); + ut_a(!ut_align_offset(block->page.frame, srv_page_size)); ut_ad(!block->page.in_zip_hash); ut_d(block->page.in_zip_hash = true); @@ -462,8 +462,8 @@ byte *buf_buddy_alloc_low(ulint i, bool *lru) alloc_big: buf_buddy_block_register(block); - block = (buf_block_t*) buf_buddy_alloc_from( - block->frame, i, BUF_BUDDY_SIZES); + block = reinterpret_cast<buf_block_t*>( + buf_buddy_alloc_from(block->page.frame, i, BUF_BUDDY_SIZES)); func_exit: buf_pool.buddy_stat[i].used++; @@ 
-694,7 +694,7 @@ buf_buddy_realloc(void* buf, ulint size) block = reinterpret_cast<buf_block_t*>( buf_buddy_alloc_from( - block->frame, i, BUF_BUDDY_SIZES)); + block->page.frame, i, BUF_BUDDY_SIZES)); } buf_pool.buddy_stat[i].used++; diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index cbc19d3b14a..a74925f667f 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -219,11 +219,11 @@ buf_pool.LRU. The chains of free memory blocks (buf_pool.zip_free[]) are used by the buddy allocator (buf0buddy.cc) to keep track of currently unused -memory blocks of size UNIV_PAGE_SIZE_MIN..srv_page_size / 2. These -blocks are inside the srv_page_size-sized memory blocks of type +memory blocks of size 1024..innodb_page_size / 2. These +blocks are inside the memory blocks of size innodb_page_size and type BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer -pool. The buddy allocator is solely used for allocating control -blocks for compressed pages (buf_page_t) and compressed page frames. +pool. The buddy allocator is solely used for allocating +ROW_FORMAT=COMPRESSED page frames. Loading a file page ------------------- @@ -231,9 +231,9 @@ blocks for compressed pages (buf_page_t) and compressed page frames. First, a victim block for replacement has to be found in the buf_pool. It is taken from the free list or searched for from the end of the LRU-list. An exclusive lock is reserved for the frame, -the io_fix field is set in the block fixing the block in buf_pool, +the io_fix is set in the block fixing the block in buf_pool, and the io-operation for loading the page is queued. The io-handler thread -releases the X-lock on the frame and resets the io_fix field +releases the X-lock on the frame and releases the io_fix when the io operation completes. 
A thread may request the above operation using the function @@ -312,8 +312,6 @@ void page_hash_latch::write_lock_wait() } # endif -constexpr std::chrono::microseconds WAIT_FOR_READ(100); -constexpr int WAIT_FOR_WRITE= 100; /** Number of attempts made to read in a page in the buffer pool */ constexpr ulint BUF_PAGE_READ_MAX_RETRIES= 100; /** The maximum portion of the buffer pool that can be used for the @@ -342,11 +340,8 @@ static Atomic_counter<size_t> buf_dbg_counter; /** Macro to determine whether the read of write counter is used depending on the io_type */ -#define MONITOR_RW_COUNTER(io_type, counter) \ - ((io_type == BUF_IO_READ) \ - ? (counter##_READ) \ - : (counter##_WRITTEN)) - +#define MONITOR_RW_COUNTER(read, counter) \ + (read ? (counter##_READ) : (counter##_WRITTEN)) /** Decrypt a page for temporary tablespace. @param[in,out] tmp_frame Temporary buffer @@ -400,8 +395,7 @@ static bool buf_page_decrypt_after_read(buf_page_t *bpage, ut_ad(node.space->id == bpage->id().space()); const auto flags = node.space->flags; - byte* dst_frame = bpage->zip.data ? bpage->zip.data : - ((buf_block_t*) bpage)->frame; + byte* dst_frame = bpage->zip.data ? bpage->zip.data : bpage->frame; bool page_compressed = node.space->is_compressed() && buf_page_is_compressed(dst_frame, flags); const page_id_t id(bpage->id()); @@ -979,11 +973,11 @@ buf_block_init(buf_block_t* block, byte* frame) buf_pool.resize(). Either way, adaptive hash index must not exist. 
*/ assert_block_ahi_empty_on_init(block); - block->frame = frame; + block->page.frame = frame; MEM_MAKE_DEFINED(&block->modify_clock, sizeof block->modify_clock); ut_ad(!block->modify_clock); - block->page.init(BUF_BLOCK_NOT_USED, page_id_t(~0ULL)); + block->page.init(buf_page_t::NOT_USED, page_id_t(~0ULL)); #ifdef BTR_CUR_HASH_ADAPT MEM_MAKE_DEFINED(&block->index, sizeof block->index); ut_ad(!block->index); @@ -995,8 +989,8 @@ buf_block_init(buf_block_t* block, byte* frame) MEM_MAKE_DEFINED(&block->page.hash, sizeof block->page.hash); ut_ad(!block->page.hash); - MEM_MAKE_DEFINED(&block->lock, sizeof block->lock); - block->lock.init(); + MEM_MAKE_DEFINED(&block->page.lock, sizeof block->page.lock); + block->page.lock.init(); } /** Allocate a chunk of buffer frames. @@ -1069,7 +1063,7 @@ inline bool buf_pool_t::chunk_t::create(size_t bytes) for (auto i= size; i--; ) { buf_block_init(block, frame); - MEM_UNDEFINED(block->frame, srv_page_size); + MEM_UNDEFINED(block->page.frame, srv_page_size); /* Add the block to the free list */ UT_LIST_ADD_LAST(buf_pool.free, &block->page); @@ -1092,18 +1086,11 @@ inline const buf_block_t *buf_pool_t::chunk_t::not_freed() const buf_block_t *block= blocks; for (auto i= size; i--; block++) { - switch (block->page.state()) { - case BUF_BLOCK_ZIP_PAGE: + if (block->page.in_file()) + { /* The uncompressed buffer pool should never contain ROW_FORMAT=COMPRESSED block descriptors. */ - ut_error; - break; - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - /* Skip blocks that are not being used for file pages. */ - break; - case BUF_BLOCK_FILE_PAGE: + ut_ad(block->page.frame); const lsn_t lsn= block->page.oldest_modification(); if (srv_read_only_mode) @@ -1112,8 +1099,6 @@ inline const buf_block_t *buf_pool_t::chunk_t::not_freed() const can be dirtied, so all of them must be clean. 
*/ ut_ad(lsn == 0 || lsn == recv_sys.recovered_lsn || srv_force_recovery == SRV_FORCE_NO_LOG_REDO); - ut_ad(!block->page.buf_fix_count()); - ut_ad(block->page.io_fix() == BUF_IO_NONE); break; } @@ -1134,13 +1119,6 @@ inline const buf_block_t *buf_pool_t::chunk_t::not_freed() const } #endif /* UNIV_DEBUG */ -/** Free the synchronization objects of a buffer pool block descriptor -@param[in,out] block buffer pool block descriptor */ -static void buf_block_free_mutexes(buf_block_t* block) -{ - block->lock.free(); -} - /** Create the hash table. @param n the lower bound of n_cells */ void buf_pool_t::page_hash_table::create(ulint n) @@ -1193,7 +1171,7 @@ bool buf_pool_t::create() buf_block_t* block= chunk->blocks; for (auto i= chunk->size; i--; block++) - buf_block_free_mutexes(block); + block->page.lock.free(); allocator.deallocate_large_dodump(chunk->mem, &chunk->mem_pfx); } @@ -1291,8 +1269,11 @@ void buf_pool_t::close() ? (oldest == 0 || oldest == 2) : oldest <= 1 || srv_is_being_started || srv_fast_shutdown == 2); - if (bpage->state() != BUF_BLOCK_FILE_PAGE) - buf_page_free_descriptor(bpage); + if (UNIV_UNLIKELY(!bpage->frame)) + { + bpage->lock.free(); + ut_free(bpage); + } } for (auto chunk= chunks + n_chunks; --chunk >= chunks; ) @@ -1300,7 +1281,7 @@ void buf_pool_t::close() buf_block_t *block= chunk->blocks; for (auto i= chunk->size; i--; block++) - buf_block_free_mutexes(block); + block->page.lock.free(); allocator.deallocate_large_dodump(chunk->mem, &chunk->mem_pfx); } @@ -1330,7 +1311,8 @@ inline bool buf_pool_t::realloc(buf_block_t *block) buf_block_t* new_block; mysql_mutex_assert_owner(&mutex); - ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE); + ut_ad(block->page.in_file()); + ut_ad(block->page.frame); new_block = buf_LRU_get_free_only(); @@ -1349,9 +1331,13 @@ inline bool buf_pool_t::realloc(buf_block_t *block) if (block->page.can_relocate()) { memcpy_aligned<OS_FILE_LOG_BLOCK_SIZE>( - new_block->frame, block->frame, srv_page_size); + 
new_block->page.frame, block->page.frame, + srv_page_size); mysql_mutex_lock(&buf_pool.flush_list_mutex); + const auto frame = new_block->page.frame; + new_block->page.lock.free(); new (&new_block->page) buf_page_t(block->page); + new_block->page.frame = frame; /* relocate LRU list */ if (buf_page_t* prev_b = buf_pool.LRU_remove(&block->page)) { @@ -1395,13 +1381,14 @@ inline bool buf_pool_t::realloc(buf_block_t *block) &new_block->page); buf_block_modify_clock_inc(block); static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment"); - memset_aligned<4>(block->frame + FIL_PAGE_OFFSET, 0xff, 4); + memset_aligned<4>(block->page.frame + + FIL_PAGE_OFFSET, 0xff, 4); static_assert(FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID % 4 == 2, "not perfect alignment"); - memset_aligned<2>(block->frame + memset_aligned<2>(block->page.frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4); - MEM_UNDEFINED(block->frame, srv_page_size); - block->page.set_state(BUF_BLOCK_REMOVE_HASH); + MEM_UNDEFINED(block->page.frame, srv_page_size); + block->page.set_state(buf_page_t::REMOVE_HASH); if (!fsp_is_system_temporary(id.space())) { buf_flush_relocate_on_flush_list(&block->page, &new_block->page); @@ -1422,7 +1409,7 @@ inline bool buf_pool_t::realloc(buf_block_t *block) new_block->n_fields = 1; new_block->left_side = TRUE; #endif /* BTR_CUR_HASH_ADAPT */ - ut_d(block->page.set_state(BUF_BLOCK_MEMORY)); + ut_d(block->page.set_state(buf_page_t::MEMORY)); /* free block */ new_block = block; } @@ -1467,17 +1454,13 @@ inline bool buf_pool_t::withdraw_blocks() ib::info() << "start to withdraw the last " << withdraw_target << " blocks"; - /* Minimize zip_free[i] lists */ - mysql_mutex_lock(&mutex); - buf_buddy_condense_free(); - mysql_mutex_unlock(&mutex); - while (UT_LIST_GET_LEN(withdraw) < withdraw_target) { /* try to withdraw from free_list */ ulint count1 = 0; mysql_mutex_lock(&mutex); + buf_buddy_condense_free(); block = reinterpret_cast<buf_block_t*>( UT_LIST_GET_FIRST(free)); while (block != NULL @@ -1517,45 
+1500,38 @@ inline bool buf_pool_t::withdraw_blocks() ulint count2 = 0; mysql_mutex_lock(&mutex); - buf_page_t* bpage; - bpage = UT_LIST_GET_FIRST(LRU); - while (bpage != NULL) { - buf_page_t* next_bpage = UT_LIST_GET_NEXT(LRU, bpage); - if (bpage->zip.data != NULL + buf_pool_mutex_exit_forbid(); + for (buf_page_t* bpage = UT_LIST_GET_FIRST(LRU), *next_bpage; + bpage; bpage = next_bpage) { + ut_ad(bpage->in_file()); + next_bpage = UT_LIST_GET_NEXT(LRU, bpage); + if (UNIV_LIKELY_NULL(bpage->zip.data) && will_be_withdrawn(bpage->zip.data) && bpage->can_relocate()) { - buf_pool_mutex_exit_forbid(); if (!buf_buddy_realloc( bpage->zip.data, page_zip_get_size(&bpage->zip))) { /* failed to allocate block */ - buf_pool_mutex_exit_allow(); break; } - buf_pool_mutex_exit_allow(); count2++; + if (bpage->frame) { + goto realloc_frame; + } } - if (bpage->state() == BUF_BLOCK_FILE_PAGE - && will_be_withdrawn(*bpage)) { - if (bpage->can_relocate()) { - buf_pool_mutex_exit_forbid(); - if (!realloc( - reinterpret_cast<buf_block_t*>( - bpage))) { - /* failed to allocate block */ - buf_pool_mutex_exit_allow(); - break; - } - buf_pool_mutex_exit_allow(); - count2++; + if (bpage->frame && will_be_withdrawn(*bpage) + && bpage->can_relocate()) { +realloc_frame: + if (!realloc(reinterpret_cast<buf_block_t*>( + bpage))) { + /* failed to allocate block */ + break; } - /* NOTE: if the page is in use, - not relocated yet */ + count2++; } - - bpage = next_bpage; } + buf_pool_mutex_exit_allow(); mysql_mutex_unlock(&mutex); buf_resize_status( @@ -1585,7 +1561,7 @@ inline bool buf_pool_t::withdraw_blocks() * const echunk = chunks + n_chunks; chunk != echunk; chunk++) { block = chunk->blocks; for (ulint j = chunk->size; j--; block++) { - ut_a(block->page.state() == BUF_BLOCK_NOT_USED); + ut_a(block->page.state() == buf_page_t::NOT_USED); ut_ad(block->in_withdraw_list); } } @@ -1675,16 +1651,6 @@ inline void buf_pool_t::resize() srv_buf_pool_old_size, srv_buf_pool_size, srv_buf_pool_chunk_unit); - 
mysql_mutex_lock(&mutex); - ut_ad(curr_size == old_size); - ut_ad(n_chunks_new == n_chunks); - ut_ad(UT_LIST_GET_LEN(withdraw) == 0); - - n_chunks_new = (new_instance_size << srv_page_size_shift) - / srv_buf_pool_chunk_unit; - curr_size = n_chunks_new * chunks->size; - mysql_mutex_unlock(&mutex); - #ifdef BTR_CUR_HASH_ADAPT /* disable AHI if needed */ buf_resize_status("Disabling adaptive hash index."); @@ -1700,6 +1666,16 @@ inline void buf_pool_t::resize() } #endif /* BTR_CUR_HASH_ADAPT */ + mysql_mutex_lock(&mutex); + ut_ad(curr_size == old_size); + ut_ad(n_chunks_new == n_chunks); + ut_ad(UT_LIST_GET_LEN(withdraw) == 0); + + n_chunks_new = (new_instance_size << srv_page_size_shift) + / srv_buf_pool_chunk_unit; + curr_size = n_chunks_new * chunks->size; + mysql_mutex_unlock(&mutex); + if (curr_size < old_size) { /* set withdraw target */ size_t w = 0; @@ -1828,7 +1804,7 @@ withdraw_retry: buf_block_t* block = chunk->blocks; for (ulint j = chunk->size; j--; block++) { - buf_block_free_mutexes(block); + block->page.lock.free(); } allocator.deallocate_large_dodump( @@ -2050,23 +2026,28 @@ void buf_resize_shutdown() /** Relocate a ROW_FORMAT=COMPRESSED block in the LRU list and buf_pool.page_hash. The caller must relocate bpage->list. 
-@param bpage BUF_BLOCK_ZIP_PAGE block +@param bpage ROW_FORMAT=COMPRESSED only block @param dpage destination control block */ static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage) { - const page_id_t id= bpage->id(); + const page_id_t id{bpage->id()}; buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(id.fold()); - ut_ad(bpage->state() == BUF_BLOCK_ZIP_PAGE); + ut_ad(!bpage->frame); mysql_mutex_assert_owner(&buf_pool.mutex); ut_ad(buf_pool.page_hash.lock_get(chain).is_write_locked()); - ut_a(bpage->io_fix() == BUF_IO_NONE); - ut_a(!bpage->buf_fix_count()); ut_ad(bpage == buf_pool.page_hash.get(id, chain)); ut_ad(!buf_pool.watch_is_sentinel(*bpage)); - ut_ad(bpage->state() == BUF_BLOCK_ZIP_PAGE); + ut_d(const auto state= bpage->state()); + ut_ad(state >= buf_page_t::FREED); + ut_ad(state <= buf_page_t::READ_FIX); + ut_ad(bpage->lock.is_write_locked()); + const auto frame= dpage->frame; + dpage->lock.free(); new (dpage) buf_page_t(*bpage); + dpage->frame= frame; + /* Important that we adjust the hazard pointer before removing bpage from LRU list. */ if (buf_page_t *b= buf_pool.LRU_remove(bpage)) @@ -2136,29 +2117,30 @@ retry: ut_ad(!w->oldest_modification()); ut_ad(!w->zip.data); ut_ad(!w->in_zip_hash); - if (w->state() == BUF_BLOCK_ZIP_PAGE) + static_assert(buf_page_t::NOT_USED == 0, "efficiency"); + if (ut_d(auto s=) w->state()) + { /* This watch may be in use for some other page. */ + ut_ad(s >= buf_page_t::UNFIXED); continue; - ut_ad(w->state() == BUF_BLOCK_NOT_USED); - ut_ad(!w->buf_fix_count()); + } /* w is pointing to watch[], which is protected by mutex. Normally, buf_page_t::id for objects that are reachable by page_hash.get(id, chain) are protected by hash_lock. 
*/ - w->set_state(BUF_BLOCK_ZIP_PAGE); + w->set_state(buf_page_t::UNFIXED + 1); w->id_= id; buf_page_t *bpage= page_hash.get(id, chain); if (UNIV_LIKELY_NULL(bpage)) { - w->set_state(BUF_BLOCK_NOT_USED); + w->set_state(buf_page_t::NOT_USED); page_hash.lock_get(chain).lock(); mysql_mutex_unlock(&mutex); goto retry; } page_hash.lock_get(chain).lock(); - ut_ad(!w->buf_fix_count_); - w->buf_fix_count_= 1; + ut_ad(w->state() == buf_page_t::UNFIXED + 1); buf_pool.page_hash.append(chain, w); mysql_mutex_unlock(&mutex); return nullptr; @@ -2182,10 +2164,11 @@ void buf_pool_t::watch_unset(const page_id_t id, buf_pool_t::hash_chain &chain) transactional_lock_guard<page_hash_latch> g{page_hash.lock_get(chain)}; /* The page must exist because watch_set() increments buf_fix_count. */ w= page_hash.get(id, chain); - const auto buf_fix_count= w->buf_fix_count(); - ut_ad(buf_fix_count); + const auto state= w->state(); + ut_ad(state >= buf_page_t::UNFIXED); + ut_ad(~buf_page_t::LRU_MASK & state); ut_ad(w->in_page_hash); - if (buf_fix_count != 1 || !watch_is_sentinel(*w)) + if (state != buf_page_t::UNFIXED + 1 || !watch_is_sentinel(*w)) { w->unfix(); w= nullptr; @@ -2203,14 +2186,17 @@ void buf_pool_t::watch_unset(const page_id_t id, buf_pool_t::hash_chain &chain) { transactional_lock_guard<page_hash_latch> g {buf_pool.page_hash.lock_get(chain)}; - if (w->unfix() == 0 && w == old) + auto f= w->unfix(); + ut_ad(f < buf_page_t::READ_FIX || w != old); + + if (f == buf_page_t::UNFIXED && w == old) { page_hash.remove(chain, w); // Now that w is detached from page_hash, release it to watch[]. 
ut_ad(w->id_ == id); - ut_ad(!w->buf_fix_count()); - ut_ad(w->state() == BUF_BLOCK_ZIP_PAGE); - w->set_state(BUF_BLOCK_NOT_USED); + ut_ad(!w->frame); + ut_ad(!w->zip.data); + w->set_state(buf_page_t::NOT_USED); } } @@ -2243,28 +2229,30 @@ void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr) {buf_pool.page_hash.lock_get(chain)}; block= reinterpret_cast<buf_block_t*> (buf_pool.page_hash.get(page_id, chain)); - if (!block || block->page.state() != BUF_BLOCK_FILE_PAGE) + if (!block || !block->page.frame) /* FIXME: convert ROW_FORMAT=COMPRESSED, without buf_zip_decompress() */ return; - block->fix(); + /* To avoid a deadlock with buf_LRU_free_page() of some other page + and buf_page_write_complete() of this page, we must not wait for a + page latch while holding a page_hash latch. */ + block->page.fix(); } - ut_ad(block->page.buf_fix_count()); + block->page.lock.x_lock(); + block->page.set_freed(block->page.state()); mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); - block->lock.x_lock(); - block->page.status= buf_page_t::FREED; } /** Get read access to a compressed page (usually of type FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2). -The page must be released with buf_page_release_zip(). +The page must be released with unfix(). NOTE: the page is not protected by any latch. Mutual exclusion has to be implemented at a higher level. In other words, all possible accesses to a given page through this function must be protected by the same set of mutexes or latches. -@param[in] page_id page id -@param[in] zip_size ROW_FORMAT=COMPRESSED page size -@return pointer to the block */ +@param page_id page identifier +@param zip_size ROW_FORMAT=COMPRESSED page size in bytes +@return pointer to the block, s-latched */ TRANSACTIONAL_TARGET buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size) { @@ -2292,9 +2280,14 @@ lookup: /* There is no ROW_FORMAT=COMPRESSED page. 
*/ return nullptr; - if (discard_attempted || bpage->state() == BUF_BLOCK_ZIP_PAGE) + if (discard_attempted || !bpage->frame) { - bpage->fix(); + /* Even when we are holding a page_hash latch, it should be + acceptable to wait for a page S-latch here, because + buf_page_t::read_complete() will not wait for buf_pool.mutex, + and because S-latch would not conflict with a U-latch + that would be protecting buf_page_t::write_complete(). */ + bpage->lock.s_lock(); break; } } @@ -2306,19 +2299,18 @@ lookup: mysql_mutex_unlock(&buf_pool.mutex); } - DBUG_ASSERT(bpage->status != buf_page_t::FREED); + { + ut_d(const auto s=) bpage->fix(); + ut_ad(s >= buf_page_t::UNFIXED); + ut_ad(s < buf_page_t::READ_FIX || s >= buf_page_t::WRITE_FIX); + } + bpage->set_accessed(); buf_page_make_young_if_needed(bpage); #ifdef UNIV_DEBUG if (!(++buf_dbg_counter % 5771)) buf_pool.validate(); #endif /* UNIV_DEBUG */ - ut_ad(bpage->buf_fix_count()); - ut_ad(bpage->in_file()); - - /* Let us wait until the read operation completes */ - while (bpage->io_fix() == BUF_IO_READ) - std::this_thread::sleep_for(WAIT_FOR_READ); return bpage; must_read_page: @@ -2394,7 +2386,7 @@ buf_zip_decompress( case FIL_PAGE_INDEX: case FIL_PAGE_RTREE: if (page_zip_decompress(&block->page.zip, - block->frame, TRUE)) { + block->page.frame, TRUE)) { if (space) { space->release(); } @@ -2413,7 +2405,7 @@ buf_zip_decompress( case FIL_PAGE_TYPE_ZBLOB: case FIL_PAGE_TYPE_ZBLOB2: /* Copy to uncompressed storage. */ - memcpy(block->frame, frame, block->zip_size()); + memcpy(block->page.frame, frame, block->zip_size()); if (space) { space->release(); } @@ -2445,27 +2437,6 @@ err_exit: return(FALSE); } -/** Wait for the block to be read in. -@param[in] block The block to check */ -static -void -buf_wait_for_read( - buf_block_t* block) -{ - /* Note: - - We are using the block->lock to check for IO state. - We set the IO_READ state under the protection of the hash_lock. 
- This is safe because another thread can only - access the block (and check for IO state) after the block has been - added to the page hashtable. */ - - while (block->page.io_fix() == BUF_IO_READ) { - block->lock.s_lock(); - block->lock.s_unlock(); - } -} - /** Low level function used to get access to a database page. @param[in] page_id page id @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 @@ -2553,11 +2524,18 @@ loop: if (block) { transactional_shared_lock_guard<page_hash_latch> g{hash_lock}; if (buf_pool.is_uncompressed(block) - && page_id == block->page.id() - && block->page.state() == BUF_BLOCK_FILE_PAGE) { + && page_id == block->page.id()) { ut_ad(!block->page.in_zip_hash); - block->fix(); - goto got_block; + const auto state = block->page.state(); + /* Ignore guesses that point to read-fixed blocks. + We can only avoid a race condition by + looking up the block via buf_pool.page_hash. */ + if ((state >= buf_page_t::FREED + && state < buf_page_t::READ_FIX) || + state >= buf_page_t::WRITE_FIX) { + block->fix(); + goto got_block; + } } } @@ -2587,8 +2565,8 @@ loop: block = reinterpret_cast<buf_block_t*> (buf_pool.watch_set(page_id, chain)); if (block) { - /* buffer-fixing prevents block->page.state() - changes */ + /* buffer-fixing prevents block->page.in_file() + from changing */ block->fix(); } hash_lock.unlock(); @@ -2602,7 +2580,7 @@ loop: /* The call path is buf_read_page() -> buf_read_page_low() (fil_space_t::io()) -> - buf_page_read_complete() -> + buf_page_t::read_complete() -> buf_decrypt_after_read(). Here fil_space_t* is used and we decrypt -> buf_page_check_corrupt() where page checksums are compared. 
Decryption, decompression as @@ -2679,91 +2657,45 @@ loop: got_block: ut_ad(!block->page.in_zip_hash); - switch (mode) { - default: - ut_ad(block->zip_size() == zip_size); - break; - case BUF_GET_IF_IN_POOL: - case BUF_PEEK_IF_IN_POOL: - case BUF_EVICT_IF_IN_POOL: - if (block->page.io_fix() == BUF_IO_READ) { - /* The page is being read to buffer pool, - but we cannot wait around for the read to - complete. */ - block->unfix(); - return(NULL); - } - } - - switch (UNIV_EXPECT(block->page.state(), BUF_BLOCK_FILE_PAGE)) { - case BUF_BLOCK_FILE_PAGE: - if (fsp_is_system_temporary(page_id.space()) - && block->page.io_fix() != BUF_IO_NONE) { - /* This suggests that the page is being flushed. - Avoid returning reference to this page. - Instead wait for the flush action to complete. */ - block->unfix(); - std::this_thread::sleep_for( - std::chrono::microseconds(WAIT_FOR_WRITE)); - goto loop; - } - - if (UNIV_UNLIKELY(mode == BUF_EVICT_IF_IN_POOL)) { -evict_from_pool: - ut_ad(!block->page.oldest_modification()); - mysql_mutex_lock(&buf_pool.mutex); - block->unfix(); - - if (!buf_LRU_free_page(&block->page, true)) { - ut_ad(0); - } - - mysql_mutex_unlock(&buf_pool.mutex); - return(NULL); - } - - break; - default: - ut_error; - break; + ut_ad(block->page.in_file()); - case BUF_BLOCK_ZIP_PAGE: - if (UNIV_UNLIKELY(mode == BUF_EVICT_IF_IN_POOL)) { - goto evict_from_pool; - } - - if (mode == BUF_PEEK_IF_IN_POOL) { + if (mode == BUF_PEEK_IF_IN_POOL) { + if (UNIV_UNLIKELY(!block->page.frame + || block->page.is_read_fixed())) { /* This mode is only used for dropping an - adaptive hash index. There cannot be an - adaptive hash index for a compressed-only - page, so do not bother decompressing the page. */ + adaptive hash index. There cannot be an + adaptive hash index for a compressed-only page + or a page that is only being read into the + buffer pool. 
*/ block->unfix(); - - return(NULL); + return nullptr; } + } else if (mode == BUF_EVICT_IF_IN_POOL) { + ut_ad(!block->page.oldest_modification()); + mysql_mutex_lock(&buf_pool.mutex); + block->unfix(); - buf_page_t* bpage = &block->page; - - /* Note: We have already buffer fixed this block. */ - if (bpage->buf_fix_count() > 1 - || bpage->io_fix() != BUF_IO_NONE) { + if (!buf_LRU_free_page(&block->page, true)) { + ut_ad(0); + } - /* This condition often occurs when the buffer - is not buffer-fixed, but I/O-fixed by - buf_page_init_for_read(). */ - block->unfix(); + mysql_mutex_unlock(&buf_pool.mutex); + return nullptr; + } - /* The block is buffer-fixed or I/O-fixed. - Try again later. */ - std::this_thread::sleep_for(WAIT_FOR_READ); + ut_ad(mode == BUF_GET_IF_IN_POOL || block->zip_size() == zip_size); + if (UNIV_UNLIKELY(!block->page.frame)) { + if (!block->page.lock.x_lock_try()) { + /* The page is being read or written, or + another thread is executing buf_zip_decompress() + in buf_page_get_low() on it. */ + block->page.unfix(); + std::this_thread::sleep_for( + std::chrono::microseconds(100)); goto loop; } - /* Buffer-fix the block so that it cannot be evicted - or relocated while we are attempting to allocate an - uncompressed page. */ - buf_block_t *new_block = buf_LRU_get_free_block(false); buf_block_init_low(new_block); @@ -2775,92 +2707,102 @@ evict_from_pool: would likely make a memory transaction too large. */ hash_lock.lock(); - /* Buffer-fixing prevents the page_hash from changing. */ - ut_ad(bpage == buf_pool.page_hash.get(page_id, chain)); - - block->unfix(); /* hash_lock protects us after this */ - - if (bpage->buf_fix_count() || bpage->io_fix() != BUF_IO_NONE) { - /* The block was buffer-fixed or I/O-fixed while - buf_pool.mutex was not held by this thread. - Free the block that was allocated and retry. - This should be extremely unlikely, for example, - if buf_page_get_zip() was invoked. 
*/ - - hash_lock.unlock(); - buf_LRU_block_free_non_file_page(new_block); - mysql_mutex_unlock(&buf_pool.mutex); + /* block->page.lock implies !block->page.can_relocate() */ + ut_ad(&block->page == buf_pool.page_hash.get(page_id, chain)); + + /* Wait for any other threads to release their buffer-fix + on the compressed-only block descriptor. + FIXME: Never fix() before acquiring the lock. + Only in buf_page_get_gen(), buf_page_get_low(), buf_page_free() + we are violating that principle. */ + auto state = block->page.state(); + + for (;; state = block->page.state()) { + switch (state) { + case buf_page_t::UNFIXED + 1: + case buf_page_t::IBUF_EXIST + 1: + case buf_page_t::REINIT + 1: + break; + default: + ut_ad(state < buf_page_t::READ_FIX); + + if (state < buf_page_t::UNFIXED + 1) { + ut_ad(state > buf_page_t::FREED); + ut_ad(mode == BUF_GET_POSSIBLY_FREED + || mode == BUF_PEEK_IF_IN_POOL); + block->page.unfix(); + block->page.lock.x_unlock(); + hash_lock.unlock(); + buf_LRU_block_free_non_file_page(new_block); + mysql_mutex_unlock(&buf_pool.mutex); + return nullptr; + } - /* Try again */ - goto loop; + LF_BACKOFF(); + continue; + } + break; } - block = new_block; + /* Ensure that mtr_t::page_lock(new_block, RW_NO_LATCH) + in another thread will wait for + new_block->page.lock.x_unlock(). */ + block->page.set_state(buf_page_t::READ_FIX); - /* Move the compressed page from bpage to block, + /* Move the compressed page from block->page to new_block, and uncompress it. */ - /* Note: this is the uncompressed block and it is not - accessible by other threads yet because it is not in - any list or hash table */ mysql_mutex_lock(&buf_pool.flush_list_mutex); - buf_relocate(bpage, &block->page); + buf_relocate(&block->page, &new_block->page); - /* Set after buf_relocate(). */ - block->page.set_buf_fix_count(1); + /* X-latch the block for the duration of the decompression. 
*/ + new_block->page.lock.x_lock(); + ut_d(block->page.lock.x_unlock()); - buf_flush_relocate_on_flush_list(bpage, &block->page); + buf_flush_relocate_on_flush_list(&block->page, + &new_block->page); mysql_mutex_unlock(&buf_pool.flush_list_mutex); - /* Buffer-fix, I/O-fix, and X-latch the block - for the duration of the decompression. - Also add the block to the unzip_LRU list. */ - block->page.set_state(BUF_BLOCK_FILE_PAGE); - /* Insert at the front of unzip_LRU list */ - buf_unzip_LRU_add_block(block, FALSE); - - block->page.set_io_fix(BUF_IO_READ); - block->lock.x_lock(); - - MEM_UNDEFINED(bpage, sizeof *bpage); + buf_unzip_LRU_add_block(new_block, FALSE); mysql_mutex_unlock(&buf_pool.mutex); hash_lock.unlock(); + +#if defined SUX_LOCK_GENERIC || defined UNIV_DEBUG + block->page.lock.free(); +#endif + ut_free(reinterpret_cast<buf_page_t*>(block)); + block = new_block; + buf_pool.n_pend_unzip++; access_time = block->page.is_accessed(); if (!access_time && !recv_no_ibuf_operations - && ibuf_page_exists(block->page.id(), zip_size)) { - block->page.ibuf_exist = true; + && ibuf_page_exists(block->page.id(), block->zip_size())) { + state = buf_page_t::IBUF_EXIST + 1; } - buf_page_free_descriptor(bpage); - /* Decompress the page while not holding buf_pool.mutex. */ + auto ok = buf_zip_decompress(block, false); + block->page.read_unfix(state); + block->page.lock.x_unlock(); + --buf_pool.n_pend_unzip; - if (!buf_zip_decompress(block, false)) { - block->lock.x_unlock(); - block->page.io_unfix(); - block->unfix(); - --buf_pool.n_pend_unzip; + if (!ok) { + /* FIXME: Evict the corrupted + ROW_FORMAT=COMPRESSED page! 
*/ if (err) { *err = DB_PAGE_CORRUPTED; } return NULL; } - - block->page.io_unfix(); - block->lock.x_unlock(); - --buf_pool.n_pend_unzip; } - ut_ad(block->page.buf_fix_count()); - - ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE); + ut_ad(block->page.frame); #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG re_evict: @@ -2913,6 +2855,7 @@ re_evict: #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ ut_ad(block->page.buf_fix_count()); + ut_ad(block->page.in_file()); /* While tablespace is reinited the indexes are already freed but the blocks related to it still resides in buffer pool. Trying to remove @@ -2921,51 +2864,46 @@ re_evict: load the block but block is already in free state. Handle the said case with mode = BUF_PEEK_IF_IN_POOL that is invoked from "btr_search_drop_page_hash_when_freed". */ - ut_ad(mode == BUF_GET_POSSIBLY_FREED - || mode == BUF_PEEK_IF_IN_POOL - || block->page.status != buf_page_t::FREED); + ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL + || !block->page.is_freed()); const bool not_first_access = block->page.set_accessed(); if (mode != BUF_PEEK_IF_IN_POOL) { buf_page_make_young_if_needed(&block->page); + if (!not_first_access) { + buf_read_ahead_linear(page_id, block->zip_size(), + ibuf_inside(mtr)); + } } #ifdef UNIV_DEBUG if (!(++buf_dbg_counter % 5771)) buf_pool.validate(); #endif /* UNIV_DEBUG */ - ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE); - - /* We have to wait here because the IO_READ state was set - under the protection of the hash_lock and not block->lock. 
*/ - buf_wait_for_read(block); + ut_ad(block->page.in_file()); + ut_ad(block->page.frame); - if (block->page.id() != page_id) { - buf_block_buf_fix_dec(block); - - if (err) { - *err = DB_PAGE_CORRUPTED; - } - - return NULL; - } + ut_ad(block->page.id() == page_id); - if (block->page.status != buf_page_t::FREED + if (!block->page.is_freed() && allow_ibuf_merge - && fil_page_get_type(block->frame) == FIL_PAGE_INDEX - && page_is_leaf(block->frame)) { - block->lock.x_lock(); - - if (block->page.ibuf_exist) { - block->page.ibuf_exist = false; + && fil_page_get_type(block->page.frame) == FIL_PAGE_INDEX + && page_is_leaf(block->page.frame)) { + block->page.lock.x_lock(); + ut_ad(!block->page.is_io_fixed()); + + const auto state = block->page.state(); + if (state >= buf_page_t::IBUF_EXIST + && state < buf_page_t::REINIT) { + block->page.clear_ibuf_exist(); ibuf_merge_or_delete_for_page(block, page_id, - zip_size); + block->zip_size()); } if (rw_latch == RW_X_LATCH) { mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); } else { - block->lock.x_unlock(); + block->page.lock.x_unlock(); goto get_latch; } } else { @@ -2973,13 +2911,6 @@ get_latch: mtr->page_lock(block, rw_latch); } - if (!not_first_access && mode != BUF_PEEK_IF_IN_POOL) { - /* In the case of a first access, try to apply linear - read-ahead */ - - buf_read_ahead_linear(page_id, zip_size, ibuf_inside(mtr)); - } - return block; } @@ -3008,26 +2939,35 @@ buf_page_get_gen( { if (buf_block_t *block= recv_sys.recover(page_id)) { - buf_block_buf_fix_inc(block); + ut_ad(!block->page.is_io_fixed()); + /* Recovery is a special case; we fix() before acquiring lock. 
*/ + const auto s= block->page.fix(); if (err) *err= DB_SUCCESS; const bool must_merge= allow_ibuf_merge && ibuf_page_exists(page_id, block->zip_size()); - if (block->page.status == buf_page_t::FREED) + if (s < buf_page_t::UNFIXED) ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL); - else if (must_merge && fil_page_get_type(block->frame) == FIL_PAGE_INDEX && - page_is_leaf(block->frame)) + else if (must_merge && + fil_page_get_type(block->page.frame) == FIL_PAGE_INDEX && + page_is_leaf(block->page.frame)) { - block->lock.x_lock(); - block->page.ibuf_exist= false; - ibuf_merge_or_delete_for_page(block, page_id, block->zip_size()); + block->page.lock.x_lock(); + if (block->page.is_freed()) + ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL); + else + { + if (block->page.is_ibuf_exist()) + block->page.clear_ibuf_exist(); + ibuf_merge_or_delete_for_page(block, page_id, block->zip_size()); + } if (rw_latch == RW_X_LATCH) { mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); - return block; + return block; } - block->lock.x_unlock(); + block->page.lock.x_unlock(); } mtr->page_lock(block, rw_latch); return block; @@ -3042,93 +2982,91 @@ This is the general function used to get optimistic access to a database page. 
@return TRUE if success */ TRANSACTIONAL_TARGET -ibool -buf_page_optimistic_get( -/*====================*/ - ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */ - buf_block_t* block, /*!< in: guessed buffer block */ - ib_uint64_t modify_clock,/*!< in: modify clock value */ - mtr_t* mtr) /*!< in: mini-transaction */ +bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block, + uint64_t modify_clock, mtr_t *mtr) { - ibool success; - - ut_ad(block); - ut_ad(mtr); - ut_ad(mtr->is_active()); - ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_X_LATCH); - - if (have_transactional_memory) { - } else if (UNIV_UNLIKELY(block->page.state() != BUF_BLOCK_FILE_PAGE - || block->page.io_fix() != BUF_IO_NONE)) { - return FALSE; - } - - const page_id_t id{block->page.id()}; - buf_pool_t::hash_chain& chain = buf_pool.page_hash.cell_get(id.fold()); - - { - transactional_shared_lock_guard<page_hash_latch> g{ - buf_pool.page_hash.lock_get(chain)}; - if (UNIV_UNLIKELY(id != block->page.id() - || block->page.state() != BUF_BLOCK_FILE_PAGE - || block->page.io_fix() != BUF_IO_NONE)) { - return FALSE; - } - block->fix(); - } - - block->page.set_accessed(); - - buf_page_make_young_if_needed(&block->page); - - ut_ad(!ibuf_inside(mtr) || ibuf_page(id, block->zip_size(), NULL)); + ut_ad(block); + ut_ad(mtr); + ut_ad(mtr->is_active()); + ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_X_LATCH); - mtr_memo_type_t fix_type; + if (have_transactional_memory); + else if (UNIV_UNLIKELY(!block->page.frame)) + return false; + else + { + const auto state= block->page.state(); + if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED || + state >= buf_page_t::READ_FIX)) + return false; + } - if (rw_latch == RW_S_LATCH) { - fix_type = MTR_MEMO_PAGE_S_FIX; - success = block->lock.s_lock_try(); - } else if (block->lock.have_u_not_x()) { - block->lock.u_x_upgrade(); - mtr->page_lock_upgrade(*block); - ut_ad(id == block->page.id()); - ut_ad(modify_clock == block->modify_clock); - buf_block_buf_fix_dec(block); - goto 
func_exit; - } else { - fix_type = MTR_MEMO_PAGE_X_FIX; - success = block->lock.x_lock_try(); - } + bool success; + const page_id_t id{block->page.id()}; + buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(id.fold()); + bool have_u_not_x= false; - ut_ad(id == block->page.id()); + { + transactional_shared_lock_guard<page_hash_latch> g + {buf_pool.page_hash.lock_get(chain)}; + if (UNIV_UNLIKELY(id != block->page.id() || !block->page.frame)) + return false; + const auto state= block->page.state(); + if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED || + state >= buf_page_t::READ_FIX)) + return false; + + if (rw_latch == RW_S_LATCH) + success= block->page.lock.s_lock_try(); + else + { + have_u_not_x= block->page.lock.have_u_not_x(); + success= have_u_not_x || block->page.lock.x_lock_try(); + } + } - if (!success) { - buf_block_buf_fix_dec(block); - return(FALSE); - } + if (!success) + return false; - if (modify_clock != block->modify_clock) { - if (rw_latch == RW_S_LATCH) { - block->lock.s_unlock(); - } else { - block->lock.x_unlock(); - } + if (have_u_not_x) + { + block->page.lock.u_x_upgrade(); + mtr->page_lock_upgrade(*block); + ut_ad(id == block->page.id()); + ut_ad(modify_clock == block->modify_clock); + } + else + { + ut_ad(rw_latch == RW_S_LATCH || !block->page.is_io_fixed()); + ut_ad(id == block->page.id()); + ut_ad(!ibuf_inside(mtr) || ibuf_page(id, block->zip_size(), nullptr)); - buf_block_buf_fix_dec(block); - return(FALSE); - } + if (modify_clock != block->modify_clock || block->page.is_freed()) + { + if (rw_latch == RW_S_LATCH) + block->page.lock.s_unlock(); + else + block->page.lock.x_unlock(); + return false; + } - mtr_memo_push(mtr, block, fix_type); -func_exit: -#ifdef UNIV_DEBUG - if (!(++buf_dbg_counter % 5771)) buf_pool.validate(); -#endif /* UNIV_DEBUG */ - ut_ad(block->page.buf_fix_count()); - ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE); + block->page.fix(); + ut_ad(!block->page.is_read_fixed()); + block->page.set_accessed(); + 
buf_page_make_young_if_needed(&block->page); + mtr->memo_push(block, rw_latch == RW_S_LATCH + ? MTR_MEMO_PAGE_S_FIX : MTR_MEMO_PAGE_X_FIX); + } - ++buf_pool.stat.n_page_gets; + ut_d(if (!(++buf_dbg_counter % 5771)) buf_pool.validate()); + ut_d(const auto state = block->page.state()); + ut_ad(state > buf_page_t::UNFIXED); + ut_ad(state < buf_page_t::READ_FIX || state > buf_page_t::WRITE_FIX); + ut_ad(~buf_page_t::LRU_MASK & state); + ut_ad(block->page.frame); - return(TRUE); + ++buf_pool.stat.n_page_gets; + return true; } /** Try to S-latch a page. @@ -3150,24 +3088,18 @@ buf_block_t *buf_page_try_get(const page_id_t page_id, mtr_t *mtr) {buf_pool.page_hash.lock_get(chain)}; block= reinterpret_cast<buf_block_t*> (buf_pool.page_hash.get(page_id, chain)); - if (!block || block->page.state() != BUF_BLOCK_FILE_PAGE) + if (!block || !block->page.frame || !block->page.lock.s_lock_try()) return nullptr; - block->fix(); - } - - if (!block->lock.s_lock_try()) - { - block->unfix(); - return nullptr; } + block->page.fix(); + ut_ad(!block->page.is_read_fixed()); mtr_memo_push(mtr, block, MTR_MEMO_PAGE_S_FIX); #ifdef UNIV_DEBUG if (!(++buf_dbg_counter % 5771)) buf_pool.validate(); #endif /* UNIV_DEBUG */ ut_ad(block->page.buf_fix_count()); - ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE); ut_ad(block->page.id() == page_id); ++buf_pool.stat.n_page_gets; @@ -3181,112 +3113,125 @@ buf_block_t *buf_page_try_get(const page_id_t page_id, mtr_t *mtr) void buf_block_t::initialise(const page_id_t page_id, ulint zip_size, uint32_t fix) { - ut_ad(page.state() != BUF_BLOCK_FILE_PAGE); + ut_ad(!page.in_file()); buf_block_init_low(this); - page.init(page_id, fix); + page.init(fix, page_id); page_zip_set_size(&page.zip, zip_size); } TRANSACTIONAL_TARGET -static buf_block_t* buf_page_create_low(page_id_t page_id, ulint zip_size, +static buf_block_t *buf_page_create_low(page_id_t page_id, ulint zip_size, mtr_t *mtr, buf_block_t *free_block) { ut_ad(mtr->is_active()); ut_ad(page_id.space() != 
0 || !zip_size); - free_block->initialise(page_id, zip_size, 1); + free_block->initialise(page_id, zip_size, buf_page_t::MEMORY); buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold()); mysql_mutex_lock(&buf_pool.mutex); -loop: - buf_block_t *block= reinterpret_cast<buf_block_t*> - (buf_pool.page_hash.get(page_id, chain)); + buf_page_t *bpage= buf_pool.page_hash.get(page_id, chain); - if (block && block->page.in_file() && - !buf_pool.watch_is_sentinel(block->page)) + if (bpage && !buf_pool.watch_is_sentinel(*bpage)) { #ifdef BTR_CUR_HASH_ADAPT const dict_index_t *drop_hash_entry= nullptr; #endif - switch (UNIV_EXPECT(block->page.state(), BUF_BLOCK_FILE_PAGE)) { - default: - ut_ad(0); - break; - case BUF_BLOCK_FILE_PAGE: - if (!mtr->have_x_latch(*block)) + bool ibuf_exist= false; + + if (!mtr->have_x_latch(reinterpret_cast<const buf_block_t&>(*bpage))) + { + const bool got= bpage->lock.x_lock_try(); + if (!got) { - buf_block_buf_fix_inc(block); - while (!block->lock.x_lock_try()) - { - /* Wait for buf_page_write_complete() to release block->lock. - We must not hold buf_pool.mutex while waiting. 
*/ - timespec abstime; - set_timespec_nsec(abstime, 1000000); - my_cond_timedwait(&buf_pool.done_flush_list, &buf_pool.mutex.m_mutex, - &abstime); - } - mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX); + mysql_mutex_unlock(&buf_pool.mutex); + bpage->lock.x_lock(); + mysql_mutex_lock(&buf_pool.mutex); } + + auto state= bpage->fix(); + ut_ad(state >= buf_page_t::FREED); + ut_ad(state < buf_page_t::READ_FIX); + + if (state < buf_page_t::UNFIXED) + bpage->set_reinit(buf_page_t::FREED); else { - ut_ad(!block->page.ibuf_exist); -#ifdef BTR_CUR_HASH_ADAPT - ut_ad(!block->index); -#endif + bpage->set_reinit(state & buf_page_t::LRU_MASK); + ibuf_exist= (state & buf_page_t::LRU_MASK) == buf_page_t::IBUF_EXIST; } + + if (UNIV_LIKELY(bpage->frame != nullptr)) + { + mysql_mutex_unlock(&buf_pool.mutex); + buf_block_t *block= reinterpret_cast<buf_block_t*>(bpage); + mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX); #ifdef BTR_CUR_HASH_ADAPT - drop_hash_entry= block->index; + drop_hash_entry= block->index; #endif - break; - case BUF_BLOCK_ZIP_PAGE: - page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); - /* It does not make sense to use transactional_lock_guard here, - because buf_relocate() would likely make the memory transaction - too large. */ - hash_lock.lock(); - if (block->page.io_fix() != BUF_IO_NONE) + } + else { + auto state= bpage->state(); + ut_ad(state >= buf_page_t::FREED); + ut_ad(state < buf_page_t::READ_FIX); + + page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); + /* It does not make sense to use transactional_lock_guard here, + because buf_relocate() would likely make the memory transaction + too large. 
*/ + hash_lock.lock(); + + if (state < buf_page_t::UNFIXED) + bpage->set_reinit(buf_page_t::FREED); + else + { + bpage->set_reinit(state & buf_page_t::LRU_MASK); + ibuf_exist= (state & buf_page_t::LRU_MASK) == buf_page_t::IBUF_EXIST; + } + + mysql_mutex_lock(&buf_pool.flush_list_mutex); + buf_relocate(bpage, &free_block->page); + free_block->page.lock.x_lock(); + buf_flush_relocate_on_flush_list(bpage, &free_block->page); + mysql_mutex_unlock(&buf_pool.flush_list_mutex); + + buf_unzip_LRU_add_block(free_block, FALSE); + + mysql_mutex_unlock(&buf_pool.mutex); hash_lock.unlock(); - /* Wait for buf_page_write_complete() to release the I/O fix. */ - timespec abstime; - set_timespec_nsec(abstime, 1000000); - my_cond_timedwait(&buf_pool.done_flush_list, &buf_pool.mutex.m_mutex, - &abstime); - goto loop; +#if defined SUX_LOCK_GENERIC || defined UNIV_DEBUG + bpage->lock.x_unlock(); + bpage->lock.free(); +#endif + ut_free(bpage); + mtr_memo_push(mtr, free_block, MTR_MEMO_PAGE_X_FIX); + bpage= &free_block->page; } - - free_block->lock.x_lock(); - mysql_mutex_lock(&buf_pool.flush_list_mutex); - buf_relocate(&block->page, &free_block->page); - buf_flush_relocate_on_flush_list(&block->page, &free_block->page); - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - - free_block->page.set_state(BUF_BLOCK_FILE_PAGE); - buf_unzip_LRU_add_block(free_block, FALSE); - hash_lock.unlock(); - buf_page_free_descriptor(reinterpret_cast<buf_page_t*>(block)); - block= free_block; - buf_block_buf_fix_inc(block); - mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX); - break; } - - mysql_mutex_unlock(&buf_pool.mutex); + else + { + mysql_mutex_unlock(&buf_pool.mutex); + ut_ad(bpage->frame); +#ifdef BTR_CUR_HASH_ADAPT + ut_ad(!reinterpret_cast<buf_block_t*>(bpage)->index); +#endif + const auto state= bpage->state(); + ut_ad(state >= buf_page_t::FREED); + bpage->set_reinit(state < buf_page_t::UNFIXED ? 
buf_page_t::FREED + : state & buf_page_t::LRU_MASK); + } #ifdef BTR_CUR_HASH_ADAPT if (drop_hash_entry) - btr_search_drop_page_hash_index(block); + btr_search_drop_page_hash_index(reinterpret_cast<buf_block_t*>(bpage)); #endif /* BTR_CUR_HASH_ADAPT */ - if (block->page.ibuf_exist) - { - if (!recv_recovery_is_on()) - ibuf_merge_or_delete_for_page(nullptr, page_id, zip_size); - block->page.ibuf_exist= false; - } + if (ibuf_exist && !recv_recovery_is_on()) + ibuf_merge_or_delete_for_page(nullptr, page_id, zip_size); - return block; + return reinterpret_cast<buf_block_t*>(bpage); } /* If we get here, the page was not in buf_pool: init it there */ @@ -3294,41 +3239,36 @@ loop: DBUG_PRINT("ib_buf", ("create page %u:%u", page_id.space(), page_id.page_no())); - block= free_block; + bpage= &free_block->page; - ut_ad(block->page.buf_fix_count() == 1); + ut_ad(bpage->state() == buf_page_t::MEMORY); + bpage->lock.x_lock(); /* The block must be put to the LRU list */ - buf_LRU_add_block(&block->page, false); + buf_LRU_add_block(bpage, false); { transactional_lock_guard<page_hash_latch> g {buf_pool.page_hash.lock_get(chain)}; - block->page.set_state(BUF_BLOCK_FILE_PAGE); - buf_pool.page_hash.append(chain, &block->page); - block->lock.x_lock(); - if (UNIV_UNLIKELY(zip_size)) - /* Prevent race conditions during buf_buddy_alloc(), which may - release and reacquire buf_pool.mutex, by IO-fixing and X-latching. */ - block->page.set_io_fix(BUF_IO_READ); + bpage->set_state(buf_page_t::REINIT + 1); + buf_pool.page_hash.append(chain, bpage); } if (UNIV_UNLIKELY(zip_size)) { - block->page.zip.data= buf_buddy_alloc(zip_size); + bpage->zip.data= buf_buddy_alloc(zip_size); /* To maintain the invariant block->in_unzip_LRU_list == block->page.belongs_to_unzip_LRU() we have to add this block to unzip_LRU after block->page.zip.data is set. 
*/ - ut_ad(block->page.belongs_to_unzip_LRU()); - buf_unzip_LRU_add_block(block, FALSE); - - block->page.set_io_fix(BUF_IO_NONE); + ut_ad(bpage->belongs_to_unzip_LRU()); + buf_unzip_LRU_add_block(reinterpret_cast<buf_block_t*>(bpage), FALSE); } mysql_mutex_unlock(&buf_pool.mutex); - mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); - block->page.set_accessed(); + mtr->memo_push(reinterpret_cast<buf_block_t*>(bpage), MTR_MEMO_PAGE_X_FIX); + + bpage->set_accessed(); buf_pool.stat.n_pages_created++; /* Delete possible entries for the page from the insert buffer: @@ -3339,8 +3279,8 @@ loop: ibuf_merge_or_delete_for_page(nullptr, page_id, zip_size); static_assert(FIL_PAGE_PREV + 4 == FIL_PAGE_NEXT, "adjacent"); - memset_aligned<8>(block->frame + FIL_PAGE_PREV, 0xff, 8); - mach_write_to_2(block->frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED); + memset_aligned<8>(bpage->frame + FIL_PAGE_PREV, 0xff, 8); + mach_write_to_2(bpage->frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED); /* FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION is only used on the following pages: @@ -3348,13 +3288,13 @@ loop: (2) FIL_RTREE_SPLIT_SEQ_NUM on R-tree pages (3) key_version on encrypted pages (not page 0:0) */ - memset(block->frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); - memset_aligned<8>(block->frame + FIL_PAGE_LSN, 0, 8); + memset(bpage->frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); + memset_aligned<8>(bpage->frame + FIL_PAGE_LSN, 0, 8); #ifdef UNIV_DEBUG if (!(++buf_dbg_counter % 5771)) buf_pool.validate(); #endif /* UNIV_DEBUG */ - return block; + return reinterpret_cast<buf_block_t*>(bpage); } /** Initialize a page in the buffer pool. The page is usually not read @@ -3393,18 +3333,12 @@ buf_block_t* buf_page_create_deferred(uint32_t space_id, ulint zip_size, /** Monitor the buffer page read/write activity, and increment corresponding counter value in MONITOR_MODULE_BUF_PAGE. 
@param bpage buffer page whose read or write was completed -@param io_type BUF_IO_READ or BUF_IO_WRITE */ -ATTRIBUTE_COLD __attribute__((nonnull)) -void buf_page_monitor(const buf_page_t *bpage, buf_io_fix io_type) +@param read true=read, false=write */ +ATTRIBUTE_COLD void buf_page_monitor(const buf_page_t &bpage, bool read) { - const byte* frame; monitor_id_t counter; - ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE); - - frame = bpage->zip.data - ? bpage->zip.data - : ((buf_block_t*) bpage)->frame; + const byte* frame = bpage.zip.data ? bpage.zip.data : bpage.frame; switch (fil_page_get_type(frame)) { ulint level; @@ -3419,71 +3353,69 @@ void buf_page_monitor(const buf_page_t *bpage, buf_io_fix io_type) == (index_id_t)(DICT_IBUF_ID_MIN + IBUF_SPACE_ID)) { if (level == 0) { counter = MONITOR_RW_COUNTER( - io_type, MONITOR_INDEX_IBUF_LEAF_PAGE); + read, MONITOR_INDEX_IBUF_LEAF_PAGE); } else { counter = MONITOR_RW_COUNTER( - io_type, + read, MONITOR_INDEX_IBUF_NON_LEAF_PAGE); } } else { if (level == 0) { counter = MONITOR_RW_COUNTER( - io_type, MONITOR_INDEX_LEAF_PAGE); + read, MONITOR_INDEX_LEAF_PAGE); } else { counter = MONITOR_RW_COUNTER( - io_type, MONITOR_INDEX_NON_LEAF_PAGE); + read, MONITOR_INDEX_NON_LEAF_PAGE); } } break; case FIL_PAGE_UNDO_LOG: - counter = MONITOR_RW_COUNTER(io_type, MONITOR_UNDO_LOG_PAGE); + counter = MONITOR_RW_COUNTER(read, MONITOR_UNDO_LOG_PAGE); break; case FIL_PAGE_INODE: - counter = MONITOR_RW_COUNTER(io_type, MONITOR_INODE_PAGE); + counter = MONITOR_RW_COUNTER(read, MONITOR_INODE_PAGE); break; case FIL_PAGE_IBUF_FREE_LIST: - counter = MONITOR_RW_COUNTER(io_type, - MONITOR_IBUF_FREELIST_PAGE); + counter = MONITOR_RW_COUNTER(read, MONITOR_IBUF_FREELIST_PAGE); break; case FIL_PAGE_IBUF_BITMAP: - counter = MONITOR_RW_COUNTER(io_type, - MONITOR_IBUF_BITMAP_PAGE); + counter = MONITOR_RW_COUNTER(read, MONITOR_IBUF_BITMAP_PAGE); break; case FIL_PAGE_TYPE_SYS: - counter = MONITOR_RW_COUNTER(io_type, MONITOR_SYSTEM_PAGE); + counter 
= MONITOR_RW_COUNTER(read, MONITOR_SYSTEM_PAGE); break; case FIL_PAGE_TYPE_TRX_SYS: - counter = MONITOR_RW_COUNTER(io_type, MONITOR_TRX_SYSTEM_PAGE); + counter = MONITOR_RW_COUNTER(read, MONITOR_TRX_SYSTEM_PAGE); break; case FIL_PAGE_TYPE_FSP_HDR: - counter = MONITOR_RW_COUNTER(io_type, MONITOR_FSP_HDR_PAGE); + counter = MONITOR_RW_COUNTER(read, MONITOR_FSP_HDR_PAGE); break; case FIL_PAGE_TYPE_XDES: - counter = MONITOR_RW_COUNTER(io_type, MONITOR_XDES_PAGE); + counter = MONITOR_RW_COUNTER(read, MONITOR_XDES_PAGE); break; case FIL_PAGE_TYPE_BLOB: - counter = MONITOR_RW_COUNTER(io_type, MONITOR_BLOB_PAGE); + counter = MONITOR_RW_COUNTER(read, MONITOR_BLOB_PAGE); break; case FIL_PAGE_TYPE_ZBLOB: - counter = MONITOR_RW_COUNTER(io_type, MONITOR_ZBLOB_PAGE); + counter = MONITOR_RW_COUNTER(read, MONITOR_ZBLOB_PAGE); break; case FIL_PAGE_TYPE_ZBLOB2: - counter = MONITOR_RW_COUNTER(io_type, MONITOR_ZBLOB2_PAGE); + counter = MONITOR_RW_COUNTER(read, MONITOR_ZBLOB2_PAGE); break; default: - counter = MONITOR_RW_COUNTER(io_type, MONITOR_OTHER_PAGE); + counter = MONITOR_RW_COUNTER(read, MONITOR_OTHER_PAGE); } MONITOR_INC_NOCHECK(counter); @@ -3555,8 +3487,7 @@ static dberr_t buf_page_check_corrupt(buf_page_t *bpage, { ut_ad(node.space->referenced()); - byte* dst_frame = (bpage->zip.data) ? bpage->zip.data : - ((buf_block_t*) bpage)->frame; + byte* dst_frame = bpage->zip.data ? bpage->zip.data : bpage->frame; dberr_t err = DB_SUCCESS; uint key_version = buf_page_get_key_version(dst_frame, node.space->flags); @@ -3616,109 +3547,96 @@ static dberr_t buf_page_check_corrupt(buf_page_t *bpage, return (err); } -/** Complete a read request of a file page to buf_pool. -@param bpage recently read page +/** Complete a read of a page. 
@param node data file @return whether the operation succeeded -@retval DB_SUCCESS always when writing, or if a read page was OK -@retval DB_PAGE_CORRUPTED if the checksum fails on a page read -@retval DB_DECRYPTION_FAILED if the page cannot be decrypted */ -dberr_t buf_page_read_complete(buf_page_t *bpage, const fil_node_t &node) +@retval DB_PAGE_CORRUPTED if the checksum fails +@retval DB_DECRYPTION_FAILED if the page cannot be decrypted */ +dberr_t buf_page_t::read_complete(const fil_node_t &node) { - const page_id_t id(bpage->id()); - ut_ad(bpage->in_file()); - ut_ad(!buf_dblwr.is_inside(id)); - ut_ad(id.space() == node.space->id); - ut_ad(bpage->zip_size() == node.space->zip_size()); - - /* We do not need protect io_fix here by mutex to read it because - this and buf_page_write_complete() are the only functions where we can - change the value from BUF_IO_READ or BUF_IO_WRITE to some other - value, and our code ensures that this is the only thread that handles - the i/o for this block. */ - - ut_ad(bpage->io_fix() == BUF_IO_READ); - ut_ad(!!bpage->zip.ssize == !!bpage->zip.data); - ut_ad(bpage->state() == BUF_BLOCK_FILE_PAGE || bpage->zip.data); - - const byte *frame= bpage->zip.data - ? bpage->zip.data - : reinterpret_cast<buf_block_t*>(bpage)->frame; - ut_ad(frame); + const page_id_t expected_id{id()}; + ut_ad(is_read_fixed()); + ut_ad(!buf_dblwr.is_inside(id())); + ut_ad(id().space() == node.space->id); + ut_ad(zip_size() == node.space->zip_size()); + ut_ad(!!zip.ssize == !!zip.data); + + const byte *read_frame= zip.data ? 
zip.data : frame; + ut_ad(read_frame); dberr_t err; - if (!buf_page_decrypt_after_read(bpage, node)) + if (!buf_page_decrypt_after_read(this, node)) { err= DB_DECRYPTION_FAILED; goto database_corrupted; } - if (bpage->zip.data && bpage->state() == BUF_BLOCK_FILE_PAGE) + if (belongs_to_unzip_LRU()) { buf_pool.n_pend_unzip++; - auto ok= buf_zip_decompress(reinterpret_cast<buf_block_t*>(bpage), FALSE); + auto ok= buf_zip_decompress(reinterpret_cast<buf_block_t*>(this), false); buf_pool.n_pend_unzip--; if (!ok) { - ib::info() << "Page " << id << " zip_decompress failure."; + ib::info() << "Page " << expected_id << " zip_decompress failure."; err= DB_PAGE_CORRUPTED; goto database_corrupted; } } { - const page_id_t read_id(mach_read_from_4(frame + FIL_PAGE_SPACE_ID), - mach_read_from_4(frame + FIL_PAGE_OFFSET)); + const page_id_t read_id(mach_read_from_4(read_frame + FIL_PAGE_SPACE_ID), + mach_read_from_4(read_frame + FIL_PAGE_OFFSET)); - if (read_id == id); + if (read_id == expected_id); else if (read_id == page_id_t(0, 0)) /* This is likely an uninitialized page. */; else if (!node.space->full_crc32() && - page_id_t(0, read_id.page_no()) == id) + page_id_t(0, read_id.page_no()) == expected_id) /* FIL_PAGE_SPACE_ID was written as garbage in the system tablespace before MySQL 4.1.1, which introduced innodb_file_per_table. 
*/; else if (node.space->full_crc32() && *reinterpret_cast<const uint32_t*> - (&frame[FIL_PAGE_FCRC32_KEY_VERSION]) && + (&read_frame[FIL_PAGE_FCRC32_KEY_VERSION]) && node.space->crypt_data && node.space->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED) { - ib::error() << "Cannot decrypt " << id; + ib::error() << "Cannot decrypt " << expected_id; err= DB_DECRYPTION_FAILED; goto release_page; } else ib::error() << "Space id and page no stored in the page, read in are " - << read_id << ", should be " << id; + << read_id << ", should be " << expected_id; } - err= buf_page_check_corrupt(bpage, node); + err= buf_page_check_corrupt(this, node); if (UNIV_UNLIKELY(err != DB_SUCCESS)) { database_corrupted: /* Not a real corruption if it was triggered by error injection */ DBUG_EXECUTE_IF("buf_page_import_corrupt_failure", - if (!is_predefined_tablespace(id.space())) + if (!is_predefined_tablespace(id().space())) { - buf_corrupt_page_release(bpage, node); + buf_corrupt_page_release(this, node); ib::info() << "Simulated IMPORT corruption"; return err; } err= DB_SUCCESS; goto page_not_corrupt;); - if (bpage->zip.data && bpage->state() == BUF_BLOCK_FILE_PAGE) - memset(reinterpret_cast<buf_block_t*>(bpage)->frame, 0, srv_page_size); + if (belongs_to_unzip_LRU()) + memset_aligned<UNIV_PAGE_SIZE_MIN>(frame, 0, srv_page_size); if (err == DB_PAGE_CORRUPTED) { ib::error() << "Database page corruption on disk" " or a failed read of file '" - << node.name << "' page " << id + << node.name << "' page " << expected_id << ". You may have to recover from a backup."; - buf_page_print(frame, bpage->zip_size()); + buf_page_print(read_frame, zip_size()); ib::info() << " You can use CHECK TABLE to scan" " your table for corruption. " @@ -3729,48 +3647,51 @@ database_corrupted: { /* If the corruption is in the system tablespace, we will intentionally crash the server. 
*/ - if (id.space() == TRX_SYS_SPACE) + if (expected_id.space() == TRX_SYS_SPACE) ib::fatal() << "Aborting because of a corrupt database page."; - buf_corrupt_page_release(bpage, node); + buf_corrupt_page_release(this, node); return err; } } DBUG_EXECUTE_IF("buf_page_import_corrupt_failure", - page_not_corrupt: bpage= bpage; ); + page_not_corrupt: err= err; ); if (err == DB_PAGE_CORRUPTED || err == DB_DECRYPTION_FAILED) { release_page: - buf_corrupt_page_release(bpage, node); + buf_corrupt_page_release(this, node); if (recv_recovery_is_on()) - recv_sys.free_corrupted_page(id); + recv_sys.free_corrupted_page(expected_id); return err; } - if (recv_recovery_is_on()) - recv_recover_page(node.space, bpage); + const bool recovery= recv_recovery_is_on(); + + if (recovery) + recv_recover_page(node.space, this); - if (bpage->state() == BUF_BLOCK_FILE_PAGE && !recv_no_ibuf_operations && - (!id.space() || !is_predefined_tablespace(id.space())) && - fil_page_get_type(frame) == FIL_PAGE_INDEX && - page_is_leaf(frame)) - bpage->ibuf_exist= true; + const bool ibuf_may_exist= frame && !recv_no_ibuf_operations && + (!expected_id.space() || !is_predefined_tablespace(expected_id.space())) && + fil_page_get_type(read_frame) == FIL_PAGE_INDEX && + page_is_leaf(read_frame); if (UNIV_UNLIKELY(MONITOR_IS_ON(MONITOR_MODULE_BUF_PAGE))) - buf_page_monitor(bpage, BUF_IO_READ); - DBUG_PRINT("ib_buf", ("read page %u:%u", - id.space(), id.page_no())); - - /* Because this thread which does the unlocking might not be the same that - did the locking, we use a pass value != 0 in unlock, which simply - removes the newest lock debug record, without checking the thread id. */ - if (bpage->state() == BUF_BLOCK_FILE_PAGE) + buf_page_monitor(*this, true); + DBUG_PRINT("ib_buf", ("read page %u:%u", id().space(), id().page_no())); + + if (!recovery) { - buf_block_t *block= reinterpret_cast<buf_block_t*>(bpage); - block->lock.x_unlock(true); + ut_d(auto f=) zip.fix.fetch_sub(ibuf_may_exist + ? 
READ_FIX - IBUF_EXIST + : READ_FIX - UNFIXED); + ut_ad(f >= READ_FIX); + ut_ad(f < WRITE_FIX); } - bpage->io_unfix(); + else if (ibuf_may_exist) + set_ibuf_exist(); + + lock.x_unlock(true); ut_d(auto n=) buf_pool.n_pend_reads--; ut_ad(n > 0); @@ -3818,9 +3739,10 @@ void buf_pool_invalidate() ut_d(buf_pool.assert_all_freed()); ut_d(mysql_mutex_lock(&buf_pool.mutex)); - while (buf_LRU_scan_and_free_block()); + while (UT_LIST_GET_LEN(buf_pool.LRU)) { + buf_LRU_scan_and_free_block(); + } - ut_ad(UT_LIST_GET_LEN(buf_pool.LRU) == 0); ut_ad(UT_LIST_GET_LEN(buf_pool.unzip_LRU) == 0); buf_pool.freed_page_clock = 0; @@ -3848,35 +3770,34 @@ void buf_pool_t::validate() /* Check the uncompressed blocks. */ for (auto i = n_chunks; i--; chunk++) { - - ulint j; buf_block_t* block = chunk->blocks; - for (j = chunk->size; j--; block++) { - switch (block->page.state()) { - case BUF_BLOCK_ZIP_PAGE: - /* This kind of block descriptors should - be allocated by malloc() only. */ - ut_error; - break; - - case BUF_BLOCK_NOT_USED: + for (auto j = chunk->size; j--; block++) { + ut_ad(block->page.frame); + switch (const auto f = block->page.state()) { + case buf_page_t::NOT_USED: n_free++; break; - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: + case buf_page_t::MEMORY: + case buf_page_t::REMOVE_HASH: /* do nothing */ break; - case BUF_BLOCK_FILE_PAGE: - const page_id_t id = block->page.id(); - ut_ad(page_hash.get(id, page_hash.cell_get( - id.fold())) + default: + if (f >= buf_page_t::READ_FIX + && f < buf_page_t::WRITE_FIX) { + /* A read-fixed block is not + necessarily in the page_hash yet. 
*/ + break; + } + ut_ad(f >= buf_page_t::FREED); + const page_id_t id{block->page.id()}; + ut_ad(page_hash.get( + id, + page_hash.cell_get(id.fold())) == &block->page); n_lru++; - break; - } } } @@ -3886,25 +3807,16 @@ void buf_pool_t::validate() mysql_mutex_lock(&flush_list_mutex); for (buf_page_t* b = UT_LIST_GET_FIRST(flush_list); b; b = UT_LIST_GET_NEXT(list, b)) { + ut_ad(b->in_file()); ut_ad(b->oldest_modification()); ut_ad(!fsp_is_system_temporary(b->id().space())); n_flushing++; - switch (b->state()) { - case BUF_BLOCK_ZIP_PAGE: + if (UNIV_UNLIKELY(!b->frame)) { n_lru++; n_zip++; - break; - case BUF_BLOCK_FILE_PAGE: - /* uncompressed page */ - break; - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - ut_error; - break; } - const page_id_t id = b->id(); + const page_id_t id{b->id()}; ut_ad(page_hash.get(id, page_hash.cell_get(id.fold())) == b); } @@ -3990,7 +3902,7 @@ void buf_pool_t::print() ulint n_blocks = chunk->size; for (; n_blocks--; block++) { - const buf_frame_t* frame = block->frame; + const buf_frame_t* frame = block->page.frame; if (fil_page_index_page_check(frame)) { @@ -4052,7 +3964,7 @@ ulint buf_get_latched_pages_number() for (buf_page_t *b= UT_LIST_GET_FIRST(buf_pool.LRU); b; b= UT_LIST_GET_NEXT(LRU, b)) - if (b->in_file() && (b->buf_fix_count() || b->io_fix() != BUF_IO_NONE)) + if (b->state() > buf_page_t::UNFIXED) fixed_pages_number++; mysql_mutex_unlock(&buf_pool.mutex); diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index b8371864e16..f1e728e94a8 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -86,11 +86,12 @@ start_again: buf_block_t *trx_sys_block= buf_dblwr_trx_sys_get(&mtr); if (mach_read_from_4(TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_MAGIC + - trx_sys_block->frame) == TRX_SYS_DOUBLEWRITE_MAGIC_N) + trx_sys_block->page.frame) == + TRX_SYS_DOUBLEWRITE_MAGIC_N) { /* The doublewrite buffer has already been created: just read 
in some numbers */ - init(TRX_SYS_DOUBLEWRITE + trx_sys_block->frame); + init(TRX_SYS_DOUBLEWRITE + trx_sys_block->page.frame); mtr.commit(); return true; } @@ -121,7 +122,7 @@ too_small: } byte *fseg_header= TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG + - trx_sys_block->frame; + trx_sys_block->page.frame; for (uint32_t prev_page_no= 0, i= 0, extent_size= FSP_EXTENT_SIZE; i < 2 * size + extent_size / 2; i++) { @@ -149,12 +150,12 @@ too_small: tablespace, then the page has not been written to in doublewrite. */ - ut_ad(new_block->lock.not_recursive()); + ut_ad(new_block->page.lock.not_recursive()); const page_id_t id= new_block->page.id(); /* We only do this in the debug build, to ensure that the check in buf_flush_init_for_writing() will see a valid page type. The flushes of new_block are actually unnecessary here. */ - ut_d(mtr.write<2>(*new_block, FIL_PAGE_TYPE + new_block->frame, + ut_d(mtr.write<2>(*new_block, FIL_PAGE_TYPE + new_block->page.frame, FIL_PAGE_TYPE_SYS)); if (i == size / 2) @@ -162,10 +163,10 @@ too_small: ut_a(id.page_no() == size); mtr.write<4>(*trx_sys_block, TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_BLOCK1 + - trx_sys_block->frame, id.page_no()); + trx_sys_block->page.frame, id.page_no()); mtr.write<4>(*trx_sys_block, TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_REPEAT + - TRX_SYS_DOUBLEWRITE_BLOCK1 + trx_sys_block->frame, + TRX_SYS_DOUBLEWRITE_BLOCK1 + trx_sys_block->page.frame, id.page_no()); } else if (i == size / 2 + size) @@ -173,10 +174,10 @@ too_small: ut_a(id.page_no() == 2 * size); mtr.write<4>(*trx_sys_block, TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_BLOCK2 + - trx_sys_block->frame, id.page_no()); + trx_sys_block->page.frame, id.page_no()); mtr.write<4>(*trx_sys_block, TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_REPEAT + - TRX_SYS_DOUBLEWRITE_BLOCK2 + trx_sys_block->frame, + TRX_SYS_DOUBLEWRITE_BLOCK2 + trx_sys_block->page.frame, id.page_no()); } else if (i > size / 2) @@ -193,7 +194,7 @@ too_small: mtr.start(); trx_sys_block= 
buf_dblwr_trx_sys_get(&mtr); fseg_header= TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG + - trx_sys_block->frame; + trx_sys_block->page.frame; } prev_page_no= id.page_no(); @@ -201,15 +202,16 @@ too_small: mtr.write<4>(*trx_sys_block, TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_MAGIC + - trx_sys_block->frame, TRX_SYS_DOUBLEWRITE_MAGIC_N); + trx_sys_block->page.frame, TRX_SYS_DOUBLEWRITE_MAGIC_N); mtr.write<4>(*trx_sys_block, TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_MAGIC + - TRX_SYS_DOUBLEWRITE_REPEAT + trx_sys_block->frame, + TRX_SYS_DOUBLEWRITE_REPEAT + trx_sys_block->page.frame, TRX_SYS_DOUBLEWRITE_MAGIC_N); mtr.write<4>(*trx_sys_block, TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED + - trx_sys_block->frame, TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N); + trx_sys_block->page.frame, + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N); mtr.commit(); /* Flush the modified pages to disk and make a checkpoint */ @@ -519,8 +521,9 @@ static void buf_dblwr_check_page_lsn(const buf_page_t &b, const byte *page) /** Check the LSN values on the page with which this block is associated. */ static void buf_dblwr_check_block(const buf_page_t *bpage) { - ut_ad(bpage->state() == BUF_BLOCK_FILE_PAGE); - const page_t *page= reinterpret_cast<const buf_block_t*>(bpage)->frame; + ut_ad(bpage->in_file()); + const page_t *page= bpage->frame; + ut_ad(page); switch (fil_page_get_type(page)) { case FIL_PAGE_INDEX: @@ -619,9 +622,7 @@ static void *get_frame(const IORequest &request) if (request.slot) return request.slot->out_buf; const buf_page_t *bpage= request.bpage; - return bpage->zip.data - ? bpage->zip.data - : reinterpret_cast<const buf_block_t*>(bpage)->frame; + return bpage->zip.data ? 
bpage->zip.data : bpage->frame; } void buf_dblwr_t::flush_buffered_writes_completed(const IORequest &request) @@ -673,7 +674,6 @@ void buf_dblwr_t::flush_buffered_writes_completed(const IORequest &request) } else { - ut_ad(bpage->state() == BUF_BLOCK_FILE_PAGE); ut_ad(!bpage->zip_size()); ut_d(buf_dblwr_check_page_lsn(*bpage, static_cast<const byte*>(frame))); } diff --git a/storage/innobase/buf/buf0dump.cc b/storage/innobase/buf/buf0dump.cc index 07416a48f70..96b046c93bc 100644 --- a/storage/innobase/buf/buf0dump.cc +++ b/storage/innobase/buf/buf0dump.cc @@ -328,16 +328,15 @@ buf_dump( for (bpage = UT_LIST_GET_FIRST(buf_pool.LRU), j = 0; bpage != NULL && j < n_pages; bpage = UT_LIST_GET_NEXT(LRU, bpage)) { - - ut_a(bpage->in_file()); - const page_id_t id(bpage->id()); - - if (id.space() == SRV_TMP_SPACE_ID) { - /* Ignore the innodb_temporary tablespace. */ + const auto status = bpage->state(); + if (status < buf_page_t::UNFIXED) { + ut_a(status >= buf_page_t::FREED); continue; } + const page_id_t id{bpage->id()}; - if (bpage->status == buf_page_t::FREED) { + if (id.space() == SRV_TMP_SPACE_ID) { + /* Ignore the innodb_temporary tablespace. */ continue; } diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 2fd60a79602..ac06bc5bb4e 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -198,7 +198,7 @@ void buf_pool_t::insert_into_flush_list(buf_block_t *block, lsn_t lsn) block->page.set_oldest_modification(lsn); MEM_CHECK_DEFINED(block->page.zip.data - ? block->page.zip.data : block->frame, + ? 
block->page.zip.data : block->page.frame, block->physical_size()); UT_LIST_ADD_FIRST(flush_list, &block->page); ut_d(buf_flush_validate_skip()); @@ -239,15 +239,15 @@ void buf_flush_remove_pages(ulint id) for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.flush_list); bpage; ) { - ut_d(const auto s= bpage->state()); - ut_ad(s == BUF_BLOCK_ZIP_PAGE || s == BUF_BLOCK_FILE_PAGE || - s == BUF_BLOCK_REMOVE_HASH); + const auto s= bpage->state(); + ut_ad(s >= buf_page_t::REMOVE_HASH); + ut_ad(s < buf_page_t::READ_FIX || s >= buf_page_t::WRITE_FIX); buf_page_t *prev= UT_LIST_GET_PREV(list, bpage); const page_id_t bpage_id(bpage->id()); if (bpage_id < first || bpage_id >= end); - else if (bpage->io_fix() != BUF_IO_NONE) + else if (s >= buf_page_t::WRITE_FIX) deferred= true; else buf_pool.delete_from_flush_list(bpage); @@ -325,39 +325,59 @@ buf_flush_relocate_on_flush_list( ut_d(buf_flush_validate_low()); } +/** Note that a block is no longer dirty, while not removing +it from buf_pool.flush_list */ +inline void buf_page_t::write_complete(bool temporary) +{ + ut_ad(temporary == fsp_is_system_temporary(id().space())); + if (temporary) + { + ut_ad(oldest_modification() == 2); + oldest_modification_= 0; + } + else + { + /* We use release memory order to guarantee that callers of + oldest_modification_acquire() will observe the block as + being detached from buf_pool.flush_list, after reading the value 0. */ + ut_ad(oldest_modification() > 2); + oldest_modification_.store(1, std::memory_order_release); + } + const auto s= state(); + ut_ad(s >= WRITE_FIX); + zip.fix.fetch_sub((s >= WRITE_FIX_REINIT) + ? (WRITE_FIX_REINIT - UNFIXED) + : (WRITE_FIX - UNFIXED)); + lock.u_unlock(true); +} + /** Complete write of a file page from buf_pool. 
@param request write request */ void buf_page_write_complete(const IORequest &request) { ut_ad(request.is_write()); - ut_ad(!srv_read_only_mode/* || - request.node->space->purpose == FIL_TYPE_TEMPORARY*/); + ut_ad(!srv_read_only_mode); buf_page_t *bpage= request.bpage; ut_ad(bpage); - ut_ad(bpage->in_file()); - /* bpage->io_fix() can only be changed by buf_page_write_complete() - and buf_page_read_complete() from BUF_IO_READ or BUF_IO_WRITE */ - ut_ad(bpage->io_fix() == BUF_IO_WRITE); + const auto state= bpage->state(); + /* io-fix can only be cleared by buf_page_t::write_complete() + and buf_page_t::read_complete() */ + ut_ad(state >= buf_page_t::WRITE_FIX); ut_ad(!buf_dblwr.is_inside(bpage->id())); ut_ad(request.node->space->id == bpage->id().space()); - if (bpage->status == buf_page_t::INIT_ON_FLUSH) - bpage->status= buf_page_t::NORMAL; - else + if (state < buf_page_t::WRITE_FIX_REINIT && + request.node->space->use_doublewrite()) { - ut_ad(bpage->status == buf_page_t::NORMAL); - if (request.node->space->use_doublewrite()) - { - ut_ad(request.node->space != fil_system.temp_space); - buf_dblwr.write_completed(); - } + ut_ad(request.node->space != fil_system.temp_space); + buf_dblwr.write_completed(); } if (request.slot) request.slot->release(); if (UNIV_UNLIKELY(MONITOR_IS_ON(MONITOR_MODULE_BUF_PAGE))) - buf_page_monitor(bpage, BUF_IO_WRITE); + buf_page_monitor(*bpage, false); DBUG_PRINT("ib_buf", ("write page %u:%u", bpage->id().space(), bpage->id().page_no())); const bool temp= fsp_is_system_temporary(bpage->id().space()); @@ -365,16 +385,7 @@ void buf_page_write_complete(const IORequest &request) mysql_mutex_lock(&buf_pool.mutex); mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex); buf_pool.stat.n_pages_written++; - /* While we do not need any mutex for clearing oldest_modification - here, we hope that it will be in the same cache line with io_fix, - whose changes must be protected by buf_pool.mutex. 
*/ - ut_ad(temp || bpage->oldest_modification() > 2); - bpage->clear_oldest_modification(temp); - ut_ad(bpage->io_fix() == BUF_IO_WRITE); - bpage->set_io_fix(BUF_IO_NONE); - - if (bpage->state() == BUF_BLOCK_FILE_PAGE) - reinterpret_cast<buf_block_t*>(bpage)->lock.u_unlock(true); + bpage->write_complete(temp); if (request.is_LRU()) { @@ -437,16 +448,14 @@ buf_flush_init_for_writing( void* page_zip_, bool use_full_checksum) { - if (block != NULL && block->frame != page) { + if (block && block->page.frame != page) { /* If page is encrypted in full crc32 format then checksum stored already as a part of fil_encrypt_buf() */ ut_ad(use_full_checksum); return; } - ut_ad(block == NULL || block->frame == page); - ut_ad(block == NULL || page_zip_ == NULL - || &block->page.zip == page_zip_); + ut_ad(!block || block->page.frame == page); ut_ad(page); if (page_zip_) { @@ -454,6 +463,7 @@ buf_flush_init_for_writing( ulint size; page_zip = static_cast<page_zip_des_t*>(page_zip_); + ut_ad(!block || &block->page.zip == page_zip); size = page_zip_get_size(page_zip); ut_ad(size); @@ -621,7 +631,7 @@ a page is written to disk. static byte *buf_page_encrypt(fil_space_t* space, buf_page_t* bpage, byte* s, buf_tmp_buffer_t **slot, size_t *size) { - ut_ad(bpage->status != buf_page_t::FREED); + ut_ad(!bpage->is_freed()); ut_ad(space->id == bpage->id().space()); ut_ad(!*slot); @@ -745,16 +755,12 @@ not_compressed: /** Free a page whose underlying file page has been freed. 
*/ inline void buf_pool_t::release_freed_page(buf_page_t *bpage) { - ut_ad(bpage->in_file()); - const bool uncompressed= bpage->state() == BUF_BLOCK_FILE_PAGE; - mysql_mutex_lock(&mutex); - bpage->set_io_fix(BUF_IO_NONE); - bpage->status= buf_page_t::NORMAL; + mysql_mutex_assert_owner(&mutex); mysql_mutex_lock(&flush_list_mutex); ut_d(const lsn_t oldest_modification= bpage->oldest_modification();) if (fsp_is_system_temporary(bpage->id().space())) { - ut_ad(uncompressed); + ut_ad(bpage->frame); ut_ad(oldest_modification == 2); } else @@ -764,169 +770,152 @@ inline void buf_pool_t::release_freed_page(buf_page_t *bpage) } bpage->clear_oldest_modification(); mysql_mutex_unlock(&flush_list_mutex); - - if (uncompressed) - reinterpret_cast<buf_block_t*>(bpage)->lock.u_unlock(true); + bpage->lock.u_unlock(true); buf_LRU_free_page(bpage, true); - mysql_mutex_unlock(&mutex); } -/** Write a flushable page from buf_pool to a file. -buf_pool.mutex must be held. -@param bpage buffer control block +/** Write a flushable page to a file. buf_pool.mutex must be held. 
@param lru true=buf_pool.LRU; false=buf_pool.flush_list @param space tablespace @return whether the page was flushed and buf_pool.mutex was released */ -static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space) +inline bool buf_page_t::flush(bool lru, fil_space_t *space) { - ut_ad(bpage->in_file()); - ut_ad(bpage->ready_for_flush()); + ut_ad(in_file()); + ut_ad(in_LRU_list); ut_ad((space->purpose == FIL_TYPE_TEMPORARY) == (space == fil_system.temp_space)); ut_ad(space->referenced()); ut_ad(lru || space != fil_system.temp_space); - block_lock *rw_lock; + if (!lock.u_lock_try(true)) + return false; + + const auto s= state(); + ut_a(s >= FREED); - if (bpage->state() != BUF_BLOCK_FILE_PAGE) - rw_lock= nullptr; - else + if (s < UNFIXED) { - rw_lock= &reinterpret_cast<buf_block_t*>(bpage)->lock; - if (!rw_lock->u_lock_try(true)) - return false; + buf_pool.release_freed_page(this); + mysql_mutex_unlock(&buf_pool.mutex); + return true; } - bpage->set_io_fix(BUF_IO_WRITE); - /* Because bpage->status can only be changed while buf_block_t - exists, it cannot be modified for ROW_FORMAT=COMPRESSED pages - without first allocating the uncompressed page frame. Such - allocation cannot be completed due to our io_fix. So, bpage->status - is protected even if !rw_lock. */ - const auto status= bpage->status; - - if (status != buf_page_t::FREED) + if (s >= READ_FIX || oldest_modification() < 2) { - if (lru) - buf_pool.n_flush_LRU_++; - else - buf_pool.n_flush_list_++; - buf_flush_page_count++; + lock.u_unlock(true); + return false; } mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex); - /* We are holding rw_lock = buf_block_t::lock in SX mode except if - this is a ROW_FORMAT=COMPRESSED page whose uncompressed page frame - has been evicted from the buffer pool. - - Apart from possible rw_lock protection, bpage is also protected by - io_fix and oldest_modification()!=0. Thus, it cannot be relocated in - the buffer pool or removed from flush_list or LRU_list. 
*/ + /* Apart from the U-lock, this block will also be protected by + is_write_fixed() and oldest_modification()>1. + Thus, it cannot be relocated or removed. */ DBUG_PRINT("ib_buf", ("%s %u page %u:%u", lru ? "LRU" : "flush_list", - bpage->id().space(), bpage->id().page_no())); - ut_ad(bpage->io_fix() == BUF_IO_WRITE); - ut_d(const lsn_t oldest_modification= bpage->oldest_modification()); + id().space(), id().page_no())); + ut_d(const auto f=) zip.fix.fetch_add(WRITE_FIX - UNFIXED); + ut_ad(f >= UNFIXED); + ut_ad(f < READ_FIX); ut_ad(space == fil_system.temp_space - ? oldest_modification == 2 - : oldest_modification > 2); - ut_ad(bpage->state() == - (rw_lock ? BUF_BLOCK_FILE_PAGE : BUF_BLOCK_ZIP_PAGE)); - ut_ad(ULINT_UNDEFINED > - (lru ? buf_pool.n_flush_LRU_ : buf_pool.n_flush_list_)); + ? oldest_modification() == 2 + : oldest_modification() > 2); + if (lru) + { + ut_ad(buf_pool.n_flush_LRU_ < ULINT_UNDEFINED); + buf_pool.n_flush_LRU_++; + } + else + { + ut_ad(buf_pool.n_flush_list_ < ULINT_UNDEFINED); + buf_pool.n_flush_list_++; + } + buf_flush_page_count++; + mysql_mutex_unlock(&buf_pool.mutex); - buf_block_t *block= reinterpret_cast<buf_block_t*>(bpage); - page_t *frame= bpage->zip.data; + buf_block_t *block= reinterpret_cast<buf_block_t*>(this); + page_t *write_frame= zip.data; + + space->reacquire(); + size_t size; +#if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32 + size_t orig_size; +#endif + IORequest::Type type= lru ? 
IORequest::WRITE_LRU : IORequest::WRITE_ASYNC; + buf_tmp_buffer_t *slot= nullptr; - if (status == buf_page_t::FREED) - buf_pool.release_freed_page(&block->page); + if (UNIV_UNLIKELY(!frame)) /* ROW_FORMAT=COMPRESSED */ + { + ut_ad(!space->full_crc32()); + ut_ad(!space->is_compressed()); /* not page_compressed */ + size= zip_size(); +#if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32 + orig_size= size; +#endif + buf_flush_update_zip_checksum(write_frame, size); + write_frame= buf_page_encrypt(space, this, write_frame, &slot, &size); + ut_ad(size == zip_size()); + } else { - space->reacquire(); - ut_ad(status == buf_page_t::NORMAL || status == buf_page_t::INIT_ON_FLUSH); - size_t size; + byte *page= frame; + size= block->physical_size(); #if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32 - size_t orig_size; + orig_size= size; #endif - IORequest::Type type= lru ? IORequest::WRITE_LRU : IORequest::WRITE_ASYNC; - buf_tmp_buffer_t *slot= nullptr; - if (UNIV_UNLIKELY(!rw_lock)) /* ROW_FORMAT=COMPRESSED */ + if (space->full_crc32()) { - ut_ad(!space->full_crc32()); - ut_ad(!space->is_compressed()); /* not page_compressed */ - size= bpage->zip_size(); -#if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32 - orig_size= size; -#endif - buf_flush_update_zip_checksum(frame, size); - frame= buf_page_encrypt(space, bpage, frame, &slot, &size); - ut_ad(size == bpage->zip_size()); + /* innodb_checksum_algorithm=full_crc32 is not implemented for + ROW_FORMAT=COMPRESSED pages. */ + ut_ad(!write_frame); + page= buf_page_encrypt(space, this, page, &slot, &size); + buf_flush_init_for_writing(block, page, nullptr, true); } else { - byte *page= block->frame; - size= block->physical_size(); -#if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32 - orig_size= size; -#endif - - if (space->full_crc32()) - { - /* innodb_checksum_algorithm=full_crc32 is not implemented for - ROW_FORMAT=COMPRESSED pages. 
*/ - ut_ad(!frame); - page= buf_page_encrypt(space, bpage, page, &slot, &size); - buf_flush_init_for_writing(block, page, nullptr, true); - } - else - { - buf_flush_init_for_writing(block, page, frame ? &bpage->zip : nullptr, - false); - page= buf_page_encrypt(space, bpage, frame ? frame : page, - &slot, &size); - } + buf_flush_init_for_writing(block, page, write_frame ? &zip : nullptr, + false); + page= buf_page_encrypt(space, this, write_frame ? write_frame : page, + &slot, &size); + } #if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32 - if (size != orig_size) - { - switch (space->chain.start->punch_hole) { - case 1: - type= lru ? IORequest::PUNCH_LRU : IORequest::PUNCH; - break; - case 2: - size= orig_size; - } + if (size != orig_size) + { + switch (space->chain.start->punch_hole) { + case 1: + type= lru ? IORequest::PUNCH_LRU : IORequest::PUNCH; + break; + case 2: + size= orig_size; } -#endif - frame= page; } +#endif + write_frame= page; + } - ut_ad(status == bpage->status); - ut_ad(oldest_modification == bpage->oldest_modification()); - - if (status != buf_page_t::NORMAL || !space->use_doublewrite()) + if ((s & LRU_MASK) == REINIT || !space->use_doublewrite()) + { + if (UNIV_LIKELY(space->purpose == FIL_TYPE_TABLESPACE)) { - if (UNIV_LIKELY(space->purpose == FIL_TYPE_TABLESPACE)) - { - const lsn_t lsn= mach_read_from_8(my_assume_aligned<8> - (FIL_PAGE_LSN + (frame ? frame - : block->frame))); - ut_ad(lsn >= oldest_modification); - if (lsn > log_sys.get_flushed_lsn()) - log_write_up_to(lsn, true); - } - space->io(IORequest{type, bpage, slot}, - bpage->physical_offset(), size, frame, bpage); + const lsn_t lsn= + mach_read_from_8(my_assume_aligned<8>(FIL_PAGE_LSN + + (write_frame ? 
write_frame + : frame))); + ut_ad(lsn >= oldest_modification()); + if (lsn > log_sys.get_flushed_lsn()) + log_write_up_to(lsn, true); } - else - buf_dblwr.add_to_batch(IORequest{bpage, slot, space->chain.start, type}, - size); + space->io(IORequest{type, this, slot}, physical_offset(), size, + write_frame, this); } + else + buf_dblwr.add_to_batch(IORequest{this, slot, space->chain.start, type}, + size); /* Increment the I/O operation count used for selecting LRU policy. */ buf_LRU_stat_inc_io(); @@ -1118,8 +1107,8 @@ static ulint buf_flush_try_neighbors(fil_space_t *space, if (!lru || id == page_id || bpage->is_old()) { if (!buf_pool.watch_is_sentinel(*bpage) && - bpage->oldest_modification() > 1 && - bpage->ready_for_flush() && buf_flush_page(bpage, lru, space)) + bpage->oldest_modification() > 1 && bpage->ready_for_flush() && + bpage->flush(lru, space)) { ++count; continue; @@ -1218,24 +1207,17 @@ static void buf_flush_discard_page(buf_page_t *bpage) ut_ad(bpage->in_file()); ut_ad(bpage->oldest_modification()); - block_lock *rw_lock; - - if (bpage->state() != BUF_BLOCK_FILE_PAGE) - rw_lock= nullptr; - else - { - rw_lock= &reinterpret_cast<buf_block_t*>(bpage)->lock; - if (!rw_lock->u_lock_try(false)) - return; - } + if (!bpage->lock.u_lock_try(false)) + return; - bpage->status= buf_page_t::NORMAL; mysql_mutex_lock(&buf_pool.flush_list_mutex); buf_pool.delete_from_flush_list(bpage); mysql_mutex_unlock(&buf_pool.flush_list_mutex); - if (rw_lock) - rw_lock->u_unlock(); + ut_d(const auto state= bpage->state()); + ut_ad(state == buf_page_t::FREED || state == buf_page_t::UNFIXED || + state == buf_page_t::IBUF_EXIST || state == buf_page_t::REINIT); + bpage->lock.u_unlock(); buf_LRU_free_page(bpage, true); } @@ -1267,15 +1249,19 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) buf_page_t *prev= UT_LIST_GET_PREV(LRU, bpage); const lsn_t oldest_modification= bpage->oldest_modification(); buf_pool.lru_hp.set(prev); + const auto state= bpage->state(); 
+ ut_ad(state >= buf_page_t::FREED); + ut_ad(bpage->in_LRU_list); - if (oldest_modification <= 1 && bpage->can_relocate()) + if (oldest_modification <= 1) { - /* block is ready for eviction i.e., it is clean and is not - IO-fixed or buffer fixed. */ + if (state != buf_page_t::FREED && + (state >= buf_page_t::READ_FIX || (~buf_page_t::LRU_MASK & state))) + goto must_skip; if (buf_LRU_free_page(bpage, true)) ++n->evicted; } - else if (oldest_modification > 1 && bpage->ready_for_flush()) + else if (state < buf_page_t::READ_FIX) { /* Block is ready for flush. Dispatch an IO request. The IO helper thread will put it on free list in IO completion routine. */ @@ -1309,13 +1295,14 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) reacquire_mutex: mysql_mutex_lock(&buf_pool.mutex); } - else if (buf_flush_page(bpage, true, space)) + else if (bpage->flush(true, space)) { ++n->flushed; goto reacquire_mutex; } } else + must_skip: /* Can't evict or dispatch this block. Go to previous. 
*/ ut_ad(buf_pool.lru_hp.is_hp(prev)); bpage= buf_pool.lru_hp.get(); @@ -1400,7 +1387,6 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn) } ut_ad(oldest_modification > 2); - ut_ad(bpage->in_file()); if (!bpage->ready_for_flush()) goto skip; @@ -1447,7 +1433,7 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn) reacquire_mutex: mysql_mutex_lock(&buf_pool.mutex); } - else if (buf_flush_page(bpage, false, space)) + else if (bpage->flush(false, space)) { ++count; goto reacquire_mutex; @@ -1558,9 +1544,6 @@ bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed) for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.flush_list); bpage; ) { - ut_d(const auto s= bpage->state()); - ut_ad(s == BUF_BLOCK_ZIP_PAGE || s == BUF_BLOCK_FILE_PAGE || - s == BUF_BLOCK_REMOVE_HASH); ut_ad(bpage->oldest_modification()); ut_ad(bpage->in_file()); @@ -1596,7 +1579,7 @@ bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed) acquired= false; goto was_freed; } - if (!buf_flush_page(bpage, false, space)) + if (!bpage->flush(false, space)) { may_have_skipped= true; mysql_mutex_lock(&buf_pool.flush_list_mutex); @@ -2484,8 +2467,7 @@ static void buf_flush_validate_low() in the flush list waiting to acquire the buf_pool.flush_list_mutex to complete the relocation. */ ut_d(const auto s= bpage->state()); - ut_ad(s == BUF_BLOCK_ZIP_PAGE || s == BUF_BLOCK_FILE_PAGE - || s == BUF_BLOCK_REMOVE_HASH); + ut_ad(s >= buf_page_t::REMOVE_HASH); ut_ad(om == 1 || om > 2); bpage = UT_LIST_GET_NEXT(list, bpage); diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc index 250c12bea78..0658ca57c60 100644 --- a/storage/innobase/buf/buf0lru.cc +++ b/storage/innobase/buf/buf0lru.cc @@ -108,7 +108,7 @@ uint buf_LRU_old_threshold_ms; /** Remove bpage from buf_pool.LRU and buf_pool.page_hash. -If bpage->state() == BUF_BLOCK_ZIP_PAGE && bpage->oldest_modification() <= 1, +If !bpage->frame && bpage->oldest_modification() <= 1, the object will be freed. 
@param bpage buffer block @@ -117,9 +117,9 @@ the object will be freed. @param zip whether bpage->zip of BUF_BLOCK_FILE_PAGE should be freed If a compressed page is freed other compressed pages may be relocated. -@retval true if BUF_BLOCK_FILE_PAGE was removed from page_hash. The +@retval true if bpage with bpage->frame was removed from page_hash. The caller needs to free the page to the free list -@retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In +@retval false if block without bpage->frame was removed from page_hash. In this case the block is already returned to the buddy allocator. */ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, buf_pool_t::hash_chain &chain, @@ -203,7 +203,8 @@ static bool buf_LRU_free_from_unzip_LRU_list(ulint limit) block && scanned < limit; ++scanned) { buf_block_t* prev_block = UT_LIST_GET_PREV(unzip_LRU, block); - ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE); + ut_ad(block->page.in_file()); + ut_ad(block->page.belongs_to_unzip_LRU()); ut_ad(block->in_unzip_LRU_list); ut_ad(block->page.in_LRU_list); @@ -268,17 +269,6 @@ static bool buf_LRU_free_from_common_LRU_list(ulint limit) return(freed); } -/** Try to free a replaceable block. -@param limit maximum number of blocks to scan -@return true if found and freed */ -bool buf_LRU_scan_and_free_block(ulint limit) -{ - mysql_mutex_assert_owner(&buf_pool.mutex); - - return buf_LRU_free_from_unzip_LRU_list(limit) || - buf_LRU_free_from_common_LRU_list(limit); -} - /** @return a buffer block from the buf_pool.free list @retval NULL if the free list is empty */ buf_block_t* buf_LRU_get_free_only() @@ -306,15 +296,13 @@ buf_block_t* buf_LRU_get_free_only() a free block. 
*/ assert_block_ahi_empty(block); - block->page.set_state(BUF_BLOCK_MEMORY); - MEM_MAKE_ADDRESSABLE(block->frame, srv_page_size); + block->page.set_state(buf_page_t::MEMORY); + MEM_MAKE_ADDRESSABLE(block->page.frame, srv_page_size); break; } /* This should be withdrawn */ - UT_LIST_ADD_LAST( - buf_pool.withdraw, - &block->page); + UT_LIST_ADD_LAST(buf_pool.withdraw, &block->page); ut_d(block->in_withdraw_list = true); block = reinterpret_cast<buf_block_t*>( @@ -428,7 +416,7 @@ got_block: if (!have_mutex) { mysql_mutex_unlock(&buf_pool.mutex); } - memset(&block->page.zip, 0, sizeof block->page.zip); + block->page.zip.clear(); return block; } @@ -771,6 +759,9 @@ buf_LRU_add_block( /** Move a block to the start of the LRU list. */ void buf_page_make_young(buf_page_t *bpage) { + if (bpage->is_read_fixed()) + return; + ut_ad(bpage->in_file()); mysql_mutex_lock(&buf_pool.mutex); @@ -793,12 +784,10 @@ The caller must hold buf_pool.mutex. @retval false if the page was not freed */ bool buf_LRU_free_page(buf_page_t *bpage, bool zip) { - const page_id_t id(bpage->id()); + const page_id_t id{bpage->id()}; buf_page_t* b = nullptr; mysql_mutex_assert_owner(&buf_pool.mutex); - ut_ad(bpage->in_file()); - ut_ad(bpage->in_LRU_list); /* First, perform a quick check before we acquire hash_lock. 
*/ if (!bpage->can_relocate()) { @@ -839,23 +828,21 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip) if (oldest_modification) { goto func_exit; } - } else if (oldest_modification - && bpage->state() != BUF_BLOCK_FILE_PAGE) { + } else if (oldest_modification && !bpage->frame) { func_exit: hash_lock.unlock(); return(false); - } else if (bpage->state() == BUF_BLOCK_FILE_PAGE) { - b = buf_page_alloc_descriptor(); + } else if (bpage->frame) { + b = static_cast<buf_page_t*>(ut_zalloc_nokey(sizeof *b)); ut_a(b); mysql_mutex_lock(&buf_pool.flush_list_mutex); new (b) buf_page_t(*bpage); - b->set_state(BUF_BLOCK_ZIP_PAGE); + b->frame = nullptr; + b->set_state(buf_page_t::UNFIXED + 1); } mysql_mutex_assert_owner(&buf_pool.mutex); - ut_ad(bpage->in_file()); - ut_ad(bpage->in_LRU_list); DBUG_PRINT("ib_buf", ("free page %u:%u", id.space(), id.page_no())); @@ -887,9 +874,7 @@ func_exit: buf_LRU_block_remove_hashed(), which invokes buf_LRU_remove_block(). */ ut_ad(!bpage->in_LRU_list); - - /* bpage->state was BUF_BLOCK_FILE_PAGE because - b != nullptr. The type cast below is thus valid. */ + ut_ad(bpage->frame); ut_ad(!((buf_block_t*) bpage)->in_unzip_LRU_list); /* The fields of bpage were copied to b before @@ -952,13 +937,9 @@ func_exit: page_zip_set_size(&bpage->zip, 0); - /* Prevent buf_page_get_gen() from - decompressing the block while we release - hash_lock. 
*/ - b->set_io_fix(BUF_IO_PIN); - goto release; + b->lock.x_lock(); + hash_lock.unlock(); } else if (!zip) { -release: hash_lock.unlock(); } @@ -974,21 +955,16 @@ release: the contents of the page valid (which it still is) in order to avoid bogus Valgrind or MSAN warnings.*/ - MEM_MAKE_DEFINED(block->frame, srv_page_size); + MEM_MAKE_DEFINED(block->page.frame, srv_page_size); btr_search_drop_page_hash_index(block); - MEM_UNDEFINED(block->frame, srv_page_size); - - if (UNIV_LIKELY_NULL(b)) { - ut_ad(b->zip_size()); - b->io_unfix(); - } - + MEM_UNDEFINED(block->page.frame, srv_page_size); mysql_mutex_lock(&buf_pool.mutex); - } else + } #endif if (UNIV_LIKELY_NULL(b)) { ut_ad(b->zip_size()); - b->io_unfix(); + b->lock.x_unlock(); + b->unfix(); } buf_LRU_block_free_hashed_page(block); @@ -1005,22 +981,22 @@ buf_LRU_block_free_non_file_page( { void* data; - ut_ad(block->page.state() == BUF_BLOCK_MEMORY); + ut_ad(block->page.state() == buf_page_t::MEMORY); assert_block_ahi_empty(block); ut_ad(!block->page.in_free_list); ut_ad(!block->page.oldest_modification()); ut_ad(!block->page.in_LRU_list); ut_ad(!block->page.hash); - block->page.set_state(BUF_BLOCK_NOT_USED); + block->page.set_state(buf_page_t::NOT_USED); - MEM_UNDEFINED(block->frame, srv_page_size); + MEM_UNDEFINED(block->page.frame, srv_page_size); /* Wipe page_no and space_id */ static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment"); - memset_aligned<4>(block->frame + FIL_PAGE_OFFSET, 0xfe, 4); + memset_aligned<4>(block->page.frame + FIL_PAGE_OFFSET, 0xfe, 4); static_assert(FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID % 4 == 2, "not perfect alignment"); - memset_aligned<2>(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, + memset_aligned<2>(block->page.frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xfe, 4); data = block->page.zip.data; @@ -1050,7 +1026,7 @@ buf_LRU_block_free_non_file_page( pthread_cond_signal(&buf_pool.done_free); } - MEM_NOACCESS(block->frame, srv_page_size); + MEM_NOACCESS(block->page.frame, srv_page_size); } 
/** Release a memory block to the buffer pool. */ @@ -1065,8 +1041,7 @@ ATTRIBUTE_COLD void buf_pool_t::free_block(buf_block_t *block) /** Remove bpage from buf_pool.LRU and buf_pool.page_hash. -If bpage->state() == BUF_BLOCK_ZIP_PAGE && !bpage->oldest_modification(), -the object will be freed. +If !bpage->frame && !bpage->oldest_modification(), the object will be freed. @param bpage buffer block @param id page identifier @@ -1082,24 +1057,19 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, buf_pool_t::hash_chain &chain, bool zip) { - mysql_mutex_assert_owner(&buf_pool.mutex); + ut_a(bpage->can_relocate()); ut_ad(buf_pool.page_hash.lock_get(chain).is_write_locked()); - ut_a(bpage->io_fix() == BUF_IO_NONE); - ut_a(!bpage->buf_fix_count()); - buf_LRU_remove_block(bpage); buf_pool.freed_page_clock += 1; - switch (bpage->state()) { - case BUF_BLOCK_FILE_PAGE: + if (UNIV_LIKELY(bpage->frame != nullptr)) { MEM_CHECK_ADDRESSABLE(bpage, sizeof(buf_block_t)); - MEM_CHECK_ADDRESSABLE(((buf_block_t*) bpage)->frame, - srv_page_size); + MEM_CHECK_ADDRESSABLE(bpage->frame, srv_page_size); buf_block_modify_clock_inc((buf_block_t*) bpage); - if (bpage->zip.data) { - const page_t* page = ((buf_block_t*) bpage)->frame; + if (UNIV_LIKELY_NULL(bpage->zip.data)) { + const page_t* page = bpage->frame; ut_a(!zip || !bpage->oldest_modification()); ut_ad(bpage->zip_size()); @@ -1147,27 +1117,20 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, putc('\n', stderr); ut_error; } - - break; + } else { + goto evict_zip; } - /* fall through */ - case BUF_BLOCK_ZIP_PAGE: + } else { +evict_zip: ut_a(!bpage->oldest_modification()); MEM_CHECK_ADDRESSABLE(bpage->zip.data, bpage->zip_size()); - break; - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - ut_error; - break; } ut_ad(!bpage->in_zip_hash); buf_pool.page_hash.remove(chain, bpage); - page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain); 
+ page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain); - switch (bpage->state()) { - case BUF_BLOCK_ZIP_PAGE: + if (UNIV_UNLIKELY(!bpage->frame)) { ut_ad(!bpage->in_free_list); ut_ad(!bpage->in_LRU_list); ut_a(bpage->zip.data); @@ -1180,20 +1143,19 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, buf_buddy_free(bpage->zip.data, bpage->zip_size()); buf_pool_mutex_exit_allow(); - buf_page_free_descriptor(bpage); - return(false); - - case BUF_BLOCK_FILE_PAGE: + bpage->lock.free(); + ut_free(bpage); + return false; + } else { static_assert(FIL_NULL == 0xffffffffU, "fill pattern"); static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment"); - memset_aligned<4>(reinterpret_cast<buf_block_t*>(bpage)->frame - + FIL_PAGE_OFFSET, 0xff, 4); + memset_aligned<4>(bpage->frame + FIL_PAGE_OFFSET, 0xff, 4); static_assert(FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID % 4 == 2, "not perfect alignment"); - memset_aligned<2>(reinterpret_cast<buf_block_t*>(bpage)->frame + memset_aligned<2>(bpage->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4); - MEM_UNDEFINED(((buf_block_t*) bpage)->frame, srv_page_size); - bpage->set_state(BUF_BLOCK_REMOVE_HASH); + MEM_UNDEFINED(bpage->frame, srv_page_size); + bpage->set_state(buf_page_t::REMOVE_HASH); if (!zip) { return true; @@ -1237,41 +1199,36 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, page_zip_set_size(&bpage->zip, 0); } - return(true); - - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - break; + return true; } - - ut_error; - return(false); } /** Release and evict a corrupted page. 
@param bpage page that was being read */ ATTRIBUTE_COLD void buf_pool_t::corrupted_evict(buf_page_t *bpage) { - const page_id_t id(bpage->id()); + const page_id_t id{bpage->id()}; buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(id.fold()); page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); mysql_mutex_lock(&mutex); hash_lock.lock(); - ut_ad(bpage->io_fix() == BUF_IO_READ); ut_ad(!bpage->oldest_modification()); bpage->set_corrupt_id(); - bpage->io_unfix(); - - if (bpage->state() == BUF_BLOCK_FILE_PAGE) - reinterpret_cast<buf_block_t*>(bpage)->lock.x_unlock(true); + constexpr auto read_unfix= buf_page_t::READ_FIX - buf_page_t::UNFIXED; + auto s= bpage->zip.fix.fetch_sub(read_unfix) - read_unfix; + bpage->lock.x_unlock(true); - while (bpage->buf_fix_count()) + while (s != buf_page_t::UNFIXED) + { + ut_ad(s > buf_page_t::UNFIXED); + ut_ad(s < buf_page_t::READ_FIX); /* Wait for other threads to release the fix count before releasing the bpage from LRU list. */ (void) LF_BACKOFF(); + s= bpage->state(); + } /* remove from LRU and page_hash */ if (buf_LRU_block_remove_hashed(bpage, id, chain, true)) @@ -1357,6 +1314,23 @@ func_exit: memset(&buf_LRU_stat_cur, 0, sizeof buf_LRU_stat_cur); } +#if defined __aarch64__&&defined __GNUC__&&__GNUC__==4&&!defined __clang__ +/* Avoid GCC 4.8.5 internal compiler error "could not split insn". +We would only need this for buf_LRU_scan_and_free_block(), +but GCC 4.8.5 does not support pop_options. */ +# pragma GCC optimize ("no-expensive-optimizations") +#endif +/** Try to free a replaceable block. +@param limit maximum number of blocks to scan +@return true if found and freed */ +bool buf_LRU_scan_and_free_block(ulint limit) +{ + mysql_mutex_assert_owner(&buf_pool.mutex); + + return buf_LRU_free_from_unzip_LRU_list(limit) || + buf_LRU_free_from_common_LRU_list(limit); +} + #ifdef UNIV_DEBUG /** Validate the LRU list. 
*/ void buf_LRU_validate() @@ -1389,20 +1363,11 @@ void buf_LRU_validate() for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool.LRU); bpage != NULL; bpage = UT_LIST_GET_NEXT(LRU, bpage)) { - - switch (bpage->state()) { - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - ut_error; - break; - case BUF_BLOCK_FILE_PAGE: - ut_ad(reinterpret_cast<buf_block_t*>(bpage) - ->in_unzip_LRU_list - == bpage->belongs_to_unzip_LRU()); - case BUF_BLOCK_ZIP_PAGE: - break; - } + ut_ad(bpage->in_file()); + ut_ad(!bpage->frame + || reinterpret_cast<buf_block_t*>(bpage) + ->in_unzip_LRU_list + == bpage->belongs_to_unzip_LRU()); if (bpage->is_old()) { const buf_page_t* prev @@ -1428,7 +1393,7 @@ void buf_LRU_validate() bpage != NULL; bpage = UT_LIST_GET_NEXT(list, bpage)) { - ut_a(bpage->state() == BUF_BLOCK_NOT_USED); + ut_a(bpage->state() == buf_page_t::NOT_USED); } CheckUnzipLRUAndLRUList::validate(); @@ -1464,38 +1429,28 @@ void buf_LRU_print() fputs("old ", stderr); } - if (const uint32_t buf_fix_count = bpage->buf_fix_count()) { - fprintf(stderr, "buffix count %u ", buf_fix_count); - } - - if (const auto io_fix = bpage->io_fix()) { - fprintf(stderr, "io_fix %d ", io_fix); + const unsigned s = bpage->state(); + if (s > buf_page_t::UNFIXED) { + fprintf(stderr, "fix %u ", s - buf_page_t::UNFIXED); + } else { + ut_ad(s == buf_page_t::UNFIXED + || s == buf_page_t::REMOVE_HASH); } if (bpage->oldest_modification()) { fputs("modif. 
", stderr); } - switch (const auto state = bpage->state()) { - const byte* frame; - case BUF_BLOCK_FILE_PAGE: - frame = buf_block_get_frame((buf_block_t*) bpage); - fprintf(stderr, "\ntype %u index id " IB_ID_FMT "\n", - fil_page_get_type(frame), - btr_page_get_index_id(frame)); - break; - case BUF_BLOCK_ZIP_PAGE: - frame = bpage->zip.data; + if (const byte* frame = bpage->zip.data) { fprintf(stderr, "\ntype %u size " ULINTPF " index id " IB_ID_FMT "\n", fil_page_get_type(frame), bpage->zip_size(), btr_page_get_index_id(frame)); - break; - - default: - fprintf(stderr, "\n!state %d!\n", state); - break; + } else { + fprintf(stderr, "\ntype %u index id " IB_ID_FMT "\n", + fil_page_get_type(bpage->frame), + btr_page_get_index_id(bpage->frame)); } } diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc index 080f87adb0f..aafd1c048e4 100644 --- a/storage/innobase/buf/buf0rea.cc +++ b/storage/innobase/buf/buf0rea.cc @@ -59,12 +59,9 @@ inline void buf_pool_t::watch_remove(buf_page_t *watch, ut_ad(page_hash.lock_get(chain).is_write_locked()); ut_a(watch_is_sentinel(*watch)); if (watch->buf_fix_count()) - { page_hash.remove(chain, watch); - watch->set_buf_fix_count(0); - } ut_ad(!watch->in_page_hash); - watch->set_state(BUF_BLOCK_NOT_USED); + watch->set_state(buf_page_t::NOT_USED); watch->id_= page_id_t(~0ULL); } @@ -109,10 +106,10 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, if (!zip_size || unzip || recv_recovery_is_on()) { block= buf_LRU_get_free_block(false); - block->initialise(page_id, zip_size); + block->initialise(page_id, zip_size, buf_page_t::READ_FIX); /* x_unlock() will be invoked - in buf_page_read_complete() by the io-handler thread. */ - block->lock.x_lock(true); + in buf_page_t::read_complete() by the io-handler thread. 
*/ + block->page.lock.x_lock(true); } buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold()); @@ -125,7 +122,8 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, /* The page is already in the buffer pool. */ if (block) { - block->lock.x_unlock(true); + block->page.lock.x_unlock(true); + ut_d(block->page.set_state(buf_page_t::MEMORY)); buf_LRU_block_free_non_file_page(block); } goto func_exit; @@ -143,14 +141,13 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, if (hash_page) { /* Preserve the reference count. */ - auto buf_fix_count= hash_page->buf_fix_count(); - ut_a(buf_fix_count > 0); - block->page.add_buf_fix_count(buf_fix_count); + uint32_t buf_fix_count= hash_page->state(); + ut_a(buf_fix_count >= buf_page_t::UNFIXED); + ut_a(buf_fix_count < buf_page_t::READ_FIX); buf_pool.watch_remove(hash_page, chain); + block->page.fix(buf_fix_count - buf_page_t::UNFIXED); } - block->page.set_io_fix(BUF_IO_READ); - block->page.set_state(BUF_BLOCK_FILE_PAGE); buf_pool.page_hash.append(chain, &block->page); } @@ -198,13 +195,14 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, } } - bpage= buf_page_alloc_descriptor(); + bpage= static_cast<buf_page_t*>(ut_zalloc_nokey(sizeof *bpage)); page_zip_des_init(&bpage->zip); page_zip_set_size(&bpage->zip, zip_size); bpage->zip.data = (page_zip_t*) data; - bpage->init(BUF_BLOCK_ZIP_PAGE, page_id); + bpage->init(buf_page_t::READ_FIX, page_id); + bpage->lock.x_lock(true); { transactional_lock_guard<page_hash_latch> g @@ -215,12 +213,14 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, /* Preserve the reference count. It can be 0 if buf_pool_t::watch_unset() is executing concurrently, waiting for buf_pool.mutex, which we are holding. 
*/ - bpage->add_buf_fix_count(hash_page->buf_fix_count()); + uint32_t buf_fix_count= hash_page->state(); + ut_a(buf_fix_count >= buf_page_t::UNFIXED); + ut_a(buf_fix_count < buf_page_t::READ_FIX); + bpage->fix(buf_fix_count - buf_page_t::UNFIXED); buf_pool.watch_remove(hash_page, chain); } buf_pool.page_hash.append(chain, bpage); - bpage->set_io_fix(BUF_IO_READ); } /* The block must be put to the LRU list, to the old blocks. @@ -315,16 +315,7 @@ nothing_read: "read page " << page_id << " zip_size=" << zip_size << " unzip=" << unzip << ',' << (sync ? "sync" : "async")); - void* dst; - - if (zip_size) { - dst = bpage->zip.data; - } else { - ut_a(bpage->state() == BUF_BLOCK_FILE_PAGE); - - dst = ((buf_block_t*) bpage)->frame; - } - + void* dst = zip_size ? bpage->zip.data : bpage->frame; const ulint len = zip_size ? zip_size : srv_page_size; auto fio = space->io(IORequest(sync @@ -347,7 +338,7 @@ nothing_read: thd_wait_end(NULL); /* The i/o was already completed in space->io() */ - *err = buf_page_read_complete(bpage, *fio.node); + *err = bpage->read_complete(*fio.node); space->release(); if (*err != DB_SUCCESS) { @@ -628,19 +619,7 @@ failed: on the page, we do not acquire an s-latch on the page, this is to prevent deadlocks. The hash_lock is only protecting the buf_pool.page_hash for page i, not the bpage contents itself. */ - const byte *f; - switch (UNIV_EXPECT(bpage->state(), BUF_BLOCK_FILE_PAGE)) { - case BUF_BLOCK_FILE_PAGE: - f= reinterpret_cast<const buf_block_t*>(bpage)->frame; - break; - case BUF_BLOCK_ZIP_PAGE: - f= bpage->zip.data; - break; - default: - ut_ad("invalid state" == 0); - goto fail; - } - + const byte *f= bpage->frame ? 
bpage->frame : bpage->zip.data; uint32_t prev= mach_read_from_4(my_assume_aligned<4>(f + FIL_PAGE_PREV)); uint32_t next= mach_read_from_4(my_assume_aligned<4>(f + FIL_PAGE_NEXT)); if (prev == FIL_NULL || next == FIL_NULL) diff --git a/storage/innobase/dict/dict0boot.cc b/storage/innobase/dict/dict0boot.cc index e6a46699f02..40c8df1498e 100644 --- a/storage/innobase/dict/dict0boot.cc +++ b/storage/innobase/dict/dict0boot.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2020, MariaDB Corporation. +Copyright (c) 2016, 2021, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -55,29 +55,29 @@ dict_hdr_get_new_id( if (table_id) { id = mach_read_from_8(DICT_HDR + DICT_HDR_TABLE_ID - + dict_hdr->frame); + + dict_hdr->page.frame); id++; mtr.write<8>(*dict_hdr, DICT_HDR + DICT_HDR_TABLE_ID - + dict_hdr->frame, id); + + dict_hdr->page.frame, id); *table_id = id; } if (index_id) { id = mach_read_from_8(DICT_HDR + DICT_HDR_INDEX_ID - + dict_hdr->frame); + + dict_hdr->page.frame); id++; mtr.write<8>(*dict_hdr, DICT_HDR + DICT_HDR_INDEX_ID - + dict_hdr->frame, id); + + dict_hdr->page.frame, id); *index_id = id; } if (space_id) { *space_id = mach_read_from_4(DICT_HDR + DICT_HDR_MAX_SPACE_ID - + dict_hdr->frame); + + dict_hdr->page.frame); if (fil_assign_new_space_id(space_id)) { mtr.write<4>(*dict_hdr, DICT_HDR + DICT_HDR_MAX_SPACE_ID - + dict_hdr->frame, *space_id); + + dict_hdr->page.frame, *space_id); } } @@ -90,7 +90,7 @@ void dict_hdr_flush_row_id(row_id_t id) mtr_t mtr; mtr.start(); buf_block_t* d= dict_hdr_get(&mtr); - byte *row_id= DICT_HDR + DICT_HDR_ROW_ID + d->frame; + byte *row_id= DICT_HDR + DICT_HDR_ROW_ID + d->page.frame; if (mach_read_from_8(row_id) < id) mtr.write<8>(*d, row_id, id); 
mtr.commit(); @@ -119,17 +119,18 @@ static bool dict_hdr_create() /* Start counting row, table, index, and tree ids from DICT_HDR_FIRST_ID */ - mtr.write<8>(*d, DICT_HDR + DICT_HDR_ROW_ID + d->frame, + mtr.write<8>(*d, DICT_HDR + DICT_HDR_ROW_ID + d->page.frame, DICT_HDR_FIRST_ID); - mtr.write<8>(*d, DICT_HDR + DICT_HDR_TABLE_ID + d->frame, + mtr.write<8>(*d, DICT_HDR + DICT_HDR_TABLE_ID + d->page.frame, DICT_HDR_FIRST_ID); - mtr.write<8>(*d, DICT_HDR + DICT_HDR_INDEX_ID + d->frame, + mtr.write<8>(*d, DICT_HDR + DICT_HDR_INDEX_ID + d->page.frame, DICT_HDR_FIRST_ID); - ut_ad(!mach_read_from_4(DICT_HDR + DICT_HDR_MAX_SPACE_ID + d->frame)); + ut_ad(!mach_read_from_4(DICT_HDR + DICT_HDR_MAX_SPACE_ID + + d->page.frame)); /* Obsolete, but we must initialize it anyway. */ - mtr.write<4>(*d, DICT_HDR + DICT_HDR_MIX_ID_LOW + d->frame, + mtr.write<4>(*d, DICT_HDR + DICT_HDR_MIX_ID_LOW + d->page.frame, DICT_HDR_FIRST_ID); /* Create the B-tree roots for the clustered indexes of the basic @@ -145,7 +146,8 @@ failed: goto func_exit; } - mtr.write<4>(*d, DICT_HDR + DICT_HDR_TABLES + d->frame, root_page_no); + mtr.write<4>(*d, DICT_HDR + DICT_HDR_TABLES + d->page.frame, + root_page_no); /*--------------------------*/ root_page_no = btr_create(DICT_UNIQUE, fil_system.sys_space, DICT_TABLE_IDS_ID, @@ -154,7 +156,7 @@ failed: goto failed; } - mtr.write<4>(*d, DICT_HDR + DICT_HDR_TABLE_IDS + d->frame, + mtr.write<4>(*d, DICT_HDR + DICT_HDR_TABLE_IDS + d->page.frame, root_page_no); /*--------------------------*/ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, @@ -164,7 +166,7 @@ failed: goto failed; } - mtr.write<4>(*d, DICT_HDR + DICT_HDR_COLUMNS + d->frame, + mtr.write<4>(*d, DICT_HDR + DICT_HDR_COLUMNS + d->page.frame, root_page_no); /*--------------------------*/ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, @@ -174,7 +176,8 @@ failed: goto failed; } - mtr.write<4>(*d, DICT_HDR + DICT_HDR_INDEXES + d->frame, root_page_no); + mtr.write<4>(*d, DICT_HDR + 
DICT_HDR_INDEXES + d->page.frame, + root_page_no); /*--------------------------*/ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, fil_system.sys_space, DICT_FIELDS_ID, @@ -183,7 +186,8 @@ failed: goto failed; } - mtr.write<4>(*d, DICT_HDR + DICT_HDR_FIELDS + d->frame, root_page_no); + mtr.write<4>(*d, DICT_HDR + DICT_HDR_FIELDS + d->page.frame, + root_page_no); func_exit: mtr.commit(); return fail; @@ -228,7 +232,7 @@ dict_boot(void) dict_sys.lock(SRW_LOCK_CALL); /* Get the dictionary header */ - const byte* dict_hdr = &dict_hdr_get(&mtr)->frame[DICT_HDR]; + const byte* dict_hdr = &dict_hdr_get(&mtr)->page.frame[DICT_HDR]; /* Because we only write new row ids to disk-based data structure (dictionary header) when it is divisible by diff --git a/storage/innobase/dict/dict0defrag_bg.cc b/storage/innobase/dict/dict0defrag_bg.cc index 35ffee7e0d0..f843fcf5152 100644 --- a/storage/innobase/dict/dict0defrag_bg.cc +++ b/storage/innobase/dict/dict0defrag_bg.cc @@ -314,11 +314,11 @@ btr_get_size_and_reserved( mtr->x_lock_space(index->table->space); ulint n = fseg_n_reserved_pages(*root, PAGE_HEADER + PAGE_BTR_SEG_LEAF - + root->frame, used, mtr); + + root->page.frame, used, mtr); if (flag == BTR_TOTAL_SIZE) { n += fseg_n_reserved_pages(*root, PAGE_HEADER + PAGE_BTR_SEG_TOP - + root->frame, &dummy, mtr); + + root->page.frame, &dummy, mtr); *used += dummy; } diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc index c7b707d896c..a69f645758f 100644 --- a/storage/innobase/dict/dict0load.cc +++ b/storage/innobase/dict/dict0load.cc @@ -936,7 +936,8 @@ void dict_check_tablespaces_and_store_max_id() mtr.start(); ulint max_space_id = mach_read_from_4(DICT_HDR_MAX_SPACE_ID + DICT_HDR - + dict_hdr_get(&mtr)->frame); + + dict_hdr_get(&mtr) + ->page.frame); mtr.commit(); fil_set_max_space_id_if_bigger(max_space_id); @@ -2373,15 +2374,15 @@ corrupted: page_id, table->space->zip_size(), RW_S_LATCH, &mtr); const bool corrupted = !block - || 
page_get_space_id(block->frame) + || page_get_space_id(block->page.frame) != page_id.space() - || page_get_page_no(block->frame) + || page_get_page_no(block->page.frame) != page_id.page_no() || (mach_read_from_2(FIL_PAGE_TYPE - + block->frame) + + block->page.frame) != FIL_PAGE_INDEX && mach_read_from_2(FIL_PAGE_TYPE - + block->frame) + + block->page.frame) != FIL_PAGE_TYPE_INSTANT); mtr.commit(); if (corrupted) { diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index 0af5982806f..570903d4327 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -1442,10 +1442,12 @@ invalid: index->stat_index_size = fseg_n_reserved_pages(*root, PAGE_HEADER + PAGE_BTR_SEG_LEAF - + root->frame, &size, &mtr) + + root->page.frame, &size, + &mtr) + fseg_n_reserved_pages(*root, PAGE_HEADER + PAGE_BTR_SEG_TOP - + root->frame, &dummy, &mtr); + + root->page.frame, &dummy, + &mtr); mtr.commit(); @@ -2529,17 +2531,19 @@ empty_index: DBUG_RETURN(result); } - root_level = btr_page_get_level(root->frame); + root_level = btr_page_get_level(root->page.frame); mtr.x_lock_space(index->table->space); ulint dummy, size; result.index_size = fseg_n_reserved_pages(*root, PAGE_HEADER + PAGE_BTR_SEG_LEAF - + root->frame, &size, &mtr) + + root->page.frame, + &size, &mtr) + fseg_n_reserved_pages(*root, PAGE_HEADER + PAGE_BTR_SEG_TOP - + root->frame, &dummy, &mtr); + + root->page.frame, + &dummy, &mtr); result.n_leaf_pages = size ? size : 1; } @@ -2647,7 +2651,7 @@ empty_index: mtr_sx_lock_index(index, &mtr); buf_block_t *root = btr_root_block_get(index, RW_S_LATCH, &mtr); - if (!root || root_level != btr_page_get_level(root->frame) + if (!root || root_level != btr_page_get_level(root->page.frame) || index->table->bulk_trx_id != bulk_trx_id) { /* Just quit if the tree has changed beyond recognition here. 
The old stats from previous diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index 88f60165e01..bbf5b934a9a 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -379,7 +379,7 @@ void fil_space_crypt_t::write_page0(buf_block_t* block, mtr_t* mtr) { const ulint offset = FSP_HEADER_OFFSET + fsp_header_get_encryption_offset(block->zip_size()); - byte* b = block->frame + offset; + byte* b = block->page.frame + offset; mtr->memcpy<mtr_t::MAYBE_NOP>(*block, b, CRYPT_MAGIC, MAGIC_SZ); @@ -938,13 +938,13 @@ fil_crypt_read_crypt_data(fil_space_t* space) nullptr, BUF_GET_POSSIBLY_FREED, &mtr)) { - if (block->page.status == buf_page_t::FREED) { + if (block->page.is_freed()) { goto func_exit; } mysql_mutex_lock(&fil_system.mutex); if (!space->crypt_data && !space->is_stopping()) { space->crypt_data = fil_space_read_crypt_data( - zip_size, block->frame); + zip_size, block->page.frame); } mysql_mutex_unlock(&fil_system.mutex); } @@ -1001,7 +1001,7 @@ func_exit: page_id_t(space->id, 0), space->zip_size(), RW_X_LATCH, NULL, BUF_GET_POSSIBLY_FREED, &mtr, &err)) { - if (block->page.status == buf_page_t::FREED) { + if (block->page.is_freed()) { goto abort; } @@ -1793,7 +1793,7 @@ fil_crypt_rotate_page( const lsn_t block_lsn = mach_read_from_8(FIL_PAGE_LSN + frame); uint kv = buf_page_get_key_version(frame, space->flags); - if (block->page.status == buf_page_t::FREED) { + if (block->page.is_freed()) { /* Do not modify freed pages to avoid an assertion failure on recovery.*/ } else if (block->page.oldest_modification() > 1) { @@ -1973,7 +1973,7 @@ fil_crypt_flush_space( if (buf_block_t* block = buf_page_get_gen( page_id_t(space->id, 0), space->zip_size(), RW_X_LATCH, NULL, BUF_GET_POSSIBLY_FREED, &mtr)) { - if (block->page.status != buf_page_t::FREED) { + if (block->page.is_freed()) { mtr.set_named_space(space); crypt_data->write_page0(block, &mtr); } @@ -2422,7 +2422,7 @@ bool fil_space_verify_crypt_checksum(const 
byte* page, ulint zip_size) /* Compressed and encrypted pages do not have checksum. Assume not corrupted. Page verification happens after decompression in - buf_page_read_complete() using buf_page_is_corrupted(). */ + buf_page_t::read_complete() using buf_page_is_corrupted(). */ if (fil_page_get_type(page) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { return true; } diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 32bafb14684..681cee32fd5 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -2682,7 +2682,7 @@ void fsp_flags_try_adjust(fil_space_t* space, ulint flags) if (buf_block_t* b = buf_page_get( page_id_t(space->id, 0), space->zip_size(), RW_X_LATCH, &mtr)) { - uint32_t f = fsp_header_get_flags(b->frame); + uint32_t f = fsp_header_get_flags(b->page.frame); if (fil_space_t::full_crc32(f)) { goto func_exit; } @@ -2700,7 +2700,7 @@ void fsp_flags_try_adjust(fil_space_t* space, ulint flags) mtr.set_named_space(space); mtr.write<4,mtr_t::FORCED>(*b, FSP_HEADER_OFFSET + FSP_SPACE_FLAGS - + b->frame, flags); + + b->page.frame, flags); } func_exit: mtr.commit(); @@ -2923,7 +2923,7 @@ write_completed: files and never issue asynchronous reads of change buffer pages. 
*/ const page_id_t id(request.bpage->id()); - if (dberr_t err= buf_page_read_complete(request.bpage, *request.node)) + if (dberr_t err= request.bpage->read_complete(*request.node)) { if (recv_recovery_is_on() && !srv_force_recovery) { diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index feef88bd7ae..d563e0868f1 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -130,10 +130,10 @@ static buf_block_t *fsp_get_header(const fil_space_t *space, mtr_t *mtr) buf_block_t *block= buf_page_get_gen(page_id_t(space->id, 0), space->zip_size(), RW_SX_LATCH, nullptr, BUF_GET_POSSIBLY_FREED, mtr); - if (!block || block->page.status == buf_page_t::FREED) + if (!block || block->page.is_freed()) return nullptr; ut_ad(space->id == mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + - block->frame)); + block->page.frame)); return block; } @@ -150,7 +150,7 @@ inline void xdes_set_free(const buf_block_t &block, xdes_t *descr, ut_ad(mtr->memo_contains_flagged(&block, MTR_MEMO_PAGE_SX_FIX | MTR_MEMO_PAGE_X_FIX)); ut_ad(offset < FSP_EXTENT_SIZE); - ut_ad(page_align(descr) == block.frame); + ut_ad(page_align(descr) == block.page.frame); compile_time_assert(XDES_BITS_PER_PAGE == 2); compile_time_assert(XDES_FREE_BIT == 0); compile_time_assert(XDES_CLEAN_BIT == 1); @@ -222,7 +222,7 @@ inline void xdes_set_state(const buf_block_t &block, xdes_t *descr, ut_ad(state <= XDES_FSEG); ut_ad(mtr->memo_contains_flagged(&block, MTR_MEMO_PAGE_SX_FIX | MTR_MEMO_PAGE_X_FIX)); - ut_ad(page_align(descr) == block.frame); + ut_ad(page_align(descr) == block.page.frame); ut_ad(mach_read_from_4(descr + XDES_STATE) <= XDES_FSEG); mtr->write<1>(block, XDES_STATE + 3 + descr, state); } @@ -250,7 +250,7 @@ inline void xdes_init(const buf_block_t &block, xdes_t *descr, mtr_t *mtr) { ut_ad(mtr->memo_contains_flagged(&block, MTR_MEMO_PAGE_SX_FIX | MTR_MEMO_PAGE_X_FIX)); - mtr->memset(&block, uint16_t(descr - block.frame) + XDES_BITMAP, + 
mtr->memset(&block, uint16_t(descr - block.page.frame) + XDES_BITMAP, XDES_SIZE - XDES_BITMAP, 0xff); xdes_set_state(block, descr, XDES_FREE, mtr); } @@ -267,13 +267,13 @@ void fseg_mark_page_used(fseg_inode_t *seg_inode, buf_block_t *iblock, ulint page, xdes_t *descr, buf_block_t *xdes, mtr_t *mtr) { - ut_ad(fil_page_get_type(iblock->frame) == FIL_PAGE_INODE); + ut_ad(fil_page_get_type(iblock->page.frame) == FIL_PAGE_INODE); ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); ut_ad(!memcmp(seg_inode + FSEG_ID, descr + XDES_ID, 4)); - const uint16_t xoffset= uint16_t(descr - xdes->frame + XDES_FLST_NODE); - const uint16_t ioffset= uint16_t(seg_inode - iblock->frame); + const uint16_t xoffset= uint16_t(descr - xdes->page.frame + XDES_FLST_NODE); + const uint16_t ioffset= uint16_t(seg_inode - iblock->page.frame); if (!xdes_get_n_used(descr)) { @@ -325,9 +325,9 @@ xdes_get_descriptor_with_space_hdr( | MTR_MEMO_PAGE_X_FIX)); /* Read free limit and space size */ uint32_t limit = mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT - + header->frame); + + header->page.frame); uint32_t size = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE - + header->frame); + + header->page.frame); ut_ad(limit == space->free_limit || (space->free_limit == 0 && (init_space @@ -351,7 +351,7 @@ xdes_get_descriptor_with_space_hdr( block = buf_page_get_gen(page_id_t(space->id, descr_page_no), zip_size, RW_SX_LATCH, nullptr, BUF_GET_POSSIBLY_FREED, mtr); - if (block && block->page.status == buf_page_t::FREED) { + if (block && block->page.is_freed()) { block = nullptr; } } @@ -363,7 +363,7 @@ xdes_get_descriptor_with_space_hdr( return block ? 
XDES_ARR_OFFSET + XDES_SIZE * xdes_calc_descriptor_index(zip_size, offset) - + block->frame + + block->page.frame : nullptr; } @@ -385,7 +385,7 @@ static xdes_t *xdes_get_descriptor(const fil_space_t *space, page_no_t offset, buf_block_t *block= buf_page_get_gen(page_id_t(space->id, 0), space->zip_size(), RW_SX_LATCH, nullptr, BUF_GET_POSSIBLY_FREED, mtr); - if (!block || block->page.status == buf_page_t::FREED) + if (!block || block->page.is_freed()) return nullptr; return xdes_get_descriptor_with_space_hdr(block, space, offset, mtr, xdes); } @@ -422,18 +422,18 @@ xdes_get_descriptor_const( nullptr, BUF_GET_POSSIBLY_FREED, mtr)) { - if (block->page.status == buf_page_t::FREED) { + if (block->page.is_freed()) { return nullptr; } ut_ad(page != 0 || space->free_limit == mach_read_from_4( FSP_FREE_LIMIT + FSP_HEADER_OFFSET - + block->frame)); + + block->page.frame)); ut_ad(page != 0 || space->size_in_header == mach_read_from_4( FSP_SIZE + FSP_HEADER_OFFSET - + block->frame)); + + block->page.frame)); - return(block->frame + XDES_ARR_OFFSET + XDES_SIZE + return(block->page.frame + XDES_ARR_OFFSET + XDES_SIZE * xdes_calc_descriptor_index(zip_size, offset)); } @@ -473,26 +473,27 @@ static uint32_t xdes_get_offset(const xdes_t *descr) @param[in,out] block buffer pool block */ void fsp_apply_init_file_page(buf_block_t *block) { - memset_aligned<UNIV_PAGE_SIZE_MIN>(block->frame, 0, srv_page_size); + memset_aligned<UNIV_PAGE_SIZE_MIN>(block->page.frame, 0, srv_page_size); const page_id_t id(block->page.id()); - mach_write_to_4(block->frame + FIL_PAGE_OFFSET, id.page_no()); + mach_write_to_4(block->page.frame + FIL_PAGE_OFFSET, id.page_no()); if (log_sys.is_physical()) - memset_aligned<8>(block->frame + FIL_PAGE_PREV, 0xff, 8); - mach_write_to_4(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id.space()); + memset_aligned<8>(block->page.frame + FIL_PAGE_PREV, 0xff, 8); + mach_write_to_4(block->page.frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, + id.space()); if (page_zip_des_t* 
page_zip= buf_block_get_page_zip(block)) { memset_aligned<UNIV_ZIP_SIZE_MIN>(page_zip->data, 0, page_zip_get_size(page_zip)); static_assert(FIL_PAGE_OFFSET == 4, "compatibility"); memcpy_aligned<4>(page_zip->data + FIL_PAGE_OFFSET, - block->frame + FIL_PAGE_OFFSET, 4); + block->page.frame + FIL_PAGE_OFFSET, 4); if (log_sys.is_physical()) memset_aligned<8>(page_zip->data + FIL_PAGE_PREV, 0xff, 8); static_assert(FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID % 4 == 2, "not perfect alignment"); memcpy_aligned<2>(page_zip->data + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, - block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 4); + block->page.frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 4); } } @@ -552,28 +553,28 @@ void fsp_header_init(fil_space_t* space, uint32_t size, mtr_t* mtr) fsp_init_file_page(space, block, mtr); - mtr->write<2>(*block, block->frame + FIL_PAGE_TYPE, + mtr->write<2>(*block, block->page.frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_FSP_HDR); mtr->write<4,mtr_t::MAYBE_NOP>(*block, FSP_HEADER_OFFSET + FSP_SPACE_ID - + block->frame, space->id); + + block->page.frame, space->id); ut_ad(0 == mach_read_from_4(FSP_HEADER_OFFSET + FSP_NOT_USED - + block->frame)); + + block->page.frame)); /* recv_sys_t::parse() expects to find a WRITE record that covers all 4 bytes. Therefore, we must specify mtr_t::FORCED in order to avoid optimizing away any unchanged most significant bytes of FSP_SIZE. 
*/ mtr->write<4,mtr_t::FORCED>(*block, FSP_HEADER_OFFSET + FSP_SIZE - + block->frame, size); + + block->page.frame, size); ut_ad(0 == mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT - + block->frame)); + + block->page.frame)); if (auto f = space->flags & ~FSP_FLAGS_MEM_MASK) { mtr->write<4,mtr_t::FORCED>(*block, FSP_HEADER_OFFSET + FSP_SPACE_FLAGS - + block->frame, f); + + block->page.frame, f); } ut_ad(0 == mach_read_from_4(FSP_HEADER_OFFSET + FSP_FRAG_N_USED - + block->frame)); + + block->page.frame)); flst_init(block, FSP_HEADER_OFFSET + FSP_FREE, mtr); flst_init(block, FSP_HEADER_OFFSET + FSP_FREE_FRAG, mtr); @@ -581,7 +582,8 @@ void fsp_header_init(fil_space_t* space, uint32_t size, mtr_t* mtr) flst_init(block, FSP_HEADER_OFFSET + FSP_SEG_INODES_FULL, mtr); flst_init(block, FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE, mtr); - mtr->write<8>(*block, FSP_HEADER_OFFSET + FSP_SEG_ID + block->frame, + mtr->write<8>(*block, FSP_HEADER_OFFSET + FSP_SEG_ID + + block->page.frame, 1U); fsp_fill_free_list(!is_system_tablespace(space->id), @@ -617,7 +619,8 @@ fsp_try_extend_data_file_with_pages( ut_a(!is_system_tablespace(space->id)); ut_d(space->modify_check(*mtr)); - size = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE + header->frame); + size = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE + + header->page.frame); ut_ad(size == space->size_in_header); ut_a(page_no >= size); @@ -629,7 +632,7 @@ fsp_try_extend_data_file_with_pages( in order to avoid optimizing away any unchanged most significant bytes of FSP_SIZE. 
*/ mtr->write<4,mtr_t::FORCED>(*header, FSP_HEADER_OFFSET + FSP_SIZE - + header->frame, space->size); + + header->page.frame, space->size); space->size_in_header = space->size; return(success); @@ -722,7 +725,7 @@ fsp_try_extend_data_file(fil_space_t *space, buf_block_t *header, mtr_t *mtr) } uint32_t size = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE - + header->frame); + + header->page.frame); ut_ad(size == space->size_in_header); uint32_t size_increase; @@ -770,7 +773,8 @@ fsp_try_extend_data_file(fil_space_t *space, buf_block_t *header, mtr_t *mtr) in order to avoid optimizing away any unchanged most significant bytes of FSP_SIZE. */ mtr->write<4,mtr_t::FORCED>(*header, FSP_HEADER_OFFSET + FSP_SIZE - + header->frame, space->size_in_header); + + header->page.frame, + space->size_in_header); return(size_increase); } @@ -785,10 +789,9 @@ Any other pages were written with uninitialized bytes in FIL_PAGE_TYPE. ATTRIBUTE_COLD void fil_block_reset_type(const buf_block_t& block, ulint type, mtr_t* mtr) { - ib::info() - << "Resetting invalid page " << block.page.id() << " type " - << fil_page_get_type(block.frame) << " to " << type << "."; - mtr->write<2>(block, block.frame + FIL_PAGE_TYPE, type); + ib::info() << "Resetting invalid page " << block.page.id() << " type " + << fil_page_get_type(block.page.frame) << " to " << type << "."; + mtr->write<2>(block, block.page.frame + FIL_PAGE_TYPE, type); } /** Put new extents to the free list if there are free extents above the free @@ -812,9 +815,9 @@ fsp_fill_free_list( /* Check if we can fill free list from above the free list limit */ uint32_t size = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE - + header->frame); + + header->page.frame); uint32_t limit = mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT - + header->frame); + + header->page.frame); ut_ad(size == space->size_in_header); ut_ad(limit == space->free_limit); @@ -849,7 +852,7 @@ fsp_fill_free_list( space->free_limit = i + extent_size; mtr->write<4>(*header, 
FSP_HEADER_OFFSET + FSP_FREE_LIMIT - + header->frame, i + extent_size); + + header->page.frame, i + extent_size); if (init_xdes) { @@ -868,8 +871,8 @@ fsp_fill_free_list( buf_pool.free_block(f); } fsp_init_file_page(space, block, mtr); - mtr->write<2>(*block, - FIL_PAGE_TYPE + block->frame, + mtr->write<2>(*block, FIL_PAGE_TYPE + + block->page.frame, FIL_PAGE_TYPE_XDES); } @@ -884,8 +887,8 @@ fsp_fill_free_list( buf_pool.free_block(f); } fsp_init_file_page(space, block, mtr); - mtr->write<2>(*block, - block->frame + FIL_PAGE_TYPE, + mtr->write<2>(*block, FIL_PAGE_TYPE + + block->page.frame, FIL_PAGE_IBUF_BITMAP); } } @@ -903,7 +906,7 @@ fsp_fill_free_list( } xdes_init(*xdes, descr, mtr); const uint16_t xoffset= static_cast<uint16_t>( - descr - xdes->frame + XDES_FLST_NODE); + descr - xdes->page.frame + XDES_FLST_NODE); if (UNIV_UNLIKELY(init_xdes)) { @@ -920,7 +923,7 @@ fsp_fill_free_list( FSP_HEADER_OFFSET + FSP_FREE_FRAG, xdes, xoffset, mtr); byte* n_used = FSP_HEADER_OFFSET + FSP_FRAG_N_USED - + header->frame; + + header->page.frame; mtr->write<4>(*header, n_used, 2U + mach_read_from_4(n_used)); } else { @@ -974,13 +977,13 @@ fsp_alloc_free_extent( } else { /* Take the first extent in the free list */ first = flst_get_first(FSP_HEADER_OFFSET + FSP_FREE - + header->frame); + + header->page.frame); if (first.page == FIL_NULL) { fsp_fill_free_list(false, space, header, mtr); first = flst_get_first(FSP_HEADER_OFFSET + FSP_FREE - + header->frame); + + header->page.frame); if (first.page == FIL_NULL) { return nullptr; /* No free extents left */ } @@ -996,7 +999,8 @@ fsp_alloc_free_extent( flst_remove(header, FSP_HEADER_OFFSET + FSP_FREE, desc_block, static_cast<uint16_t>( - descr - desc_block->frame + XDES_FLST_NODE), mtr); + descr - desc_block->page.frame + XDES_FLST_NODE), + mtr); space->free_len--; *xdes = desc_block; @@ -1018,14 +1022,15 @@ fsp_alloc_from_free_frag(buf_block_t *header, buf_block_t *xdes, xdes_t *descr, xdes_set_free<false>(*xdes, descr, bit, mtr); 
/* Update the FRAG_N_USED field */ - byte* n_used_p = FSP_HEADER_OFFSET + FSP_FRAG_N_USED + header->frame; + byte* n_used_p = FSP_HEADER_OFFSET + FSP_FRAG_N_USED + + header->page.frame; uint32_t n_used = mach_read_from_4(n_used_p) + 1; if (xdes_is_full(descr)) { /* The fragment is full: move it to another list */ const uint16_t xoffset= static_cast<uint16_t>( - descr - xdes->frame + XDES_FLST_NODE); + descr - xdes->page.frame + XDES_FLST_NODE); flst_remove(header, FSP_HEADER_OFFSET + FSP_FREE_FRAG, xdes, xoffset, mtr); xdes_set_state(*xdes, descr, XDES_FULL_FRAG, mtr); @@ -1063,7 +1068,7 @@ fsp_page_create(fil_space_t *space, page_no_t offset, mtr_t *mtr) if (block) { ut_ad(block->page.buf_fix_count() >= 1); - ut_ad(block->lock.x_lock_count() == 1); + ut_ad(block->page.lock.x_lock_count() == 1); ut_ad(mtr->have_x_latch(*block)); free_block= block; goto got_free_block; @@ -1119,7 +1124,7 @@ fsp_alloc_free_page( } else { /* Else take the first extent in free_frag list */ first = flst_get_first(FSP_HEADER_OFFSET + FSP_FREE_FRAG - + block->frame); + + block->page.frame); if (first.page == FIL_NULL) { /* There are no partially full fragments: allocate @@ -1139,7 +1144,7 @@ fsp_alloc_free_page( xdes_set_state(*xdes, descr, XDES_FREE_FRAG, mtr); flst_add_last(block, FSP_HEADER_OFFSET + FSP_FREE_FRAG, xdes, static_cast<uint16_t>( - descr - xdes->frame + descr - xdes->page.frame + XDES_FLST_NODE), mtr); } else { descr = xdes_lst_get_descriptor(*space, first, mtr, @@ -1169,7 +1174,7 @@ fsp_alloc_free_page( uint32_t page_no = xdes_get_offset(descr) + free; uint32_t space_size = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE - + block->frame); + + block->page.frame); ut_ad(space_size == space->size_in_header || (space_id == TRX_SYS_SPACE && srv_startup_is_before_trx_rollback_phase)); @@ -1271,9 +1276,9 @@ static void fsp_free_page(fil_space_t* space, page_no_t offset, mtr_t* mtr) xdes_set_free<true>(*xdes, descr, bit, mtr); frag_n_used = mach_read_from_4(FSP_HEADER_OFFSET + 
FSP_FRAG_N_USED - + header->frame); + + header->page.frame); - const uint16_t xoffset= static_cast<uint16_t>(descr - xdes->frame + const uint16_t xoffset= static_cast<uint16_t>(descr - xdes->page.frame + XDES_FLST_NODE); if (state == XDES_FULL_FRAG) { @@ -1284,12 +1289,12 @@ static void fsp_free_page(fil_space_t* space, page_no_t offset, mtr_t* mtr) flst_add_last(header, FSP_HEADER_OFFSET + FSP_FREE_FRAG, xdes, xoffset, mtr); mtr->write<4>(*header, FSP_HEADER_OFFSET + FSP_FRAG_N_USED - + header->frame, + + header->page.frame, frag_n_used + FSP_EXTENT_SIZE - 1); } else { ut_a(frag_n_used > 0); mtr->write<4>(*header, FSP_HEADER_OFFSET + FSP_FRAG_N_USED - + header->frame, frag_n_used - 1); + + header->page.frame, frag_n_used - 1); } if (!xdes_get_n_used(descr)) { @@ -1325,7 +1330,7 @@ static void fsp_free_extent(fil_space_t* space, page_no_t offset, mtr_t* mtr) xdes_init(*xdes, descr, mtr); flst_add_last(block, FSP_HEADER_OFFSET + FSP_FREE, - xdes, static_cast<uint16_t>(descr - xdes->frame + + xdes, static_cast<uint16_t>(descr - xdes->page.frame + XDES_FLST_NODE), mtr); space->free_len++; } @@ -1408,12 +1413,12 @@ fsp_alloc_seg_inode_page(fil_space_t *space, buf_block_t *header, mtr_t *mtr) if (!block) return false; - ut_ad(block->lock.not_recursive()); + ut_ad(block->page.lock.not_recursive()); - mtr->write<2>(*block, block->frame + FIL_PAGE_TYPE, FIL_PAGE_INODE); + mtr->write<2>(*block, block->page.frame + FIL_PAGE_TYPE, FIL_PAGE_INODE); #ifdef UNIV_DEBUG - const byte *inode= FSEG_ID + FSEG_ARR_OFFSET + block->frame; + const byte *inode= FSEG_ID + FSEG_ARR_OFFSET + block->page.frame; for (ulint i= FSP_SEG_INODES_PER_PAGE(space->physical_size()); i--; inode += FSEG_INODE_SIZE) ut_ad(!mach_read_from_8(inode)); @@ -1441,18 +1446,18 @@ fsp_alloc_seg_inode(fil_space_t *space, buf_block_t *header, /* Allocate a new segment inode page if needed. 
*/ if (!flst_get_len(FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE - + header->frame) + + header->page.frame) && !fsp_alloc_seg_inode_page(space, header, mtr)) { return(NULL); } const page_id_t page_id( space->id, flst_get_first(FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE - + header->frame).page); + + header->page.frame).page); block = buf_page_get_gen(page_id, space->zip_size(), RW_SX_LATCH, nullptr, BUF_GET_POSSIBLY_FREED, mtr); - if (!block || block->page.status == buf_page_t::FREED) { + if (!block || block->page.is_freed()) { return nullptr; } @@ -1462,13 +1467,14 @@ fsp_alloc_seg_inode(fil_space_t *space, buf_block_t *header, const ulint physical_size = space->physical_size(); - ulint n = fsp_seg_inode_page_find_free(block->frame, 0, physical_size); + ulint n = fsp_seg_inode_page_find_free(block->page.frame, 0, + physical_size); ut_a(n < FSP_SEG_INODES_PER_PAGE(physical_size)); - inode = fsp_seg_inode_page_get_nth_inode(block->frame, n); + inode = fsp_seg_inode_page_get_nth_inode(block->page.frame, n); - if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(block->frame, + if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(block->page.frame, n + 1, physical_size)) { /* There are no other unused headers left on the page: move it @@ -1508,7 +1514,8 @@ static void fsp_free_seg_inode( const ulint physical_size = space->physical_size(); if (ULINT_UNDEFINED - == fsp_seg_inode_page_find_free(iblock->frame, 0, physical_size)) { + == fsp_seg_inode_page_find_free(iblock->page.frame, 0, + physical_size)) { /* Move the page to another list */ flst_remove(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FULL, iblock, FSEG_INODE_PAGE_NODE, mtr); @@ -1518,8 +1525,8 @@ static void fsp_free_seg_inode( mtr->memset(iblock, page_offset(inode) + FSEG_ID, FSEG_INODE_SIZE, 0); - if (ULINT_UNDEFINED - == fsp_seg_inode_page_find_used(iblock->frame, physical_size)) { + if (ULINT_UNDEFINED == fsp_seg_inode_page_find_used(iblock->page.frame, + physical_size)) { /* There are no other used headers left on 
the page: free it */ flst_remove(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE, iblock, FSEG_INODE_PAGE_NODE, mtr); @@ -1735,9 +1742,10 @@ fseg_create(fil_space_t *space, ulint byte_offset, mtr_t *mtr, value in space header */ seg_id = mach_read_from_8(FSP_HEADER_OFFSET + FSP_SEG_ID - + header->frame); + + header->page.frame); - mtr->write<8>(*header, FSP_HEADER_OFFSET + FSP_SEG_ID + header->frame, + mtr->write<8>(*header, + FSP_HEADER_OFFSET + FSP_SEG_ID + header->page.frame, seg_id + 1); mtr->write<8>(*iblock, inode + FSEG_ID, seg_id); ut_ad(!mach_read_from_4(inode + FSEG_NOT_FULL_N_USED)); @@ -1749,7 +1757,8 @@ fseg_create(fil_space_t *space, ulint byte_offset, mtr_t *mtr, mtr->write<4>(*iblock, inode + FSEG_MAGIC_N, FSEG_MAGIC_N_VALUE); compile_time_assert(FSEG_FRAG_SLOT_SIZE == 4); compile_time_assert(FIL_NULL == 0xffffffff); - mtr->memset(iblock, uint16_t(inode - iblock->frame) + FSEG_FRAG_ARR, + mtr->memset(iblock, + uint16_t(inode - iblock->page.frame) + FSEG_FRAG_ARR, FSEG_FRAG_SLOT_SIZE * FSEG_FRAG_ARR_N_SLOTS, 0xff); if (!block) { @@ -1769,23 +1778,23 @@ fseg_create(fil_space_t *space, ulint byte_offset, mtr_t *mtr, goto funct_exit; } - ut_d(const auto x = block->lock.x_lock_count()); - ut_ad(x || block->lock.not_recursive()); + ut_d(const auto x = block->page.lock.x_lock_count()); + ut_ad(x || block->page.lock.not_recursive()); ut_ad(x == 1 || space->is_being_truncated); ut_ad(x <= 2); - ut_ad(!fil_page_get_type(block->frame)); - mtr->write<1>(*block, FIL_PAGE_TYPE + 1 + block->frame, + ut_ad(!fil_page_get_type(block->page.frame)); + mtr->write<1>(*block, FIL_PAGE_TYPE + 1 + block->page.frame, FIL_PAGE_TYPE_SYS); } mtr->write<2>(*block, byte_offset + FSEG_HDR_OFFSET - + block->frame, page_offset(inode)); + + block->page.frame, page_offset(inode)); mtr->write<4>(*block, byte_offset + FSEG_HDR_PAGE_NO - + block->frame, iblock->page.id().page_no()); + + block->page.frame, iblock->page.id().page_no()); mtr->write<4,mtr_t::MAYBE_NOP>(*block, byte_offset + 
FSEG_HDR_SPACE - + block->frame, space->id); + + block->page.frame, space->id); funct_exit: if (!has_done_reservation) { @@ -1828,7 +1837,7 @@ ulint fseg_n_reserved_pages(const buf_block_t &block, const fseg_header_t *header, ulint *used, mtr_t *mtr) { - ut_ad(page_align(header) == block.frame); + ut_ad(page_align(header) == block.page.frame); return fseg_n_reserved_pages_low(fseg_inode_get(header, block.page.id().space(), block.zip_size(), mtr), @@ -1897,9 +1906,9 @@ fseg_fill_free_list( mtr->write<8>(*xdes, descr + XDES_ID, seg_id); flst_add_last(iblock, - static_cast<uint16_t>(inode - iblock->frame + static_cast<uint16_t>(inode - iblock->page.frame + FSEG_FREE), xdes, - static_cast<uint16_t>(descr - xdes->frame + static_cast<uint16_t>(descr - xdes->page.frame + XDES_FLST_NODE), mtr); hint += FSP_EXTENT_SIZE; } @@ -1960,9 +1969,9 @@ fseg_alloc_free_extent( mtr->write<8,mtr_t::MAYBE_NOP>(**xdes, descr + XDES_ID, seg_id); flst_add_last(iblock, - static_cast<uint16_t>(inode - iblock->frame + static_cast<uint16_t>(inode - iblock->page.frame + FSEG_FREE), *xdes, - static_cast<uint16_t>(descr - (*xdes)->frame + static_cast<uint16_t>(descr - (*xdes)->page.frame + XDES_FLST_NODE), mtr); /* Try to fill the segment free list */ @@ -2076,9 +2085,11 @@ take_hinted_page: mtr->write<8,mtr_t::MAYBE_NOP>(*xdes, ret_descr + XDES_ID, seg_id); flst_add_last(iblock, - static_cast<uint16_t>(seg_inode - iblock->frame + static_cast<uint16_t>(seg_inode + - iblock->page.frame + FSEG_FREE), xdes, - static_cast<uint16_t>(ret_descr - xdes->frame + static_cast<uint16_t>(ret_descr + - xdes->page.frame + XDES_FLST_NODE), mtr); /* Try to fill the segment free list */ @@ -2410,7 +2421,7 @@ fsp_reserve_free_extents( } try_again: uint32_t size = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE - + header->frame); + + header->page.frame); ut_ad(size == space->size_in_header); if (size < extent_size && n_pages < extent_size / 2) { @@ -2421,12 +2432,12 @@ try_again: } uint32_t n_free_list_ext = 
flst_get_len(FSP_HEADER_OFFSET + FSP_FREE - + header->frame); + + header->page.frame); ut_ad(space->free_len == n_free_list_ext); uint32_t free_limit = mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT - + header->frame); + + header->page.frame); ut_ad(space->free_limit == free_limit); /* Below we play safe when counting free extents above the free limit: @@ -2517,7 +2528,7 @@ fseg_free_page_low( ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); - ut_ad(iblock->frame == page_align(seg_inode)); + ut_ad(iblock->page.frame == page_align(seg_inode)); ut_d(space->modify_check(*mtr)); #ifdef BTR_CUR_HASH_ADAPT @@ -2551,7 +2562,8 @@ fseg_free_page_low( } compile_time_assert(FIL_NULL == 0xffffffff); - mtr->memset(iblock, uint16_t(seg_inode - iblock->frame) + mtr->memset(iblock, uint16_t(seg_inode + - iblock->page.frame) + FSEG_FRAG_ARR + i * FSEG_FRAG_SLOT_SIZE, 4, 0xff); break; @@ -2577,8 +2589,9 @@ fseg_free_page_low( byte* p_not_full = seg_inode + FSEG_NOT_FULL_N_USED; uint32_t not_full_n_used = mach_read_from_4(p_not_full); - const uint16_t xoffset= uint16_t(descr - xdes->frame + XDES_FLST_NODE); - const uint16_t ioffset= uint16_t(seg_inode - iblock->frame); + const uint16_t xoffset= uint16_t(descr - xdes->page.frame + + XDES_FLST_NODE); + const uint16_t ioffset= uint16_t(seg_inode - iblock->page.frame); if (xdes_is_full(descr)) { /* The fragment is full: move it to another list */ @@ -2705,8 +2718,9 @@ fseg_free_extent( ut_d(space->modify_check(*mtr)); const uint32_t first_page_in_extent = page - (page % FSP_EXTENT_SIZE); - const uint16_t xoffset= uint16_t(descr - xdes->frame + XDES_FLST_NODE); - const uint16_t ioffset= uint16_t(seg_inode - iblock->frame); + const uint16_t xoffset= uint16_t(descr - xdes->page.frame + + XDES_FLST_NODE); + const uint16_t ioffset= uint16_t(seg_inode - iblock->page.frame); #ifdef BTR_CUR_HASH_ADAPT if (ahi) { diff --git 
a/storage/innobase/fut/fut0lst.cc b/storage/innobase/fut/fut0lst.cc index cb5774ff7d1..98f37be002a 100644 --- a/storage/innobase/fut/fut0lst.cc +++ b/storage/innobase/fut/fut0lst.cc @@ -74,7 +74,7 @@ static void flst_write_addr(const buf_block_t& block, byte *faddr, static void flst_zero_both(const buf_block_t& b, byte *addr, mtr_t *mtr) { if (mach_read_from_4(addr + FIL_ADDR_PAGE) != FIL_NULL) - mtr->memset(&b, ulint(addr - b.frame) + FIL_ADDR_PAGE, 4, 0xff); + mtr->memset(&b, ulint(addr - b.page.frame) + FIL_ADDR_PAGE, 4, 0xff); mtr->write<2,mtr_t::MAYBE_NOP>(b, addr + FIL_ADDR_BYTE, 0U); /* Initialize the other address by (MEMMOVE|0x80,offset,FIL_ADDR_SIZE,source) which is 4 bytes, or less than FIL_ADDR_SIZE. */ @@ -95,12 +95,13 @@ static void flst_add_to_empty(buf_block_t *base, uint16_t boffset, ut_ad(mtr->memo_contains_flagged(add, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); - ut_ad(!mach_read_from_4(base->frame + boffset + FLST_LEN)); - mtr->write<1>(*base, base->frame + boffset + (FLST_LEN + 3), 1U); + ut_ad(!mach_read_from_4(base->page.frame + boffset + FLST_LEN)); + mtr->write<1>(*base, base->page.frame + boffset + (FLST_LEN + 3), 1U); /* Update first and last fields of base node */ - flst_write_addr(*base, base->frame + boffset + FLST_FIRST, + flst_write_addr(*base, base->page.frame + boffset + FLST_FIRST, add->page.id().page_no(), aoffset, mtr); - memcpy(base->frame + boffset + FLST_LAST, base->frame + boffset + FLST_FIRST, + memcpy(base->page.frame + boffset + FLST_LAST, + base->page.frame + boffset + FLST_FIRST, FIL_ADDR_SIZE); /* Initialize FLST_LAST by (MEMMOVE|0x80,offset,FIL_ADDR_SIZE,source) which is 4 bytes, or less than FIL_ADDR_SIZE. 
*/ @@ -109,7 +110,7 @@ static void flst_add_to_empty(buf_block_t *base, uint16_t boffset, /* Set prev and next fields of node to add */ static_assert(FLST_NEXT == FLST_PREV + FIL_ADDR_SIZE, "compatibility"); - flst_zero_both(*add, add->frame + aoffset + FLST_PREV, mtr); + flst_zero_both(*add, add->page.frame + aoffset + FLST_PREV, mtr); } /** Insert a node after another one. @@ -137,15 +138,15 @@ static void flst_insert_after(buf_block_t *base, uint16_t boffset, ut_ad(mtr->memo_contains_flagged(add, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); - fil_addr_t next_addr= flst_get_next_addr(cur->frame + coffset); + fil_addr_t next_addr= flst_get_next_addr(cur->page.frame + coffset); - flst_write_addr(*add, add->frame + aoffset + FLST_PREV, + flst_write_addr(*add, add->page.frame + aoffset + FLST_PREV, cur->page.id().page_no(), coffset, mtr); - flst_write_addr(*add, add->frame + aoffset + FLST_NEXT, + flst_write_addr(*add, add->page.frame + aoffset + FLST_NEXT, next_addr.page, next_addr.boffset, mtr); if (next_addr.page == FIL_NULL) - flst_write_addr(*base, base->frame + boffset + FLST_LAST, + flst_write_addr(*base, base->page.frame + boffset + FLST_LAST, add->page.id().page_no(), aoffset, mtr); else { @@ -156,10 +157,10 @@ static void flst_insert_after(buf_block_t *base, uint16_t boffset, add->page.id().page_no(), aoffset, mtr); } - flst_write_addr(*cur, cur->frame + coffset + FLST_NEXT, + flst_write_addr(*cur, cur->page.frame + coffset + FLST_NEXT, add->page.id().page_no(), aoffset, mtr); - byte *len= &base->frame[boffset + FLST_LEN]; + byte *len= &base->page.frame[boffset + FLST_LEN]; mtr->write<4>(*base, len, mach_read_from_4(len) + 1); } @@ -188,15 +189,15 @@ static void flst_insert_before(buf_block_t *base, uint16_t boffset, ut_ad(mtr->memo_contains_flagged(add, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); - fil_addr_t prev_addr= flst_get_prev_addr(cur->frame + coffset); + fil_addr_t prev_addr= flst_get_prev_addr(cur->page.frame + coffset); - 
flst_write_addr(*add, add->frame + aoffset + FLST_PREV, + flst_write_addr(*add, add->page.frame + aoffset + FLST_PREV, prev_addr.page, prev_addr.boffset, mtr); - flst_write_addr(*add, add->frame + aoffset + FLST_NEXT, + flst_write_addr(*add, add->page.frame + aoffset + FLST_NEXT, cur->page.id().page_no(), coffset, mtr); if (prev_addr.page == FIL_NULL) - flst_write_addr(*base, base->frame + boffset + FLST_FIRST, + flst_write_addr(*base, base->page.frame + boffset + FLST_FIRST, add->page.id().page_no(), aoffset, mtr); else { @@ -207,10 +208,10 @@ static void flst_insert_before(buf_block_t *base, uint16_t boffset, add->page.id().page_no(), aoffset, mtr); } - flst_write_addr(*cur, cur->frame + coffset + FLST_PREV, + flst_write_addr(*cur, cur->page.frame + coffset + FLST_PREV, add->page.id().page_no(), aoffset, mtr); - byte *len= &base->frame[boffset + FLST_LEN]; + byte *len= &base->page.frame[boffset + FLST_LEN]; mtr->write<4>(*base, len, mach_read_from_4(len) + 1); } @@ -244,19 +245,19 @@ void flst_add_last(buf_block_t *base, uint16_t boffset, ut_ad(mtr->memo_contains_flagged(add, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); - if (!flst_get_len(base->frame + boffset)) + if (!flst_get_len(base->page.frame + boffset)) flst_add_to_empty(base, boffset, add, aoffset, mtr); else { - fil_addr_t addr= flst_get_last(base->frame + boffset); + fil_addr_t addr= flst_get_last(base->page.frame + boffset); buf_block_t *cur= add; const flst_node_t *c= addr.page == add->page.id().page_no() - ? add->frame + addr.boffset + ? 
add->page.frame + addr.boffset : fut_get_ptr(add->page.id().space(), add->zip_size(), addr, RW_SX_LATCH, mtr, &cur); if (c) flst_insert_after(base, boffset, cur, - static_cast<uint16_t>(c - cur->frame), + static_cast<uint16_t>(c - cur->page.frame), add, aoffset, mtr); } } @@ -278,19 +279,19 @@ void flst_add_first(buf_block_t *base, uint16_t boffset, ut_ad(mtr->memo_contains_flagged(add, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); - if (!flst_get_len(base->frame + boffset)) + if (!flst_get_len(base->page.frame + boffset)) flst_add_to_empty(base, boffset, add, aoffset, mtr); else { - fil_addr_t addr= flst_get_first(base->frame + boffset); + fil_addr_t addr= flst_get_first(base->page.frame + boffset); buf_block_t *cur= add; const flst_node_t *c= addr.page == add->page.id().page_no() - ? add->frame + addr.boffset + ? add->page.frame + addr.boffset : fut_get_ptr(add->page.id().space(), add->zip_size(), addr, RW_SX_LATCH, mtr, &cur); if (c) flst_insert_before(base, boffset, cur, - static_cast<uint16_t>(c - cur->frame), + static_cast<uint16_t>(c - cur->page.frame), add, aoffset, mtr); } } @@ -311,17 +312,17 @@ void flst_remove(buf_block_t *base, uint16_t boffset, ut_ad(mtr->memo_contains_flagged(cur, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); - const fil_addr_t prev_addr= flst_get_prev_addr(cur->frame + coffset); - const fil_addr_t next_addr= flst_get_next_addr(cur->frame + coffset); + const fil_addr_t prev_addr= flst_get_prev_addr(cur->page.frame + coffset); + const fil_addr_t next_addr= flst_get_next_addr(cur->page.frame + coffset); if (prev_addr.page == FIL_NULL) - flst_write_addr(*base, base->frame + boffset + FLST_FIRST, + flst_write_addr(*base, base->page.frame + boffset + FLST_FIRST, next_addr.page, next_addr.boffset, mtr); else { buf_block_t *block= cur; if (flst_node_t *prev= prev_addr.page == cur->page.id().page_no() - ? cur->frame + prev_addr.boffset + ? 
cur->page.frame + prev_addr.boffset : fut_get_ptr(cur->page.id().space(), cur->zip_size(), prev_addr, RW_SX_LATCH, mtr, &block)) flst_write_addr(*block, prev + FLST_NEXT, @@ -329,20 +330,20 @@ void flst_remove(buf_block_t *base, uint16_t boffset, } if (next_addr.page == FIL_NULL) - flst_write_addr(*base, base->frame + boffset + FLST_LAST, + flst_write_addr(*base, base->page.frame + boffset + FLST_LAST, prev_addr.page, prev_addr.boffset, mtr); else { buf_block_t *block= cur; if (flst_node_t *next= next_addr.page == cur->page.id().page_no() - ? cur->frame + next_addr.boffset + ? cur->page.frame + next_addr.boffset : fut_get_ptr(cur->page.id().space(), cur->zip_size(), next_addr, RW_SX_LATCH, mtr, &block)) flst_write_addr(*block, next + FLST_PREV, prev_addr.page, prev_addr.boffset, mtr); } - byte *len= &base->frame[boffset + FLST_LEN]; + byte *len= &base->page.frame[boffset + FLST_LEN]; ut_ad(mach_read_from_4(len) > 0); mtr->write<4>(*base, len, mach_read_from_4(len) - 1); } @@ -362,8 +363,8 @@ void flst_validate(const buf_block_t *base, uint16_t boffset, mtr_t *mtr) the x-locked pages could fill the buffer, resulting in a deadlock. 
*/ mtr_t mtr2; - const uint32_t len= flst_get_len(base->frame + boffset); - fil_addr_t addr= flst_get_first(base->frame + boffset); + const uint32_t len= flst_get_len(base->page.frame + boffset); + fil_addr_t addr= flst_get_first(base->page.frame + boffset); for (uint32_t i= len; i--; ) { @@ -378,7 +379,7 @@ void flst_validate(const buf_block_t *base, uint16_t boffset, mtr_t *mtr) ut_ad(addr.page == FIL_NULL); - addr= flst_get_last(base->frame + boffset); + addr= flst_get_last(base->page.frame + boffset); for (uint32_t i= len; i--; ) { diff --git a/storage/innobase/gis/gis0rtree.cc b/storage/innobase/gis/gis0rtree.cc index a003100b3a2..8097fbe2011 100644 --- a/storage/innobase/gis/gis0rtree.cc +++ b/storage/innobase/gis/gis0rtree.cc @@ -231,7 +231,7 @@ rtr_update_mbr_field( ut_ad(page == buf_block_get_frame(block)); child = btr_node_ptr_get_child_page_no(rec, offsets); - const ulint n_core = page_is_leaf(block->frame) + const ulint n_core = page_is_leaf(block->page.frame) ? index->n_core_fields : 0; if (new_rec) { @@ -590,8 +590,8 @@ rtr_adjust_upper_level( page_cursor = btr_cur_get_page_cur(&cursor); - rtr_update_mbr_field(&cursor, offsets, NULL, block->frame, mbr, NULL, - mtr); + rtr_update_mbr_field(&cursor, offsets, nullptr, block->page.frame, mbr, + nullptr, mtr); /* Already updated parent MBR, reset in our path */ if (sea_cur->rtr_info) { @@ -605,7 +605,7 @@ rtr_adjust_upper_level( /* Insert the node for the new page. 
*/ node_ptr_upper = rtr_index_build_node_ptr( index, new_mbr, - page_rec_get_next(page_get_infimum_rec(new_block->frame)), + page_rec_get_next(page_get_infimum_rec(new_block->page.frame)), new_page_no, heap); ulint up_match = 0; @@ -660,15 +660,15 @@ rtr_adjust_upper_level( ut_ad(block->zip_size() == index->table->space->zip_size()); - const uint32_t next_page_no = btr_page_get_next(block->frame); + const uint32_t next_page_no = btr_page_get_next(block->page.frame); if (next_page_no != FIL_NULL) { buf_block_t* next_block = btr_block_get( *index, next_page_no, RW_X_LATCH, false, mtr); #ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(next_block->frame) - == page_is_comp(block->frame)); - ut_a(btr_page_get_prev(next_block->frame) + ut_a(page_is_comp(next_block->page.frame) + == page_is_comp(block->page.frame)); + ut_a(btr_page_get_prev(next_block->page.frame) == block->page.id().page_no()); #endif /* UNIV_BTR_DEBUG */ @@ -744,8 +744,7 @@ rtr_split_page_move_rec_list( log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); } - max_to_move = page_get_n_recs( - buf_block_get_frame(block)); + max_to_move = page_get_n_recs(buf_block_get_frame(block)); rec_move = static_cast<rtr_rec_move_t*>(mem_heap_alloc( heap, sizeof (*rec_move) * max_to_move)); @@ -979,7 +978,7 @@ func_start: if (page_level && UNIV_LIKELY_NULL(new_page_zip)) { /* ROW_FORMAT=COMPRESSED non-leaf pages are not expected to contain FIL_NULL in FIL_PAGE_PREV at this stage. */ - memset_aligned<4>(new_block->frame + FIL_PAGE_PREV, 0, 4); + memset_aligned<4>(new_block->page.frame + FIL_PAGE_PREV, 0, 4); } btr_page_create(new_block, new_page_zip, cursor->index, page_level, mtr); @@ -1879,7 +1878,7 @@ err_exit: /* Scan records in root page and calculate area. 
*/ double area = 0; for (const rec_t* rec = page_rec_get_next( - page_get_infimum_rec(block->frame)); + page_get_infimum_rec(block->page.frame)); !page_rec_is_supremum(rec); rec = page_rec_get_next_const(rec)) { rtr_mbr_t mbr; diff --git a/storage/innobase/gis/gis0sea.cc b/storage/innobase/gis/gis0sea.cc index 41f32cf7240..1e212524032 100644 --- a/storage/innobase/gis/gis0sea.cc +++ b/storage/innobase/gis/gis0sea.cc @@ -260,7 +260,7 @@ rtr_pcur_getnext_from_path( ut_ad(my_latch_mode == BTR_MODIFY_TREE || my_latch_mode == BTR_CONT_MODIFY_TREE || !page_is_leaf(btr_cur_get_page(btr_cur)) - || !btr_cur->page_cur.block->lock.have_any()); + || !btr_cur->page_cur.block->page.lock.have_any()); block = buf_page_get_gen( page_id_t(index->table->space_id, @@ -395,14 +395,14 @@ rtr_pcur_getnext_from_path( } if (rw_latch == RW_NO_LATCH) { - block->lock.s_lock(); + block->page.lock.s_lock(); } lock_prdt_lock(block, &prdt, index, LOCK_S, LOCK_PREDICATE, btr_cur->rtr_info->thr); if (rw_latch == RW_NO_LATCH) { - block->lock.s_unlock(); + block->page.lock.s_unlock(); } } @@ -926,7 +926,7 @@ rtr_create_rtr_info( mysql_mutex_init(rtr_match_mutex_key, &rtr_info->matches->rtr_match_mutex, nullptr); - rtr_info->matches->block.lock.init(); + rtr_info->matches->block.page.lock.init(); } rtr_info->path = UT_NEW_NOKEY(rtr_node_path_t()); @@ -1071,7 +1071,7 @@ rtr_clean_rtr_info( UT_DELETE(rtr_info->matches->matched_recs); } - rtr_info->matches->block.lock.free(); + rtr_info->matches->block.page.lock.free(); mysql_mutex_destroy( &rtr_info->matches->rtr_match_mutex); @@ -1387,7 +1387,7 @@ rtr_leaf_push_match_rec( ulint data_len; rtr_rec_t rtr_rec; - buf = match_rec->block.frame + match_rec->used; + buf = match_rec->block.page.frame + match_rec->used; ut_ad(page_rec_is_leaf(rec)); copy = rec_copy(buf, rec, offsets); @@ -1484,7 +1484,7 @@ rtr_non_leaf_insert_stack_push( new_seq, level, child_no, my_cursor, mbr_inc); } -/** Copy a buf_block_t, except "block->lock". 
+/** Copy a buf_block_t, except "block->page.lock". @param[in,out] matches copy to match->block @param[in] block block to copy */ static @@ -1498,8 +1498,9 @@ rtr_copy_buf( from the dummy buf_block_t we create here and because memcpy()ing it generates (valid) compiler warnings that the vtable pointer will be copied. */ + matches->block.page.lock.free(); new (&matches->block.page) buf_page_t(block->page); - matches->block.frame = block->frame; + matches->block.page.frame = block->page.frame; matches->block.unzip_LRU = block->unzip_LRU; ut_d(matches->block.in_unzip_LRU_list = block->in_unzip_LRU_list); @@ -1534,13 +1535,12 @@ rtr_init_match( ut_ad(matches->matched_recs->empty()); matches->locked = false; rtr_copy_buf(matches, block); - matches->block.frame = matches->bufp; + matches->block.page.frame = matches->bufp; matches->valid = false; - /* We have to copy PAGE_W*_SUPREMUM_END bytes so that we can + /* We have to copy PAGE_*_SUPREMUM_END bytes so that we can use infimum/supremum of this page as normal btr page for search. */ - memcpy(matches->block.frame, page, page_is_comp(page) - ? PAGE_NEW_SUPREMUM_END - : PAGE_OLD_SUPREMUM_END); + memcpy(matches->block.page.frame, page, page_is_comp(page) + ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END); matches->used = page_is_comp(page) ? 
PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 324bb895107..73d52e19bf1 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -17515,8 +17515,10 @@ func_exit: if (block != NULL) { ib::info() << "Dirtying page: " << block->page.id(); mtr.write<1,mtr_t::FORCED>(*block, - block->frame + FIL_PAGE_SPACE_ID, - block->frame[FIL_PAGE_SPACE_ID]); + block->page.frame + + FIL_PAGE_SPACE_ID, + block->page.frame + [FIL_PAGE_SPACE_ID]); } mtr.commit(); log_write_up_to(mtr.commit_lsn(), true); @@ -17970,7 +17972,8 @@ static bool innodb_buffer_pool_evict_uncompressed() for (buf_block_t* block = UT_LIST_GET_LAST(buf_pool.unzip_LRU); block != NULL; ) { buf_block_t* prev_block = UT_LIST_GET_PREV(unzip_LRU, block); - ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE); + ut_ad(block->page.in_file()); + ut_ad(block->page.belongs_to_unzip_LRU()); ut_ad(block->in_unzip_LRU_list); ut_ad(block->page.in_LRU_list); diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 23608749abf..59cbf3f6bbb 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -5825,8 +5825,8 @@ add_all_virtual: btr_pcur_move_to_next_on_page(&pcur); buf_block_t* block = btr_pcur_get_block(&pcur); - ut_ad(page_is_leaf(block->frame)); - ut_ad(!page_has_prev(block->frame)); + ut_ad(page_is_leaf(block->page.frame)); + ut_ad(!page_has_prev(block->page.frame)); ut_ad(!buf_block_get_page_zip(block)); const rec_t* rec = btr_pcur_get_rec(&pcur); que_thr_t* thr = pars_complete_graph_for_exec( @@ -5839,8 +5839,8 @@ add_all_virtual: if (is_root && !rec_is_alter_metadata(rec, *index) && !index->table->instant - && !page_has_next(block->frame) - && page_rec_is_last(rec, block->frame)) { + && !page_has_next(block->page.frame) + && page_rec_is_last(rec, block->page.frame)) { goto empty_table; } @@ 
-5852,7 +5852,8 @@ add_all_virtual: buf_block_t* root = btr_root_block_get(index, RW_X_LATCH, &mtr); DBUG_ASSERT(root); - if (fil_page_get_type(root->frame) != FIL_PAGE_TYPE_INSTANT) { + if (fil_page_get_type(root->page.frame) + != FIL_PAGE_TYPE_INSTANT) { DBUG_ASSERT("wrong page type" == 0); err = DB_CORRUPTION; goto func_exit; @@ -5923,8 +5924,8 @@ add_all_virtual: && !index->table->instant) { empty_table: /* The table is empty. */ - ut_ad(fil_page_index_page_check(block->frame)); - ut_ad(!page_has_siblings(block->frame)); + ut_ad(fil_page_index_page_check(block->page.frame)); + ut_ad(!page_has_siblings(block->page.frame)); ut_ad(block->page.id().page_no() == index->page); /* MDEV-17383: free metadata BLOBs! */ btr_page_empty(block, NULL, index, 0, &mtr); @@ -5942,7 +5943,7 @@ empty_table: mtr.start(); index->set_modified(mtr); if (buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, &mtr)) { - if (fil_page_get_type(root->frame) != FIL_PAGE_INDEX) { + if (fil_page_get_type(root->page.frame) != FIL_PAGE_INDEX) { DBUG_ASSERT("wrong page type" == 0); goto err_exit; } @@ -10493,7 +10494,8 @@ commit_cache_norebuild( space->zip_size(), RW_X_LATCH, &mtr)) { byte* f = FSP_HEADER_OFFSET - + FSP_SPACE_FLAGS + b->frame; + + FSP_SPACE_FLAGS + + b->page.frame; const auto sf = space->flags & ~FSP_FLAGS_MEM_MASK; if (mach_read_from_4(f) != sf) { diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc index 96d76686e68..717427c9f38 100644 --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ -116,10 +116,8 @@ struct buf_page_info_t{ ulint block_id; /*!< Buffer Pool block ID */ /** page identifier */ page_id_t id; - unsigned access_time:32; /*!< Time of first access */ - unsigned io_fix:2; /*!< type of pending I/O operation */ - uint32_t fix_count; /*!< Count of how manyfold this block - is bufferfixed */ + uint32_t access_time; /*!< Time of first access */ + uint32_t state; /*!< buf_page_t::state() */ #ifdef 
BTR_CUR_HASH_ADAPT unsigned hashed:1; /*!< Whether hash index has been built on this page */ @@ -130,7 +128,7 @@ struct buf_page_info_t{ buf_pool.freed_page_clock */ unsigned zip_ssize:PAGE_ZIP_SSIZE_BITS; /*!< Compressed page size */ - unsigned page_state:3; /*!< Page state */ + unsigned compressed_only:1; /*!< ROW_FORMAT=COMPRESSED only */ unsigned page_type:I_S_PAGE_TYPE_BITS; /*!< Page type */ unsigned num_recs:UNIV_PAGE_SIZE_SHIFT_MAX-2; /*!< Number of records on Page */ @@ -3816,12 +3814,11 @@ static const LEX_CSTRING io_values[] = { { STRING_WITH_LEN("IO_NONE") }, { STRING_WITH_LEN("IO_READ") }, - { STRING_WITH_LEN("IO_WRITE") }, - { STRING_WITH_LEN("IO_PIN") } + { STRING_WITH_LEN("IO_WRITE") } }; -static TypelibBuffer<4> io_values_typelib(io_values); +static TypelibBuffer<3> io_values_typelib(io_values); namespace Show { /* Fields of the dynamic table INNODB_BUFFER_POOL_PAGE. */ @@ -3944,7 +3941,7 @@ i_s_innodb_buffer_page_fill( OK(fields[IDX_BUFFER_PAGE_FLUSH_TYPE]->store(0, true)); OK(fields[IDX_BUFFER_PAGE_FIX_COUNT]->store( - page_info->fix_count, true)); + ~buf_page_t::LRU_MASK & page_info->state, true)); #ifdef BTR_CUR_HASH_ADAPT OK(fields[IDX_BUFFER_PAGE_HASHED]->store( @@ -4017,12 +4014,27 @@ i_s_innodb_buffer_page_fill( ? 
(UNIV_ZIP_SIZE_MIN >> 1) << page_info->zip_ssize : 0, true)); + static_assert(buf_page_t::NOT_USED == 0, "compatibility"); + static_assert(buf_page_t::MEMORY == 1, "compatibility"); + static_assert(buf_page_t::REMOVE_HASH == 2, "compatibility"); + OK(fields[IDX_BUFFER_PAGE_STATE]->store( - 1 + std::min<unsigned>(page_info->page_state, - BUF_BLOCK_FILE_PAGE), true)); + std::min<uint32_t>(3, page_info->state) + 1, true)); + + static_assert(buf_page_t::UNFIXED == 1U << 29, "comp."); + static_assert(buf_page_t::READ_FIX == 4U << 29, "comp."); + static_assert(buf_page_t::WRITE_FIX == 5U << 29, "comp."); - OK(fields[IDX_BUFFER_PAGE_IO_FIX]->store( - 1 + page_info->io_fix, true)); + unsigned io_fix = page_info->state >> 29; + if (io_fix < 4) { + io_fix = 1; + } else if (io_fix > 5) { + io_fix = 3; + } else { + io_fix -= 2; + } + + OK(fields[IDX_BUFFER_PAGE_IO_FIX]->store(io_fix, true)); OK(fields[IDX_BUFFER_PAGE_IS_OLD]->store( page_info->is_old, true)); @@ -4106,61 +4118,51 @@ i_s_innodb_buffer_page_get_info( { page_info->block_id = pos; - compile_time_assert(BUF_BLOCK_NOT_USED == 0); - compile_time_assert(BUF_BLOCK_MEMORY == 1); - compile_time_assert(BUF_BLOCK_REMOVE_HASH == 2); - compile_time_assert(BUF_BLOCK_FILE_PAGE == 3); - compile_time_assert(BUF_BLOCK_ZIP_PAGE == 4); + static_assert(buf_page_t::NOT_USED == 0, "compatibility"); + static_assert(buf_page_t::MEMORY == 1, "compatibility"); + static_assert(buf_page_t::REMOVE_HASH == 2, "compatibility"); + static_assert(buf_page_t::UNFIXED == 1U << 29, "compatibility"); + static_assert(buf_page_t::READ_FIX == 4U << 29, "compatibility"); + static_assert(buf_page_t::WRITE_FIX == 5U << 29, "compatibility"); - auto state = bpage->state(); - page_info->page_state= int{state} & 7; + page_info->state = bpage->state(); - switch (state) { - default: + if (page_info->state < buf_page_t::FREED) { page_info->page_type = I_S_PAGE_TYPE_UNKNOWN; - break; - case BUF_BLOCK_FILE_PAGE: - case BUF_BLOCK_ZIP_PAGE: + 
page_info->compressed_only = false; + } else { const byte* frame; page_info->id = bpage->id(); - page_info->fix_count = bpage->buf_fix_count(); - page_info->oldest_mod = bpage->oldest_modification(); page_info->access_time = bpage->access_time; page_info->zip_ssize = bpage->zip.ssize; - page_info->io_fix = bpage->io_fix() & 3; - page_info->is_old = bpage->old; page_info->freed_page_clock = bpage->freed_page_clock; - switch (bpage->io_fix()) { - case BUF_IO_NONE: - case BUF_IO_WRITE: - case BUF_IO_PIN: - break; - case BUF_IO_READ: + if (page_info->state >= buf_page_t::READ_FIX + && page_info->state < buf_page_t::WRITE_FIX) { page_info->page_type = I_S_PAGE_TYPE_UNKNOWN; page_info->newest_mod = 0; return; } - if (state == BUF_BLOCK_FILE_PAGE) { - const buf_block_t*block; - - block = reinterpret_cast<const buf_block_t*>(bpage); - frame = block->frame; + page_info->compressed_only = !bpage->frame, + frame = bpage->frame; + if (UNIV_LIKELY(frame != nullptr)) { #ifdef BTR_CUR_HASH_ADAPT /* Note: this may be a false positive, that is, block->index will not always be set to NULL when the last adaptive hash index reference is dropped. */ - page_info->hashed = (block->index != NULL); + page_info->hashed = + reinterpret_cast<const buf_block_t*>(bpage) + ->index != nullptr; #endif /* BTR_CUR_HASH_ADAPT */ } else { ut_ad(page_info->zip_ssize); @@ -4447,7 +4449,7 @@ i_s_innodb_buf_page_lru_fill( OK(fields[IDX_BUF_LRU_PAGE_FLUSH_TYPE]->store(0, true)); OK(fields[IDX_BUF_LRU_PAGE_FIX_COUNT]->store( - page_info->fix_count, true)); + ~buf_page_t::LRU_MASK & page_info->state, true)); #ifdef BTR_CUR_HASH_ADAPT OK(fields[IDX_BUF_LRU_PAGE_HASHED]->store( @@ -4520,11 +4522,22 @@ i_s_innodb_buf_page_lru_fill( ? 
512 << page_info->zip_ssize : 0, true)); OK(fields[IDX_BUF_LRU_PAGE_STATE]->store( - page_info->page_state == BUF_BLOCK_ZIP_PAGE, - true)); + page_info->compressed_only, true)); + + static_assert(buf_page_t::UNFIXED == 1U << 29, "comp."); + static_assert(buf_page_t::READ_FIX == 4U << 29, "comp."); + static_assert(buf_page_t::WRITE_FIX == 5U << 29, "comp."); + + unsigned io_fix = page_info->state >> 29; + if (io_fix < 4) { + io_fix = 1; + } else if (io_fix > 5) { + io_fix = 3; + } else { + io_fix -= 2; + } - OK(fields[IDX_BUF_LRU_PAGE_IO_FIX]->store( - 1 + page_info->io_fix, true)); + OK(fields[IDX_BUF_LRU_PAGE_IO_FIX]->store(io_fix, true)); OK(fields[IDX_BUF_LRU_PAGE_IS_OLD]->store( page_info->is_old, true)); diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 8db0fc7d8ff..6e19248d578 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -321,7 +321,7 @@ ibuf_header_page_get( page_id_t(IBUF_SPACE_ID, FSP_IBUF_HEADER_PAGE_NO), 0, RW_X_LATCH, mtr); - return block ? block->frame : nullptr; + return block ? block->page.frame : nullptr; } /** Acquire the change buffer root page. 
@@ -341,7 +341,7 @@ static buf_block_t *ibuf_tree_root_get(mtr_t *mtr) page_id_t(IBUF_SPACE_ID, FSP_IBUF_TREE_ROOT_PAGE_NO), 0, RW_SX_LATCH, mtr); - ut_ad(ibuf.empty == page_is_empty(block->frame)); + ut_ad(ibuf.empty == page_is_empty(block->page.frame)); return block; } @@ -430,7 +430,7 @@ ibuf_init_at_db_start(void) fseg_n_reserved_pages(*header_page, IBUF_HEADER + IBUF_TREE_SEG_HEADER - + header_page->frame, &n_used, &mtr); + + header_page->page.frame, &n_used, &mtr); ut_ad(n_used >= 2); @@ -613,7 +613,7 @@ ibuf_bitmap_page_set_bits( ut_ad(byte_offset + IBUF_BITMAP < srv_page_size); - byte* map_byte = &block->frame[IBUF_BITMAP + byte_offset]; + byte* map_byte = &block->page.frame[IBUF_BITMAP + byte_offset]; byte b = *map_byte; if (bit == IBUF_BITMAP_FREE) { @@ -687,7 +687,7 @@ ibuf_set_free_bits_low( mtr_t* mtr) /*!< in/out: mtr */ { ut_ad(mtr->is_named_space(block->page.id().space())); - if (!page_is_leaf(block->frame)) { + if (!page_is_leaf(block->page.frame)) { return; } @@ -721,7 +721,7 @@ ibuf_set_free_bits_func( #endif /* UNIV_IBUF_DEBUG */ ulint val) /*!< in: value to set: < 4 */ { - if (!page_is_leaf(block->frame)) { + if (!page_is_leaf(block->page.frame)) { return; } @@ -831,7 +831,7 @@ ibuf_update_free_bits_zip( buf_block_t* block, /*!< in/out: index page */ mtr_t* mtr) /*!< in/out: mtr */ { - ut_ad(page_is_leaf(block->frame)); + ut_ad(page_is_leaf(block->page.frame)); ut_ad(block->zip_size()); ulint after = ibuf_index_page_calc_free_zip(block); @@ -956,7 +956,7 @@ ibuf_page_low( &local_mtr, &err); ret = ibuf_bitmap_page_get_bits_low( - block->frame, page_id, zip_size, + block->page.frame, page_id, zip_size, MTR_MEMO_BUF_FIX, &local_mtr, IBUF_BITMAP_IBUF); mtr_commit(&local_mtr); @@ -971,7 +971,7 @@ ibuf_page_low( ret = ibuf_bitmap_page_get_bits(ibuf_bitmap_get_map_page( page_id, zip_size, - mtr)->frame, + mtr)->page.frame, page_id, zip_size, IBUF_BITMAP_IBUF, mtr); @@ -1814,11 +1814,11 @@ static bool ibuf_add_free_page() return false; } - 
ut_ad(block->lock.not_recursive()); + ut_ad(block->page.lock.not_recursive()); ibuf_enter(&mtr); mysql_mutex_lock(&ibuf_mutex); - mtr.write<2>(*block, block->frame + FIL_PAGE_TYPE, + mtr.write<2>(*block, block->page.frame + FIL_PAGE_TYPE, FIL_PAGE_IBUF_FREE_LIST); /* Add the page to the free list and update the ibuf size data */ @@ -1889,7 +1889,7 @@ ibuf_remove_free_page(void) mysql_mutex_unlock(&ibuf_mutex); uint32_t page_no = flst_get_last(PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST - + root->frame).page; + + root->page.frame).page; /* NOTE that we must release the latch on the ibuf tree root because in fseg_free_page we access level 1 pages, and the root @@ -1917,7 +1917,7 @@ ibuf_remove_free_page(void) root = ibuf_tree_root_get(&mtr); ut_ad(page_no == flst_get_last(PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST - + root->frame).page); + + root->page.frame).page); buf_block_t* block = buf_page_get(page_id, 0, RW_X_LATCH, &mtr); @@ -3297,7 +3297,7 @@ fail_exit: buffer pool, but we do not have to care about it, since we are holding a latch on the insert buffer leaf page that contains buffered changes for (space, page_no). If the page enters the - buffer pool, buf_page_read_complete() for (space, page_no) will + buffer pool, buf_page_t::read_complete() for (space, page_no) will have to acquire a latch on the same insert buffer leaf page, which it cannot do until we have buffered the IBUF_OP_DELETE and done mtr_commit(&mtr) to release the latch. */ @@ -3322,7 +3322,7 @@ commit_exit: if (op == IBUF_OP_INSERT) { ulint bits = ibuf_bitmap_page_get_bits( - bitmap_page->frame, page_id, physical_size, + bitmap_page->page.frame, page_id, physical_size, IBUF_BITMAP_FREE, &bitmap_mtr); if (buffered + entry_size + page_dir_calc_reserved_space(1) @@ -3402,7 +3402,7 @@ commit_exit: which would cause the sx-latching of the root after that to break the latching order. 
*/ - root = ibuf_tree_root_get(&mtr)->frame; + root = ibuf_tree_root_get(&mtr)->page.frame; err = btr_cur_optimistic_insert( BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG, @@ -3638,7 +3638,7 @@ ibuf_insert_to_index_page_low( } ib::error() << "Insert buffer insert fails; page free " - << page_get_max_insert_size(block->frame, 1) + << page_get_max_insert_size(block->page.frame, 1) << ", dtuple size " << rec_get_converted_size(index, entry, 0); @@ -3653,9 +3653,10 @@ ibuf_insert_to_index_page_low( ib::error() << "page " << block->page.id() << ", size " << block->physical_size() << ", bitmap bits " - << ibuf_bitmap_page_get_bits(bitmap_page->frame, - block->page.id(), block->zip_size(), - IBUF_BITMAP_FREE, mtr); + << ibuf_bitmap_page_get_bits( + bitmap_page->page.frame, + block->page.id(), block->zip_size(), + IBUF_BITMAP_FREE, mtr); } ib::error() << BUG_REPORT_MSG; @@ -4098,7 +4099,7 @@ bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur, goto func_exit; } - root = ibuf_tree_root_get(mtr)->frame; + root = ibuf_tree_root_get(mtr)->page.frame; btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur), 0, false, mtr); @@ -4139,7 +4140,7 @@ bool ibuf_page_exists(const page_id_t id, ulint zip_size) if (const buf_block_t* bitmap_page = ibuf_bitmap_get_map_page( id, zip_size, &mtr)) { bitmap_bits = ibuf_bitmap_page_get_bits( - bitmap_page->frame, id, zip_size, + bitmap_page->page.frame, id, zip_size, IBUF_BITMAP_BUFFERED, &mtr) != 0; } ibuf_mtr_commit(&mtr); @@ -4197,8 +4198,9 @@ void ibuf_merge_or_delete_for_page(buf_block_t *block, const page_id_t page_id, ulint dops[IBUF_OP_COUNT]; ut_ad(!block || page_id == block->page.id()); - ut_ad(!block || block->page.state() == BUF_BLOCK_FILE_PAGE); - ut_ad(!block || block->page.status == buf_page_t::NORMAL); + ut_ad(!block || block->page.frame); + ut_ad(!block || !block->page.is_ibuf_exist()); + ut_ad(!block || !block->page.is_reinit()); ut_ad(!trx_sys_hdr_page(page_id)); ut_ad(page_id < 
page_id_t(SRV_SPACE_ID_UPPER_BOUND, 0)); @@ -4222,10 +4224,10 @@ void ibuf_merge_or_delete_for_page(buf_block_t *block, const page_id_t page_id, page_id, zip_size, &mtr); if (bitmap_page - && fil_page_get_type(bitmap_page->frame) + && fil_page_get_type(bitmap_page->page.frame) != FIL_PAGE_TYPE_ALLOCATED) { bitmap_bits = ibuf_bitmap_page_get_bits( - bitmap_page->frame, page_id, zip_size, + bitmap_page->page.frame, page_id, zip_size, IBUF_BITMAP_BUFFERED, &mtr); } @@ -4258,17 +4260,17 @@ void ibuf_merge_or_delete_for_page(buf_block_t *block, const page_id_t page_id, is needed for the insert operations to the index page to pass the debug checks. */ - block->lock.claim_ownership(); + block->page.lock.claim_ownership(); - if (!fil_page_index_page_check(block->frame) - || !page_is_leaf(block->frame)) { + if (!fil_page_index_page_check(block->page.frame) + || !page_is_leaf(block->page.frame)) { corruption_noticed = true; ib::error() << "Corruption in the tablespace. Bitmap" " shows insert buffer records to page " << page_id << " though the page type is " - << fil_page_get_type(block->frame) + << fil_page_get_type(block->page.frame) << ", which is not an index leaf page. We try" " to resolve the problem by skipping the" " insert buffer merge for this page. 
Please" @@ -4291,8 +4293,8 @@ loop: &pcur, &mtr); if (block) { - buf_block_buf_fix_inc(block); - block->lock.x_lock_recursive(); + block->page.fix(); + block->page.lock.x_lock_recursive(); mtr.memo_push(block, MTR_MEMO_PAGE_X_FIX); } @@ -4351,7 +4353,7 @@ loop: dummy_index->table->space = space; dummy_index->table->space_id = space->id; - ut_ad(page_validate(block->frame, dummy_index)); + ut_ad(page_validate(block->page.frame, dummy_index)); switch (op) { case IBUF_OP_INSERT: @@ -4401,8 +4403,8 @@ loop: ibuf_mtr_start(&mtr); mtr.set_named_space(space); - buf_block_buf_fix_inc(block); - block->lock.x_lock_recursive(); + block->page.lock.x_lock_recursive(); + block->fix(); mtr.memo_push(block, MTR_MEMO_PAGE_X_FIX); if (!ibuf_restore_pos(page_id, search_tuple, @@ -4552,7 +4554,7 @@ ibuf_is_empty(void) ut_d(mysql_mutex_lock(&ibuf_mutex)); const buf_block_t* root = ibuf_tree_root_get(&mtr); - bool is_empty = page_is_empty(root->frame); + bool is_empty = page_is_empty(root->page.frame); ut_a(is_empty == ibuf.empty); ut_d(mysql_mutex_unlock(&ibuf_mutex)); ibuf_mtr_commit(&mtr); @@ -4634,7 +4636,7 @@ dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space) return DB_CORRUPTION; } - if (buf_is_zeroes(span<const byte>(bitmap_page->frame, + if (buf_is_zeroes(span<const byte>(bitmap_page->page.frame, physical_size))) { /* This means we got all-zero page instead of ibuf bitmap page. 
The subsequent page should be @@ -4663,7 +4665,8 @@ dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space) const page_id_t cur_page_id(space->id, offset); if (ibuf_bitmap_page_get_bits( - bitmap_page->frame, cur_page_id, zip_size, + bitmap_page->page.frame, + cur_page_id, zip_size, IBUF_BITMAP_IBUF, &mtr)) { mysql_mutex_unlock(&ibuf_mutex); @@ -4681,7 +4684,8 @@ dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space) } if (ibuf_bitmap_page_get_bits( - bitmap_page->frame, cur_page_id, zip_size, + bitmap_page->page.frame, + cur_page_id, zip_size, IBUF_BITMAP_BUFFERED, &mtr)) { ib_errf(trx->mysql_thd, diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index 40293df21e5..95e148bee54 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -460,9 +460,9 @@ template<bool has_prev= false> inline void btr_set_min_rec_mark(rec_t *rec, const buf_block_t &block, mtr_t *mtr) { - ut_ad(block.frame == page_align(rec)); - ut_ad(!page_is_leaf(block.frame)); - ut_ad(has_prev == page_has_prev(block.frame)); + ut_ad(block.page.frame == page_align(rec)); + ut_ad(!page_is_leaf(block.page.frame)); + ut_ad(has_prev == page_has_prev(block.page.frame)); rec-= page_rec_is_comp(rec) ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS; diff --git a/storage/innobase/include/btr0btr.ic b/storage/innobase/include/btr0btr.ic index 89826e8f214..f92622cc400 100644 --- a/storage/innobase/include/btr0btr.ic +++ b/storage/innobase/include/btr0btr.ic @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2020, MariaDB Corporation. +Copyright (c) 2015, 2021, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -50,7 +50,7 @@ void btr_page_set_level(buf_block_t *block, ulint level, mtr_t *mtr) { ut_ad(level <= BTR_MAX_NODE_LEVEL); constexpr uint16_t field= PAGE_HEADER + PAGE_LEVEL; - byte *b= my_assume_aligned<2>(&block->frame[field]); + byte *b= my_assume_aligned<2>(&block->page.frame[field]); if (mtr->write<2,mtr_t::MAYBE_NOP>(*block, b, level) && UNIV_LIKELY_NULL(block->page.zip.data)) memcpy_aligned<2>(&block->page.zip.data[field], b, 2); @@ -63,7 +63,7 @@ void btr_page_set_level(buf_block_t *block, ulint level, mtr_t *mtr) inline void btr_page_set_next(buf_block_t *block, ulint next, mtr_t *mtr) { constexpr uint16_t field= FIL_PAGE_NEXT; - byte *b= my_assume_aligned<4>(&block->frame[field]); + byte *b= my_assume_aligned<4>(&block->page.frame[field]); if (mtr->write<4,mtr_t::MAYBE_NOP>(*block, b, next) && UNIV_LIKELY_NULL(block->page.zip.data)) memcpy_aligned<4>(&block->page.zip.data[field], b, 4); @@ -76,7 +76,7 @@ inline void btr_page_set_next(buf_block_t *block, ulint next, mtr_t *mtr) inline void btr_page_set_prev(buf_block_t *block, ulint prev, mtr_t *mtr) { constexpr uint16_t field= FIL_PAGE_PREV; - byte *b= my_assume_aligned<4>(&block->frame[field]); + byte *b= my_assume_aligned<4>(&block->page.frame[field]); if (mtr->write<4,mtr_t::MAYBE_NOP>(*block, b, prev) && UNIV_LIKELY_NULL(block->page.zip.data)) memcpy_aligned<4>(&block->page.zip.data[field], b, 4); diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index 911c79c29ba..778b8cd04e8 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -73,36 +73,10 @@ struct btr_latch_leaves_t { #include "que0types.h" #include "row0types.h" -#ifdef UNIV_DEBUG -/*********************************************************//** -Returns the page cursor component of a tree cursor. 
-@return pointer to page cursor component */ -UNIV_INLINE -page_cur_t* -btr_cur_get_page_cur( -/*=================*/ - const btr_cur_t* cursor);/*!< in: tree cursor */ -/*********************************************************//** -Returns the buffer block on which the tree cursor is positioned. -@return pointer to buffer block */ -UNIV_INLINE -buf_block_t* -btr_cur_get_block( -/*==============*/ - const btr_cur_t* cursor);/*!< in: tree cursor */ -/*********************************************************//** -Returns the record pointer of a tree cursor. -@return pointer to record */ -UNIV_INLINE -rec_t* -btr_cur_get_rec( -/*============*/ - const btr_cur_t* cursor);/*!< in: tree cursor */ -#else /* UNIV_DEBUG */ -# define btr_cur_get_page_cur(cursor) (&(cursor)->page_cur) -# define btr_cur_get_block(cursor) ((cursor)->page_cur.block) -# define btr_cur_get_rec(cursor) ((cursor)->page_cur.rec) -#endif /* UNIV_DEBUG */ +#define btr_cur_get_page_cur(cursor) (&(cursor)->page_cur) +#define btr_cur_get_block(cursor) ((cursor)->page_cur.block) +#define btr_cur_get_rec(cursor) ((cursor)->page_cur.rec) + /*********************************************************//** Returns the compressed page on which the tree cursor is positioned. @return pointer to compressed page, or NULL if the page is not compressed */ diff --git a/storage/innobase/include/btr0cur.ic b/storage/innobase/include/btr0cur.ic index 905dc28bd62..76a2d3be49c 100644 --- a/storage/innobase/include/btr0cur.ic +++ b/storage/innobase/include/btr0cur.ic @@ -36,44 +36,6 @@ if (btr_cur_limit_optimistic_insert_debug > 1\ # define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE) #endif /* UNIV_DEBUG */ -#ifdef UNIV_DEBUG -/*********************************************************//** -Returns the page cursor component of a tree cursor. 
-@return pointer to page cursor component */ -UNIV_INLINE -page_cur_t* -btr_cur_get_page_cur( -/*=================*/ - const btr_cur_t* cursor) /*!< in: tree cursor */ -{ - return(&((btr_cur_t*) cursor)->page_cur); -} - -/*********************************************************//** -Returns the buffer block on which the tree cursor is positioned. -@return pointer to buffer block */ -UNIV_INLINE -buf_block_t* -btr_cur_get_block( -/*==============*/ - const btr_cur_t* cursor) /*!< in: tree cursor */ -{ - return(page_cur_get_block(btr_cur_get_page_cur(cursor))); -} - -/*********************************************************//** -Returns the record pointer of a tree cursor. -@return pointer to record */ -UNIV_INLINE -rec_t* -btr_cur_get_rec( -/*============*/ - const btr_cur_t* cursor) /*!< in: tree cursor */ -{ - return(page_cur_get_rec(btr_cur_get_page_cur(cursor))); -} -#endif /* UNIV_DEBUG */ - /*********************************************************//** Returns the compressed page on which the tree cursor is positioned. @return pointer to compressed page, or NULL if the page is not compressed */ diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h index 62f82632c62..45406bd4b6f 100644 --- a/storage/innobase/include/btr0pcur.h +++ b/storage/innobase/include/btr0pcur.h @@ -335,54 +335,11 @@ btr_pcur_move_to_next_page( btr_pcur_t* cursor, /*!< in: persistent cursor; must be on the last record of the current page */ mtr_t* mtr); /*!< in: mtr */ -#ifdef UNIV_DEBUG -/*********************************************************//** -Returns the btr cursor component of a persistent cursor. -@return pointer to btr cursor component */ -UNIV_INLINE -btr_cur_t* -btr_pcur_get_btr_cur( -/*=================*/ - const btr_pcur_t* cursor); /*!< in: persistent cursor */ -/*********************************************************//** -Returns the page cursor component of a persistent cursor. 
-@return pointer to page cursor component */ -UNIV_INLINE -page_cur_t* -btr_pcur_get_page_cur( -/*==================*/ - const btr_pcur_t* cursor); /*!< in: persistent cursor */ -/*********************************************************//** -Returns the page of a persistent cursor. -@return pointer to the page */ -UNIV_INLINE -page_t* -btr_pcur_get_page( -/*==============*/ - const btr_pcur_t* cursor);/*!< in: persistent cursor */ -/*********************************************************//** -Returns the buffer block of a persistent cursor. -@return pointer to the block */ -UNIV_INLINE -buf_block_t* -btr_pcur_get_block( -/*===============*/ - const btr_pcur_t* cursor);/*!< in: persistent cursor */ -/*********************************************************//** -Returns the record of a persistent cursor. -@return pointer to the record */ -UNIV_INLINE -rec_t* -btr_pcur_get_rec( -/*=============*/ - const btr_pcur_t* cursor);/*!< in: persistent cursor */ -#else /* UNIV_DEBUG */ -# define btr_pcur_get_btr_cur(cursor) (&(cursor)->btr_cur) -# define btr_pcur_get_page_cur(cursor) (&(cursor)->btr_cur.page_cur) -# define btr_pcur_get_page(cursor) ((cursor)->btr_cur.page_cur.block->frame) -# define btr_pcur_get_block(cursor) ((cursor)->btr_cur.page_cur.block) -# define btr_pcur_get_rec(cursor) ((cursor)->btr_cur.page_cur.rec) -#endif /* UNIV_DEBUG */ + +#define btr_pcur_get_btr_cur(cursor) (&(cursor)->btr_cur) +#define btr_pcur_get_page_cur(cursor) (&(cursor)->btr_cur.page_cur) +#define btr_pcur_get_page(cursor) btr_pcur_get_block(cursor)->page.frame + /*********************************************************//** Checks if the persistent cursor is on a user record. 
*/ UNIV_INLINE @@ -521,6 +478,25 @@ struct btr_pcur_t{ dict_index_t* index() const { return(btr_cur.index); } }; +inline buf_block_t *btr_pcur_get_block(btr_pcur_t *cursor) +{ + ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); + return cursor->btr_cur.page_cur.block; +} + +inline const buf_block_t *btr_pcur_get_block(const btr_pcur_t *cursor) +{ + ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); + return cursor->btr_cur.page_cur.block; +} + +inline rec_t *btr_pcur_get_rec(const btr_pcur_t *cursor) +{ + ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); + ut_ad(cursor->latch_mode != BTR_NO_LATCHES); + return cursor->btr_cur.page_cur.rec; +} + #include "btr0pcur.ic" #endif diff --git a/storage/innobase/include/btr0pcur.ic b/storage/innobase/include/btr0pcur.ic index 3853db88a8e..f5e59c7268e 100644 --- a/storage/innobase/include/btr0pcur.ic +++ b/storage/innobase/include/btr0pcur.ic @@ -43,76 +43,6 @@ btr_pcur_get_rel_pos( return(cursor->rel_pos); } -#ifdef UNIV_DEBUG -/*********************************************************//** -Returns the btr cursor component of a persistent cursor. -@return pointer to btr cursor component */ -UNIV_INLINE -btr_cur_t* -btr_pcur_get_btr_cur( -/*=================*/ - const btr_pcur_t* cursor) /*!< in: persistent cursor */ -{ - const btr_cur_t* btr_cur = &cursor->btr_cur; - return((btr_cur_t*) btr_cur); -} - -/*********************************************************//** -Returns the page cursor component of a persistent cursor. -@return pointer to page cursor component */ -UNIV_INLINE -page_cur_t* -btr_pcur_get_page_cur( -/*==================*/ - const btr_pcur_t* cursor) /*!< in: persistent cursor */ -{ - return(btr_cur_get_page_cur(btr_pcur_get_btr_cur(cursor))); -} - -/*********************************************************//** -Returns the page of a persistent cursor. 
-@return pointer to the page */ -UNIV_INLINE -page_t* -btr_pcur_get_page( -/*==============*/ - const btr_pcur_t* cursor) /*!< in: persistent cursor */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - - return(btr_cur_get_page(btr_pcur_get_btr_cur(cursor))); -} - -/*********************************************************//** -Returns the buffer block of a persistent cursor. -@return pointer to the block */ -UNIV_INLINE -buf_block_t* -btr_pcur_get_block( -/*===============*/ - const btr_pcur_t* cursor) /*!< in: persistent cursor */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - - return(btr_cur_get_block(btr_pcur_get_btr_cur(cursor))); -} - -/*********************************************************//** -Returns the record of a persistent cursor. -@return pointer to the record */ -UNIV_INLINE -rec_t* -btr_pcur_get_rec( -/*=============*/ - const btr_pcur_t* cursor) /*!< in: persistent cursor */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - return(btr_cur_get_rec(btr_pcur_get_btr_cur(cursor))); -} -#endif /* UNIV_DEBUG */ - /**************************************************************//** Gets the up_match value for a pcur after a search. 
@return number of matched fields at the cursor or to the right if diff --git a/storage/innobase/include/buf0block_hint.h b/storage/innobase/include/buf0block_hint.h index ee48e7ce6d2..d4fee7c1e99 100644 --- a/storage/innobase/include/buf0block_hint.h +++ b/storage/innobase/include/buf0block_hint.h @@ -56,7 +56,7 @@ public: buf_block_t *block= m_block; bool res= f(block); if (block) - buf_block_buf_fix_dec(block); + block->page.unfix(); return res; } diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index fce5702c171..20a76992b7d 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -73,22 +73,6 @@ extern my_bool buf_disable_resize_buffer_pool_debug; /*!< if TRUE, resizing buffer pool is not allowed. */ # endif /* UNIV_DEBUG */ -/** buf_page_t::state() values, distinguishing buf_page_t and buf_block_t */ -enum buf_page_state -{ - /** available in buf_pool.free or buf_pool.watch */ - BUF_BLOCK_NOT_USED, - /** allocated for something else than a file page */ - BUF_BLOCK_MEMORY, - /** a previously allocated file page, in transit to NOT_USED */ - BUF_BLOCK_REMOVE_HASH, - /** a buf_block_t that is also in buf_pool.LRU */ - BUF_BLOCK_FILE_PAGE, - /** the buf_page_t of a ROW_FORMAT=COMPRESSED page - whose uncompressed page frame has been evicted */ - BUF_BLOCK_ZIP_PAGE -}; - /** This structure defines information we will fetch from each buffer pool. It will be used to print table IO stats */ struct buf_pool_info_t @@ -170,12 +154,9 @@ operator<<( #ifndef UNIV_INNOCHECKSUM # define buf_pool_get_curr_size() srv_buf_pool_curr_size -# define buf_page_alloc_descriptor() \ - static_cast<buf_page_t*>(ut_zalloc_nokey(sizeof(buf_page_t))) -# define buf_page_free_descriptor(bpage) ut_free(bpage) /** Allocate a buffer block. 
-@return own: the allocated block, in state BUF_BLOCK_MEMORY */ +@return own: the allocated block, state()==MEMORY */ inline buf_block_t *buf_block_alloc(); /********************************************************************//** Frees a buffer block which does not contain a file page. */ @@ -200,17 +181,14 @@ error-prone programming not to set a latch, and it should be used with care. */ #define buf_page_get_with_no_latch(ID, SIZE, MTR) \ buf_page_get_gen(ID, SIZE, RW_NO_LATCH, NULL, BUF_GET_NO_LATCH, MTR) -/********************************************************************//** -This is the general function used to get optimistic access to a database -page. -@return TRUE if success */ -ibool -buf_page_optimistic_get( -/*====================*/ - ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */ - buf_block_t* block, /*!< in: guessed block */ - ib_uint64_t modify_clock,/*!< in: modify clock value */ - mtr_t* mtr); /*!< in: mini-transaction */ +/** Try to acquire a page latch. +@param rw_latch RW_S_LATCH or RW_X_LATCH +@param block guessed block +@param modify_clock expected value of block->modify_clock +@param mtr mini-transaction +@return whether the latch was acquired (the page is an allocated file page) */ +bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block, + uint64_t modify_clock, mtr_t *mtr); /** Try to S-latch a page. Suitable for using when holding the lock_sys latches (as it avoids deadlock). @@ -222,15 +200,15 @@ buf_block_t *buf_page_try_get(const page_id_t page_id, mtr_t *mtr); /** Get read access to a compressed page (usually of type FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2). -The page must be released with buf_page_release_zip(). +The page must be released with unfix(). NOTE: the page is not protected by any latch. Mutual exclusion has to be implemented at a higher level. In other words, all possible accesses to a given page through this function must be protected by the same set of mutexes or latches. 
-@param[in] page_id page id -@param[in] zip_size ROW_FORMAT=COMPRESSED page size -@return pointer to the block */ -buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size); +@param page_id page identifier +@param zip_size ROW_FORMAT=COMPRESSED page size in bytes +@return pointer to the block, s-latched */ +buf_page_t *buf_page_get_zip(const page_id_t page_id, ulint zip_size); /** Get access to a database page. Buffered redo log may be applied. @param[in] page_id page id @@ -282,8 +260,8 @@ buf_page_get_low( /** Initialize a page in the buffer pool. The page is usually not read from a file even if it cannot be found in the buffer buf_pool. This is one -of the functions which perform to a block a state transition NOT_USED => -FILE_PAGE (the other is buf_page_get_gen). +of the functions which perform to a block a state transition NOT_USED => LRU +(the other is buf_page_get_low()). @param[in,out] space space object @param[in] offset offset of the tablespace @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 @@ -305,22 +283,6 @@ buf_block_t* buf_page_create_deferred(uint32_t space_id, ulint zip_size, mtr_t *mtr, buf_block_t *free_block); -/********************************************************************//** -Releases a compressed-only page acquired with buf_page_get_zip(). */ -UNIV_INLINE -void -buf_page_release_zip( -/*=================*/ - buf_page_t* bpage); /*!< in: buffer block */ -/********************************************************************//** -Releases a latch, if specified. */ -UNIV_INLINE -void -buf_page_release_latch( -/*=====================*/ - buf_block_t* block, /*!< in: buffer block */ - ulint rw_latch); /*!< in: RW_S_LATCH, RW_X_LATCH, - RW_NO_LATCH */ /** Move a block to the start of the LRU list. */ void buf_page_make_young(buf_page_t *bpage); /** Mark the page status as FREED for the given tablespace and page number. 
@@ -371,13 +333,6 @@ ib_uint64_t buf_block_get_modify_clock( /*=======================*/ buf_block_t* block); /*!< in: block */ -/*******************************************************************//** -Increments the bufferfix count. */ -# define buf_block_buf_fix_inc(block) (block)->fix() - -/*******************************************************************//** -Decrements the bufferfix count. */ -# define buf_block_buf_fix_dec(block) (block)->unfix() #endif /* !UNIV_INNOCHECKSUM */ /** Check if a buffer is all zeroes. @@ -516,19 +471,7 @@ void buf_pool_invalidate(); --------------------------- LOWER LEVEL ROUTINES ------------------------- =========================================================================*/ -#ifdef UNIV_DEBUG -/*********************************************************************//** -Gets a pointer to the memory frame of a block. -@return pointer to the frame */ -UNIV_INLINE -buf_frame_t* -buf_block_get_frame( -/*================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((warn_unused_result)); -#else /* UNIV_DEBUG */ -# define buf_block_get_frame(block) (block)->frame -#endif /* UNIV_DEBUG */ +#define buf_block_get_frame(block) (block)->page.frame /*********************************************************************//** Gets the compressed page descriptor corresponding to an uncompressed page @@ -541,18 +484,8 @@ if applicable. */ /** Monitor the buffer page read/write activity, and increment corresponding counter value in MONITOR_MODULE_BUF_PAGE. @param bpage buffer page whose read or write was completed -@param io_type BUF_IO_READ or BUF_IO_WRITE */ -ATTRIBUTE_COLD __attribute__((nonnull)) -void buf_page_monitor(const buf_page_t *bpage, buf_io_fix io_type); - -/** Complete a read request of a file page to buf_pool. 
-@param bpage recently read page -@param node data file -@return whether the operation succeeded -@retval DB_SUCCESS always when writing, or if a read page was OK -@retval DB_PAGE_CORRUPTED if the checksum fails on a page read -@retval DB_DECRYPTION_FAILED if the page cannot be decrypted */ -dberr_t buf_page_read_complete(buf_page_t *bpage, const fil_node_t &node); +@param read true=read, false=write */ +ATTRIBUTE_COLD void buf_page_monitor(const buf_page_t &bpage, bool read); /** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit, if needed. @@ -622,6 +555,7 @@ class buf_page_t { friend buf_pool_t; friend buf_block_t; + /** @name General fields */ /* @{ */ @@ -629,10 +563,9 @@ public: // FIXME: fix fil_iterate() /** Page id. Protected by buf_pool.page_hash.lock_get() when the page is in buf_pool.page_hash. */ page_id_t id_; + /** buf_pool.page_hash link; protected by buf_pool.page_hash.lock_get() */ + buf_page_t *hash; private: - /** Count of how manyfold this block is currently bufferfixed. */ - Atomic_counter<uint32_t> buf_fix_count_; - /** log sequence number of the START of the log entry written of the oldest modification to this block which has not yet been written to the data file; @@ -643,49 +576,64 @@ private: (because id().space() is the temporary tablespace). */ Atomic_relaxed<lsn_t> oldest_modification_; - /** type of pending I/O operation; protected by buf_pool.mutex - if in_LRU_list */ - Atomic_relaxed<buf_io_fix> io_fix_; - /** Block state. @see in_file(). - State transitions between in_file() states and to - BUF_BLOCK_REMOVE_HASH are protected by buf_pool.page_hash.lock_get() - when the block is in buf_pool.page_hash. - Other transitions when in_LRU_list are protected by buf_pool.mutex. 
*/ - buf_page_state state_; - public: - /** buf_pool.page_hash link; protected by buf_pool.page_hash.lock_get() */ - buf_page_t *hash; + /** state() of unused block (in buf_pool.free list) */ + static constexpr uint32_t NOT_USED= 0; + /** state() of block allocated as general-purpose memory */ + static constexpr uint32_t MEMORY= 1; + /** state() of block that is being freed */ + static constexpr uint32_t REMOVE_HASH= 2; + /** smallest state() of a buffer page that is freed in the tablespace */ + static constexpr uint32_t FREED= 3; + /** smallest state() for a block that belongs to buf_pool.LRU */ + static constexpr uint32_t UNFIXED= 1U << 29; + /** smallest state() of a block for which buffered changes may exist */ + static constexpr uint32_t IBUF_EXIST= 2U << 29; + /** smallest state() of a (re)initialized page (no doublewrite needed) */ + static constexpr uint32_t REINIT= 3U << 29; + /** smallest state() for an io-fixed block */ + static constexpr uint32_t READ_FIX= 4U << 29; + /** smallest state() for a write-fixed block */ + static constexpr uint32_t WRITE_FIX= 5U << 29; + /** smallest state() for a write-fixed block with buffered changes */ + static constexpr uint32_t WRITE_FIX_IBUF= 6U << 29; + /** smallest state() for a write-fixed block (no doublewrite was used) */ + static constexpr uint32_t WRITE_FIX_REINIT= 7U << 29; + /** buf_pool.LRU status mask in state() */ + static constexpr uint32_t LRU_MASK= 7U << 29; + + /** lock covering the contents of frame */ + block_lock lock; + /** pointer to aligned, uncompressed page frame of innodb_page_size */ + byte *frame; /* @} */ - page_zip_des_t zip; /*!< compressed page; zip.data - (but not the data it points to) is - also protected by buf_pool.mutex; - state == BUF_BLOCK_ZIP_PAGE and - zip.data == NULL means an active - buf_pool.watch */ + /** ROW_FORMAT=COMPRESSED page; zip.data (but not the data it points to) + is also protected by buf_pool.mutex; + !frame && !zip.data means an active buf_pool.watch */ + 
page_zip_des_t zip; #ifdef UNIV_DEBUG /** whether this->list is in buf_pool.zip_hash; protected by buf_pool.mutex */ bool in_zip_hash; - /** whether this->LRU is in buf_pool.LRU (in_file() holds); + /** whether this->LRU is in buf_pool.LRU (in_file()); protected by buf_pool.mutex */ bool in_LRU_list; - /** whether this is in buf_pool.page_hash (in_file() holds); + /** whether this is in buf_pool.page_hash (in_file()); protected by buf_pool.mutex */ bool in_page_hash; - /** whether this->list is in buf_pool.free (state() == BUF_BLOCK_NOT_USED); + /** whether this->list is in buf_pool.free (state() == NOT_USED); protected by buf_pool.flush_list_mutex */ bool in_free_list; #endif /* UNIV_DEBUG */ /** list member in one of the lists of buf_pool; protected by buf_pool.mutex or buf_pool.flush_list_mutex - state() == BUF_BLOCK_NOT_USED: buf_pool.free or buf_pool.withdraw + state() == NOT_USED: buf_pool.free or buf_pool.withdraw in_file() && oldest_modification(): buf_pool.flush_list (protected by buf_pool.flush_list_mutex) The contents is undefined if in_file() && !oldest_modification(), - or if state() is BUF_BLOCK_MEMORY or BUF_BLOCK_REMOVE_HASH. */ + or if state() == MEMORY or state() == REMOVE_HASH. */ UT_LIST_NODE_T(buf_page_t) list; /** @name LRU replacement algorithm fields. @@ -709,7 +657,7 @@ public: 0 if the block was never accessed in the buffer pool. - For state==BUF_BLOCK_MEMORY + For state() == MEMORY blocks, this field can be repurposed for something else. @@ -717,88 +665,126 @@ public: and bytes allocated for recv_sys.pages, the field is protected by recv_sys_t::mutex. */ - /** Change buffer entries for the page exist. - Protected by io_fix()==BUF_IO_READ or by buf_block_t::lock. */ - bool ibuf_exist; - - /** Block initialization status. 
Can be modified while holding io_fix() - or buf_block_t::lock X-latch */ - enum { - /** the page was read normally and should be flushed normally */ - NORMAL = 0, - /** the page was (re)initialized, and the doublewrite buffer can be - skipped on the next flush */ - INIT_ON_FLUSH, - /** the page was freed and need to be flushed. - For page_compressed, page flush will punch a hole to free space. - Else if innodb_immediate_scrub_data_uncompressed, the page will - be overwritten with zeroes. */ - FREED - } status; - - buf_page_t() : id_(0) + buf_page_t() : id_{0} { - static_assert(BUF_BLOCK_NOT_USED == 0, "compatibility"); + static_assert(NOT_USED == 0, "compatibility"); memset((void*) this, 0, sizeof *this); } - /** Initialize some fields */ - void init() + buf_page_t(const buf_page_t &b) : + id_(b.id_), hash(b.hash), + oldest_modification_(b.oldest_modification_), + lock() /* not copied */, + frame(b.frame), zip(b.zip), +#ifdef UNIV_DEBUG + in_zip_hash(b.in_zip_hash), in_LRU_list(b.in_LRU_list), + in_page_hash(b.in_page_hash), in_free_list(b.in_free_list), +#endif /* UNIV_DEBUG */ + list(b.list), LRU(b.LRU), old(b.old), freed_page_clock(b.freed_page_clock), + access_time(b.access_time) { - io_fix_= BUF_IO_NONE; - buf_fix_count_= 0; - old= 0; - freed_page_clock= 0; - access_time= 0; + lock.init(); + } + + /** Initialize some more fields */ + void init(uint32_t state, page_id_t id) + { + ut_ad(state < REMOVE_HASH || state >= UNFIXED); + id_= id; + zip.fix= state; oldest_modification_= 0; - ibuf_exist= false; - status= NORMAL; + lock.init(); ut_d(in_zip_hash= false); ut_d(in_free_list= false); ut_d(in_LRU_list= false); ut_d(in_page_hash= false); + old= 0; + freed_page_clock= 0; + access_time= 0; } - /** Initialize some more fields */ - void init(buf_page_state state, page_id_t id, uint32_t buf_fix_count= 0) +public: + const page_id_t &id() const { return id_; } + uint32_t state() const { return zip.fix; } + uint32_t buf_fix_count() const { - init(); - state_= state; - 
id_= id; - buf_fix_count_= buf_fix_count; + uint32_t f= state(); + ut_ad(f >= FREED); + return f < UNFIXED ? (f - FREED) : (~LRU_MASK & f); } + /** @return whether this block is read or write fixed; + read_complete() or write_complete() will always release + the io-fix before releasing U-lock or X-lock */ + bool is_io_fixed() const + { const auto s= state(); ut_ad(s >= FREED); return s >= READ_FIX; } + /** @return whether this block is write fixed; + write_complete() will always release the write-fix before releasing U-lock */ + bool is_write_fixed() const { return state() >= WRITE_FIX; } + /** @return whether this block is read fixed; this should never hold + when a thread is holding the block lock in any mode */ + bool is_read_fixed() const { return is_io_fixed() && !is_write_fixed(); } - /** Initialize some more fields */ - void init(page_id_t id, uint32_t buf_fix_count= 0) + /** @return if this belongs to buf_pool.unzip_LRU */ + bool belongs_to_unzip_LRU() const + { return UNIV_LIKELY_NULL(zip.data) && frame; } + + bool is_freed() const + { const auto s= state(); ut_ad(s >= FREED); return s < UNFIXED; } + bool is_ibuf_exist() const { - init(); - id_= id; - buf_fix_count_= buf_fix_count; - hash= nullptr; + const auto s= state(); + ut_ad(s >= UNFIXED); + ut_ad(s < READ_FIX); + return (s & LRU_MASK) == IBUF_EXIST; } + bool is_reinit() const { return !(~state() & REINIT); } -public: - const page_id_t &id() const { return id_; } - buf_page_state state() const { return state_; } - uint32_t buf_fix_count() const { return buf_fix_count_; } - buf_io_fix io_fix() const { return io_fix_; } - void io_unfix() + void set_reinit(uint32_t prev_state) { - ut_d(const auto old_io_fix= io_fix()); - ut_ad(old_io_fix == BUF_IO_READ || old_io_fix == BUF_IO_PIN); - io_fix_= BUF_IO_NONE; + ut_ad(prev_state < READ_FIX); + ut_d(const auto s=) zip.fix.fetch_add(REINIT - prev_state); + ut_ad(s > prev_state); + ut_ad(s < prev_state + UNFIXED); } - /** @return if this belongs to 
buf_pool.unzip_LRU */ - bool belongs_to_unzip_LRU() const + void set_ibuf_exist() + { + ut_ad(lock.is_write_locked()); + ut_ad(id() < page_id_t(SRV_SPACE_ID_UPPER_BOUND, 0)); + const auto s= state(); + ut_ad(s >= UNFIXED); + ut_ad(s < READ_FIX); + ut_ad(s < IBUF_EXIST || s >= REINIT); + zip.fix.fetch_add(IBUF_EXIST - (LRU_MASK & s)); + } + void clear_ibuf_exist() + { + ut_ad(lock.is_write_locked()); + ut_ad(id() < page_id_t(SRV_SPACE_ID_UPPER_BOUND, 0)); + ut_d(const auto s=) zip.fix.fetch_sub(IBUF_EXIST - UNFIXED); + ut_ad(s >= IBUF_EXIST); + ut_ad(s < REINIT); + } + + void read_unfix(uint32_t s) { - return zip.data && state() != BUF_BLOCK_ZIP_PAGE; + ut_ad(lock.is_write_locked()); + ut_ad(s == UNFIXED + 1 || s == IBUF_EXIST + 1 || s == REINIT + 1); + ut_d(auto old_state=) zip.fix.fetch_add(s - READ_FIX); + ut_ad(old_state >= READ_FIX); + ut_ad(old_state < WRITE_FIX); } - inline void add_buf_fix_count(uint32_t count); - inline void set_buf_fix_count(uint32_t count); - inline void set_state(buf_page_state state); - inline void set_io_fix(buf_io_fix io_fix); + void set_freed(uint32_t prev_state, uint32_t count= 0) + { + ut_ad(lock.is_write_locked()); + ut_ad(prev_state >= UNFIXED); + ut_ad(prev_state < READ_FIX); + ut_d(auto s=) zip.fix.fetch_sub((prev_state & LRU_MASK) - FREED - count); + ut_ad(!((prev_state ^ s) & LRU_MASK)); + } + + inline void set_state(uint32_t s); inline void set_corrupt_id(); /** @return the log sequence number of the oldest pending modification @@ -818,15 +804,35 @@ public: inline void set_oldest_modification(lsn_t lsn); /** Clear oldest_modification after removing from buf_pool.flush_list */ inline void clear_oldest_modification(); + /** Reset the oldest_modification when marking a persistent page freed */ + void reset_oldest_modification() + { + ut_ad(oldest_modification() > 2); + oldest_modification_.store(1, std::memory_order_release); + } + + /** Complete a read of a page. 
+ @param node data file + @return whether the operation succeeded + @retval DB_PAGE_CORRUPTED if the checksum fails + @retval DB_DECRYPTION_FAILED if the page cannot be decrypted */ + dberr_t read_complete(const fil_node_t &node); + /** Note that a block is no longer dirty, while not removing it from buf_pool.flush_list */ - inline void clear_oldest_modification(bool temporary); + inline void write_complete(bool temporary); + + /** Write a flushable page to a file. buf_pool.mutex must be held. + @param lru true=buf_pool.LRU; false=buf_pool.flush_list + @param space tablespace + @return whether the page was flushed and buf_pool.mutex was released */ + inline bool flush(bool lru, fil_space_t *space); /** Notify that a page in a temporary tablespace has been modified. */ void set_temp_modified() { ut_ad(fsp_is_system_temporary(id().space())); - ut_ad(state() == BUF_BLOCK_FILE_PAGE); + ut_ad(in_file()); ut_ad(!oldest_modification()); oldest_modification_= 2; } @@ -834,19 +840,35 @@ public: /** Prepare to release a file page to buf_pool.free. 
*/ void free_file_page() { - ut_ad(state() == BUF_BLOCK_REMOVE_HASH); + ut_ad((zip.fix.fetch_sub(REMOVE_HASH - MEMORY)) == REMOVE_HASH); /* buf_LRU_block_free_non_file_page() asserts !oldest_modification() */ ut_d(oldest_modification_= 0;) - set_corrupt_id(); - ut_d(set_state(BUF_BLOCK_MEMORY)); + id_= page_id_t(~0ULL); + } + + void fix_on_recovery() + { + ut_d(const auto f=) zip.fix.fetch_sub(READ_FIX - UNFIXED - 1); + ut_ad(f >= READ_FIX); + ut_ad(f < WRITE_FIX); + } + + uint32_t fix(uint32_t count= 1) + { + ut_ad(count); + ut_ad(count < IBUF_EXIST); + uint32_t f= zip.fix.fetch_add(count); + ut_ad(f >= FREED); + ut_ad(!((f ^ (f + 1)) & LRU_MASK)); + return f; } - void fix() { buf_fix_count_++; } uint32_t unfix() { - uint32_t count= buf_fix_count_--; - ut_ad(count != 0); - return count - 1; + uint32_t f= zip.fix.fetch_sub(1); + ut_ad(f > FREED); + ut_ad(!((f ^ (f - 1)) & LRU_MASK)); + return f - 1; } /** @return the physical size, in bytes */ @@ -872,27 +894,10 @@ public: } /** @return whether the block is mapped to a data file */ - bool in_file() const - { - switch (state_) { - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_FILE_PAGE: - return true; - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - return false; - } - - ut_error; - return false; - } + bool in_file() const { return state() >= FREED; } /** @return whether the block is modified and ready for flushing */ inline bool ready_for_flush() const; - /** @return whether the state can be changed to BUF_BLOCK_NOT_USED */ - bool ready_for_replace() const - { return !oldest_modification() && can_relocate(); } /** @return whether the block can be relocated in memory. The block can be dirty, but it must not be I/O-fixed or bufferfixed. 
*/ inline bool can_relocate() const; @@ -924,27 +929,18 @@ struct buf_block_t{ be the first field, so that buf_pool.page_hash can point to buf_page_t or buf_block_t */ - byte* frame; /*!< pointer to buffer frame which - is of size srv_page_size, and - aligned to an address divisible by - srv_page_size */ - /** read-write lock covering frame */ - block_lock lock; #ifdef UNIV_DEBUG /** whether page.list is in buf_pool.withdraw - ((state() == BUF_BLOCK_NOT_USED)) and the buffer pool is being shrunk; + ((state() == NOT_USED)) and the buffer pool is being shrunk; protected by buf_pool.mutex */ bool in_withdraw_list; /** whether unzip_LRU is in buf_pool.unzip_LRU - (state() == BUF_BLOCK_FILE_PAGE and zip.data != nullptr); + (in_file() && frame && zip.data); protected by buf_pool.mutex */ bool in_unzip_LRU_list; #endif - UT_LIST_NODE_T(buf_block_t) unzip_LRU; - /*!< node of the decompressed LRU list; - a block is in the unzip_LRU list - if page.state() == BUF_BLOCK_FILE_PAGE - and page.zip.data != NULL */ + /** member of buf_pool.unzip_LRU (if belongs_to_unzip_LRU()) */ + UT_LIST_NODE_T(buf_block_t) unzip_LRU; /* @} */ /** @name Optimistic search field */ /* @{ */ @@ -983,17 +979,15 @@ struct buf_block_t{ These 5 fields may only be modified when: we are holding the appropriate x-latch in btr_search_latches[], and one of the following holds: - (1) the block state is BUF_BLOCK_FILE_PAGE, and - we are holding an s-latch or x-latch on buf_block_t::lock, or - (2) buf_block_t::buf_fix_count == 0, or - (3) the block state is BUF_BLOCK_REMOVE_HASH. + (1) in_file(), and we are holding lock in any mode, or + (2) !is_read_fixed()&&(state()>=UNFIXED||state()==REMOVE_HASH). An exception to this is when we init or create a page in the buffer pool in buf0buf.cc. Another exception for buf_pool_t::clear_hash_index() is that assigning block->index = NULL (and block->n_pointers = 0) - is allowed whenever btr_search_own_all(RW_LOCK_X). 
+ is allowed whenever all AHI latches are exclusively locked. Another exception is that ha_insert_for_fold() may decrement n_pointers without holding the appropriate latch @@ -1002,8 +996,8 @@ struct buf_block_t{ This implies that the fields may be read without race condition whenever any of the following hold: - - the btr_search_latches[] s-latch or x-latch is being held, or - - the block state is not BUF_BLOCK_FILE_PAGE or BUF_BLOCK_REMOVE_HASH, + - the btr_search_sys.partition[].latch is being held, or + - state() == NOT_USED || state() == MEMORY, and holding some latch prevents the state from changing to that. Some use of assert_block_ahi_empty() or assert_block_ahi_valid() @@ -1017,9 +1011,7 @@ struct buf_block_t{ Atomic_counter<ulint> n_pointers; /*!< used in debugging: the number of pointers in the adaptive hash index - pointing to this frame; - protected by atomic memory access - or btr_search_own_all(). */ + pointing to this frame */ # define assert_block_ahi_empty(block) \ ut_a((block)->n_pointers == 0) # define assert_block_ahi_empty_on_init(block) do { \ @@ -1054,13 +1046,7 @@ struct buf_block_t{ # define assert_block_ahi_valid(block) /* nothing */ #endif /* BTR_CUR_HASH_ADAPT */ void fix() { page.fix(); } - uint32_t unfix() - { - ut_ad(page.buf_fix_count() || page.io_fix() != BUF_IO_NONE || - page.state() == BUF_BLOCK_ZIP_PAGE || - !lock.have_any()); - return page.unfix(); - } + uint32_t unfix() { return page.unfix(); } /** @return the physical size, in bytes */ ulint physical_size() const { return page.physical_size(); } @@ -1072,15 +1058,15 @@ struct buf_block_t{ /** Initialize the block. 
@param page_id page identifier @param zip_size ROW_FORMAT=COMPRESSED page size, or 0 - @param fix initial buf_fix_count() */ - void initialise(const page_id_t page_id, ulint zip_size, uint32_t fix= 0); + @param state initial state() */ + void initialise(const page_id_t page_id, ulint zip_size, uint32_t state); }; /**********************************************************************//** Compute the hash fold value for blocks in buf_pool.zip_hash. */ /* @{ */ #define BUF_POOL_ZIP_FOLD_PTR(ptr) (ulint(ptr) >> srv_page_size_shift) -#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame) +#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->page.frame) #define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b)) /* @} */ @@ -1276,7 +1262,7 @@ class buf_pool_t size_t mem_size() const { return mem_pfx.m_size; } /** Register the chunk */ - void reg() { map_reg->emplace(map::value_type(blocks->frame, this)); } + void reg() { map_reg->emplace(map::value_type(blocks->page.frame, this)); } /** Allocate a chunk of buffer frames. @param bytes requested size @@ -1368,7 +1354,7 @@ public: } /** Determine whether a frame is intended to be withdrawn during resize(). 
- @param ptr pointer within a buf_block_t::frame + @param ptr pointer within a buf_page_t::frame @return whether the frame will be withdrawn */ bool will_be_withdrawn(const byte *ptr) const { @@ -1381,8 +1367,8 @@ public: for (const chunk_t *chunk= chunks + n_chunks_new, * const echunk= chunks + n_chunks; chunk != echunk; chunk++) - if (ptr >= chunk->blocks->frame && - ptr < (chunk->blocks + chunk->size - 1)->frame + srv_page_size) + if (ptr >= chunk->blocks->page.frame && + ptr < (chunk->blocks + chunk->size - 1)->page.frame + srv_page_size) return true; return false; } @@ -1486,17 +1472,11 @@ public: buf_page_t *bpage= page_hash.get(page_id, chain); if (bpage >= &watch[0] && bpage < &watch[UT_ARR_SIZE(watch)]) { - ut_ad(bpage->state() == BUF_BLOCK_ZIP_PAGE); ut_ad(!bpage->in_zip_hash); ut_ad(!bpage->zip.data); if (!allow_watch) bpage= nullptr; } - else if (bpage) - { - ut_ad(page_id == bpage->id()); - ut_ad(bpage->in_file()); - } return bpage; } @@ -1510,15 +1490,9 @@ public: page_hash.lock_get(page_hash.cell_get(bpage.id().fold())). is_locked()); #endif /* SAFE_MUTEX */ - if (&bpage < &watch[0] || &bpage >= &watch[UT_ARR_SIZE(watch)]) - { - ut_ad(bpage.state() == BUF_BLOCK_ZIP_PAGE - ? 
!!bpage.zip.data - : bpage.state() == BUF_BLOCK_FILE_PAGE); + ut_ad(bpage.in_file()); + if (&bpage < &watch[0] || &bpage >= &watch[array_elements(watch)]) return false; - } - - ut_ad(bpage.state() == BUF_BLOCK_ZIP_PAGE); ut_ad(!bpage.in_zip_hash); ut_ad(!bpage.zip.data); return true; @@ -1818,7 +1792,7 @@ public: } // n_flush_LRU() + n_flush_list() - // is approximately COUNT(io_fix()==BUF_IO_WRITE) in flush_list + // is approximately COUNT(is_write_fixed()) in flush_list unsigned freed_page_clock;/*!< a sequence number used to count the number of buffer @@ -1991,6 +1965,7 @@ inline buf_page_t *buf_pool_t::page_hash_table::get(const page_id_t id, for (buf_page_t *bpage= chain.first; bpage; bpage= bpage->hash) { ut_ad(bpage->in_page_hash); + ut_ad(bpage->in_file()); if (bpage->id() == id) return bpage; } @@ -2012,50 +1987,12 @@ inline void page_hash_latch::lock() } #endif /* SUX_LOCK_GENERIC */ -inline void buf_page_t::add_buf_fix_count(uint32_t count) -{ - mysql_mutex_assert_owner(&buf_pool.mutex); - buf_fix_count_+= count; -} - -inline void buf_page_t::set_buf_fix_count(uint32_t count) -{ - mysql_mutex_assert_owner(&buf_pool.mutex); - buf_fix_count_= count; -} - -inline void buf_page_t::set_state(buf_page_state state) -{ - mysql_mutex_assert_owner(&buf_pool.mutex); -#ifdef UNIV_DEBUG - switch (state) { - case BUF_BLOCK_REMOVE_HASH: - /* buf_pool_t::corrupted_evict() invokes set_corrupt_id() - before buf_LRU_free_one_page(), so we cannot assert that - we are holding the hash_lock. */ - break; - case BUF_BLOCK_MEMORY: - break; - case BUF_BLOCK_NOT_USED: - break; - case BUF_BLOCK_ZIP_PAGE: - if (this >= &buf_pool.watch[0] && - this <= &buf_pool.watch[UT_ARR_SIZE(buf_pool.watch)]) - break; - /* fall through */ - case BUF_BLOCK_FILE_PAGE: - ut_ad(buf_pool.page_hash.lock_get(buf_pool.page_hash.cell_get(id_.fold())). 
- is_write_locked()); - break; - } -#endif - state_= state; -} - -inline void buf_page_t::set_io_fix(buf_io_fix io_fix) +inline void buf_page_t::set_state(uint32_t s) { mysql_mutex_assert_owner(&buf_pool.mutex); - io_fix_= io_fix; + ut_ad(s <= REMOVE_HASH || s >= UNFIXED); + ut_ad(s <= READ_FIX); + zip.fix= s; } inline void buf_page_t::set_corrupt_id() @@ -2072,17 +2009,12 @@ inline void buf_page_t::set_corrupt_id() default: ut_ad("block is dirty" == 0); } - switch (state()) { - case BUF_BLOCK_REMOVE_HASH: - break; - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_FILE_PAGE: + const auto f= state(); + if (f != REMOVE_HASH) + { + ut_ad(f >= UNFIXED); ut_ad(buf_pool.page_hash.lock_get(buf_pool.page_hash.cell_get(id_.fold())). is_write_locked()); - break; - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_MEMORY: - ut_ad("invalid state" == 0); } #endif id_= page_id_t(~0ULL); @@ -2101,9 +2033,8 @@ inline void buf_page_t::set_oldest_modification(lsn_t lsn) inline void buf_page_t::clear_oldest_modification() { mysql_mutex_assert_owner(&buf_pool.flush_list_mutex); - ut_d(const auto state= state_); - ut_ad(state == BUF_BLOCK_FILE_PAGE || state == BUF_BLOCK_ZIP_PAGE || - state == BUF_BLOCK_REMOVE_HASH); + ut_d(const auto s= state()); + ut_ad(s >= REMOVE_HASH); ut_ad(oldest_modification()); ut_ad(!list.prev); ut_ad(!list.next); @@ -2113,36 +2044,15 @@ inline void buf_page_t::clear_oldest_modification() oldest_modification_.store(0, std::memory_order_release); } -/** Note that a block is no longer dirty, while not removing -it from buf_pool.flush_list */ -inline void buf_page_t::clear_oldest_modification(bool temporary) -{ - ut_ad(temporary == fsp_is_system_temporary(id().space())); - if (temporary) - { - ut_ad(oldest_modification() == 2); - oldest_modification_= 0; - } - else - { - /* We use release memory order to guarantee that callers of - oldest_modification_acquire() will observe the block as - being detached from buf_pool.flush_list, after reading the value 0. 
*/ - ut_ad(oldest_modification() > 2); - oldest_modification_.store(1, std::memory_order_release); - } -} - /** @return whether the block is modified and ready for flushing */ inline bool buf_page_t::ready_for_flush() const { mysql_mutex_assert_owner(&buf_pool.mutex); ut_ad(in_LRU_list); - ut_a(in_file()); - ut_ad(fsp_is_system_temporary(id().space()) - ? oldest_modification() == 2 - : oldest_modification() > 2); - return io_fix_ == BUF_IO_NONE; + const auto s= state(); + ut_a(s >= FREED); + ut_ad(!fsp_is_system_temporary(id().space()) || oldest_modification() == 2); + return s < READ_FIX; } /** @return whether the block can be relocated in memory. @@ -2150,9 +2060,11 @@ The block can be dirty, but it must not be I/O-fixed or bufferfixed. */ inline bool buf_page_t::can_relocate() const { mysql_mutex_assert_owner(&buf_pool.mutex); - ut_ad(in_file()); + const auto f= state(); + ut_ad(f >= FREED); ut_ad(in_LRU_list); - return io_fix_ == BUF_IO_NONE && !buf_fix_count_; + return (f == FREED || (f < READ_FIX && !(f & ~LRU_MASK))) && + !lock.is_locked_or_waiting(); } /** @return whether the block has been flagged old in buf_pool.LRU */ @@ -2213,41 +2125,26 @@ inline void buf_page_t::set_old(bool old) /********************************************************************** Let us list the consistency conditions for different control block states. 
-NOT_USED: is in free list, not in LRU list, not in flush list, nor - page hash table -MEMORY: is not in free list, LRU list, or flush list, nor page - hash table -FILE_PAGE: space and offset are defined, is in page hash table - if io_fix == BUF_IO_WRITE, - buf_pool.n_flush_LRU() || buf_pool.n_flush_list() - - (1) if buf_fix_count == 0, then - is in LRU list, not in free list - is in flush list, - if and only if oldest_modification > 0 - is x-locked, - if and only if io_fix == BUF_IO_READ - is s-locked, - if and only if io_fix == BUF_IO_WRITE - - (2) if buf_fix_count > 0, then - is not in LRU list, not in free list - is in flush list, - if and only if oldest_modification > 0 - if io_fix == BUF_IO_READ, - is x-locked - if io_fix == BUF_IO_WRITE, - is s-locked +NOT_USED: is in free list, not LRU, not flush_list, nor page_hash +MEMORY: is not in any of free, LRU, flush_list, page_hash +in_file(): is not in free list, is in LRU list, id() is defined, + is in page_hash (not necessarily if is_read_fixed()) + + is in buf_pool.flush_list, if and only + if oldest_modification == 1 || oldest_modification > 2 + + (1) if is_write_fixed(): is u-locked + (2) if is_read_fixed(): is x-locked State transitions: NOT_USED => MEMORY -MEMORY => FILE_PAGE MEMORY => NOT_USED -FILE_PAGE => NOT_USED NOTE: This transition is allowed if and only if - (1) buf_fix_count == 0, - (2) oldest_modification == 0, and - (3) io_fix == 0. +MEMORY => UNFIXED +UNFIXED => in_file() +in_file() => UNFIXED or FREED +UNFIXED or FREED => REMOVE_HASH +REMOVE_HASH => NOT_USED (if and only if !oldest_modification()) */ /** Select from where to start a scan. 
If we have scanned diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic index bdca5b66392..5baee629dde 100644 --- a/storage/innobase/include/buf0buf.ic +++ b/storage/innobase/include/buf0buf.ic @@ -86,38 +86,6 @@ inline bool buf_page_peek_if_too_old(const buf_page_t *bpage) } } -#ifdef UNIV_DEBUG -/*********************************************************************//** -Gets a pointer to the memory frame of a block. -@return pointer to the frame */ -UNIV_INLINE -buf_frame_t* -buf_block_get_frame( -/*================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ -{ - if (!block) { - return NULL; - } - - switch (block->page.state()) { - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_NOT_USED: - ut_error; - break; - case BUF_BLOCK_FILE_PAGE: - ut_a(block->page.buf_fix_count()); - /* fall through */ - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - goto ok; - } - ut_error; -ok: - return((buf_frame_t*) block->frame); -} -#endif /* UNIV_DEBUG */ - /** Allocate a buffer block. @return own: the allocated block, in state BUF_BLOCK_MEMORY */ inline buf_block_t *buf_block_alloc() @@ -149,16 +117,11 @@ buf_block_modify_clock_inc( buf_block_t* block) /*!< in: block */ { #ifdef SAFE_MUTEX - /* No latch is acquired for the shared temporary tablespace. */ - ut_ad(fsp_is_system_temporary(block->page.id().space()) - || (mysql_mutex_is_owner(&buf_pool.mutex) - && !block->page.buf_fix_count()) - || block->lock.have_u_or_x()); + ut_ad((mysql_mutex_is_owner(&buf_pool.mutex) + && !block->page.buf_fix_count()) + || block->page.lock.have_u_or_x()); #else /* SAFE_MUTEX */ - /* No latch is acquired for the shared temporary tablespace. 
*/ - ut_ad(fsp_is_system_temporary(block->page.id().space()) - || !block->page.buf_fix_count() - || block->lock.have_u_or_x()); + ut_ad(!block->page.buf_fix_count() || block->page.lock.have_u_or_x()); #endif /* SAFE_MUTEX */ assert_block_ahi_valid(block); @@ -175,58 +138,10 @@ buf_block_get_modify_clock( /*=======================*/ buf_block_t* block) /*!< in: block */ { - /* No latch is acquired for the shared temporary tablespace. */ - ut_ad(fsp_is_system_temporary(block->page.id().space()) - || block->lock.have_any()); + ut_ad(block->page.lock.have_any()); return(block->modify_clock); } -/********************************************************************//** -Releases a compressed-only page acquired with buf_page_get_zip(). */ -UNIV_INLINE -void -buf_page_release_zip( -/*=================*/ - buf_page_t* bpage) /*!< in: buffer block */ -{ - ut_ad(bpage); - ut_ad(bpage->buf_fix_count()); - - switch (bpage->state()) { - case BUF_BLOCK_FILE_PAGE: - case BUF_BLOCK_ZIP_PAGE: - bpage->unfix(); - return; - - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - break; - } - - ut_error; -} - -/********************************************************************//** -Releases a latch, if specified. */ -UNIV_INLINE -void -buf_page_release_latch( -/*===================*/ - buf_block_t* block, /*!< in: buffer block */ - ulint rw_latch) /*!< in: RW_S_LATCH, RW_X_LATCH, - RW_NO_LATCH */ -{ - switch (rw_latch) { - case RW_S_LATCH: - block->lock.s_unlock(); - break; - case RW_SX_LATCH: - case RW_X_LATCH: - block->lock.u_or_x_unlock(rw_latch == RW_SX_LATCH); - } -} - /** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit, if needed. 
@param[in] size size in bytes diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h index cd0d068abb0..41f0c7364f8 100644 --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -24,12 +24,11 @@ The database buffer pool flush algorithm Created 11/5/1995 Heikki Tuuri *******************************************************/ -#ifndef buf0flu_h -#define buf0flu_h +#pragma once #include "ut0byte.h" #include "log0log.h" -#include "buf0types.h" +#include "buf0buf.h" /** Number of pages flushed. Protected by buf_pool.mutex. */ extern ulint buf_flush_page_count; @@ -122,15 +121,28 @@ ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious); This function should be called at a mini-transaction commit, if a page was modified in it. Puts the block to the list of modified blocks, if it not already in it. */ -UNIV_INLINE -void -buf_flush_note_modification( -/*========================*/ - buf_block_t* block, /*!< in: block which is modified */ - lsn_t start_lsn, /*!< in: start lsn of the first mtr in a - set of mtr's */ - lsn_t end_lsn); /*!< in: end lsn of the last mtr in the - set of mtr's */ +inline void buf_flush_note_modification(buf_block_t *b, lsn_t start, lsn_t end) +{ + ut_ad(!srv_read_only_mode); + ut_d(const auto s= b->page.state()); + ut_ad(s > buf_page_t::FREED); + ut_ad(s < buf_page_t::READ_FIX); + ut_ad(mach_read_from_8(b->page.frame + FIL_PAGE_LSN) <= end); + mach_write_to_8(b->page.frame + FIL_PAGE_LSN, end); + if (UNIV_LIKELY_NULL(b->page.zip.data)) + memcpy_aligned<8>(FIL_PAGE_LSN + b->page.zip.data, + FIL_PAGE_LSN + b->page.frame, 8); + + const lsn_t oldest_modification= b->page.oldest_modification(); + + if (oldest_modification > 1) + ut_ad(oldest_modification <= start); + else if (fsp_is_system_temporary(b->page.id().space())) + b->page.set_temp_modified(); + else + buf_pool.insert_into_flush_list(b, start); + srv_stats.buf_pool_write_requests.inc(); +} /** Initialize page_cleaner. 
*/ ATTRIBUTE_COLD void buf_flush_page_cleaner_init(); @@ -149,7 +161,3 @@ void buf_flush_validate(); /** Synchronously flush dirty blocks. NOTE: The calling thread is not allowed to hold any buffer page latches! */ void buf_flush_sync(); - -#include "buf0flu.ic" - -#endif diff --git a/storage/innobase/include/buf0flu.ic b/storage/innobase/include/buf0flu.ic deleted file mode 100644 index b8a9b6d1f5d..00000000000 --- a/storage/innobase/include/buf0flu.ic +++ /dev/null @@ -1,66 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2019, 2021, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/buf0flu.ic -The database buffer pool flush algorithm - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - -#include "assume_aligned.h" -#include "buf0buf.h" -#include "srv0srv.h" - -/********************************************************************//** -This function should be called at a mini-transaction commit, if a page was -modified in it. Puts the block to the list of modified blocks, if it is not -already in it. 
*/ -UNIV_INLINE -void -buf_flush_note_modification( -/*========================*/ - buf_block_t* block, /*!< in: block which is modified */ - lsn_t start_lsn, /*!< in: start lsn of the mtr that - modified this block */ - lsn_t end_lsn) /*!< in: end lsn of the mtr that - modified this block */ -{ - ut_ad(!srv_read_only_mode); - ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE); - ut_ad(block->page.buf_fix_count()); - ut_ad(mach_read_from_8(block->frame + FIL_PAGE_LSN) <= end_lsn); - mach_write_to_8(block->frame + FIL_PAGE_LSN, end_lsn); - if (UNIV_LIKELY_NULL(block->page.zip.data)) { - memcpy_aligned<8>(FIL_PAGE_LSN + block->page.zip.data, - FIL_PAGE_LSN + block->frame, 8); - } - - const lsn_t oldest_modification = block->page.oldest_modification(); - - if (oldest_modification > 1) { - ut_ad(oldest_modification <= start_lsn); - } else if (fsp_is_system_temporary(block->page.id().space())) { - block->page.set_temp_modified(); - } else { - buf_pool.insert_into_flush_list(block, start_lsn); - } - - srv_stats.buf_pool_write_requests.inc(); -} diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h index d3ee0b42169..4e29b41124e 100644 --- a/storage/innobase/include/buf0types.h +++ b/storage/innobase/include/buf0types.h @@ -39,16 +39,6 @@ struct buf_buddy_stat_t; /** A buffer frame. 
@see page_t */ typedef byte buf_frame_t; -/** Flags for io_fix types */ -enum buf_io_fix { - BUF_IO_NONE = 0, /**< no pending I/O */ - BUF_IO_READ, /**< read pending */ - BUF_IO_WRITE, /**< write pending */ - BUF_IO_PIN /**< disallow relocation of - block and its removal of from - the flush_list */ -}; - /** Alternatives for srv_checksum_algorithm, which can be changed by setting innodb_checksum_algorithm */ enum srv_checksum_algorithm_t { diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h index 51333cb5955..a519fa096b1 100644 --- a/storage/innobase/include/fsp0fsp.h +++ b/storage/innobase/include/fsp0fsp.h @@ -545,9 +545,8 @@ fil_block_check_type( ulint type, mtr_t* mtr) { - if (UNIV_UNLIKELY(type != fil_page_get_type(block.frame))) { - fil_block_reset_type(block, type, mtr); - } + if (UNIV_UNLIKELY(type != fil_page_get_type(block.page.frame))) + fil_block_reset_type(block, type, mtr); } /** Checks if a page address is an extent descriptor page address. diff --git a/storage/innobase/include/fut0fut.h b/storage/innobase/include/fut0fut.h index 5169db95549..58c33c12a29 100644 --- a/storage/innobase/include/fut0fut.h +++ b/storage/innobase/include/fut0fut.h @@ -61,7 +61,7 @@ fut_get_ptr( rw_latch, nullptr, BUF_GET_POSSIBLY_FREED, mtr); if (!block) { - } else if (block->page.status == buf_page_t::FREED) { + } else if (block->page.is_freed()) { block = nullptr; } else { ptr = buf_block_get_frame(block) + addr.boffset; diff --git a/storage/innobase/include/fut0lst.h b/storage/innobase/include/fut0lst.h index 1ade24cd069..c27de3db786 100644 --- a/storage/innobase/include/fut0lst.h +++ b/storage/innobase/include/fut0lst.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2020, MariaDB Corporation. +Copyright (c) 2018, 2021, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -70,9 +70,10 @@ typedef byte flst_node_t; @param[in,out] mtr mini-transaction */ inline void flst_init(const buf_block_t* block, uint16_t ofs, mtr_t* mtr) { - ut_ad(!mach_read_from_2(FLST_LEN + ofs + block->frame)); - ut_ad(!mach_read_from_2(FLST_FIRST + FIL_ADDR_BYTE + ofs + block->frame)); - ut_ad(!mach_read_from_2(FLST_LAST + FIL_ADDR_BYTE + ofs + block->frame)); + ut_d(const page_t *page= block->page.frame); + ut_ad(!mach_read_from_2(FLST_LEN + ofs + page)); + ut_ad(!mach_read_from_2(FLST_FIRST + FIL_ADDR_BYTE + ofs + page)); + ut_ad(!mach_read_from_2(FLST_LAST + FIL_ADDR_BYTE + ofs + page)); compile_time_assert(FIL_NULL == 0xffU * 0x1010101U); mtr->memset(block, FLST_FIRST + FIL_ADDR_PAGE + ofs, 4, 0xff); mtr->memset(block, FLST_LAST + FIL_ADDR_PAGE + ofs, 4, 0xff); diff --git a/storage/innobase/include/lock0lock.ic b/storage/innobase/include/lock0lock.ic index e481dabbbf5..7d08cbb930c 100644 --- a/storage/innobase/include/lock0lock.ic +++ b/storage/innobase/include/lock0lock.ic @@ -37,7 +37,7 @@ lock_get_min_heap_no( /*=================*/ const buf_block_t* block) /*!< in: buffer block */ { - const page_t* page = block->frame; + const page_t* page = block->page.frame; if (page_is_comp(page)) { return(rec_get_heap_no_new( @@ -74,6 +74,6 @@ lock_rec_create( btr_assert_not_corrupted(block, index); return lock_rec_create_low( c_lock, - type_mode, block->page.id(), block->frame, heap_no, + type_mode, block->page.id(), block->page.frame, heap_no, index, trx, caller_owns_trx_mutex); } diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 46c09fecf7a..57c2f0b5433 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -225,7 +225,7 @@ private: public: /** whether we are applying redo log records during crash recovery */ bool 
recovery_on; - /** whether recv_recover_page(), invoked from buf_page_read_complete(), + /** whether recv_recover_page(), invoked from buf_page_t::read_complete(), should apply log records*/ bool apply_log_recs; byte* buf; /*!< buffer for parsing log records */ diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h index 0d83d83b794..82fbca10721 100644 --- a/storage/innobase/include/mtr0log.h +++ b/storage/innobase/include/mtr0log.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2019, 2020, MariaDB Corporation. +Copyright (c) 2019, 2021, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -170,7 +170,7 @@ inline uint32_t mlog_decode_len(const byte *log, const byte *end) template<unsigned l,mtr_t::write_type w,typename V> inline bool mtr_t::write(const buf_block_t &block, void *ptr, V val) { - ut_ad(ut_align_down(ptr, srv_page_size) == block.frame); + ut_ad(ut_align_down(ptr, srv_page_size) == block.page.frame); static_assert(l == 1 || l == 2 || l == 4 || l == 8, "wrong length"); byte buf[l]; @@ -242,7 +242,7 @@ inline void mtr_t::memset(const buf_block_t *b, ulint ofs, ulint len, byte val) { ut_ad(ofs <= ulint(srv_page_size)); ut_ad(ofs + len <= ulint(srv_page_size)); - ::memset(ofs + b->frame, val, len); + ::memset(ofs + b->page.frame, val, len); memset(*b, ofs, len, val); } @@ -286,10 +286,10 @@ inline void mtr_t::memset(const buf_block_t *b, ulint ofs, size_t len, size_t s= 0; while (s < len) { - ::memcpy(ofs + s + b->frame, str, size); + ::memcpy(ofs + s + b->page.frame, str, size); s+= len; } - ::memcpy(ofs + s + b->frame, str, len - s); + ::memcpy(ofs + s + b->page.frame, str, len - s); memset(*b, ofs, len, str, size); } @@ -303,7 +303,7 @@ inline void mtr_t::memcpy(const buf_block_t &b, ulint offset, ulint len) ut_ad(len); ut_ad(offset <= 
ulint(srv_page_size)); ut_ad(offset + len <= ulint(srv_page_size)); - memcpy_low(b, uint16_t(offset), &b.frame[offset], len); + memcpy_low(b, uint16_t(offset), &b.page.frame[offset], len); } /** Log a write of a byte string to a page. @@ -484,7 +484,7 @@ template<mtr_t::write_type w> inline void mtr_t::memcpy(const buf_block_t &b, void *dest, const void *str, ulint len) { - ut_ad(ut_align_down(dest, srv_page_size) == b.frame); + ut_ad(ut_align_down(dest, srv_page_size) == b.page.frame); char *d= static_cast<char*>(dest); const char *s= static_cast<const char*>(str); if (w != FORCED && m_log_mode == MTR_LOG_ALL) @@ -525,7 +525,7 @@ inline void mtr_t::init(buf_block_t *b) m_freed_space= nullptr; } - b->page.status= buf_page_t::INIT_ON_FLUSH; + b->page.set_reinit(b->page.state() & buf_page_t::LRU_MASK); if (m_log_mode != MTR_LOG_ALL) { diff --git a/storage/innobase/include/mtr0mtr.ic b/storage/innobase/include/mtr0mtr.ic index f1b2f9aba83..02ad88194fb 100644 --- a/storage/innobase/include/mtr0mtr.ic +++ b/storage/innobase/include/mtr0mtr.ic @@ -30,7 +30,8 @@ Created 11/26/1995 Heikki Tuuri @return true if the mtr is dirtying a clean page. 
*/ inline bool mtr_t::is_block_dirtied(const buf_block_t *block) { - ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE); + ut_ad(block->page.in_file()); + ut_ad(block->page.frame); ut_ad(block->page.buf_fix_count()); return block->page.oldest_modification() <= 1; } @@ -109,7 +110,8 @@ mtr_t::sx_latch_at_savepoint( /* == RW_NO_LATCH */ ut_a(slot->type == MTR_MEMO_BUF_FIX); - block->lock.u_lock(); + block->page.lock.u_lock(); + ut_ad(!block->page.is_io_fixed()); if (!m_made_dirty) { m_made_dirty = is_block_dirtied(block); @@ -142,7 +144,8 @@ mtr_t::x_latch_at_savepoint( /* == RW_NO_LATCH */ ut_a(slot->type == MTR_MEMO_BUF_FIX); - block->lock.x_lock(); + block->page.lock.x_lock(); + ut_ad(!block->page.is_io_fixed()); if (!m_made_dirty) { m_made_dirty = is_block_dirtied(block); @@ -159,15 +162,23 @@ mtr_t::release_block_at_savepoint( ulint savepoint, buf_block_t* block) { - ut_ad(is_active()); - - mtr_memo_slot_t* slot = m_memo.at<mtr_memo_slot_t*>(savepoint); - - ut_a(slot->object == block); - - buf_page_release_latch(block, slot->type); - - reinterpret_cast<buf_block_t*>(block)->unfix(); - - slot->object = NULL; + ut_ad(is_active()); + + mtr_memo_slot_t *slot = m_memo.at<mtr_memo_slot_t*>(savepoint); + + ut_a(slot->object == block); + slot->object= nullptr; + block->page.unfix(); + + switch (slot->type) { + case MTR_MEMO_PAGE_S_FIX: + block->page.lock.s_unlock(); + break; + case MTR_MEMO_PAGE_SX_FIX: + case MTR_MEMO_PAGE_X_FIX: + block->page.lock.u_or_x_unlock(slot->type == MTR_MEMO_PAGE_SX_FIX); + break; + default: + break; + } } diff --git a/storage/innobase/include/page0cur.ic b/storage/innobase/include/page0cur.ic index 828be6840d2..5ee96dd716d 100644 --- a/storage/innobase/include/page0cur.ic +++ b/storage/innobase/include/page0cur.ic @@ -34,13 +34,7 @@ page_cur_get_page( /*==============*/ page_cur_t* cur) /*!< in: page cursor */ { - ut_ad(cur); - - if (cur->rec) { - ut_ad(page_align(cur->rec) == cur->block->frame); - } - - return(page_align(cur->rec)); + return 
page_align(page_cur_get_rec(cur)); } /*********************************************************//** @@ -52,13 +46,9 @@ page_cur_get_block( /*===============*/ page_cur_t* cur) /*!< in: page cursor */ { - ut_ad(cur); - - if (cur->rec) { - ut_ad(page_align(cur->rec) == cur->block->frame); - } - - return(cur->block); + ut_ad(cur); + ut_ad(!cur->rec || page_align(cur->rec) == cur->block->page.frame); + return cur->block; } /*********************************************************//** @@ -82,13 +72,9 @@ page_cur_get_rec( /*=============*/ page_cur_t* cur) /*!< in: page cursor */ { - ut_ad(cur); - - if (cur->rec) { - ut_ad(page_align(cur->rec) == cur->block->frame); - } - - return(cur->rec); + ut_ad(cur); + ut_ad(!cur->rec || page_align(cur->rec) == cur->block->page.frame); + return cur->rec; } #endif /* UNIV_DEBUG */ @@ -102,7 +88,7 @@ page_cur_set_before_first( const buf_block_t* block, /*!< in: index page */ page_cur_t* cur) /*!< in: cursor */ { - cur->block = (buf_block_t*) block; + cur->block = const_cast<buf_block_t*>(block); cur->rec = page_get_infimum_rec(buf_block_get_frame(cur->block)); } @@ -116,7 +102,7 @@ page_cur_set_after_last( const buf_block_t* block, /*!< in: index page */ page_cur_t* cur) /*!< in: cursor */ { - cur->block = (buf_block_t*) block; + cur->block = const_cast<buf_block_t*>(block); cur->rec = page_get_supremum_rec(buf_block_get_frame(cur->block)); } @@ -130,7 +116,7 @@ page_cur_is_before_first( const page_cur_t* cur) /*!< in: cursor */ { ut_ad(cur); - ut_ad(page_align(cur->rec) == cur->block->frame); + ut_ad(page_align(cur->rec) == cur->block->page.frame); return(page_rec_is_infimum(cur->rec)); } @@ -144,7 +130,7 @@ page_cur_is_after_last( const page_cur_t* cur) /*!< in: cursor */ { ut_ad(cur); - ut_ad(page_align(cur->rec) == cur->block->frame); + ut_ad(page_align(cur->rec) == cur->block->page.frame); return(page_rec_is_supremum(cur->rec)); } @@ -160,7 +146,7 @@ page_cur_position( page_cur_t* cur) /*!< out: page cursor */ { ut_ad(rec && 
block && cur); - ut_ad(page_align(rec) == block->frame); + ut_ad(page_align(rec) == block->page.frame); cur->rec = (rec_t*) rec; cur->block = (buf_block_t*) block; @@ -273,7 +259,7 @@ page_cur_tuple_insert( index, tuple, n_ext); *offsets = rec_get_offsets(rec, index, *offsets, - page_is_leaf(cursor->block->frame) + page_is_leaf(cursor->block->page.frame) ? index->n_core_fields : 0, ULINT_UNDEFINED, heap); ut_ad(size == rec_offs_size(*offsets)); diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h index abb395938fc..41e46c2d051 100644 --- a/storage/innobase/include/page0page.h +++ b/storage/innobase/include/page0page.h @@ -418,8 +418,8 @@ template<bool compressed> inline void page_rec_set_n_owned(buf_block_t *block, rec_t *rec, ulint n_owned, bool comp, mtr_t *mtr) { - ut_ad(block->frame == page_align(rec)); - ut_ad(comp == (page_is_comp(block->frame) != 0)); + ut_ad(block->page.frame == page_align(rec)); + ut_ad(comp == (page_is_comp(block->page.frame) != 0)); if (page_zip_des_t *page_zip= compressed ? buf_block_get_page_zip(block) : nullptr) diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic index 6514886dd67..861bf4a53df 100644 --- a/storage/innobase/include/page0page.ic +++ b/storage/innobase/include/page0page.ic @@ -87,7 +87,7 @@ page_set_ssn_id( MTR_MEMO_PAGE_X_FIX)); ut_ad(!page_zip || page_zip == &block->page.zip); constexpr uint16_t field= FIL_RTREE_SPLIT_SEQ_NUM; - byte *b= my_assume_aligned<2>(&block->frame[field]); + byte *b= my_assume_aligned<2>(&block->page.frame[field]); if (mtr->write<8,mtr_t::MAYBE_NOP>(*block, b, ssn_id) && UNIV_LIKELY_NULL(page_zip)) memcpy_aligned<2>(&page_zip->data[field], b, 8); @@ -125,7 +125,7 @@ Reset PAGE_LAST_INSERT. 
inline void page_header_reset_last_insert(buf_block_t *block, mtr_t *mtr) { constexpr uint16_t field= PAGE_HEADER + PAGE_LAST_INSERT; - byte *b= my_assume_aligned<2>(&block->frame[field]); + byte *b= my_assume_aligned<2>(&block->page.frame[field]); if (mtr->write<2,mtr_t::MAYBE_NOP>(*block, b, 0U) && UNIV_LIKELY_NULL(block->page.zip.data)) memset_aligned<2>(&block->page.zip.data[field], 0, 2); diff --git a/storage/innobase/include/page0types.h b/storage/innobase/include/page0types.h index 4d6aabfd576..d18cea66b30 100644 --- a/storage/innobase/include/page0types.h +++ b/storage/innobase/include/page0types.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2019, 2020, MariaDB Corporation. +Copyright (c) 2019, 2021, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -88,26 +88,41 @@ enum page_cur_mode_t { PAGE_CUR_RTREE_GET_FATHER = 14 }; +class buf_pool_t; +class buf_page_t; + /** Compressed page descriptor */ struct page_zip_des_t { page_zip_t* data; /*!< compressed page data */ -#ifdef UNIV_DEBUG - unsigned m_start:16; /*!< start offset of modification log */ - bool m_external; /*!< Allocated externally, not from the - buffer pool */ -#endif /* UNIV_DEBUG */ - unsigned m_end:16; /*!< end offset of modification log */ - unsigned m_nonempty:1; /*!< TRUE if the modification log + uint32_t m_end:16; /*!< end offset of modification log */ + uint32_t m_nonempty:1; /*!< TRUE if the modification log is not empty */ - unsigned n_blobs:12; /*!< number of externally stored + uint32_t n_blobs:12; /*!< number of externally stored columns on the page; the maximum is 744 on a 16 KiB page */ - unsigned ssize:PAGE_ZIP_SSIZE_BITS; + uint32_t ssize:PAGE_ZIP_SSIZE_BITS; /*!< 0 or compressed page shift size; the size in 
bytes is (UNIV_ZIP_SIZE_MIN >> 1) << ssize. */ +#ifdef UNIV_DEBUG + uint16_t m_start; /*!< start offset of modification log */ + bool m_external; /*!< Allocated externally, not from the + buffer pool */ +#endif /* UNIV_DEBUG */ + + void clear() { + memset((void*) this, 0, sizeof(data) + sizeof(uint32_t)); + ut_d(m_start = 0); + ut_d(m_external = false); + } + +private: + friend buf_pool_t; + friend buf_page_t; + /** fix count and state used in buf_page_t */ + Atomic_relaxed<uint32_t> fix; }; /** Compression statistics for a given page size */ diff --git a/storage/innobase/include/page0zip.h b/storage/innobase/include/page0zip.h index 4c577e9e434..45b5482fdb2 100644 --- a/storage/innobase/include/page0zip.h +++ b/storage/innobase/include/page0zip.h @@ -109,12 +109,7 @@ page_zip_is_too_big( /**********************************************************************//** Initialize a compressed page descriptor. */ -UNIV_INLINE -void -page_zip_des_init( -/*==============*/ - page_zip_des_t* page_zip); /*!< in/out: compressed page - descriptor */ +#define page_zip_des_init(page_zip) (page_zip)->clear() /**********************************************************************//** Configure the zlib allocator to use the given memory heap. */ diff --git a/storage/innobase/include/page0zip.ic b/storage/innobase/include/page0zip.ic index 87af2cc3879..7cf42a04b57 100644 --- a/storage/innobase/include/page0zip.ic +++ b/storage/innobase/include/page0zip.ic @@ -2,7 +2,7 @@ Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2017, 2020, MariaDB Corporation. +Copyright (c) 2017, 2021, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -305,18 +305,6 @@ page_zip_available( } /**********************************************************************//** -Initialize a compressed page descriptor. */ -UNIV_INLINE -void -page_zip_des_init( -/*==============*/ - page_zip_des_t* page_zip) /*!< in/out: compressed page - descriptor */ -{ - memset(page_zip, 0, sizeof *page_zip); -} - -/**********************************************************************//** Reset the counters used for filling INFORMATION_SCHEMA.innodb_cmp_per_index. */ UNIV_INLINE diff --git a/storage/innobase/include/sux_lock.h b/storage/innobase/include/sux_lock.h index e592e3825ad..17a484c732e 100644 --- a/storage/innobase/include/sux_lock.h +++ b/storage/innobase/include/sux_lock.h @@ -68,10 +68,13 @@ public: ut_ad(!writer.load(std::memory_order_relaxed)); ut_ad(!recursive); ut_d(readers_lock.init()); - ut_ad(!readers.load(std::memory_order_relaxed)); +#ifdef UNIV_DEBUG + if (auto r= readers.load(std::memory_order_relaxed)) + ut_ad(r->empty()); +#endif } - /** Free the rw-lock after create() */ + /** Free the rw-lock after init() */ void free() { ut_ad(!writer.load(std::memory_order_relaxed)); @@ -274,6 +277,8 @@ public: bool is_write_locked() const { return lock.is_write_locked(); } + bool is_locked_or_waiting() const { return lock.is_locked_or_waiting(); } + inline void lock_shared(); inline void unlock_shared(); }; @@ -291,7 +296,10 @@ template<> inline void sux_lock<ssux_lock_impl<true>>::init() ut_ad(!writer.load(std::memory_order_relaxed)); ut_ad(!recursive); ut_d(readers_lock.init()); - ut_ad(!readers.load(std::memory_order_relaxed)); +#ifdef UNIV_DEBUG + if (auto r= readers.load(std::memory_order_relaxed)) + ut_ad(r->empty()); +#endif } template<> diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h index 6ad897ca515..83df423d1ed 100644 
--- a/storage/innobase/include/trx0rseg.h +++ b/storage/innobase/include/trx0rseg.h @@ -302,7 +302,7 @@ inline uint32_t trx_rsegf_get_nth_undo(const buf_block_t *rseg_header, ulint n) { ut_ad(n < TRX_RSEG_N_SLOTS); return mach_read_from_4(TRX_RSEG + TRX_RSEG_UNDO_SLOTS + - n * TRX_RSEG_SLOT_SIZE + rseg_header->frame); + n * TRX_RSEG_SLOT_SIZE + rseg_header->page.frame); } #ifdef WITH_WSREP diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h index 93cc1fb9019..cbac3fd3a94 100644 --- a/storage/innobase/include/trx0sys.h +++ b/storage/innobase/include/trx0sys.h @@ -176,7 +176,7 @@ trx_sysf_rseg_get_space(const buf_block_t* sys_header, ulint rseg_id) ut_ad(rseg_id < TRX_SYS_N_RSEGS); return mach_read_from_4(TRX_SYS + TRX_SYS_RSEGS + TRX_SYS_RSEG_SPACE + rseg_id * TRX_SYS_RSEG_SLOT_SIZE - + sys_header->frame); + + sys_header->page.frame); } /** Read the page number of a rollback segment slot. @@ -189,7 +189,7 @@ trx_sysf_rseg_get_page_no(const buf_block_t *sys_header, ulint rseg_id) ut_ad(rseg_id < TRX_SYS_N_RSEGS); return mach_read_from_4(TRX_SYS + TRX_SYS_RSEGS + TRX_SYS_RSEG_PAGE_NO + rseg_id * TRX_SYS_RSEG_SLOT_SIZE + - sys_header->frame); + sys_header->page.frame); } /** Maximum length of MySQL binlog file name, in bytes. diff --git a/storage/innobase/include/trx0undo.ic b/storage/innobase/include/trx0undo.ic index 91a260d66a0..1a9c7774580 100644 --- a/storage/innobase/include/trx0undo.ic +++ b/storage/innobase/include/trx0undo.ic @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2019, MariaDB Corporation. +Copyright (c) 2017, 2021, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -128,11 +128,11 @@ uint16_t trx_undo_page_get_end(const buf_block_t *undo_page, uint32_t page_no, { if (page_no == undo_page->page.id().page_no()) if (uint16_t end = mach_read_from_2(TRX_UNDO_NEXT_LOG + offset + - undo_page->frame)) + undo_page->page.frame)) return end; return mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + - undo_page->frame); + undo_page->page.frame); } /** Get the next record in an undo log. @@ -146,6 +146,6 @@ trx_undo_page_get_next_rec(const buf_block_t *undo_page, uint16_t rec, uint32_t page_no, uint16_t offset) { uint16_t end= trx_undo_page_get_end(undo_page, page_no, offset); - uint16_t next= mach_read_from_2(undo_page->frame + rec); - return next == end ? nullptr : undo_page->frame + next; + uint16_t next= mach_read_from_2(undo_page->page.frame + rec); + return next == end ? nullptr : undo_page->page.frame + next; } diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 850c13ccf9c..36792a4e090 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -1544,12 +1544,12 @@ lock_rec_lock( have a lock strong enough already granted on the record, we have to wait. */ - err= lock_rec_enqueue_waiting(c_lock, mode, id, block->frame, heap_no, - index, thr, nullptr); + err= lock_rec_enqueue_waiting(c_lock, mode, id, block->page.frame, + heap_no, index, thr, nullptr); else if (!impl) { /* Set the requested lock on the record. */ - lock_rec_add_to_queue(mode, g.cell(), id, block->frame, heap_no, + lock_rec_add_to_queue(mode, g.cell(), id, block->page.frame, heap_no, index, trx, true); err= DB_SUCCESS_LOCKED_REC; } @@ -1570,18 +1570,13 @@ lock_rec_lock( trx->mutex_unlock(); return err; } - else - { - /* - Simplified and faster path for the most common cases - Note that we don't own the trx mutex. 
- */ - if (!impl) - lock_rec_create_low(nullptr, - mode, id, block->frame, heap_no, index, trx, false); - return DB_SUCCESS_LOCKED_REC; - } + /* Simplified and faster path for the most common cases */ + if (!impl) + lock_rec_create_low(nullptr, mode, id, block->page.frame, heap_no, index, + trx, false); + + return DB_SUCCESS_LOCKED_REC; } /*********************************************************************//** @@ -2187,7 +2182,7 @@ lock_rec_inherit_to_gap_if_gap_lock( !lock->is_record_not_gap()) && !lock_table_has(lock->trx, lock->index->table, LOCK_X)) lock_rec_add_to_queue(LOCK_GAP | lock->mode(), - g.cell(), id, block->frame, + g.cell(), id, block->page.frame, heir_heap_no, lock->index, lock->trx, false); } @@ -2233,7 +2228,7 @@ lock_rec_move( the function works also if donator_id == receiver_id */ lock_rec_add_to_queue(type_mode, receiver_cell, - receiver_id, receiver.frame, + receiver_id, receiver.page.frame, receiver_heap_no, lock->index, lock_trx, true); lock_trx->mutex_unlock(); @@ -2344,8 +2339,8 @@ lock_move_reorganize_page( } while (lock); - const ulint comp= page_is_comp(block->frame); - ut_ad(comp == page_is_comp(oblock->frame)); + const ulint comp= page_is_comp(block->page.frame); + ut_ad(comp == page_is_comp(oblock->page.frame)); lock_move_granted_locks_to_front(old_locks); @@ -2359,8 +2354,8 @@ lock_move_reorganize_page( supremum of the page; the infimum may carry locks if an update of a record is occurring on the page, and its locks were temporarily stored on the infimum */ - const rec_t *rec1= page_get_infimum_rec(block->frame); - const rec_t *rec2= page_get_infimum_rec(oblock->frame); + const rec_t *rec1= page_get_infimum_rec(block->page.frame); + const rec_t *rec2= page_get_infimum_rec(oblock->page.frame); /* Set locks according to old locks */ for (;;) @@ -2399,7 +2394,7 @@ lock_move_reorganize_page( /* NOTE that the old lock bitmap could be too small for the new heap number! 
*/ - lock_rec_add_to_queue(lock->type_mode, cell, id, block->frame, + lock_rec_add_to_queue(lock->type_mode, cell, id, block->page.frame, new_heap_no, lock->index, lock_trx, true); } @@ -2441,8 +2436,8 @@ lock_move_rec_list_end( { const ulint comp= page_rec_is_comp(rec); - ut_ad(block->frame == page_align(rec)); - ut_ad(comp == page_is_comp(new_block->frame)); + ut_ad(block->page.frame == page_align(rec)); + ut_ad(comp == page_is_comp(new_block->page.frame)); const page_id_t id{block->page.id()}; const page_id_t new_id{new_block->page.id()}; @@ -2466,13 +2461,15 @@ lock_move_rec_list_end( { if (page_offset(rec1) == PAGE_NEW_INFIMUM) rec1= page_rec_get_next_low(rec1, TRUE); - rec2= page_rec_get_next_low(new_block->frame + PAGE_NEW_INFIMUM, TRUE); + rec2= page_rec_get_next_low(new_block->page.frame + PAGE_NEW_INFIMUM, + TRUE); } else { if (page_offset(rec1) == PAGE_OLD_INFIMUM) rec1= page_rec_get_next_low(rec1, FALSE); - rec2= page_rec_get_next_low(new_block->frame + PAGE_OLD_INFIMUM,FALSE); + rec2= page_rec_get_next_low(new_block->page.frame + PAGE_OLD_INFIMUM, + FALSE); } /* Copy lock requests on user records to new page and @@ -2524,7 +2521,8 @@ lock_move_rec_list_end( lock->type_mode&= ~LOCK_WAIT; } - lock_rec_add_to_queue(type_mode, g.cell2(), new_id, new_block->frame, + lock_rec_add_to_queue(type_mode, g.cell2(), new_id, + new_block->page.frame, rec2_heap_no, lock->index, lock_trx, true); } @@ -2565,9 +2563,9 @@ lock_move_rec_list_start( { const ulint comp= page_rec_is_comp(rec); - ut_ad(block->frame == page_align(rec)); - ut_ad(comp == page_is_comp(new_block->frame)); - ut_ad(new_block->frame == page_align(old_end)); + ut_ad(block->page.frame == page_align(rec)); + ut_ad(comp == page_is_comp(new_block->page.frame)); + ut_ad(new_block->page.frame == page_align(old_end)); ut_ad(!page_rec_is_metadata(rec)); const page_id_t id{block->page.id()}; const page_id_t new_id{new_block->page.id()}; @@ -2585,12 +2583,14 @@ lock_move_rec_list_start( if (comp) { - rec1= 
page_rec_get_next_low(block->frame + PAGE_NEW_INFIMUM, TRUE); + rec1= page_rec_get_next_low(block->page.frame + PAGE_NEW_INFIMUM, + TRUE); rec2= page_rec_get_next_low(old_end, TRUE); } else { - rec1= page_rec_get_next_low(block->frame + PAGE_OLD_INFIMUM, FALSE); + rec1= page_rec_get_next_low(block->page.frame + PAGE_OLD_INFIMUM, + FALSE); rec2= page_rec_get_next_low(old_end, FALSE); } @@ -2638,7 +2638,8 @@ lock_move_rec_list_start( lock->type_mode&= ~LOCK_WAIT; } - lock_rec_add_to_queue(type_mode, g.cell2(), new_id, new_block->frame, + lock_rec_add_to_queue(type_mode, g.cell2(), new_id, + new_block->page.frame, rec2_heap_no, lock->index, lock_trx, true); } @@ -2677,8 +2678,8 @@ lock_rtr_move_rec_list( const ulint comp= page_rec_is_comp(rec_move[0].old_rec); - ut_ad(block->frame == page_align(rec_move[0].old_rec)); - ut_ad(new_block->frame == page_align(rec_move[0].new_rec)); + ut_ad(block->page.frame == page_align(rec_move[0].old_rec)); + ut_ad(new_block->page.frame == page_align(rec_move[0].new_rec)); ut_ad(comp == page_rec_is_comp(rec_move[0].new_rec)); const page_id_t id{block->page.id()}; const page_id_t new_id{new_block->page.id()}; @@ -2732,7 +2733,8 @@ lock_rtr_move_rec_list( lock->type_mode&= ~LOCK_WAIT; } - lock_rec_add_to_queue(type_mode, g.cell2(), new_id, new_block->frame, + lock_rec_add_to_queue(type_mode, g.cell2(), new_id, + new_block->page.frame, rec2_heap_no, lock->index, lock_trx, true); rec_move[moved].moved= true; @@ -2770,7 +2772,7 @@ lock_update_split_right( /* Inherit the locks to the supremum of left page from the successor of the infimum on right page */ - lock_rec_inherit_to_gap(g.cell1(), l, g.cell2(), r, left_block->frame, + lock_rec_inherit_to_gap(g.cell1(), l, g.cell2(), r, left_block->page.frame, PAGE_HEAP_NO_SUPREMUM, h); } @@ -2818,7 +2820,7 @@ lock_update_merge_right( /* Inherit the locks from the supremum of the left page to the original successor of infimum on the right page, to which the left page was merged */ - 
lock_rec_inherit_to_gap(g.cell2(), r, g.cell1(), l, right_block->frame, + lock_rec_inherit_to_gap(g.cell2(), r, g.cell1(), l, right_block->page.frame, page_rec_get_heap_no(orig_succ), PAGE_HEAP_NO_SUPREMUM); @@ -2874,7 +2876,7 @@ lock_update_split_left( LockMultiGuard g{lock_sys.rec_hash, l, r}; /* Inherit the locks to the supremum of the left page from the successor of the infimum on the right page */ - lock_rec_inherit_to_gap(g.cell1(), l, g.cell2(), r, left_block->frame, + lock_rec_inherit_to_gap(g.cell1(), l, g.cell2(), r, left_block->page.frame, PAGE_HEAP_NO_SUPREMUM, h); } @@ -2885,7 +2887,7 @@ lock_update_split_left( void lock_update_merge_left(const buf_block_t& left, const rec_t *orig_pred, const page_id_t right) { - ut_ad(left.frame == page_align(orig_pred)); + ut_ad(left.page.frame == page_align(orig_pred)); const page_id_t l{left.page.id()}; @@ -2897,7 +2899,7 @@ void lock_update_merge_left(const buf_block_t& left, const rec_t *orig_pred, { /* Inherit the locks on the supremum of the left page to the first record which was moved from the right page */ - lock_rec_inherit_to_gap(g.cell1(), l, g.cell1(), l, left.frame, + lock_rec_inherit_to_gap(g.cell1(), l, g.cell1(), l, left.page.frame, page_rec_get_heap_no(left_next_rec), PAGE_HEAP_NO_SUPREMUM); @@ -2938,8 +2940,8 @@ lock_rec_reset_and_inherit_gap_locks( /* This is a rare operation and likely too large for a memory transaction. 
*/ LockMultiGuard g{lock_sys.rec_hash, heir, donor}; lock_rec_reset_and_release_wait(g.cell1(), heir, heir_heap_no); - lock_rec_inherit_to_gap(g.cell1(), heir, g.cell2(), donor, heir_block.frame, - heir_heap_no, heap_no); + lock_rec_inherit_to_gap(g.cell1(), heir, g.cell2(), donor, + heir_block.page.frame, heir_heap_no, heap_no); } /*************************************************************//** @@ -2954,7 +2956,7 @@ lock_update_discard( const buf_block_t* block) /*!< in: index page which will be discarded */ { - const page_t* page = block->frame; + const page_t* page = block->page.frame; const rec_t* rec; ulint heap_no; const page_id_t heir(heir_block->page.id()); @@ -2975,7 +2977,7 @@ lock_update_discard( lock_rec_inherit_to_gap(g.cell1(), heir, g.cell2(), page_id, - heir_block->frame, + heir_block->page.frame, heir_heap_no, heap_no); lock_rec_reset_and_release_wait( @@ -2991,7 +2993,7 @@ lock_update_discard( lock_rec_inherit_to_gap(g.cell1(), heir, g.cell2(), page_id, - heir_block->frame, + heir_block->page.frame, heir_heap_no, heap_no); lock_rec_reset_and_release_wait( @@ -3031,7 +3033,7 @@ lock_update_insert( ulint receiver_heap_no; ulint donator_heap_no; - ut_ad(block->frame == page_align(rec)); + ut_ad(block->page.frame == page_align(rec)); ut_ad(!page_rec_is_metadata(rec)); /* Inherit the gap-locking locks for rec, in gap mode, from the next @@ -3059,7 +3061,7 @@ lock_update_delete( const buf_block_t* block, /*!< in: buffer block containing rec */ const rec_t* rec) /*!< in: the record to be removed */ { - const page_t* page = block->frame; + const page_t* page = block->page.frame; ulint heap_no; ulint next_heap_no; @@ -3083,7 +3085,7 @@ lock_update_delete( /* Let the next record inherit the locks from rec, in gap mode */ - lock_rec_inherit_to_gap(g.cell(), id, g.cell(), id, block->frame, + lock_rec_inherit_to_gap(g.cell(), id, g.cell(), id, block->page.frame, next_heap_no, heap_no); /* Reset the lock bits on rec and release waiting transactions */ @@ 
-3109,7 +3111,7 @@ lock_rec_store_on_page_infimum( { const ulint heap_no= page_rec_get_heap_no(rec); - ut_ad(block->frame == page_align(rec)); + ut_ad(block->page.frame == page_align(rec)); const page_id_t id{block->page.id()}; LockGuard g{lock_sys.rec_hash, id}; @@ -4244,7 +4246,7 @@ static void lock_rec_print(FILE* file, const lock_t* lock, mtr_t& mtr) fprintf(file, "Record lock, heap no %lu", (ulong) i); if (block) { - ut_ad(page_is_leaf(block->frame)); + ut_ad(page_is_leaf(block->page.frame)); const rec_t* rec; rec = page_find_rec_with_heap_no( @@ -4740,7 +4742,7 @@ loop: goto function_exit; } - DBUG_ASSERT(block->page.status != buf_page_t::FREED); + DBUG_ASSERT(!block->page.is_freed()); for (i = 0; i < nth_lock; i++) { @@ -4762,7 +4764,7 @@ loop: if (i == PAGE_HEAP_NO_SUPREMUM || lock_rec_get_nth_bit(lock, i)) { - rec = page_find_rec_with_heap_no(block->frame, i); + rec = page_find_rec_with_heap_no(block->page.frame, i); ut_a(rec); ut_ad(!lock_rec_get_nth_bit(lock, i) || page_rec_is_leaf(rec)); @@ -4864,7 +4866,7 @@ static void lock_rec_block_validate(const page_id_t page_id) << page_id << " err " << err; } - ut_ad(!block || block->page.status == buf_page_t::FREED + ut_ad(!block || block->page.is_freed() || lock_rec_validate_page(block, space->is_latched())); mtr_commit(&mtr); @@ -4940,9 +4942,9 @@ lock_rec_insert_check_and_lock( LOCK_GAP type locks from the successor record */ { - ut_ad(block->frame == page_align(rec)); + ut_ad(block->page.frame == page_align(rec)); ut_ad(mtr->is_named_space(index->table->space)); - ut_ad(page_is_leaf(block->frame)); + ut_ad(page_is_leaf(block->page.frame)); ut_ad(!index->table->is_temporary()); dberr_t err= DB_SUCCESS; @@ -4989,7 +4991,7 @@ lock_rec_insert_check_and_lock( heap_no, trx)) { trx->mutex_lock(); - err= lock_rec_enqueue_waiting(c_lock, type_mode, id, block->frame, + err= lock_rec_enqueue_waiting(c_lock, type_mode, id, block->page.frame, heap_no, index, thr, nullptr); trx->mutex_unlock(); } @@ -5241,7 +5243,7 @@ 
lock_clust_rec_modify_check_and_lock( ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(page_rec_is_leaf(rec)); ut_ad(dict_index_is_clust(index)); - ut_ad(block->frame == page_align(rec)); + ut_ad(block->page.frame == page_align(rec)); ut_ad(!rec_is_metadata(rec, *index)); ut_ad(!index->table->is_temporary()); @@ -5297,7 +5299,7 @@ lock_sec_rec_modify_check_and_lock( ut_ad(!dict_index_is_clust(index)); ut_ad(!dict_index_is_online_ddl(index) || (flags & BTR_CREATE_FLAG)); - ut_ad(block->frame == page_align(rec)); + ut_ad(block->page.frame == page_align(rec)); ut_ad(mtr->is_named_space(index->table->space)); ut_ad(page_rec_is_leaf(rec)); ut_ad(!rec_is_metadata(rec, *index)); @@ -5399,7 +5401,7 @@ lock_sec_rec_read_check_and_lock( ut_ad(!dict_index_is_clust(index)); ut_ad(!dict_index_is_online_ddl(index)); - ut_ad(block->frame == page_align(rec)); + ut_ad(block->page.frame == page_align(rec)); ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec)); ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(page_rec_is_leaf(rec)); @@ -5424,7 +5426,8 @@ lock_sec_rec_read_check_and_lock( trx_t *trx = thr_get_trx(thr); if (!lock_table_has(trx, index->table, LOCK_X) && !page_rec_is_supremum(rec) - && page_get_max_trx_id(block->frame) >= trx_sys.get_min_trx_id() + && page_get_max_trx_id(block->page.frame) + >= trx_sys.get_min_trx_id() && lock_rec_convert_impl_to_expl(thr_get_trx(thr), id, rec, index, offsets) && gap_mode == LOCK_REC_NOT_GAP) { @@ -5486,7 +5489,7 @@ lock_clust_rec_read_check_and_lock( que_thr_t* thr) /*!< in: query thread */ { ut_ad(dict_index_is_clust(index)); - ut_ad(block->frame == page_align(rec)); + ut_ad(block->page.frame == page_align(rec)); ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec)); ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP || gap_mode == LOCK_REC_NOT_GAP); @@ -6328,9 +6331,9 @@ void lock_update_split_and_merge( supremum on the left page before merge*/ const buf_block_t* right_block) /*!< in: right page from 
which merged */ { - ut_ad(page_is_leaf(left_block->frame)); - ut_ad(page_is_leaf(right_block->frame)); - ut_ad(page_align(orig_pred) == left_block->frame); + ut_ad(page_is_leaf(left_block->page.frame)); + ut_ad(page_is_leaf(right_block->page.frame)); + ut_ad(page_align(orig_pred) == left_block->page.frame); const page_id_t l{left_block->page.id()}; const page_id_t r{right_block->page.id()}; @@ -6342,7 +6345,7 @@ void lock_update_split_and_merge( /* Inherit the locks on the supremum of the left page to the first record which was moved from the right page */ - lock_rec_inherit_to_gap(g.cell1(), l, g.cell1(), l, left_block->frame, + lock_rec_inherit_to_gap(g.cell1(), l, g.cell1(), l, left_block->page.frame, page_rec_get_heap_no(left_next_rec), PAGE_HEAP_NO_SUPREMUM); @@ -6352,7 +6355,7 @@ void lock_update_split_and_merge( /* Inherit the locks to the supremum of the left page from the successor of the infimum on the right page */ - lock_rec_inherit_to_gap(g.cell1(), l, g.cell2(), r, left_block->frame, + lock_rec_inherit_to_gap(g.cell1(), l, g.cell2(), r, left_block->page.frame, PAGE_HEAP_NO_SUPREMUM, lock_get_min_heap_no(right_block)); } diff --git a/storage/innobase/lock/lock0prdt.cc b/storage/innobase/lock/lock0prdt.cc index 5553fa98357..e924d46bd3f 100644 --- a/storage/innobase/lock/lock0prdt.cc +++ b/storage/innobase/lock/lock0prdt.cc @@ -496,7 +496,7 @@ lock_prdt_insert_check_and_lock( lock_prdt_t* prdt) /*!< in: Predicates with Minimum Bound Rectangle */ { - ut_ad(block->frame == page_align(rec)); + ut_ad(block->page.frame == page_align(rec)); ut_ad(!index->table->is_temporary()); ut_ad(index->is_spatial()); @@ -533,7 +533,7 @@ lock_prdt_insert_check_and_lock( trx->mutex_lock(); /* Allocate MBR on the lock heap */ lock_init_prdt_from_mbr(prdt, mbr, 0, trx->lock.lock_heap); - err= lock_rec_enqueue_waiting(c_lock, mode, id, block->frame, + err= lock_rec_enqueue_waiting(c_lock, mode, id, block->page.frame, PRDT_HEAPNO, index, thr, prdt); trx->mutex_unlock(); } @@ 
-753,28 +753,19 @@ lock_prdt_lock( lock = lock_prdt_has_lock( mode, g.cell(), id, prdt, trx); - if (lock == NULL) { - - lock_t* wait_for; - - wait_for = lock_prdt_other_has_conflicting( - prdt_mode, g.cell(), id, prdt, trx); - - if (wait_for != NULL) { - - err = lock_rec_enqueue_waiting( - wait_for, - prdt_mode, - id, block->frame, PRDT_HEAPNO, - index, thr, prdt); - } else { - - lock_prdt_add_to_queue( - prdt_mode, block, index, trx, - prdt, true); - - status = LOCK_REC_SUCCESS; - } + if (lock) { + } else if (lock_t* wait_for + = lock_prdt_other_has_conflicting( + prdt_mode, g.cell(), id, prdt, + trx)) { + err = lock_rec_enqueue_waiting( + wait_for, prdt_mode, id, + block->page.frame, PRDT_HEAPNO, + index, thr, prdt); + } else { + lock_prdt_add_to_queue( + prdt_mode, block, index, trx, + prdt, true); } trx->mutex_unlock(); diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 5822524bda9..fbbde25a218 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -162,7 +162,7 @@ public: { ut_ad(len > 2); byte *free_p= my_assume_aligned<2> - (TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + block.frame); + (TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + block.page.frame); const uint16_t free= mach_read_from_2(free_p); if (UNIV_UNLIKELY(free < TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE || free + len + 6 >= srv_page_size - FIL_PAGE_DATA_END)) @@ -172,7 +172,7 @@ public: return true; } - byte *p= block.frame + free; + byte *p= block.page.frame + free; mach_write_to_2(free_p, free + 4 + len); memcpy(p, free_p, 2); p+= 2; @@ -201,8 +201,8 @@ public: apply_status apply(const buf_block_t &block, uint16_t &last_offset) const { const byte * const recs= begin(); - byte *const frame= block.page.zip.ssize - ? block.page.zip.data : block.frame; + byte *const frame= block.page.zip.data + ? 
block.page.zip.data : block.page.frame; const size_t size= block.physical_size(); apply_status applied= APPLIED_NO; @@ -815,7 +815,7 @@ bool recv_sys_t::recover_deferred(recv_sys_t::map::iterator &p, const byte *page= UNIV_LIKELY_NULL(block->page.zip.data) ? block->page.zip.data - : block->frame; + : block->page.frame; const uint32_t space_id= mach_read_from_4(page + FIL_PAGE_SPACE_ID); const uint32_t flags= fsp_header_get_flags(page); const uint32_t page_no= mach_read_from_4(page + FIL_PAGE_OFFSET); @@ -834,7 +834,7 @@ bool recv_sys_t::recover_deferred(recv_sys_t::map::iterator &p, page), size); space->free_limit= fsp_header_get_field(page, FSP_FREE_LIMIT); space->free_len= flst_get_len(FSP_HEADER_OFFSET + FSP_FREE + page); - block->unfix(); + block->page.lock.x_unlock(); fil_node_t *node= UT_LIST_GET_FIRST(space->chain); node->deferred= true; if (!space->acquire()) @@ -861,7 +861,7 @@ bool recv_sys_t::recover_deferred(recv_sys_t::map::iterator &p, return false; } - block->unfix(); + block->page.lock.x_unlock(); } fail: @@ -981,7 +981,7 @@ public: case FIL_PAGE_RTREE: if (page_zip_decompress( &block->page.zip, - block->frame, + block->page.frame, true)) { break; } @@ -995,8 +995,10 @@ public: continue; } mysql_mutex_unlock(&recv_sys.mutex); - block->page.ibuf_exist = ibuf_page_exists( - block->page.id(), block->zip_size()); + if (ibuf_page_exists(block->page.id(), + block->zip_size())) { + block->page.set_ibuf_exist(); + } mtr.commit(); mtr.start(); mysql_mutex_lock(&recv_sys.mutex); @@ -1296,9 +1298,9 @@ inline void recv_sys_t::clear() for (buf_block_t *block= UT_LIST_GET_LAST(blocks); block; ) { buf_block_t *prev_block= UT_LIST_GET_PREV(unzip_LRU, block); - ut_ad(block->page.state() == BUF_BLOCK_MEMORY); + ut_ad(block->page.state() == buf_page_t::MEMORY); UT_LIST_REMOVE(blocks, block); - MEM_MAKE_ADDRESSABLE(block->frame, srv_page_size); + MEM_MAKE_ADDRESSABLE(block->page.frame, srv_page_size); buf_block_free(block); block= prev_block; } @@ -1337,9 +1339,9 @@ 
create_block: ut_calc_align<uint16_t>(static_cast<uint16_t>(len), ALIGNMENT); static_assert(ut_is_2pow(ALIGNMENT), "ALIGNMENT must be a power of 2"); UT_LIST_ADD_FIRST(blocks, block); - MEM_MAKE_ADDRESSABLE(block->frame, len); - MEM_NOACCESS(block->frame + len, srv_page_size - len); - return my_assume_aligned<ALIGNMENT>(block->frame); + MEM_MAKE_ADDRESSABLE(block->page.frame, len); + MEM_NOACCESS(block->page.frame + len, srv_page_size - len); + return my_assume_aligned<ALIGNMENT>(block->page.frame); } size_t free_offset= static_cast<uint16_t>(block->page.access_time); @@ -1357,8 +1359,8 @@ create_block: block->page.access_time= ((block->page.access_time >> 16) + 1) << 16 | ut_calc_align<uint16_t>(static_cast<uint16_t>(free_offset), ALIGNMENT); - MEM_MAKE_ADDRESSABLE(block->frame + free_offset - len, len); - return my_assume_aligned<ALIGNMENT>(block->frame + free_offset - len); + MEM_MAKE_ADDRESSABLE(block->page.frame + free_offset - len, len); + return my_assume_aligned<ALIGNMENT>(block->page.frame + free_offset - len); } @@ -1377,22 +1379,22 @@ inline void recv_sys_t::free(const void *data) auto *chunk= buf_pool.chunks; for (auto i= buf_pool.n_chunks; i--; chunk++) { - if (data < chunk->blocks->frame) + if (data < chunk->blocks->page.frame) continue; const size_t offs= (reinterpret_cast<const byte*>(data) - - chunk->blocks->frame) >> srv_page_size_shift; + chunk->blocks->page.frame) >> srv_page_size_shift; if (offs >= chunk->size) continue; buf_block_t *block= &chunk->blocks[offs]; - ut_ad(block->frame == data); - ut_ad(block->page.state() == BUF_BLOCK_MEMORY); + ut_ad(block->page.frame == data); + ut_ad(block->page.state() == buf_page_t::MEMORY); ut_ad(static_cast<uint16_t>(block->page.access_time - 1) < srv_page_size); ut_ad(block->page.access_time >= 1U << 16); if (!((block->page.access_time -= 1U << 16) >> 16)) { UT_LIST_REMOVE(blocks, block); - MEM_MAKE_ADDRESSABLE(block->frame, srv_page_size); + MEM_MAKE_ADDRESSABLE(block->page.frame, srv_page_size); 
buf_block_free(block); } return; @@ -2011,9 +2013,11 @@ append: tail->append(l, len); return; } - if (end <= &block->frame[used - ALIGNMENT] || &block->frame[used] >= end) + if (end <= &block->page.frame[used - ALIGNMENT] || + &block->page.frame[used] >= end) break; /* Not the last allocated record in the page */ - const size_t new_used= static_cast<size_t>(end - block->frame + len + 1); + const size_t new_used= static_cast<size_t> + (end - block->page.frame + len + 1); ut_ad(new_used > used); if (new_used > srv_page_size) break; @@ -2574,7 +2578,7 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr, byte *frame = UNIV_LIKELY_NULL(block->page.zip.data) ? block->page.zip.data - : block->frame; + : block->page.frame; const lsn_t page_lsn = init ? 0 : mach_read_from_8(frame + FIL_PAGE_LSN); @@ -2717,7 +2721,7 @@ set_start_lsn: if (start_lsn) { ut_ad(end_lsn >= start_lsn); mach_write_to_8(FIL_PAGE_LSN + frame, end_lsn); - if (UNIV_LIKELY(frame == block->frame)) { + if (UNIV_LIKELY(frame == block->page.frame)) { mach_write_to_8(srv_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM + frame, end_lsn); @@ -2736,7 +2740,7 @@ set_start_lsn: any buffered changes. */ init->created = false; ut_ad(!mtr.has_modifications()); - block->page.status = buf_page_t::FREED; + block->page.set_freed(block->page.state()); } /* Make sure that committing mtr does not change the modification @@ -2814,24 +2818,25 @@ void recv_recover_page(fil_space_t* space, buf_page_t* bpage) mtr.start(); mtr.set_log_mode(MTR_LOG_NO_REDO); - ut_ad(bpage->state() == BUF_BLOCK_FILE_PAGE); - buf_block_t* block = reinterpret_cast<buf_block_t*>(bpage); - + ut_ad(bpage->frame); /* Move the ownership of the x-latch on the page to this OS thread, so that we can acquire a second x-latch on it. This is needed for the operations to the page to pass the debug checks. 
*/ - block->lock.claim_ownership(); - block->lock.x_lock_recursive(); - buf_block_buf_fix_inc(block); - mtr.memo_push(block, MTR_MEMO_PAGE_X_FIX); + bpage->lock.claim_ownership(); + bpage->lock.x_lock_recursive(); + bpage->fix_on_recovery(); + mtr.memo_push(reinterpret_cast<buf_block_t*>(bpage), + MTR_MEMO_PAGE_X_FIX); mysql_mutex_lock(&recv_sys.mutex); if (recv_sys.apply_log_recs) { recv_sys_t::map::iterator p = recv_sys.pages.find(bpage->id()); if (p != recv_sys.pages.end() && !p->second.is_being_processed()) { - recv_recover_page(block, mtr, p, space); + recv_recover_page( + reinterpret_cast<buf_block_t*>(bpage), mtr, p, + space); p->second.log.clear(); recv_sys.pages.erase(p); recv_sys.maybe_finish_batch(); @@ -2936,7 +2941,7 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id, /* Buffer fix the first page while deferring the tablespace and unfix it after creating defer tablespace */ if (first_page && !space) - block->fix(); + block->page.lock.x_lock(); ut_ad(&recs == &pages.find(page_id)->second); i.created= true; recv_recover_page(block, mtr, p, space, &i); diff --git a/storage/innobase/mem/mem0mem.cc b/storage/innobase/mem/mem0mem.cc index 6d4593e0ab4..5e8587bfea6 100644 --- a/storage/innobase/mem/mem0mem.cc +++ b/storage/innobase/mem/mem0mem.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2020, MariaDB Corporation. +Copyright (c) 2017, 2021, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -294,7 +294,7 @@ mem_heap_create_block_func( buf_block = buf_block_alloc(); } - block = (mem_block_t*) buf_block->frame; + block = (mem_block_t*) buf_block->page.frame; } if (block == NULL) { diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index a28edb9d1c5..7ce109c8d5b 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -32,6 +32,7 @@ Created 11/26/1995 Heikki Tuuri #include "page0types.h" #include "mtr0log.h" #include "log0recv.h" +#include "my_cpu.h" #ifdef BTR_CUR_HASH_ADAPT # include "btr0sea.h" #endif @@ -166,19 +167,18 @@ struct FindPage return(true); } - buf_block_t* block = reinterpret_cast<buf_block_t*>( - slot->object); + buf_page_t* bpage = static_cast<buf_page_t*>(slot->object); - if (m_ptr < block->frame - || m_ptr >= block->frame + srv_page_size) { + if (m_ptr < bpage->frame + || m_ptr >= bpage->frame + srv_page_size) { return(true); } ut_ad(!(slot->type & MTR_MEMO_PAGE_S_FIX) - || block->lock.have_s()); + || bpage->lock.have_s()); ut_ad(!(slot->type & MTR_MEMO_PAGE_SX_FIX) - || block->lock.have_u_or_x()); + || bpage->lock.have_u_or_x()); ut_ad(!(slot->type & MTR_MEMO_PAGE_X_FIX) - || block->lock.have_x()); + || bpage->lock.have_x()); m_slot = slot; return(false); } @@ -207,41 +207,40 @@ private: @param slot memo slot */ static void memo_slot_release(mtr_memo_slot_t *slot) { + void *object= slot->object; + slot->object= nullptr; switch (const auto type= slot->type) { case MTR_MEMO_S_LOCK: - static_cast<index_lock*>(slot->object)->s_unlock(); + static_cast<index_lock*>(object)->s_unlock(); break; case MTR_MEMO_X_LOCK: case MTR_MEMO_SX_LOCK: - static_cast<index_lock*>(slot->object)-> + static_cast<index_lock*>(object)-> u_or_x_unlock(type == MTR_MEMO_SX_LOCK); break; case MTR_MEMO_SPACE_X_LOCK: - 
static_cast<fil_space_t*>(slot->object)->set_committed_size(); - static_cast<fil_space_t*>(slot->object)->x_unlock(); + static_cast<fil_space_t*>(object)->set_committed_size(); + static_cast<fil_space_t*>(object)->x_unlock(); break; case MTR_MEMO_SPACE_S_LOCK: - static_cast<fil_space_t*>(slot->object)->s_unlock(); + static_cast<fil_space_t*>(object)->s_unlock(); break; default: -#ifdef UNIV_DEBUG - switch (slot->type & ~MTR_MEMO_MODIFY) { - case MTR_MEMO_BUF_FIX: + buf_page_t *bpage= static_cast<buf_page_t*>(object); + bpage->unfix(); + switch (auto latch= slot->type & ~MTR_MEMO_MODIFY) { case MTR_MEMO_PAGE_S_FIX: + bpage->lock.s_unlock(); + return; case MTR_MEMO_PAGE_SX_FIX: case MTR_MEMO_PAGE_X_FIX: - break; - default: - ut_ad("invalid type" == 0); - break; + bpage->lock.u_or_x_unlock(latch == MTR_MEMO_PAGE_SX_FIX); + /* fall through */ + case MTR_MEMO_BUF_FIX: + return; } -#endif /* UNIV_DEBUG */ - buf_block_t *block= static_cast<buf_block_t*>(slot->object); - buf_page_release_latch(block, slot->type & ~MTR_MEMO_MODIFY); - block->unfix(); - break; + ut_ad("invalid type" == 0); } - slot->object= nullptr; } /** Release the latches acquired by the mini-transaction. */ @@ -249,43 +248,42 @@ struct ReleaseLatches { /** @return true always. 
*/ bool operator()(mtr_memo_slot_t *slot) const { - if (!slot->object) + void *object= slot->object; + if (!object) return true; + slot->object= nullptr; switch (const auto type= slot->type) { case MTR_MEMO_S_LOCK: - static_cast<index_lock*>(slot->object)->s_unlock(); + static_cast<index_lock*>(object)->s_unlock(); break; case MTR_MEMO_SPACE_X_LOCK: - static_cast<fil_space_t*>(slot->object)->set_committed_size(); - static_cast<fil_space_t*>(slot->object)->x_unlock(); + static_cast<fil_space_t*>(object)->set_committed_size(); + static_cast<fil_space_t*>(object)->x_unlock(); break; case MTR_MEMO_SPACE_S_LOCK: - static_cast<fil_space_t*>(slot->object)->s_unlock(); + static_cast<fil_space_t*>(object)->s_unlock(); break; case MTR_MEMO_X_LOCK: case MTR_MEMO_SX_LOCK: - static_cast<index_lock*>(slot->object)-> + static_cast<index_lock*>(object)-> u_or_x_unlock(type == MTR_MEMO_SX_LOCK); break; default: -#ifdef UNIV_DEBUG - switch (slot->type & ~MTR_MEMO_MODIFY) { - case MTR_MEMO_BUF_FIX: + buf_page_t *bpage= static_cast<buf_page_t*>(object); + bpage->unfix(); + switch (auto latch= slot->type & ~MTR_MEMO_MODIFY) { case MTR_MEMO_PAGE_S_FIX: + bpage->lock.s_unlock(); + return true; case MTR_MEMO_PAGE_SX_FIX: case MTR_MEMO_PAGE_X_FIX: - break; - default: - ut_ad("invalid type" == 0); - break; + bpage->lock.u_or_x_unlock(latch == MTR_MEMO_PAGE_SX_FIX); + /* fall through */ + case MTR_MEMO_BUF_FIX: + return true; } -#endif /* UNIV_DEBUG */ - buf_block_t *block= static_cast<buf_block_t*>(slot->object); - buf_page_release_latch(block, slot->type & ~MTR_MEMO_MODIFY); - block->unfix(); - break; + ut_ad("invalid type" == 0); } - slot->object= NULL; return true; } }; @@ -485,8 +483,11 @@ struct Shrink case MTR_MEMO_PAGE_X_FIX: case MTR_MEMO_PAGE_SX_FIX: auto &bpage= static_cast<buf_block_t*>(slot->object)->page; - ut_ad(bpage.io_fix() == BUF_IO_NONE); - const auto id= bpage.id(); + const auto s= bpage.state(); + ut_ad(s >= buf_page_t::FREED); + ut_ad(s < buf_page_t::READ_FIX); + 
ut_ad(bpage.frame); + const page_id_t id{bpage.id()}; if (id < high) { ut_ad(id.space() == high.space() || @@ -494,10 +495,11 @@ struct Shrink srv_is_undo_tablespace(high.space()))); break; } + if (s >= buf_page_t::UNFIXED) + bpage.set_freed(s); ut_ad(id.space() == high.space()); - ut_ad(bpage.state() == BUF_BLOCK_FILE_PAGE); if (bpage.oldest_modification() > 1) - bpage.clear_oldest_modification(false); + bpage.reset_oldest_modification(); slot->type= static_cast<mtr_memo_type_t>(slot->type & ~MTR_MEMO_MODIFY); } return true; @@ -1057,7 +1059,7 @@ bool mtr_t::have_x_latch(const buf_block_t &block) const MTR_MEMO_BUF_FIX | MTR_MEMO_MODIFY)); return false; } - ut_ad(block.lock.have_x()); + ut_ad(block.page.lock.have_x()); return true; } @@ -1092,20 +1094,21 @@ static void mtr_defer_drop_ahi(buf_block_t *block, mtr_memo_type_t fix_type) break; case MTR_MEMO_PAGE_S_FIX: /* Temporarily release our S-latch. */ - block->lock.s_unlock(); - block->lock.x_lock(); + block->page.lock.s_unlock(); + block->page.lock.x_lock(); if (dict_index_t *index= block->index) if (index->freed()) btr_search_drop_page_hash_index(block); - block->lock.x_unlock(); - block->lock.s_lock(); + block->page.lock.x_unlock(); + block->page.lock.s_lock(); + ut_ad(!block->page.is_read_fixed()); break; case MTR_MEMO_PAGE_SX_FIX: - block->lock.u_x_upgrade(); + block->page.lock.u_x_upgrade(); if (dict_index_t *index= block->index) if (index->freed()) btr_search_drop_page_hash_index(block); - block->lock.x_u_downgrade(); + block->page.lock.x_u_downgrade(); break; default: ut_ad(fix_type == MTR_MEMO_PAGE_X_FIX); @@ -1131,7 +1134,7 @@ struct UpgradeX /** Upgrade U locks on a block to X */ void mtr_t::page_lock_upgrade(const buf_block_t &block) { - ut_ad(block.lock.have_x()); + ut_ad(block.page.lock.have_x()); m_memo.for_each_block(CIterate<UpgradeX>((UpgradeX(block)))); #ifdef BTR_CUR_HASH_ADAPT ut_ad(!block.index || !block.index->freed()); @@ -1165,28 +1168,42 @@ void mtr_t::lock_upgrade(const index_lock 
&lock) void mtr_t::page_lock(buf_block_t *block, ulint rw_latch) { mtr_memo_type_t fix_type; + const auto state= block->page.state(); + ut_ad(state >= buf_page_t::FREED); switch (rw_latch) { case RW_NO_LATCH: fix_type= MTR_MEMO_BUF_FIX; + if (state >= buf_page_t::READ_FIX && state < buf_page_t::WRITE_FIX) + { + /* The io-fix will be released after block->page.lock in + buf_page_t::read_complete(), buf_pool_t::corrupted_evict(), and + buf_page_t::write_complete(). */ + block->page.lock.s_lock(); + ut_ad(!block->page.is_read_fixed()); + block->page.lock.s_unlock(); + } goto done; case RW_S_LATCH: fix_type= MTR_MEMO_PAGE_S_FIX; - block->lock.s_lock(); + block->page.lock.s_lock(); + ut_ad(!block->page.is_read_fixed()); break; case RW_SX_LATCH: fix_type= MTR_MEMO_PAGE_SX_FIX; - block->lock.u_lock(); + block->page.lock.u_lock(); + ut_ad(!block->page.is_io_fixed()); break; default: ut_ad(rw_latch == RW_X_LATCH); fix_type= MTR_MEMO_PAGE_X_FIX; - if (block->lock.x_lock_upgraded()) + if (block->page.lock.x_lock_upgraded()) { - page_lock_upgrade(*block); block->unfix(); + page_lock_upgrade(*block); return; } + ut_ad(!block->page.is_io_fixed()); } #ifdef BTR_CUR_HASH_ADAPT @@ -1196,8 +1213,9 @@ void mtr_t::page_lock(buf_block_t *block, ulint rw_latch) #endif /* BTR_CUR_HASH_ADAPT */ done: - ut_ad(page_id_t(page_get_space_id(block->frame), - page_get_page_no(block->frame)) == block->page.id()); + ut_ad(state < buf_page_t::UNFIXED || + page_id_t(page_get_space_id(block->page.frame), + page_get_page_no(block->page.frame)) == block->page.id()); memo_push(block, fix_type); } @@ -1277,7 +1295,7 @@ struct FlaggedCheck { if (f & (MTR_MEMO_PAGE_S_FIX | MTR_MEMO_PAGE_SX_FIX | MTR_MEMO_PAGE_X_FIX)) { block_lock* lock = &static_cast<buf_block_t*>( - const_cast<void*>(m_ptr))->lock; + const_cast<void*>(m_ptr))->page.lock; ut_ad(!(f & MTR_MEMO_PAGE_S_FIX) || lock->have_s()); ut_ad(!(f & MTR_MEMO_PAGE_SX_FIX) || lock->have_u_or_x()); diff --git a/storage/innobase/page/page0cur.cc 
b/storage/innobase/page/page0cur.cc index cc6b1797d61..41ae2638f47 100644 --- a/storage/innobase/page/page0cur.cc +++ b/storage/innobase/page/page0cur.cc @@ -773,7 +773,7 @@ page_cur_open_on_rnd_user_rec( buf_block_t* block, /*!< in: page */ page_cur_t* cursor) /*!< out: page cursor */ { - const ulint n_recs = page_get_n_recs(block->frame); + const ulint n_recs = page_get_n_recs(block->page.frame); page_cur_set_before_first(block, cursor); @@ -782,7 +782,7 @@ page_cur_open_on_rnd_user_rec( return; } - cursor->rec = page_rec_get_nth(block->frame, + cursor->rec = page_rec_get_nth(block->page.frame, ut_rnd_interval(n_recs) + 1); } @@ -805,7 +805,7 @@ Split a directory slot which owns too many records. static void page_dir_split_slot(const buf_block_t &block, page_dir_slot_t *slot) { - ut_ad(slot <= &block.frame[srv_page_size - PAGE_EMPTY_DIR_START]); + ut_ad(slot <= &block.page.frame[srv_page_size - PAGE_EMPTY_DIR_START]); slot= my_assume_aligned<2>(slot); const ulint n_owned= PAGE_DIR_SLOT_MAX_N_OWNED + 1; @@ -822,12 +822,12 @@ static void page_dir_split_slot(const buf_block_t &block, /* Add a directory slot immediately below this one. 
*/ constexpr uint16_t n_slots_f= PAGE_N_DIR_SLOTS + PAGE_HEADER; - byte *n_slots_p= my_assume_aligned<2>(n_slots_f + block.frame); + byte *n_slots_p= my_assume_aligned<2>(n_slots_f + block.page.frame); const uint16_t n_slots= mach_read_from_2(n_slots_p); page_dir_slot_t *last_slot= static_cast<page_dir_slot_t*> - (block.frame + srv_page_size - (PAGE_DIR + PAGE_DIR_SLOT_SIZE) - - n_slots * PAGE_DIR_SLOT_SIZE); + (block.page.frame + srv_page_size - (PAGE_DIR + PAGE_DIR_SLOT_SIZE) - + n_slots * PAGE_DIR_SLOT_SIZE); ut_ad(slot >= last_slot); memmove_aligned<2>(last_slot, last_slot + PAGE_DIR_SLOT_SIZE, slot - last_slot); @@ -836,8 +836,8 @@ static void page_dir_split_slot(const buf_block_t &block, mach_write_to_2(n_slots_p, n_slots + 1); - mach_write_to_2(slot, rec - block.frame); - const bool comp= page_is_comp(block.frame) != 0; + mach_write_to_2(slot, rec - block.page.frame); + const bool comp= page_is_comp(block.page.frame) != 0; page_rec_set_n_owned(page_dir_slot_get_rec(slot), half_owned, comp); page_rec_set_n_owned(page_dir_slot_get_rec(slot - PAGE_DIR_SLOT_SIZE), n_owned - half_owned, comp); @@ -851,10 +851,10 @@ Split a directory slot which owns too many records. static void page_zip_dir_split_slot(buf_block_t *block, ulint s, mtr_t* mtr) { ut_ad(block->page.zip.data); - ut_ad(page_is_comp(block->frame)); + ut_ad(page_is_comp(block->page.frame)); ut_ad(s); - page_dir_slot_t *slot= page_dir_get_nth_slot(block->frame, s); + page_dir_slot_t *slot= page_dir_get_nth_slot(block->page.frame, s); const ulint n_owned= PAGE_DIR_SLOT_MAX_N_OWNED + 1; ut_ad(page_dir_slot_get_n_owned(slot) == n_owned); @@ -871,12 +871,12 @@ static void page_zip_dir_split_slot(buf_block_t *block, ulint s, mtr_t* mtr) /* Add a directory slot immediately below this one. 
*/ constexpr uint16_t n_slots_f= PAGE_N_DIR_SLOTS + PAGE_HEADER; - byte *n_slots_p= my_assume_aligned<2>(n_slots_f + block->frame); + byte *n_slots_p= my_assume_aligned<2>(n_slots_f + block->page.frame); const uint16_t n_slots= mach_read_from_2(n_slots_p); page_dir_slot_t *last_slot= static_cast<page_dir_slot_t*> - (block->frame + srv_page_size - (PAGE_DIR + PAGE_DIR_SLOT_SIZE) - - n_slots * PAGE_DIR_SLOT_SIZE); + (block->page.frame + srv_page_size - (PAGE_DIR + PAGE_DIR_SLOT_SIZE) - + n_slots * PAGE_DIR_SLOT_SIZE); memmove_aligned<2>(last_slot, last_slot + PAGE_DIR_SLOT_SIZE, slot - last_slot); @@ -904,10 +904,10 @@ this may result in merging the two slots. static void page_zip_dir_balance_slot(buf_block_t *block, ulint s, mtr_t *mtr) { ut_ad(block->page.zip.data); - ut_ad(page_is_comp(block->frame)); + ut_ad(page_is_comp(block->page.frame)); ut_ad(s > 0); - const ulint n_slots = page_dir_get_n_slots(block->frame); + const ulint n_slots = page_dir_get_n_slots(block->page.frame); if (UNIV_UNLIKELY(s + 1 == n_slots)) { /* The last directory slot cannot be balanced. 
*/ @@ -916,7 +916,7 @@ static void page_zip_dir_balance_slot(buf_block_t *block, ulint s, mtr_t *mtr) ut_ad(s < n_slots); - page_dir_slot_t* slot = page_dir_get_nth_slot(block->frame, s); + page_dir_slot_t* slot = page_dir_get_nth_slot(block->page.frame, s); rec_t* const up_rec = const_cast<rec_t*> (page_dir_slot_get_rec(slot - PAGE_DIR_SLOT_SIZE)); rec_t* const slot_rec = const_cast<rec_t*> @@ -936,12 +936,12 @@ static void page_zip_dir_balance_slot(buf_block_t *block, ulint s, mtr_t *mtr) true, mtr); /* Shift the slots */ page_dir_slot_t* last_slot = page_dir_get_nth_slot( - block->frame, n_slots - 1); + block->page.frame, n_slots - 1); memmove_aligned<2>(last_slot + PAGE_DIR_SLOT_SIZE, last_slot, slot - last_slot); constexpr uint16_t n_slots_f = PAGE_N_DIR_SLOTS + PAGE_HEADER; byte *n_slots_p= my_assume_aligned<2> - (n_slots_f + block->frame); + (n_slots_f + block->page.frame); mtr->write<2>(*block, n_slots_p, n_slots - 1); memcpy_aligned<2>(n_slots_f + block->page.zip.data, n_slots_p, 2); @@ -967,11 +967,11 @@ this may result in merging the two slots. @param[in] s the slot to be balanced */ static void page_dir_balance_slot(const buf_block_t &block, ulint s) { - const bool comp= page_is_comp(block.frame); + const bool comp= page_is_comp(block.page.frame); ut_ad(!block.page.zip.data); ut_ad(s > 0); - const ulint n_slots = page_dir_get_n_slots(block.frame); + const ulint n_slots = page_dir_get_n_slots(block.page.frame); if (UNIV_UNLIKELY(s + 1 == n_slots)) { /* The last directory slot cannot be balanced. 
*/ @@ -980,7 +980,7 @@ static void page_dir_balance_slot(const buf_block_t &block, ulint s) ut_ad(s < n_slots); - page_dir_slot_t* slot = page_dir_get_nth_slot(block.frame, s); + page_dir_slot_t* slot = page_dir_get_nth_slot(block.page.frame, s); rec_t* const up_rec = const_cast<rec_t*> (page_dir_slot_get_rec(slot - PAGE_DIR_SLOT_SIZE)); rec_t* const slot_rec = const_cast<rec_t*> @@ -1001,13 +1001,13 @@ static void page_dir_balance_slot(const buf_block_t &block, ulint s) + (PAGE_DIR_SLOT_MIN_N_OWNED - 1), comp); /* Shift the slots */ page_dir_slot_t* last_slot = page_dir_get_nth_slot( - block.frame, n_slots - 1); + block.page.frame, n_slots - 1); memmove_aligned<2>(last_slot + PAGE_DIR_SLOT_SIZE, last_slot, slot - last_slot); memset_aligned<2>(last_slot, 0, 2); constexpr uint16_t n_slots_f = PAGE_N_DIR_SLOTS + PAGE_HEADER; byte *n_slots_p= my_assume_aligned<2> - (n_slots_f + block.frame); + (n_slots_f + block.page.frame); mach_write_to_2(n_slots_p, n_slots - 1); return; } @@ -1045,14 +1045,15 @@ static byte* page_mem_alloc_heap(buf_block_t *block, ulint need, ut_ad(!compressed || block->page.zip.data); byte *heap_top= my_assume_aligned<2>(PAGE_HEAP_TOP + PAGE_HEADER + - block->frame); + block->page.frame); const uint16_t top= mach_read_from_2(heap_top); - if (need > page_get_max_insert_size(block->frame, 1)) + if (need > page_get_max_insert_size(block->page.frame, 1)) return NULL; - byte *n_heap= my_assume_aligned<2>(PAGE_N_HEAP + PAGE_HEADER + block->frame); + byte *n_heap= my_assume_aligned<2> + (PAGE_N_HEAP + PAGE_HEADER + block->page.frame); const uint16_t h= mach_read_from_2(n_heap); if (UNIV_UNLIKELY((h + 1) & 0x6000)) @@ -1078,7 +1079,7 @@ static byte* page_mem_alloc_heap(buf_block_t *block, ulint need, heap_top, 4); } - return &block->frame[top]; + return &block->page.frame[top]; } /** Write log for inserting a B-tree or R-tree record in @@ -1103,13 +1104,14 @@ inline void mtr_t::page_insert(const buf_block_t &block, bool reuse, { 
ut_ad(!block.page.zip.data); ut_ad(m_log_mode == MTR_LOG_ALL); - ut_d(ulint n_slots= page_dir_get_n_slots(block.frame)); + ut_d(ulint n_slots= page_dir_get_n_slots(block.page.frame)); ut_ad(n_slots >= 2); - ut_d(const byte *page_end= page_dir_get_nth_slot(block.frame, n_slots - 1)); - ut_ad(&block.frame[prev_rec + PAGE_OLD_INFIMUM] <= page_end); - ut_ad(block.frame + page_header_get_offs(block.frame, PAGE_HEAP_TOP) <= - page_end); - ut_ad(fil_page_index_page_check(block.frame)); + ut_d(const byte *page_end= + page_dir_get_nth_slot(block.page.frame, n_slots - 1)); + ut_ad(&block.page.frame[prev_rec + PAGE_OLD_INFIMUM] <= page_end); + ut_ad(block.page.frame + + page_header_get_offs(block.page.frame, PAGE_HEAP_TOP) <= page_end); + ut_ad(fil_page_index_page_check(block.page.frame)); ut_ad(!(~(REC_INFO_MIN_REC_FLAG | REC_INFO_DELETED_FLAG) & info_bits)); ut_ad(n_fields_s >= 2); ut_ad((n_fields_s >> 1) <= REC_MAX_N_FIELDS); @@ -1188,15 +1190,16 @@ inline void mtr_t::page_insert(const buf_block_t &block, bool reuse, { ut_ad(!block.page.zip.data); ut_ad(m_log_mode == MTR_LOG_ALL); - ut_d(ulint n_slots= page_dir_get_n_slots(block.frame)); + ut_d(ulint n_slots= page_dir_get_n_slots(block.page.frame)); ut_ad(n_slots >= 2); - ut_d(const byte *page_end= page_dir_get_nth_slot(block.frame, n_slots - 1)); - ut_ad(&block.frame[prev_rec + PAGE_NEW_INFIMUM] <= page_end); - ut_ad(block.frame + page_header_get_offs(block.frame, PAGE_HEAP_TOP) <= - page_end); - ut_ad(fil_page_index_page_check(block.frame)); - ut_ad(hdr_l + hdr_c + data_l + data_c <= - static_cast<size_t>(page_end - &block.frame[PAGE_NEW_SUPREMUM_END])); + ut_d(const byte *page_end= page_dir_get_nth_slot(block.page.frame, + n_slots - 1)); + ut_ad(&block.page.frame[prev_rec + PAGE_NEW_INFIMUM] <= page_end); + ut_ad(block.page.frame + + page_header_get_offs(block.page.frame, PAGE_HEAP_TOP) <= page_end); + ut_ad(fil_page_index_page_check(block.page.frame)); + ut_ad(hdr_l + hdr_c + data_l + data_c <= static_cast<size_t> + 
(page_end - &block.page.frame[PAGE_NEW_SUPREMUM_END])); ut_ad(reuse || shift == 0); #ifdef UNIV_DEBUG switch (~(REC_INFO_MIN_REC_FLAG | REC_INFO_DELETED_FLAG) & info_status) { @@ -1204,11 +1207,11 @@ inline void mtr_t::page_insert(const buf_block_t &block, bool reuse, ut_ad(0); break; case REC_STATUS_NODE_PTR: - ut_ad(!page_is_leaf(block.frame)); + ut_ad(!page_is_leaf(block.page.frame)); break; case REC_STATUS_INSTANT: case REC_STATUS_ORDINARY: - ut_ad(page_is_leaf(block.frame)); + ut_ad(page_is_leaf(block.page.frame)); } #endif @@ -1296,13 +1299,13 @@ page_cur_insert_rec_low( ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(rec_offs_n_fields(offsets) > 0); - ut_ad(index->table->not_redundant() == !!page_is_comp(block->frame)); - ut_ad(!!page_is_comp(block->frame) == !!rec_offs_comp(offsets)); - ut_ad(fil_page_index_page_check(block->frame)); - ut_ad(mach_read_from_8(PAGE_HEADER + PAGE_INDEX_ID + block->frame) == + ut_ad(index->table->not_redundant() == !!page_is_comp(block->page.frame)); + ut_ad(!!page_is_comp(block->page.frame) == !!rec_offs_comp(offsets)); + ut_ad(fil_page_index_page_check(block->page.frame)); + ut_ad(mach_read_from_8(PAGE_HEADER + PAGE_INDEX_ID + block->page.frame) == index->id || mtr->is_inside_ibuf()); - ut_ad(page_dir_get_n_slots(block->frame) >= 2); + ut_ad(page_dir_get_n_slots(block->page.frame) >= 2); ut_ad(!page_rec_is_supremum(cur->rec)); @@ -1319,7 +1322,7 @@ page_cur_insert_rec_low( rec - rec_offs_extra_size(offsets); ulint extra_size __attribute__((unused))= rec_offs_extra_size(offsets) - - (page_is_comp(block->frame) + (page_is_comp(block->page.frame) ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES); /* All data bytes of the record must be valid. 
*/ @@ -1335,10 +1338,10 @@ page_cur_insert_rec_low( ulint heap_no; byte *insert_buf; - const bool comp= page_is_comp(block->frame); + const bool comp= page_is_comp(block->page.frame); const ulint extra_size= rec_offs_extra_size(offsets); - if (rec_t* free_rec= page_header_get_ptr(block->frame, PAGE_FREE)) + if (rec_t* free_rec= page_header_get_ptr(block->page.frame, PAGE_FREE)) { /* Try to reuse the head of PAGE_FREE. */ rec_offs foffsets_[REC_OFFS_NORMAL_SIZE]; @@ -1347,7 +1350,7 @@ page_cur_insert_rec_low( rec_offs_init(foffsets_); rec_offs *foffsets= rec_get_offsets(free_rec, index, foffsets_, - page_is_leaf(block->frame) + page_is_leaf(block->page.frame) ? index->n_core_fields : 0, ULINT_UNDEFINED, &heap); const ulint fextra_size= rec_offs_extra_size(foffsets); @@ -1361,13 +1364,14 @@ page_cur_insert_rec_low( goto use_heap; byte *page_free= my_assume_aligned<2>(PAGE_FREE + PAGE_HEADER + - block->frame); + block->page.frame); if (comp) { heap_no= rec_get_heap_no_new(free_rec); uint16_t next= mach_read_from_2(free_rec - REC_NEXT); mach_write_to_2(page_free, next - ? static_cast<uint16_t>(free_rec + next - block->frame) + ? 
static_cast<uint16_t>(free_rec + next - + block->page.frame) : 0); } else @@ -1395,40 +1399,40 @@ use_heap: ut_ad(cur->rec != insert_buf + extra_size); - rec_t *next_rec= block->frame + rec_get_next_offs(cur->rec, comp); - ut_ad(next_rec != block->frame); + rec_t *next_rec= block->page.frame + rec_get_next_offs(cur->rec, comp); + ut_ad(next_rec != block->page.frame); /* Update page header fields */ byte *page_last_insert= my_assume_aligned<2>(PAGE_LAST_INSERT + PAGE_HEADER + - block->frame); + block->page.frame); const uint16_t last_insert= mach_read_from_2(page_last_insert); ut_ad(!last_insert || !comp || - rec_get_node_ptr_flag(block->frame + last_insert) == + rec_get_node_ptr_flag(block->page.frame + last_insert) == rec_get_node_ptr_flag(rec)); /* Write PAGE_LAST_INSERT */ mach_write_to_2(page_last_insert, page_offset(insert_buf + extra_size)); /* Update PAGE_DIRECTION_B, PAGE_N_DIRECTION if needed */ - if (block->frame[FIL_PAGE_TYPE + 1] != byte(FIL_PAGE_RTREE)) + if (block->page.frame[FIL_PAGE_TYPE + 1] != byte(FIL_PAGE_RTREE)) { - byte *dir= &block->frame[PAGE_DIRECTION_B + PAGE_HEADER]; + byte *dir= &block->page.frame[PAGE_DIRECTION_B + PAGE_HEADER]; byte *n= my_assume_aligned<2> - (&block->frame[PAGE_N_DIRECTION + PAGE_HEADER]); + (&block->page.frame[PAGE_N_DIRECTION + PAGE_HEADER]); if (UNIV_UNLIKELY(!last_insert)) { no_direction: *dir= static_cast<byte>((*dir & ~((1U << 3) - 1)) | PAGE_NO_DIRECTION); memset(n, 0, 2); } - else if (block->frame + last_insert == cur->rec && + else if (block->page.frame + last_insert == cur->rec && (*dir & ((1U << 3) - 1)) != PAGE_LEFT) { *dir= static_cast<byte>((*dir & ~((1U << 3) - 1)) | PAGE_RIGHT); inc_dir: mach_write_to_2(n, mach_read_from_2(n) + 1); } - else if (next_rec == block->frame + last_insert && + else if (next_rec == block->page.frame + last_insert && (*dir & ((1U << 3) - 1)) != PAGE_RIGHT) { *dir= static_cast<byte>((*dir & ~((1U << 3) - 1)) | PAGE_LEFT); @@ -1440,7 +1444,7 @@ inc_dir: /* Update PAGE_N_RECS. 
*/ byte *page_n_recs= my_assume_aligned<2>(PAGE_N_RECS + PAGE_HEADER + - block->frame); + block->page.frame); mach_write_to_2(page_n_recs, mach_read_from_2(page_n_recs) + 1); @@ -1470,17 +1474,17 @@ inc_dir: } switch (rec_get_status(rec)) { case REC_STATUS_NODE_PTR: - ut_ad(!page_is_leaf(block->frame)); + ut_ad(!page_is_leaf(block->page.frame)); break; case REC_STATUS_INSTANT: ut_ad(index->is_instant()); - ut_ad(page_is_leaf(block->frame)); + ut_ad(page_is_leaf(block->page.frame)); if (!rec_is_metadata(rec, true)) break; - ut_ad(cur->rec == &block->frame[PAGE_NEW_INFIMUM]); + ut_ad(cur->rec == &block->page.frame[PAGE_NEW_INFIMUM]); break; case REC_STATUS_ORDINARY: - ut_ad(page_is_leaf(block->frame)); + ut_ad(page_is_leaf(block->page.frame)); ut_ad(!(rec_get_info_bits(rec, true) & ~REC_INFO_DELETED_FLAG)); break; case REC_STATUS_INFIMUM: @@ -1501,8 +1505,8 @@ inc_dir: static_cast<uint16_t>(insert_rec - cur->rec)); while (!(n_owned= rec_get_n_owned_new(next_rec))) { - next_rec= block->frame + rec_get_next_offs(next_rec, true); - ut_ad(next_rec != block->frame); + next_rec= block->page.frame + rec_get_next_offs(next_rec, true); + ut_ad(next_rec != block->page.frame); } rec_set_bit_field_1(next_rec, n_owned + 1, REC_NEW_N_OWNED, REC_N_OWNED_MASK, REC_N_OWNED_SHIFT); @@ -1515,7 +1519,7 @@ inc_dir: const byte * const c_start= cur->rec - extra_size; if (extra_size > REC_N_NEW_EXTRA_BYTES && c_start >= - &block->frame[PAGE_NEW_SUPREMUM_END + REC_N_NEW_EXTRA_BYTES]) + &block->page.frame[PAGE_NEW_SUPREMUM_END + REC_N_NEW_EXTRA_BYTES]) { /* Find common header bytes with the preceding record. 
*/ const byte *r= rec - (REC_N_NEW_EXTRA_BYTES + 1); @@ -1528,11 +1532,11 @@ inc_dir: else { #ifdef UNIV_DEBUG - if (!page_is_leaf(block->frame)); + if (!page_is_leaf(block->page.frame)); else if (rec_is_metadata(rec, false)) { ut_ad(index->is_instant()); - ut_ad(cur->rec == &block->frame[PAGE_OLD_INFIMUM]); + ut_ad(cur->rec == &block->page.frame[PAGE_OLD_INFIMUM]); } #endif rec_set_bit_field_1(insert_rec, 0, REC_OLD_N_OWNED, @@ -1543,8 +1547,8 @@ inc_dir: mach_write_to_2(cur->rec - REC_NEXT, page_offset(insert_rec)); while (!(n_owned= rec_get_n_owned_old(next_rec))) { - next_rec= block->frame + rec_get_next_offs(next_rec, false); - ut_ad(next_rec != block->frame); + next_rec= block->page.frame + rec_get_next_offs(next_rec, false); + ut_ad(next_rec != block->page.frame); } rec_set_bit_field_1(next_rec, n_owned + 1, REC_OLD_N_OWNED, REC_N_OWNED_MASK, REC_N_OWNED_SHIFT); @@ -1557,7 +1561,7 @@ inc_dir: ut_ad(extra_size > REC_N_OLD_EXTRA_BYTES); const byte * const c_start= cur->rec - extra_size; if (c_start >= - &block->frame[PAGE_OLD_SUPREMUM_END + REC_N_OLD_EXTRA_BYTES]) + &block->page.frame[PAGE_OLD_SUPREMUM_END + REC_N_OLD_EXTRA_BYTES]) { /* Find common header bytes with the preceding record. */ const byte *r= rec - (REC_N_OLD_EXTRA_BYTES + 1); @@ -1578,9 +1582,9 @@ inc_dir: if (c <= insert_buf && c_end > insert_buf) c_end= insert_buf; else - c_end= std::min<const byte*>(c_end, block->frame + srv_page_size - + c_end= std::min<const byte*>(c_end, block->page.frame + srv_page_size - PAGE_DIR - PAGE_DIR_SLOT_SIZE * - page_dir_get_n_slots(block->frame)); + page_dir_get_n_slots(block->page.frame)); size_t data_common; /* Copy common data bytes of the preceding record. 
*/ for (; c != c_end && *r == *c; c++, r++); @@ -1588,14 +1592,14 @@ inc_dir: if (comp) mtr->page_insert(*block, reuse, - cur->rec - block->frame - PAGE_NEW_INFIMUM, + cur->rec - block->page.frame - PAGE_NEW_INFIMUM, info_status, free_offset, hdr_common, data_common, insert_buf, extra_size - hdr_common - REC_N_NEW_EXTRA_BYTES, r, data_size - data_common); else mtr->page_insert(*block, reuse, - cur->rec - block->frame - PAGE_OLD_INFIMUM, + cur->rec - block->page.frame - PAGE_OLD_INFIMUM, info_status, rec_get_n_fields_old(insert_rec) << 1 | rec_get_1byte_offs_flag(insert_rec), hdr_common, data_common, @@ -1615,11 +1619,12 @@ copied: if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) { const auto owner= page_dir_find_owner_slot(next_rec); - page_dir_split_slot(*block, page_dir_get_nth_slot(block->frame, owner)); + page_dir_split_slot(*block, + page_dir_get_nth_slot(block->page.frame, owner)); } rec_offs_make_valid(insert_buf + extra_size, index, - page_is_leaf(block->frame), offsets); + page_is_leaf(block->page.frame), offsets); return insert_buf + extra_size; } @@ -1703,20 +1708,22 @@ page_cur_insert_rec_zip( mtr_t* mtr) /*!< in/out: mini-transaction */ { page_zip_des_t * const page_zip= page_cur_get_page_zip(cursor); + page_t * const page= cursor->block->page.frame; + ut_ad(page_zip); ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(index->table->not_redundant()); - ut_ad(page_is_comp(cursor->block->frame)); + ut_ad(page_is_comp(page)); ut_ad(rec_offs_comp(offsets)); - ut_ad(fil_page_get_type(cursor->block->frame) == FIL_PAGE_INDEX || - fil_page_get_type(cursor->block->frame) == FIL_PAGE_RTREE); - ut_ad(mach_read_from_8(PAGE_HEADER + PAGE_INDEX_ID + cursor->block->frame) == + ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX || + fil_page_get_type(page) == FIL_PAGE_RTREE); + ut_ad(mach_read_from_8(PAGE_HEADER + PAGE_INDEX_ID + page) == index->id || mtr->is_inside_ibuf()); - ut_ad(!page_get_instant(cursor->block->frame)); + ut_ad(!page_get_instant(page)); 
ut_ad(!page_cur_is_after_last(cursor)); #ifdef UNIV_ZIP_DEBUG - ut_a(page_zip_validate(page_zip, cursor->block->frame, index)); + ut_a(page_zip_validate(page_zip, page, index)); #endif /* UNIV_ZIP_DEBUG */ /* 1. Get the size of the physical record in the page */ @@ -1734,13 +1741,11 @@ page_cur_insert_rec_zip( MEM_CHECK_DEFINED(rec_start, extra_size); } #endif /* HAVE_MEM_CHECK */ - const bool reorg_before_insert= page_has_garbage(cursor->block->frame) && - rec_size > page_get_max_insert_size(cursor->block->frame, 1) && - rec_size <= page_get_max_insert_size_after_reorganize(cursor->block->frame, - 1); + const bool reorg_before_insert= page_has_garbage(page) && + rec_size > page_get_max_insert_size(page, 1) && + rec_size <= page_get_max_insert_size_after_reorganize(page, 1); constexpr uint16_t page_free_f= PAGE_FREE + PAGE_HEADER; - byte* const page_free = my_assume_aligned<4>(page_free_f + - cursor->block->frame); + byte* const page_free = my_assume_aligned<4>(page_free_f + page); uint16_t free_rec= 0; /* 2. Try to find suitable space from page memory management */ @@ -1756,15 +1761,14 @@ page_cur_insert_rec_zip( const rec_t * const cursor_rec= page_cur_get_rec(cursor); #endif /* UNIV_DEBUG */ - if (page_is_empty(cursor->block->frame)) + if (page_is_empty(page)) { ut_ad(page_cur_is_before_first(cursor)); /* This is an empty page. Recreate to remove the modification log. 
*/ page_create_zip(cursor->block, index, - page_header_get_field(cursor->block->frame, PAGE_LEVEL), - 0, mtr); - ut_ad(!page_header_get_ptr(cursor->block->frame, PAGE_FREE)); + page_header_get_field(page, PAGE_LEVEL), 0, mtr); + ut_ad(!page_header_get_ptr(page, PAGE_FREE)); if (page_zip_available(page_zip, index->is_clust(), rec_size, 1)) goto use_heap; @@ -1773,7 +1777,7 @@ page_cur_insert_rec_zip( return nullptr; } - if (page_zip->m_nonempty || page_has_garbage(cursor->block->frame)) + if (page_zip->m_nonempty || page_has_garbage(page)) { ulint pos= page_rec_get_n_recs_before(cursor->rec); @@ -1784,11 +1788,11 @@ page_cur_insert_rec_zip( } if (pos) - cursor->rec= page_rec_get_nth(cursor->block->frame, pos); + cursor->rec= page_rec_get_nth(page, pos); else - ut_ad(cursor->rec == page_get_infimum_rec(cursor->block->frame)); + ut_ad(cursor->rec == page_get_infimum_rec(page)); - ut_ad(!page_header_get_ptr(cursor->block->frame, PAGE_FREE)); + ut_ad(!page_header_get_ptr(page, PAGE_FREE)); if (page_zip_available(page_zip, index->is_clust(), rec_size, 1)) goto use_heap; @@ -1811,11 +1815,10 @@ page_cur_insert_rec_zip( { /* The page was reorganized: Seek to pos. */ cursor->rec= pos > 1 - ? page_rec_get_nth(cursor->block->frame, pos - 1) - : cursor->block->frame + PAGE_NEW_INFIMUM; - insert_rec= cursor->block->frame + rec_get_next_offs(cursor->rec, 1); - rec_offs_make_valid(insert_rec, index, - page_is_leaf(cursor->block->frame), offsets); + ? page_rec_get_nth(page, pos - 1) + : page + PAGE_NEW_INFIMUM; + insert_rec= page + rec_get_next_offs(cursor->rec, 1); + rec_offs_make_valid(insert_rec, index, page_is_leaf(page), offsets); return insert_rec; } @@ -1826,9 +1829,9 @@ page_cur_insert_rec_zip( plus log the insert of this record?) */ /* Out of space: restore the page */ - if (!page_zip_decompress(page_zip, cursor->block->frame, false)) + if (!page_zip_decompress(page_zip, page, false)) ut_error; /* Memory corrupted? 
*/ - ut_ad(page_validate(cursor->block->frame, index)); + ut_ad(page_validate(page, index)); insert_rec= nullptr; } return insert_rec; @@ -1843,13 +1846,11 @@ page_cur_insert_rec_zip( rec_offs_init(foffsets_); - rec_offs *foffsets= rec_get_offsets(cursor->block->frame + free_rec, index, - foffsets_, - page_is_leaf(cursor->block->frame) + rec_offs *foffsets= rec_get_offsets(page + free_rec, index, foffsets_, + page_is_leaf(page) ? index->n_core_fields : 0, ULINT_UNDEFINED, &heap); - insert_buf= cursor->block->frame + free_rec - - rec_offs_extra_size(foffsets); + insert_buf= page + free_rec - rec_offs_extra_size(foffsets); if (rec_offs_size(foffsets) < rec_size) { @@ -1877,7 +1878,7 @@ too_small: /* Do not allow extra_size to grow */ goto too_small; - byte *const free_rec_ptr= cursor->block->frame + free_rec; + byte *const free_rec_ptr= page + free_rec; heap_no= rec_get_heap_no_new(free_rec_ptr); int16_t next_rec= mach_read_from_2(free_rec_ptr - REC_NEXT); /* With innodb_page_size=64k, int16_t would be unsafe to use here, @@ -1899,7 +1900,7 @@ too_small: static_assert(PAGE_GARBAGE == PAGE_FREE + 2, "compatibility"); mtr->memcpy(*cursor->block, page_free, hdr, 4); - if (!page_is_leaf(cursor->block->frame)) + if (!page_is_leaf(page)) { /* Zero out the node pointer of free_rec, in case it will not be overwritten by insert_rec. */ @@ -1949,8 +1950,7 @@ use_heap: /* 3. Create the record */ byte *insert_rec= rec_copy(insert_buf, rec, offsets); - rec_offs_make_valid(insert_rec, index, page_is_leaf(cursor->block->frame), - offsets); + rec_offs_make_valid(insert_rec, index, page_is_leaf(page), offsets); /* 4. 
Insert the record in the linked list of records */ ut_ad(cursor->rec != insert_rec); @@ -1965,8 +1965,7 @@ use_heap: (next_rec - insert_rec)); mach_write_to_2(cursor->rec - REC_NEXT, static_cast<uint16_t> (insert_rec - cursor->rec)); - byte *n_recs= my_assume_aligned<2>(PAGE_N_RECS + PAGE_HEADER + - cursor->block->frame); + byte *n_recs= my_assume_aligned<2>(PAGE_N_RECS + PAGE_HEADER + page); mtr->write<2>(*cursor->block, n_recs, 1U + mach_read_from_2(n_recs)); memcpy_aligned<2>(&page_zip->data[PAGE_N_RECS + PAGE_HEADER], n_recs, 2); @@ -1985,7 +1984,7 @@ use_heap: page_zip->data); const uint16_t last_insert_rec= mach_read_from_2(last_insert); ut_ad(!last_insert_rec || - rec_get_node_ptr_flag(cursor->block->frame + last_insert_rec) == + rec_get_node_ptr_flag(page + last_insert_rec) == rec_get_node_ptr_flag(insert_rec)); mach_write_to_2(last_insert, page_offset(insert_rec)); @@ -2001,15 +2000,14 @@ no_direction: *dir= PAGE_NO_DIRECTION; memset(n, 0, 2); } - else if (*dir != PAGE_LEFT && - cursor->block->frame + last_insert_rec == cursor->rec) + else if (*dir != PAGE_LEFT && page + last_insert_rec == cursor->rec) { *dir= PAGE_RIGHT; inc_dir: mach_write_to_2(n, mach_read_from_2(n) + 1); } else if (*dir != PAGE_RIGHT && page_rec_get_next(insert_rec) == - cursor->block->frame + last_insert_rec) + page + last_insert_rec) { *dir= PAGE_LEFT; goto inc_dir; @@ -2020,8 +2018,7 @@ inc_dir: /* Write the header fields in one record. 
*/ mtr->memcpy(*cursor->block, - my_assume_aligned<8>(PAGE_LAST_INSERT + PAGE_HEADER + - cursor->block->frame), + my_assume_aligned<8>(PAGE_LAST_INSERT + PAGE_HEADER + page), my_assume_aligned<8>(PAGE_LAST_INSERT + PAGE_HEADER + page_zip->data), PAGE_N_RECS - PAGE_LAST_INSERT + 2); @@ -2056,13 +2053,13 @@ inc_dir: static void page_mem_free(const buf_block_t &block, rec_t *rec, size_t data_size, size_t extra_size) { - ut_ad(page_align(rec) == block.frame); + ut_ad(page_align(rec) == block.page.frame); ut_ad(!block.page.zip.data); - const rec_t *free= page_header_get_ptr(block.frame, PAGE_FREE); + const rec_t *free= page_header_get_ptr(block.page.frame, PAGE_FREE); - const uint16_t n_heap= uint16_t(page_header_get_field(block.frame, + const uint16_t n_heap= uint16_t(page_header_get_field(block.page.frame, PAGE_N_HEAP) - 1); - ut_ad(page_get_n_recs(block.frame) < (n_heap & 0x7fff)); + ut_ad(page_get_n_recs(block.page.frame) < (n_heap & 0x7fff)); const bool deleting_top= n_heap == ((n_heap & 0x8000) ? 
(rec_get_heap_no_new(rec) | 0x8000) : rec_get_heap_no_old(rec)); @@ -2070,7 +2067,7 @@ static void page_mem_free(const buf_block_t &block, rec_t *rec, if (deleting_top) { byte *page_heap_top= my_assume_aligned<2>(PAGE_HEAP_TOP + PAGE_HEADER + - block.frame); + block.page.frame); const uint16_t heap_top= mach_read_from_2(page_heap_top); const size_t extra_savings= heap_top - page_offset(rec + data_size); ut_ad(extra_savings < heap_top); @@ -2083,7 +2080,7 @@ static void page_mem_free(const buf_block_t &block, rec_t *rec, if (extra_savings) { byte *page_garbage= my_assume_aligned<2>(PAGE_GARBAGE + PAGE_HEADER + - block.frame); + block.page.frame); uint16_t garbage= mach_read_from_2(page_garbage); ut_ad(garbage >= extra_savings); mach_write_to_2(page_garbage, garbage - extra_savings); @@ -2092,17 +2089,17 @@ static void page_mem_free(const buf_block_t &block, rec_t *rec, else { byte *page_free= my_assume_aligned<2>(PAGE_FREE + PAGE_HEADER + - block.frame); + block.page.frame); byte *page_garbage= my_assume_aligned<2>(PAGE_GARBAGE + PAGE_HEADER + - block.frame); + block.page.frame); mach_write_to_2(page_free, page_offset(rec)); mach_write_to_2(page_garbage, mach_read_from_2(page_garbage) + extra_size + data_size); } - memset_aligned<2>(PAGE_LAST_INSERT + PAGE_HEADER + block.frame, 0, 2); + memset_aligned<2>(PAGE_LAST_INSERT + PAGE_HEADER + block.page.frame, 0, 2); byte *page_n_recs= my_assume_aligned<2>(PAGE_N_RECS + PAGE_HEADER + - block.frame); + block.page.frame); mach_write_to_2(page_n_recs, mach_read_from_2(page_n_recs) - 1); const byte* const end= rec + data_size; @@ -2112,7 +2109,7 @@ static void page_mem_free(const buf_block_t &block, rec_t *rec, uint16_t next= free ? ((n_heap & 0x8000) ? 
static_cast<uint16_t>(free - rec) - : static_cast<uint16_t>(free - block.frame)) + : static_cast<uint16_t>(free - block.page.frame)) : uint16_t{0}; mach_write_to_2(rec - REC_NEXT, next); } @@ -2144,18 +2141,20 @@ page_cur_delete_rec( /* page_zip_validate() will fail here when btr_cur_pessimistic_delete() invokes btr_set_min_rec_mark(). - Then, both "page_zip" and "block->frame" would have the min-rec-mark - set on the smallest user record, but "block->frame" would additionally - have it set on the smallest-but-one record. Because sloppy + Then, both "page_zip" and "block->page.frame" would have the + min-rec-mark set on the smallest user record, but + "block->page.frame" would additionally have it set on the + smallest-but-one record. Because sloppy page_zip_validate_low() only ignores min-rec-flag differences in the smallest user record, it cannot be used here either. */ current_rec = cursor->rec; buf_block_t* const block = cursor->block; ut_ad(rec_offs_validate(current_rec, index, offsets)); - ut_ad(!!page_is_comp(block->frame) == index->table->not_redundant()); - ut_ad(fil_page_index_page_check(block->frame)); - ut_ad(mach_read_from_8(PAGE_HEADER + PAGE_INDEX_ID + block->frame) + ut_ad(!!page_is_comp(block->page.frame) + == index->table->not_redundant()); + ut_ad(fil_page_index_page_check(block->page.frame)); + ut_ad(mach_read_from_8(PAGE_HEADER + PAGE_INDEX_ID + block->page.frame) == index->id || mtr->is_inside_ibuf()); ut_ad(mtr->is_named_space(index->table->space)); @@ -2163,10 +2162,10 @@ page_cur_delete_rec( /* The record must not be the supremum or infimum record. */ ut_ad(page_rec_is_user_rec(current_rec)); - if (page_get_n_recs(block->frame) == 1 + if (page_get_n_recs(block->page.frame) == 1 && !rec_is_alter_metadata(current_rec, *index)) { /* Empty the page. */ - ut_ad(page_is_leaf(block->frame)); + ut_ad(page_is_leaf(block->page.frame)); /* Usually, this should be the root page, and the whole index tree should become empty. 
However, this could also be a call in @@ -2182,7 +2181,7 @@ page_cur_delete_rec( /* Save to local variables some data associated with current_rec */ cur_slot_no = page_dir_find_owner_slot(current_rec); ut_ad(cur_slot_no > 0); - cur_dir_slot = page_dir_get_nth_slot(block->frame, cur_slot_no); + cur_dir_slot = page_dir_get_nth_slot(block->page.frame, cur_slot_no); cur_n_owned = page_dir_slot_get_n_owned(cur_dir_slot); /* The page gets invalid for btr_pcur_restore_pos(). @@ -2222,7 +2221,7 @@ page_cur_delete_rec( (page_dir_slot_get_rec(cur_dir_slot)); if (UNIV_LIKELY_NULL(block->page.zip.data)) { - ut_ad(page_is_comp(block->frame)); + ut_ad(page_is_comp(block->page.frame)); if (current_rec == slot_rec) { page_zip_rec_set_owned(block, prev_rec, 1, mtr); page_zip_rec_set_owned(block, slot_rec, 0, mtr); @@ -2241,7 +2240,7 @@ page_cur_delete_rec( page_header_reset_last_insert(block, mtr); page_zip_dir_delete(block, rec, index, offsets, - page_header_get_ptr(block->frame, + page_header_get_ptr(block->page.frame, PAGE_FREE), mtr); if (cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED) { @@ -2258,7 +2257,7 @@ page_cur_delete_rec( const size_t data_size = rec_offs_data_size(offsets); const size_t extra_size = rec_offs_extra_size(offsets); - if (page_is_comp(block->frame)) { + if (page_is_comp(block->page.frame)) { mtr->page_delete(*block, page_offset(prev_rec) - PAGE_NEW_INFIMUM, extra_size - REC_N_NEW_EXTRA_BYTES, @@ -2287,9 +2286,9 @@ page_cur_delete_rec( page_dir_balance_slot(*block, cur_slot_no); } - ut_ad(page_is_comp(block->frame) - ? page_simple_validate_new(block->frame) - : page_simple_validate_old(block->frame)); + ut_ad(page_is_comp(block->page.frame) + ? 
page_simple_validate_new(block->page.frame) + : page_simple_validate_old(block->page.frame)); } /** Apply a INSERT_HEAP_REDUNDANT or INSERT_REUSE_REDUNDANT record that was @@ -2308,18 +2307,17 @@ bool page_apply_insert_redundant(const buf_block_t &block, bool reuse, size_t hdr_c, size_t data_c, const void *data, size_t data_len) { - const uint16_t n_slots= page_dir_get_n_slots(block.frame); - byte *page_n_heap= my_assume_aligned<2>(PAGE_N_HEAP + PAGE_HEADER + - block.frame); + page_t * const page= block.page.frame; + const uint16_t n_slots= page_dir_get_n_slots(page); + byte *page_n_heap= my_assume_aligned<2>(PAGE_N_HEAP + PAGE_HEADER + page); const uint16_t h= mach_read_from_2(page_n_heap); const page_id_t id(block.page.id()); if (UNIV_UNLIKELY(n_slots < 2 || h < n_slots || h < PAGE_HEAP_NO_USER_LOW || h >= srv_page_size / REC_N_OLD_EXTRA_BYTES || - !fil_page_index_page_check(block.frame) || - page_get_page_no(block.frame) != id.page_no() || + !fil_page_index_page_check(page) || + page_get_page_no(page) != id.page_no() || mach_read_from_2(my_assume_aligned<2> - (PAGE_OLD_SUPREMUM - REC_NEXT + - block.frame)))) + (PAGE_OLD_SUPREMUM - REC_NEXT + page)))) { corrupted: ib::error() << (reuse @@ -2331,19 +2329,19 @@ corrupted: return true; } - byte * const last_slot= page_dir_get_nth_slot(block.frame, n_slots - 1); + byte * const last_slot= page_dir_get_nth_slot(page, n_slots - 1); byte * const page_heap_top= my_assume_aligned<2> - (PAGE_HEAP_TOP + PAGE_HEADER + block.frame); - const byte *const heap_bot= &block.frame[PAGE_OLD_SUPREMUM_END]; - byte *heap_top= block.frame + mach_read_from_2(page_heap_top); + (PAGE_HEAP_TOP + PAGE_HEADER + page); + const byte *const heap_bot= &page[PAGE_OLD_SUPREMUM_END]; + byte *heap_top= page + mach_read_from_2(page_heap_top); if (UNIV_UNLIKELY(heap_bot > heap_top || heap_top > last_slot)) goto corrupted; if (UNIV_UNLIKELY(mach_read_from_2(last_slot) != PAGE_OLD_SUPREMUM)) goto corrupted; - if 
(UNIV_UNLIKELY(mach_read_from_2(page_dir_get_nth_slot(block.frame, 0)) != + if (UNIV_UNLIKELY(mach_read_from_2(page_dir_get_nth_slot(page, 0)) != PAGE_OLD_INFIMUM)) goto corrupted; - rec_t * const prev_rec= block.frame + PAGE_OLD_INFIMUM + prev; + rec_t * const prev_rec= page + PAGE_OLD_INFIMUM + prev; if (!prev); else if (UNIV_UNLIKELY(heap_bot + (REC_N_OLD_EXTRA_BYTES + 1) > prev_rec || prev_rec > heap_top)) @@ -2355,7 +2353,7 @@ corrupted: goto corrupted; const ulint pextra_size= REC_N_OLD_EXTRA_BYTES + (rec_get_1byte_offs_flag(prev_rec) ? pn_fields : pn_fields * 2); - if (prev_rec == &block.frame[PAGE_OLD_INFIMUM]); + if (prev_rec == &page[PAGE_OLD_INFIMUM]); else if (UNIV_UNLIKELY(prev_rec - pextra_size < heap_bot)) goto corrupted; if (UNIV_UNLIKELY(hdr_c && prev_rec - hdr_c < heap_bot)) @@ -2363,8 +2361,8 @@ corrupted: const ulint pdata_size= rec_get_data_size_old(prev_rec); if (UNIV_UNLIKELY(prev_rec + pdata_size > heap_top)) goto corrupted; - rec_t * const next_rec= block.frame + mach_read_from_2(prev_rec - REC_NEXT); - if (next_rec == block.frame + PAGE_OLD_SUPREMUM); + rec_t * const next_rec= page + mach_read_from_2(prev_rec - REC_NEXT); + if (next_rec == page + PAGE_OLD_SUPREMUM); else if (UNIV_UNLIKELY(heap_bot + REC_N_OLD_EXTRA_BYTES > next_rec || next_rec > heap_top)) goto corrupted; @@ -2389,8 +2387,8 @@ corrupted: for (ulint ns= PAGE_DIR_SLOT_MAX_N_OWNED; !(n_owned= rec_get_n_owned_old(owner_rec)); ) { - owner_rec= block.frame + mach_read_from_2(owner_rec - REC_NEXT); - if (owner_rec == &block.frame[PAGE_OLD_SUPREMUM]); + owner_rec= page + mach_read_from_2(owner_rec - REC_NEXT); + if (owner_rec == &page[PAGE_OLD_SUPREMUM]); else if (UNIV_UNLIKELY(heap_bot + REC_N_OLD_EXTRA_BYTES > owner_rec || owner_rec > heap_top)) goto corrupted; @@ -2404,10 +2402,10 @@ corrupted: goto corrupted; else { - mach_write_to_2(insert_buf, owner_rec - block.frame); + mach_write_to_2(insert_buf, owner_rec - page); static_assert(PAGE_DIR_SLOT_SIZE == 2, "compatibility"); 
const page_dir_slot_t * const first_slot= - page_dir_get_nth_slot(block.frame, 0); + page_dir_get_nth_slot(page, 0); while (memcmp_aligned<2>(owner_slot, insert_buf, 2)) if ((owner_slot+= 2) == first_slot) @@ -2436,8 +2434,8 @@ corrupted: if (reuse) { byte *page_free= my_assume_aligned<2>(PAGE_FREE + PAGE_HEADER + - block.frame); - rec_t *free_rec= block.frame + mach_read_from_2(page_free); + page); + rec_t *free_rec= page + mach_read_from_2(page_free); if (UNIV_UNLIKELY(heap_bot + REC_N_OLD_EXTRA_BYTES > free_rec || free_rec > heap_top)) goto corrupted; @@ -2456,9 +2454,9 @@ corrupted: fextra_size + fdata_size)) goto corrupted; buf= free_rec - fextra_size; - const rec_t *const next_free= block.frame + + const rec_t *const next_free= page + mach_read_from_2(free_rec - REC_NEXT); - if (next_free == block.frame); + if (next_free == page); else if (UNIV_UNLIKELY(next_free < &heap_bot[REC_N_OLD_EXTRA_BYTES + 1] || heap_top < next_free)) goto corrupted; @@ -2482,11 +2480,11 @@ corrupted: ut_ad(data_size - data_c == data_len - (extra_size - hdr_c)); byte *page_last_insert= my_assume_aligned<2>(PAGE_LAST_INSERT + PAGE_HEADER + - block.frame); + page); const uint16_t last_insert= mach_read_from_2(page_last_insert); memcpy(buf, insert_buf, extra_size); buf+= extra_size; - mach_write_to_2(page_last_insert, buf - block.frame); + mach_write_to_2(page_last_insert, buf - page); memcpy(prev_rec - REC_NEXT, page_last_insert, 2); memcpy(buf, prev_rec, data_c); memcpy(buf + data_c, static_cast<const byte*>(data) + (extra_size - hdr_c), @@ -2495,25 +2493,25 @@ corrupted: REC_N_OWNED_MASK, REC_N_OWNED_SHIFT); /* Update PAGE_DIRECTION_B, PAGE_N_DIRECTION if needed */ - if (block.frame[FIL_PAGE_TYPE + 1] != byte(FIL_PAGE_RTREE)) + if (page[FIL_PAGE_TYPE + 1] != byte(FIL_PAGE_RTREE)) { - byte *dir= &block.frame[PAGE_DIRECTION_B + PAGE_HEADER]; + byte *dir= &page[PAGE_DIRECTION_B + PAGE_HEADER]; byte *n_dir= my_assume_aligned<2> - (&block.frame[PAGE_N_DIRECTION + PAGE_HEADER]); + 
(&page[PAGE_N_DIRECTION + PAGE_HEADER]); if (UNIV_UNLIKELY(!last_insert)) { no_direction: *dir= static_cast<byte>((*dir & ~((1U << 3) - 1)) | PAGE_NO_DIRECTION); memset(n_dir, 0, 2); } - else if (block.frame + last_insert == prev_rec && + else if (page + last_insert == prev_rec && (*dir & ((1U << 3) - 1)) != PAGE_LEFT) { *dir= static_cast<byte>((*dir & ~((1U << 3) - 1)) | PAGE_RIGHT); inc_dir: mach_write_to_2(n_dir, mach_read_from_2(n_dir) + 1); } - else if (next_rec == block.frame + last_insert && + else if (next_rec == page + last_insert && (*dir & ((1U << 3) - 1)) != PAGE_RIGHT) { *dir= static_cast<byte>((*dir & ~((1U << 3) - 1)) | PAGE_LEFT); @@ -2524,14 +2522,13 @@ inc_dir: } /* Update PAGE_N_RECS. */ - byte *page_n_recs= my_assume_aligned<2>(PAGE_N_RECS + PAGE_HEADER + - block.frame); + byte *page_n_recs= my_assume_aligned<2>(PAGE_N_RECS + PAGE_HEADER + page); mach_write_to_2(page_n_recs, mach_read_from_2(page_n_recs) + 1); if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) page_dir_split_slot(block, owner_slot); - ut_ad(page_simple_validate_old(block.frame)); + ut_ad(page_simple_validate_old(page)); return false; } @@ -2552,21 +2549,20 @@ bool page_apply_insert_dynamic(const buf_block_t &block, bool reuse, size_t hdr_c, size_t data_c, const void *data, size_t data_len) { - const uint16_t n_slots= page_dir_get_n_slots(block.frame); - byte *page_n_heap= my_assume_aligned<2>(PAGE_N_HEAP + PAGE_HEADER + - block.frame); + page_t * const page= block.page.frame; + const uint16_t n_slots= page_dir_get_n_slots(page); + byte *page_n_heap= my_assume_aligned<2>(PAGE_N_HEAP + PAGE_HEADER + page); ulint h= mach_read_from_2(page_n_heap); const page_id_t id(block.page.id()); if (UNIV_UNLIKELY(n_slots < 2 || h < (PAGE_HEAP_NO_USER_LOW | 0x8000) || (h & 0x7fff) >= srv_page_size / REC_N_NEW_EXTRA_BYTES || (h & 0x7fff) < n_slots || - !fil_page_index_page_check(block.frame) || - page_get_page_no(block.frame) != id.page_no() || + !fil_page_index_page_check(page) || + 
page_get_page_no(page) != id.page_no() || mach_read_from_2(my_assume_aligned<2> - (PAGE_NEW_SUPREMUM - REC_NEXT + - block.frame)) || + (PAGE_NEW_SUPREMUM - REC_NEXT + page)) || ((enc_hdr_l & REC_STATUS_INSTANT) && - !page_is_leaf(block.frame)) || + !page_is_leaf(page)) || (enc_hdr_l >> 3) > data_len)) { corrupted: @@ -2579,42 +2575,42 @@ corrupted: return true; } - byte * const last_slot= page_dir_get_nth_slot(block.frame, n_slots - 1); + byte * const last_slot= page_dir_get_nth_slot(page, n_slots - 1); byte * const page_heap_top= my_assume_aligned<2> - (PAGE_HEAP_TOP + PAGE_HEADER + block.frame); - const byte *const heap_bot= &block.frame[PAGE_NEW_SUPREMUM_END]; - byte *heap_top= block.frame + mach_read_from_2(page_heap_top); + (PAGE_HEAP_TOP + PAGE_HEADER + page); + const byte *const heap_bot= &page[PAGE_NEW_SUPREMUM_END]; + byte *heap_top= page + mach_read_from_2(page_heap_top); if (UNIV_UNLIKELY(heap_bot > heap_top || heap_top > last_slot)) goto corrupted; if (UNIV_UNLIKELY(mach_read_from_2(last_slot) != PAGE_NEW_SUPREMUM)) goto corrupted; - if (UNIV_UNLIKELY(mach_read_from_2(page_dir_get_nth_slot(block.frame, 0)) != + if (UNIV_UNLIKELY(mach_read_from_2(page_dir_get_nth_slot(page, 0)) != PAGE_NEW_INFIMUM)) goto corrupted; uint16_t n= static_cast<uint16_t>(PAGE_NEW_INFIMUM + prev); - rec_t *prev_rec= block.frame + n; + rec_t *prev_rec= page + n; n= static_cast<uint16_t>(n + mach_read_from_2(prev_rec - REC_NEXT)); if (!prev); else if (UNIV_UNLIKELY(heap_bot + REC_N_NEW_EXTRA_BYTES > prev_rec || prev_rec > heap_top)) goto corrupted; - rec_t * const next_rec= block.frame + n; - if (next_rec == block.frame + PAGE_NEW_SUPREMUM); + rec_t * const next_rec= page + n; + if (next_rec == page + PAGE_NEW_SUPREMUM); else if (UNIV_UNLIKELY(heap_bot + REC_N_NEW_EXTRA_BYTES > next_rec || next_rec > heap_top)) goto corrupted; ulint n_owned; rec_t *owner_rec= next_rec; - n= static_cast<uint16_t>(next_rec - block.frame); + n= static_cast<uint16_t>(next_rec - page); for (ulint ns= 
PAGE_DIR_SLOT_MAX_N_OWNED; !(n_owned= rec_get_n_owned_new(owner_rec)); ) { n= static_cast<uint16_t>(n + mach_read_from_2(owner_rec - REC_NEXT)); - owner_rec= block.frame + n; + owner_rec= page + n; if (n == PAGE_NEW_SUPREMUM); else if (UNIV_UNLIKELY(heap_bot + REC_N_NEW_EXTRA_BYTES > owner_rec || owner_rec > heap_top)) @@ -2631,9 +2627,9 @@ corrupted: { static_assert(PAGE_DIR_SLOT_SIZE == 2, "compatibility"); alignas(2) byte slot_buf[2]; - mach_write_to_2(slot_buf, owner_rec - block.frame); + mach_write_to_2(slot_buf, owner_rec - page); const page_dir_slot_t * const first_slot= - page_dir_get_nth_slot(block.frame, 0); + page_dir_get_nth_slot(page, 0); while (memcmp_aligned<2>(owner_slot, slot_buf, 2)) if ((owner_slot+= 2) == first_slot) @@ -2647,9 +2643,8 @@ corrupted: byte *buf; if (reuse) { - byte *page_free= my_assume_aligned<2>(PAGE_FREE + PAGE_HEADER + - block.frame); - rec_t *free_rec= block.frame + mach_read_from_2(page_free); + byte *page_free= my_assume_aligned<2>(PAGE_FREE + PAGE_HEADER + page); + rec_t *free_rec= page + mach_read_from_2(page_free); if (UNIV_UNLIKELY(heap_bot + REC_N_NEW_EXTRA_BYTES > free_rec || free_rec > heap_top)) goto corrupted; @@ -2667,9 +2662,9 @@ corrupted: goto corrupted; if ((n= mach_read_from_2(free_rec - REC_NEXT)) != 0) { - n= static_cast<uint16_t>(n + free_rec - block.frame); + n= static_cast<uint16_t>(n + free_rec - page); if (UNIV_UNLIKELY(n < PAGE_NEW_SUPREMUM_END + REC_N_NEW_EXTRA_BYTES || - heap_top < block.frame + n)) + heap_top < page + n)) goto corrupted; } mach_write_to_2(page_free, n); @@ -2700,7 +2695,7 @@ corrupted: h= (h & ((1U << 5) - 1)) << 3; static_assert(REC_STATUS_ORDINARY == 0, "compatibility"); static_assert(REC_STATUS_INSTANT == 4, "compatibility"); - if (page_is_leaf(block.frame)) + if (page_is_leaf(page)) h|= enc_hdr_l & REC_STATUS_INSTANT; else { @@ -2712,9 +2707,9 @@ corrupted: buf+= REC_NEXT; mach_write_to_2(buf - REC_NEXT, static_cast<uint16_t>(next_rec - buf)); byte *page_last_insert= 
my_assume_aligned<2>(PAGE_LAST_INSERT + PAGE_HEADER + - block.frame); + page); const uint16_t last_insert= mach_read_from_2(page_last_insert); - mach_write_to_2(page_last_insert, buf - block.frame); + mach_write_to_2(page_last_insert, buf - page); mach_write_to_2(prev_rec - REC_NEXT, static_cast<uint16_t>(buf - prev_rec)); memcpy(buf, prev_rec, data_c); buf+= data_c; @@ -2724,25 +2719,24 @@ corrupted: REC_N_OWNED_MASK, REC_N_OWNED_SHIFT); /* Update PAGE_DIRECTION_B, PAGE_N_DIRECTION if needed */ - if (block.frame[FIL_PAGE_TYPE + 1] != byte(FIL_PAGE_RTREE)) + if (page[FIL_PAGE_TYPE + 1] != byte(FIL_PAGE_RTREE)) { - byte *dir= &block.frame[PAGE_DIRECTION_B + PAGE_HEADER]; - byte *n_dir= my_assume_aligned<2> - (&block.frame[PAGE_N_DIRECTION + PAGE_HEADER]); + byte *dir= &page[PAGE_DIRECTION_B + PAGE_HEADER]; + byte *n_dir= my_assume_aligned<2>(&page[PAGE_N_DIRECTION + PAGE_HEADER]); if (UNIV_UNLIKELY(!last_insert)) { no_direction: *dir= static_cast<byte>((*dir & ~((1U << 3) - 1)) | PAGE_NO_DIRECTION); memset(n_dir, 0, 2); } - else if (block.frame + last_insert == prev_rec && + else if (page + last_insert == prev_rec && (*dir & ((1U << 3) - 1)) != PAGE_LEFT) { *dir= static_cast<byte>((*dir & ~((1U << 3) - 1)) | PAGE_RIGHT); inc_dir: mach_write_to_2(n_dir, mach_read_from_2(n_dir) + 1); } - else if (next_rec == block.frame + last_insert && + else if (next_rec == page + last_insert && (*dir & ((1U << 3) - 1)) != PAGE_RIGHT) { *dir= static_cast<byte>((*dir & ~((1U << 3) - 1)) | PAGE_LEFT); @@ -2753,14 +2747,13 @@ inc_dir: } /* Update PAGE_N_RECS. 
*/ - byte *page_n_recs= my_assume_aligned<2>(PAGE_N_RECS + PAGE_HEADER + - block.frame); + byte *page_n_recs= my_assume_aligned<2>(PAGE_N_RECS + PAGE_HEADER + page); mach_write_to_2(page_n_recs, mach_read_from_2(page_n_recs) + 1); if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) page_dir_split_slot(block, owner_slot); - ut_ad(page_simple_validate_new(block.frame)); + ut_ad(page_simple_validate_new(page)); return false; } @@ -2771,17 +2764,17 @@ page_cur_delete_rec() for a ROW_FORMAT=REDUNDANT page. @return whether the operation failed (inconcistency was noticed) */ bool page_apply_delete_redundant(const buf_block_t &block, ulint prev) { - const uint16_t n_slots= page_dir_get_n_slots(block.frame); - ulint n_recs= page_get_n_recs(block.frame); + page_t * const page= block.page.frame; + const uint16_t n_slots= page_dir_get_n_slots(page); + ulint n_recs= page_get_n_recs(page); const page_id_t id(block.page.id()); if (UNIV_UNLIKELY(!n_recs || n_slots < 2 || - !fil_page_index_page_check(block.frame) || - page_get_page_no(block.frame) != id.page_no() || + !fil_page_index_page_check(page) || + page_get_page_no(page) != id.page_no() || mach_read_from_2(my_assume_aligned<2> - (PAGE_OLD_SUPREMUM - REC_NEXT + - block.frame)) || - page_is_comp(block.frame))) + (PAGE_OLD_SUPREMUM - REC_NEXT + page)) || + page_is_comp(page))) { corrupted: ib::error() << "Not applying DELETE_ROW_FORMAT_REDUNDANT" @@ -2789,12 +2782,12 @@ corrupted: return true; } - byte *slot= page_dir_get_nth_slot(block.frame, n_slots - 1); - rec_t *prev_rec= block.frame + PAGE_OLD_INFIMUM + prev; + byte *slot= page_dir_get_nth_slot(page, n_slots - 1); + rec_t *prev_rec= page + PAGE_OLD_INFIMUM + prev; if (UNIV_UNLIKELY(prev_rec > slot)) goto corrupted; uint16_t n= mach_read_from_2(prev_rec - REC_NEXT); - rec_t *rec= block.frame + n; + rec_t *rec= page + n; if (UNIV_UNLIKELY(n < PAGE_OLD_SUPREMUM_END + REC_N_OLD_EXTRA_BYTES || slot < rec)) goto corrupted; @@ -2806,7 +2799,7 @@ corrupted: goto corrupted; n= 
mach_read_from_2(rec - REC_NEXT); - rec_t *next= block.frame + n; + rec_t *next= page + n; if (n == PAGE_OLD_SUPREMUM); else if (UNIV_UNLIKELY(n < PAGE_OLD_SUPREMUM_END + REC_N_OLD_EXTRA_BYTES || slot < next)) @@ -2817,7 +2810,7 @@ corrupted: for (ulint i= n_recs; !(slot_owned= rec_get_n_owned_old(s)); ) { n= mach_read_from_2(s - REC_NEXT); - s= block.frame + n; + s= page + n; if (n == PAGE_OLD_SUPREMUM); else if (UNIV_UNLIKELY(n < PAGE_OLD_SUPREMUM_END + REC_N_OLD_EXTRA_BYTES || slot < s)) @@ -2829,9 +2822,9 @@ corrupted: /* The first slot is always pointing to the infimum record. Find the directory slot pointing to s. */ - const byte * const first_slot= block.frame + srv_page_size - (PAGE_DIR + 2); + const byte * const first_slot= page + srv_page_size - (PAGE_DIR + 2); alignas(2) byte slot_offs[2]; - mach_write_to_2(slot_offs, s - block.frame); + mach_write_to_2(slot_offs, s - page); static_assert(PAGE_DIR_SLOT_SIZE == 2, "compatibility"); while (memcmp_aligned<2>(slot, slot_offs, 2)) @@ -2841,7 +2834,7 @@ corrupted: if (rec == s) { s= prev_rec; - mach_write_to_2(slot, s - block.frame); + mach_write_to_2(slot, s - page); } memcpy(prev_rec - REC_NEXT, rec - REC_NEXT, 2); @@ -2853,7 +2846,7 @@ corrupted: if (slot_owned < PAGE_DIR_SLOT_MIN_N_OWNED) page_dir_balance_slot(block, (first_slot - slot) / 2); - ut_ad(page_simple_validate_old(block.frame)); + ut_ad(page_simple_validate_old(page)); return false; } @@ -2867,17 +2860,17 @@ page_cur_delete_rec() for a ROW_FORMAT=COMPACT or DYNAMIC page. 
bool page_apply_delete_dynamic(const buf_block_t &block, ulint prev, size_t hdr_size, size_t data_size) { - const uint16_t n_slots= page_dir_get_n_slots(block.frame); - ulint n_recs= page_get_n_recs(block.frame); + page_t * const page= block.page.frame; + const uint16_t n_slots= page_dir_get_n_slots(page); + ulint n_recs= page_get_n_recs(page); const page_id_t id(block.page.id()); if (UNIV_UNLIKELY(!n_recs || n_slots < 2 || - !fil_page_index_page_check(block.frame) || - page_get_page_no(block.frame) != id.page_no() || + !fil_page_index_page_check(page) || + page_get_page_no(page) != id.page_no() || mach_read_from_2(my_assume_aligned<2> - (PAGE_NEW_SUPREMUM - REC_NEXT + - block.frame)) || - !page_is_comp(block.frame))) + (PAGE_NEW_SUPREMUM - REC_NEXT + page)) || + !page_is_comp(page))) { corrupted: ib::error() << "Not applying DELETE_ROW_FORMAT_DYNAMIC" @@ -2885,13 +2878,13 @@ corrupted: return true; } - byte *slot= page_dir_get_nth_slot(block.frame, n_slots - 1); + byte *slot= page_dir_get_nth_slot(page, n_slots - 1); uint16_t n= static_cast<uint16_t>(PAGE_NEW_INFIMUM + prev); - rec_t *prev_rec= block.frame + n; + rec_t *prev_rec= page + n; if (UNIV_UNLIKELY(prev_rec > slot)) goto corrupted; n= static_cast<uint16_t>(n + mach_read_from_2(prev_rec - REC_NEXT)); - rec_t *rec= block.frame + n; + rec_t *rec= page + n; if (UNIV_UNLIKELY(n < PAGE_NEW_SUPREMUM_END + REC_N_NEW_EXTRA_BYTES || slot < rec)) goto corrupted; @@ -2900,14 +2893,14 @@ corrupted: slot < rec + data_size)) goto corrupted; n= static_cast<uint16_t>(n + mach_read_from_2(rec - REC_NEXT)); - rec_t *next= block.frame + n; + rec_t *next= page + n; if (n == PAGE_NEW_SUPREMUM); else if (UNIV_UNLIKELY(n < PAGE_NEW_SUPREMUM_END + REC_N_NEW_EXTRA_BYTES || slot < next)) goto corrupted; rec_t *s= rec; - n= static_cast<uint16_t>(rec - block.frame); + n= static_cast<uint16_t>(rec - page); ulint slot_owned; for (ulint i= n_recs; !(slot_owned= rec_get_n_owned_new(s)); ) { @@ -2916,7 +2909,7 @@ corrupted: next > 
static_cast<uint16_t>(-REC_N_NEW_EXTRA_BYTES))) goto corrupted; n= static_cast<uint16_t>(n + next); - s= block.frame + n; + s= page + n; if (n == PAGE_NEW_SUPREMUM); else if (UNIV_UNLIKELY(n < PAGE_NEW_SUPREMUM_END + REC_N_NEW_EXTRA_BYTES || slot < s)) @@ -2928,9 +2921,9 @@ corrupted: /* The first slot is always pointing to the infimum record. Find the directory slot pointing to s. */ - const byte * const first_slot= block.frame + srv_page_size - (PAGE_DIR + 2); + const byte * const first_slot= page + srv_page_size - (PAGE_DIR + 2); alignas(2) byte slot_offs[2]; - mach_write_to_2(slot_offs, s - block.frame); + mach_write_to_2(slot_offs, s - page); static_assert(PAGE_DIR_SLOT_SIZE == 2, "compatibility"); while (memcmp_aligned<2>(slot, slot_offs, 2)) @@ -2940,7 +2933,7 @@ corrupted: if (rec == s) { s= prev_rec; - mach_write_to_2(slot, s - block.frame); + mach_write_to_2(slot, s - page); } mach_write_to_2(prev_rec - REC_NEXT, static_cast<uint16_t>(next - prev_rec)); @@ -2952,7 +2945,7 @@ corrupted: if (slot_owned < PAGE_DIR_SLOT_MIN_N_OWNED) page_dir_balance_slot(block, (first_slot - slot) / 2); - ut_ad(page_simple_validate_new(block.frame)); + ut_ad(page_simple_validate_new(page)); return false; } diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc index 1b8b3cb339f..2f85ef94233 100644 --- a/storage/innobase/page/page0page.cc +++ b/storage/innobase/page/page0page.cc @@ -201,7 +201,7 @@ page_set_max_trx_id( ut_ad(!page_zip || page_zip == &block->page.zip); static_assert((PAGE_HEADER + PAGE_MAX_TRX_ID) % 8 == 0, "alignment"); byte *max_trx_id= my_assume_aligned<8>(PAGE_MAX_TRX_ID + - PAGE_HEADER + block->frame); + PAGE_HEADER + block->page.frame); mtr->write<8>(*block, max_trx_id, trx_id); if (UNIV_LIKELY_NULL(page_zip)) @@ -228,7 +228,7 @@ page_set_autoinc( MTR_MEMO_PAGE_SX_FIX)); byte *field= my_assume_aligned<8>(PAGE_HEADER + PAGE_ROOT_AUTO_INC + - block->frame); + block->page.frame); ib_uint64_t old= mach_read_from_8(field); if (old 
== autoinc || (old > autoinc && !reset)) return; /* nothing to update */ @@ -283,7 +283,7 @@ void page_create_low(const buf_block_t* block, bool comp) compile_time_assert(PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE <= PAGE_DATA); - page = block->frame; + page = block->page.frame; fil_page_set_type(page, FIL_PAGE_INDEX); @@ -370,13 +370,14 @@ page_create_zip( page_create_low(block, true); if (index->is_spatial()) { - mach_write_to_2(FIL_PAGE_TYPE + block->frame, FIL_PAGE_RTREE); - memset(block->frame + FIL_RTREE_SPLIT_SEQ_NUM, 0, 8); + mach_write_to_2(FIL_PAGE_TYPE + block->page.frame, + FIL_PAGE_RTREE); + memset(block->page.frame + FIL_RTREE_SPLIT_SEQ_NUM, 0, 8); memset(block->page.zip.data + FIL_RTREE_SPLIT_SEQ_NUM, 0, 8); } - mach_write_to_2(PAGE_HEADER + PAGE_LEVEL + block->frame, level); - mach_write_to_8(PAGE_HEADER + PAGE_MAX_TRX_ID + block->frame, + mach_write_to_2(PAGE_HEADER + PAGE_LEVEL + block->page.frame, level); + mach_write_to_8(PAGE_HEADER + PAGE_MAX_TRX_ID + block->page.frame, max_trx_id); if (!page_zip_compress(block, index, page_zip_level, mtr)) { @@ -398,7 +399,7 @@ page_create_empty( trx_id_t max_trx_id; page_zip_des_t* page_zip= buf_block_get_page_zip(block); - ut_ad(fil_page_index_page_check(block->frame)); + ut_ad(fil_page_index_page_check(block->page.frame)); ut_ad(!index->is_dummy); ut_ad(block->page.id().space() == index->table->space->id); @@ -408,12 +409,12 @@ page_create_empty( for MVCC. */ if (dict_index_is_sec_or_ibuf(index) && !index->table->is_temporary() - && page_is_leaf(block->frame)) { - max_trx_id = page_get_max_trx_id(block->frame); + && page_is_leaf(block->page.frame)) { + max_trx_id = page_get_max_trx_id(block->page.frame); ut_ad(max_trx_id); } else if (block->page.id().page_no() == index->page) { /* Preserve PAGE_ROOT_AUTO_INC. 
*/ - max_trx_id = page_get_max_trx_id(block->frame); + max_trx_id = page_get_max_trx_id(block->page.frame); } else { max_trx_id = 0; } @@ -421,7 +422,7 @@ page_create_empty( if (page_zip) { ut_ad(!index->table->is_temporary()); page_create_zip(block, index, - page_header_get_field(block->frame, + page_header_get_field(block->page.frame, PAGE_LEVEL), max_trx_id, mtr); } else { @@ -430,9 +431,10 @@ page_create_empty( static_assert(((FIL_PAGE_INDEX & 0xff00) | byte(FIL_PAGE_RTREE)) == FIL_PAGE_RTREE, "compatibility"); - mtr->write<1>(*block, FIL_PAGE_TYPE + 1 + block->frame, + mtr->write<1>(*block, + FIL_PAGE_TYPE + 1 + block->page.frame, byte(FIL_PAGE_RTREE)); - if (mach_read_from_8(block->frame + if (mach_read_from_8(block->page.frame + FIL_RTREE_SPLIT_SEQ_NUM)) { mtr->memset(block, FIL_RTREE_SPLIT_SEQ_NUM, 8, 0); @@ -441,7 +443,7 @@ page_create_empty( if (max_trx_id) { mtr->write<8>(*block, PAGE_HEADER + PAGE_MAX_TRX_ID - + block->frame, max_trx_id); + + block->page.frame, max_trx_id); } } } @@ -482,7 +484,7 @@ page_copy_rec_list_end_no_locks( ut_a(page_is_comp(new_page) == page_rec_is_comp(rec)); ut_a(mach_read_from_2(new_page + srv_page_size - 10) == (ulint) (page_is_comp(new_page) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM)); - const ulint n_core = page_is_leaf(block->frame) + const ulint n_core = page_is_leaf(block->page.frame) ? 
index->n_core_fields : 0; page_cur_set_before_first(new_block, &cur2); @@ -533,9 +535,9 @@ page_copy_rec_list_end( dict_index_t* index, /*!< in: record descriptor */ mtr_t* mtr) /*!< in: mtr */ { - page_t* new_page = buf_block_get_frame(new_block); + page_t* new_page = new_block->page.frame; page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block); - page_t* page = block->frame; + page_t* page = block->page.frame; rec_t* ret = page_rec_get_next( page_get_infimum_rec(new_page)); ulint num_moved = 0; @@ -689,7 +691,7 @@ page_copy_rec_list_start( dict_index_t* index, /*!< in: record descriptor */ mtr_t* mtr) /*!< in: mtr */ { - ut_ad(page_align(rec) == block->frame); + ut_ad(page_align(rec) == block->page.frame); page_t* new_page = buf_block_get_frame(new_block); page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block); @@ -769,7 +771,7 @@ page_copy_rec_list_start( && !index->table->is_temporary()) { page_update_max_trx_id(new_block, new_page_zip, - page_get_max_trx_id(block->frame), + page_get_max_trx_id(block->page.frame), mtr); } @@ -851,12 +853,14 @@ page_delete_rec_list_end( delete, or ULINT_UNDEFINED if not known */ mtr_t* mtr) /*!< in: mtr */ { + page_t * const page= block->page.frame; + ut_ad(size == ULINT_UNDEFINED || size < srv_page_size); - ut_ad(page_align(rec) == block->frame); - ut_ad(index->table->not_redundant() == !!page_is_comp(block->frame)); + ut_ad(page_align(rec) == page); + ut_ad(index->table->not_redundant() == !!page_is_comp(page)); #ifdef UNIV_ZIP_DEBUG ut_a(!block->page.zip.data || - page_zip_validate(&block->page.zip, block->frame, index)); + page_zip_validate(&block->page.zip, page, index)); #endif /* UNIV_ZIP_DEBUG */ if (page_rec_is_supremum(rec)) @@ -866,10 +870,11 @@ page_delete_rec_list_end( return; } - if (page_rec_is_infimum(rec) || n_recs == page_get_n_recs(block->frame) || - rec == (page_is_comp(block->frame) - ? 
page_rec_get_next_low(block->frame + PAGE_NEW_INFIMUM, 1) - : page_rec_get_next_low(block->frame + PAGE_OLD_INFIMUM, 0))) + if (page_rec_is_infimum(rec) || + n_recs == page_get_n_recs(page) || + rec == (page_is_comp(page) + ? page_rec_get_next_low(page + PAGE_NEW_INFIMUM, 1) + : page_rec_get_next_low(page + PAGE_OLD_INFIMUM, 0))) { /* We are deleting all records. */ page_create_empty(block, index, mtr); @@ -888,7 +893,7 @@ page_delete_rec_list_end( /* The page becomes invalid for optimistic searches */ buf_block_modify_clock_inc(block); - const ulint n_core= page_is_leaf(block->frame) ? index->n_core_fields : 0; + const ulint n_core= page_is_leaf(page) ? index->n_core_fields : 0; mem_heap_t *heap= nullptr; rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; rec_offs *offsets= offsets_; @@ -897,7 +902,7 @@ page_delete_rec_list_end( #if 1 // FIXME: remove this, and write minimal amount of log! */ if (UNIV_LIKELY_NULL(block->page.zip.data)) { - ut_ad(page_is_comp(block->frame)); + ut_ad(page_is_comp(page)); do { page_cur_t cur; @@ -906,7 +911,7 @@ page_delete_rec_list_end( ULINT_UNDEFINED, &heap); rec= rec_get_next_ptr(rec, TRUE); #ifdef UNIV_ZIP_DEBUG - ut_a(page_zip_validate(&block->page.zip, block->frame, index)); + ut_a(page_zip_validate(&block->page.zip, page, index)); #endif /* UNIV_ZIP_DEBUG */ page_cur_delete_rec(&cur, index, offsets, mtr); } @@ -919,7 +924,7 @@ page_delete_rec_list_end( #endif byte *prev_rec= page_rec_get_prev(rec); - byte *last_rec= page_rec_get_prev(page_get_supremum_rec(block->frame)); + byte *last_rec= page_rec_get_prev(page_get_supremum_rec(page)); // FIXME: consider a special case of shrinking PAGE_HEAP_TOP @@ -936,7 +941,7 @@ page_delete_rec_list_end( offsets = rec_get_offsets(rec2, index, offsets, n_core, ULINT_UNDEFINED, &heap); ulint s= rec_offs_size(offsets); - ut_ad(ulint(rec2 - block->frame) + s - rec_offs_extra_size(offsets) < + ut_ad(ulint(rec2 - page) + s - rec_offs_extra_size(offsets) < srv_page_size); ut_ad(size + s < srv_page_size); 
size+= s; @@ -960,7 +965,7 @@ page_delete_rec_list_end( const rec_t *owner_rec= rec; ulint count= 0; - if (page_is_comp(block->frame)) + if (page_is_comp(page)) while (!(n_owned= rec_get_n_owned_new(owner_rec))) { count++; @@ -980,16 +985,14 @@ page_delete_rec_list_end( } mtr->write<2,mtr_t::MAYBE_NOP>(*block, my_assume_aligned<2> - (PAGE_N_DIR_SLOTS + PAGE_HEADER + - block->frame), slot_index + 1); + (PAGE_N_DIR_SLOTS + PAGE_HEADER + page), + slot_index + 1); mtr->write<2,mtr_t::MAYBE_NOP>(*block, my_assume_aligned<2> - (PAGE_LAST_INSERT + PAGE_HEADER + - block->frame), 0U); + (PAGE_LAST_INSERT + PAGE_HEADER + page), 0U); /* Catenate the deleted chain segment to the page free list */ alignas(4) byte page_header[4]; - byte *page_free= my_assume_aligned<4>(PAGE_HEADER + PAGE_FREE + - block->frame); - const uint16_t free= page_header_get_field(block->frame, PAGE_FREE); + byte *page_free= my_assume_aligned<4>(PAGE_HEADER + PAGE_FREE + page); + const uint16_t free= page_header_get_field(page, PAGE_FREE); static_assert(PAGE_FREE + 2 == PAGE_GARBAGE, "compatibility"); mach_write_to_2(page_header, page_offset(rec)); @@ -998,20 +1001,19 @@ page_delete_rec_list_end( size); mtr->memcpy(*block, page_free, page_header, 4); - byte *page_n_recs= my_assume_aligned<2>(PAGE_N_RECS + PAGE_HEADER + - block->frame); + byte *page_n_recs= my_assume_aligned<2>(PAGE_N_RECS + PAGE_HEADER + page); mtr->write<2>(*block, page_n_recs, ulint{mach_read_from_2(page_n_recs)} - n_recs); /* Update the page directory; there is no need to balance the number of the records owned by the supremum record, as it is allowed to be less than PAGE_DIR_SLOT_MIN_N_OWNED */ - page_dir_slot_t *slot= page_dir_get_nth_slot(block->frame, slot_index); + page_dir_slot_t *slot= page_dir_get_nth_slot(page, slot_index); - if (page_is_comp(block->frame)) + if (page_is_comp(page)) { mtr->write<2,mtr_t::MAYBE_NOP>(*block, slot, PAGE_NEW_SUPREMUM); - byte *owned= PAGE_NEW_SUPREMUM - REC_NEW_N_OWNED + block->frame; + byte 
*owned= PAGE_NEW_SUPREMUM - REC_NEW_N_OWNED + page; byte new_owned= static_cast<byte>((*owned & ~REC_N_OWNED_MASK) | n_owned << REC_N_OWNED_SHIFT); #if 0 // FIXME: implement minimal logging for ROW_FORMAT=COMPRESSED @@ -1019,7 +1021,7 @@ page_delete_rec_list_end( { *owned= new_owned; memcpy_aligned<2>(PAGE_N_DIR_SLOTS + PAGE_HEADER + block->page.zip.data, - PAGE_N_DIR_SLOTS + PAGE_HEADER + block->frame, + PAGE_N_DIR_SLOTS + PAGE_HEADER + page, PAGE_N_RECS + 2 - PAGE_N_DIR_SLOTS); // TODO: the equivalent of page_zip_dir_delete() for all records mach_write_to_2(prev_rec - REC_NEXT, static_cast<uint16_t> @@ -1040,7 +1042,7 @@ page_delete_rec_list_end( else { mtr->write<2,mtr_t::MAYBE_NOP>(*block, slot, PAGE_OLD_SUPREMUM); - byte *owned= PAGE_OLD_SUPREMUM - REC_OLD_N_OWNED + block->frame; + byte *owned= PAGE_OLD_SUPREMUM - REC_OLD_N_OWNED + page; byte new_owned= static_cast<byte>((*owned & ~REC_N_OWNED_MASK) | n_owned << REC_N_OWNED_SHIFT); mtr->write<1,mtr_t::MAYBE_NOP>(*block, owned, new_owned); @@ -1067,7 +1069,7 @@ page_delete_rec_list_start( rec_offs_init(offsets_); - ut_ad(page_align(rec) == block->frame); + ut_ad(page_align(rec) == block->page.frame); ut_ad((ibool) !!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); #ifdef UNIV_ZIP_DEBUG @@ -1397,7 +1399,7 @@ page_print_list( dict_index_t* index, /*!< in: dictionary index of the page */ ulint pr_n) /*!< in: print n first and n last entries */ { - page_t* page = block->frame; + page_t* page = block->page.frame; page_cur_t cur; ulint count; ulint n_recs; @@ -1498,7 +1500,7 @@ page_print( ulint rn) /*!< in: print rn first and last records in directory */ { - page_t* page = block->frame; + page_t* page = block->page.frame; page_header_print(page); page_dir_print(page, dn); diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc index d3af50551c1..ec90d73e765 100644 --- a/storage/innobase/page/page0zip.cc +++ b/storage/innobase/page/page0zip.cc @@ -418,7 +418,7 @@ static void 
page_zip_compress_write_log(buf_block_t *block, return; } - const page_t *page= block->frame; + const page_t *page= block->page.frame; const page_zip_des_t *page_zip= &block->page.zip; /* Read the number of user records. */ ulint trailer_size= ulint(page_dir_get_n_heap(page_zip->data)) - @@ -443,7 +443,6 @@ static void page_zip_compress_write_log(buf_block_t *block, if (trailer_size) mtr->zmemcpy(*block, page_zip_get_size(page_zip) - trailer_size, trailer_size); - block->page.status = buf_page_t::INIT_ON_FLUSH; /* because of mtr_t::init() */ } /******************************************************//** @@ -1280,7 +1279,7 @@ page_zip_compress( my_bool cmp_per_index_enabled; cmp_per_index_enabled = srv_cmp_per_index_enabled; - page_t* page = block->frame; + page_t* page = block->page.frame; page_zip_des_t* page_zip = &block->page.zip; ut_a(page_is_comp(page)); @@ -3533,7 +3532,7 @@ page_zip_write_rec_ext( ulint len; byte* externs = storage; ulint n_ext = rec_offs_n_extern(offsets); - const page_t* const page = block->frame; + const page_t* const page = block->page.frame; page_zip_des_t* const page_zip = &block->page.zip; ut_ad(rec_offs_validate(rec, index, offsets)); @@ -3651,7 +3650,7 @@ void page_zip_write_rec(buf_block_t *block, const byte *rec, const dict_index_t *index, const rec_offs *offsets, ulint create, mtr_t *mtr) { - const page_t* const page = block->frame; + const page_t* const page = block->page.frame; page_zip_des_t* const page_zip = &block->page.zip; byte* data; byte* storage; @@ -3841,7 +3840,7 @@ page_zip_write_blob_ptr( { const byte* field; byte* externs; - const page_t* const page = block->frame; + const page_t* const page = block->page.frame; page_zip_des_t* const page_zip = &block->page.zip; ulint blob_no; ulint len; @@ -3905,7 +3904,7 @@ page_zip_write_node_ptr( byte* storage; page_zip_des_t* const page_zip = &block->page.zip; - ut_d(const page_t* const page = block->frame); + ut_d(const page_t* const page = block->page.frame); 
ut_ad(page_simple_validate_new(page)); ut_ad(page_zip_simple_validate(page_zip)); ut_ad(page_zip_get_size(page_zip) @@ -3952,7 +3951,7 @@ page_zip_write_trx_id_and_roll_ptr( { page_zip_des_t* const page_zip = &block->page.zip; - ut_d(const page_t* const page = block->frame); + ut_d(const page_t* const page = block->page.frame); ut_ad(page_align(rec) == page); ut_ad(page_simple_validate_new(page)); ut_ad(page_zip_simple_validate(page_zip)); @@ -4053,7 +4052,7 @@ page_zip_clear_rec( byte* field; ulint len; - ut_ad(page_align(rec) == block->frame); + ut_ad(page_align(rec) == block->page.frame); page_zip_des_t* const page_zip = &block->page.zip; /* page_zip_validate() would fail here if a record @@ -4061,7 +4060,7 @@ page_zip_clear_rec( ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(!page_zip_dir_find(page_zip, page_offset(rec))); ut_ad(page_zip_dir_find_free(page_zip, page_offset(rec))); - ut_ad(page_zip_header_cmp(page_zip, block->frame)); + ut_ad(page_zip_header_cmp(page_zip, block->page.frame)); heap_no = rec_get_heap_no_new(rec); ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); @@ -4071,7 +4070,7 @@ page_zip_clear_rec( MEM_CHECK_DEFINED(rec - rec_offs_extra_size(offsets), rec_offs_extra_size(offsets)); - if (!page_is_leaf(block->frame)) { + if (!page_is_leaf(block->page.frame)) { /* Clear node_ptr. On the compressed page, there is an array of node_ptr immediately before the dense page directory, at the very end of the page. 
*/ @@ -4137,7 +4136,7 @@ clear_page_zip: void page_zip_rec_set_deleted(buf_block_t *block, rec_t *rec, bool flag, mtr_t *mtr) { - ut_ad(page_align(rec) == block->frame); + ut_ad(page_align(rec) == block->page.frame); byte *slot= page_zip_dir_find(&block->page.zip, page_offset(rec)); byte b= *slot; if (flag) @@ -4146,7 +4145,7 @@ void page_zip_rec_set_deleted(buf_block_t *block, rec_t *rec, bool flag, b&= byte(~(PAGE_ZIP_DIR_SLOT_DEL >> 8)); mtr->zmemcpy<mtr_t::MAYBE_NOP>(*block, slot, &b, 1); #ifdef UNIV_ZIP_DEBUG - ut_a(page_zip_validate(&block->page.zip, block->frame, nullptr)); + ut_a(page_zip_validate(&block->page.zip, block->page.frame, nullptr)); #endif /* UNIV_ZIP_DEBUG */ } @@ -4161,7 +4160,7 @@ page_zip_rec_set_owned( ulint flag, /*!< in: the owned flag (nonzero=TRUE) */ mtr_t* mtr) /*!< in/out: mini-transaction */ { - ut_ad(page_align(rec) == block->frame); + ut_ad(page_align(rec) == block->page.frame); page_zip_des_t *const page_zip= &block->page.zip; byte *slot= page_zip_dir_find(page_zip, page_offset(rec)); MEM_CHECK_DEFINED(page_zip->data, page_zip_get_size(page_zip)); @@ -4184,8 +4183,8 @@ page_zip_dir_insert( byte* rec, /*!< in: record to insert */ mtr_t* mtr) /*!< in/out: mini-transaction */ { - ut_ad(page_align(cursor->rec) == cursor->block->frame); - ut_ad(page_align(rec) == cursor->block->frame); + ut_ad(page_align(cursor->rec) == cursor->block->page.frame); + ut_ad(page_align(rec) == cursor->block->page.frame); page_zip_des_t *const page_zip= &cursor->block->page.zip; ulint n_dense; @@ -4278,7 +4277,7 @@ void page_zip_dir_delete(buf_block_t *block, byte *rec, const dict_index_t *index, const rec_offs *offsets, const byte *free, mtr_t *mtr) { - ut_ad(page_align(rec) == block->frame); + ut_ad(page_align(rec) == block->page.frame); page_zip_des_t *const page_zip= &block->page.zip; ut_ad(rec_offs_validate(rec, index, offsets)); @@ -4292,22 +4291,22 @@ void page_zip_dir_delete(buf_block_t *block, byte *rec, mach_write_to_2(rec - REC_NEXT, free ? 
static_cast<uint16_t>(free - rec) : 0); byte *page_free= my_assume_aligned<2>(PAGE_FREE + PAGE_HEADER + - block->frame); + block->page.frame); mtr->write<2>(*block, page_free, page_offset(rec)); byte *garbage= my_assume_aligned<2>(PAGE_GARBAGE + PAGE_HEADER + - block->frame); + block->page.frame); mtr->write<2>(*block, garbage, rec_offs_size(offsets) + mach_read_from_2(garbage)); compile_time_assert(PAGE_GARBAGE == PAGE_FREE + 2); memcpy_aligned<4>(PAGE_FREE + PAGE_HEADER + page_zip->data, page_free, 4); byte *slot_rec= page_zip_dir_find(page_zip, page_offset(rec)); ut_a(slot_rec); - uint16_t n_recs= page_get_n_recs(block->frame); + uint16_t n_recs= page_get_n_recs(block->page.frame); ut_ad(n_recs); - ut_ad(n_recs > 1 || page_get_page_no(block->frame) == index->page); + ut_ad(n_recs > 1 || page_get_page_no(block->page.frame) == index->page); /* This could not be done before page_zip_dir_find(). */ byte *page_n_recs= my_assume_aligned<2>(PAGE_N_RECS + PAGE_HEADER + - block->frame); + block->page.frame); mtr->write<2>(*block, page_n_recs, n_recs - 1U); memcpy_aligned<2>(PAGE_N_RECS + PAGE_HEADER + page_zip->data, page_n_recs, 2); @@ -4344,14 +4343,14 @@ void page_zip_dir_delete(buf_block_t *block, byte *rec, if (const ulint n_ext= rec_offs_n_extern(offsets)) { ut_ad(index->is_primary()); - ut_ad(page_is_leaf(block->frame)); + ut_ad(page_is_leaf(block->page.frame)); /* Shift and zero fill the array of BLOB pointers. 
*/ ulint blob_no = page_zip_get_n_prev_extern(page_zip, rec, index); ut_a(blob_no + n_ext <= page_zip->n_blobs); byte *externs= page_zip->data + page_zip_get_size(page_zip) - - (page_dir_get_n_heap(block->frame) - PAGE_HEAP_NO_USER_LOW) * + (page_dir_get_n_heap(block->page.frame) - PAGE_HEAP_NO_USER_LOW) * PAGE_ZIP_CLUST_LEAF_SLOT_SIZE; byte *ext_end= externs - page_zip->n_blobs * FIELD_REF_SIZE; @@ -4416,10 +4415,10 @@ page_zip_reorganize( temp_block = buf_block_alloc(); btr_search_drop_page_hash_index(block); - temp_page = temp_block->frame; + temp_page = temp_block->page.frame; /* Copy the old page to temporary space */ - memcpy_aligned<UNIV_PAGE_SIZE_MIN>(temp_block->frame, block->frame, + memcpy_aligned<UNIV_PAGE_SIZE_MIN>(temp_page, block->page.frame, srv_page_size); /* Recreate the page: note that global data on page (possible @@ -4505,7 +4504,7 @@ page_zip_copy_recs( dict_index_t* index, /*!< in: index of the B-tree */ mtr_t* mtr) /*!< in: mini-transaction */ { - page_t* page = block->frame; + page_t* page = block->page.frame; page_zip_des_t* page_zip = &block->page.zip; ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); @@ -4563,7 +4562,7 @@ page_zip_copy_recs( to the compressed data page. */ { page_zip_t* data = page_zip->data; - memcpy(page_zip, src_zip, sizeof *page_zip); + new (page_zip) page_zip_des_t(*src_zip); page_zip->data = data; } ut_ad(page_zip_get_trailer_len(page_zip, dict_index_is_clust(index)) diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index feb36dba67e..aafe4cc3264 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -295,33 +295,33 @@ public: const dict_index_t* index, rec_offs* offsets) UNIV_NOTHROW { - ut_ad(page_is_leaf(m_cur.block->frame)); + ut_ad(page_is_leaf(m_cur.block->page.frame)); /* We can't end up with an empty page unless it is root. 
*/ - if (page_get_n_recs(m_cur.block->frame) <= 1) { + if (page_get_n_recs(m_cur.block->page.frame) <= 1) { return(false); } if (!rec_offs_any_extern(offsets) && m_cur.block->page.id().page_no() != index->page - && ((page_get_data_size(m_cur.block->frame) + && ((page_get_data_size(m_cur.block->page.frame) - rec_offs_size(offsets) < BTR_CUR_PAGE_COMPRESS_LIMIT(index)) - || !page_has_siblings(m_cur.block->frame) - || (page_get_n_recs(m_cur.block->frame) < 2))) { + || !page_has_siblings(m_cur.block->page.frame) + || (page_get_n_recs(m_cur.block->page.frame) < 2))) { return false; } #ifdef UNIV_ZIP_DEBUG page_zip_des_t* page_zip = buf_block_get_page_zip(m_cur.block); ut_a(!page_zip || page_zip_validate( - page_zip, m_cur.block->frame, index)); + page_zip, m_cur.block->page.frame, index)); #endif /* UNIV_ZIP_DEBUG */ page_cur_delete_rec(&m_cur, index, offsets, &m_mtr); #ifdef UNIV_ZIP_DEBUG ut_a(!page_zip || page_zip_validate( - page_zip, m_cur.block->frame, index)); + page_zip, m_cur.block->page.frame, index)); #endif /* UNIV_ZIP_DEBUG */ return true; @@ -467,7 +467,7 @@ public: Called for every page in the tablespace. If the page was not updated then its state must be set to BUF_PAGE_NOT_USED. For compressed tables the page descriptor memory will be at offset: - block->frame + srv_page_size; + block->page.frame + srv_page_size; @param block block read from file, note it is not from the buffer pool @retval DB_SUCCESS or error code. */ virtual dberr_t operator()(buf_block_t* block) UNIV_NOTHROW = 0; @@ -484,7 +484,7 @@ public: static byte* get_frame(const buf_block_t* block) { return block->page.zip.data - ? block->page.zip.data : block->frame; + ? 
block->page.zip.data : block->page.frame; } /** Invoke the functionality for the callback */ @@ -605,7 +605,7 @@ AbstractCallback::init( os_offset_t file_size, const buf_block_t* block) UNIV_NOTHROW { - const page_t* page = block->frame; + const page_t* page = block->page.frame; m_space_flags = fsp_header_get_flags(page); if (!fil_space_t::is_valid_flags(m_space_flags, true)) { @@ -744,7 +744,7 @@ dberr_t FetchIndexRootPages::operator()(buf_block_t* block) UNIV_NOTHROW return(DB_CORRUPTION); } - if (!page_is_comp(block->frame) != + if (!page_is_comp(block->page.frame) != !dict_table_is_comp(m_table)) { ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH, @@ -1594,7 +1594,8 @@ IndexPurge::next() UNIV_NOTHROW } buf_block_t* block = btr_pcur_get_block(&m_pcur); - uint32_t next_page = btr_page_get_next(block->frame); + uint32_t next_page = btr_page_get_next( + block->page.frame); /* MDEV-13542 FIXME: Make these checks part of btr_pcur_move_to_next_page(), and introduce a @@ -1622,15 +1623,15 @@ IndexPurge::next() UNIV_NOTHROW if (UNIV_UNLIKELY(!next_block || !fil_page_index_page_check( - next_block->frame) + next_block->page.frame) || !!dict_index_is_spatial(index) != (fil_page_get_type( - next_block->frame) + next_block->page.frame) == FIL_PAGE_RTREE) - || page_is_comp(next_block->frame) - != page_is_comp(block->frame) + || page_is_comp(next_block->page.frame) + != page_is_comp(block->page.frame) || btr_page_get_prev( - next_block->frame) + next_block->page.frame) != block->page.id().page_no())) { return DB_CORRUPTION; } @@ -1640,7 +1641,7 @@ IndexPurge::next() UNIV_NOTHROW page_cur_set_before_first(next_block, &m_pcur.btr_cur.page_cur); - ut_d(page_check_dir(next_block->frame)); + ut_d(page_check_dir(next_block->page.frame)); } else { btr_pcur_move_to_next_on_page(&m_pcur); } @@ -1925,7 +1926,7 @@ PageConverter::update_index_page( return(DB_SUCCESS); } - buf_frame_t* page = block->frame; + buf_frame_t* page = block->page.frame; const index_id_t id 
= btr_page_get_index_id(page); if (id != m_index->m_id) { @@ -1976,7 +1977,7 @@ PageConverter::update_index_page( m_index->m_srv_index->id); if (UNIV_LIKELY_NULL(block->page.zip.data)) { memcpy(&block->page.zip.data[PAGE_HEADER + PAGE_INDEX_ID], - &block->frame[PAGE_HEADER + PAGE_INDEX_ID], 8); + &block->page.frame[PAGE_HEADER + PAGE_INDEX_ID], 8); } if (m_index->m_srv_index->is_clust()) { @@ -1985,12 +1986,12 @@ PageConverter::update_index_page( } } else if (page_is_leaf(page)) { /* Set PAGE_MAX_TRX_ID on secondary index leaf pages. */ - mach_write_to_8(&block->frame[PAGE_HEADER + PAGE_MAX_TRX_ID], - m_trx->id); + mach_write_to_8(&block->page.frame + [PAGE_HEADER + PAGE_MAX_TRX_ID], m_trx->id); if (UNIV_LIKELY_NULL(block->page.zip.data)) { memcpy_aligned<8>(&block->page.zip.data [PAGE_HEADER + PAGE_MAX_TRX_ID], - &block->frame + &block->page.frame [PAGE_HEADER + PAGE_MAX_TRX_ID], 8); } } else { @@ -2000,7 +2001,8 @@ clear_page_max_trx_id: in MySQL 5.6, 5.7 and MariaDB 10.0 and 10.1 would set the field to the transaction ID even on clustered index pages. */ - memset_aligned<8>(&block->frame[PAGE_HEADER + PAGE_MAX_TRX_ID], + memset_aligned<8>(&block->page.frame + [PAGE_HEADER + PAGE_MAX_TRX_ID], 0, 8); if (UNIV_LIKELY_NULL(block->page.zip.data)) { memset_aligned<8>(&block->page.zip.data @@ -2022,7 +2024,9 @@ clear_page_max_trx_id: return(DB_SUCCESS); } - return page_is_leaf(block->frame) ? update_records(block) : DB_SUCCESS; + return page_is_leaf(block->page.frame) + ? update_records(block) + : DB_SUCCESS; } /** Validate the space flags and update tablespace header page. @@ -2069,8 +2073,8 @@ PageConverter::update_page(buf_block_t* block, uint16_t& page_type) case FIL_PAGE_INDEX: case FIL_PAGE_RTREE: - /* We need to decompress the contents into block->frame - before we can do any thing with Btree pages. */ + /* We need to decompress the contents + before we can do anything. 
*/ if (is_compressed_table() && !buf_zip_decompress(block, TRUE)) { return(DB_CORRUPTION); @@ -2141,7 +2145,7 @@ dberr_t PageConverter::operator()(buf_block_t* block) UNIV_NOTHROW if (!block->page.zip.data) { buf_flush_init_for_writing( - NULL, block->frame, NULL, full_crc32); + NULL, block->page.frame, NULL, full_crc32); } else if (fil_page_type_is_index(page_type)) { buf_flush_init_for_writing( NULL, block->page.zip.data, &block->page.zip, @@ -3754,7 +3758,7 @@ dberr_t FetchIndexRootPages::run(const fil_iterator_t& iter, const bool encrypted= iter.crypt_data != NULL && iter.crypt_data->should_encrypt(); byte* const readptr= iter.io_buffer; - block->frame= readptr; + block->page.frame= readptr; if (block->page.zip.data) block->page.zip.data= readptr; @@ -3863,7 +3867,7 @@ static dberr_t fil_iterate( } byte* io_buffer = iter.io_buffer; - block->frame = io_buffer; + block->page.frame = io_buffer; if (block->page.zip.data) { /* Zip IO is done in the compressed page buffer. */ @@ -3903,7 +3907,7 @@ static dberr_t fil_iterate( for (ulint i = 0; i < n_pages_read; ++block->page.id_, - ++i, page_off += size, block->frame += size) { + ++i, page_off += size, block->page.frame += size) { byte* src = readptr + i * size; const ulint page_no = page_get_page_no(src); if (!page_no && block->page.id().page_no()) { @@ -3959,7 +3963,7 @@ page_corrupted: frame_changed = true; } else if (!page_compressed && !block->page.zip.data) { - block->frame = src; + block->page.frame = src; frame_changed = true; } else { ut_ad(dst != src); @@ -4011,8 +4015,7 @@ page_corrupted: if ((err = callback(block)) != DB_SUCCESS) { goto func_exit; } else if (!updated) { - updated = block->page.state() - == BUF_BLOCK_FILE_PAGE; + updated = !!block->page.frame; } /* If tablespace is encrypted we use additional @@ -4020,10 +4023,10 @@ page_corrupted: for decrypting readptr == crypt_io_buffer != io_buffer. Destination for decryption is a buffer pool block - block->frame == dst == io_buffer that is updated. 
+ block->page.frame == dst == io_buffer that is updated. Pages that did not require decryption even when tablespace is marked as encrypted are not copied - instead block->frame is set to src == readptr. + instead block->page.frame is set to src == readptr. For encryption we again use temporary scratch area writeptr != io_buffer == dst @@ -4056,7 +4059,7 @@ page_corrupted: if (block->page.zip.data) { block->page.zip.data = dst; } else { - block->frame = dst; + block->page.frame = dst; } } @@ -4219,8 +4222,8 @@ fil_tablespace_iterate( buf_block_t* block = reinterpret_cast<buf_block_t*> (ut_zalloc_nokey(sizeof *block)); - block->frame = page; - block->page.init(BUF_BLOCK_FILE_PAGE, page_id_t(~0ULL), 1); + block->page.frame = page; + block->page.init(buf_page_t::UNFIXED + 1, page_id_t{~0ULL}); /* Read the first page and determine the page and zip size. */ @@ -4274,8 +4277,9 @@ fil_tablespace_iterate( if (block->page.zip.ssize) { ut_ad(iter.n_io_buffers == 1); - block->frame = iter.io_buffer; - block->page.zip.data = block->frame + srv_page_size; + block->page.frame = iter.io_buffer; + block->page.zip.data = block->page.frame + + srv_page_size; } err = callback.run(iter, block); diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index f3f2ccdcd0a..0262ee9bc1c 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -2629,7 +2629,7 @@ commit_exit: DBUG_EXECUTE_IF("row_ins_row_level", goto skip_bulk_insert;); if (!(flags & BTR_NO_UNDO_LOG_FLAG) - && page_is_empty(block->frame) + && page_is_empty(block->page.frame) && !entry->is_metadata() && !trx->duplicates && !trx->check_unique_secondary && !trx->check_foreigns && !trx->dict_operation diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index 010ec224460..706a9aecd8f 100644 --- a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -567,7 +567,7 @@ row_purge_remove_sec_if_poss_leaf( if 
(block->page.id().page_no() != index->page - && page_get_n_recs(block->frame) < 2 + && page_get_n_recs(block->page.frame) < 2 && !lock_test_prdt_page_lock( btr_cur->rtr_info && btr_cur->rtr_info->thr @@ -804,9 +804,9 @@ retry: size_t offs = page_offset(ptr); mtr->memset(block, offs, DATA_TRX_ID_LEN, 0); offs += DATA_TRX_ID_LEN; - mtr->write<1,mtr_t::MAYBE_NOP>(*block, - block->frame - + offs, 0x80U); + mtr->write<1,mtr_t::MAYBE_NOP>( + *block, block->page.frame + offs, + 0x80U); mtr->memset(block, offs + 1, DATA_ROLL_PTR_LEN - 1, 0); } diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index a6d92cf7361..d5fd340ad81 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -1161,14 +1161,15 @@ sel_set_rtr_rec_lock( return(DB_SUCCESS_LOCKED_REC); } - ut_ad(page_align(first_rec) == cur_block->frame); + ut_ad(page_align(first_rec) == cur_block->page.frame); ut_ad(match->valid); - match->block.lock.x_lock(); + match->block.page.lock.x_lock(); retry: cur_block = btr_pcur_get_block(pcur); - ut_ad(match->block.lock.have_x() || match->block.lock.have_s()); - ut_ad(page_is_leaf(buf_block_get_frame(cur_block))); + ut_ad(match->block.page.lock.have_x() + || match->block.page.lock.have_s()); + ut_ad(page_is_leaf(cur_block->page.frame)); err = lock_sec_rec_read_check_and_lock( 0, cur_block, rec, index, my_offsets, @@ -1281,7 +1282,7 @@ re_scan: match->locked = true; func_end: - match->block.lock.x_unlock(); + match->block.page.lock.x_unlock(); if (heap != NULL) { mem_heap_free(heap); } @@ -3360,7 +3361,7 @@ Row_sel_get_clust_rec_for_mysql::operator()( if (dict_index_is_spatial(sec_index) && btr_cur->rtr_info->matches && (page_align(rec) - == btr_cur->rtr_info->matches->block.frame + == btr_cur->rtr_info->matches->block.page.frame || rec != btr_pcur_get_rec(prebuilt->pcur))) { #ifdef UNIV_DEBUG rtr_info_t* rtr_info = btr_cur->rtr_info; diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc index 
5b8ec4c50ad..c16676f5f84 100644 --- a/storage/innobase/row/row0umod.cc +++ b/storage/innobase/row/row0umod.cc @@ -513,7 +513,7 @@ row_undo_mod_clust( mtr.memset(block, offs, DATA_TRX_ID_LEN, 0); offs += DATA_TRX_ID_LEN; mtr.write<1,mtr_t::MAYBE_NOP>(*block, - block->frame + block->page.frame + offs, 0x80U); mtr.memset(block, offs + 1, DATA_ROLL_PTR_LEN - 1, 0); diff --git a/storage/innobase/row/row0undo.cc b/storage/innobase/row/row0undo.cc index f36287c7c8d..0d8d49efc6f 100644 --- a/storage/innobase/row/row0undo.cc +++ b/storage/innobase/row/row0undo.cc @@ -349,7 +349,7 @@ static bool row_undo_rec_get(undo_node_t* node) ut_ad(undo->empty()); } - node->undo_rec = trx_undo_rec_copy(undo_page->frame + offset, + node->undo_rec = trx_undo_rec_copy(undo_page->page.frame + offset, node->heap); mtr.commit(); diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 1fc5905ce26..6d027901ac2 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1517,13 +1517,13 @@ file_checked: RW_SX_LATCH, &mtr); ulint size = mach_read_from_4( FSP_HEADER_OFFSET + FSP_SIZE - + block->frame); + + block->page.frame); ut_ad(size == fil_system.sys_space ->size_in_header); size += sum_of_new_sizes; mtr.write<4>(*block, FSP_HEADER_OFFSET + FSP_SIZE - + block->frame, size); + + block->page.frame, size); fil_system.sys_space->size_in_header = uint32_t(size); mtr.commit(); @@ -1537,7 +1537,7 @@ file_checked: buf_block_t* block = buf_page_get(page_id_t(0, 0), 0, RW_S_LATCH, &mtr); ut_ad(mach_read_from_4(FSP_SIZE + FSP_HEADER_OFFSET - + block->frame) + + block->page.frame) == fil_system.sys_space->size_in_header); mtr.commit(); } diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index 9d9f9057628..fe62593a313 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -235,12 +235,13 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) 
rseg->space, rseg->page_no, mtr); buf_block_t* undo_page = trx_undo_set_state_at_finish( undo, mtr); - trx_ulogf_t* undo_header = undo_page->frame + undo->hdr_offset; + trx_ulogf_t* undo_header = undo_page->page.frame + + undo->hdr_offset; ut_ad(mach_read_from_2(undo_header + TRX_UNDO_NEEDS_PURGE) <= 1); if (UNIV_UNLIKELY(mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT - + rseg_header->frame))) { + + rseg_header->page.frame))) { /* This database must have been upgraded from before MariaDB 10.3.5. */ trx_rseg_format_upgrade(rseg_header, mtr); @@ -256,19 +257,19 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED); - uint32_t hist_size = mach_read_from_4(TRX_RSEG_HISTORY_SIZE - + TRX_RSEG - + rseg_header->frame); + uint32_t hist_size = mach_read_from_4( + TRX_RSEG_HISTORY_SIZE + TRX_RSEG + + rseg_header->page.frame); ut_ad(undo->size == flst_get_len(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST - + undo_page->frame)); + + undo_page->page.frame)); mtr->write<4>(*rseg_header, TRX_RSEG + TRX_RSEG_HISTORY_SIZE - + rseg_header->frame, + + rseg_header->page.frame, hist_size + undo->size); mtr->write<8>(*rseg_header, TRX_RSEG + TRX_RSEG_MAX_TRX_ID - + rseg_header->frame, + + rseg_header->page.frame, trx_sys.get_max_trx_id()); } @@ -376,12 +377,13 @@ static void trx_purge_free_segment(trx_rseg_t *rseg, fil_addr_t hdr_addr) again. The list of pages in the undo log tail gets inconsistent during the freeing of the segment, and therefore purge should not try to access them again. 
*/ - mtr.write<2,mtr_t::MAYBE_NOP>(*block, block->frame + hdr_addr.boffset + mtr.write<2,mtr_t::MAYBE_NOP>(*block, block->page.frame + + hdr_addr.boffset + TRX_UNDO_NEEDS_PURGE, 0U); while (!fseg_free_step_not_header( TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER - + block->frame, &mtr)) { + + block->page.frame, &mtr)) { rseg->latch.wr_unlock(); mtr.commit(); mtr.start(); @@ -397,7 +399,7 @@ static void trx_purge_free_segment(trx_rseg_t *rseg, fil_addr_t hdr_addr) started the freeing. */ const uint32_t seg_size = flst_get_len( - TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + block->frame); + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + block->page.frame); /* We may free the undo log segment header page; it must be freed within the same mtr as the undo log header is removed from the @@ -414,9 +416,9 @@ static void trx_purge_free_segment(trx_rseg_t *rseg, fil_addr_t hdr_addr) fsp0fsp.cc. */ } while (!fseg_free_step(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER - + block->frame, &mtr)); + + block->page.frame, &mtr)); - byte* hist = TRX_RSEG + TRX_RSEG_HISTORY_SIZE + rseg_hdr->frame; + byte* hist = TRX_RSEG + TRX_RSEG_HISTORY_SIZE + rseg_hdr->page.frame; ut_ad(mach_read_from_4(hist) >= seg_size); mtr.write<4>(*rseg_hdr, hist, mach_read_from_4(hist) - seg_size); @@ -452,7 +454,7 @@ trx_purge_truncate_rseg_history( buf_block_t* rseg_hdr = trx_rsegf_get(rseg.space, rseg.page_no, &mtr); hdr_addr = flst_get_last(TRX_RSEG + TRX_RSEG_HISTORY - + rseg_hdr->frame); + + rseg_hdr->page.frame); hdr_addr.boffset = static_cast<uint16_t>(hdr_addr.boffset - TRX_UNDO_HISTORY_NODE); @@ -467,7 +469,7 @@ func_exit: buf_block_t* block = trx_undo_page_get(page_id_t(rseg.space->id, hdr_addr.page), &mtr); - undo_trx_no = mach_read_from_8(block->frame + hdr_addr.boffset + undo_trx_no = mach_read_from_8(block->page.frame + hdr_addr.boffset + TRX_UNDO_TRX_NO); if (undo_trx_no >= limit.trx_no) { @@ -480,14 +482,15 @@ func_exit: goto func_exit; } - prev_hdr_addr = flst_get_prev_addr(block->frame + hdr_addr.boffset + 
prev_hdr_addr = flst_get_prev_addr(block->page.frame + hdr_addr.boffset + TRX_UNDO_HISTORY_NODE); prev_hdr_addr.boffset = static_cast<uint16_t>(prev_hdr_addr.boffset - TRX_UNDO_HISTORY_NODE); - if (mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + block->frame) + if (mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + + block->page.frame) == TRX_UNDO_TO_PURGE - && !mach_read_from_2(block->frame + hdr_addr.boffset + && !mach_read_from_2(block->page.frame + hdr_addr.boffset + TRX_UNDO_NEXT_LOG)) { /* We can free the whole log segment */ @@ -700,28 +703,28 @@ not_free: if (bpage->id().space() == space.id && bpage->oldest_modification() != 1) { - ut_ad(bpage->state() == BUF_BLOCK_FILE_PAGE); auto block= reinterpret_cast<buf_block_t*>(bpage); - block->fix(); + ut_ad(buf_pool.is_uncompressed(block)); + bpage->lock.x_lock(); buf_pool.flush_hp.set(prev); mysql_mutex_unlock(&buf_pool.flush_list_mutex); #ifdef BTR_CUR_HASH_ADAPT ut_ad(!block->index); /* There is no AHI on undo tablespaces. */ #endif - block->lock.x_lock(); + bpage->fix(); + ut_ad(!bpage->is_io_fixed()); mysql_mutex_lock(&buf_pool.flush_list_mutex); - ut_ad(bpage->io_fix() == BUF_IO_NONE); if (bpage->oldest_modification() > 1) { - bpage->clear_oldest_modification(false); + bpage->reset_oldest_modification(); mtr.memo_push(block, MTR_MEMO_PAGE_X_FIX); } else { - block->lock.x_unlock(); - block->unfix(); + bpage->unfix(); + bpage->lock.x_unlock(); } if (prev != buf_pool.flush_hp.get()) @@ -788,9 +791,10 @@ not_free: sys_header, &mtr); ut_ad(rblock); /* These were written by trx_rseg_header_create(). */ - ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT + rblock->frame)); - ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_HISTORY_SIZE + - rblock->frame)); + ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT + + rblock->page.frame)); + ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_HISTORY_SIZE + + rblock->page.frame)); rseg.reinit(rblock ? 
rblock->page.id().page_no() : FIL_NULL); } @@ -851,7 +855,7 @@ static void trx_purge_rseg_get_next_history_log( page_id_t(purge_sys.rseg->space->id, purge_sys.rseg->last_page_no), &mtr); - const trx_ulogf_t* log_hdr = undo_page->frame + const trx_ulogf_t* log_hdr = undo_page->page.frame + purge_sys.rseg->last_offset(); /* Increase the purge page count by one for every handled log */ @@ -882,7 +886,7 @@ static void trx_purge_rseg_get_next_history_log( log_hdr = trx_undo_page_get_s_latched( page_id_t(purge_sys.rseg->space->id, prev_log_addr.page), - &mtr)->frame + &mtr)->page.frame + prev_log_addr.boffset; trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO); @@ -1045,8 +1049,8 @@ trx_purge_get_next_rec( } } - trx_undo_rec_t* rec_copy = trx_undo_rec_copy(undo_page->frame + offset, - heap); + trx_undo_rec_t* rec_copy = trx_undo_rec_copy(undo_page->page.frame + + offset, heap); mtr_commit(&mtr); diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc index 45bd36d9669..8d9a7d6f796 100644 --- a/storage/innobase/trx/trx0rec.cc +++ b/storage/innobase/trx/trx0rec.cc @@ -59,10 +59,11 @@ const dtuple_t trx_undo_metadata = { @return bytes left */ static ulint trx_undo_left(const buf_block_t *undo_block, const byte *ptr) { - ut_ad(ptr >= &undo_block->frame[TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE]); + ut_ad(ptr >= + &undo_block->page.frame[TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE]); /* The 10 is supposed to be an extra safety margin (and needed for compatibility with older versions) */ - lint left= srv_page_size - (ptr - undo_block->frame) - + lint left= srv_page_size - (ptr - undo_block->page.frame) - (10 + FIL_PAGE_DATA_END); ut_ad(left >= 0); return left < 0 ? 0 : static_cast<ulint>(left); @@ -82,14 +83,14 @@ trx_undo_page_set_next_prev_and_add( written on this undo page. 
*/ mtr_t* mtr) /*!< in: mtr */ { - ut_ad(page_align(ptr) == undo_block->frame); + ut_ad(page_align(ptr) == undo_block->page.frame); if (UNIV_UNLIKELY(trx_undo_left(undo_block, ptr) < 2)) return 0; byte *ptr_to_first_free= my_assume_aligned<2>(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + - undo_block->frame); + undo_block->page.frame); const uint16_t first_free= mach_read_from_2(ptr_to_first_free); @@ -97,13 +98,14 @@ trx_undo_page_set_next_prev_and_add( memcpy(ptr, ptr_to_first_free, 2); ptr += 2; - const uint16_t end_of_rec= static_cast<uint16_t>(ptr - undo_block->frame); + const uint16_t end_of_rec= static_cast<uint16_t> + (ptr - undo_block->page.frame); /* Update the offset to first free undo record */ mach_write_to_2(ptr_to_first_free, end_of_rec); /* Write offset of the next undo log record */ - memcpy(undo_block->frame + first_free, ptr_to_first_free, 2); - const byte *start= undo_block->frame + first_free + 2; + memcpy(undo_block->page.frame + first_free, ptr_to_first_free, 2); + const byte *start= undo_block->page.frame + first_free + 2; mtr->undo_append(*undo_block, start, ptr - start - 2); return first_free; @@ -397,13 +399,13 @@ trx_undo_page_report_insert( TRX_UNDO_INSERT == 1 into insert_undo pages, or TRX_UNDO_UPDATE == 2 into update_undo pages. 
*/ ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE - + undo_block->frame) <= 2); + + undo_block->page.frame) <= 2); uint16_t first_free = mach_read_from_2(my_assume_aligned<2> (TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE - + undo_block->frame)); - byte* ptr = undo_block->frame + first_free; + + undo_block->page.frame)); + byte* ptr = undo_block->page.frame + first_free; if (trx_undo_left(undo_block, ptr) < 2 + 1 + 11 + 11) { /* Not enough space for writing the general parameters */ @@ -420,7 +422,7 @@ trx_undo_page_report_insert( if (write_empty) { /* Table is in bulk operation */ - undo_block->frame[first_free + 2] = TRX_UNDO_EMPTY; + undo_block->page.frame[first_free + 2] = TRX_UNDO_EMPTY; goto done; } @@ -430,9 +432,10 @@ trx_undo_page_report_insert( if (UNIV_UNLIKELY(clust_entry->info_bits != 0)) { ut_ad(clust_entry->is_metadata()); ut_ad(index->is_instant()); - ut_ad(undo_block->frame[first_free + 2] + ut_ad(undo_block->page.frame[first_free + 2] == TRX_UNDO_INSERT_REC); - undo_block->frame[first_free + 2] = TRX_UNDO_INSERT_METADATA; + undo_block->page.frame[first_free + 2] + = TRX_UNDO_INSERT_METADATA; goto done; } @@ -809,14 +812,14 @@ trx_undo_page_report_modify( TRX_UNDO_INSERT == 1 into insert_undo pages, or TRX_UNDO_UPDATE == 2 into update_undo pages. 
*/ ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE - + undo_block->frame) <= 2); + + undo_block->page.frame) <= 2); - byte* ptr_to_first_free = my_assume_aligned<2>(TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_FREE - + undo_block->frame); + byte* ptr_to_first_free = my_assume_aligned<2>( + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + + undo_block->page.frame); const uint16_t first_free = mach_read_from_2(ptr_to_first_free); - byte *ptr = undo_block->frame + first_free; + byte *ptr = undo_block->page.frame + first_free; if (trx_undo_left(undo_block, ptr) < 50) { /* NOTE: the value 50 must be big enough so that the general @@ -1405,12 +1408,12 @@ already_logged: mach_write_to_2(ptr, first_free); const uint16_t new_free = static_cast<uint16_t>( - ptr + 2 - undo_block->frame); - mach_write_to_2(undo_block->frame + first_free, new_free); + ptr + 2 - undo_block->page.frame); + mach_write_to_2(undo_block->page.frame + first_free, new_free); mach_write_to_2(ptr_to_first_free, new_free); - const byte* start = &undo_block->frame[first_free + 2]; + const byte* start = &undo_block->page.frame[first_free + 2]; mtr->undo_append(*undo_block, start, ptr - start); return(first_free); } @@ -1868,11 +1871,11 @@ trx_undo_page_report_rename(trx_t* trx, const dict_table_t* table, { byte* ptr_first_free = my_assume_aligned<2>(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE - + block->frame); + + block->page.frame); const uint16_t first_free = mach_read_from_2(ptr_first_free); ut_ad(first_free >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE); ut_ad(first_free <= srv_page_size - FIL_PAGE_DATA_END); - byte* const start = block->frame + first_free; + byte* const start = block->page.frame + first_free; size_t len = strlen(table->name.m_name); const size_t fixed = 2 + 1 + 11 + 11 + 2; ut_ad(len <= NAME_LEN * 2 + 1); @@ -1894,7 +1897,7 @@ trx_undo_page_report_rename(trx_t* trx, const dict_table_t* table, memcpy(ptr, table->name.m_name, len); ptr += len; mach_write_to_2(ptr, first_free); - 
mach_write_to_2(ptr_first_free, ptr + 2 - block->frame); + mach_write_to_2(ptr_first_free, ptr + 2 - block->page.frame); memcpy(start, ptr_first_free, 2); mtr->undo_append(*block, start + 2, ptr - start - 2); return first_free; @@ -2100,8 +2103,8 @@ err_exit: if (UNIV_UNLIKELY(offset == 0)) { const uint16_t first_free = mach_read_from_2( TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE - + undo_block->frame); - memset(undo_block->frame + first_free, 0, + + undo_block->page.frame); + memset(undo_block->page.frame + first_free, 0, (srv_page_size - FIL_PAGE_DATA_END) - first_free); @@ -2249,7 +2252,7 @@ trx_undo_get_undo_rec_low( buf_block_t* undo_page = trx_undo_page_get_s_latched( page_id_t(rseg->space->id, page_no), &mtr); - undo_rec = trx_undo_rec_copy(undo_page->frame + offset, heap); + undo_rec = trx_undo_rec_copy(undo_page->page.frame + offset, heap); mtr.commit(); diff --git a/storage/innobase/trx/trx0rseg.cc b/storage/innobase/trx/trx0rseg.cc index ace1d8531fc..23999a4208b 100644 --- a/storage/innobase/trx/trx0rseg.cc +++ b/storage/innobase/trx/trx0rseg.cc @@ -57,28 +57,28 @@ trx_rseg_write_wsrep_checkpoint( mtr->write<4,mtr_t::MAYBE_NOP>(*rseg_header, TRX_RSEG + TRX_RSEG_WSREP_XID_FORMAT - + rseg_header->frame, + + rseg_header->page.frame, uint32_t(xid->formatID)); mtr->write<4,mtr_t::MAYBE_NOP>(*rseg_header, TRX_RSEG + TRX_RSEG_WSREP_XID_GTRID_LEN - + rseg_header->frame, + + rseg_header->page.frame, uint32_t(xid->gtrid_length)); mtr->write<4,mtr_t::MAYBE_NOP>(*rseg_header, TRX_RSEG + TRX_RSEG_WSREP_XID_BQUAL_LEN - + rseg_header->frame, + + rseg_header->page.frame, uint32_t(xid->bqual_length)); const ulint xid_length = static_cast<ulint>(xid->gtrid_length + xid->bqual_length); mtr->memcpy<mtr_t::MAYBE_NOP>(*rseg_header, TRX_RSEG + TRX_RSEG_WSREP_XID_DATA - + rseg_header->frame, + + rseg_header->page.frame, xid->data, xid_length); if (xid_length < XIDDATASIZE && memcmp(TRX_RSEG + TRX_RSEG_WSREP_XID_DATA - + rseg_header->frame, field_ref_zero, + + 
rseg_header->page.frame, field_ref_zero, XIDDATASIZE - xid_length)) { mtr->memset(rseg_header, TRX_RSEG + TRX_RSEG_WSREP_XID_DATA + xid_length, @@ -139,7 +139,7 @@ trx_rseg_update_wsrep_checkpoint(const XID* xid, mtr_t* mtr) buf_block_t* rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr); if (UNIV_UNLIKELY(mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT - + rseg_header->frame))) { + + rseg_header->page.frame))) { trx_rseg_format_upgrade(rseg_header, mtr); } @@ -186,7 +186,7 @@ bool trx_rseg_read_wsrep_checkpoint(const buf_block_t *rseg_header, XID &xid) { int formatID = static_cast<int>( mach_read_from_4(TRX_RSEG + TRX_RSEG_WSREP_XID_FORMAT - + rseg_header->frame)); + + rseg_header->page.frame)); if (formatID == 0) { return false; } @@ -194,14 +194,14 @@ bool trx_rseg_read_wsrep_checkpoint(const buf_block_t *rseg_header, XID &xid) xid.formatID = formatID; xid.gtrid_length = static_cast<int>( mach_read_from_4(TRX_RSEG + TRX_RSEG_WSREP_XID_GTRID_LEN - + rseg_header->frame)); + + rseg_header->page.frame)); xid.bqual_length = static_cast<int>( mach_read_from_4(TRX_RSEG + TRX_RSEG_WSREP_XID_BQUAL_LEN - + rseg_header->frame)); + + rseg_header->page.frame)); memcpy(xid.data, TRX_RSEG + TRX_RSEG_WSREP_XID_DATA - + rseg_header->frame, XIDDATASIZE); + + rseg_header->page.frame, XIDDATASIZE); return true; } @@ -261,7 +261,7 @@ bool trx_rseg_read_wsrep_checkpoint(XID& xid) trx_sysf_rseg_get_space(sys, rseg_id), page_no, &mtr); if (mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT - + rseg_header->frame)) { + + rseg_header->page.frame)) { continue; } @@ -326,18 +326,18 @@ trx_rseg_header_create( } ut_ad(0 == mach_read_from_4(TRX_RSEG_FORMAT + TRX_RSEG - + block->frame)); + + block->page.frame)); ut_ad(0 == mach_read_from_4(TRX_RSEG_HISTORY_SIZE + TRX_RSEG - + block->frame)); + + block->page.frame)); ut_ad(0 == mach_read_from_4(TRX_RSEG_MAX_TRX_ID + TRX_RSEG - + block->frame)); + + block->page.frame)); /* Initialize the history list */ flst_init(block, TRX_RSEG_HISTORY + 
TRX_RSEG, mtr); mtr->write<8,mtr_t::MAYBE_NOP>(*block, TRX_RSEG + TRX_RSEG_MAX_TRX_ID - + block->frame, max_trx_id); + + block->page.frame, max_trx_id); /* Reset the undo log slots */ mtr->memset(block, TRX_RSEG_UNDO_SLOTS + TRX_RSEG, @@ -351,12 +351,12 @@ trx_rseg_header_create( *sys_header, TRX_SYS + TRX_SYS_RSEGS + TRX_SYS_RSEG_SPACE + rseg_id * TRX_SYS_RSEG_SLOT_SIZE - + sys_header->frame, space->id); + + sys_header->page.frame, space->id); mtr->write<4,mtr_t::MAYBE_NOP>( *sys_header, TRX_SYS + TRX_SYS_RSEGS + TRX_SYS_RSEG_PAGE_NO + rseg_id * TRX_SYS_RSEG_SLOT_SIZE - + sys_header->frame, block->page.id().page_no()); + + sys_header->page.frame, block->page.id().page_no()); } return block; @@ -455,21 +455,22 @@ static dberr_t trx_rseg_mem_restore(trx_rseg_t *rseg, trx_id_t &max_trx_id, buf_block_t* rseg_hdr = trx_rsegf_get_new( rseg->space->id, rseg->page_no, mtr); - if (!mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT + rseg_hdr->frame)) { + if (!mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT + + rseg_hdr->page.frame)) { trx_id_t id = mach_read_from_8(TRX_RSEG + TRX_RSEG_MAX_TRX_ID - + rseg_hdr->frame); + + rseg_hdr->page.frame); if (id > max_trx_id) { max_trx_id = id; } const byte* binlog_name = TRX_RSEG + TRX_RSEG_BINLOG_NAME - + rseg_hdr->frame; + + rseg_hdr->page.frame; if (*binlog_name) { lsn_t lsn = mach_read_from_8(my_assume_aligned<8>( FIL_PAGE_LSN + rseg_hdr - ->frame)); + ->page.frame)); compile_time_assert(TRX_RSEG_BINLOG_NAME_LEN == sizeof trx_sys.recovered_binlog_filename); if (lsn > trx_sys.recovered_binlog_lsn) { @@ -478,7 +479,7 @@ static dberr_t trx_rseg_mem_restore(trx_rseg_t *rseg, trx_id_t &max_trx_id, = mach_read_from_8( TRX_RSEG + TRX_RSEG_BINLOG_OFFSET - + rseg_hdr->frame); + + rseg_hdr->page.frame); memcpy(trx_sys.recovered_binlog_filename, binlog_name, TRX_RSEG_BINLOG_NAME_LEN); @@ -501,19 +502,19 @@ static dberr_t trx_rseg_mem_restore(trx_rseg_t *rseg, trx_id_t &max_trx_id, /* Initialize the undo log lists according to the rseg header */ 
rseg->curr_size = mach_read_from_4(TRX_RSEG + TRX_RSEG_HISTORY_SIZE - + rseg_hdr->frame) + + rseg_hdr->page.frame) + 1; if (dberr_t err = trx_undo_lists_init(rseg, max_trx_id, rseg_hdr)) { return err; } if (auto len = flst_get_len(TRX_RSEG + TRX_RSEG_HISTORY - + rseg_hdr->frame)) { + + rseg_hdr->page.frame)) { rseg->history_size += len; fil_addr_t node_addr = flst_get_last(TRX_RSEG + TRX_RSEG_HISTORY - + rseg_hdr->frame); + + rseg_hdr->page.frame); node_addr.boffset = static_cast<uint16_t>( node_addr.boffset - TRX_UNDO_HISTORY_NODE); @@ -522,19 +523,20 @@ static dberr_t trx_rseg_mem_restore(trx_rseg_t *rseg, trx_id_t &max_trx_id, const buf_block_t* block = trx_undo_page_get( page_id_t(rseg->space->id, node_addr.page), mtr); - trx_id_t id = mach_read_from_8(block->frame + node_addr.boffset + trx_id_t id = mach_read_from_8(block->page.frame + + node_addr.boffset + TRX_UNDO_TRX_ID); if (id > max_trx_id) { max_trx_id = id; } - id = mach_read_from_8(block->frame + node_addr.boffset + id = mach_read_from_8(block->page.frame + node_addr.boffset + TRX_UNDO_TRX_NO); if (id > max_trx_id) { max_trx_id = id; } rseg->set_last_commit(node_addr.boffset, id); - unsigned purge = mach_read_from_2(block->frame + unsigned purge = mach_read_from_2(block->page.frame + node_addr.boffset + TRX_UNDO_NEEDS_PURGE); ut_ad(purge <= 1); @@ -599,8 +601,8 @@ dberr_t trx_rseg_array_init() information from the TRX_SYS page. 
*/ max_trx_id = mach_read_from_8( TRX_SYS + TRX_SYS_TRX_ID_STORE - + sys->frame); - trx_rseg_init_binlog_info(sys->frame); + + sys->page.frame); + trx_rseg_init_binlog_info(sys->page.frame); #ifdef WITH_WSREP wsrep_sys_xid.set(&trx_sys.recovered_wsrep_xid); #endif @@ -715,10 +717,10 @@ void trx_rseg_update_binlog_offset(buf_block_t *rseg_header, const trx_t *trx, mtr->write<8,mtr_t::MAYBE_NOP>(*rseg_header, TRX_RSEG + TRX_RSEG_BINLOG_OFFSET - + rseg_header->frame, + + rseg_header->page.frame, trx->mysql_log_offset); - void* name = TRX_RSEG + TRX_RSEG_BINLOG_NAME + rseg_header->frame; + void* name = TRX_RSEG + TRX_RSEG_BINLOG_NAME + rseg_header->page.frame; if (memcmp(trx->mysql_log_file_name, name, len)) { mtr->memcpy(*rseg_header, name, trx->mysql_log_file_name, len); diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc index 52e246c5c4f..abe97370c03 100644 --- a/storage/innobase/trx/trx0sys.cc +++ b/storage/innobase/trx/trx0sys.cc @@ -164,10 +164,10 @@ trx_sysf_create( ut_a(block->page.id() == page_id_t(0, TRX_SYS_PAGE_NO)); - mtr->write<2>(*block, FIL_PAGE_TYPE + block->frame, + mtr->write<2>(*block, FIL_PAGE_TYPE + block->page.frame, FIL_PAGE_TYPE_TRX_SYS); - ut_ad(!mach_read_from_4(block->frame + ut_ad(!mach_read_from_4(block->page.frame + TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_MAGIC)); diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 18c93d5a8cc..0f57e7e2baf 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -571,7 +571,7 @@ trx_resurrect_table_locks( page_id_t(trx->rsegs.m_redo.rseg->space->id, undo->top_page_no), &mtr); buf_block_t* undo_block = block; - trx_undo_rec_t* undo_rec = block->frame + undo->top_offset; + trx_undo_rec_t* undo_rec = block->page.frame + undo->top_offset; do { ulint type; diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc index da3f6b592fa..23b27656dfb 100644 --- a/storage/innobase/trx/trx0undo.cc +++ 
b/storage/innobase/trx/trx0undo.cc @@ -119,7 +119,7 @@ uint16_t trx_undo_page_get_start(const buf_block_t *block, uint32_t page_no, uint16_t offset) { return page_no == block->page.id().page_no() - ? mach_read_from_2(offset + TRX_UNDO_LOG_START + block->frame) + ? mach_read_from_2(offset + TRX_UNDO_LOG_START + block->page.frame) : TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE; } @@ -135,7 +135,7 @@ trx_undo_page_get_first_rec(const buf_block_t *block, uint32_t page_no, { uint16_t start= trx_undo_page_get_start(block, page_no, offset); return start == trx_undo_page_get_end(block, page_no, offset) - ? nullptr : block->frame + start; + ? nullptr : block->page.frame + start; } /** Get the last undo log record on a page. @@ -151,7 +151,8 @@ trx_undo_page_get_last_rec(const buf_block_t *block, uint32_t page_no, { uint16_t end= trx_undo_page_get_end(block, page_no, offset); return trx_undo_page_get_start(block, page_no, offset) == end - ? nullptr : block->frame + mach_read_from_2(block->frame + end - 2); + ? nullptr + : block->page.frame + mach_read_from_2(block->page.frame + end - 2); } /** Get the previous record in an undo log from the previous page. @@ -169,7 +170,7 @@ trx_undo_get_prev_rec_from_prev_page(buf_block_t *&block, uint16_t rec, { uint32_t prev_page_no= flst_get_prev_addr(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + - block->frame).page; + block->page.frame).page; if (prev_page_no == FIL_NULL) return nullptr; @@ -192,10 +193,11 @@ trx_undo_rec_t* trx_undo_page_get_prev_rec(const buf_block_t *block, trx_undo_rec_t *rec, uint32_t page_no, uint16_t offset) { - ut_ad(block->frame == page_align(rec)); - return rec == block->frame + trx_undo_page_get_start(block, page_no, offset) + ut_ad(block->page.frame == page_align(rec)); + return + rec == block->page.frame + trx_undo_page_get_start(block, page_no, offset) ? nullptr - : block->frame + mach_read_from_2(rec - 2); + : block->page.frame + mach_read_from_2(rec - 2); } /** Get the previous record in an undo log. 
@@ -211,7 +213,7 @@ trx_undo_get_prev_rec(buf_block_t *&block, uint16_t rec, uint32_t page_no, uint16_t offset, bool shared, mtr_t *mtr) { if (trx_undo_rec_t *prev= trx_undo_page_get_prev_rec(block, - block->frame + rec, + block->page.frame + rec, page_no, offset)) return prev; @@ -234,11 +236,11 @@ trx_undo_get_next_rec_from_next_page(buf_block_t *&block, uint32_t page_no, uint16_t offset, ulint mode, mtr_t *mtr) { if (page_no == block->page.id().page_no() && - mach_read_from_2(block->frame + offset + TRX_UNDO_NEXT_LOG)) + mach_read_from_2(block->page.frame + offset + TRX_UNDO_NEXT_LOG)) return NULL; uint32_t next= flst_get_next_addr(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + - block->frame).page; + block->page.frame).page; if (next == FIL_NULL) return NULL; @@ -293,32 +295,34 @@ trx_undo_get_first_rec(const fil_space_t &space, uint32_t page_no, /** Initialize an undo log page. NOTE: This corresponds to a redo log record and must not be changed! @see mtr_t::undo_create() -@param[in,out] block undo log page */ +@param block undo log page */ void trx_undo_page_init(const buf_block_t &block) { - mach_write_to_2(my_assume_aligned<2>(FIL_PAGE_TYPE + block.frame), + mach_write_to_2(my_assume_aligned<2>(FIL_PAGE_TYPE + block.page.frame), FIL_PAGE_UNDO_LOG); static_assert(TRX_UNDO_PAGE_HDR == FIL_PAGE_DATA, "compatibility"); - memset_aligned<2>(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE + block.frame, + memset_aligned<2>(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE + block.page.frame, 0, 2); mach_write_to_2(my_assume_aligned<2> - (TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_START + block.frame), + (TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_START + block.page.frame), TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE); - memcpy_aligned<2>(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + block.frame, - TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_START + block.frame, 2); + memcpy_aligned<2>(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + block.page.frame, + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_START + block.page.frame, + 2); /* The 
following corresponds to flst_zero_both(), but without writing log. */ memset_aligned<4>(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + FLST_PREV + - FIL_ADDR_PAGE + block.frame, 0xff, 4); + FIL_ADDR_PAGE + block.page.frame, 0xff, 4); memset_aligned<2>(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + FLST_PREV + - FIL_ADDR_BYTE + block.frame, 0, 2); + FIL_ADDR_BYTE + block.page.frame, 0, 2); memset_aligned<2>(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + FLST_NEXT + - FIL_ADDR_PAGE + block.frame, 0xff, 4); + FIL_ADDR_PAGE + block.page.frame, 0xff, 4); memset_aligned<2>(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + FLST_NEXT + - FIL_ADDR_BYTE + block.frame, 0, 2); + FIL_ADDR_BYTE + block.page.frame, 0, 2); static_assert(TRX_UNDO_PAGE_NODE + FLST_NEXT + FIL_ADDR_BYTE + 2 == TRX_UNDO_PAGE_HDR_SIZE, "compatibility"); /* Preserve TRX_UNDO_SEG_HDR, but clear the rest of the page. */ - memset_aligned<2>(TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE + block.frame, 0, + memset_aligned<2>(TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE + + block.page.frame, 0, srv_page_size - (TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE + FIL_PAGE_DATA_END)); } @@ -395,21 +399,21 @@ trx_undo_seg_create(fil_space_t *space, buf_block_t *rseg_hdr, ulint *id, trx_undo_page_init(*block); mtr->write<2>(*block, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE - + block->frame, + + block->page.frame, TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE); mtr->write<2,mtr_t::MAYBE_NOP>(*block, TRX_UNDO_SEG_HDR + TRX_UNDO_LAST_LOG - + block->frame, 0U); + + block->page.frame, 0U); - flst_init(*block, TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + block->frame, - mtr); + flst_init(*block, TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + + block->page.frame, mtr); flst_add_last(block, TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST, block, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr); *id = slot_no; mtr->write<4>(*rseg_hdr, TRX_RSEG + TRX_RSEG_UNDO_SLOTS - + slot_no * TRX_RSEG_SLOT_SIZE + rseg_hdr->frame, + + slot_no * TRX_RSEG_SLOT_SIZE + rseg_hdr->page.frame, 
block->page.id().page_no()); MONITOR_INC(MONITOR_NUM_UNDO_SLOT_USED); @@ -429,11 +433,11 @@ static uint16_t trx_undo_header_create(buf_block_t *undo_page, trx_id_t trx_id, /* Reset the TRX_UNDO_PAGE_TYPE in case this page is being repurposed after upgrading to MariaDB 10.3. */ byte *undo_type= my_assume_aligned<2> - (TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE + undo_page->frame); + (TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE + undo_page->page.frame); ut_ad(mach_read_from_2(undo_type) <= 2); mtr->write<2,mtr_t::MAYBE_NOP>(*undo_page, undo_type, 0U); byte *start= my_assume_aligned<4>(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_START + - undo_page->frame); + undo_page->page.frame); const uint16_t free= mach_read_from_2(start + 2); static_assert(TRX_UNDO_PAGE_START + 2 == TRX_UNDO_PAGE_FREE, "compatibility"); @@ -447,34 +451,34 @@ static uint16_t trx_undo_header_create(buf_block_t *undo_page, trx_id_t trx_id, mtr->memset(*undo_page, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_START, 4, start, 2); uint16_t prev_log= mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_LAST_LOG + - undo_page->frame); + undo_page->page.frame); alignas(4) byte buf[4]; mach_write_to_2(buf, TRX_UNDO_ACTIVE); mach_write_to_2(buf + 2, free); static_assert(TRX_UNDO_STATE + 2 == TRX_UNDO_LAST_LOG, "compatibility"); static_assert(!((TRX_UNDO_SEG_HDR + TRX_UNDO_STATE) % 4), "alignment"); mtr->memcpy(*undo_page, my_assume_aligned<4> - (TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + undo_page->frame), + (TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + undo_page->page.frame), buf, 4); if (prev_log) - mtr->write<2>(*undo_page, prev_log + TRX_UNDO_NEXT_LOG + undo_page->frame, - free); + mtr->write<2>(*undo_page, prev_log + TRX_UNDO_NEXT_LOG + + undo_page->page.frame, free); mtr->write<8,mtr_t::MAYBE_NOP>(*undo_page, free + TRX_UNDO_TRX_ID + - undo_page->frame, trx_id); + undo_page->page.frame, trx_id); /* Write TRX_UNDO_NEEDS_PURGE=1 and TRX_UNDO_LOG_START. 
*/ mach_write_to_2(buf, 1); memcpy_aligned<2>(buf + 2, start, 2); static_assert(TRX_UNDO_NEEDS_PURGE + 2 == TRX_UNDO_LOG_START, "compatibility"); mtr->memcpy<mtr_t::MAYBE_NOP>(*undo_page, free + TRX_UNDO_NEEDS_PURGE + - undo_page->frame, buf, 4); + undo_page->page.frame, buf, 4); /* Initialize all fields TRX_UNDO_XID_EXISTS to TRX_UNDO_HISTORY_NODE. */ if (prev_log) { mtr->memset(undo_page, free + TRX_UNDO_XID_EXISTS, TRX_UNDO_PREV_LOG - TRX_UNDO_XID_EXISTS, 0); mtr->write<2,mtr_t::MAYBE_NOP>(*undo_page, free + TRX_UNDO_PREV_LOG + - undo_page->frame, prev_log); + undo_page->page.frame, prev_log); static_assert(TRX_UNDO_PREV_LOG + 2 == TRX_UNDO_HISTORY_NODE, "compatibility"); mtr->memset(undo_page, free + TRX_UNDO_HISTORY_NODE, FLST_NODE_SIZE, 0); @@ -502,9 +506,9 @@ static void trx_undo_write_xid(buf_block_t *block, uint16_t offset, static_assert(MAXGTRIDSIZE + MAXBQUALSIZE == XIDDATASIZE, "gtrid and bqual don't fit xid data"); DBUG_ASSERT(mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_LAST_LOG + - block->frame) == offset); + block->page.frame) == offset); - trx_ulogf_t* log_hdr= block->frame + offset; + trx_ulogf_t* log_hdr= block->page.frame + offset; mtr->write<4,mtr_t::MAYBE_NOP>(*block, log_hdr + TRX_UNDO_XA_FORMAT, static_cast<uint32_t>(xid.formatID)); @@ -514,7 +518,7 @@ static void trx_undo_write_xid(buf_block_t *block, uint16_t offset, static_cast<uint32_t>(xid.bqual_length)); const ulint xid_length= static_cast<ulint>(xid.gtrid_length + xid.bqual_length); - mtr->memcpy(*block, &block->frame[offset + TRX_UNDO_XA_XID], + mtr->memcpy(*block, &block->page.frame[offset + TRX_UNDO_XA_XID], xid.data, xid_length); if (UNIV_LIKELY(xid_length < XIDDATASIZE)) mtr->memset(block, offset + TRX_UNDO_XA_XID + xid_length, @@ -566,7 +570,7 @@ buf_block_t* trx_undo_add_page(trx_undo_t* undo, mtr_t* mtr) new_block = fseg_alloc_free_page_general( TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER - + header_block->frame, + + header_block->page.frame, undo->top_page_no + 1, FSP_UP, true, 
mtr, mtr); rseg->space->release_free_extents(n_reserved); @@ -621,19 +625,20 @@ trx_undo_free_page( undo_block, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr); fseg_free_page(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER - + header_block->frame, + + header_block->page.frame, rseg->space, page_no, mtr); buf_page_free(rseg->space, page_no, mtr); const fil_addr_t last_addr = flst_get_last( - TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + header_block->frame); + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + + header_block->page.frame); rseg->curr_size--; if (in_history) { buf_block_t* rseg_header = trx_rsegf_get( rseg->space, rseg->page_no, mtr); byte* rseg_hist_size = TRX_RSEG + TRX_RSEG_HISTORY_SIZE - + rseg_header->frame; + + rseg_header->page.frame; uint32_t hist_size = mach_read_from_4(rseg_hist_size); ut_ad(hist_size > 0); mtr->write<4>(*rseg_header, rseg_hist_size, hist_size - 1); @@ -705,8 +710,9 @@ func_exit: if (trunc_here) { mtr.write<2>(*undo_block, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE - + undo_block->frame, - ulint(trunc_here - undo_block->frame)); + + undo_block->page.frame, + ulint(trunc_here + - undo_block->page.frame)); } mtr.commit(); @@ -761,14 +767,14 @@ done: if (undo_page->page.id().page_no() == hdr_page_no) { uint16_t end = mach_read_from_2(hdr_offset + TRX_UNDO_NEXT_LOG - + undo_page->frame); + + undo_page->page.frame); if (end == 0) { end = mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE - + undo_page->frame); + + undo_page->page.frame); } - mtr.write<2>(*undo_page, undo_page->frame + hdr_offset + mtr.write<2>(*undo_page, undo_page->page.frame + hdr_offset + TRX_UNDO_LOG_START, end); } else { trx_undo_free_page(rseg, true, hdr_page_no, @@ -799,7 +805,7 @@ static void trx_undo_seg_free(const trx_undo_t *undo) page_id_t(SRV_TMP_SPACE_ID, undo->hdr_page_no), &mtr); fseg_header_t* file_seg = TRX_UNDO_SEG_HDR - + TRX_UNDO_FSEG_HEADER + block->frame; + + TRX_UNDO_FSEG_HEADER + block->page.frame; finished = fseg_free_step(file_seg, &mtr); @@ -810,7 +816,7 @@ 
static void trx_undo_seg_free(const trx_undo_t *undo) compile_time_assert(FIL_NULL == 0xffffffff); memset(TRX_RSEG + TRX_RSEG_UNDO_SLOTS + undo->id * TRX_RSEG_SLOT_SIZE + - rseg_header->frame, 0xff, 4); + rseg_header->page.frame, 0xff, 4); MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED); } @@ -841,7 +847,7 @@ trx_undo_mem_create_at_db_start(trx_rseg_t *rseg, ulint id, uint32_t page_no, page_id_t(rseg->space->id, page_no), &mtr); const uint16_t type = mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE - + block->frame); + + block->page.frame); if (UNIV_UNLIKELY(type > 2)) { corrupted_type: sql_print_error("InnoDB: unsupported undo header type %u", @@ -852,7 +858,7 @@ corrupted: } uint16_t offset = mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_LAST_LOG - + block->frame); + + block->page.frame); if (offset < TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE || offset >= srv_page_size - TRX_UNDO_LOG_OLD_HDR_SIZE) { sql_print_error("InnoDB: invalid undo header offset %u", @@ -860,9 +866,9 @@ corrupted: goto corrupted; } - const trx_ulogf_t* const undo_header = block->frame + offset; + const trx_ulogf_t* const undo_header = block->page.frame + offset; uint16_t state = mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_STATE - + block->frame); + + block->page.frame); switch (state) { case TRX_UNDO_ACTIVE: case TRX_UNDO_PREPARED: @@ -918,10 +924,10 @@ corrupted: undo->dict_operation = undo_header[TRX_UNDO_DICT_TRANS]; undo->size = flst_get_len(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST - + block->frame); + + block->page.frame); fil_addr_t last_addr = flst_get_last( - TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + block->frame); + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + block->page.frame); undo->last_page_no = last_addr.page; undo->top_page_no = last_addr.page; @@ -931,7 +937,8 @@ corrupted: if (const trx_undo_rec_t* rec = trx_undo_page_get_last_rec( last, page_no, offset)) { - undo->top_offset = static_cast<uint16_t>(rec - last->frame); + undo->top_offset = static_cast<uint16_t>( + rec - 
last->page.frame); undo->top_undo_no = trx_undo_rec_get_undo_no(rec); ut_ad(!undo->empty()); } else { @@ -1063,9 +1070,11 @@ trx_undo_create(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo, if (trx->dict_operation) { (*undo)->dict_operation = true; - mtr->write<1,mtr_t::MAYBE_NOP>(*block, block->frame + offset + mtr->write<1,mtr_t::MAYBE_NOP>(*block, + block->page.frame + offset + TRX_UNDO_DICT_TRANS, 1U); - mtr->write<8,mtr_t::MAYBE_NOP>(*block, block->frame + offset + mtr->write<8,mtr_t::MAYBE_NOP>(*block, + block->page.frame + offset + TRX_UNDO_TABLE_ID, 0U); } @@ -1117,9 +1126,11 @@ trx_undo_reuse_cached(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** pundo, if (trx->dict_operation) { undo->dict_operation = TRUE; - mtr->write<1,mtr_t::MAYBE_NOP>(*block, block->frame + offset + mtr->write<1,mtr_t::MAYBE_NOP>(*block, + block->page.frame + offset + TRX_UNDO_DICT_TRANS, 1U); - mtr->write<8,mtr_t::MAYBE_NOP>(*block, block->frame + offset + mtr->write<8,mtr_t::MAYBE_NOP>(*block, + block->page.frame + offset + TRX_UNDO_TABLE_ID, 0U); } @@ -1243,13 +1254,13 @@ trx_undo_set_state_at_finish( const uint16_t state = undo->size == 1 && TRX_UNDO_PAGE_REUSE_LIMIT > mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE - + block->frame) + + block->page.frame) ? 
TRX_UNDO_CACHED : TRX_UNDO_TO_PURGE; undo->state = state; mtr->write<2>(*block, TRX_UNDO_SEG_HDR + TRX_UNDO_STATE - + block->frame, state); + + block->page.frame, state); return block; } @@ -1270,7 +1281,7 @@ void trx_undo_set_state_at_prepare(trx_t *trx, trx_undo_t *undo, bool rollback, if (rollback) { ut_ad(undo->state == TRX_UNDO_PREPARED); mtr->write<2>(*block, TRX_UNDO_SEG_HDR + TRX_UNDO_STATE - + block->frame, TRX_UNDO_ACTIVE); + + block->page.frame, TRX_UNDO_ACTIVE); return; } @@ -1280,11 +1291,12 @@ void trx_undo_set_state_at_prepare(trx_t *trx, trx_undo_t *undo, bool rollback, undo->xid = trx->xid; /*------------------------------*/ - mtr->write<2>(*block, TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + block->frame, - undo->state); + mtr->write<2>(*block, TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + + block->page.frame, undo->state); uint16_t offset = mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_LAST_LOG - + block->frame); - mtr->write<1>(*block, block->frame + offset + TRX_UNDO_XID_EXISTS, 1U); + + block->page.frame); + mtr->write<1>(*block, block->page.frame + offset + TRX_UNDO_XID_EXISTS, + 1U); trx_undo_write_xid(block, offset, undo->xid, mtr); } |