author | Marko Mäkelä <marko.makela@mariadb.com> | 2019-10-10 20:29:30 +0300
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2019-10-10 20:29:30 +0300
commit | 6d7a8269532f4989b1515f259120e91019a74dec (patch)
tree | 91ad24e73585feac223d832173182a9e50c792d6 /storage
parent | 726b1998fc9a97a8b2e58fe56668d570c4a01891 (diff)
download | mariadb-git-6d7a8269532f4989b1515f259120e91019a74dec.tar.gz
MDEV-20788: Bogus assertion failure for PAGE_FREE list
In MDEV-11369 (instant ADD COLUMN) in MariaDB Server 10.3,
we introduced a hidden metadata record, which must be the
first record in the clustered index if and only if
index->is_instant() holds.
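
As background, here is a minimal sketch of that invariant (not code from
this patch; the helper name check_metadata_invariant is made up, and the
page is assumed to be the leftmost leaf page of a clustered index):

    /* Illustrative sketch only: the metadata record, when present, is
    the first user record after the page infimum and carries
    REC_INFO_MIN_REC_FLAG; it must exist exactly when
    index->is_instant() holds. */
    static void check_metadata_invariant(const dict_index_t* index,
                                         const page_t* page)
    {
            const rec_t* first = page_rec_get_next_const(
                    page_get_infimum_rec(page));
            ut_ad(index->is_instant()
                  == !!(rec_get_info_bits(first, page_is_comp(page))
                        & REC_INFO_MIN_REC_FLAG));
    }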
To catch MDEV-19783, in
commit ed0793e096a17955c5a03844b248bcf8303dd335 and
commit 99dc40d6ac2234fa4c990665cc1914c1925cd641
we added assertions to detect cases where the metadata record is
missing when it should be present, or where a metadata record is
present when it should not be. Those assertions must not be applied
when traversing the PAGE_FREE list: that list can contain arbitrary
garbage, and all we can rely on is being able to determine the
successor and the size of each garbage record in it.
page_validate(), page_simple_validate_old(), page_simple_validate_new():
Do not invoke page_rec_get_next_const() for traversing the PAGE_FREE
list, but instead use a lower-level accessor that does not attempt to
validate the REC_INFO_MIN_REC_FLAG.
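
Condensed from the page_simple_validate_new() hunk below, the new
free-list traversal looks roughly like this (the comp flag passed to
rec_get_next_offs() and the exact error handling differ between the
three functions):

    /* Walk the PAGE_FREE list using only the raw next-record offset;
    do not interpret REC_INFO_MIN_REC_FLAG or any other record flags. */
    const rec_t* rec = page_header_get_ptr(page, PAGE_FREE);

    while (rec != NULL) {
            /* ... validate the size of this garbage record here ... */

            const ulint offs = rec_get_next_offs(rec, TRUE);
            if (!offs) {
                    break;  /* end of the PAGE_FREE list */
            }
            if (UNIV_UNLIKELY(offs < PAGE_OLD_INFIMUM
                              || offs >= srv_page_size)) {
                    ib::error() << "Page free list is corrupted";
                    break;  /* the patched code reports failure here */
            }

            rec = page + offs;
    }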
page_copy_rec_list_end_no_locks(),
page_copy_rec_list_start(), page_delete_rec_list_start():
Add assertions.
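
The added assertions, excerpted from the diff below, are of two kinds:
the record passed in must reside on the page of the given block, and
the record-copying loops must never encounter a stray metadata record:

    /* page_copy_rec_list_start(), page_delete_rec_list_start(): the
    record must belong to the page owned by the passed block. */
    ut_ad(page_align(rec) == block->frame);

    /* page_copy_rec_list_end_no_locks(), page_copy_rec_list_start():
    after advancing cur1, the next record to be copied must not carry
    REC_INFO_MIN_REC_FLAG, i.e. it must not be a metadata record. */
    ut_ad(!(rec_get_info_bits(cur1.rec, page_is_comp(new_page))
            & REC_INFO_MIN_REC_FLAG));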
btr_page_get_split_rec_to_left(): Remove a redundant return value,
and make the output parameter the return value.
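
Concretely, the declaration in btr0btr.h changes roughly as follows
(paraphrased from the hunks below):

    /* Before: the split point was returned through an output
    parameter, and the ibool return value was redundant, because in
    practice *split_rec was always set whenever TRUE was returned. */
    ibool
    btr_page_get_split_rec_to_left(
            btr_cur_t*      cursor,     /*!< in: cursor at which to insert */
            rec_t**         split_rec); /*!< out: first record on the
                                        upper half page, or NULL */

    /* After: the first record to be moved to the right half page is
    returned directly; NULL means that no split is recommended. */
    rec_t* btr_page_get_split_rec_to_left(const btr_cur_t* cursor);

The caller in btr_page_split_and_insert() then tests the return value
directly, as in the "else if ((split_rec =
btr_page_get_split_rec_to_left(cursor)))" branch visible in the diff.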
btr_page_get_split_rec_to_right(), btr_page_split_and_insert(): Clean up.
Diffstat (limited to 'storage')
-rw-r--r-- | storage/innobase/btr/btr0btr.cc | 194
-rw-r--r-- | storage/innobase/btr/btr0cur.cc | 2
-rw-r--r-- | storage/innobase/include/btr0btr.h | 40
-rw-r--r-- | storage/innobase/page/page0page.cc | 73
4 files changed, 141 insertions, 168 deletions
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 473e4248656..212b6489f29 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -2030,104 +2030,78 @@ btr_root_raise_and_insert( } } -/*************************************************************//** -Decides if the page should be split at the convergence point of inserts +/** Decide if the page should be split at the convergence point of inserts converging to the left. -@return TRUE if split recommended */ -ibool -btr_page_get_split_rec_to_left( -/*===========================*/ - btr_cur_t* cursor, /*!< in: cursor at which to insert */ - rec_t** split_rec) /*!< out: if split recommended, - the first record on upper half page, - or NULL if tuple to be inserted should - be first */ +@param[in] cursor insert position +@return the first record to be moved to the right half page +@retval NULL if no split is recommended */ +rec_t* btr_page_get_split_rec_to_left(const btr_cur_t* cursor) { - page_t* page; - rec_t* insert_point; - rec_t* infimum; - - page = btr_cur_get_page(cursor); - insert_point = btr_cur_get_rec(cursor); + rec_t* split_rec = btr_cur_get_rec(cursor); + const page_t* page = page_align(split_rec); if (page_header_get_ptr(page, PAGE_LAST_INSERT) - == page_rec_get_next(insert_point)) { - - infimum = page_get_infimum_rec(page); - - /* If the convergence is in the middle of a page, include also - the record immediately before the new insert to the upper - page. Otherwise, we could repeatedly move from page to page - lots of records smaller than the convergence point. */ + != page_rec_get_next(split_rec)) { + return NULL; + } - if (infimum != insert_point - && page_rec_get_next(infimum) != insert_point) { + const rec_t* infimum = page_get_infimum_rec(page); - *split_rec = insert_point; - } else { - *split_rec = page_rec_get_next(insert_point); - } + /* If the convergence is in the middle of a page, include also + the record immediately before the new insert to the upper + page. Otherwise, we could repeatedly move from page to page + lots of records smaller than the convergence point. */ - return(TRUE); + if (split_rec == infimum + || split_rec == page_rec_get_next_const(infimum)) { + split_rec = page_rec_get_next(split_rec); } - return(FALSE); + return split_rec; } -/*************************************************************//** -Decides if the page should be split at the convergence point of inserts +/** Decide if the page should be split at the convergence point of inserts converging to the right. 
-@return TRUE if split recommended */ -ibool -btr_page_get_split_rec_to_right( -/*============================*/ - btr_cur_t* cursor, /*!< in: cursor at which to insert */ - rec_t** split_rec) /*!< out: if split recommended, - the first record on upper half page, - or NULL if tuple to be inserted should - be first */ +@param[in] cursor insert position +@param[out] split_rec if split recommended, the first record + on the right half page, or + NULL if the to-be-inserted record + should be first +@return whether split is recommended */ +bool +btr_page_get_split_rec_to_right(const btr_cur_t* cursor, rec_t** split_rec) { - page_t* page; - rec_t* insert_point; - - page = btr_cur_get_page(cursor); - insert_point = btr_cur_get_rec(cursor); + rec_t* insert_point = btr_cur_get_rec(cursor); + const page_t* page = page_align(insert_point); /* We use eager heuristics: if the new insert would be right after the previous insert on the same page, we assume that there is a pattern of sequential inserts here. */ - if (page_header_get_ptr(page, PAGE_LAST_INSERT) == insert_point) { - - rec_t* next_rec; - - next_rec = page_rec_get_next(insert_point); - - if (page_rec_is_supremum(next_rec)) { -split_at_new: - /* Split at the new record to insert */ - *split_rec = NULL; - } else { - rec_t* next_next_rec = page_rec_get_next(next_rec); - if (page_rec_is_supremum(next_next_rec)) { - - goto split_at_new; - } + if (page_header_get_ptr(page, PAGE_LAST_INSERT) != insert_point) { + return false; + } - /* If there are >= 2 user records up from the insert - point, split all but 1 off. We want to keep one because - then sequential inserts can use the adaptive hash - index, as they can do the necessary checks of the right - search position just by looking at the records on this - page. */ + insert_point = page_rec_get_next(insert_point); - *split_rec = next_next_rec; + if (page_rec_is_supremum(insert_point)) { + insert_point = NULL; + } else { + insert_point = page_rec_get_next(insert_point); + if (page_rec_is_supremum(insert_point)) { + insert_point = NULL; } - return(TRUE); + /* If there are >= 2 user records up from the insert + point, split all but 1 off. We want to keep one because + then sequential inserts can use the adaptive hash + index, as they can do the necessary checks of the right + search position just by looking at the records on this + page. 
*/ } - return(FALSE); + *split_rec = insert_point; + return true; } /*************************************************************//** @@ -2782,30 +2756,20 @@ btr_page_split_and_insert( buf_block_t* block; page_t* page; page_zip_des_t* page_zip; - ulint page_no; - byte direction; - ulint hint_page_no; buf_block_t* new_block; page_t* new_page; page_zip_des_t* new_page_zip; rec_t* split_rec; buf_block_t* left_block; buf_block_t* right_block; - buf_block_t* insert_block; page_cur_t* page_cursor; rec_t* first_rec; byte* buf = 0; /* remove warning */ rec_t* move_limit; - ibool insert_will_fit; - ibool insert_left; ulint n_iterations = 0; - rec_t* rec; ulint n_uniq; - dict_index_t* index; - - index = btr_cur_get_index(cursor); - if (dict_index_is_spatial(index)) { + if (dict_index_is_spatial(cursor->index)) { /* Split rtree page and update parent */ return(rtr_page_split_and_insert(flags, cursor, offsets, heap, tuple, n_ext, mtr)); @@ -2837,23 +2801,19 @@ func_start: ut_ad(!page_is_empty(page)); /* try to insert to the next page if possible before split */ - rec = btr_insert_into_right_sibling( - flags, cursor, offsets, *heap, tuple, n_ext, mtr); - - if (rec != NULL) { + if (rec_t* rec = btr_insert_into_right_sibling( + flags, cursor, offsets, *heap, tuple, n_ext, mtr)) { return(rec); } - page_no = block->page.id.page_no(); - /* 1. Decide the split record; split_rec == NULL means that the tuple to be inserted should be the first record on the upper half-page */ - insert_left = FALSE; + bool insert_left = false; + ulint hint_page_no = block->page.id.page_no() + 1; + byte direction = FSP_UP; - if (tuple != NULL && n_iterations > 0) { - direction = FSP_UP; - hint_page_no = page_no + 1; + if (tuple && n_iterations > 0) { split_rec = btr_page_get_split_rec(cursor, tuple, n_ext); if (split_rec == NULL) { @@ -2861,17 +2821,10 @@ func_start: cursor, tuple, offsets, n_uniq, heap); } } else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) { - direction = FSP_UP; - hint_page_no = page_no + 1; - - } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) { + } else if ((split_rec = btr_page_get_split_rec_to_left(cursor))) { direction = FSP_DOWN; - hint_page_no = page_no - 1; - ut_ad(split_rec); + hint_page_no -= 2; } else { - direction = FSP_UP; - hint_page_no = page_no + 1; - /* If there is only one record in the index page, we can't split the node in the middle by default. We need to determine whether the new record will be inserted @@ -2896,7 +2849,7 @@ func_start: new_block = btr_page_alloc(cursor->index, hint_page_no, direction, btr_page_get_level(page, mtr), mtr, mtr); - if (new_block == NULL && os_has_said_disk_full) { + if (!new_block) { return(NULL); } @@ -2921,12 +2874,8 @@ func_start: *offsets = rec_get_offsets(split_rec, cursor->index, *offsets, page_is_leaf(page), n_uniq, heap); - if (tuple != NULL) { - insert_left = cmp_dtuple_rec( - tuple, split_rec, *offsets) < 0; - } else { - insert_left = 1; - } + insert_left = !tuple + || cmp_dtuple_rec(tuple, split_rec, *offsets) < 0; if (!insert_left && new_page_zip && n_iterations > 0) { /* If a compressed page has already been split, @@ -2961,10 +2910,10 @@ insert_empty: on the appropriate half-page, we may release the tree x-latch. We can then move the records after releasing the tree latch, thus reducing the tree latch contention. 
*/ + bool insert_will_fit; if (tuple == NULL) { - insert_will_fit = 1; - } - else if (split_rec) { + insert_will_fit = true; + } else if (split_rec) { insert_will_fit = !new_page_zip && btr_page_insert_fits(cursor, split_rec, offsets, tuple, n_ext, heap); @@ -3069,8 +3018,6 @@ insert_empty: new_block, block, move_limit); } - ut_ad(!dict_index_is_spatial(index)); - btr_search_move_or_delete_hash_entries( new_block, block, cursor->index); @@ -3102,18 +3049,15 @@ insert_empty: /* 6. The split and the tree modification is now completed. Decide the page where the tuple should be inserted */ + rec_t* rec; + buf_block_t* const insert_block = insert_left + ? left_block : right_block; - if (tuple == NULL) { + if (UNIV_UNLIKELY(!tuple)) { rec = NULL; goto func_exit; } - if (insert_left) { - insert_block = left_block; - } else { - insert_block = right_block; - } - /* 7. Reposition the cursor for insert and try insertion */ page_cursor = btr_cur_get_page_cur(cursor); @@ -3190,9 +3134,7 @@ func_exit: ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index)); ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index)); - if (tuple == NULL) { - ut_ad(rec == NULL); - } + ut_ad(tuple || !rec); ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets)); return(rec); } diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 3060865bc8e..788b34bca62 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -3138,7 +3138,7 @@ fail_err: && page_get_n_recs(page) >= 2 && dict_index_get_space_reserve() + rec_size > max_size && (btr_page_get_split_rec_to_right(cursor, &dummy) - || btr_page_get_split_rec_to_left(cursor, &dummy))) { + || btr_page_get_split_rec_to_left(cursor))) { goto fail; } diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index 604890ed03c..ded01e53bad 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -445,30 +445,22 @@ btr_page_reorganize( dict_index_t* index, /*!< in: the index tree of the page */ mtr_t* mtr) /*!< in/out: mini-transaction */ MY_ATTRIBUTE((nonnull)); -/*************************************************************//** -Decides if the page should be split at the convergence point of -inserts converging to left. -@return TRUE if split recommended */ -ibool -btr_page_get_split_rec_to_left( -/*===========================*/ - btr_cur_t* cursor, /*!< in: cursor at which to insert */ - rec_t** split_rec)/*!< out: if split recommended, - the first record on upper half page, - or NULL if tuple should be first */ - MY_ATTRIBUTE((warn_unused_result)); -/*************************************************************//** -Decides if the page should be split at the convergence point of -inserts converging to right. -@return TRUE if split recommended */ -ibool -btr_page_get_split_rec_to_right( -/*============================*/ - btr_cur_t* cursor, /*!< in: cursor at which to insert */ - rec_t** split_rec)/*!< out: if split recommended, - the first record on upper half page, - or NULL if tuple should be first */ - MY_ATTRIBUTE((warn_unused_result)); +/** Decide if the page should be split at the convergence point of inserts +converging to the left. 
+@param[in] cursor insert position +@return the first record to be moved to the right half page +@retval NULL if no split is recommended */ +rec_t* btr_page_get_split_rec_to_left(const btr_cur_t* cursor); +/** Decide if the page should be split at the convergence point of inserts +converging to the right. +@param[in] cursor insert position +@param[out] split_rec if split recommended, the first record + on the right half page, or + NULL if the to-be-inserted record + should be first +@return whether split is recommended */ +bool +btr_page_get_split_rec_to_right(const btr_cur_t* cursor, rec_t** split_rec); /*************************************************************//** Splits an index page to halves and inserts the tuple. It is assumed diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc index bda4693648b..e316c8b1568 100644 --- a/storage/innobase/page/page0page.cc +++ b/storage/innobase/page/page0page.cc @@ -604,20 +604,20 @@ page_copy_rec_list_end_no_locks( /* Copy records from the original page to the new page */ while (!page_cur_is_after_last(&cur1)) { - rec_t* cur1_rec = page_cur_get_rec(&cur1); rec_t* ins_rec; - offsets = rec_get_offsets(cur1_rec, index, offsets, is_leaf, + offsets = rec_get_offsets(cur1.rec, index, offsets, is_leaf, ULINT_UNDEFINED, &heap); ins_rec = page_cur_insert_rec_low(cur2, index, - cur1_rec, offsets, mtr); + cur1.rec, offsets, mtr); if (UNIV_UNLIKELY(!ins_rec)) { ib::fatal() << "Rec offset " << page_offset(rec) - << ", cur1 offset " - << page_offset(page_cur_get_rec(&cur1)) + << ", cur1 offset " << page_offset(cur1.rec) << ", cur2 offset " << page_offset(cur2); } page_cur_move_to_next(&cur1); + ut_ad(!(rec_get_info_bits(cur1.rec, page_is_comp(new_page)) + & REC_INFO_MIN_REC_FLAG)); cur2 = ins_rec; } @@ -803,6 +803,8 @@ page_copy_rec_list_start( dict_index_t* index, /*!< in: record descriptor */ mtr_t* mtr) /*!< in: mtr */ { + ut_ad(page_align(rec) == block->frame); + page_t* new_page = buf_block_get_frame(new_block); page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block); page_cur_t cur1; @@ -820,7 +822,6 @@ page_copy_rec_list_start( predefined infimum record. 
*/ if (page_rec_is_infimum(rec)) { - return(ret); } @@ -854,17 +855,18 @@ page_copy_rec_list_start( rec_move, max_to_move, &num_moved, mtr); } else { - while (page_cur_get_rec(&cur1) != rec) { - rec_t* cur1_rec = page_cur_get_rec(&cur1); - offsets = rec_get_offsets(cur1_rec, index, offsets, + offsets = rec_get_offsets(cur1.rec, index, offsets, is_leaf, ULINT_UNDEFINED, &heap); cur2 = page_cur_insert_rec_low(cur2, index, - cur1_rec, offsets, mtr); + cur1.rec, offsets, mtr); ut_a(cur2); page_cur_move_to_next(&cur1); + ut_ad(!(rec_get_info_bits(cur1.rec, + page_is_comp(new_page)) + & REC_INFO_MIN_REC_FLAG)); } } @@ -1254,6 +1256,7 @@ page_delete_rec_list_start( rec_offs_init(offsets_); + ut_ad(page_align(rec) == block->frame); ut_ad((ibool) !!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); #ifdef UNIV_ZIP_DEBUG @@ -2163,7 +2166,17 @@ page_simple_validate_old( goto func_exit; } - rec = page_rec_get_next_const(rec); + ulint offs = rec_get_next_offs(rec, FALSE); + if (!offs) { + break; + } + if (UNIV_UNLIKELY(offs < PAGE_OLD_INFIMUM + || offs >= srv_page_size)) { + ib::error() << "Page free list is corrupted " << count; + goto func_exit; + } + + rec = page + offs; } if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) { @@ -2355,7 +2368,17 @@ page_simple_validate_new( goto func_exit; } - rec = page_rec_get_next_const(rec); + const ulint offs = rec_get_next_offs(rec, TRUE); + if (!offs) { + break; + } + if (UNIV_UNLIKELY(offs < PAGE_OLD_INFIMUM + || offs >= srv_page_size)) { + ib::error() << "Page free list is corrupted " << count; + goto func_exit; + } + + rec = page + offs; } if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) { @@ -2668,16 +2691,32 @@ n_owned_zero: page_is_leaf(page), ULINT_UNDEFINED, &heap); if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) { + ret = FALSE; +next_free: + const ulint offs = rec_get_next_offs( + rec, page_is_comp(page)); + if (!offs) { + break; + } + if (UNIV_UNLIKELY(offs < PAGE_OLD_INFIMUM + || offs >= srv_page_size)) { + ib::error() << "Page free list is corrupted"; + ret = FALSE; + break; + } - goto func_exit; + rec = page + offs; + continue; } count++; offs = page_offset(rec_get_start(rec, offsets)); i = rec_offs_size(offsets); - if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) { - ib::error() << "Record offset out of bounds"; - goto func_exit; + if (UNIV_UNLIKELY(offs + i >= srv_page_size)) { + ib::error() << "Free record offset out of bounds: " + << offs << '+' << i; + ret = FALSE; + goto next_free; } while (i--) { @@ -2691,7 +2730,7 @@ n_owned_zero: buf[offs + i] = 1; } - rec = page_rec_get_next_const(rec); + goto next_free; } if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) { |