author		Marko Mäkelä <marko.makela@mariadb.com>	2019-10-10 20:29:30 +0300
committer	Marko Mäkelä <marko.makela@mariadb.com>	2019-10-10 20:29:30 +0300
commit		6d7a8269532f4989b1515f259120e91019a74dec (patch)
tree		91ad24e73585feac223d832173182a9e50c792d6 /storage
parent		726b1998fc9a97a8b2e58fe56668d570c4a01891 (diff)
MDEV-20788: Bogus assertion failure for PAGE_FREE list
In MDEV-11369 (instant ADD COLUMN) in MariaDB Server 10.3, we introduced
the hidden metadata record that must be the first record in the clustered
index if and only if index->is_instant() holds.

To catch MDEV-19783, in commit ed0793e096a17955c5a03844b248bcf8303dd335
and commit 99dc40d6ac2234fa4c990665cc1914c1925cd641 we added some
assertions to find cases where the metadata record is missing while it
should not be, or a record exists when it should not.

Those assertions were invalid when traversing the PAGE_FREE list. That
list can contain anything; we must only be able to determine the
successor and the size of each garbage record in it.

page_validate(), page_simple_validate_old(), page_simple_validate_new():
Do not invoke page_rec_get_next_const() for traversing the PAGE_FREE
list, but instead use a lower-level accessor that does not attempt to
validate the REC_INFO_MIN_REC_FLAG.

page_copy_rec_list_end_no_locks(), page_copy_rec_list_start(),
page_delete_rec_list_start(): Add assertions.

btr_page_get_split_rec_to_left(): Remove a redundant return value, and
make the output parameter the return value.

btr_page_get_split_rec_to_right(), btr_page_split_and_insert(): Clean up.
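To make the fix concrete, below is a minimal sketch of the PAGE_FREE
traversal pattern this patch adopts in page_validate(),
page_simple_validate_old() and page_simple_validate_new(): follow the raw
next-record offset via rec_get_next_offs() and bounds-check it yourself,
instead of calling page_rec_get_next_const(), which also validates the
REC_INFO_MIN_REC_FLAG and therefore trips on garbage records. This is not
code from the patch; the helper name free_list_is_sane() and the exact set
of includes are assumptions for illustration only.

#include "page0page.h"	/* page_t, page_header_get_ptr(), PAGE_FREE,
			   PAGE_OLD_INFIMUM */
#include "rem0rec.h"	/* rec_t, rec_get_next_offs() */
#include "srv0srv.h"	/* srv_page_size */

/* Walk the PAGE_FREE list of garbage records without interpreting the
records themselves (in particular, without checking REC_INFO_MIN_REC_FLAG),
mirroring the lower-level traversal used in the validation functions.
@param page  index page frame
@param comp  nonzero if the page is in ROW_FORMAT=COMPACT or later
@return whether the free list chain looks sane */
static bool free_list_is_sane(const page_t* page, ulint comp)
{
	const rec_t* rec = page_header_get_ptr(page, PAGE_FREE);

	while (rec) {
		/* Only the successor offset is read from the garbage
		record; nothing else about it is validated. */
		const ulint offs = rec_get_next_offs(rec, comp);

		if (!offs) {
			break;	/* end of the PAGE_FREE list */
		}

		if (offs < PAGE_OLD_INFIMUM || offs >= srv_page_size) {
			return(false);	/* corrupted free list */
		}

		rec = page + offs;
	}

	return(true);
}

The same bounds check (offs < PAGE_OLD_INFIMUM || offs >= srv_page_size)
appears verbatim in the hunks for page0page.cc below; the patch reports
"Page free list is corrupted" and bails out of the current record instead
of failing an assertion on a stale REC_INFO_MIN_REC_FLAG.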
Diffstat (limited to 'storage')
-rw-r--r--	storage/innobase/btr/btr0btr.cc		194
-rw-r--r--	storage/innobase/btr/btr0cur.cc		  2
-rw-r--r--	storage/innobase/include/btr0btr.h	 40
-rw-r--r--	storage/innobase/page/page0page.cc	 73
4 files changed, 141 insertions, 168 deletions
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 473e4248656..212b6489f29 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -2030,104 +2030,78 @@ btr_root_raise_and_insert(
}
}
-/*************************************************************//**
-Decides if the page should be split at the convergence point of inserts
+/** Decide if the page should be split at the convergence point of inserts
converging to the left.
-@return TRUE if split recommended */
-ibool
-btr_page_get_split_rec_to_left(
-/*===========================*/
- btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec) /*!< out: if split recommended,
- the first record on upper half page,
- or NULL if tuple to be inserted should
- be first */
+@param[in] cursor insert position
+@return the first record to be moved to the right half page
+@retval NULL if no split is recommended */
+rec_t* btr_page_get_split_rec_to_left(const btr_cur_t* cursor)
{
- page_t* page;
- rec_t* insert_point;
- rec_t* infimum;
-
- page = btr_cur_get_page(cursor);
- insert_point = btr_cur_get_rec(cursor);
+ rec_t* split_rec = btr_cur_get_rec(cursor);
+ const page_t* page = page_align(split_rec);
if (page_header_get_ptr(page, PAGE_LAST_INSERT)
- == page_rec_get_next(insert_point)) {
-
- infimum = page_get_infimum_rec(page);
-
- /* If the convergence is in the middle of a page, include also
- the record immediately before the new insert to the upper
- page. Otherwise, we could repeatedly move from page to page
- lots of records smaller than the convergence point. */
+ != page_rec_get_next(split_rec)) {
+ return NULL;
+ }
- if (infimum != insert_point
- && page_rec_get_next(infimum) != insert_point) {
+ const rec_t* infimum = page_get_infimum_rec(page);
- *split_rec = insert_point;
- } else {
- *split_rec = page_rec_get_next(insert_point);
- }
+ /* If the convergence is in the middle of a page, include also
+ the record immediately before the new insert to the upper
+ page. Otherwise, we could repeatedly move from page to page
+ lots of records smaller than the convergence point. */
- return(TRUE);
+ if (split_rec == infimum
+ || split_rec == page_rec_get_next_const(infimum)) {
+ split_rec = page_rec_get_next(split_rec);
}
- return(FALSE);
+ return split_rec;
}
-/*************************************************************//**
-Decides if the page should be split at the convergence point of inserts
+/** Decide if the page should be split at the convergence point of inserts
converging to the right.
-@return TRUE if split recommended */
-ibool
-btr_page_get_split_rec_to_right(
-/*============================*/
- btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec) /*!< out: if split recommended,
- the first record on upper half page,
- or NULL if tuple to be inserted should
- be first */
+@param[in] cursor insert position
+@param[out] split_rec if split recommended, the first record
+ on the right half page, or
+ NULL if the to-be-inserted record
+ should be first
+@return whether split is recommended */
+bool
+btr_page_get_split_rec_to_right(const btr_cur_t* cursor, rec_t** split_rec)
{
- page_t* page;
- rec_t* insert_point;
-
- page = btr_cur_get_page(cursor);
- insert_point = btr_cur_get_rec(cursor);
+ rec_t* insert_point = btr_cur_get_rec(cursor);
+ const page_t* page = page_align(insert_point);
/* We use eager heuristics: if the new insert would be right after
the previous insert on the same page, we assume that there is a
pattern of sequential inserts here. */
- if (page_header_get_ptr(page, PAGE_LAST_INSERT) == insert_point) {
-
- rec_t* next_rec;
-
- next_rec = page_rec_get_next(insert_point);
-
- if (page_rec_is_supremum(next_rec)) {
-split_at_new:
- /* Split at the new record to insert */
- *split_rec = NULL;
- } else {
- rec_t* next_next_rec = page_rec_get_next(next_rec);
- if (page_rec_is_supremum(next_next_rec)) {
-
- goto split_at_new;
- }
+ if (page_header_get_ptr(page, PAGE_LAST_INSERT) != insert_point) {
+ return false;
+ }
- /* If there are >= 2 user records up from the insert
- point, split all but 1 off. We want to keep one because
- then sequential inserts can use the adaptive hash
- index, as they can do the necessary checks of the right
- search position just by looking at the records on this
- page. */
+ insert_point = page_rec_get_next(insert_point);
- *split_rec = next_next_rec;
+ if (page_rec_is_supremum(insert_point)) {
+ insert_point = NULL;
+ } else {
+ insert_point = page_rec_get_next(insert_point);
+ if (page_rec_is_supremum(insert_point)) {
+ insert_point = NULL;
}
- return(TRUE);
+ /* If there are >= 2 user records up from the insert
+ point, split all but 1 off. We want to keep one because
+ then sequential inserts can use the adaptive hash
+ index, as they can do the necessary checks of the right
+ search position just by looking at the records on this
+ page. */
}
- return(FALSE);
+ *split_rec = insert_point;
+ return true;
}
/*************************************************************//**
@@ -2782,30 +2756,20 @@ btr_page_split_and_insert(
buf_block_t* block;
page_t* page;
page_zip_des_t* page_zip;
- ulint page_no;
- byte direction;
- ulint hint_page_no;
buf_block_t* new_block;
page_t* new_page;
page_zip_des_t* new_page_zip;
rec_t* split_rec;
buf_block_t* left_block;
buf_block_t* right_block;
- buf_block_t* insert_block;
page_cur_t* page_cursor;
rec_t* first_rec;
byte* buf = 0; /* remove warning */
rec_t* move_limit;
- ibool insert_will_fit;
- ibool insert_left;
ulint n_iterations = 0;
- rec_t* rec;
ulint n_uniq;
- dict_index_t* index;
-
- index = btr_cur_get_index(cursor);
- if (dict_index_is_spatial(index)) {
+ if (dict_index_is_spatial(cursor->index)) {
/* Split rtree page and update parent */
return(rtr_page_split_and_insert(flags, cursor, offsets, heap,
tuple, n_ext, mtr));
@@ -2837,23 +2801,19 @@ func_start:
ut_ad(!page_is_empty(page));
/* try to insert to the next page if possible before split */
- rec = btr_insert_into_right_sibling(
- flags, cursor, offsets, *heap, tuple, n_ext, mtr);
-
- if (rec != NULL) {
+ if (rec_t* rec = btr_insert_into_right_sibling(
+ flags, cursor, offsets, *heap, tuple, n_ext, mtr)) {
return(rec);
}
- page_no = block->page.id.page_no();
-
/* 1. Decide the split record; split_rec == NULL means that the
tuple to be inserted should be the first record on the upper
half-page */
- insert_left = FALSE;
+ bool insert_left = false;
+ ulint hint_page_no = block->page.id.page_no() + 1;
+ byte direction = FSP_UP;
- if (tuple != NULL && n_iterations > 0) {
- direction = FSP_UP;
- hint_page_no = page_no + 1;
+ if (tuple && n_iterations > 0) {
split_rec = btr_page_get_split_rec(cursor, tuple, n_ext);
if (split_rec == NULL) {
@@ -2861,17 +2821,10 @@ func_start:
cursor, tuple, offsets, n_uniq, heap);
}
} else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
- direction = FSP_UP;
- hint_page_no = page_no + 1;
-
- } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
+ } else if ((split_rec = btr_page_get_split_rec_to_left(cursor))) {
direction = FSP_DOWN;
- hint_page_no = page_no - 1;
- ut_ad(split_rec);
+ hint_page_no -= 2;
} else {
- direction = FSP_UP;
- hint_page_no = page_no + 1;
-
/* If there is only one record in the index page, we
can't split the node in the middle by default. We need
to determine whether the new record will be inserted
@@ -2896,7 +2849,7 @@ func_start:
new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
btr_page_get_level(page, mtr), mtr, mtr);
- if (new_block == NULL && os_has_said_disk_full) {
+ if (!new_block) {
return(NULL);
}
@@ -2921,12 +2874,8 @@ func_start:
*offsets = rec_get_offsets(split_rec, cursor->index, *offsets,
page_is_leaf(page), n_uniq, heap);
- if (tuple != NULL) {
- insert_left = cmp_dtuple_rec(
- tuple, split_rec, *offsets) < 0;
- } else {
- insert_left = 1;
- }
+ insert_left = !tuple
+ || cmp_dtuple_rec(tuple, split_rec, *offsets) < 0;
if (!insert_left && new_page_zip && n_iterations > 0) {
/* If a compressed page has already been split,
@@ -2961,10 +2910,10 @@ insert_empty:
on the appropriate half-page, we may release the tree x-latch.
We can then move the records after releasing the tree latch,
thus reducing the tree latch contention. */
+ bool insert_will_fit;
if (tuple == NULL) {
- insert_will_fit = 1;
- }
- else if (split_rec) {
+ insert_will_fit = true;
+ } else if (split_rec) {
insert_will_fit = !new_page_zip
&& btr_page_insert_fits(cursor, split_rec,
offsets, tuple, n_ext, heap);
@@ -3069,8 +3018,6 @@ insert_empty:
new_block, block, move_limit);
}
- ut_ad(!dict_index_is_spatial(index));
-
btr_search_move_or_delete_hash_entries(
new_block, block, cursor->index);
@@ -3102,18 +3049,15 @@ insert_empty:
/* 6. The split and the tree modification is now completed. Decide the
page where the tuple should be inserted */
+ rec_t* rec;
+ buf_block_t* const insert_block = insert_left
+ ? left_block : right_block;
- if (tuple == NULL) {
+ if (UNIV_UNLIKELY(!tuple)) {
rec = NULL;
goto func_exit;
}
- if (insert_left) {
- insert_block = left_block;
- } else {
- insert_block = right_block;
- }
-
/* 7. Reposition the cursor for insert and try insertion */
page_cursor = btr_cur_get_page_cur(cursor);
@@ -3190,9 +3134,7 @@ func_exit:
ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));
- if (tuple == NULL) {
- ut_ad(rec == NULL);
- }
+ ut_ad(tuple || !rec);
ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets));
return(rec);
}
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index 3060865bc8e..788b34bca62 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -3138,7 +3138,7 @@ fail_err:
&& page_get_n_recs(page) >= 2
&& dict_index_get_space_reserve() + rec_size > max_size
&& (btr_page_get_split_rec_to_right(cursor, &dummy)
- || btr_page_get_split_rec_to_left(cursor, &dummy))) {
+ || btr_page_get_split_rec_to_left(cursor))) {
goto fail;
}
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index 604890ed03c..ded01e53bad 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -445,30 +445,22 @@ btr_page_reorganize(
dict_index_t* index, /*!< in: the index tree of the page */
mtr_t* mtr) /*!< in/out: mini-transaction */
MY_ATTRIBUTE((nonnull));
-/*************************************************************//**
-Decides if the page should be split at the convergence point of
-inserts converging to left.
-@return TRUE if split recommended */
-ibool
-btr_page_get_split_rec_to_left(
-/*===========================*/
- btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec)/*!< out: if split recommended,
- the first record on upper half page,
- or NULL if tuple should be first */
- MY_ATTRIBUTE((warn_unused_result));
-/*************************************************************//**
-Decides if the page should be split at the convergence point of
-inserts converging to right.
-@return TRUE if split recommended */
-ibool
-btr_page_get_split_rec_to_right(
-/*============================*/
- btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec)/*!< out: if split recommended,
- the first record on upper half page,
- or NULL if tuple should be first */
- MY_ATTRIBUTE((warn_unused_result));
+/** Decide if the page should be split at the convergence point of inserts
+converging to the left.
+@param[in] cursor insert position
+@return the first record to be moved to the right half page
+@retval NULL if no split is recommended */
+rec_t* btr_page_get_split_rec_to_left(const btr_cur_t* cursor);
+/** Decide if the page should be split at the convergence point of inserts
+converging to the right.
+@param[in] cursor insert position
+@param[out] split_rec if split recommended, the first record
+ on the right half page, or
+ NULL if the to-be-inserted record
+ should be first
+@return whether split is recommended */
+bool
+btr_page_get_split_rec_to_right(const btr_cur_t* cursor, rec_t** split_rec);
/*************************************************************//**
Splits an index page to halves and inserts the tuple. It is assumed
diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc
index bda4693648b..e316c8b1568 100644
--- a/storage/innobase/page/page0page.cc
+++ b/storage/innobase/page/page0page.cc
@@ -604,20 +604,20 @@ page_copy_rec_list_end_no_locks(
/* Copy records from the original page to the new page */
while (!page_cur_is_after_last(&cur1)) {
- rec_t* cur1_rec = page_cur_get_rec(&cur1);
rec_t* ins_rec;
- offsets = rec_get_offsets(cur1_rec, index, offsets, is_leaf,
+ offsets = rec_get_offsets(cur1.rec, index, offsets, is_leaf,
ULINT_UNDEFINED, &heap);
ins_rec = page_cur_insert_rec_low(cur2, index,
- cur1_rec, offsets, mtr);
+ cur1.rec, offsets, mtr);
if (UNIV_UNLIKELY(!ins_rec)) {
ib::fatal() << "Rec offset " << page_offset(rec)
- << ", cur1 offset "
- << page_offset(page_cur_get_rec(&cur1))
+ << ", cur1 offset " << page_offset(cur1.rec)
<< ", cur2 offset " << page_offset(cur2);
}
page_cur_move_to_next(&cur1);
+ ut_ad(!(rec_get_info_bits(cur1.rec, page_is_comp(new_page))
+ & REC_INFO_MIN_REC_FLAG));
cur2 = ins_rec;
}
@@ -803,6 +803,8 @@ page_copy_rec_list_start(
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr) /*!< in: mtr */
{
+ ut_ad(page_align(rec) == block->frame);
+
page_t* new_page = buf_block_get_frame(new_block);
page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block);
page_cur_t cur1;
@@ -820,7 +822,6 @@ page_copy_rec_list_start(
predefined infimum record. */
if (page_rec_is_infimum(rec)) {
-
return(ret);
}
@@ -854,17 +855,18 @@ page_copy_rec_list_start(
rec_move, max_to_move,
&num_moved, mtr);
} else {
-
while (page_cur_get_rec(&cur1) != rec) {
- rec_t* cur1_rec = page_cur_get_rec(&cur1);
- offsets = rec_get_offsets(cur1_rec, index, offsets,
+ offsets = rec_get_offsets(cur1.rec, index, offsets,
is_leaf,
ULINT_UNDEFINED, &heap);
cur2 = page_cur_insert_rec_low(cur2, index,
- cur1_rec, offsets, mtr);
+ cur1.rec, offsets, mtr);
ut_a(cur2);
page_cur_move_to_next(&cur1);
+ ut_ad(!(rec_get_info_bits(cur1.rec,
+ page_is_comp(new_page))
+ & REC_INFO_MIN_REC_FLAG));
}
}
@@ -1254,6 +1256,7 @@ page_delete_rec_list_start(
rec_offs_init(offsets_);
+ ut_ad(page_align(rec) == block->frame);
ut_ad((ibool) !!page_rec_is_comp(rec)
== dict_table_is_comp(index->table));
#ifdef UNIV_ZIP_DEBUG
@@ -2163,7 +2166,17 @@ page_simple_validate_old(
goto func_exit;
}
- rec = page_rec_get_next_const(rec);
+ ulint offs = rec_get_next_offs(rec, FALSE);
+ if (!offs) {
+ break;
+ }
+ if (UNIV_UNLIKELY(offs < PAGE_OLD_INFIMUM
+ || offs >= srv_page_size)) {
+ ib::error() << "Page free list is corrupted " << count;
+ goto func_exit;
+ }
+
+ rec = page + offs;
}
if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
@@ -2355,7 +2368,17 @@ page_simple_validate_new(
goto func_exit;
}
- rec = page_rec_get_next_const(rec);
+ const ulint offs = rec_get_next_offs(rec, TRUE);
+ if (!offs) {
+ break;
+ }
+ if (UNIV_UNLIKELY(offs < PAGE_OLD_INFIMUM
+ || offs >= srv_page_size)) {
+ ib::error() << "Page free list is corrupted " << count;
+ goto func_exit;
+ }
+
+ rec = page + offs;
}
if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
@@ -2668,16 +2691,32 @@ n_owned_zero:
page_is_leaf(page),
ULINT_UNDEFINED, &heap);
if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) {
+ ret = FALSE;
+next_free:
+ const ulint offs = rec_get_next_offs(
+ rec, page_is_comp(page));
+ if (!offs) {
+ break;
+ }
+ if (UNIV_UNLIKELY(offs < PAGE_OLD_INFIMUM
+ || offs >= srv_page_size)) {
+ ib::error() << "Page free list is corrupted";
+ ret = FALSE;
+ break;
+ }
- goto func_exit;
+ rec = page + offs;
+ continue;
}
count++;
offs = page_offset(rec_get_start(rec, offsets));
i = rec_offs_size(offsets);
- if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) {
- ib::error() << "Record offset out of bounds";
- goto func_exit;
+ if (UNIV_UNLIKELY(offs + i >= srv_page_size)) {
+ ib::error() << "Free record offset out of bounds: "
+ << offs << '+' << i;
+ ret = FALSE;
+ goto next_free;
}
while (i--) {
@@ -2691,7 +2730,7 @@ n_owned_zero:
buf[offs + i] = 1;
}
- rec = page_rec_get_next_const(rec);
+ goto next_free;
}
if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {