summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2019-03-25 16:03:24 +0200
committerMarko Mäkelä <marko.makela@mariadb.com>2019-03-25 16:03:24 +0200
commit525e79b057ce1256aac9f8248b89d8d507ab22c3 (patch)
tree1bfdc7452ca5d981d905918b44930a1e0e3f9f65
parentade0a0e9d57906e6a32c145c7201dbf216392f12 (diff)
downloadmariadb-git-525e79b057ce1256aac9f8248b89d8d507ab22c3.tar.gz
MDEV-19022: InnoDB fails to cleanup useless B-tree pages
The test case for reproducing MDEV-14126 demonstrates that InnoDB can end up with an index tree where a non-leaf page has only one child page. The test case innodb.innodb_bug14676111 demonstrates that such pages are sometimes unavoidable, because InnoDB does not implement any sort of B-tree rotation. But, there is no reason to allow a root page with only one child page. btr_cur_node_ptr_delete(): Replaces btr_node_ptr_delete(). btr_page_get_father(): Declare globally. btr_discard_only_page_on_level(): Declare with ATTRIBUTE_COLD. It turns out that this function is not covered by the innodb.innodb_bug14676111 test case after all. btr_discard_page(): If the root page ends up having only one child page, shrink the tree by invoking btr_lift_page_up().
-rw-r--r--mysql-test/suite/innodb/r/innodb_bug14676111.result6
-rw-r--r--storage/innobase/btr/btr0btr.cc74
-rw-r--r--storage/innobase/btr/btr0cur.cc23
-rw-r--r--storage/innobase/btr/btr0defragment.cc8
-rw-r--r--storage/innobase/include/btr0btr.h17
-rw-r--r--storage/innobase/include/btr0cur.h7
6 files changed, 63 insertions, 72 deletions
diff --git a/mysql-test/suite/innodb/r/innodb_bug14676111.result b/mysql-test/suite/innodb/r/innodb_bug14676111.result
index d1e21c92d95..49d099263de 100644
--- a/mysql-test/suite/innodb/r/innodb_bug14676111.result
+++ b/mysql-test/suite/innodb/r/innodb_bug14676111.result
@@ -39,7 +39,7 @@ Table Op Msg_type Msg_text
test.t1 analyze status OK
select CLUST_INDEX_SIZE from information_schema.INNODB_SYS_TABLESTATS where NAME = 'test/t1';
CLUST_INDEX_SIZE
-5
+4
set global innodb_limit_optimistic_insert_debug = 10000;
connection con2;
rollback;
@@ -50,7 +50,7 @@ Table Op Msg_type Msg_text
test.t1 analyze status OK
select CLUST_INDEX_SIZE from information_schema.INNODB_SYS_TABLESTATS where NAME = 'test/t1';
CLUST_INDEX_SIZE
-3
+2
begin;
insert into t1 values (2);
rollback;
@@ -59,7 +59,7 @@ Table Op Msg_type Msg_text
test.t1 analyze status OK
select CLUST_INDEX_SIZE from information_schema.INNODB_SYS_TABLESTATS where NAME = 'test/t1';
CLUST_INDEX_SIZE
-2
+1
begin;
insert into t1 values (2);
rollback;
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 5b1e7e1b853..a40e39624e2 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -1062,18 +1062,13 @@ btr_page_get_father_block(
return(btr_page_get_father_node_ptr(offsets, heap, cursor, mtr));
}
-/************************************************************//**
-Seeks to the upper level node pointer to a page.
-It is assumed that mtr holds an x-latch on the tree. */
-static
-void
-btr_page_get_father(
-/*================*/
- dict_index_t* index, /*!< in: b-tree index */
- buf_block_t* block, /*!< in: child page in the index */
- mtr_t* mtr, /*!< in: mtr */
- btr_cur_t* cursor) /*!< out: cursor on node pointer record,
- its page x-latched */
+/** Seek to the parent page of a B-tree page.
+@param[in,out] index b-tree
+@param[in] block child page
+@param[in,out] mtr mini-transaction
+@param[out] cursor cursor pointing to the x-latched parent page */
+void btr_page_get_father(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
+ btr_cur_t* cursor)
{
mem_heap_t* heap;
rec_t* rec
@@ -3464,33 +3459,6 @@ btr_set_min_rec_mark(
}
/*************************************************************//**
-Deletes on the upper level the node pointer to a page. */
-void
-btr_node_ptr_delete(
-/*================*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: page whose node pointer is deleted */
- mtr_t* mtr) /*!< in: mtr */
-{
- btr_cur_t cursor;
- ibool compressed;
- dberr_t err;
-
- ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
-
- /* Delete node pointer on father page */
- btr_page_get_father(index, block, mtr, &cursor);
-
- compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor,
- BTR_CREATE_FLAG, false, mtr);
- ut_a(err == DB_SUCCESS);
-
- if (!compressed) {
- btr_cur_compress_if_useful(&cursor, FALSE, mtr);
- }
-}
-
-/*************************************************************//**
If page is the only on its level, this function moves its records to the
father page, thus reducing the tree height.
@return father block */
@@ -3939,7 +3907,7 @@ retry:
lock_rec_free_all_from_discard_page(block);
lock_mutex_exit();
} else {
- btr_node_ptr_delete(index, block, mtr);
+ btr_cur_node_ptr_delete(&father_cursor, mtr);
if (!dict_table_is_locking_disabled(index->table)) {
lock_update_merge_left(
merge_block, orig_pred, block);
@@ -4217,8 +4185,9 @@ err_exit:
/*************************************************************//**
Discards a page that is the only page on its level. This will empty
the whole B-tree, leaving just an empty root page. This function
-should never be reached, because btr_compress(), which is invoked in
+should almost never be reached, because btr_compress(), which is invoked in
delete operations, calls btr_lift_page_up() to flatten the B-tree. */
+ATTRIBUTE_COLD
static
void
btr_discard_only_page_on_level(
@@ -4318,10 +4287,7 @@ btr_discard_page(
buf_block_t* block;
page_t* page;
rec_t* node_ptr;
-#ifdef UNIV_DEBUG
btr_cur_t parent_cursor;
- bool parent_is_different = false;
-#endif
block = btr_cur_get_block(cursor);
index = btr_cur_get_index(cursor);
@@ -4337,13 +4303,11 @@ btr_discard_page(
MONITOR_INC(MONITOR_INDEX_DISCARD);
-#ifdef UNIV_DEBUG
if (dict_index_is_spatial(index)) {
rtr_page_get_father(index, block, mtr, cursor, &parent_cursor);
} else {
btr_page_get_father(index, block, mtr, &parent_cursor);
}
-#endif
/* Decide the page which will inherit the locks */
@@ -4351,7 +4315,7 @@ btr_discard_page(
right_page_no = btr_page_get_next(buf_block_get_frame(block), mtr);
const page_size_t page_size(dict_table_page_size(index->table));
-
+ ut_d(bool parent_is_different = false);
if (left_page_no != FIL_NULL) {
merge_block = btr_block_get(
page_id_t(space, left_page_no), page_size,
@@ -4407,15 +4371,9 @@ btr_discard_page(
}
if (dict_index_is_spatial(index)) {
- btr_cur_t father_cursor;
-
- /* Since rtr_node_ptr_delete doesn't contain get father
- node ptr, so, we need to get father node ptr first and then
- delete it. */
- rtr_page_get_father(index, block, mtr, cursor, &father_cursor);
- rtr_node_ptr_delete(index, &father_cursor, block, mtr);
+ rtr_node_ptr_delete(index, &parent_cursor, block, mtr);
} else {
- btr_node_ptr_delete(index, block, mtr);
+ btr_cur_node_ptr_delete(&parent_cursor, mtr);
}
/* Remove the page from the level list */
@@ -4453,6 +4411,12 @@ btr_discard_page(
we cannot use btr_check_node_ptr() */
ut_ad(parent_is_different
|| btr_check_node_ptr(index, merge_block, mtr));
+
+ if (btr_cur_get_block(&parent_cursor)->page.id.page_no() == index->page
+ && !page_has_siblings(btr_cur_get_page(&parent_cursor))
+ && page_get_n_recs(btr_cur_get_page(&parent_cursor)) == 1) {
+ btr_lift_page_up(index, merge_block, mtr);
+ }
}
#ifdef UNIV_BTR_PRINT
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index 37372af4b44..8f26ac9e0e5 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -5358,8 +5358,10 @@ btr_cur_pessimistic_delete(
on the page */
ulint level = btr_page_get_level(page, mtr);
- btr_node_ptr_delete(index, block, mtr);
-
+ btr_cur_t cursor;
+ btr_page_get_father(index, block, mtr, &cursor);
+ btr_cur_node_ptr_delete(&cursor, mtr);
+ // FIXME: reuse the node_ptr from above
dtuple_t* node_ptr = dict_index_build_node_ptr(
index, next_rec, block->page.id.page_no(),
heap, level);
@@ -5428,6 +5430,23 @@ return_after_reservations:
return(ret);
}
+/** Delete the node pointer in a parent page.
+@param[in,out] parent cursor pointing to parent record
+@param[in,out] mtr mini-transaction */
+void btr_cur_node_ptr_delete(btr_cur_t* parent, mtr_t* mtr)
+{
+ ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(parent),
+ MTR_MEMO_PAGE_X_FIX));
+ dberr_t err;
+ ibool compressed = btr_cur_pessimistic_delete(&err, TRUE, parent,
+ BTR_CREATE_FLAG, false,
+ mtr);
+ ut_a(err == DB_SUCCESS);
+ if (!compressed) {
+ btr_cur_compress_if_useful(parent, FALSE, mtr);
+ }
+}
+
/*******************************************************************//**
Adds path information to the cursor for the current page, for which
the binary search has been performed. */
diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc
index 077cf8a1c73..2bb2e84ee05 100644
--- a/storage/innobase/btr/btr0defragment.cc
+++ b/storage/innobase/btr/btr0defragment.cc
@@ -484,6 +484,7 @@ btr_defragment_merge_pages(
ULINT_UNDEFINED);
}
}
+ btr_cur_t parent;
if (n_recs_to_move == n_recs) {
/* The whole page is merged with the previous page,
free it. */
@@ -491,7 +492,8 @@ btr_defragment_merge_pages(
from_block);
btr_search_drop_page_hash_index(from_block);
btr_level_list_remove(space, page_size, (page_t*)from_page, index, mtr);
- btr_node_ptr_delete(index, from_block, mtr);
+ btr_page_get_father(index, from_block, mtr, &parent);
+ btr_cur_node_ptr_delete(&parent, mtr);
/* btr_blob_dbg_remove(from_page, index,
"btr_defragment_n_pages"); */
btr_page_free(index, from_block, mtr);
@@ -509,7 +511,9 @@ btr_defragment_merge_pages(
lock_update_split_and_merge(to_block,
orig_pred,
from_block);
- btr_node_ptr_delete(index, from_block, mtr);
+ // FIXME: reuse the node_ptr!
+ btr_page_get_father(index, from_block, mtr, &parent);
+ btr_cur_node_ptr_delete(&parent, mtr);
rec = page_rec_get_next(
page_get_infimum_rec(from_page));
node_ptr = dict_index_build_node_ptr(
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index 967f738f4e3..23e445e2d5f 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2014, 2018, MariaDB Corporation.
+Copyright (c) 2014, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -545,14 +545,13 @@ btr_set_min_rec_mark(
rec_t* rec, /*!< in/out: record */
mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((nonnull));
-/*************************************************************//**
-Deletes on the upper level the node pointer to a page. */
-void
-btr_node_ptr_delete(
-/*================*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: page whose node pointer is deleted */
- mtr_t* mtr) /*!< in: mtr */
+/** Seek to the parent page of a B-tree page.
+@param[in,out] index b-tree
+@param[in] block child page
+@param[in,out] mtr mini-transaction
+@param[out] cursor cursor pointing to the x-latched parent page */
+void btr_page_get_father(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
+ btr_cur_t* cursor)
MY_ATTRIBUTE((nonnull));
#ifdef UNIV_DEBUG
/************************************************************//**
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index 0f027536525..769d9e82542 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -529,6 +529,11 @@ btr_cur_pessimistic_delete(
bool rollback,/*!< in: performing rollback? */
mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((nonnull));
+/** Delete the node pointer in a parent page.
+@param[in,out] parent cursor pointing to parent record
+@param[in,out] mtr mini-transaction */
+void btr_cur_node_ptr_delete(btr_cur_t* parent, mtr_t* mtr)
+ MY_ATTRIBUTE((nonnull));
/***********************************************************//**
Parses a redo log record of updating a record in-place.
@return end of log record or NULL */