summaryrefslogtreecommitdiff
path: root/storage/innobase/btr/btr0btr.c
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/btr/btr0btr.c')
-rw-r--r--storage/innobase/btr/btr0btr.c2970
1 files changed, 2970 insertions, 0 deletions
diff --git a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c
new file mode 100644
index 00000000000..1744fc36f4d
--- /dev/null
+++ b/storage/innobase/btr/btr0btr.c
@@ -0,0 +1,2970 @@
+/******************************************************
+The B-tree
+
+(c) 1994-1996 Innobase Oy
+
+Created 6/2/1994 Heikki Tuuri
+*******************************************************/
+
+#include "btr0btr.h"
+
+#ifdef UNIV_NONINL
+#include "btr0btr.ic"
+#endif
+
+#include "fsp0fsp.h"
+#include "page0page.h"
+#include "btr0cur.h"
+#include "btr0sea.h"
+#include "btr0pcur.h"
+#include "rem0cmp.h"
+#include "lock0lock.h"
+#include "ibuf0ibuf.h"
+
+/*
+Latching strategy of the InnoDB B-tree
+--------------------------------------
+A tree latch protects all non-leaf nodes of the tree. Each node of a tree
+also has a latch of its own.
+
+A B-tree operation normally first acquires an S-latch on the tree. It
+searches down the tree and releases the tree latch when it has the
+leaf node latch. To save CPU time we do not acquire any latch on
+non-leaf nodes of the tree during a search, those pages are only bufferfixed.
+
+If an operation needs to restructure the tree, it acquires an X-latch on
+the tree before searching to a leaf node. If it needs, for example, to
+split a leaf,
+(1) InnoDB decides the split point in the leaf,
+(2) allocates a new page,
+(3) inserts the appropriate node pointer to the first non-leaf level,
+(4) releases the tree X-latch,
+(5) and then moves records from the leaf to the new allocated page.
+
+Node pointers
+-------------
+Leaf pages of a B-tree contain the index records stored in the
+tree. On levels n > 0 we store 'node pointers' to pages on level
+n - 1. For each page there is exactly one node pointer stored:
+thus the our tree is an ordinary B-tree, not a B-link tree.
+
+A node pointer contains a prefix P of an index record. The prefix
+is long enough so that it determines an index record uniquely.
+The file page number of the child page is added as the last
+field. To the child page we can store node pointers or index records
+which are >= P in the alphabetical order, but < P1 if there is
+a next node pointer on the level, and P1 is its prefix.
+
+If a node pointer with a prefix P points to a non-leaf child,
+then the leftmost record in the child must have the same
+prefix P. If it points to a leaf node, the child is not required
+to contain any record with a prefix equal to P. The leaf case
+is decided this way to allow arbitrary deletions in a leaf node
+without touching upper levels of the tree.
+
+We have predefined a special minimum record which we
+define as the smallest record in any alphabetical order.
+A minimum record is denoted by setting a bit in the record
+header. A minimum record acts as the prefix of a node pointer
+which points to a leftmost node on any level of the tree.
+
+File page allocation
+--------------------
+In the root node of a B-tree there are two file segment headers.
+The leaf pages of a tree are allocated from one file segment, to
+make them consecutive on disk if possible. From the other file segment
+we allocate pages for the non-leaf levels of the tree.
+*/
+
+/******************************************************************
+Creates a new index page to the tree (not the root, and also not
+used in page reorganization). */
+static
+void
+btr_page_create(
+/*============*/
+ page_t* page, /* in: page to be created */
+ dict_tree_t* tree, /* in: index tree */
+ mtr_t* mtr); /* in: mtr */
+/****************************************************************
+Returns the upper level node pointer to a page. It is assumed that
+mtr holds an x-latch on the tree. */
+static
+rec_t*
+btr_page_get_father_node_ptr(
+/*=========================*/
+ /* out: pointer to node pointer record */
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: page: must contain at least one
+ user record */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Empties an index page. */
+static
+void
+btr_page_empty(
+/*===========*/
+ page_t* page, /* in: page to be emptied */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Returns TRUE if the insert fits on the appropriate half-page
+with the chosen split_rec. */
+static
+ibool
+btr_page_insert_fits(
+/*=================*/
+ /* out: TRUE if fits */
+ btr_cur_t* cursor, /* in: cursor at which insert
+ should be made */
+ rec_t* split_rec, /* in: suggestion for first record
+ on upper half-page, or NULL if
+ tuple should be first */
+ const ulint* offsets, /* in: rec_get_offsets(
+ split_rec, cursor->index) */
+ dtuple_t* tuple, /* in: tuple to insert */
+ mem_heap_t* heap); /* in: temporary memory heap */
+
+/******************************************************************
+Gets the root node of a tree and x-latches it. */
+
+page_t*
+btr_root_get(
+/*=========*/
+ /* out: root page, x-latched */
+ dict_tree_t* tree, /* in: index tree */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint space;
+ ulint root_page_no;
+ page_t* root;
+ ibool comp = UT_LIST_GET_FIRST(tree->tree_indexes)->table->comp;
+
+ space = dict_tree_get_space(tree);
+ root_page_no = dict_tree_get_page(tree);
+
+ root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr);
+ ut_a(page_is_comp(root) == comp);
+
+ return(root);
+}
+
+/*****************************************************************
+Gets pointer to the previous user record in the tree. It is assumed that
+the caller has appropriate latches on the page and its neighbor. */
+
+rec_t*
+btr_get_prev_user_rec(
+/*==================*/
+ /* out: previous user record, NULL if there is none */
+ rec_t* rec, /* in: record on leaf level */
+ mtr_t* mtr) /* in: mtr holding a latch on the page, and if
+ needed, also to the previous page */
+{
+ page_t* page;
+ page_t* prev_page;
+ ulint prev_page_no;
+ rec_t* prev_rec;
+ ulint space;
+
+ page = buf_frame_align(rec);
+
+ if (page_get_infimum_rec(page) != rec) {
+
+ prev_rec = page_rec_get_prev(rec);
+
+ if (page_get_infimum_rec(page) != prev_rec) {
+
+ return(prev_rec);
+ }
+ }
+
+ prev_page_no = btr_page_get_prev(page, mtr);
+ space = buf_frame_get_space_id(page);
+
+ if (prev_page_no != FIL_NULL) {
+
+ prev_page = buf_page_get_with_no_latch(space, prev_page_no,
+ mtr);
+ /* The caller must already have a latch to the brother */
+ ut_ad((mtr_memo_contains(mtr, buf_block_align(prev_page),
+ MTR_MEMO_PAGE_S_FIX))
+ || (mtr_memo_contains(mtr, buf_block_align(prev_page),
+ MTR_MEMO_PAGE_X_FIX)));
+ ut_a(page_is_comp(prev_page) == page_is_comp(page));
+
+ prev_rec = page_rec_get_prev(page_get_supremum_rec(prev_page));
+
+ return(prev_rec);
+ }
+
+ return(NULL);
+}
+
+/*****************************************************************
+Gets pointer to the next user record in the tree. It is assumed that the
+caller has appropriate latches on the page and its neighbor. */
+
+rec_t*
+btr_get_next_user_rec(
+/*==================*/
+ /* out: next user record, NULL if there is none */
+ rec_t* rec, /* in: record on leaf level */
+ mtr_t* mtr) /* in: mtr holding a latch on the page, and if
+ needed, also to the next page */
+{
+ page_t* page;
+ page_t* next_page;
+ ulint next_page_no;
+ rec_t* next_rec;
+ ulint space;
+
+ page = buf_frame_align(rec);
+
+ if (page_get_supremum_rec(page) != rec) {
+
+ next_rec = page_rec_get_next(rec);
+
+ if (page_get_supremum_rec(page) != next_rec) {
+
+ return(next_rec);
+ }
+ }
+
+ next_page_no = btr_page_get_next(page, mtr);
+ space = buf_frame_get_space_id(page);
+
+ if (next_page_no != FIL_NULL) {
+
+ next_page = buf_page_get_with_no_latch(space, next_page_no,
+ mtr);
+ /* The caller must already have a latch to the brother */
+ ut_ad((mtr_memo_contains(mtr, buf_block_align(next_page),
+ MTR_MEMO_PAGE_S_FIX))
+ || (mtr_memo_contains(mtr, buf_block_align(next_page),
+ MTR_MEMO_PAGE_X_FIX)));
+
+ ut_a(page_is_comp(next_page) == page_is_comp(page));
+ next_rec = page_rec_get_next(page_get_infimum_rec(next_page));
+
+ return(next_rec);
+ }
+
+ return(NULL);
+}
+
+/******************************************************************
+Creates a new index page to the tree (not the root, and also not used in
+page reorganization). */
+static
+void
+btr_page_create(
+/*============*/
+ page_t* page, /* in: page to be created */
+ dict_tree_t* tree, /* in: index tree */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ page_create(page, mtr,
+ UT_LIST_GET_FIRST(tree->tree_indexes)->table->comp);
+ buf_block_align(page)->check_index_page_at_flush = TRUE;
+
+ btr_page_set_index_id(page, tree->id, mtr);
+}
+
+/******************************************************************
+Allocates a new file page to be used in an ibuf tree. Takes the page from
+the free list of the tree, which must contain pages! */
+static
+page_t*
+btr_page_alloc_for_ibuf(
+/*====================*/
+ /* out: new allocated page, x-latched */
+ dict_tree_t* tree, /* in: index tree */
+ mtr_t* mtr) /* in: mtr */
+{
+ fil_addr_t node_addr;
+ page_t* root;
+ page_t* new_page;
+
+ root = btr_root_get(tree, mtr);
+
+ node_addr = flst_get_first(root + PAGE_HEADER
+ + PAGE_BTR_IBUF_FREE_LIST, mtr);
+ ut_a(node_addr.page != FIL_NULL);
+
+ new_page = buf_page_get(dict_tree_get_space(tree), node_addr.page,
+ RW_X_LATCH, mtr);
+#ifdef UNIV_SYNC_DEBUG
+ buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW);
+#endif /* UNIV_SYNC_DEBUG */
+
+ flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+ new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
+ mtr);
+ ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr));
+
+ return(new_page);
+}
+
+/******************************************************************
+Allocates a new file page to be used in an index tree. NOTE: we assume
+that the caller has made the reservation for free extents! */
+
+page_t*
+btr_page_alloc(
+/*===========*/
+ /* out: new allocated page, x-latched;
+ NULL if out of space */
+ dict_tree_t* tree, /* in: index tree */
+ ulint hint_page_no, /* in: hint of a good page */
+ byte file_direction, /* in: direction where a possible
+ page split is made */
+ ulint level, /* in: level where the page is placed
+ in the tree */
+ mtr_t* mtr) /* in: mtr */
+{
+ fseg_header_t* seg_header;
+ page_t* root;
+ page_t* new_page;
+ ulint new_page_no;
+
+ if (tree->type & DICT_IBUF) {
+
+ return(btr_page_alloc_for_ibuf(tree, mtr));
+ }
+
+ root = btr_root_get(tree, mtr);
+
+ if (level == 0) {
+ seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+ } else {
+ seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+ }
+
+ /* Parameter TRUE below states that the caller has made the
+ reservation for free extents, and thus we know that a page can
+ be allocated: */
+
+ new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
+ file_direction, TRUE, mtr);
+ if (new_page_no == FIL_NULL) {
+
+ return(NULL);
+ }
+
+ new_page = buf_page_get(dict_tree_get_space(tree), new_page_no,
+ RW_X_LATCH, mtr);
+#ifdef UNIV_SYNC_DEBUG
+ buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW);
+#endif /* UNIV_SYNC_DEBUG */
+
+ return(new_page);
+}
+
+/******************************************************************
+Gets the number of pages in a B-tree. */
+
+ulint
+btr_get_size(
+/*=========*/
+ /* out: number of pages */
+ dict_index_t* index, /* in: index */
+ ulint flag) /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
+{
+ fseg_header_t* seg_header;
+ page_t* root;
+ ulint n;
+ ulint dummy;
+ mtr_t mtr;
+
+ mtr_start(&mtr);
+
+ mtr_s_lock(dict_tree_get_lock(index->tree), &mtr);
+
+ root = btr_root_get(index->tree, &mtr);
+
+ if (flag == BTR_N_LEAF_PAGES) {
+ seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+
+ fseg_n_reserved_pages(seg_header, &n, &mtr);
+
+ } else if (flag == BTR_TOTAL_SIZE) {
+ seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+
+ n = fseg_n_reserved_pages(seg_header, &dummy, &mtr);
+
+ seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+
+ n += fseg_n_reserved_pages(seg_header, &dummy, &mtr);
+ } else {
+ ut_error;
+ }
+
+ mtr_commit(&mtr);
+
+ return(n);
+}
+
+/******************************************************************
+Frees a page used in an ibuf tree. Puts the page to the free list of the
+ibuf tree. */
+static
+void
+btr_page_free_for_ibuf(
+/*===================*/
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: page to be freed, x-latched */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* root;
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ root = btr_root_get(tree, mtr);
+
+ flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+ page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
+
+ ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+ mtr));
+}
+
+/******************************************************************
+Frees a file page used in an index tree. Can be used also to (BLOB)
+external storage pages, because the page level 0 can be given as an
+argument. */
+
+void
+btr_page_free_low(
+/*==============*/
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: page to be freed, x-latched */
+ ulint level, /* in: page level */
+ mtr_t* mtr) /* in: mtr */
+{
+ fseg_header_t* seg_header;
+ page_t* root;
+ ulint space;
+ ulint page_no;
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ /* The page gets invalid for optimistic searches: increment the frame
+ modify clock */
+
+ buf_frame_modify_clock_inc(page);
+
+ if (tree->type & DICT_IBUF) {
+
+ btr_page_free_for_ibuf(tree, page, mtr);
+
+ return;
+ }
+
+ root = btr_root_get(tree, mtr);
+
+ if (level == 0) {
+ seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+ } else {
+ seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+ }
+
+ space = buf_frame_get_space_id(page);
+ page_no = buf_frame_get_page_no(page);
+
+ fseg_free_page(seg_header, space, page_no, mtr);
+}
+
+/******************************************************************
+Frees a file page used in an index tree. NOTE: cannot free field external
+storage pages because the page must contain info on its level. */
+
+void
+btr_page_free(
+/*==========*/
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: page to be freed, x-latched */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint level;
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ level = btr_page_get_level(page, mtr);
+
+ btr_page_free_low(tree, page, level, mtr);
+}
+
+/******************************************************************
+Sets the child node file address in a node pointer. */
+UNIV_INLINE
+void
+btr_node_ptr_set_child_page_no(
+/*===========================*/
+ rec_t* rec, /* in: node pointer record */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint page_no,/* in: child node address */
+ mtr_t* mtr) /* in: mtr */
+{
+ byte* field;
+ ulint len;
+
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
+ ut_ad(0 < btr_page_get_level(buf_frame_align(rec), mtr));
+ ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
+
+ /* The child address is in the last field */
+ field = rec_get_nth_field(rec, offsets,
+ rec_offs_n_fields(offsets) - 1, &len);
+
+ ut_ad(len == 4);
+
+ mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
+}
+
+/****************************************************************
+Returns the child page of a node pointer and x-latches it. */
+static
+page_t*
+btr_node_ptr_get_child(
+/*===================*/
+ /* out: child page, x-latched */
+ rec_t* node_ptr,/* in: node pointer */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint page_no;
+ ulint space;
+ page_t* page;
+
+ ut_ad(rec_offs_validate(node_ptr, NULL, offsets));
+ space = buf_frame_get_space_id(node_ptr);
+ page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
+
+ page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
+
+ return(page);
+}
+
+/****************************************************************
+Returns the upper level node pointer to a page. It is assumed that mtr holds
+an x-latch on the tree. */
+static
+rec_t*
+btr_page_get_father_for_rec(
+/*========================*/
+ /* out: pointer to node pointer record,
+ its page x-latched */
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: page: must contain at least one
+ user record */
+ rec_t* user_rec,/* in: user_record on page */
+ mtr_t* mtr) /* in: mtr */
+{
+ mem_heap_t* heap;
+ dtuple_t* tuple;
+ btr_cur_t cursor;
+ rec_t* node_ptr;
+ dict_index_t* index;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+
+ ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
+ MTR_MEMO_X_LOCK));
+ ut_a(user_rec != page_get_supremum_rec(page));
+ ut_a(user_rec != page_get_infimum_rec(page));
+
+ ut_ad(dict_tree_get_page(tree) != buf_frame_get_page_no(page));
+
+ heap = mem_heap_create(100);
+
+ tuple = dict_tree_build_node_ptr(tree, user_rec, 0, heap,
+ btr_page_get_level(page, mtr));
+ index = UT_LIST_GET_FIRST(tree->tree_indexes);
+
+ /* In the following, we choose just any index from the tree as the
+ first parameter for btr_cur_search_to_nth_level. */
+
+ btr_cur_search_to_nth_level(index,
+ btr_page_get_level(page, mtr) + 1,
+ tuple, PAGE_CUR_LE,
+ BTR_CONT_MODIFY_TREE, &cursor, 0, mtr);
+
+ node_ptr = btr_cur_get_rec(&cursor);
+ offsets = rec_get_offsets(node_ptr, index, offsets,
+ ULINT_UNDEFINED, &heap);
+
+ if (btr_node_ptr_get_child_page_no(node_ptr, offsets) !=
+ buf_frame_get_page_no(page)) {
+ fputs("InnoDB: Dump of the child page:\n", stderr);
+ buf_page_print(buf_frame_align(page));
+ fputs("InnoDB: Dump of the parent page:\n", stderr);
+ buf_page_print(buf_frame_align(node_ptr));
+
+ fputs("InnoDB: Corruption of an index tree: table ", stderr);
+ ut_print_name(stderr, NULL, index->table_name);
+ fputs(", index ", stderr);
+ ut_print_name(stderr, NULL, index->name);
+ fprintf(stderr, ",\n"
+"InnoDB: father ptr page no %lu, child page no %lu\n",
+ (ulong)
+ btr_node_ptr_get_child_page_no(node_ptr, offsets),
+ (ulong) buf_frame_get_page_no(page));
+ offsets = rec_get_offsets(page_rec_get_next(
+ page_get_infimum_rec(page)), index,
+ offsets, ULINT_UNDEFINED, &heap);
+ page_rec_print(page_rec_get_next(page_get_infimum_rec(page)),
+ offsets);
+ offsets = rec_get_offsets(node_ptr, index, offsets,
+ ULINT_UNDEFINED, &heap);
+ page_rec_print(node_ptr, offsets);
+
+ fputs(
+"InnoDB: You should dump + drop + reimport the table to fix the\n"
+"InnoDB: corruption. If the crash happens at the database startup, see\n"
+"InnoDB: http://dev.mysql.com/doc/mysql/en/Forcing_recovery.html about\n"
+"InnoDB: forcing recovery. Then dump + drop + reimport.\n", stderr);
+ }
+
+ ut_a(btr_node_ptr_get_child_page_no(node_ptr, offsets) ==
+ buf_frame_get_page_no(page));
+ mem_heap_free(heap);
+
+ return(node_ptr);
+}
+
+/****************************************************************
+Returns the upper level node pointer to a page. It is assumed that
+mtr holds an x-latch on the tree. */
+static
+rec_t*
+btr_page_get_father_node_ptr(
+/*=========================*/
+ /* out: pointer to node pointer record */
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: page: must contain at least one
+ user record */
+ mtr_t* mtr) /* in: mtr */
+{
+ return(btr_page_get_father_for_rec(tree, page,
+ page_rec_get_next(page_get_infimum_rec(page)), mtr));
+}
+
+/****************************************************************
+Creates the root node for a new index tree. */
+
+ulint
+btr_create(
+/*=======*/
+ /* out: page number of the created root, FIL_NULL if
+ did not succeed */
+ ulint type, /* in: type of the index */
+ ulint space, /* in: space where created */
+ dulint index_id,/* in: index id */
+ ibool comp, /* in: TRUE=compact page format */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ ulint page_no;
+ buf_frame_t* ibuf_hdr_frame;
+ buf_frame_t* frame;
+ page_t* page;
+
+ /* Create the two new segments (one, in the case of an ibuf tree) for
+ the index tree; the segment headers are put on the allocated root page
+ (for an ibuf tree, not in the root, but on a separate ibuf header
+ page) */
+
+ if (type & DICT_IBUF) {
+ /* Allocate first the ibuf header page */
+ ibuf_hdr_frame = fseg_create(space, 0,
+ IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr);
+
+#ifdef UNIV_SYNC_DEBUG
+ buf_page_dbg_add_level(ibuf_hdr_frame, SYNC_TREE_NODE_NEW);
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(buf_frame_get_page_no(ibuf_hdr_frame)
+ == IBUF_HEADER_PAGE_NO);
+ /* Allocate then the next page to the segment: it will be the
+ tree root page */
+
+ page_no = fseg_alloc_free_page(
+ ibuf_hdr_frame + IBUF_HEADER
+ + IBUF_TREE_SEG_HEADER, IBUF_TREE_ROOT_PAGE_NO,
+ FSP_UP, mtr);
+ ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
+
+ frame = buf_page_get(space, page_no, RW_X_LATCH, mtr);
+ } else {
+ frame = fseg_create(space, 0, PAGE_HEADER + PAGE_BTR_SEG_TOP,
+ mtr);
+ }
+
+ if (frame == NULL) {
+
+ return(FIL_NULL);
+ }
+
+ page_no = buf_frame_get_page_no(frame);
+
+#ifdef UNIV_SYNC_DEBUG
+ buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW);
+#endif /* UNIV_SYNC_DEBUG */
+
+ if (type & DICT_IBUF) {
+ /* It is an insert buffer tree: initialize the free list */
+
+ ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
+
+ flst_init(frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr);
+ } else {
+ /* It is a non-ibuf tree: create a file segment for leaf
+ pages */
+ fseg_create(space, page_no, PAGE_HEADER + PAGE_BTR_SEG_LEAF,
+ mtr);
+ /* The fseg create acquires a second latch on the page,
+ therefore we must declare it: */
+#ifdef UNIV_SYNC_DEBUG
+ buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW);
+#endif /* UNIV_SYNC_DEBUG */
+ }
+
+ /* Create a new index page on the the allocated segment page */
+ page = page_create(frame, mtr, comp);
+ buf_block_align(page)->check_index_page_at_flush = TRUE;
+
+ /* Set the index id of the page */
+ btr_page_set_index_id(page, index_id, mtr);
+
+ /* Set the level of the new index page */
+ btr_page_set_level(page, 0, mtr);
+
+ /* Set the next node and previous node fields */
+ btr_page_set_next(page, FIL_NULL, mtr);
+ btr_page_set_prev(page, FIL_NULL, mtr);
+
+ /* We reset the free bits for the page to allow creation of several
+ trees in the same mtr, otherwise the latch on a bitmap page would
+ prevent it because of the latching order */
+
+ ibuf_reset_free_bits_with_type(type, page);
+
+ /* In the following assertion we test that two records of maximum
+ allowed size fit on the root page: this fact is needed to ensure
+ correctness of split algorithms */
+
+ ut_ad(page_get_max_insert_size(page, 2) > 2 * BTR_PAGE_MAX_REC_SIZE);
+
+ return(page_no);
+}
+
+/****************************************************************
+Frees a B-tree except the root page, which MUST be freed after this
+by calling btr_free_root. */
+
+void
+btr_free_but_not_root(
+/*==================*/
+ ulint space, /* in: space where created */
+ ulint root_page_no) /* in: root page number */
+{
+ ibool finished;
+ page_t* root;
+ mtr_t mtr;
+
+leaf_loop:
+ mtr_start(&mtr);
+
+ root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr);
+
+ /* NOTE: page hash indexes are dropped when a page is freed inside
+ fsp0fsp. */
+
+ finished = fseg_free_step(
+ root + PAGE_HEADER + PAGE_BTR_SEG_LEAF, &mtr);
+ mtr_commit(&mtr);
+
+ if (!finished) {
+
+ goto leaf_loop;
+ }
+top_loop:
+ mtr_start(&mtr);
+
+ root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr);
+
+ finished = fseg_free_step_not_header(
+ root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr);
+ mtr_commit(&mtr);
+
+ if (!finished) {
+
+ goto top_loop;
+ }
+}
+
+/****************************************************************
+Frees the B-tree root page. Other tree MUST already have been freed. */
+
+void
+btr_free_root(
+/*==========*/
+ ulint space, /* in: space where created */
+ ulint root_page_no, /* in: root page number */
+ mtr_t* mtr) /* in: a mini-transaction which has already
+ been started */
+{
+ ibool finished;
+ page_t* root;
+
+ root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr);
+
+ btr_search_drop_page_hash_index(root);
+top_loop:
+ finished = fseg_free_step(
+ root + PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
+ if (!finished) {
+
+ goto top_loop;
+ }
+}
+
+/*****************************************************************
+Reorganizes an index page. */
+static
+void
+btr_page_reorganize_low(
+/*====================*/
+ ibool recovery,/* in: TRUE if called in recovery:
+ locks should not be updated, i.e.,
+ there cannot exist locks on the
+ page, and a hash index should not be
+ dropped: it cannot exist */
+ page_t* page, /* in: page to be reorganized */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* new_page;
+ ulint log_mode;
+ ulint data_size1;
+ ulint data_size2;
+ ulint max_ins_size1;
+ ulint max_ins_size2;
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ data_size1 = page_get_data_size(page);
+ max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
+
+ /* Write the log record */
+ mlog_open_and_write_index(mtr, page, index, index->table->comp
+ ? MLOG_COMP_PAGE_REORGANIZE
+ : MLOG_PAGE_REORGANIZE, 0);
+
+ /* Turn logging off */
+ log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+
+ new_page = buf_frame_alloc();
+
+ /* Copy the old page to temporary space */
+ buf_frame_copy(new_page, page);
+
+ if (!recovery) {
+ btr_search_drop_page_hash_index(page);
+ }
+
+ /* Recreate the page: note that global data on page (possible
+ segment headers, next page-field, etc.) is preserved intact */
+
+ page_create(page, mtr, index->table->comp);
+ buf_block_align(page)->check_index_page_at_flush = TRUE;
+
+ /* Copy the records from the temporary space to the recreated page;
+ do not copy the lock bits yet */
+
+ page_copy_rec_list_end_no_locks(page, new_page,
+ page_get_infimum_rec(new_page), index, mtr);
+ /* Copy max trx id to recreated page */
+ page_set_max_trx_id(page, page_get_max_trx_id(new_page));
+
+ if (!recovery) {
+ /* Update the record lock bitmaps */
+ lock_move_reorganize_page(page, new_page);
+ }
+
+ data_size2 = page_get_data_size(page);
+ max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
+
+ if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) {
+ buf_page_print(page);
+ buf_page_print(new_page);
+ fprintf(stderr,
+"InnoDB: Error: page old data size %lu new data size %lu\n"
+"InnoDB: Error: page old max ins size %lu new max ins size %lu\n"
+"InnoDB: Submit a detailed bug report to http://bugs.mysql.com\n",
+ (unsigned long) data_size1, (unsigned long) data_size2,
+ (unsigned long) max_ins_size1,
+ (unsigned long) max_ins_size2);
+ }
+
+ buf_frame_free(new_page);
+
+ /* Restore logging mode */
+ mtr_set_log_mode(mtr, log_mode);
+}
+
+/*****************************************************************
+Reorganizes an index page. */
+
+void
+btr_page_reorganize(
+/*================*/
+ page_t* page, /* in: page to be reorganized */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr) /* in: mtr */
+{
+ btr_page_reorganize_low(FALSE, page, index, mtr);
+}
+
+/***************************************************************
+Parses a redo log record of reorganizing a page. */
+
+byte*
+btr_parse_page_reorganize(
+/*======================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr __attribute__((unused)),
+ /* in: buffer end */
+ dict_index_t* index, /* in: record descriptor */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr) /* in: mtr or NULL */
+{
+ ut_ad(ptr && end_ptr);
+
+ /* The record is empty, except for the record initial part */
+
+ if (page) {
+ btr_page_reorganize_low(TRUE, page, index, mtr);
+ }
+
+ return(ptr);
+}
+
+/*****************************************************************
+Empties an index page. */
+static
+void
+btr_page_empty(
+/*===========*/
+ page_t* page, /* in: page to be emptied */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ btr_search_drop_page_hash_index(page);
+
+ /* Recreate the page: note that global data on page (possible
+ segment headers, next page-field, etc.) is preserved intact */
+
+ page_create(page, mtr, page_is_comp(page));
+ buf_block_align(page)->check_index_page_at_flush = TRUE;
+}
+
+/*****************************************************************
+Makes tree one level higher by splitting the root, and inserts
+the tuple. It is assumed that mtr contains an x-latch on the tree.
+NOTE that the operation of this function must always succeed,
+we cannot reverse it: therefore enough free disk space must be
+guaranteed to be available before this function is called. */
+
+rec_t*
+btr_root_raise_and_insert(
+/*======================*/
+ /* out: inserted record */
+ btr_cur_t* cursor, /* in: cursor at which to insert: must be
+ on the root page; when the function returns,
+ the cursor is positioned on the predecessor
+ of the inserted record */
+ dtuple_t* tuple, /* in: tuple to insert */
+ mtr_t* mtr) /* in: mtr */
+{
+ dict_tree_t* tree;
+ page_t* root;
+ page_t* new_page;
+ ulint new_page_no;
+ rec_t* rec;
+ mem_heap_t* heap;
+ dtuple_t* node_ptr;
+ ulint level;
+ rec_t* node_ptr_rec;
+ page_cur_t* page_cursor;
+
+ root = btr_cur_get_page(cursor);
+ tree = btr_cur_get_tree(cursor);
+
+ ut_ad(dict_tree_get_page(tree) == buf_frame_get_page_no(root));
+ ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
+ MTR_MEMO_X_LOCK));
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(root),
+ MTR_MEMO_PAGE_X_FIX));
+ btr_search_drop_page_hash_index(root);
+
+ /* Allocate a new page to the tree. Root splitting is done by first
+ moving the root records to the new page, emptying the root, putting
+ a node pointer to the new page, and then splitting the new page. */
+
+ new_page = btr_page_alloc(tree, 0, FSP_NO_DIR,
+ btr_page_get_level(root, mtr), mtr);
+
+ btr_page_create(new_page, tree, mtr);
+
+ level = btr_page_get_level(root, mtr);
+
+ /* Set the levels of the new index page and root page */
+ btr_page_set_level(new_page, level, mtr);
+ btr_page_set_level(root, level + 1, mtr);
+
+ /* Set the next node and previous node fields of new page */
+ btr_page_set_next(new_page, FIL_NULL, mtr);
+ btr_page_set_prev(new_page, FIL_NULL, mtr);
+
+ /* Move the records from root to the new page */
+
+ page_move_rec_list_end(new_page, root, page_get_infimum_rec(root),
+ cursor->index, mtr);
+ /* If this is a pessimistic insert which is actually done to
+ perform a pessimistic update then we have stored the lock
+ information of the record to be inserted on the infimum of the
+ root page: we cannot discard the lock structs on the root page */
+
+ lock_update_root_raise(new_page, root);
+
+ /* Create a memory heap where the node pointer is stored */
+ heap = mem_heap_create(100);
+
+ rec = page_rec_get_next(page_get_infimum_rec(new_page));
+ new_page_no = buf_frame_get_page_no(new_page);
+
+ /* Build the node pointer (= node key and page address) for the
+ child */
+
+ node_ptr = dict_tree_build_node_ptr(tree, rec, new_page_no, heap,
+ level);
+ /* Reorganize the root to get free space */
+ btr_page_reorganize(root, cursor->index, mtr);
+
+ page_cursor = btr_cur_get_page_cur(cursor);
+
+ /* Insert node pointer to the root */
+
+ page_cur_set_before_first(root, page_cursor);
+
+ node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
+ cursor->index, mtr);
+
+ ut_ad(node_ptr_rec);
+
+ /* The node pointer must be marked as the predefined minimum record,
+ as there is no lower alphabetical limit to records in the leftmost
+ node of a level: */
+
+ btr_set_min_rec_mark(node_ptr_rec, cursor->index->table->comp, mtr);
+
+ /* Free the memory heap */
+ mem_heap_free(heap);
+
+ /* We play safe and reset the free bits for the new page */
+
+/* fprintf(stderr, "Root raise new page no %lu\n",
+ buf_frame_get_page_no(new_page)); */
+
+ ibuf_reset_free_bits(UT_LIST_GET_FIRST(tree->tree_indexes),
+ new_page);
+ /* Reposition the cursor to the child node */
+ page_cur_search(new_page, cursor->index, tuple,
+ PAGE_CUR_LE, page_cursor);
+
+ /* Split the child and insert tuple */
+ return(btr_page_split_and_insert(cursor, tuple, mtr));
+}
+
+/*****************************************************************
+Decides if the page should be split at the convergence point of inserts
+converging to the left. */
+
+ibool
+btr_page_get_split_rec_to_left(
+/*===========================*/
+ /* out: TRUE if split recommended */
+ btr_cur_t* cursor, /* in: cursor at which to insert */
+ rec_t** split_rec) /* out: if split recommended,
+ the first record on upper half page,
+ or NULL if tuple to be inserted should
+ be first */
+{
+ page_t* page;
+ rec_t* insert_point;
+ rec_t* infimum;
+
+ page = btr_cur_get_page(cursor);
+ insert_point = btr_cur_get_rec(cursor);
+
+ if (page_header_get_ptr(page, PAGE_LAST_INSERT)
+ == page_rec_get_next(insert_point)) {
+
+ infimum = page_get_infimum_rec(page);
+
+ /* If the convergence is in the middle of a page, include also
+ the record immediately before the new insert to the upper
+ page. Otherwise, we could repeatedly move from page to page
+ lots of records smaller than the convergence point. */
+
+ if (infimum != insert_point
+ && page_rec_get_next(infimum) != insert_point) {
+
+ *split_rec = insert_point;
+ } else {
+ *split_rec = page_rec_get_next(insert_point);
+ }
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*****************************************************************
+Decides if the page should be split at the convergence point of inserts
+converging to the right. */
+
+ibool
+btr_page_get_split_rec_to_right(
+/*============================*/
+ /* out: TRUE if split recommended */
+ btr_cur_t* cursor, /* in: cursor at which to insert */
+ rec_t** split_rec) /* out: if split recommended,
+ the first record on upper half page,
+ or NULL if tuple to be inserted should
+ be first */
+{
+ page_t* page;
+ rec_t* insert_point;
+ rec_t* supremum;
+
+ page = btr_cur_get_page(cursor);
+ insert_point = btr_cur_get_rec(cursor);
+
+ /* We use eager heuristics: if the new insert would be right after
+ the previous insert on the same page, we assume that there is a
+ pattern of sequential inserts here. */
+
+ if (page_header_get_ptr(page, PAGE_LAST_INSERT) == insert_point) {
+
+ supremum = page_get_supremum_rec(page);
+
+ if (page_rec_get_next(insert_point) != supremum
+ && page_rec_get_next(page_rec_get_next(insert_point))
+ != supremum) {
+
+ /* If there are >= 2 user records up from the insert
+ point, split all but 1 off. We want to keep one because
+ then sequential inserts can use the adaptive hash
+ index, as they can do the necessary checks of the right
+ search position just by looking at the records on this
+ page. */
+
+ *split_rec = page_rec_get_next(
+ page_rec_get_next(insert_point));
+ } else {
+ /* Else split at the new record to insert */
+ *split_rec = NULL;
+ }
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*****************************************************************
+Calculates a split record such that the tuple will certainly fit on
+its half-page when the split is performed. We assume in this function
+only that the cursor page has at least one user record. */
+static
+rec_t*
+btr_page_get_sure_split_rec(
+/*========================*/
+ /* out: split record, or NULL if
+ tuple will be the first record on
+ upper half-page */
+ btr_cur_t* cursor, /* in: cursor at which insert
+ should be made */
+ dtuple_t* tuple) /* in: tuple to insert */
+{
+ page_t* page;
+ ulint insert_size;
+ ulint free_space;
+ ulint total_data;
+ ulint total_n_recs;
+ ulint total_space;
+ ulint incl_data;
+ rec_t* ins_rec;
+ rec_t* rec;
+ rec_t* next_rec;
+ ulint n;
+ mem_heap_t* heap;
+ ulint* offsets;
+
+ page = btr_cur_get_page(cursor);
+
+ insert_size = rec_get_converted_size(cursor->index, tuple);
+ free_space = page_get_free_space_of_empty(cursor->index->table->comp);
+
+ /* free_space is now the free space of a created new page */
+
+ total_data = page_get_data_size(page) + insert_size;
+ total_n_recs = page_get_n_recs(page) + 1;
+ ut_ad(total_n_recs >= 2);
+ total_space = total_data + page_dir_calc_reserved_space(total_n_recs);
+
+ n = 0;
+ incl_data = 0;
+ ins_rec = btr_cur_get_rec(cursor);
+ rec = page_get_infimum_rec(page);
+
+ heap = NULL;
+ offsets = NULL;
+
+ /* We start to include records to the left half, and when the
+ space reserved by them exceeds half of total_space, then if
+ the included records fit on the left page, they will be put there
+ if something was left over also for the right page,
+ otherwise the last included record will be the first on the right
+ half page */
+
+ for (;;) {
+ /* Decide the next record to include */
+ if (rec == ins_rec) {
+ rec = NULL; /* NULL denotes that tuple is
+ now included */
+ } else if (rec == NULL) {
+ rec = page_rec_get_next(ins_rec);
+ } else {
+ rec = page_rec_get_next(rec);
+ }
+
+ if (rec == NULL) {
+ /* Include tuple */
+ incl_data += insert_size;
+ } else {
+ offsets = rec_get_offsets(rec, cursor->index,
+ offsets, ULINT_UNDEFINED, &heap);
+ incl_data += rec_offs_size(offsets);
+ }
+
+ n++;
+
+ if (incl_data + page_dir_calc_reserved_space(n)
+ >= total_space / 2) {
+
+ if (incl_data + page_dir_calc_reserved_space(n)
+ <= free_space) {
+ /* The next record will be the first on
+ the right half page if it is not the
+ supremum record of page */
+
+ if (rec == ins_rec) {
+ next_rec = NULL;
+ } else if (rec == NULL) {
+ next_rec = page_rec_get_next(ins_rec);
+ } else {
+ next_rec = page_rec_get_next(rec);
+ }
+ if (next_rec != page_get_supremum_rec(page)) {
+ if (heap) {
+ mem_heap_free(heap);
+ }
+ return(next_rec);
+ }
+ }
+
+ if (heap) {
+ mem_heap_free(heap);
+ }
+ return(rec);
+ }
+ }
+}
+
+/*****************************************************************
+Returns TRUE if the insert fits on the appropriate half-page with the
+chosen split_rec. */
+static
+ibool
+btr_page_insert_fits(
+/*=================*/
+ /* out: TRUE if fits */
+ btr_cur_t* cursor, /* in: cursor at which insert
+ should be made */
+ rec_t* split_rec, /* in: suggestion for first record
+ on upper half-page, or NULL if
+ tuple to be inserted should be first */
+ const ulint* offsets, /* in: rec_get_offsets(
+ split_rec, cursor->index) */
+ dtuple_t* tuple, /* in: tuple to insert */
+ mem_heap_t* heap) /* in: temporary memory heap */
+{
+ page_t* page;
+ ulint insert_size;
+ ulint free_space;
+ ulint total_data;
+ ulint total_n_recs;
+ rec_t* rec;
+ rec_t* end_rec;
+ ulint* offs;
+
+ page = btr_cur_get_page(cursor);
+
+ ut_ad(!split_rec == !offsets);
+ ut_ad(!offsets
+ || cursor->index->table->comp == rec_offs_comp(offsets));
+ ut_ad(!offsets
+ || rec_offs_validate(split_rec, cursor->index, offsets));
+ ut_ad(page_is_comp(page) == cursor->index->table->comp);
+
+ insert_size = rec_get_converted_size(cursor->index, tuple);
+ free_space = page_get_free_space_of_empty(cursor->index->table->comp);
+
+ /* free_space is now the free space of a created new page */
+
+ total_data = page_get_data_size(page) + insert_size;
+ total_n_recs = page_get_n_recs(page) + 1;
+
+ /* We determine which records (from rec to end_rec, not including
+ end_rec) will end up on the other half page from tuple when it is
+ inserted. */
+
+ if (split_rec == NULL) {
+ rec = page_rec_get_next(page_get_infimum_rec(page));
+ end_rec = page_rec_get_next(btr_cur_get_rec(cursor));
+
+ } else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) {
+
+ rec = page_rec_get_next(page_get_infimum_rec(page));
+ end_rec = split_rec;
+ } else {
+ rec = split_rec;
+ end_rec = page_get_supremum_rec(page);
+ }
+
+ if (total_data + page_dir_calc_reserved_space(total_n_recs)
+ <= free_space) {
+
+ /* Ok, there will be enough available space on the
+ half page where the tuple is inserted */
+
+ return(TRUE);
+ }
+
+ offs = NULL;
+
+ while (rec != end_rec) {
+ /* In this loop we calculate the amount of reserved
+ space after rec is removed from page. */
+
+ offs = rec_get_offsets(rec, cursor->index, offs,
+ ULINT_UNDEFINED, &heap);
+
+ total_data -= rec_offs_size(offs);
+ total_n_recs--;
+
+ if (total_data + page_dir_calc_reserved_space(total_n_recs)
+ <= free_space) {
+
+ /* Ok, there will be enough available space on the
+ half page where the tuple is inserted */
+
+ return(TRUE);
+ }
+
+ rec = page_rec_get_next(rec);
+ }
+
+ return(FALSE);
+}
+
+/***********************************************************
+Inserts a data tuple to a tree on a non-leaf level. It is assumed
+that mtr holds an x-latch on the tree. */
+
+void
+btr_insert_on_non_leaf_level(
+/*=========================*/
+ dict_tree_t* tree, /* in: tree */
+ ulint level, /* in: level, must be > 0 */
+ dtuple_t* tuple, /* in: the record to be inserted */
+ mtr_t* mtr) /* in: mtr */
+{
+ big_rec_t* dummy_big_rec;
+ btr_cur_t cursor;
+ ulint err;
+ rec_t* rec;
+
+ ut_ad(level > 0);
+
+ /* In the following, choose just any index from the tree as the
+ first parameter for btr_cur_search_to_nth_level. */
+
+ btr_cur_search_to_nth_level(UT_LIST_GET_FIRST(tree->tree_indexes),
+ level, tuple, PAGE_CUR_LE,
+ BTR_CONT_MODIFY_TREE,
+ &cursor, 0, mtr);
+
+ err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG
+ | BTR_NO_UNDO_LOG_FLAG,
+ &cursor, tuple,
+ &rec, &dummy_big_rec, NULL, mtr);
+ ut_a(err == DB_SUCCESS);
+}
+
+/******************************************************************
+Attaches the halves of an index page on the appropriate level in an
+index tree. */
+static
+void
+btr_attach_half_pages(
+/*==================*/
+ dict_tree_t* tree, /* in: the index tree */
+ page_t* page, /* in: page to be split */
+ rec_t* split_rec, /* in: first record on upper
+ half page */
+ page_t* new_page, /* in: the new half page */
+ ulint direction, /* in: FSP_UP or FSP_DOWN */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint space;
+ rec_t* node_ptr;
+ page_t* prev_page;
+ page_t* next_page;
+ ulint prev_page_no;
+ ulint next_page_no;
+ ulint level;
+ page_t* lower_page;
+ page_t* upper_page;
+ ulint lower_page_no;
+ ulint upper_page_no;
+ dtuple_t* node_ptr_upper;
+ mem_heap_t* heap;
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(new_page),
+ MTR_MEMO_PAGE_X_FIX));
+ ut_a(page_is_comp(page) == page_is_comp(new_page));
+
+ /* Create a memory heap where the data tuple is stored */
+ heap = mem_heap_create(1024);
+
+ /* Based on split direction, decide upper and lower pages */
+ if (direction == FSP_DOWN) {
+
+ lower_page_no = buf_frame_get_page_no(new_page);
+ upper_page_no = buf_frame_get_page_no(page);
+ lower_page = new_page;
+ upper_page = page;
+
+ /* Look from the tree for the node pointer to page */
+ node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
+
+ /* Replace the address of the old child node (= page) with the
+ address of the new lower half */
+
+ btr_node_ptr_set_child_page_no(node_ptr,
+ rec_get_offsets(node_ptr,
+ UT_LIST_GET_FIRST(tree->tree_indexes),
+ NULL, ULINT_UNDEFINED, &heap),
+ lower_page_no, mtr);
+ mem_heap_empty(heap);
+ } else {
+ lower_page_no = buf_frame_get_page_no(page);
+ upper_page_no = buf_frame_get_page_no(new_page);
+ lower_page = page;
+ upper_page = new_page;
+ }
+
+ /* Get the level of the split pages */
+ level = btr_page_get_level(page, mtr);
+
+ /* Build the node pointer (= node key and page address) for the upper
+ half */
+
+ node_ptr_upper = dict_tree_build_node_ptr(tree, split_rec,
+ upper_page_no, heap, level);
+
+ /* Insert it next to the pointer to the lower half. Note that this
+ may generate recursion leading to a split on the higher level. */
+
+ btr_insert_on_non_leaf_level(tree, level + 1, node_ptr_upper, mtr);
+
+ /* Free the memory heap */
+ mem_heap_free(heap);
+
+ /* Get the previous and next pages of page */
+
+ prev_page_no = btr_page_get_prev(page, mtr);
+ next_page_no = btr_page_get_next(page, mtr);
+ space = buf_frame_get_space_id(page);
+
+ /* Update page links of the level */
+
+ if (prev_page_no != FIL_NULL) {
+
+ prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr);
+ ut_a(page_is_comp(prev_page) == page_is_comp(page));
+
+ btr_page_set_next(prev_page, lower_page_no, mtr);
+ }
+
+ if (next_page_no != FIL_NULL) {
+
+ next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr);
+ ut_a(page_is_comp(next_page) == page_is_comp(page));
+
+ btr_page_set_prev(next_page, upper_page_no, mtr);
+ }
+
+ btr_page_set_prev(lower_page, prev_page_no, mtr);
+ btr_page_set_next(lower_page, upper_page_no, mtr);
+ btr_page_set_level(lower_page, level, mtr);
+
+ btr_page_set_prev(upper_page, lower_page_no, mtr);
+ btr_page_set_next(upper_page, next_page_no, mtr);
+ btr_page_set_level(upper_page, level, mtr);
+}
+
+/*****************************************************************
+Splits an index page to halves and inserts the tuple. It is assumed
+that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
+is released within this function! NOTE that the operation of this
+function must always succeed, we cannot reverse it: therefore
+enough free disk space must be guaranteed to be available before
+this function is called. */
+
+rec_t*
+btr_page_split_and_insert(
+/*======================*/
+ /* out: inserted record; NOTE: the tree
+ x-latch is released! NOTE: 2 free disk
+ pages must be available! */
+ btr_cur_t* cursor, /* in: cursor at which to insert; when the
+ function returns, the cursor is positioned
+ on the predecessor of the inserted record */
+ dtuple_t* tuple, /* in: tuple to insert */
+ mtr_t* mtr) /* in: mtr */
+{
+ dict_tree_t* tree;
+ page_t* page;
+ ulint page_no;
+ byte direction;
+ ulint hint_page_no;
+ page_t* new_page;
+ rec_t* split_rec;
+ page_t* left_page;
+ page_t* right_page;
+ page_t* insert_page;
+ page_cur_t* page_cursor;
+ rec_t* first_rec;
+ byte* buf = 0; /* remove warning */
+ rec_t* move_limit;
+ ibool insert_will_fit;
+ ulint n_iterations = 0;
+ rec_t* rec;
+ mem_heap_t* heap;
+ ulint n_uniq;
+ ulint* offsets;
+
+ heap = mem_heap_create(1024);
+ n_uniq = dict_index_get_n_unique_in_tree(cursor->index);
+func_start:
+ mem_heap_empty(heap);
+ offsets = NULL;
+ tree = btr_cur_get_tree(cursor);
+
+ ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
+ MTR_MEMO_X_LOCK));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_tree_get_lock(tree), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ page = btr_cur_get_page(cursor);
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ ut_ad(page_get_n_recs(page) >= 2);
+
+ page_no = buf_frame_get_page_no(page);
+
+ /* 1. Decide the split record; split_rec == NULL means that the
+ tuple to be inserted should be the first record on the upper
+ half-page */
+
+ if (n_iterations > 0) {
+ direction = FSP_UP;
+ hint_page_no = page_no + 1;
+ split_rec = btr_page_get_sure_split_rec(cursor, tuple);
+
+ } else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
+ direction = FSP_UP;
+ hint_page_no = page_no + 1;
+
+ } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
+ direction = FSP_DOWN;
+ hint_page_no = page_no - 1;
+ } else {
+ direction = FSP_UP;
+ hint_page_no = page_no + 1;
+ split_rec = page_get_middle_rec(page);
+ }
+
+ /* 2. Allocate a new page to the tree */
+ new_page = btr_page_alloc(tree, hint_page_no, direction,
+ btr_page_get_level(page, mtr), mtr);
+ btr_page_create(new_page, tree, mtr);
+
+ /* 3. Calculate the first record on the upper half-page, and the
+ first record (move_limit) on original page which ends up on the
+ upper half */
+
+ if (split_rec != NULL) {
+ first_rec = split_rec;
+ move_limit = split_rec;
+ } else {
+ buf = mem_alloc(rec_get_converted_size(cursor->index, tuple));
+
+ first_rec = rec_convert_dtuple_to_rec(buf,
+ cursor->index, tuple);
+ move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
+ }
+
+ /* 4. Do first the modifications in the tree structure */
+
+ btr_attach_half_pages(tree, page, first_rec, new_page, direction, mtr);
+
+ if (split_rec == NULL) {
+ mem_free(buf);
+ }
+
+ /* If the split is made on the leaf level and the insert will fit
+ on the appropriate half-page, we may release the tree x-latch.
+ We can then move the records after releasing the tree latch,
+ thus reducing the tree latch contention. */
+
+ if (split_rec) {
+ offsets = rec_get_offsets(split_rec, cursor->index, offsets,
+ n_uniq, &heap);
+
+ insert_will_fit = btr_page_insert_fits(cursor,
+ split_rec, offsets, tuple, heap);
+ } else {
+ insert_will_fit = btr_page_insert_fits(cursor,
+ NULL, NULL, tuple, heap);
+ }
+
+ if (insert_will_fit && (btr_page_get_level(page, mtr) == 0)) {
+
+ mtr_memo_release(mtr, dict_tree_get_lock(tree),
+ MTR_MEMO_X_LOCK);
+ }
+
+ /* 5. Move then the records to the new page */
+ if (direction == FSP_DOWN) {
+/* fputs("Split left\n", stderr); */
+
+ page_move_rec_list_start(new_page, page, move_limit,
+ cursor->index, mtr);
+ left_page = new_page;
+ right_page = page;
+
+ lock_update_split_left(right_page, left_page);
+ } else {
+/* fputs("Split right\n", stderr); */
+
+ page_move_rec_list_end(new_page, page, move_limit,
+ cursor->index, mtr);
+ left_page = page;
+ right_page = new_page;
+
+ lock_update_split_right(right_page, left_page);
+ }
+
+ /* 6. The split and the tree modification is now completed. Decide the
+ page where the tuple should be inserted */
+
+ if (split_rec == NULL) {
+ insert_page = right_page;
+
+ } else {
+ offsets = rec_get_offsets(first_rec, cursor->index,
+ offsets, n_uniq, &heap);
+
+ if (cmp_dtuple_rec(tuple, first_rec, offsets) >= 0) {
+
+ insert_page = right_page;
+ } else {
+ insert_page = left_page;
+ }
+ }
+
+ /* 7. Reposition the cursor for insert and try insertion */
+ page_cursor = btr_cur_get_page_cur(cursor);
+
+ page_cur_search(insert_page, cursor->index, tuple,
+ PAGE_CUR_LE, page_cursor);
+
+ rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr);
+
+ if (rec != NULL) {
+ /* Insert fit on the page: update the free bits for the
+ left and right pages in the same mtr */
+
+ ibuf_update_free_bits_for_two_pages_low(cursor->index,
+ left_page,
+ right_page, mtr);
+ /* fprintf(stderr, "Split and insert done %lu %lu\n",
+ buf_frame_get_page_no(left_page),
+ buf_frame_get_page_no(right_page)); */
+ mem_heap_free(heap);
+ return(rec);
+ }
+
+ /* 8. If insert did not fit, try page reorganization */
+
+ btr_page_reorganize(insert_page, cursor->index, mtr);
+
+ page_cur_search(insert_page, cursor->index, tuple,
+ PAGE_CUR_LE, page_cursor);
+ rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr);
+
+ if (rec == NULL) {
+ /* The insert did not fit on the page: loop back to the
+ start of the function for a new split */
+
+ /* We play safe and reset the free bits for new_page */
+ ibuf_reset_free_bits(cursor->index, new_page);
+
+ /* fprintf(stderr, "Split second round %lu\n",
+ buf_frame_get_page_no(page)); */
+ n_iterations++;
+ ut_ad(n_iterations < 2);
+ ut_ad(!insert_will_fit);
+
+ goto func_start;
+ }
+
+ /* Insert fit on the page: update the free bits for the
+ left and right pages in the same mtr */
+
+ ibuf_update_free_bits_for_two_pages_low(cursor->index, left_page,
+ right_page, mtr);
+ /* fprintf(stderr, "Split and insert done %lu %lu\n",
+ buf_frame_get_page_no(left_page),
+ buf_frame_get_page_no(right_page)); */
+
+ ut_ad(page_validate(left_page, UT_LIST_GET_FIRST(tree->tree_indexes)));
+ ut_ad(page_validate(right_page, UT_LIST_GET_FIRST(tree->tree_indexes)));
+
+ mem_heap_free(heap);
+ return(rec);
+}
+
+/*****************************************************************
+Removes a page from the level list of pages. */
+static
+void
+btr_level_list_remove(
+/*==================*/
+ dict_tree_t* tree __attribute__((unused)), /* in: index tree */
+ page_t* page, /* in: page to remove */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint space;
+ ulint prev_page_no;
+ page_t* prev_page;
+ ulint next_page_no;
+ page_t* next_page;
+
+ ut_ad(tree && page && mtr);
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ /* Get the previous and next page numbers of page */
+
+ prev_page_no = btr_page_get_prev(page, mtr);
+ next_page_no = btr_page_get_next(page, mtr);
+ space = buf_frame_get_space_id(page);
+
+ /* Update page links of the level */
+
+ if (prev_page_no != FIL_NULL) {
+
+ prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr);
+ ut_a(page_is_comp(prev_page) == page_is_comp(page));
+
+ btr_page_set_next(prev_page, next_page_no, mtr);
+ }
+
+ if (next_page_no != FIL_NULL) {
+
+ next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr);
+ ut_a(page_is_comp(next_page) == page_is_comp(page));
+
+ btr_page_set_prev(next_page, prev_page_no, mtr);
+ }
+}
+
+/********************************************************************
+Writes the redo log record for setting an index record as the predefined
+minimum record. */
+UNIV_INLINE
+void
+btr_set_min_rec_mark_log(
+/*=====================*/
+ rec_t* rec, /* in: record */
+ ibool comp, /* TRUE=compact record format */
+ mtr_t* mtr) /* in: mtr */
+{
+ mlog_write_initial_log_record(rec,
+ comp ? MLOG_COMP_REC_MIN_MARK : MLOG_REC_MIN_MARK, mtr);
+
+ /* Write rec offset as a 2-byte ulint */
+ mlog_catenate_ulint(mtr, rec - buf_frame_align(rec), MLOG_2BYTES);
+}
+
+/********************************************************************
+Parses the redo log record for setting an index record as the predefined
+minimum record. */
+
+byte*
+btr_parse_set_min_rec_mark(
+/*=======================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ ibool comp, /* in: TRUE=compact page format */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr) /* in: mtr or NULL */
+{
+ rec_t* rec;
+
+ if (end_ptr < ptr + 2) {
+
+ return(NULL);
+ }
+
+ if (page) {
+ rec = page + mach_read_from_2(ptr);
+
+ btr_set_min_rec_mark(rec, comp, mtr);
+ }
+
+ return(ptr + 2);
+}
+
+/********************************************************************
+Sets a record as the predefined minimum record. */
+
+void
+btr_set_min_rec_mark(
+/*=================*/
+ rec_t* rec, /* in: record */
+ ibool comp, /* in: TRUE=compact page format */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint info_bits;
+
+ info_bits = rec_get_info_bits(rec, comp);
+
+ rec_set_info_bits(rec, comp, info_bits | REC_INFO_MIN_REC_FLAG);
+
+ btr_set_min_rec_mark_log(rec, comp, mtr);
+}
+
+/*****************************************************************
+Deletes on the upper level the node pointer to a page. */
+
+void
+btr_node_ptr_delete(
+/*================*/
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: page whose node pointer is deleted */
+ mtr_t* mtr) /* in: mtr */
+{
+ rec_t* node_ptr;
+ btr_cur_t cursor;
+ ibool compressed;
+ ulint err;
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ /* Delete node pointer on father page */
+
+ node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
+
+ btr_cur_position(UT_LIST_GET_FIRST(tree->tree_indexes), node_ptr,
+ &cursor);
+ compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, FALSE,
+ mtr);
+ ut_a(err == DB_SUCCESS);
+
+ if (!compressed) {
+ btr_cur_compress_if_useful(&cursor, mtr);
+ }
+}
+
+/*****************************************************************
+If page is the only on its level, this function moves its records to the
+father page, thus reducing the tree height. */
+static
+void
+btr_lift_page_up(
+/*=============*/
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: page which is the only on its level;
+ must not be empty: use
+ btr_discard_only_page_on_level if the last
+ record from the page should be removed */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* father_page;
+ ulint page_level;
+ dict_index_t* index;
+
+ ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
+ ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ father_page = buf_frame_align(
+ btr_page_get_father_node_ptr(tree, page, mtr));
+
+ page_level = btr_page_get_level(page, mtr);
+ index = UT_LIST_GET_FIRST(tree->tree_indexes);
+
+ btr_search_drop_page_hash_index(page);
+
+ /* Make the father empty */
+ btr_page_empty(father_page, mtr);
+
+ /* Move records to the father */
+ page_copy_rec_list_end(father_page, page, page_get_infimum_rec(page),
+ index, mtr);
+ lock_update_copy_and_discard(father_page, page);
+
+ btr_page_set_level(father_page, page_level, mtr);
+
+ /* Free the file page */
+ btr_page_free(tree, page, mtr);
+
+ /* We play safe and reset the free bits for the father */
+ ibuf_reset_free_bits(index, father_page);
+ ut_ad(page_validate(father_page, index));
+ ut_ad(btr_check_node_ptr(tree, father_page, mtr));
+}
+
+/*****************************************************************
+Tries to merge the page first to the left immediate brother if such a
+brother exists, and the node pointers to the current page and to the brother
+reside on the same page. If the left brother does not satisfy these
+conditions, looks at the right brother. If the page is the only one on that
+level lifts the records of the page to the father page, thus reducing the
+tree height. It is assumed that mtr holds an x-latch on the tree and on the
+page. If cursor is on the leaf level, mtr must also hold x-latches to the
+brothers, if they exist. NOTE: it is assumed that the caller has reserved
+enough free extents so that the compression will always succeed if done! */
+
+void
+btr_compress(
+/*=========*/
+ btr_cur_t* cursor, /* in: cursor on the page to merge or lift;
+ the page must not be empty: in record delete
+ use btr_discard_page if the page would become
+ empty */
+ mtr_t* mtr) /* in: mtr */
+{
+ dict_tree_t* tree;
+ ulint space;
+ ulint left_page_no;
+ ulint right_page_no;
+ page_t* merge_page;
+ page_t* father_page;
+ ibool is_left;
+ page_t* page;
+ rec_t* orig_pred;
+ rec_t* orig_succ;
+ rec_t* node_ptr;
+ ulint data_size;
+ ulint n_recs;
+ ulint max_ins_size;
+ ulint max_ins_size_reorg;
+ ulint level;
+ ibool comp = cursor->index->table->comp;
+
+ page = btr_cur_get_page(cursor);
+ tree = btr_cur_get_tree(cursor);
+ ut_a(comp == page_is_comp(page));
+
+ ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
+ MTR_MEMO_X_LOCK));
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ level = btr_page_get_level(page, mtr);
+ space = dict_tree_get_space(tree);
+
+ left_page_no = btr_page_get_prev(page, mtr);
+ right_page_no = btr_page_get_next(page, mtr);
+
+/* fprintf(stderr, "Merge left page %lu right %lu \n", left_page_no,
+ right_page_no); */
+
+ node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
+ ut_ad(!comp || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
+ father_page = buf_frame_align(node_ptr);
+ ut_a(comp == page_is_comp(father_page));
+
+ /* Decide the page to which we try to merge and which will inherit
+ the locks */
+
+ if (left_page_no != FIL_NULL) {
+
+ is_left = TRUE;
+ merge_page = btr_page_get(space, left_page_no, RW_X_LATCH,
+ mtr);
+ } else if (right_page_no != FIL_NULL) {
+
+ is_left = FALSE;
+ merge_page = btr_page_get(space, right_page_no, RW_X_LATCH,
+ mtr);
+ } else {
+ /* The page is the only one on the level, lift the records
+ to the father */
+ btr_lift_page_up(tree, page, mtr);
+
+ return;
+ }
+
+ n_recs = page_get_n_recs(page);
+ data_size = page_get_data_size(page);
+ ut_a(page_is_comp(merge_page) == page_is_comp(page));
+
+ max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
+ merge_page, n_recs);
+ if (data_size > max_ins_size_reorg) {
+
+ /* No space for merge */
+
+ return;
+ }
+
+ ut_ad(page_validate(merge_page, cursor->index));
+
+ max_ins_size = page_get_max_insert_size(merge_page, n_recs);
+
+ if (data_size > max_ins_size) {
+
+ /* We have to reorganize merge_page */
+
+ btr_page_reorganize(merge_page, cursor->index, mtr);
+
+ max_ins_size = page_get_max_insert_size(merge_page, n_recs);
+
+ ut_ad(page_validate(merge_page, cursor->index));
+ ut_ad(page_get_max_insert_size(merge_page, n_recs)
+ == max_ins_size_reorg);
+ }
+
+ if (data_size > max_ins_size) {
+
+ /* Add fault tolerance, though this should never happen */
+
+ return;
+ }
+
+ btr_search_drop_page_hash_index(page);
+
+ /* Remove the page from the level list */
+ btr_level_list_remove(tree, page, mtr);
+
+ if (is_left) {
+ btr_node_ptr_delete(tree, page, mtr);
+ } else {
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ /* Replace the address of the old child node (= page) with the
+ address of the merge page to the right */
+
+ btr_node_ptr_set_child_page_no(node_ptr,
+ rec_get_offsets(node_ptr, cursor->index,
+ offsets_, ULINT_UNDEFINED, &heap),
+ right_page_no, mtr);
+ if (heap) {
+ mem_heap_free(heap);
+ }
+ btr_node_ptr_delete(tree, merge_page, mtr);
+ }
+
+ /* Move records to the merge page */
+ if (is_left) {
+ orig_pred = page_rec_get_prev(
+ page_get_supremum_rec(merge_page));
+ page_copy_rec_list_start(merge_page, page,
+ page_get_supremum_rec(page), cursor->index, mtr);
+
+ lock_update_merge_left(merge_page, orig_pred, page);
+ } else {
+ orig_succ = page_rec_get_next(
+ page_get_infimum_rec(merge_page));
+ page_copy_rec_list_end(merge_page, page,
+ page_get_infimum_rec(page), cursor->index, mtr);
+
+ lock_update_merge_right(orig_succ, page);
+ }
+
+ /* We have added new records to merge_page: update its free bits */
+ ibuf_update_free_bits_if_full(cursor->index, merge_page,
+ UNIV_PAGE_SIZE, ULINT_UNDEFINED);
+
+ ut_ad(page_validate(merge_page, cursor->index));
+
+ /* Free the file page */
+ btr_page_free(tree, page, mtr);
+
+ ut_ad(btr_check_node_ptr(tree, merge_page, mtr));
+}
+
+/*****************************************************************
+Discards a page that is the only page on its level. */
+static
+void
+btr_discard_only_page_on_level(
+/*===========================*/
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: page which is the only on its level */
+ mtr_t* mtr) /* in: mtr */
+{
+ rec_t* node_ptr;
+ page_t* father_page;
+ ulint page_level;
+
+ ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
+ ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ btr_search_drop_page_hash_index(page);
+
+ node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
+ father_page = buf_frame_align(node_ptr);
+
+ page_level = btr_page_get_level(page, mtr);
+
+ lock_update_discard(page_get_supremum_rec(father_page), page);
+
+ btr_page_set_level(father_page, page_level, mtr);
+
+ /* Free the file page */
+ btr_page_free(tree, page, mtr);
+
+ if (buf_frame_get_page_no(father_page) == dict_tree_get_page(tree)) {
+ /* The father is the root page */
+
+ btr_page_empty(father_page, mtr);
+
+ /* We play safe and reset the free bits for the father */
+ ibuf_reset_free_bits(UT_LIST_GET_FIRST(tree->tree_indexes),
+ father_page);
+ } else {
+ ut_ad(page_get_n_recs(father_page) == 1);
+
+ btr_discard_only_page_on_level(tree, father_page, mtr);
+ }
+}
+
+/*****************************************************************
+Discards a page from a B-tree. This is used to remove the last record from
+a B-tree page: the whole page must be removed at the same time. This cannot
+be used for the root page, which is allowed to be empty. */
+
+void
+btr_discard_page(
+/*=============*/
+ btr_cur_t* cursor, /* in: cursor on the page to discard: not on
+ the root page */
+ mtr_t* mtr) /* in: mtr */
+{
+ dict_tree_t* tree;
+ ulint space;
+ ulint left_page_no;
+ ulint right_page_no;
+ page_t* merge_page;
+ ibool is_left;
+ page_t* page;
+ rec_t* node_ptr;
+
+ page = btr_cur_get_page(cursor);
+ tree = btr_cur_get_tree(cursor);
+
+ ut_ad(dict_tree_get_page(tree) != buf_frame_get_page_no(page));
+ ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
+ MTR_MEMO_X_LOCK));
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ space = dict_tree_get_space(tree);
+
+ /* Decide the page which will inherit the locks */
+
+ left_page_no = btr_page_get_prev(page, mtr);
+ right_page_no = btr_page_get_next(page, mtr);
+
+ if (left_page_no != FIL_NULL) {
+ is_left = TRUE;
+ merge_page = btr_page_get(space, left_page_no, RW_X_LATCH,
+ mtr);
+ } else if (right_page_no != FIL_NULL) {
+ is_left = FALSE;
+ merge_page = btr_page_get(space, right_page_no, RW_X_LATCH,
+ mtr);
+ } else {
+ btr_discard_only_page_on_level(tree, page, mtr);
+
+ return;
+ }
+
+ ut_a(page_is_comp(merge_page) == page_is_comp(page));
+ btr_search_drop_page_hash_index(page);
+
+ if (left_page_no == FIL_NULL && btr_page_get_level(page, mtr) > 0) {
+
+ /* We have to mark the leftmost node pointer on the right
+ side page as the predefined minimum record */
+
+ node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page));
+
+ ut_ad(node_ptr != page_get_supremum_rec(merge_page));
+
+ btr_set_min_rec_mark(node_ptr,
+ cursor->index->table->comp, mtr);
+ }
+
+ btr_node_ptr_delete(tree, page, mtr);
+
+ /* Remove the page from the level list */
+ btr_level_list_remove(tree, page, mtr);
+
+ if (is_left) {
+ lock_update_discard(page_get_supremum_rec(merge_page), page);
+ } else {
+ lock_update_discard(page_rec_get_next(
+ page_get_infimum_rec(merge_page)), page);
+ }
+
+ /* Free the file page */
+ btr_page_free(tree, page, mtr);
+
+ ut_ad(btr_check_node_ptr(tree, merge_page, mtr));
+}
+
+/*****************************************************************
+Prints size info of a B-tree. */
+
+void
+btr_print_size(
+/*===========*/
+ dict_tree_t* tree) /* in: index tree */
+{
+ page_t* root;
+ fseg_header_t* seg;
+ mtr_t mtr;
+
+ if (tree->type & DICT_IBUF) {
+ fputs(
+ "Sorry, cannot print info of an ibuf tree: use ibuf functions\n",
+ stderr);
+
+ return;
+ }
+
+ mtr_start(&mtr);
+
+ root = btr_root_get(tree, &mtr);
+
+ seg = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+
+ fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr);
+ fseg_print(seg, &mtr);
+
+ if (!(tree->type & DICT_UNIVERSAL)) {
+
+ seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+
+ fputs("INFO OF THE LEAF PAGE SEGMENT\n", stderr);
+ fseg_print(seg, &mtr);
+ }
+
+ mtr_commit(&mtr);
+}
+
+/****************************************************************
+Prints recursively index tree pages. */
+static
+void
+btr_print_recursive(
+/*================*/
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: index page */
+ ulint width, /* in: print this many entries from start
+ and end */
+ mem_heap_t** heap, /* in/out: heap for rec_get_offsets() */
+ ulint** offsets,/* in/out: buffer for rec_get_offsets() */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_cur_t cursor;
+ ulint n_recs;
+ ulint i = 0;
+ mtr_t mtr2;
+ rec_t* node_ptr;
+ page_t* child;
+ dict_index_t* index;
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n",
+ (ulong) btr_page_get_level(page, mtr),
+ (ulong) buf_frame_get_page_no(page));
+
+ index = UT_LIST_GET_FIRST(tree->tree_indexes);
+ page_print(page, index, width, width);
+
+ n_recs = page_get_n_recs(page);
+
+ page_cur_set_before_first(page, &cursor);
+ page_cur_move_to_next(&cursor);
+
+ while (!page_cur_is_after_last(&cursor)) {
+
+ if (0 == btr_page_get_level(page, mtr)) {
+
+ /* If this is the leaf level, do nothing */
+
+ } else if ((i <= width) || (i >= n_recs - width)) {
+
+ mtr_start(&mtr2);
+
+ node_ptr = page_cur_get_rec(&cursor);
+
+ *offsets = rec_get_offsets(node_ptr, index, *offsets,
+ ULINT_UNDEFINED, heap);
+ child = btr_node_ptr_get_child(node_ptr,
+ *offsets, &mtr2);
+ btr_print_recursive(tree, child, width,
+ heap, offsets, &mtr2);
+ mtr_commit(&mtr2);
+ }
+
+ page_cur_move_to_next(&cursor);
+ i++;
+ }
+}
+
+/******************************************************************
+Prints directories and other info of all nodes in the tree. */
+
+void
+btr_print_tree(
+/*===========*/
+ dict_tree_t* tree, /* in: tree */
+ ulint width) /* in: print this many entries from start
+ and end */
+{
+ mtr_t mtr;
+ page_t* root;
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+
+ fputs("--------------------------\n"
+ "INDEX TREE PRINT\n", stderr);
+
+ mtr_start(&mtr);
+
+ root = btr_root_get(tree, &mtr);
+
+ btr_print_recursive(tree, root, width, &heap, &offsets, &mtr);
+ if (heap) {
+ mem_heap_free(heap);
+ }
+
+ mtr_commit(&mtr);
+
+ btr_validate_tree(tree);
+}
+
+/****************************************************************
+Checks that the node pointer to a page is appropriate. */
+
+ibool
+btr_check_node_ptr(
+/*===============*/
+ /* out: TRUE */
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: index page */
+ mtr_t* mtr) /* in: mtr */
+{
+ mem_heap_t* heap;
+ rec_t* node_ptr;
+ dtuple_t* node_ptr_tuple;
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ if (dict_tree_get_page(tree) == buf_frame_get_page_no(page)) {
+
+ return(TRUE);
+ }
+
+ node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
+
+ if (btr_page_get_level(page, mtr) == 0) {
+
+ return(TRUE);
+ }
+
+ heap = mem_heap_create(256);
+
+ node_ptr_tuple = dict_tree_build_node_ptr(
+ tree,
+ page_rec_get_next(page_get_infimum_rec(page)),
+ 0, heap, btr_page_get_level(page, mtr));
+
+ ut_a(cmp_dtuple_rec(node_ptr_tuple, node_ptr,
+ rec_get_offsets(node_ptr,
+ dict_tree_find_index(tree, node_ptr),
+ NULL, ULINT_UNDEFINED, &heap)) == 0);
+
+ mem_heap_free(heap);
+
+ return(TRUE);
+}
+
+/****************************************************************
+Display identification information for a record. */
+static
+void
+btr_index_rec_validate_report(
+/*==========================*/
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: index record */
+ dict_index_t* index) /* in: index */
+{
+ fputs("InnoDB: Record in ", stderr);
+ dict_index_name_print(stderr, NULL, index);
+ fprintf(stderr, ", page %lu, at offset %lu\n",
+ buf_frame_get_page_no(page), (ulint)(rec - page));
+}
+
+/****************************************************************
+Checks the size and number of fields in a record based on the definition of
+the index. */
+
+ibool
+btr_index_rec_validate(
+/*====================*/
+ /* out: TRUE if ok */
+ rec_t* rec, /* in: index record */
+ dict_index_t* index, /* in: index */
+ ibool dump_on_error) /* in: TRUE if the function
+ should print hex dump of record
+ and page on error */
+{
+ ulint len;
+ ulint n;
+ ulint i;
+ page_t* page;
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+
+ page = buf_frame_align(rec);
+
+ if (index->type & DICT_UNIVERSAL) {
+ /* The insert buffer index tree can contain records from any
+ other index: we cannot check the number of fields or
+ their length */
+
+ return(TRUE);
+ }
+
+ n = dict_index_get_n_fields(index);
+
+ if (!index->table->comp && rec_get_n_fields_old(rec) != n) {
+ btr_index_rec_validate_report(page, rec, index);
+ fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n",
+ (ulong) rec_get_n_fields_old(rec), (ulong) n);
+
+ if (dump_on_error) {
+ buf_page_print(page);
+
+ fputs("InnoDB: corrupt record ", stderr);
+ rec_print_old(stderr, rec);
+ putc('\n', stderr);
+ }
+ return(FALSE);
+ }
+
+ offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
+
+ for (i = 0; i < n; i++) {
+ dtype_t* type = dict_index_get_nth_type(index, i);
+ ulint fixed_size = dtype_get_fixed_size(type);
+
+ rec_get_nth_field(rec, offsets, i, &len);
+
+ /* Note that prefix indexes are not fixed size even when
+ their type is CHAR. */
+
+ if ((dict_index_get_nth_field(index, i)->prefix_len == 0
+ && len != UNIV_SQL_NULL && fixed_size
+ && len != fixed_size)
+ ||
+ (dict_index_get_nth_field(index, i)->prefix_len > 0
+ && len != UNIV_SQL_NULL
+ && len >
+ dict_index_get_nth_field(index, i)->prefix_len)) {
+
+ btr_index_rec_validate_report(page, rec, index);
+ fprintf(stderr,
+"InnoDB: field %lu len is %lu, should be %lu\n",
+ (ulong) i, (ulong) len, (ulong) dtype_get_fixed_size(type));
+
+ if (dump_on_error) {
+ buf_page_print(page);
+
+ fputs("InnoDB: corrupt record ", stderr);
+ rec_print_new(stderr, rec, offsets);
+ putc('\n', stderr);
+ }
+ if (heap) {
+ mem_heap_free(heap);
+ }
+ return(FALSE);
+ }
+ }
+
+ if (heap) {
+ mem_heap_free(heap);
+ }
+ return(TRUE);
+}
+
+/****************************************************************
+Checks the size and number of fields in records based on the definition of
+the index. */
+static
+ibool
+btr_index_page_validate(
+/*====================*/
+ /* out: TRUE if ok */
+ page_t* page, /* in: index page */
+ dict_index_t* index) /* in: index */
+{
+ page_cur_t cur;
+ ibool ret = TRUE;
+
+ page_cur_set_before_first(page, &cur);
+ page_cur_move_to_next(&cur);
+
+ for (;;) {
+ if (page_cur_is_after_last(&cur)) {
+
+ break;
+ }
+
+ if (!btr_index_rec_validate(cur.rec, index, TRUE)) {
+
+ return(FALSE);
+ }
+
+ page_cur_move_to_next(&cur);
+ }
+
+ return(ret);
+}
+
+/****************************************************************
+Report an error on one page of an index tree. */
+static
+void
+btr_validate_report1(
+ /* out: TRUE if ok */
+ dict_index_t* index, /* in: index */
+ ulint level, /* in: B-tree level */
+ page_t* page) /* in: index page */
+{
+ fprintf(stderr, "InnoDB: Error in page %lu of ",
+ buf_frame_get_page_no(page));
+ dict_index_name_print(stderr, NULL, index);
+ if (level) {
+ fprintf(stderr, ", index tree level %lu", level);
+ }
+ putc('\n', stderr);
+}
+
+/****************************************************************
+Report an error on two pages of an index tree. */
+static
+void
+btr_validate_report2(
+ /* out: TRUE if ok */
+ dict_index_t* index, /* in: index */
+ ulint level, /* in: B-tree level */
+ page_t* page1, /* in: first index page */
+ page_t* page2) /* in: second index page */
+{
+ fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ",
+ buf_frame_get_page_no(page1),
+ buf_frame_get_page_no(page2));
+ dict_index_name_print(stderr, NULL, index);
+ if (level) {
+ fprintf(stderr, ", index tree level %lu", level);
+ }
+ putc('\n', stderr);
+}
+
+/****************************************************************
+Validates index tree level. */
+static
+ibool
+btr_validate_level(
+/*===============*/
+ /* out: TRUE if ok */
+ dict_tree_t* tree, /* in: index tree */
+ ulint level) /* in: level number */
+{
+ ulint space;
+ page_t* page;
+ page_t* right_page = 0; /* remove warning */
+ page_t* father_page;
+ page_t* right_father_page;
+ rec_t* node_ptr;
+ rec_t* right_node_ptr;
+ rec_t* rec;
+ ulint right_page_no;
+ ulint left_page_no;
+ page_cur_t cursor;
+ dtuple_t* node_ptr_tuple;
+ ibool ret = TRUE;
+ dict_index_t* index;
+ mtr_t mtr;
+ mem_heap_t* heap = mem_heap_create(256);
+ ulint* offsets = NULL;
+ ulint* offsets2= NULL;
+
+ mtr_start(&mtr);
+
+ mtr_x_lock(dict_tree_get_lock(tree), &mtr);
+
+ page = btr_root_get(tree, &mtr);
+
+ space = buf_frame_get_space_id(page);
+
+ index = UT_LIST_GET_FIRST(tree->tree_indexes);
+
+ while (level != btr_page_get_level(page, &mtr)) {
+
+ ut_a(btr_page_get_level(page, &mtr) > 0);
+
+ page_cur_set_before_first(page, &cursor);
+ page_cur_move_to_next(&cursor);
+
+ node_ptr = page_cur_get_rec(&cursor);
+ offsets = rec_get_offsets(node_ptr, index, offsets,
+ ULINT_UNDEFINED, &heap);
+ page = btr_node_ptr_get_child(node_ptr, offsets, &mtr);
+ }
+
+ /* Now we are on the desired level. Loop through the pages on that
+ level. */
+loop:
+ mem_heap_empty(heap);
+ offsets = offsets2 = NULL;
+ mtr_x_lock(dict_tree_get_lock(tree), &mtr);
+
+ /* Check ordering etc. of records */
+
+ if (!page_validate(page, index)) {
+ btr_validate_report1(index, level, page);
+
+ ret = FALSE;
+ } else if (level == 0) {
+ /* We are on level 0. Check that the records have the right
+ number of fields, and field lengths are right. */
+
+ if (!btr_index_page_validate(page, index)) {
+
+ ret = FALSE;
+ }
+ }
+
+ ut_a(btr_page_get_level(page, &mtr) == level);
+
+ right_page_no = btr_page_get_next(page, &mtr);
+ left_page_no = btr_page_get_prev(page, &mtr);
+
+ ut_a((page_get_n_recs(page) > 0)
+ || ((level == 0) &&
+ (buf_frame_get_page_no(page) == dict_tree_get_page(tree))));
+
+ if (right_page_no != FIL_NULL) {
+ rec_t* right_rec;
+ right_page = btr_page_get(space, right_page_no, RW_X_LATCH,
+ &mtr);
+ ut_a(page_is_comp(right_page) == page_is_comp(page));
+ rec = page_rec_get_prev(page_get_supremum_rec(page));
+ right_rec = page_rec_get_next(
+ page_get_infimum_rec(right_page));
+ offsets = rec_get_offsets(rec, index,
+ offsets, ULINT_UNDEFINED, &heap);
+ offsets2 = rec_get_offsets(right_rec, index,
+ offsets2, ULINT_UNDEFINED, &heap);
+ if (cmp_rec_rec(rec, right_rec, offsets, offsets2, index)
+ >= 0) {
+
+ btr_validate_report2(index, level, page, right_page);
+
+ fputs("InnoDB: records in wrong order"
+ " on adjacent pages\n", stderr);
+
+ buf_page_print(page);
+ buf_page_print(right_page);
+
+ fputs("InnoDB: record ", stderr);
+ rec = page_rec_get_prev(page_get_supremum_rec(page));
+ rec_print(stderr, rec, index);
+ putc('\n', stderr);
+ fputs("InnoDB: record ", stderr);
+ rec = page_rec_get_next(page_get_infimum_rec(
+ right_page));
+ rec_print(stderr, rec, index);
+ putc('\n', stderr);
+
+ ret = FALSE;
+ }
+ }
+
+ if (level > 0 && left_page_no == FIL_NULL) {
+ ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
+ page_rec_get_next(page_get_infimum_rec(page)),
+ index->table->comp));
+ }
+
+ if (buf_frame_get_page_no(page) != dict_tree_get_page(tree)) {
+
+ /* Check father node pointers */
+
+ node_ptr = btr_page_get_father_node_ptr(tree, page, &mtr);
+ father_page = buf_frame_align(node_ptr);
+ offsets = rec_get_offsets(node_ptr, index,
+ offsets, ULINT_UNDEFINED, &heap);
+
+ if (btr_node_ptr_get_child_page_no(node_ptr, offsets) !=
+ buf_frame_get_page_no(page)
+ || node_ptr != btr_page_get_father_for_rec(tree, page,
+ page_rec_get_prev(page_get_supremum_rec(page)),
+ &mtr)) {
+ btr_validate_report1(index, level, page);
+
+ fputs("InnoDB: node pointer to the page is wrong\n",
+ stderr);
+
+ buf_page_print(father_page);
+ buf_page_print(page);
+
+ fputs("InnoDB: node ptr ", stderr);
+ rec_print_new(stderr, node_ptr, offsets);
+
+ fprintf(stderr, "\n"
+ "InnoDB: node ptr child page n:o %lu\n",
+ (unsigned long) btr_node_ptr_get_child_page_no(
+ node_ptr, offsets));
+
+ fputs("InnoDB: record on page ", stderr);
+ rec = btr_page_get_father_for_rec(tree, page,
+ page_rec_get_prev(page_get_supremum_rec(page)),
+ &mtr);
+ rec_print(stderr, rec, index);
+ putc('\n', stderr);
+ ret = FALSE;
+
+ goto node_ptr_fails;
+ }
+
+ if (btr_page_get_level(page, &mtr) > 0) {
+ offsets = rec_get_offsets(node_ptr, index,
+ offsets, ULINT_UNDEFINED, &heap);
+
+ node_ptr_tuple = dict_tree_build_node_ptr(
+ tree,
+ page_rec_get_next(
+ page_get_infimum_rec(page)),
+ 0, heap,
+ btr_page_get_level(page, &mtr));
+
+ if (cmp_dtuple_rec(node_ptr_tuple, node_ptr,
+ offsets)) {
+ rec_t* first_rec = page_rec_get_next(
+ page_get_infimum_rec(page));
+
+ btr_validate_report1(index, level, page);
+
+ buf_page_print(father_page);
+ buf_page_print(page);
+
+ fputs("InnoDB: Error: node ptrs differ"
+ " on levels > 0\n"
+ "InnoDB: node ptr ", stderr);
+ rec_print_new(stderr, node_ptr, offsets);
+ fputs("InnoDB: first rec ", stderr);
+ rec_print(stderr, first_rec, index);
+ putc('\n', stderr);
+ ret = FALSE;
+
+ goto node_ptr_fails;
+ }
+ }
+
+ if (left_page_no == FIL_NULL) {
+ ut_a(node_ptr == page_rec_get_next(
+ page_get_infimum_rec(father_page)));
+ ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL);
+ }
+
+ if (right_page_no == FIL_NULL) {
+ ut_a(node_ptr == page_rec_get_prev(
+ page_get_supremum_rec(father_page)));
+ ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL);
+ }
+
+ if (right_page_no != FIL_NULL) {
+
+ right_node_ptr = btr_page_get_father_node_ptr(tree,
+ right_page, &mtr);
+ if (page_rec_get_next(node_ptr) !=
+ page_get_supremum_rec(father_page)) {
+
+ if (right_node_ptr !=
+ page_rec_get_next(node_ptr)) {
+ ret = FALSE;
+ fputs(
+ "InnoDB: node pointer to the right page is wrong\n",
+ stderr);
+
+ btr_validate_report1(index, level,
+ page);
+
+ buf_page_print(father_page);
+ buf_page_print(page);
+ buf_page_print(right_page);
+ }
+ } else {
+ right_father_page = buf_frame_align(
+ right_node_ptr);
+
+ if (right_node_ptr != page_rec_get_next(
+ page_get_infimum_rec(
+ right_father_page))) {
+ ret = FALSE;
+ fputs(
+ "InnoDB: node pointer 2 to the right page is wrong\n",
+ stderr);
+
+ btr_validate_report1(index, level,
+ page);
+
+ buf_page_print(father_page);
+ buf_page_print(right_father_page);
+ buf_page_print(page);
+ buf_page_print(right_page);
+ }
+
+ if (buf_frame_get_page_no(right_father_page)
+ != btr_page_get_next(father_page, &mtr)) {
+
+ ret = FALSE;
+ fputs(
+ "InnoDB: node pointer 3 to the right page is wrong\n",
+ stderr);
+
+ btr_validate_report1(index, level,
+ page);
+
+ buf_page_print(father_page);
+ buf_page_print(right_father_page);
+ buf_page_print(page);
+ buf_page_print(right_page);
+ }
+ }
+ }
+ }
+
+node_ptr_fails:
+ mtr_commit(&mtr);
+
+ if (right_page_no != FIL_NULL) {
+ ibool comp = page_is_comp(page);
+ mtr_start(&mtr);
+
+ page = btr_page_get(space, right_page_no, RW_X_LATCH, &mtr);
+ ut_a(page_is_comp(page) == comp);
+
+ goto loop;
+ }
+
+ mem_heap_free(heap);
+ return(ret);
+}
+
+/******************************************************************
+Checks the consistency of an index tree. */
+
+ibool
+btr_validate_tree(
+/*==============*/
+ /* out: TRUE if ok */
+ dict_tree_t* tree) /* in: tree */
+{
+ mtr_t mtr;
+ page_t* root;
+ ulint i;
+ ulint n;
+
+ mtr_start(&mtr);
+ mtr_x_lock(dict_tree_get_lock(tree), &mtr);
+
+ root = btr_root_get(tree, &mtr);
+ n = btr_page_get_level(root, &mtr);
+
+ for (i = 0; i <= n; i++) {
+
+ if (!btr_validate_level(tree, n - i)) {
+
+ mtr_commit(&mtr);
+
+ return(FALSE);
+ }
+ }
+
+ mtr_commit(&mtr);
+
+ return(TRUE);
+}