diff options
Diffstat (limited to 'storage/innobase/btr/btr0btr.c')
-rw-r--r-- | storage/innobase/btr/btr0btr.c | 3077 |
1 files changed, 0 insertions, 3077 deletions
diff --git a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c deleted file mode 100644 index 6e8b43aeb8d..00000000000 --- a/storage/innobase/btr/btr0btr.c +++ /dev/null @@ -1,3077 +0,0 @@ -/****************************************************** -The B-tree - -(c) 1994-1996 Innobase Oy - -Created 6/2/1994 Heikki Tuuri -*******************************************************/ - -#include "btr0btr.h" - -#ifdef UNIV_NONINL -#include "btr0btr.ic" -#endif - -#include "fsp0fsp.h" -#include "page0page.h" -#include "btr0cur.h" -#include "btr0sea.h" -#include "btr0pcur.h" -#include "rem0cmp.h" -#include "lock0lock.h" -#include "ibuf0ibuf.h" -#include "trx0trx.h" - -/* -Latching strategy of the InnoDB B-tree --------------------------------------- -A tree latch protects all non-leaf nodes of the tree. Each node of a tree -also has a latch of its own. - -A B-tree operation normally first acquires an S-latch on the tree. It -searches down the tree and releases the tree latch when it has the -leaf node latch. To save CPU time we do not acquire any latch on -non-leaf nodes of the tree during a search, those pages are only bufferfixed. - -If an operation needs to restructure the tree, it acquires an X-latch on -the tree before searching to a leaf node. If it needs, for example, to -split a leaf, -(1) InnoDB decides the split point in the leaf, -(2) allocates a new page, -(3) inserts the appropriate node pointer to the first non-leaf level, -(4) releases the tree X-latch, -(5) and then moves records from the leaf to the new allocated page. - -Node pointers -------------- -Leaf pages of a B-tree contain the index records stored in the -tree. On levels n > 0 we store 'node pointers' to pages on level -n - 1. For each page there is exactly one node pointer stored: -thus the our tree is an ordinary B-tree, not a B-link tree. - -A node pointer contains a prefix P of an index record. The prefix -is long enough so that it determines an index record uniquely. -The file page number of the child page is added as the last -field. To the child page we can store node pointers or index records -which are >= P in the alphabetical order, but < P1 if there is -a next node pointer on the level, and P1 is its prefix. - -If a node pointer with a prefix P points to a non-leaf child, -then the leftmost record in the child must have the same -prefix P. If it points to a leaf node, the child is not required -to contain any record with a prefix equal to P. The leaf case -is decided this way to allow arbitrary deletions in a leaf node -without touching upper levels of the tree. - -We have predefined a special minimum record which we -define as the smallest record in any alphabetical order. -A minimum record is denoted by setting a bit in the record -header. A minimum record acts as the prefix of a node pointer -which points to a leftmost node on any level of the tree. - -File page allocation --------------------- -In the root node of a B-tree there are two file segment headers. -The leaf pages of a tree are allocated from one file segment, to -make them consecutive on disk if possible. From the other file segment -we allocate pages for the non-leaf levels of the tree. -*/ - -/**************************************************************** -Returns the upper level node pointer to a page. It is assumed that -mtr holds an x-latch on the tree. */ -static -rec_t* -btr_page_get_father_node_ptr( -/*=========================*/ - /* out: pointer to node pointer record */ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page: must contain at least one - user record */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Empties an index page. */ -static -void -btr_page_empty( -/*===========*/ - page_t* page, /* in: page to be emptied */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Returns TRUE if the insert fits on the appropriate half-page -with the chosen split_rec. */ -static -ibool -btr_page_insert_fits( -/*=================*/ - /* out: TRUE if fits */ - btr_cur_t* cursor, /* in: cursor at which insert - should be made */ - rec_t* split_rec, /* in: suggestion for first record - on upper half-page, or NULL if - tuple should be first */ - const ulint* offsets, /* in: rec_get_offsets( - split_rec, cursor->index) */ - dtuple_t* tuple, /* in: tuple to insert */ - mem_heap_t* heap); /* in: temporary memory heap */ - -/****************************************************************** -Gets the root node of a tree and x-latches it. */ - -page_t* -btr_root_get( -/*=========*/ - /* out: root page, x-latched */ - dict_index_t* index, /* in: index tree */ - mtr_t* mtr) /* in: mtr */ -{ - ulint space; - ulint root_page_no; - page_t* root; - - space = dict_index_get_space(index); - root_page_no = dict_index_get_page(index); - - root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr); - ut_a((ibool)!!page_is_comp(root) == dict_table_is_comp(index->table)); - - return(root); -} - -/***************************************************************** -Gets pointer to the previous user record in the tree. It is assumed that -the caller has appropriate latches on the page and its neighbor. */ - -rec_t* -btr_get_prev_user_rec( -/*==================*/ - /* out: previous user record, NULL if there is none */ - rec_t* rec, /* in: record on leaf level */ - mtr_t* mtr) /* in: mtr holding a latch on the page, and if - needed, also to the previous page */ -{ - page_t* page; - page_t* prev_page; - ulint prev_page_no; - ulint space; - - if (!page_rec_is_infimum(rec)) { - - rec_t* prev_rec = page_rec_get_prev(rec); - - if (!page_rec_is_infimum(prev_rec)) { - - return(prev_rec); - } - } - - page = buf_frame_align(rec); - prev_page_no = btr_page_get_prev(page, mtr); - space = buf_frame_get_space_id(page); - - if (prev_page_no != FIL_NULL) { - - prev_page = buf_page_get_with_no_latch(space, prev_page_no, - mtr); - /* The caller must already have a latch to the brother */ - ut_ad((mtr_memo_contains(mtr, buf_block_align(prev_page), - MTR_MEMO_PAGE_S_FIX)) - || (mtr_memo_contains(mtr, buf_block_align(prev_page), - MTR_MEMO_PAGE_X_FIX))); - ut_a(page_is_comp(prev_page) == page_is_comp(page)); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(prev_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - return(page_rec_get_prev(page_get_supremum_rec(prev_page))); - } - - return(NULL); -} - -/***************************************************************** -Gets pointer to the next user record in the tree. It is assumed that the -caller has appropriate latches on the page and its neighbor. */ - -rec_t* -btr_get_next_user_rec( -/*==================*/ - /* out: next user record, NULL if there is none */ - rec_t* rec, /* in: record on leaf level */ - mtr_t* mtr) /* in: mtr holding a latch on the page, and if - needed, also to the next page */ -{ - page_t* page; - page_t* next_page; - ulint next_page_no; - ulint space; - - if (!page_rec_is_supremum(rec)) { - - rec_t* next_rec = page_rec_get_next(rec); - - if (!page_rec_is_supremum(next_rec)) { - - return(next_rec); - } - } - - page = buf_frame_align(rec); - next_page_no = btr_page_get_next(page, mtr); - space = buf_frame_get_space_id(page); - - if (next_page_no != FIL_NULL) { - - next_page = buf_page_get_with_no_latch(space, next_page_no, - mtr); - /* The caller must already have a latch to the brother */ - ut_ad((mtr_memo_contains(mtr, buf_block_align(next_page), - MTR_MEMO_PAGE_S_FIX)) - || (mtr_memo_contains(mtr, buf_block_align(next_page), - MTR_MEMO_PAGE_X_FIX))); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_prev(next_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - ut_a(page_is_comp(next_page) == page_is_comp(page)); - return(page_rec_get_next(page_get_infimum_rec(next_page))); - } - - return(NULL); -} - -/****************************************************************** -Creates a new index page (not the root, and also not -used in page reorganization). */ -static -void -btr_page_create( -/*============*/ - page_t* page, /* in: page to be created */ - dict_index_t* index, /* in: index */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - page_create(page, mtr, dict_table_is_comp(index->table)); - buf_block_align(page)->check_index_page_at_flush = TRUE; - - btr_page_set_index_id(page, index->id, mtr); -} - -/****************************************************************** -Allocates a new file page to be used in an ibuf tree. Takes the page from -the free list of the tree, which must contain pages! */ -static -page_t* -btr_page_alloc_for_ibuf( -/*====================*/ - /* out: new allocated page, x-latched */ - dict_index_t* index, /* in: index tree */ - mtr_t* mtr) /* in: mtr */ -{ - fil_addr_t node_addr; - page_t* root; - page_t* new_page; - - root = btr_root_get(index, mtr); - - node_addr = flst_get_first(root + PAGE_HEADER - + PAGE_BTR_IBUF_FREE_LIST, mtr); - ut_a(node_addr.page != FIL_NULL); - - new_page = buf_page_get(dict_index_get_space(index), node_addr.page, - RW_X_LATCH, mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW); -#endif /* UNIV_SYNC_DEBUG */ - - flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, - mtr); - ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - mtr)); - - return(new_page); -} - -/****************************************************************** -Allocates a new file page to be used in an index tree. NOTE: we assume -that the caller has made the reservation for free extents! */ - -page_t* -btr_page_alloc( -/*===========*/ - /* out: new allocated page, x-latched; - NULL if out of space */ - dict_index_t* index, /* in: index */ - ulint hint_page_no, /* in: hint of a good page */ - byte file_direction, /* in: direction where a possible - page split is made */ - ulint level, /* in: level where the page is placed - in the tree */ - mtr_t* mtr) /* in: mtr */ -{ - fseg_header_t* seg_header; - page_t* root; - page_t* new_page; - ulint new_page_no; - - if (index->type & DICT_IBUF) { - - return(btr_page_alloc_for_ibuf(index, mtr)); - } - - root = btr_root_get(index, mtr); - - if (level == 0) { - seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; - } else { - seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; - } - - /* Parameter TRUE below states that the caller has made the - reservation for free extents, and thus we know that a page can - be allocated: */ - - new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no, - file_direction, TRUE, mtr); - if (new_page_no == FIL_NULL) { - - return(NULL); - } - - new_page = buf_page_get(dict_index_get_space(index), new_page_no, - RW_X_LATCH, mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW); -#endif /* UNIV_SYNC_DEBUG */ - - return(new_page); -} - -/****************************************************************** -Gets the number of pages in a B-tree. */ - -ulint -btr_get_size( -/*=========*/ - /* out: number of pages */ - dict_index_t* index, /* in: index */ - ulint flag) /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ -{ - fseg_header_t* seg_header; - page_t* root; - ulint n; - ulint dummy; - mtr_t mtr; - - mtr_start(&mtr); - - mtr_s_lock(dict_index_get_lock(index), &mtr); - - root = btr_root_get(index, &mtr); - - if (flag == BTR_N_LEAF_PAGES) { - seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; - - fseg_n_reserved_pages(seg_header, &n, &mtr); - - } else if (flag == BTR_TOTAL_SIZE) { - seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; - - n = fseg_n_reserved_pages(seg_header, &dummy, &mtr); - - seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; - - n += fseg_n_reserved_pages(seg_header, &dummy, &mtr); - } else { - ut_error; - } - - mtr_commit(&mtr); - - return(n); -} - -/****************************************************************** -Frees a page used in an ibuf tree. Puts the page to the free list of the -ibuf tree. */ -static -void -btr_page_free_for_ibuf( -/*===================*/ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page to be freed, x-latched */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* root; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - root = btr_root_get(index, mtr); - - flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr); - - ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - mtr)); -} - -/****************************************************************** -Frees a file page used in an index tree. Can be used also to (BLOB) -external storage pages, because the page level 0 can be given as an -argument. */ - -void -btr_page_free_low( -/*==============*/ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page to be freed, x-latched */ - ulint level, /* in: page level */ - mtr_t* mtr) /* in: mtr */ -{ - fseg_header_t* seg_header; - page_t* root; - ulint space; - ulint page_no; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - /* The page gets invalid for optimistic searches: increment the frame - modify clock */ - - buf_frame_modify_clock_inc(page); - - if (index->type & DICT_IBUF) { - - btr_page_free_for_ibuf(index, page, mtr); - - return; - } - - root = btr_root_get(index, mtr); - - if (level == 0) { - seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; - } else { - seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; - } - - space = buf_frame_get_space_id(page); - page_no = buf_frame_get_page_no(page); - - fseg_free_page(seg_header, space, page_no, mtr); -} - -/****************************************************************** -Frees a file page used in an index tree. NOTE: cannot free field external -storage pages because the page must contain info on its level. */ - -void -btr_page_free( -/*==========*/ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page to be freed, x-latched */ - mtr_t* mtr) /* in: mtr */ -{ - ulint level; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - level = btr_page_get_level(page, mtr); - - btr_page_free_low(index, page, level, mtr); -} - -/****************************************************************** -Sets the child node file address in a node pointer. */ -UNIV_INLINE -void -btr_node_ptr_set_child_page_no( -/*===========================*/ - rec_t* rec, /* in: node pointer record */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint page_no,/* in: child node address */ - mtr_t* mtr) /* in: mtr */ -{ - byte* field; - ulint len; - - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(0 < btr_page_get_level(buf_frame_align(rec), mtr)); - ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec)); - - /* The child address is in the last field */ - field = rec_get_nth_field(rec, offsets, - rec_offs_n_fields(offsets) - 1, &len); - - ut_ad(len == 4); - - mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr); -} - -/**************************************************************** -Returns the child page of a node pointer and x-latches it. */ -static -page_t* -btr_node_ptr_get_child( -/*===================*/ - /* out: child page, x-latched */ - rec_t* node_ptr,/* in: node pointer */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - mtr_t* mtr) /* in: mtr */ -{ - ulint page_no; - ulint space; - page_t* page; - - ut_ad(rec_offs_validate(node_ptr, NULL, offsets)); - space = buf_frame_get_space_id(node_ptr); - page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); - - page = btr_page_get(space, page_no, RW_X_LATCH, mtr); - - return(page); -} - -/**************************************************************** -Returns the upper level node pointer to a page. It is assumed that mtr holds -an x-latch on the tree. */ -static -rec_t* -btr_page_get_father_for_rec( -/*========================*/ - /* out: pointer to node pointer record, - its page x-latched */ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page: must contain at least one - user record */ - rec_t* user_rec,/* in: user_record on page */ - mtr_t* mtr) /* in: mtr */ -{ - mem_heap_t* heap; - dtuple_t* tuple; - btr_cur_t cursor; - rec_t* node_ptr; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_a(page_rec_is_user_rec(user_rec)); - - ut_ad(dict_index_get_page(index) != buf_frame_get_page_no(page)); - - heap = mem_heap_create(100); - - tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, - btr_page_get_level(page, mtr)); - - btr_cur_search_to_nth_level(index, - btr_page_get_level(page, mtr) + 1, - tuple, PAGE_CUR_LE, - BTR_CONT_MODIFY_TREE, &cursor, 0, mtr); - - node_ptr = btr_cur_get_rec(&cursor); - offsets = rec_get_offsets(node_ptr, index, offsets, - ULINT_UNDEFINED, &heap); - - if (UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr, offsets) - != buf_frame_get_page_no(page))) { - rec_t* print_rec; - fputs("InnoDB: Dump of the child page:\n", stderr); - buf_page_print(buf_frame_align(page)); - fputs("InnoDB: Dump of the parent page:\n", stderr); - buf_page_print(buf_frame_align(node_ptr)); - - fputs("InnoDB: Corruption of an index tree: table ", stderr); - ut_print_name(stderr, NULL, TRUE, index->table_name); - fputs(", index ", stderr); - ut_print_name(stderr, NULL, FALSE, index->name); - fprintf(stderr, ",\n" - "InnoDB: father ptr page no %lu, child page no %lu\n", - (ulong) - btr_node_ptr_get_child_page_no(node_ptr, offsets), - (ulong) buf_frame_get_page_no(page)); - print_rec = page_rec_get_next(page_get_infimum_rec(page)); - offsets = rec_get_offsets(print_rec, index, - offsets, ULINT_UNDEFINED, &heap); - page_rec_print(print_rec, offsets); - offsets = rec_get_offsets(node_ptr, index, offsets, - ULINT_UNDEFINED, &heap); - page_rec_print(node_ptr, offsets); - - fputs("InnoDB: You should dump + drop + reimport the table" - " to fix the\n" - "InnoDB: corruption. If the crash happens at " - "the database startup, see\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "forcing-recovery.html about\n" - "InnoDB: forcing recovery. " - "Then dump + drop + reimport.\n", stderr); - } - - ut_a(btr_node_ptr_get_child_page_no(node_ptr, offsets) - == buf_frame_get_page_no(page)); - mem_heap_free(heap); - - return(node_ptr); -} - -/**************************************************************** -Returns the upper level node pointer to a page. It is assumed that -mtr holds an x-latch on the tree. */ -static -rec_t* -btr_page_get_father_node_ptr( -/*=========================*/ - /* out: pointer to node pointer record */ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page: must contain at least one - user record */ - mtr_t* mtr) /* in: mtr */ -{ - return(btr_page_get_father_for_rec( - index, page, - page_rec_get_next(page_get_infimum_rec(page)), mtr)); -} - -/**************************************************************** -Creates the root node for a new index tree. */ - -ulint -btr_create( -/*=======*/ - /* out: page number of the created root, FIL_NULL if - did not succeed */ - ulint type, /* in: type of the index */ - ulint space, /* in: space where created */ - dulint index_id,/* in: index id */ - ulint comp, /* in: nonzero=compact page format */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - ulint page_no; - buf_frame_t* ibuf_hdr_frame; - buf_frame_t* frame; - page_t* page; - - /* Create the two new segments (one, in the case of an ibuf tree) for - the index tree; the segment headers are put on the allocated root page - (for an ibuf tree, not in the root, but on a separate ibuf header - page) */ - - if (type & DICT_IBUF) { - /* Allocate first the ibuf header page */ - ibuf_hdr_frame = fseg_create( - space, 0, IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr); - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(ibuf_hdr_frame, SYNC_TREE_NODE_NEW); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(buf_frame_get_page_no(ibuf_hdr_frame) - == IBUF_HEADER_PAGE_NO); - /* Allocate then the next page to the segment: it will be the - tree root page */ - - page_no = fseg_alloc_free_page(ibuf_hdr_frame + IBUF_HEADER - + IBUF_TREE_SEG_HEADER, - IBUF_TREE_ROOT_PAGE_NO, - FSP_UP, mtr); - ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO); - - frame = buf_page_get(space, page_no, RW_X_LATCH, mtr); - } else { - frame = fseg_create(space, 0, PAGE_HEADER + PAGE_BTR_SEG_TOP, - mtr); - } - - if (frame == NULL) { - - return(FIL_NULL); - } - - page_no = buf_frame_get_page_no(frame); - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW); -#endif /* UNIV_SYNC_DEBUG */ - - if (type & DICT_IBUF) { - /* It is an insert buffer tree: initialize the free list */ - - ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO); - - flst_init(frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr); - } else { - /* It is a non-ibuf tree: create a file segment for leaf - pages */ - fseg_create(space, page_no, PAGE_HEADER + PAGE_BTR_SEG_LEAF, - mtr); - /* The fseg create acquires a second latch on the page, - therefore we must declare it: */ -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW); -#endif /* UNIV_SYNC_DEBUG */ - } - - /* Create a new index page on the the allocated segment page */ - page = page_create(frame, mtr, comp); - buf_block_align(page)->check_index_page_at_flush = TRUE; - - /* Set the index id of the page */ - btr_page_set_index_id(page, index_id, mtr); - - /* Set the level of the new index page */ - btr_page_set_level(page, 0, mtr); - - /* Set the next node and previous node fields */ - btr_page_set_next(page, FIL_NULL, mtr); - btr_page_set_prev(page, FIL_NULL, mtr); - - /* We reset the free bits for the page to allow creation of several - trees in the same mtr, otherwise the latch on a bitmap page would - prevent it because of the latching order */ - - ibuf_reset_free_bits_with_type(type, page); - - /* In the following assertion we test that two records of maximum - allowed size fit on the root page: this fact is needed to ensure - correctness of split algorithms */ - - ut_ad(page_get_max_insert_size(page, 2) > 2 * BTR_PAGE_MAX_REC_SIZE); - - return(page_no); -} - -/**************************************************************** -Frees a B-tree except the root page, which MUST be freed after this -by calling btr_free_root. */ - -void -btr_free_but_not_root( -/*==================*/ - ulint space, /* in: space where created */ - ulint root_page_no) /* in: root page number */ -{ - ibool finished; - page_t* root; - mtr_t mtr; - -leaf_loop: - mtr_start(&mtr); - - root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr); - - /* NOTE: page hash indexes are dropped when a page is freed inside - fsp0fsp. */ - - finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_LEAF, - &mtr); - mtr_commit(&mtr); - - if (!finished) { - - goto leaf_loop; - } -top_loop: - mtr_start(&mtr); - - root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr); - - finished = fseg_free_step_not_header( - root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr); - mtr_commit(&mtr); - - if (!finished) { - - goto top_loop; - } -} - -/**************************************************************** -Frees the B-tree root page. Other tree MUST already have been freed. */ - -void -btr_free_root( -/*==========*/ - ulint space, /* in: space where created */ - ulint root_page_no, /* in: root page number */ - mtr_t* mtr) /* in: a mini-transaction which has already - been started */ -{ - ibool finished; - page_t* root; - - root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr); - - btr_search_drop_page_hash_index(root); -top_loop: - finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr); - if (!finished) { - - goto top_loop; - } -} - -/***************************************************************** -Reorganizes an index page. */ -static -void -btr_page_reorganize_low( -/*====================*/ - ibool recovery,/* in: TRUE if called in recovery: - locks should not be updated, i.e., - there cannot exist locks on the - page, and a hash index should not be - dropped: it cannot exist */ - page_t* page, /* in: page to be reorganized */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* new_page; - ulint log_mode; - ulint data_size1; - ulint data_size2; - ulint max_ins_size1; - ulint max_ins_size2; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); - data_size1 = page_get_data_size(page); - max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1); - - /* Write the log record */ - mlog_open_and_write_index(mtr, page, index, page_is_comp(page) - ? MLOG_COMP_PAGE_REORGANIZE - : MLOG_PAGE_REORGANIZE, 0); - - /* Turn logging off */ - log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); - - new_page = buf_frame_alloc(); - - /* Copy the old page to temporary space */ - buf_frame_copy(new_page, page); - - if (!recovery) { - btr_search_drop_page_hash_index(page); - } - - /* Recreate the page: note that global data on page (possible - segment headers, next page-field, etc.) is preserved intact */ - - page_create(page, mtr, page_is_comp(page)); - buf_block_align(page)->check_index_page_at_flush = TRUE; - - /* Copy the records from the temporary space to the recreated page; - do not copy the lock bits yet */ - - page_copy_rec_list_end_no_locks(page, new_page, - page_get_infimum_rec(new_page), - index, mtr); - /* Copy max trx id to recreated page */ - page_set_max_trx_id(page, page_get_max_trx_id(new_page)); - - if (!recovery) { - /* Update the record lock bitmaps */ - lock_move_reorganize_page(page, new_page); - } - - data_size2 = page_get_data_size(page); - max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1); - - if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) { - buf_page_print(page); - buf_page_print(new_page); - fprintf(stderr, - "InnoDB: Error: page old data size %lu" - " new data size %lu\n" - "InnoDB: Error: page old max ins size %lu" - " new max ins size %lu\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", - (unsigned long) data_size1, (unsigned long) data_size2, - (unsigned long) max_ins_size1, - (unsigned long) max_ins_size2); - } - - buf_frame_free(new_page); - - /* Restore logging mode */ - mtr_set_log_mode(mtr, log_mode); -} - -/***************************************************************** -Reorganizes an index page. */ - -void -btr_page_reorganize( -/*================*/ - page_t* page, /* in: page to be reorganized */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr) /* in: mtr */ -{ - btr_page_reorganize_low(FALSE, page, index, mtr); -} - -/*************************************************************** -Parses a redo log record of reorganizing a page. */ - -byte* -btr_parse_page_reorganize( -/*======================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr __attribute__((unused)), - /* in: buffer end */ - dict_index_t* index, /* in: record descriptor */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ -{ - ut_ad(ptr && end_ptr); - - /* The record is empty, except for the record initial part */ - - if (page) { - btr_page_reorganize_low(TRUE, page, index, mtr); - } - - return(ptr); -} - -/***************************************************************** -Empties an index page. */ -static -void -btr_page_empty( -/*===========*/ - page_t* page, /* in: page to be emptied */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - btr_search_drop_page_hash_index(page); - - /* Recreate the page: note that global data on page (possible - segment headers, next page-field, etc.) is preserved intact */ - - page_create(page, mtr, page_is_comp(page)); - buf_block_align(page)->check_index_page_at_flush = TRUE; -} - -/***************************************************************** -Makes tree one level higher by splitting the root, and inserts -the tuple. It is assumed that mtr contains an x-latch on the tree. -NOTE that the operation of this function must always succeed, -we cannot reverse it: therefore enough free disk space must be -guaranteed to be available before this function is called. */ - -rec_t* -btr_root_raise_and_insert( -/*======================*/ - /* out: inserted record */ - btr_cur_t* cursor, /* in: cursor at which to insert: must be - on the root page; when the function returns, - the cursor is positioned on the predecessor - of the inserted record */ - dtuple_t* tuple, /* in: tuple to insert */ - mtr_t* mtr) /* in: mtr */ -{ - dict_index_t* index; - page_t* root; - page_t* new_page; - ulint new_page_no; - rec_t* rec; - mem_heap_t* heap; - dtuple_t* node_ptr; - ulint level; - rec_t* node_ptr_rec; - page_cur_t* page_cursor; - - root = btr_cur_get_page(cursor); - index = btr_cur_get_index(cursor); - - ut_ad(dict_index_get_page(index) == buf_frame_get_page_no(root)); - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(root), - MTR_MEMO_PAGE_X_FIX)); - btr_search_drop_page_hash_index(root); - - /* Allocate a new page to the tree. Root splitting is done by first - moving the root records to the new page, emptying the root, putting - a node pointer to the new page, and then splitting the new page. */ - - new_page = btr_page_alloc(index, 0, FSP_NO_DIR, - btr_page_get_level(root, mtr), mtr); - - btr_page_create(new_page, index, mtr); - - level = btr_page_get_level(root, mtr); - - /* Set the levels of the new index page and root page */ - btr_page_set_level(new_page, level, mtr); - btr_page_set_level(root, level + 1, mtr); - - /* Set the next node and previous node fields of new page */ - btr_page_set_next(new_page, FIL_NULL, mtr); - btr_page_set_prev(new_page, FIL_NULL, mtr); - - /* Move the records from root to the new page */ - - page_move_rec_list_end(new_page, root, page_get_infimum_rec(root), - index, mtr); - /* If this is a pessimistic insert which is actually done to - perform a pessimistic update then we have stored the lock - information of the record to be inserted on the infimum of the - root page: we cannot discard the lock structs on the root page */ - - lock_update_root_raise(new_page, root); - - /* Create a memory heap where the node pointer is stored */ - heap = mem_heap_create(100); - - rec = page_rec_get_next(page_get_infimum_rec(new_page)); - new_page_no = buf_frame_get_page_no(new_page); - - /* Build the node pointer (= node key and page address) for the - child */ - - node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap, - level); - /* Reorganize the root to get free space */ - btr_page_reorganize(root, index, mtr); - - page_cursor = btr_cur_get_page_cur(cursor); - - /* Insert node pointer to the root */ - - page_cur_set_before_first(root, page_cursor); - - node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr, - index, mtr); - - ut_ad(node_ptr_rec); - - /* The node pointer must be marked as the predefined minimum record, - as there is no lower alphabetical limit to records in the leftmost - node of a level: */ - - btr_set_min_rec_mark(node_ptr_rec, page_is_comp(root), mtr); - - /* Free the memory heap */ - mem_heap_free(heap); - - /* We play safe and reset the free bits for the new page */ - -#if 0 - fprintf(stderr, "Root raise new page no %lu\n", - buf_frame_get_page_no(new_page)); -#endif - - ibuf_reset_free_bits(index, new_page); - /* Reposition the cursor to the child node */ - page_cur_search(new_page, index, tuple, - PAGE_CUR_LE, page_cursor); - - /* Split the child and insert tuple */ - return(btr_page_split_and_insert(cursor, tuple, mtr)); -} - -/***************************************************************** -Decides if the page should be split at the convergence point of inserts -converging to the left. */ - -ibool -btr_page_get_split_rec_to_left( -/*===========================*/ - /* out: TRUE if split recommended */ - btr_cur_t* cursor, /* in: cursor at which to insert */ - rec_t** split_rec) /* out: if split recommended, - the first record on upper half page, - or NULL if tuple to be inserted should - be first */ -{ - page_t* page; - rec_t* insert_point; - rec_t* infimum; - - page = btr_cur_get_page(cursor); - insert_point = btr_cur_get_rec(cursor); - - if (page_header_get_ptr(page, PAGE_LAST_INSERT) - == page_rec_get_next(insert_point)) { - - infimum = page_get_infimum_rec(page); - - /* If the convergence is in the middle of a page, include also - the record immediately before the new insert to the upper - page. Otherwise, we could repeatedly move from page to page - lots of records smaller than the convergence point. */ - - if (infimum != insert_point - && page_rec_get_next(infimum) != insert_point) { - - *split_rec = insert_point; - } else { - *split_rec = page_rec_get_next(insert_point); - } - - return(TRUE); - } - - return(FALSE); -} - -/***************************************************************** -Decides if the page should be split at the convergence point of inserts -converging to the right. */ - -ibool -btr_page_get_split_rec_to_right( -/*============================*/ - /* out: TRUE if split recommended */ - btr_cur_t* cursor, /* in: cursor at which to insert */ - rec_t** split_rec) /* out: if split recommended, - the first record on upper half page, - or NULL if tuple to be inserted should - be first */ -{ - page_t* page; - rec_t* insert_point; - - page = btr_cur_get_page(cursor); - insert_point = btr_cur_get_rec(cursor); - - /* We use eager heuristics: if the new insert would be right after - the previous insert on the same page, we assume that there is a - pattern of sequential inserts here. */ - - if (UNIV_LIKELY(page_header_get_ptr(page, PAGE_LAST_INSERT) - == insert_point)) { - - rec_t* next_rec; - - next_rec = page_rec_get_next(insert_point); - - if (page_rec_is_supremum(next_rec)) { -split_at_new: - /* Split at the new record to insert */ - *split_rec = NULL; - } else { - rec_t* next_next_rec = page_rec_get_next(next_rec); - if (page_rec_is_supremum(next_next_rec)) { - - goto split_at_new; - } - - /* If there are >= 2 user records up from the insert - point, split all but 1 off. We want to keep one because - then sequential inserts can use the adaptive hash - index, as they can do the necessary checks of the right - search position just by looking at the records on this - page. */ - - *split_rec = next_next_rec; - } - - return(TRUE); - } - - return(FALSE); -} - -/***************************************************************** -Calculates a split record such that the tuple will certainly fit on -its half-page when the split is performed. We assume in this function -only that the cursor page has at least one user record. */ -static -rec_t* -btr_page_get_sure_split_rec( -/*========================*/ - /* out: split record, or NULL if - tuple will be the first record on - upper half-page */ - btr_cur_t* cursor, /* in: cursor at which insert - should be made */ - dtuple_t* tuple) /* in: tuple to insert */ -{ - page_t* page; - ulint insert_size; - ulint free_space; - ulint total_data; - ulint total_n_recs; - ulint total_space; - ulint incl_data; - rec_t* ins_rec; - rec_t* rec; - rec_t* next_rec; - ulint n; - mem_heap_t* heap; - ulint* offsets; - - page = btr_cur_get_page(cursor); - - insert_size = rec_get_converted_size(cursor->index, tuple); - free_space = page_get_free_space_of_empty(page_is_comp(page)); - - /* free_space is now the free space of a created new page */ - - total_data = page_get_data_size(page) + insert_size; - total_n_recs = page_get_n_recs(page) + 1; - ut_ad(total_n_recs >= 2); - total_space = total_data + page_dir_calc_reserved_space(total_n_recs); - - n = 0; - incl_data = 0; - ins_rec = btr_cur_get_rec(cursor); - rec = page_get_infimum_rec(page); - - heap = NULL; - offsets = NULL; - - /* We start to include records to the left half, and when the - space reserved by them exceeds half of total_space, then if - the included records fit on the left page, they will be put there - if something was left over also for the right page, - otherwise the last included record will be the first on the right - half page */ - - for (;;) { - /* Decide the next record to include */ - if (rec == ins_rec) { - rec = NULL; /* NULL denotes that tuple is - now included */ - } else if (rec == NULL) { - rec = page_rec_get_next(ins_rec); - } else { - rec = page_rec_get_next(rec); - } - - if (rec == NULL) { - /* Include tuple */ - incl_data += insert_size; - } else { - offsets = rec_get_offsets(rec, cursor->index, - offsets, ULINT_UNDEFINED, - &heap); - incl_data += rec_offs_size(offsets); - } - - n++; - - if (incl_data + page_dir_calc_reserved_space(n) - >= total_space / 2) { - - if (incl_data + page_dir_calc_reserved_space(n) - <= free_space) { - /* The next record will be the first on - the right half page if it is not the - supremum record of page */ - - if (rec == ins_rec) { - rec = NULL; - - goto func_exit; - } else if (rec == NULL) { - next_rec = page_rec_get_next(ins_rec); - } else { - next_rec = page_rec_get_next(rec); - } - ut_ad(next_rec); - if (!page_rec_is_supremum(next_rec)) { - rec = next_rec; - } - } - -func_exit: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(rec); - } - } -} - -/***************************************************************** -Returns TRUE if the insert fits on the appropriate half-page with the -chosen split_rec. */ -static -ibool -btr_page_insert_fits( -/*=================*/ - /* out: TRUE if fits */ - btr_cur_t* cursor, /* in: cursor at which insert - should be made */ - rec_t* split_rec, /* in: suggestion for first record - on upper half-page, or NULL if - tuple to be inserted should be first */ - const ulint* offsets, /* in: rec_get_offsets( - split_rec, cursor->index) */ - dtuple_t* tuple, /* in: tuple to insert */ - mem_heap_t* heap) /* in: temporary memory heap */ -{ - page_t* page; - ulint insert_size; - ulint free_space; - ulint total_data; - ulint total_n_recs; - rec_t* rec; - rec_t* end_rec; - ulint* offs; - - page = btr_cur_get_page(cursor); - - ut_ad(!split_rec == !offsets); - ut_ad(!offsets - || !page_is_comp(page) == !rec_offs_comp(offsets)); - ut_ad(!offsets - || rec_offs_validate(split_rec, cursor->index, offsets)); - - insert_size = rec_get_converted_size(cursor->index, tuple); - free_space = page_get_free_space_of_empty(page_is_comp(page)); - - /* free_space is now the free space of a created new page */ - - total_data = page_get_data_size(page) + insert_size; - total_n_recs = page_get_n_recs(page) + 1; - - /* We determine which records (from rec to end_rec, not including - end_rec) will end up on the other half page from tuple when it is - inserted. */ - - if (split_rec == NULL) { - rec = page_rec_get_next(page_get_infimum_rec(page)); - end_rec = page_rec_get_next(btr_cur_get_rec(cursor)); - - } else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) { - - rec = page_rec_get_next(page_get_infimum_rec(page)); - end_rec = split_rec; - } else { - rec = split_rec; - end_rec = page_get_supremum_rec(page); - } - - if (total_data + page_dir_calc_reserved_space(total_n_recs) - <= free_space) { - - /* Ok, there will be enough available space on the - half page where the tuple is inserted */ - - return(TRUE); - } - - offs = NULL; - - while (rec != end_rec) { - /* In this loop we calculate the amount of reserved - space after rec is removed from page. */ - - offs = rec_get_offsets(rec, cursor->index, offs, - ULINT_UNDEFINED, &heap); - - total_data -= rec_offs_size(offs); - total_n_recs--; - - if (total_data + page_dir_calc_reserved_space(total_n_recs) - <= free_space) { - - /* Ok, there will be enough available space on the - half page where the tuple is inserted */ - - return(TRUE); - } - - rec = page_rec_get_next(rec); - } - - return(FALSE); -} - -/*********************************************************** -Inserts a data tuple to a tree on a non-leaf level. It is assumed -that mtr holds an x-latch on the tree. */ - -void -btr_insert_on_non_leaf_level( -/*=========================*/ - dict_index_t* index, /* in: index */ - ulint level, /* in: level, must be > 0 */ - dtuple_t* tuple, /* in: the record to be inserted */ - mtr_t* mtr) /* in: mtr */ -{ - big_rec_t* dummy_big_rec; - btr_cur_t cursor; - ulint err; - rec_t* rec; - - ut_ad(level > 0); - - btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE, - BTR_CONT_MODIFY_TREE, - &cursor, 0, mtr); - - err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG - | BTR_KEEP_SYS_FLAG - | BTR_NO_UNDO_LOG_FLAG, - &cursor, tuple, &rec, - &dummy_big_rec, NULL, mtr); - ut_a(err == DB_SUCCESS); -} - -/****************************************************************** -Attaches the halves of an index page on the appropriate level in an -index tree. */ -static -void -btr_attach_half_pages( -/*==================*/ - dict_index_t* index, /* in: the index tree */ - page_t* page, /* in: page to be split */ - rec_t* split_rec, /* in: first record on upper - half page */ - page_t* new_page, /* in: the new half page */ - ulint direction, /* in: FSP_UP or FSP_DOWN */ - mtr_t* mtr) /* in: mtr */ -{ - ulint space; - rec_t* node_ptr; - page_t* prev_page; - page_t* next_page; - ulint prev_page_no; - ulint next_page_no; - ulint level; - page_t* lower_page; - page_t* upper_page; - ulint lower_page_no; - ulint upper_page_no; - dtuple_t* node_ptr_upper; - mem_heap_t* heap; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(new_page), - MTR_MEMO_PAGE_X_FIX)); - ut_a(page_is_comp(page) == page_is_comp(new_page)); - - /* Create a memory heap where the data tuple is stored */ - heap = mem_heap_create(1024); - - /* Based on split direction, decide upper and lower pages */ - if (direction == FSP_DOWN) { - - lower_page_no = buf_frame_get_page_no(new_page); - upper_page_no = buf_frame_get_page_no(page); - lower_page = new_page; - upper_page = page; - - /* Look up the index for the node pointer to page */ - node_ptr = btr_page_get_father_node_ptr(index, page, mtr); - - /* Replace the address of the old child node (= page) with the - address of the new lower half */ - - btr_node_ptr_set_child_page_no(node_ptr, - rec_get_offsets( - node_ptr, index, - NULL, ULINT_UNDEFINED, - &heap), - lower_page_no, mtr); - mem_heap_empty(heap); - } else { - lower_page_no = buf_frame_get_page_no(page); - upper_page_no = buf_frame_get_page_no(new_page); - lower_page = page; - upper_page = new_page; - } - - /* Get the level of the split pages */ - level = btr_page_get_level(page, mtr); - - /* Build the node pointer (= node key and page address) for the upper - half */ - - node_ptr_upper = dict_index_build_node_ptr(index, split_rec, - upper_page_no, heap, level); - - /* Insert it next to the pointer to the lower half. Note that this - may generate recursion leading to a split on the higher level. */ - - btr_insert_on_non_leaf_level(index, level + 1, node_ptr_upper, mtr); - - /* Free the memory heap */ - mem_heap_free(heap); - - /* Get the previous and next pages of page */ - - prev_page_no = btr_page_get_prev(page, mtr); - next_page_no = btr_page_get_next(page, mtr); - space = buf_frame_get_space_id(page); - - /* Update page links of the level */ - - if (prev_page_no != FIL_NULL) { - - prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr); - ut_a(page_is_comp(prev_page) == page_is_comp(page)); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(prev_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - btr_page_set_next(prev_page, lower_page_no, mtr); - } - - if (next_page_no != FIL_NULL) { - - next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr); - ut_a(page_is_comp(next_page) == page_is_comp(page)); - - btr_page_set_prev(next_page, upper_page_no, mtr); - } - - btr_page_set_prev(lower_page, prev_page_no, mtr); - btr_page_set_next(lower_page, upper_page_no, mtr); - btr_page_set_level(lower_page, level, mtr); - - btr_page_set_prev(upper_page, lower_page_no, mtr); - btr_page_set_next(upper_page, next_page_no, mtr); - btr_page_set_level(upper_page, level, mtr); -} - -/***************************************************************** -Splits an index page to halves and inserts the tuple. It is assumed -that mtr holds an x-latch to the index tree. NOTE: the tree x-latch -is released within this function! NOTE that the operation of this -function must always succeed, we cannot reverse it: therefore -enough free disk space must be guaranteed to be available before -this function is called. */ - -rec_t* -btr_page_split_and_insert( -/*======================*/ - /* out: inserted record; NOTE: the tree - x-latch is released! NOTE: 2 free disk - pages must be available! */ - btr_cur_t* cursor, /* in: cursor at which to insert; when the - function returns, the cursor is positioned - on the predecessor of the inserted record */ - dtuple_t* tuple, /* in: tuple to insert */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* page; - ulint page_no; - byte direction; - ulint hint_page_no; - page_t* new_page; - rec_t* split_rec; - page_t* left_page; - page_t* right_page; - page_t* insert_page; - page_cur_t* page_cursor; - rec_t* first_rec; - byte* buf = 0; /* remove warning */ - rec_t* move_limit; - ibool insert_will_fit; - ulint n_iterations = 0; - rec_t* rec; - mem_heap_t* heap; - ulint n_uniq; - ulint* offsets; - - heap = mem_heap_create(1024); - n_uniq = dict_index_get_n_unique_in_tree(cursor->index); -func_start: - mem_heap_empty(heap); - offsets = NULL; - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index), - MTR_MEMO_X_LOCK)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - page = btr_cur_get_page(cursor); - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(page_get_n_recs(page) >= 2); - - page_no = buf_frame_get_page_no(page); - - /* 1. Decide the split record; split_rec == NULL means that the - tuple to be inserted should be the first record on the upper - half-page */ - - if (n_iterations > 0) { - direction = FSP_UP; - hint_page_no = page_no + 1; - split_rec = btr_page_get_sure_split_rec(cursor, tuple); - - } else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) { - direction = FSP_UP; - hint_page_no = page_no + 1; - - } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) { - direction = FSP_DOWN; - hint_page_no = page_no - 1; - } else { - direction = FSP_UP; - hint_page_no = page_no + 1; - split_rec = page_get_middle_rec(page); - } - - /* 2. Allocate a new page to the index */ - new_page = btr_page_alloc(cursor->index, hint_page_no, direction, - btr_page_get_level(page, mtr), mtr); - btr_page_create(new_page, cursor->index, mtr); - - /* 3. Calculate the first record on the upper half-page, and the - first record (move_limit) on original page which ends up on the - upper half */ - - if (split_rec != NULL) { - first_rec = split_rec; - move_limit = split_rec; - } else { - buf = mem_alloc(rec_get_converted_size(cursor->index, tuple)); - - first_rec = rec_convert_dtuple_to_rec(buf, - cursor->index, tuple); - move_limit = page_rec_get_next(btr_cur_get_rec(cursor)); - } - - /* 4. Do first the modifications in the tree structure */ - - btr_attach_half_pages(cursor->index, page, first_rec, - new_page, direction, mtr); - - if (split_rec == NULL) { - mem_free(buf); - } - - /* If the split is made on the leaf level and the insert will fit - on the appropriate half-page, we may release the tree x-latch. - We can then move the records after releasing the tree latch, - thus reducing the tree latch contention. */ - - if (split_rec) { - offsets = rec_get_offsets(split_rec, cursor->index, offsets, - n_uniq, &heap); - - insert_will_fit = btr_page_insert_fits(cursor, - split_rec, offsets, - tuple, heap); - } else { - insert_will_fit = btr_page_insert_fits(cursor, - NULL, NULL, - tuple, heap); - } - - if (insert_will_fit && (btr_page_get_level(page, mtr) == 0)) { - - mtr_memo_release(mtr, dict_index_get_lock(cursor->index), - MTR_MEMO_X_LOCK); - } - - /* 5. Move then the records to the new page */ - if (direction == FSP_DOWN) { - /* fputs("Split left\n", stderr); */ - - page_move_rec_list_start(new_page, page, move_limit, - cursor->index, mtr); - left_page = new_page; - right_page = page; - - lock_update_split_left(right_page, left_page); - } else { - /* fputs("Split right\n", stderr); */ - - page_move_rec_list_end(new_page, page, move_limit, - cursor->index, mtr); - left_page = page; - right_page = new_page; - - lock_update_split_right(right_page, left_page); - } - - /* 6. The split and the tree modification is now completed. Decide the - page where the tuple should be inserted */ - - if (split_rec == NULL) { - insert_page = right_page; - - } else { - offsets = rec_get_offsets(first_rec, cursor->index, - offsets, n_uniq, &heap); - - if (cmp_dtuple_rec(tuple, first_rec, offsets) >= 0) { - - insert_page = right_page; - } else { - insert_page = left_page; - } - } - - /* 7. Reposition the cursor for insert and try insertion */ - page_cursor = btr_cur_get_page_cur(cursor); - - page_cur_search(insert_page, cursor->index, tuple, - PAGE_CUR_LE, page_cursor); - - rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr); - - if (rec != NULL) { - /* Insert fit on the page: update the free bits for the - left and right pages in the same mtr */ - - ibuf_update_free_bits_for_two_pages_low(cursor->index, - left_page, - right_page, mtr); - /* fprintf(stderr, "Split and insert done %lu %lu\n", - buf_frame_get_page_no(left_page), - buf_frame_get_page_no(right_page)); */ - mem_heap_free(heap); - return(rec); - } - - /* 8. If insert did not fit, try page reorganization */ - - btr_page_reorganize(insert_page, cursor->index, mtr); - - page_cur_search(insert_page, cursor->index, tuple, - PAGE_CUR_LE, page_cursor); - rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr); - - if (rec == NULL) { - /* The insert did not fit on the page: loop back to the - start of the function for a new split */ - - /* We play safe and reset the free bits for new_page */ - ibuf_reset_free_bits(cursor->index, new_page); - - /* fprintf(stderr, "Split second round %lu\n", - buf_frame_get_page_no(page)); */ - n_iterations++; - ut_ad(n_iterations < 2); - ut_ad(!insert_will_fit); - - goto func_start; - } - - /* Insert fit on the page: update the free bits for the - left and right pages in the same mtr */ - - ibuf_update_free_bits_for_two_pages_low(cursor->index, left_page, - right_page, mtr); -#if 0 - fprintf(stderr, "Split and insert done %lu %lu\n", - buf_frame_get_page_no(left_page), - buf_frame_get_page_no(right_page)); -#endif - - ut_ad(page_validate(left_page, cursor->index)); - ut_ad(page_validate(right_page, cursor->index)); - - mem_heap_free(heap); - return(rec); -} - -/***************************************************************** -Removes a page from the level list of pages. */ -static -void -btr_level_list_remove( -/*==================*/ - page_t* page, /* in: page to remove */ - mtr_t* mtr) /* in: mtr */ -{ - ulint space; - ulint prev_page_no; - page_t* prev_page; - ulint next_page_no; - page_t* next_page; - - ut_ad(page && mtr); - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - /* Get the previous and next page numbers of page */ - - prev_page_no = btr_page_get_prev(page, mtr); - next_page_no = btr_page_get_next(page, mtr); - space = buf_frame_get_space_id(page); - - /* Update page links of the level */ - - if (prev_page_no != FIL_NULL) { - - prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr); - ut_a(page_is_comp(prev_page) == page_is_comp(page)); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(prev_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - btr_page_set_next(prev_page, next_page_no, mtr); - } - - if (next_page_no != FIL_NULL) { - - next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr); - ut_a(page_is_comp(next_page) == page_is_comp(page)); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_prev(next_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - btr_page_set_prev(next_page, prev_page_no, mtr); - } -} - -/******************************************************************** -Writes the redo log record for setting an index record as the predefined -minimum record. */ -UNIV_INLINE -void -btr_set_min_rec_mark_log( -/*=====================*/ - rec_t* rec, /* in: record */ - ulint comp, /* nonzero=compact record format */ - mtr_t* mtr) /* in: mtr */ -{ - mlog_write_initial_log_record( - rec, comp ? MLOG_COMP_REC_MIN_MARK : MLOG_REC_MIN_MARK, mtr); - - /* Write rec offset as a 2-byte ulint */ - mlog_catenate_ulint(mtr, page_offset(rec), MLOG_2BYTES); -} - -/******************************************************************** -Parses the redo log record for setting an index record as the predefined -minimum record. */ - -byte* -btr_parse_set_min_rec_mark( -/*=======================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - ulint comp, /* in: nonzero=compact page format */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ -{ - rec_t* rec; - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - if (page) { - ut_a(!page_is_comp(page) == !comp); - - rec = page + mach_read_from_2(ptr); - - btr_set_min_rec_mark(rec, comp, mtr); - } - - return(ptr + 2); -} - -/******************************************************************** -Sets a record as the predefined minimum record. */ - -void -btr_set_min_rec_mark( -/*=================*/ - rec_t* rec, /* in: record */ - ulint comp, /* in: nonzero=compact page format */ - mtr_t* mtr) /* in: mtr */ -{ - ulint info_bits; - - info_bits = rec_get_info_bits(rec, comp); - - rec_set_info_bits(rec, comp, info_bits | REC_INFO_MIN_REC_FLAG); - - btr_set_min_rec_mark_log(rec, comp, mtr); -} - -/***************************************************************** -Deletes on the upper level the node pointer to a page. */ - -void -btr_node_ptr_delete( -/*================*/ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page whose node pointer is deleted */ - mtr_t* mtr) /* in: mtr */ -{ - rec_t* node_ptr; - btr_cur_t cursor; - ibool compressed; - ulint err; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - /* Delete node pointer on father page */ - - node_ptr = btr_page_get_father_node_ptr(index, page, mtr); - - btr_cur_position(index, node_ptr, &cursor); - compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, FALSE, - mtr); - ut_a(err == DB_SUCCESS); - - if (!compressed) { - btr_cur_compress_if_useful(&cursor, mtr); - } -} - -/***************************************************************** -If page is the only on its level, this function moves its records to the -father page, thus reducing the tree height. */ -static -void -btr_lift_page_up( -/*=============*/ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page which is the only on its level; - must not be empty: use - btr_discard_only_page_on_level if the last - record from the page should be removed */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* father_page; - page_t* iter_page; - page_t* pages[BTR_MAX_LEVELS]; - ulint page_level; - ulint root_page_no; - ulint ancestors; - ulint i; - - ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); - ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - father_page = buf_frame_align( - btr_page_get_father_node_ptr(index, page, mtr)); - - page_level = btr_page_get_level(page, mtr); - root_page_no = dict_index_get_page(index); - - ancestors = 1; - pages[0] = father_page; - - /* Store all ancestor pages so we can reset their levels later on. - We have to do all the searches on the tree now because later on, - after we've replaced the first level, the tree is in an inconsistent - state and can not be searched. */ - iter_page = father_page; - for (;;) { - if (buf_block_get_page_no(buf_block_align(iter_page)) - == root_page_no) { - - break; - } - - ut_a(ancestors < BTR_MAX_LEVELS); - - iter_page = buf_frame_align( - btr_page_get_father_node_ptr(index, iter_page, mtr)); - - pages[ancestors++] = iter_page; - } - - btr_search_drop_page_hash_index(page); - - /* Make the father empty */ - btr_page_empty(father_page, mtr); - - /* Move records to the father */ - page_copy_rec_list_end(father_page, page, page_get_infimum_rec(page), - index, mtr); - lock_update_copy_and_discard(father_page, page); - - /* Go upward to root page, decreasing levels by one. */ - for (i = 0; i < ancestors; i++) { - iter_page = pages[i]; - - ut_ad(btr_page_get_level(iter_page, mtr) == (page_level + 1)); - - btr_page_set_level(iter_page, page_level, mtr); - page_level++; - } - - /* Free the file page */ - btr_page_free(index, page, mtr); - - /* We play safe and reset the free bits for the father */ - ibuf_reset_free_bits(index, father_page); - ut_ad(page_validate(father_page, index)); - ut_ad(btr_check_node_ptr(index, father_page, mtr)); -} - -/***************************************************************** -Tries to merge the page first to the left immediate brother if such a -brother exists, and the node pointers to the current page and to the brother -reside on the same page. If the left brother does not satisfy these -conditions, looks at the right brother. If the page is the only one on that -level lifts the records of the page to the father page, thus reducing the -tree height. It is assumed that mtr holds an x-latch on the tree and on the -page. If cursor is on the leaf level, mtr must also hold x-latches to the -brothers, if they exist. NOTE: it is assumed that the caller has reserved -enough free extents so that the compression will always succeed if done! */ - -void -btr_compress( -/*=========*/ - btr_cur_t* cursor, /* in: cursor on the page to merge or lift; - the page must not be empty: in record delete - use btr_discard_page if the page would become - empty */ - mtr_t* mtr) /* in: mtr */ -{ - dict_index_t* index; - ulint space; - ulint left_page_no; - ulint right_page_no; - page_t* merge_page; - page_t* father_page; - ibool is_left; - page_t* page; - rec_t* orig_pred; - rec_t* orig_succ; - rec_t* node_ptr; - ulint data_size; - ulint n_recs; - ulint max_ins_size; - ulint max_ins_size_reorg; - ulint level; - ulint comp; - - page = btr_cur_get_page(cursor); - index = btr_cur_get_index(cursor); - comp = page_is_comp(page); - ut_a((ibool)!!comp == dict_table_is_comp(index->table)); - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - level = btr_page_get_level(page, mtr); - space = dict_index_get_space(index); - - left_page_no = btr_page_get_prev(page, mtr); - right_page_no = btr_page_get_next(page, mtr); - -#if 0 - fprintf(stderr, "Merge left page %lu right %lu \n", - left_page_no, right_page_no); -#endif - - node_ptr = btr_page_get_father_node_ptr(index, page, mtr); - ut_ad(!comp || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR); - father_page = buf_frame_align(node_ptr); - ut_a(comp == page_is_comp(father_page)); - - /* Decide the page to which we try to merge and which will inherit - the locks */ - - is_left = left_page_no != FIL_NULL; - - if (is_left) { - - merge_page = btr_page_get(space, left_page_no, RW_X_LATCH, - mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(merge_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - } else if (right_page_no != FIL_NULL) { - - merge_page = btr_page_get(space, right_page_no, RW_X_LATCH, - mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_prev(merge_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - } else { - /* The page is the only one on the level, lift the records - to the father */ - btr_lift_page_up(index, page, mtr); - - return; - } - - n_recs = page_get_n_recs(page); - data_size = page_get_data_size(page); - ut_a(page_is_comp(merge_page) == comp); - - max_ins_size_reorg = page_get_max_insert_size_after_reorganize( - merge_page, n_recs); - if (data_size > max_ins_size_reorg) { - - /* No space for merge */ - - return; - } - - ut_ad(page_validate(merge_page, index)); - - max_ins_size = page_get_max_insert_size(merge_page, n_recs); - - if (data_size > max_ins_size) { - - /* We have to reorganize merge_page */ - - btr_page_reorganize(merge_page, index, mtr); - - max_ins_size = page_get_max_insert_size(merge_page, n_recs); - - ut_ad(page_validate(merge_page, index)); - ut_ad(page_get_max_insert_size(merge_page, n_recs) - == max_ins_size_reorg); - } - - if (data_size > max_ins_size) { - - /* Add fault tolerance, though this should never happen */ - - return; - } - - btr_search_drop_page_hash_index(page); - - /* Remove the page from the level list */ - btr_level_list_remove(page, mtr); - - if (is_left) { - btr_node_ptr_delete(index, page, mtr); - } else { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - /* Replace the address of the old child node (= page) with the - address of the merge page to the right */ - - btr_node_ptr_set_child_page_no(node_ptr, - rec_get_offsets( - node_ptr, index, - offsets_, - ULINT_UNDEFINED, - &heap), - right_page_no, mtr); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - btr_node_ptr_delete(index, merge_page, mtr); - } - - /* Move records to the merge page */ - if (is_left) { - orig_pred = page_rec_get_prev( - page_get_supremum_rec(merge_page)); - page_copy_rec_list_start(merge_page, page, - page_get_supremum_rec(page), - index, mtr); - - lock_update_merge_left(merge_page, orig_pred, page); - } else { - orig_succ = page_rec_get_next( - page_get_infimum_rec(merge_page)); - page_copy_rec_list_end(merge_page, page, - page_get_infimum_rec(page), - index, mtr); - - lock_update_merge_right(orig_succ, page); - } - - /* We have added new records to merge_page: update its free bits */ - ibuf_update_free_bits_if_full(index, merge_page, - UNIV_PAGE_SIZE, ULINT_UNDEFINED); - - ut_ad(page_validate(merge_page, index)); - - /* Free the file page */ - btr_page_free(index, page, mtr); - - ut_ad(btr_check_node_ptr(index, merge_page, mtr)); -} - -/***************************************************************** -Discards a page that is the only page on its level. */ -static -void -btr_discard_only_page_on_level( -/*===========================*/ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page which is the only on its level */ - mtr_t* mtr) /* in: mtr */ -{ - rec_t* node_ptr; - page_t* father_page; - ulint page_level; - - ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); - ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - btr_search_drop_page_hash_index(page); - - node_ptr = btr_page_get_father_node_ptr(index, page, mtr); - father_page = buf_frame_align(node_ptr); - - page_level = btr_page_get_level(page, mtr); - - lock_update_discard(page_get_supremum_rec(father_page), page); - - btr_page_set_level(father_page, page_level, mtr); - - /* Free the file page */ - btr_page_free(index, page, mtr); - - if (buf_frame_get_page_no(father_page) == dict_index_get_page(index)) { - /* The father is the root page */ - - btr_page_empty(father_page, mtr); - - /* We play safe and reset the free bits for the father */ - ibuf_reset_free_bits(index, father_page); - } else { - ut_ad(page_get_n_recs(father_page) == 1); - - btr_discard_only_page_on_level(index, father_page, mtr); - } -} - -/***************************************************************** -Discards a page from a B-tree. This is used to remove the last record from -a B-tree page: the whole page must be removed at the same time. This cannot -be used for the root page, which is allowed to be empty. */ - -void -btr_discard_page( -/*=============*/ - btr_cur_t* cursor, /* in: cursor on the page to discard: not on - the root page */ - mtr_t* mtr) /* in: mtr */ -{ - dict_index_t* index; - ulint space; - ulint left_page_no; - ulint right_page_no; - page_t* merge_page; - page_t* page; - rec_t* node_ptr; - - page = btr_cur_get_page(cursor); - index = btr_cur_get_index(cursor); - - ut_ad(dict_index_get_page(index) != buf_frame_get_page_no(page)); - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - space = dict_index_get_space(index); - - /* Decide the page which will inherit the locks */ - - left_page_no = btr_page_get_prev(page, mtr); - right_page_no = btr_page_get_next(page, mtr); - - if (left_page_no != FIL_NULL) { - merge_page = btr_page_get(space, left_page_no, RW_X_LATCH, - mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(merge_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - } else if (right_page_no != FIL_NULL) { - merge_page = btr_page_get(space, right_page_no, RW_X_LATCH, - mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_prev(merge_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - } else { - btr_discard_only_page_on_level(index, page, mtr); - - return; - } - - ut_a(page_is_comp(merge_page) == page_is_comp(page)); - btr_search_drop_page_hash_index(page); - - if (left_page_no == FIL_NULL && btr_page_get_level(page, mtr) > 0) { - - /* We have to mark the leftmost node pointer on the right - side page as the predefined minimum record */ - - node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page)); - - ut_ad(page_rec_is_user_rec(node_ptr)); - - btr_set_min_rec_mark(node_ptr, page_is_comp(merge_page), mtr); - } - - btr_node_ptr_delete(index, page, mtr); - - /* Remove the page from the level list */ - btr_level_list_remove(page, mtr); - - if (left_page_no != FIL_NULL) { - lock_update_discard(page_get_supremum_rec(merge_page), page); - } else { - lock_update_discard(page_rec_get_next( - page_get_infimum_rec(merge_page)), - page); - } - - /* Free the file page */ - btr_page_free(index, page, mtr); - - ut_ad(btr_check_node_ptr(index, merge_page, mtr)); -} - -#ifdef UNIV_BTR_PRINT -/***************************************************************** -Prints size info of a B-tree. */ - -void -btr_print_size( -/*===========*/ - dict_index_t* index) /* in: index tree */ -{ - page_t* root; - fseg_header_t* seg; - mtr_t mtr; - - if (index->type & DICT_IBUF) { - fputs("Sorry, cannot print info of an ibuf tree:" - " use ibuf functions\n", stderr); - - return; - } - - mtr_start(&mtr); - - root = btr_root_get(index, &mtr); - - seg = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; - - fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr); - fseg_print(seg, &mtr); - - if (!(index->type & DICT_UNIVERSAL)) { - - seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; - - fputs("INFO OF THE LEAF PAGE SEGMENT\n", stderr); - fseg_print(seg, &mtr); - } - - mtr_commit(&mtr); -} - -/**************************************************************** -Prints recursively index tree pages. */ -static -void -btr_print_recursive( -/*================*/ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: index page */ - ulint width, /* in: print this many entries from start - and end */ - mem_heap_t** heap, /* in/out: heap for rec_get_offsets() */ - ulint** offsets,/* in/out: buffer for rec_get_offsets() */ - mtr_t* mtr) /* in: mtr */ -{ - page_cur_t cursor; - ulint n_recs; - ulint i = 0; - mtr_t mtr2; - rec_t* node_ptr; - page_t* child; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n", - (ulong) btr_page_get_level(page, mtr), - (ulong) buf_frame_get_page_no(page)); - - page_print(page, index, width, width); - - n_recs = page_get_n_recs(page); - - page_cur_set_before_first(page, &cursor); - page_cur_move_to_next(&cursor); - - while (!page_cur_is_after_last(&cursor)) { - - if (0 == btr_page_get_level(page, mtr)) { - - /* If this is the leaf level, do nothing */ - - } else if ((i <= width) || (i >= n_recs - width)) { - - mtr_start(&mtr2); - - node_ptr = page_cur_get_rec(&cursor); - - *offsets = rec_get_offsets(node_ptr, index, *offsets, - ULINT_UNDEFINED, heap); - child = btr_node_ptr_get_child(node_ptr, - *offsets, &mtr2); - btr_print_recursive(index, child, width, - heap, offsets, &mtr2); - mtr_commit(&mtr2); - } - - page_cur_move_to_next(&cursor); - i++; - } -} - -/****************************************************************** -Prints directories and other info of all nodes in the tree. */ - -void -btr_print_index( -/*============*/ - dict_index_t* index, /* in: index */ - ulint width) /* in: print this many entries from start - and end */ -{ - mtr_t mtr; - page_t* root; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - fputs("--------------------------\n" - "INDEX TREE PRINT\n", stderr); - - mtr_start(&mtr); - - root = btr_root_get(index, &mtr); - - btr_print_recursive(index, root, width, &heap, &offsets, &mtr); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - mtr_commit(&mtr); - - btr_validate_index(index, NULL); -} -#endif /* UNIV_BTR_PRINT */ - -#ifdef UNIV_DEBUG -/**************************************************************** -Checks that the node pointer to a page is appropriate. */ - -ibool -btr_check_node_ptr( -/*===============*/ - /* out: TRUE */ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: index page */ - mtr_t* mtr) /* in: mtr */ -{ - mem_heap_t* heap; - rec_t* node_ptr; - dtuple_t* node_ptr_tuple; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - if (dict_index_get_page(index) == buf_frame_get_page_no(page)) { - - return(TRUE); - } - - node_ptr = btr_page_get_father_node_ptr(index, page, mtr); - - if (btr_page_get_level(page, mtr) == 0) { - - return(TRUE); - } - - heap = mem_heap_create(256); - - node_ptr_tuple = dict_index_build_node_ptr( - index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap, - btr_page_get_level(page, mtr)); - - ut_a(!cmp_dtuple_rec(node_ptr_tuple, node_ptr, - rec_get_offsets(node_ptr, index, - NULL, ULINT_UNDEFINED, &heap))); - - mem_heap_free(heap); - - return(TRUE); -} -#endif /* UNIV_DEBUG */ - -/**************************************************************** -Display identification information for a record. */ -static -void -btr_index_rec_validate_report( -/*==========================*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: index record */ - dict_index_t* index) /* in: index */ -{ - fputs("InnoDB: Record in ", stderr); - dict_index_name_print(stderr, NULL, index); - fprintf(stderr, ", page %lu, at offset %lu\n", - buf_frame_get_page_no(page), (ulint)(rec - page)); -} - -/**************************************************************** -Checks the size and number of fields in a record based on the definition of -the index. */ - -ibool -btr_index_rec_validate( -/*===================*/ - /* out: TRUE if ok */ - rec_t* rec, /* in: index record */ - dict_index_t* index, /* in: index */ - ibool dump_on_error) /* in: TRUE if the function - should print hex dump of record - and page on error */ -{ - ulint len; - ulint n; - ulint i; - page_t* page; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - page = buf_frame_align(rec); - - if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) { - /* The insert buffer index tree can contain records from any - other index: we cannot check the number of fields or - their length */ - - return(TRUE); - } - - if (UNIV_UNLIKELY((ibool)!!page_is_comp(page) - != dict_table_is_comp(index->table))) { - btr_index_rec_validate_report(page, rec, index); - fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n", - (ulong) !!page_is_comp(page), - (ulong) dict_table_is_comp(index->table)); - - return(FALSE); - } - - n = dict_index_get_n_fields(index); - - if (!page_is_comp(page) - && UNIV_UNLIKELY(rec_get_n_fields_old(rec) != n)) { - btr_index_rec_validate_report(page, rec, index); - fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n", - (ulong) rec_get_n_fields_old(rec), (ulong) n); - - if (dump_on_error) { - buf_page_print(page); - - fputs("InnoDB: corrupt record ", stderr); - rec_print_old(stderr, rec); - putc('\n', stderr); - } - return(FALSE); - } - - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - - for (i = 0; i < n; i++) { - ulint fixed_size = dict_col_get_fixed_size( - dict_index_get_nth_col(index, i)); - - rec_get_nth_field(rec, offsets, i, &len); - - /* Note that if fixed_size != 0, it equals the - length of a fixed-size column in the clustered index. - A prefix index of the column is of fixed, but different - length. When fixed_size == 0, prefix_len is the maximum - length of the prefix index column. */ - - if ((dict_index_get_nth_field(index, i)->prefix_len == 0 - && len != UNIV_SQL_NULL && fixed_size - && len != fixed_size) - || (dict_index_get_nth_field(index, i)->prefix_len > 0 - && len != UNIV_SQL_NULL - && len - > dict_index_get_nth_field(index, i)->prefix_len)) { - - btr_index_rec_validate_report(page, rec, index); - fprintf(stderr, - "InnoDB: field %lu len is %lu," - " should be %lu\n", - (ulong) i, (ulong) len, (ulong) fixed_size); - - if (dump_on_error) { - buf_page_print(page); - - fputs("InnoDB: corrupt record ", stderr); - rec_print_new(stderr, rec, offsets); - putc('\n', stderr); - } - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(FALSE); - } - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(TRUE); -} - -/**************************************************************** -Checks the size and number of fields in records based on the definition of -the index. */ -static -ibool -btr_index_page_validate( -/*====================*/ - /* out: TRUE if ok */ - page_t* page, /* in: index page */ - dict_index_t* index) /* in: index */ -{ - page_cur_t cur; - ibool ret = TRUE; - - page_cur_set_before_first(page, &cur); - page_cur_move_to_next(&cur); - - for (;;) { - if (page_cur_is_after_last(&cur)) { - - break; - } - - if (!btr_index_rec_validate(cur.rec, index, TRUE)) { - - return(FALSE); - } - - page_cur_move_to_next(&cur); - } - - return(ret); -} - -/**************************************************************** -Report an error on one page of an index tree. */ -static -void -btr_validate_report1( -/*=================*/ - /* out: TRUE if ok */ - dict_index_t* index, /* in: index */ - ulint level, /* in: B-tree level */ - page_t* page) /* in: index page */ -{ - fprintf(stderr, "InnoDB: Error in page %lu of ", - buf_frame_get_page_no(page)); - dict_index_name_print(stderr, NULL, index); - if (level) { - fprintf(stderr, ", index tree level %lu", level); - } - putc('\n', stderr); -} - -/**************************************************************** -Report an error on two pages of an index tree. */ -static -void -btr_validate_report2( -/*=================*/ - /* out: TRUE if ok */ - dict_index_t* index, /* in: index */ - ulint level, /* in: B-tree level */ - page_t* page1, /* in: first index page */ - page_t* page2) /* in: second index page */ -{ - fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ", - buf_frame_get_page_no(page1), - buf_frame_get_page_no(page2)); - dict_index_name_print(stderr, NULL, index); - if (level) { - fprintf(stderr, ", index tree level %lu", level); - } - putc('\n', stderr); -} - -/**************************************************************** -Validates index tree level. */ -static -ibool -btr_validate_level( -/*===============*/ - /* out: TRUE if ok */ - dict_index_t* index, /* in: index tree */ - trx_t* trx, /* in: transaction or NULL */ - ulint level) /* in: level number */ -{ - ulint space; - page_t* page; - page_t* right_page = 0; /* remove warning */ - page_t* father_page; - page_t* right_father_page; - rec_t* node_ptr; - rec_t* right_node_ptr; - rec_t* rec; - ulint right_page_no; - ulint left_page_no; - page_cur_t cursor; - dtuple_t* node_ptr_tuple; - ibool ret = TRUE; - mtr_t mtr; - mem_heap_t* heap = mem_heap_create(256); - ulint* offsets = NULL; - ulint* offsets2= NULL; - - mtr_start(&mtr); - - mtr_x_lock(dict_index_get_lock(index), &mtr); - - page = btr_root_get(index, &mtr); - - space = buf_frame_get_space_id(page); - - while (level != btr_page_get_level(page, &mtr)) { - - ut_a(btr_page_get_level(page, &mtr) > 0); - - page_cur_set_before_first(page, &cursor); - page_cur_move_to_next(&cursor); - - node_ptr = page_cur_get_rec(&cursor); - offsets = rec_get_offsets(node_ptr, index, offsets, - ULINT_UNDEFINED, &heap); - page = btr_node_ptr_get_child(node_ptr, offsets, &mtr); - } - - /* Now we are on the desired level. Loop through the pages on that - level. */ -loop: - if (trx_is_interrupted(trx)) { - mtr_commit(&mtr); - mem_heap_free(heap); - return(ret); - } - mem_heap_empty(heap); - offsets = offsets2 = NULL; - mtr_x_lock(dict_index_get_lock(index), &mtr); - - /* Check ordering etc. of records */ - - if (!page_validate(page, index)) { - btr_validate_report1(index, level, page); - - ret = FALSE; - } else if (level == 0) { - /* We are on level 0. Check that the records have the right - number of fields, and field lengths are right. */ - - if (!btr_index_page_validate(page, index)) { - - ret = FALSE; - } - } - - ut_a(btr_page_get_level(page, &mtr) == level); - - right_page_no = btr_page_get_next(page, &mtr); - left_page_no = btr_page_get_prev(page, &mtr); - - ut_a((page_get_n_recs(page) > 0) - || ((level == 0) - && (buf_frame_get_page_no(page) - == dict_index_get_page(index)))); - - if (right_page_no != FIL_NULL) { - rec_t* right_rec; - right_page = btr_page_get(space, right_page_no, RW_X_LATCH, - &mtr); - if (UNIV_UNLIKELY(btr_page_get_prev(right_page, &mtr) - != buf_frame_get_page_no(page))) { - btr_validate_report2(index, level, page, right_page); - fputs("InnoDB: broken FIL_PAGE_NEXT" - " or FIL_PAGE_PREV links\n", stderr); - buf_page_print(page); - buf_page_print(right_page); - - ret = FALSE; - } - - if (UNIV_UNLIKELY(page_is_comp(right_page) - != page_is_comp(page))) { - btr_validate_report2(index, level, page, right_page); - fputs("InnoDB: 'compact' flag mismatch\n", stderr); - buf_page_print(page); - buf_page_print(right_page); - - ret = FALSE; - - goto node_ptr_fails; - } - - rec = page_rec_get_prev(page_get_supremum_rec(page)); - right_rec = page_rec_get_next(page_get_infimum_rec( - right_page)); - offsets = rec_get_offsets(rec, index, - offsets, ULINT_UNDEFINED, &heap); - offsets2 = rec_get_offsets(right_rec, index, - offsets2, ULINT_UNDEFINED, &heap); - if (UNIV_UNLIKELY(cmp_rec_rec(rec, right_rec, - offsets, offsets2, - index) >= 0)) { - - btr_validate_report2(index, level, page, right_page); - - fputs("InnoDB: records in wrong order" - " on adjacent pages\n", stderr); - - buf_page_print(page); - buf_page_print(right_page); - - fputs("InnoDB: record ", stderr); - rec = page_rec_get_prev(page_get_supremum_rec(page)); - rec_print(stderr, rec, index); - putc('\n', stderr); - fputs("InnoDB: record ", stderr); - rec = page_rec_get_next( - page_get_infimum_rec(right_page)); - rec_print(stderr, rec, index); - putc('\n', stderr); - - ret = FALSE; - } - } - - if (level > 0 && left_page_no == FIL_NULL) { - ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits( - page_rec_get_next(page_get_infimum_rec(page)), - page_is_comp(page))); - } - - if (buf_frame_get_page_no(page) != dict_index_get_page(index)) { - - /* Check father node pointers */ - - node_ptr = btr_page_get_father_node_ptr(index, page, &mtr); - father_page = buf_frame_align(node_ptr); - offsets = rec_get_offsets(node_ptr, index, - offsets, ULINT_UNDEFINED, &heap); - - if (btr_node_ptr_get_child_page_no(node_ptr, offsets) - != buf_frame_get_page_no(page) - || node_ptr != btr_page_get_father_for_rec( - index, page, - page_rec_get_prev(page_get_supremum_rec(page)), - &mtr)) { - btr_validate_report1(index, level, page); - - fputs("InnoDB: node pointer to the page is wrong\n", - stderr); - - buf_page_print(father_page); - buf_page_print(page); - - fputs("InnoDB: node ptr ", stderr); - rec_print_new(stderr, node_ptr, offsets); - - fprintf(stderr, "\n" - "InnoDB: node ptr child page n:o %lu\n", - (unsigned long) btr_node_ptr_get_child_page_no - (node_ptr, offsets)); - - fputs("InnoDB: record on page ", stderr); - rec = btr_page_get_father_for_rec( - index, page, - page_rec_get_prev(page_get_supremum_rec(page)), - &mtr); - rec_print(stderr, rec, index); - putc('\n', stderr); - ret = FALSE; - - goto node_ptr_fails; - } - - if (btr_page_get_level(page, &mtr) > 0) { - offsets = rec_get_offsets(node_ptr, index, - offsets, ULINT_UNDEFINED, - &heap); - - node_ptr_tuple = dict_index_build_node_ptr( - index, - page_rec_get_next(page_get_infimum_rec(page)), - 0, heap, btr_page_get_level(page, &mtr)); - - if (cmp_dtuple_rec(node_ptr_tuple, node_ptr, - offsets)) { - rec_t* first_rec = page_rec_get_next( - page_get_infimum_rec(page)); - - btr_validate_report1(index, level, page); - - buf_page_print(father_page); - buf_page_print(page); - - fputs("InnoDB: Error: node ptrs differ" - " on levels > 0\n" - "InnoDB: node ptr ", stderr); - rec_print_new(stderr, node_ptr, offsets); - fputs("InnoDB: first rec ", stderr); - rec_print(stderr, first_rec, index); - putc('\n', stderr); - ret = FALSE; - - goto node_ptr_fails; - } - } - - if (left_page_no == FIL_NULL) { - ut_a(node_ptr == page_rec_get_next( - page_get_infimum_rec(father_page))); - ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL); - } - - if (right_page_no == FIL_NULL) { - ut_a(node_ptr == page_rec_get_prev( - page_get_supremum_rec(father_page))); - ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL); - } else { - right_node_ptr = btr_page_get_father_node_ptr( - index, right_page, &mtr); - if (page_rec_get_next(node_ptr) - != page_get_supremum_rec(father_page)) { - - if (right_node_ptr - != page_rec_get_next(node_ptr)) { - ret = FALSE; - fputs("InnoDB: node pointer to" - " the right page is wrong\n", - stderr); - - btr_validate_report1(index, level, - page); - - buf_page_print(father_page); - buf_page_print(page); - buf_page_print(right_page); - } - } else { - right_father_page = buf_frame_align( - right_node_ptr); - - if (right_node_ptr != page_rec_get_next( - page_get_infimum_rec( - right_father_page))) { - ret = FALSE; - fputs("InnoDB: node pointer 2 to" - " the right page is wrong\n", - stderr); - - btr_validate_report1(index, level, - page); - - buf_page_print(father_page); - buf_page_print(right_father_page); - buf_page_print(page); - buf_page_print(right_page); - } - - if (buf_frame_get_page_no(right_father_page) - != btr_page_get_next(father_page, &mtr)) { - - ret = FALSE; - fputs("InnoDB: node pointer 3 to" - " the right page is wrong\n", - stderr); - - btr_validate_report1(index, level, - page); - - buf_page_print(father_page); - buf_page_print(right_father_page); - buf_page_print(page); - buf_page_print(right_page); - } - } - } - } - -node_ptr_fails: - /* Commit the mini-transaction to release the latch on 'page'. - Re-acquire the latch on right_page, which will become 'page' - on the next loop. The page has already been checked. */ - mtr_commit(&mtr); - - if (right_page_no != FIL_NULL) { - mtr_start(&mtr); - - page = btr_page_get(space, right_page_no, RW_X_LATCH, &mtr); - - goto loop; - } - - mem_heap_free(heap); - return(ret); -} - -/****************************************************************** -Checks the consistency of an index tree. */ - -ibool -btr_validate_index( -/*===============*/ - /* out: TRUE if ok */ - dict_index_t* index, /* in: index */ - trx_t* trx) /* in: transaction or NULL */ -{ - mtr_t mtr; - page_t* root; - ulint i; - ulint n; - - mtr_start(&mtr); - mtr_x_lock(dict_index_get_lock(index), &mtr); - - root = btr_root_get(index, &mtr); - n = btr_page_get_level(root, &mtr); - - for (i = 0; i <= n && !trx_is_interrupted(trx); i++) { - if (!btr_validate_level(index, trx, n - i)) { - - mtr_commit(&mtr); - - return(FALSE); - } - } - - mtr_commit(&mtr); - - return(TRUE); -} |