Diffstat (limited to 'storage/innobase/btr/btr0btr.cc')
-rw-r--r--	storage/innobase/btr/btr0btr.cc	559
1 file changed, 424 insertions, 135 deletions
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 8eae3c7e3bc..e3e127c3ace 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -1,6 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -697,14 +698,16 @@ btr_root_fseg_validate(
 #endif /* UNIV_BTR_DEBUG */
 
 /**************************************************************//**
-Gets the root node of a tree and x-latches it.
-@return root page, x-latched */
+Gets the root node of a tree and x- or s-latches it.
+@return root page, x- or s-latched */
 static
 buf_block_t*
 btr_root_block_get(
 /*===============*/
-	dict_index_t*	index,	/*!< in: index tree */
-	mtr_t*		mtr)	/*!< in: mtr */
+	const dict_index_t*	index,	/*!< in: index tree */
+	ulint			mode,	/*!< in: either RW_S_LATCH
+					or RW_X_LATCH */
+	mtr_t*			mtr)	/*!< in: mtr */
 {
 	ulint		space;
 	ulint		zip_size;
@@ -715,8 +718,7 @@ btr_root_block_get(
 	zip_size = dict_table_zip_size(index->table);
 	root_page_no = dict_index_get_page(index);
 
-	block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH,
-			      index, mtr);
+	block = btr_block_get(space, zip_size, root_page_no, mode, index, mtr);
 	btr_assert_not_corrupted(block, index);
 #ifdef UNIV_BTR_DEBUG
 	if (!dict_index_is_ibuf(index)) {
@@ -739,10 +741,162 @@ UNIV_INTERN
 page_t*
 btr_root_get(
 /*=========*/
+	const dict_index_t*	index,	/*!< in: index tree */
+	mtr_t*			mtr)	/*!< in: mtr */
+{
+	return(buf_block_get_frame(btr_root_block_get(index, RW_X_LATCH,
+						      mtr)));
+}
+
+/**************************************************************//**
+Gets the height of the B-tree (the level of the root, when the leaf
+level is assumed to be 0). The caller must hold an S or X latch on
+the index.
+@return tree height (level of the root) */
+UNIV_INTERN
+ulint
+btr_height_get(
+/*===========*/
 	dict_index_t*	index,	/*!< in: index tree */
-	mtr_t*		mtr)	/*!< in: mtr */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+{
+	ulint		height;
+	buf_block_t*	root_block;
+
+	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+				MTR_MEMO_S_LOCK)
+	      || mtr_memo_contains(mtr, dict_index_get_lock(index),
+				   MTR_MEMO_X_LOCK));
+
+	/* S latches the page */
+	root_block = btr_root_block_get(index, RW_S_LATCH, mtr);
+
+	height = btr_page_get_level(buf_block_get_frame(root_block), mtr);
+
+	/* Release the S latch on the root page. */
+	mtr_memo_release(mtr, root_block, MTR_MEMO_PAGE_S_FIX);
+#ifdef UNIV_SYNC_DEBUG
+	sync_thread_reset_level(&root_block->lock);
+#endif /* UNIV_SYNC_DEBUG */
+
+	return(height);
+}
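Illustration: the tree height is simply the level field stored on the root page (leaf pages are level 0, each parent one level above its children), which is why an S latch on the root suffices and can be dropped right after the read. A minimal stand-alone model with toy types, not InnoDB code:

    #include <cassert>
    #include <cstdint>

    struct Page { uint16_t level; };   // 0 on leaf pages; the root carries the height
    struct Tree { Page root; };

    // Same contract as btr_height_get(): latch the root (the new RW_S_LATCH
    // mode of btr_root_block_get is enough), read its level, drop the latch.
    uint16_t height_get(const Tree& t) { return t.root.level; }

    int main() {
        Tree t{Page{2}};               // root at level 2: levels 2, 1, 0 exist
        assert(height_get(t) == 2);
    }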
+
+/**************************************************************//**
+Checks a file segment header within a B-tree root page and updates
+the segment header space id.
+@return TRUE if valid */
+static
+bool
+btr_root_fseg_adjust_on_import(
+/*===========================*/
+	fseg_header_t*	seg_header,	/*!< in/out: segment header */
+	page_zip_des_t*	page_zip,	/*!< in/out: compressed page,
+					or NULL */
+	ulint		space,		/*!< in: tablespace identifier */
+	mtr_t*		mtr)		/*!< in/out: mini-transaction */
+{
+	ulint	offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET);
+
+	if (offset < FIL_PAGE_DATA
+	    || offset > UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) {
+
+		return(FALSE);
+
+	} else if (page_zip) {
+		mach_write_to_4(seg_header + FSEG_HDR_SPACE, space);
+		page_zip_write_header(page_zip, seg_header + FSEG_HDR_SPACE,
+				      4, mtr);
+	} else {
+		mlog_write_ulint(seg_header + FSEG_HDR_SPACE,
+				 space, MLOG_4BYTES, mtr);
+	}
+
+	return(TRUE);
+}
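The validity test above reduces to a range check: the stored offset must point into the usable payload area of the page. A compilable sketch, assuming the usual InnoDB constants (FIL_PAGE_DATA = 38, FIL_PAGE_DATA_END = 8, 16 KiB default page); the names here are placeholders, not the real symbols:

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t kPageSize    = 16384; // UNIV_PAGE_SIZE (default build)
    constexpr uint32_t kPageData    = 38;    // FIL_PAGE_DATA: end of page header
    constexpr uint32_t kPageDataEnd = 8;     // FIL_PAGE_DATA_END: page trailer

    // Valid iff the segment header offset lands inside the payload area.
    bool fseg_offset_is_valid(uint32_t offset) {
        return offset >= kPageData && offset <= kPageSize - kPageDataEnd;
    }

    int main() {
        assert(!fseg_offset_is_valid(0));          // inside the page header
        assert(fseg_offset_is_valid(kPageData));   // first usable byte
        assert(!fseg_offset_is_valid(kPageSize));  // past the trailer
    }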
+
+/**************************************************************//**
+Checks and adjusts the root node of a tree during IMPORT TABLESPACE.
+@return error code, or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+btr_root_adjust_on_import(
+/*======================*/
+	const dict_index_t*	index)	/*!< in: index tree */
 {
-	return(buf_block_get_frame(btr_root_block_get(index, mtr)));
+	dberr_t		err;
+	mtr_t		mtr;
+	page_t*		page;
+	buf_block_t*	block;
+	page_zip_des_t*	page_zip;
+	dict_table_t*	table = index->table;
+	ulint		space_id = dict_index_get_space(index);
+	ulint		zip_size = dict_table_zip_size(table);
+	ulint		root_page_no = dict_index_get_page(index);
+
+	mtr_start(&mtr);
+
+	mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+
+	DBUG_EXECUTE_IF("ib_import_trigger_corruption_3",
+			return(DB_CORRUPTION););
+
+	block = btr_block_get(
+		space_id, zip_size, root_page_no, RW_X_LATCH, index, &mtr);
+
+	page = buf_block_get_frame(block);
+	page_zip = buf_block_get_page_zip(block);
+
+	/* Check that this is a B-tree page and both the PREV and NEXT
+	pointers are FIL_NULL, because the root page does not have any
+	siblings. */
+	if (fil_page_get_type(page) != FIL_PAGE_INDEX
+	    || fil_page_get_prev(page) != FIL_NULL
+	    || fil_page_get_next(page) != FIL_NULL) {
+
+		err = DB_CORRUPTION;
+
+	} else if (dict_index_is_clust(index)) {
+		bool	page_is_compact_format;
+
+		page_is_compact_format = page_is_comp(page) > 0;
+
+		/* Check if the page format and table format agree. */
+		if (page_is_compact_format != dict_table_is_comp(table)) {
+			err = DB_CORRUPTION;
+		} else {
+
+			/* Check that the table flags and the tablespace
+			flags match. */
+			ulint	flags = fil_space_get_flags(table->space);
+
+			if (flags
+			    && flags != dict_tf_to_fsp_flags(table->flags)) {
+
+				err = DB_CORRUPTION;
+			} else {
+				err = DB_SUCCESS;
+			}
+		}
+	} else {
+		err = DB_SUCCESS;
+	}
+
+	/* Check and adjust the file segment headers, if all OK so far. */
+	if (err == DB_SUCCESS
+	    && (!btr_root_fseg_adjust_on_import(
+			FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+			+ page, page_zip, space_id, &mtr)
+		|| !btr_root_fseg_adjust_on_import(
+			FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+			+ page, page_zip, space_id, &mtr))) {
+
+		err = DB_CORRUPTION;
+	}
+
+	mtr_commit(&mtr);
+
+	return(err);
 }
 
 /*************************************************************//**
@@ -1033,8 +1187,7 @@ btr_get_size(
 	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
 				MTR_MEMO_S_LOCK));
 
-	if (index->page == FIL_NULL
-	    || index->to_be_dropped
+	if (index->page == FIL_NULL || dict_index_is_online_ddl(index)
 	    || *index->name == TEMP_INDEX_PREFIX) {
 		return(ULINT_UNDEFINED);
 	}
@@ -1584,6 +1737,8 @@ btr_page_reorganize_low(
 			there cannot exist locks on the
 			page, and a hash index should not be
 			dropped: it cannot exist */
+	ulint		compression_level,/*!< in: compression level to be used
+					if dealing with compressed page */
 	buf_block_t*	block,	/*!< in: page to be reorganized */
 	dict_index_t*	index,	/*!< in: record descriptor */
 	mtr_t*		mtr)	/*!< in: mtr */
@@ -1601,6 +1756,8 @@ btr_page_reorganize_low(
 	ulint		max_ins_size1;
 	ulint		max_ins_size2;
 	ibool		success		= FALSE;
+	byte		type;
+	byte*		log_ptr;
 
 	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
 	btr_assert_not_corrupted(block, index);
@@ -1612,9 +1769,23 @@ btr_page_reorganize_low(
 #ifndef UNIV_HOTBACKUP
 	/* Write the log record */
-	mlog_open_and_write_index(mtr, page, index, page_is_comp(page)
-				  ? MLOG_COMP_PAGE_REORGANIZE
-				  : MLOG_PAGE_REORGANIZE, 0);
+	if (page_zip) {
+		type = MLOG_ZIP_PAGE_REORGANIZE;
+	} else if (page_is_comp(page)) {
+		type = MLOG_COMP_PAGE_REORGANIZE;
+	} else {
+		type = MLOG_PAGE_REORGANIZE;
+	}
+
+	log_ptr = mlog_open_and_write_index(
+		mtr, page, index, type, page_zip ? 1 : 0);
+
+	/* For compressed pages write the compression level. */
+	if (log_ptr && page_zip) {
+		mach_write_to_1(log_ptr, compression_level);
+		mlog_close(mtr, log_ptr + 1);
+	}
+
 #endif /* !UNIV_HOTBACKUP */
 
 	/* Turn logging off */
@@ -1662,7 +1833,9 @@ btr_page_reorganize_low(
 		ut_ad(max_trx_id != 0 || recovery);
 	}
 
-	if (page_zip && !page_zip_compress(page_zip, page, index, NULL)) {
+	if (page_zip
+	    && !page_zip_compress(page_zip, page, index,
+				  compression_level, NULL)) {
 
 		/* Restore the old page and exit. */
 		btr_blob_dbg_restore(page, temp_page, index,
@@ -1750,7 +1923,8 @@ btr_page_reorganize(
 	dict_index_t*	index,	/*!< in: record descriptor */
 	mtr_t*		mtr)	/*!< in: mtr */
 {
-	return(btr_page_reorganize_low(FALSE, block, index, mtr));
+	return(btr_page_reorganize_low(FALSE, page_compression_level,
+				       block, index, mtr));
 }
 #endif /* !UNIV_HOTBACKUP */
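The new MLOG_ZIP_PAGE_REORGANIZE record therefore carries exactly one payload byte, the compression level, which btr_parse_page_reorganize() (next hunk) reads back at recovery time. A hedged sketch of that convention with stand-in helpers, not the real mlog API:

    #include <cassert>
    #include <cstdint>
    #include <optional>
    #include <vector>

    // Writer side: only the compressed-page record type has a payload byte.
    void append_reorganize_payload(std::vector<uint8_t>& rec, bool zip, uint8_t level) {
        if (zip) {
            rec.push_back(level);   // models mach_write_to_1(log_ptr, compression_level)
        }
    }

    // Reader side: a compressed-page record shorter than one byte is incomplete.
    std::optional<uint8_t> parse_reorganize_payload(const uint8_t* ptr, const uint8_t* end,
                                                    bool zip, uint8_t default_level) {
        if (!zip) return default_level;
        if (ptr == end) return std::nullopt;   // caller must wait for more log
        uint8_t level = *ptr;
        assert(level <= 9);                    // zlib levels only, as asserted in the patch
        return level;
    }

    int main() {
        std::vector<uint8_t> rec;
        append_reorganize_payload(rec, true, 6);
        auto level = parse_reorganize_payload(rec.data(), rec.data() + rec.size(), true, 6);
        assert(level && *level == 6);
    }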
@@ -1762,18 +1936,32 @@ byte*
 btr_parse_page_reorganize(
 /*======================*/
 	byte*		ptr,	/*!< in: buffer */
-	byte*		end_ptr __attribute__((unused)),
-				/*!< in: buffer end */
+	byte*		end_ptr,/*!< in: buffer end */
 	dict_index_t*	index,	/*!< in: record descriptor */
+	bool		compressed,/*!< in: true if compressed page */
 	buf_block_t*	block,	/*!< in: page to be reorganized, or NULL */
 	mtr_t*		mtr)	/*!< in: mtr or NULL */
 {
+	ulint	level = page_compression_level;
+
 	ut_ad(ptr && end_ptr);
 
-	/* The record is empty, except for the record initial part */
+	/* For a compressed page, the record contains the compression
+	level used during the original compression, written in one
+	byte. Otherwise the record is empty. */
+	if (compressed) {
+		if (ptr == end_ptr) {
+			return(NULL);
+		}
+
+		level = (ulint)mach_read_from_1(ptr);
+
+		ut_a(level <= 9);
+		++ptr;
+	}
 
 	if (block != NULL) {
-		btr_page_reorganize_low(TRUE, block, index, mtr);
+		btr_page_reorganize_low(TRUE, level, block, index, mtr);
 	}
 
 	return(ptr);
@@ -1827,10 +2015,13 @@ UNIV_INTERN
 rec_t*
 btr_root_raise_and_insert(
 /*======================*/
+	ulint		flags,	/*!< in: undo logging and locking flags */
 	btr_cur_t*	cursor,	/*!< in: cursor at which to insert: must be
 				on the root page; when the function returns,
 				the cursor is positioned on the predecessor
 				of the inserted record */
+	ulint**		offsets,/*!< out: offsets on inserted record */
+	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap, or NULL */
 	const dtuple_t*	tuple,	/*!< in: tuple to insert */
 	ulint		n_ext,	/*!< in: number of externally stored columns */
 	mtr_t*		mtr)	/*!< in: mtr */
@@ -1840,7 +2031,6 @@ btr_root_raise_and_insert(
 	page_t*		new_page;
 	ulint		new_page_no;
 	rec_t*		rec;
-	mem_heap_t*	heap;
 	dtuple_t*	node_ptr;
 	ulint		level;
 	rec_t*		node_ptr_rec;
@@ -1926,7 +2116,9 @@ btr_root_raise_and_insert(
 	lock_update_root_raise(new_block, root_block);
 
 	/* Create a memory heap where the node pointer is stored */
-	heap = mem_heap_create(100);
+	if (!*heap) {
+		*heap = mem_heap_create(1000);
+	}
 
 	rec = page_rec_get_next(page_get_infimum_rec(new_page));
 	new_page_no = buf_block_get_page_no(new_block);
@@ -1934,8 +2126,8 @@
 	/* Build the node pointer (= node key and page address) for the
 	child */
 
-	node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
-					     level);
+	node_ptr = dict_index_build_node_ptr(
		index, rec, new_page_no, *heap, level);
 	/* The node pointer must be marked as the predefined minimum record,
 	as there is no lower alphabetical limit to records in the leftmost
 	node of a level: */
@@ -1961,15 +2153,12 @@
 	page_cur_set_before_first(root_block, page_cursor);
 
 	node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
-					     index, 0, mtr);
+					     index, offsets, heap, 0, mtr);
 
 	/* The root page should only contain the node pointer
 	to new_page at this point.  Thus, the data should fit. */
 	ut_a(node_ptr_rec);
 
-	/* Free the memory heap */
-	mem_heap_free(heap);
-
 	/* We play safe and reset the free bits for the new page */
 
 #if 0
@@ -1985,7 +2174,8 @@
 			 PAGE_CUR_LE, page_cursor);
 
 	/* Split the child and insert tuple */
-	return(btr_page_split_and_insert(cursor, tuple, n_ext, mtr));
+	return(btr_page_split_and_insert(flags, cursor, offsets, heap,
+					 tuple, n_ext, mtr));
 }
 
 /*************************************************************//**
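The recurring signature change here (ulint** offsets, mem_heap_t** heap) moves heap ownership to the outermost caller: callees create the heap lazily and never free it, so one arena is reused across the whole recursive insert. A stand-alone model of the idiom, with std::unique_ptr standing in for mem_heap_t:

    #include <cassert>
    #include <memory>
    #include <vector>

    using Heap = std::vector<int>;             // stand-in for mem_heap_t

    void insert_step(std::unique_ptr<Heap>& heap) {
        if (!heap) {
            heap = std::make_unique<Heap>();   // "if (!*heap) { *heap = mem_heap_create(...); }"
        }
        heap->push_back(0);                    // scratch data lands in the shared arena
    }

    int main() {
        std::unique_ptr<Heap> heap;            // owned by the outermost caller
        insert_step(heap);                     // allocated lazily on first use
        insert_step(heap);                     // reused, not re-created
        assert(heap->size() == 2);
    }                                          // freed exactly once, by the owner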
@@ -2213,9 +2403,9 @@ func_exit:
 /*************************************************************//**
 Returns TRUE if the insert fits on the appropriate half-page
 with the chosen split_rec.
-@return TRUE if fits */
-static
-ibool
+@return true if fits */
+static __attribute__((nonnull(1,3,4,6), warn_unused_result))
+bool
 btr_page_insert_fits(
 /*=================*/
 	btr_cur_t*	cursor,	/*!< in: cursor at which insert
@@ -2223,11 +2413,11 @@ btr_page_insert_fits(
 	const rec_t*	split_rec,/*!< in: suggestion for first record
 				on upper half-page, or NULL if
 				tuple to be inserted should be first */
-	const ulint*	offsets,/*!< in: rec_get_offsets(
-				split_rec, cursor->index) */
+	ulint**		offsets,/*!< in: rec_get_offsets(
+				split_rec, cursor->index); out: garbage */
 	const dtuple_t*	tuple,	/*!< in: tuple to insert */
 	ulint		n_ext,	/*!< in: number of externally stored columns */
-	mem_heap_t*	heap)	/*!< in: temporary memory heap */
+	mem_heap_t**	heap)	/*!< in: temporary memory heap */
 {
 	page_t*		page;
 	ulint		insert_size;
@@ -2236,15 +2426,13 @@ btr_page_insert_fits(
 	ulint		total_n_recs;
 	const rec_t*	rec;
 	const rec_t*	end_rec;
-	ulint*		offs;
 
 	page = btr_cur_get_page(cursor);
 
-	ut_ad(!split_rec == !offsets);
-	ut_ad(!offsets
-	      || !page_is_comp(page) == !rec_offs_comp(offsets));
-	ut_ad(!offsets
-	      || rec_offs_validate(split_rec, cursor->index, offsets));
+	ut_ad(!split_rec
+	      || !page_is_comp(page) == !rec_offs_comp(*offsets));
+	ut_ad(!split_rec
+	      || rec_offs_validate(split_rec, cursor->index, *offsets));
 
 	insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
 	free_space = page_get_free_space_of_empty(page_is_comp(page));
@@ -2262,7 +2450,7 @@
 		rec = page_rec_get_next(page_get_infimum_rec(page));
 		end_rec = page_rec_get_next(btr_cur_get_rec(cursor));
 
-	} else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) {
+	} else if (cmp_dtuple_rec(tuple, split_rec, *offsets) >= 0) {
 
 		rec = page_rec_get_next(page_get_infimum_rec(page));
 		end_rec = split_rec;
@@ -2277,19 +2465,17 @@
 		/* Ok, there will be enough available space on the
 		half page where the tuple is inserted */
 
-		return(TRUE);
+		return(true);
 	}
 
-	offs = NULL;
-
 	while (rec != end_rec) {
 		/* In this loop we calculate the amount of reserved
 		space after rec is removed from page. */
 
-		offs = rec_get_offsets(rec, cursor->index, offs,
-				       ULINT_UNDEFINED, &heap);
+		*offsets = rec_get_offsets(rec, cursor->index, *offsets,
+					   ULINT_UNDEFINED, heap);
 
-		total_data -= rec_offs_size(offs);
+		total_data -= rec_offs_size(*offsets);
 		total_n_recs--;
 
 		if (total_data + page_dir_calc_reserved_space(total_n_recs)
@@ -2298,13 +2484,13 @@
 			/* Ok, there will be enough available space on the
 			half page where the tuple is inserted */
 
-			return(TRUE);
+			return(true);
 		}
 
 		rec = page_rec_get_next_const(rec);
 	}
 
-	return(FALSE);
+	return(false);
 }
 
 /*******************************************************//**
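What btr_page_insert_fits() estimates can be modeled independently: start from the current payload plus the new record, subtract the records that would move to the other half-page, and compare against the free space of an empty page plus directory overhead. A simplified, self-contained sketch with invented size constants (InnoDB's actual formulas differ):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    constexpr size_t kFreeSpaceEmpty = 16000;                  // assumed free space of an empty page
    size_t dir_reserved(size_t n_recs) { return 2 * n_recs; }  // assumed directory cost per record

    // The records in `leaving` would move to the other half-page in the split.
    bool insert_fits(size_t page_data, size_t insert_size,
                     size_t n_recs, const std::vector<size_t>& leaving) {
        size_t total_data = page_data + insert_size;
        size_t total_n_recs = n_recs + 1;                      // +1 for the new record
        for (size_t rec_size : leaving) {
            total_data -= rec_size;
            --total_n_recs;
            if (total_data + dir_reserved(total_n_recs) <= kFreeSpaceEmpty) {
                return true;                                   // early exit, like the real loop
            }
        }
        return total_data + dir_reserved(total_n_recs) <= kFreeSpaceEmpty;
    }

    int main() {
        assert(insert_fits(15000, 200, 100, {8000}));          // fits once half the data leaves
        assert(!insert_fits(17000, 200, 100, {}));             // nothing leaves, cannot fit
    }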
@@ -2314,6 +2500,7 @@ UNIV_INTERN
 void
 btr_insert_on_non_leaf_level_func(
 /*==============================*/
+	ulint		flags,	/*!< in: undo logging and locking flags */
 	dict_index_t*	index,	/*!< in: index */
 	ulint		level,	/*!< in: level, must be > 0 */
 	dtuple_t*	tuple,	/*!< in: the record to be inserted */
@@ -2323,8 +2510,10 @@ btr_insert_on_non_leaf_level_func(
 {
 	big_rec_t*	dummy_big_rec;
 	btr_cur_t	cursor;
-	ulint		err;
+	dberr_t		err;
 	rec_t*		rec;
+	ulint*		offsets	= NULL;
+	mem_heap_t*	heap = NULL;
 
 	ut_ad(level > 0);
 
@@ -2335,26 +2524,35 @@ btr_insert_on_non_leaf_level_func(
 	ut_ad(cursor.flag == BTR_CUR_BINARY);
 
 	err = btr_cur_optimistic_insert(
-		BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG
-		| BTR_NO_UNDO_LOG_FLAG, &cursor, tuple, &rec,
-		&dummy_big_rec, 0, NULL, mtr);
+		flags
+		| BTR_NO_LOCKING_FLAG
+		| BTR_KEEP_SYS_FLAG
+		| BTR_NO_UNDO_LOG_FLAG,
+		&cursor, &offsets, &heap,
+		tuple, &rec, &dummy_big_rec, 0, NULL, mtr);
 
 	if (err == DB_FAIL) {
-		err = btr_cur_pessimistic_insert(
-			BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG
-			| BTR_NO_UNDO_LOG_FLAG,
-			&cursor, tuple, &rec, &dummy_big_rec, 0, NULL, mtr);
+		err = btr_cur_pessimistic_insert(flags
+						 | BTR_NO_LOCKING_FLAG
+						 | BTR_KEEP_SYS_FLAG
+						 | BTR_NO_UNDO_LOG_FLAG,
+						 &cursor, &offsets, &heap,
+						 tuple, &rec,
+						 &dummy_big_rec, 0, NULL, mtr);
 		ut_a(err == DB_SUCCESS);
 	}
+	mem_heap_free(heap);
 }
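btr_insert_on_non_leaf_level_func() keeps its two-phase shape: the cheap optimistic insert is tried first, and the pessimistic (page-splitting) variant runs only on DB_FAIL and is asserted to succeed. A toy model of that control flow, with made-up page state:

    #include <cassert>

    enum class Err { SUCCESS, FAIL };

    struct Page { int n_free_slots; };

    Err optimistic_insert(Page& p) {            // cheap path: no tree changes
        if (p.n_free_slots == 0) return Err::FAIL;
        --p.n_free_slots;
        return Err::SUCCESS;
    }

    Err pessimistic_insert(Page& p) {           // may split pages; assumed to succeed
        p.n_free_slots += 10;                   // model: a split frees up room
        --p.n_free_slots;
        return Err::SUCCESS;
    }

    Err insert_node_pointer(Page& p) {
        Err err = optimistic_insert(p);
        if (err == Err::FAIL) {                 // same fallback as the patched code
            err = pessimistic_insert(p);
        }
        return err;
    }

    int main() {
        Page p{0};
        assert(insert_node_pointer(p) == Err::SUCCESS);
    }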
 
 /**************************************************************//**
 Attaches the halves of an index page on the appropriate level in an
 index tree. */
-static
+static __attribute__((nonnull))
 void
 btr_attach_half_pages(
 /*==================*/
+	ulint		flags,		/*!< in: undo logging and
+					locking flags */
 	dict_index_t*	index,		/*!< in: the index tree */
 	buf_block_t*	block,		/*!< in/out: page to be split */
 	const rec_t*	split_rec,	/*!< in: first record on upper
@@ -2432,7 +2630,8 @@ btr_attach_half_pages(
 	/* Insert it next to the pointer to the lower half. Note that this
 	may generate recursion leading to a split on the higher level. */
 
-	btr_insert_on_non_leaf_level(index, level + 1, node_ptr_upper, mtr);
+	btr_insert_on_non_leaf_level(flags, index, level + 1,
+				     node_ptr_upper, mtr);
 
 	/* Free the memory heap */
 	mem_heap_free(heap);
@@ -2484,13 +2683,13 @@ btr_attach_half_pages(
 /*************************************************************//**
 Determine if a tuple is smaller than any record on the page.
 @return TRUE if smaller */
-static
-ibool
+static __attribute__((nonnull, warn_unused_result))
+bool
 btr_page_tuple_smaller(
 /*===================*/
 	btr_cur_t*	cursor,	/*!< in: b-tree cursor */
 	const dtuple_t*	tuple,	/*!< in: tuple to consider */
-	ulint*		offsets,/*!< in/out: temporary storage */
+	ulint**		offsets,/*!< in/out: temporary storage */
 	ulint		n_uniq,	/*!< in: number of unique fields
 				in the index page records */
 	mem_heap_t**	heap)	/*!< in/out: heap for offsets */
@@ -2505,11 +2704,11 @@ btr_page_tuple_smaller(
 	page_cur_move_to_next(&pcur);
 	first_rec = page_cur_get_rec(&pcur);
 
-	offsets = rec_get_offsets(
-		first_rec, cursor->index, offsets,
+	*offsets = rec_get_offsets(
+		first_rec, cursor->index, *offsets,
 		n_uniq, heap);
 
-	return(cmp_dtuple_rec(tuple, first_rec, offsets) < 0);
+	return(cmp_dtuple_rec(tuple, first_rec, *offsets) < 0);
 }
 
 /*************************************************************//**
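btr_page_tuple_smaller() answers one question: does the tuple sort before the first user record on the page (the record right after the infimum)? A stand-alone equivalent over toy tuples, for illustration only:

    #include <cassert>
    #include <algorithm>
    #include <vector>

    using Tuple = std::vector<int>;

    // True iff `tuple` sorts before the page's first user record.
    bool tuple_smaller(const Tuple& tuple, const std::vector<Tuple>& page_recs) {
        assert(!page_recs.empty());
        const Tuple& first = page_recs.front();   // record after the infimum
        return std::lexicographical_compare(tuple.begin(), tuple.end(),
                                            first.begin(), first.end());
    }

    int main() {
        std::vector<Tuple> page = {{5, 0}, {9, 1}};
        assert(tuple_smaller({3, 7}, page));      // 3 < 5: the insert goes left
        assert(!tuple_smaller({6, 0}, page));
    }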
@@ -2525,9 +2724,12 @@ UNIV_INTERN
 rec_t*
 btr_page_split_and_insert(
 /*======================*/
+	ulint		flags,	/*!< in: undo logging and locking flags */
 	btr_cur_t*	cursor,	/*!< in: cursor at which to insert; when the
 				function returns, the cursor is positioned
 				on the predecessor of the inserted record */
+	ulint**		offsets,/*!< out: offsets on inserted record */
+	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap, or NULL */
 	const dtuple_t*	tuple,	/*!< in: tuple to insert */
 	ulint		n_ext,	/*!< in: number of externally stored columns */
 	mtr_t*		mtr)	/*!< in: mtr */
@@ -2553,18 +2755,21 @@ btr_page_split_and_insert(
 	ibool		insert_left;
 	ulint		n_iterations = 0;
 	rec_t*		rec;
-	mem_heap_t*	heap;
 	ulint		n_uniq;
-	ulint*		offsets;
 
-	heap = mem_heap_create(1024);
+	if (!*heap) {
+		*heap = mem_heap_create(1024);
+	}
 	n_uniq = dict_index_get_n_unique_in_tree(cursor->index);
 func_start:
-	mem_heap_empty(heap);
-	offsets = NULL;
+	mem_heap_empty(*heap);
+	*offsets = NULL;
 
 	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
 				MTR_MEMO_X_LOCK));
+	ut_ad(!dict_index_is_online_ddl(cursor->index)
+	      || (flags & BTR_CREATE_FLAG)
+	      || dict_index_is_clust(cursor->index));
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
 #endif /* UNIV_SYNC_DEBUG */
@@ -2590,7 +2795,7 @@ func_start:
 
 		if (split_rec == NULL) {
 			insert_left = btr_page_tuple_smaller(
-				cursor, tuple, offsets, n_uniq, &heap);
+				cursor, tuple, offsets, n_uniq, heap);
 		}
 	} else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
 		direction = FSP_UP;
@@ -2612,7 +2817,7 @@ func_start:
 		if (page_get_n_recs(page) > 1) {
 			split_rec = page_get_middle_rec(page);
 		} else if (btr_page_tuple_smaller(cursor, tuple,
-						  offsets, n_uniq, &heap)) {
+						  offsets, n_uniq, heap)) {
 			split_rec = page_rec_get_next(
 				page_get_infimum_rec(page));
 		} else {
@@ -2635,10 +2840,10 @@ func_start:
 	if (split_rec) {
 		first_rec = move_limit = split_rec;
 
-		offsets = rec_get_offsets(split_rec, cursor->index, offsets,
-					  n_uniq, &heap);
+		*offsets = rec_get_offsets(split_rec, cursor->index, *offsets,
+					   n_uniq, heap);
 
-		insert_left = cmp_dtuple_rec(tuple, split_rec, offsets) < 0;
+		insert_left = cmp_dtuple_rec(tuple, split_rec, *offsets) < 0;
 
 		if (!insert_left && new_page_zip && n_iterations > 0) {
 			/* If a compressed page has already been split,
@@ -2665,7 +2870,7 @@ insert_empty:
 	/* 4. Do first the modifications in the tree structure */
 
-	btr_attach_half_pages(cursor->index, block,
+	btr_attach_half_pages(flags, cursor->index, block,
 			      first_rec, new_block, direction, mtr);
 
 	/* If the split is made on the leaf level and the insert will fit
@@ -2685,10 +2890,11 @@ insert_empty:
 
 		insert_will_fit = !new_page_zip
 			&& btr_page_insert_fits(cursor, NULL,
-						NULL, tuple, n_ext, heap);
+						offsets, tuple, n_ext, heap);
 	}
 
-	if (insert_will_fit && page_is_leaf(page)) {
+	if (insert_will_fit && page_is_leaf(page)
+	    && !dict_index_is_online_ddl(cursor->index)) {
 
 		mtr_memo_release(mtr, dict_index_get_lock(cursor->index),
 				 MTR_MEMO_X_LOCK);
@@ -2805,8 +3011,8 @@ insert_empty:
 	page_cur_search(insert_block, cursor->index, tuple,
 			PAGE_CUR_LE, page_cursor);
 
-	rec = page_cur_tuple_insert(page_cursor, tuple,
-				    cursor->index, n_ext, mtr);
+	rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
+				    offsets, heap, n_ext, mtr);
 
 #ifdef UNIV_ZIP_DEBUG
 	{
@@ -2837,7 +3043,7 @@ insert_empty:
 	page_cur_search(insert_block, cursor->index, tuple,
 			PAGE_CUR_LE, page_cursor);
 	rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
-				    n_ext, mtr);
+				    offsets, heap, n_ext, mtr);
 
 	if (rec == NULL) {
 		/* The insert did not fit on the page: loop back to the
@@ -2878,7 +3084,7 @@ func_exit:
 	ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
 	ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));
 
-	mem_heap_free(heap);
+	ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets));
 	return(rec);
 }
@@ -3058,15 +3264,15 @@ btr_node_ptr_delete(
 {
 	btr_cur_t	cursor;
 	ibool		compressed;
-	ulint		err;
+	dberr_t		err;
 
 	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
 
 	/* Delete node pointer on father page */
 	btr_page_get_father(index, block, mtr, &cursor);
 
-	compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, RB_NONE,
-						mtr);
+	compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor,
+						BTR_CREATE_FLAG, RB_NONE, mtr);
 	ut_a(err == DB_SUCCESS);
 
 	if (!compressed) {
@@ -3098,7 +3304,7 @@ btr_lift_page_up(
 	buf_block_t*	blocks[BTR_MAX_LEVELS];
 	ulint		n_blocks;	/*!< last used index in blocks[] */
 	ulint		i;
-	ibool		lift_father_up	= FALSE;
+	bool		lift_father_up;
 	buf_block_t*	block_orig	= block;
 
 	ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
@@ -3140,7 +3346,8 @@ btr_lift_page_up(
 		blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
 	}
 
-	if (n_blocks && page_level == 0) {
+	lift_father_up = (n_blocks && page_level == 0);
+	if (lift_father_up) {
 		/* The father page also should be the only on its level (not
 		root). We should lift up the father page at first.
 		Because the leaf page should be lifted up only for root page.
@@ -3149,7 +3356,6 @@ btr_lift_page_up(
 		later freeing of the page doesn't find the page allocation
 		to be freed.*/
 
-		lift_father_up = TRUE;
 		block = father_block;
 		page = buf_block_get_frame(block);
 		page_level = btr_page_get_level(page, mtr);
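Note the new third condition on releasing the index lock before the actual insert: during online DDL the latch also protects the online row log, so it is kept even when the insert is known to fit on a leaf page. A compact model of that decision with a hypothetical struct, not InnoDB types:

    #include <cassert>

    struct SplitState {
        bool insert_will_fit;   // estimated by btr_page_insert_fits()
        bool is_leaf;
        bool online_ddl;        // index is being built by online DDL
    };

    // Release the tree latch early only when no further tree-structure
    // change can happen AND no online row log depends on the latch.
    inline bool can_release_tree_latch(const SplitState& s) {
        return s.insert_will_fit && s.is_leaf && !s.online_ddl;
    }

    int main() {
        assert(can_release_tree_latch({true, true, false}));
        assert(!can_release_tree_latch({true, true, true}));   // online DDL keeps it
    }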
@@ -3295,6 +3501,7 @@ btr_compress(
 
 	if (adjust) {
 		nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor));
+		ut_ad(nth_rec > 0);
 	}
 
 	/* Decide the page to which we try to merge and which will inherit
@@ -3351,6 +3558,16 @@ err_exit:
 		return(FALSE);
 	}
 
+	/* If the compression padding tells us that merging will result
+	in a too-packed-up page, i.e. one that is likely to cause a
+	compression failure, then do not merge the pages. */
+	if (zip_size && page_is_leaf(merge_page)
+	    && (page_get_data_size(merge_page) + data_size
+		>= dict_index_zip_pad_optimal_page_size(index))) {
+
+		goto err_exit;
+	}
+
 	ut_ad(page_validate(merge_page, index));
 
 	max_ins_size = page_get_max_insert_size(merge_page, n_recs);
@@ -3530,6 +3747,7 @@ func_exit:
 	mem_heap_free(heap);
 
 	if (adjust) {
+		ut_ad(nth_rec > 0);
 		btr_cur_position(
 			index,
 			page_rec_get_nth(merge_block->frame, nth_rec),
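The new guard in btr_compress() can be read as a pure size predicate; this sketch mirrors it with plain integers, where pad_optimal_size stands in for dict_index_zip_pad_optimal_page_size():

    #include <cassert>
    #include <cstddef>

    // All sizes in bytes. Returns false when a merge of two compressed
    // leaf pages would probably fail to compress and be undone.
    bool merge_allowed(size_t zip_size, bool leaf_page,
                       size_t merge_page_data, size_t page_data,
                       size_t pad_optimal_size) {
        if (zip_size != 0 && leaf_page
            && merge_page_data + page_data >= pad_optimal_size) {
            return false;           // likely compression failure: skip the merge
        }
        return true;
    }

    int main() {
        assert(!merge_allowed(8192, true, 3000, 2000, 4000)); // 5000 >= 4000: refuse
        assert(merge_allowed(0, true, 3000, 2000, 4000));     // uncompressed: always try
    }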
@@ -3846,7 +4064,7 @@ btr_print_index(
 
 	mtr_start(&mtr);
 
-	root = btr_root_block_get(index, &mtr);
+	root = btr_root_block_get(index, RW_X_LATCH, &mtr);
 
 	btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
 	if (heap) {
@@ -3855,7 +4073,7 @@
 
 	mtr_commit(&mtr);
 
-	btr_validate_index(index, NULL);
+	btr_validate_index(index, 0);
 }
 #endif /* UNIV_BTR_PRINT */
 
@@ -4041,8 +4259,22 @@ btr_index_page_validate(
 {
 	page_cur_t	cur;
 	ibool		ret	= TRUE;
+#ifndef DBUG_OFF
+	ulint		nth	= 1;
+#endif /* !DBUG_OFF */
 
 	page_cur_set_before_first(block, &cur);
+
+	/* Directory slot 0 should only contain the infimum record. */
+	DBUG_EXECUTE_IF("check_table_rec_next",
+			ut_a(page_rec_get_nth_const(
+				     page_cur_get_page(&cur), 0)
+			     == cur.rec);
+			ut_a(page_dir_slot_get_n_owned(
+				     page_dir_get_nth_slot(
+					     page_cur_get_page(&cur), 0))
+			     == 1););
+
 	page_cur_move_to_next(&cur);
 
 	for (;;) {
@@ -4056,6 +4288,16 @@ btr_index_page_validate(
 			return(FALSE);
 		}
 
+		/* Verify that page_rec_get_nth_const() is correctly
+		retrieving each record. */
+		DBUG_EXECUTE_IF("check_table_rec_next",
+				ut_a(cur.rec == page_rec_get_nth_const(
+					     page_cur_get_page(&cur),
+					     page_rec_get_n_recs_before(
						     cur.rec)));
+				ut_a(nth++ == page_rec_get_n_recs_before(
+					     cur.rec)););
+
 		page_cur_move_to_next(&cur);
 	}
 
@@ -4106,14 +4348,15 @@ btr_validate_report2(
 Validates index tree level.
 @return TRUE if ok */
 static
-ibool
+bool
 btr_validate_level(
 /*===============*/
 	dict_index_t*	index,	/*!< in: index tree */
-	trx_t*		trx,	/*!< in: transaction or NULL */
+	const trx_t*	trx,	/*!< in: transaction or NULL */
 	ulint		level)	/*!< in: level number */
 {
 	ulint		space;
+	ulint		space_flags;
 	ulint		zip_size;
 	buf_block_t*	block;
 	page_t*		page;
@@ -4127,9 +4370,10 @@ btr_validate_level(
 	ulint		left_page_no;
 	page_cur_t	cursor;
 	dtuple_t*	node_ptr_tuple;
-	ibool		ret	= TRUE;
+	bool		ret	= true;
 	mtr_t		mtr;
 	mem_heap_t*	heap	= mem_heap_create(256);
+	fseg_header_t*	seg;
 	ulint*		offsets	= NULL;
 	ulint*		offsets2= NULL;
 #ifdef UNIV_ZIP_DEBUG
@@ -4140,15 +4384,39 @@ btr_validate_level(
 
 	mtr_x_lock(dict_index_get_lock(index), &mtr);
 
-	block = btr_root_block_get(index, &mtr);
+	block = btr_root_block_get(index, RW_X_LATCH, &mtr);
 	page = buf_block_get_frame(block);
+	seg = page + PAGE_HEADER + PAGE_BTR_SEG_TOP;
 
 	space = dict_index_get_space(index);
 	zip_size = dict_table_zip_size(index->table);
 
+	fil_space_get_latch(space, &space_flags);
+
+	if (zip_size != dict_tf_get_zip_size(space_flags)) {
+
+		ib_logf(IB_LOG_LEVEL_WARN,
+			"Flags mismatch: table=%lu, tablespace=%lu",
+			(ulint) index->table->flags, (ulint) space_flags);
+
+		mtr_commit(&mtr);
+
+		return(false);
+	}
+
 	while (level != btr_page_get_level(page, &mtr)) {
 		const rec_t*	node_ptr;
 
+		if (fseg_page_is_free(seg,
+				      block->page.space, block->page.offset)) {
+
+			btr_validate_report1(index, level, block);
+
+			ib_logf(IB_LOG_LEVEL_WARN, "page is free");
+
+			ret = false;
+		}
+
 		ut_a(space == buf_block_get_space(block));
 		ut_a(space == page_get_space_id(page));
 #ifdef UNIV_ZIP_DEBUG
@@ -4169,12 +4437,13 @@
 	/* Now we are on the desired level. Loop through the pages on that
 	level. */
-loop:
-	if (trx_is_interrupted(trx)) {
-		mtr_commit(&mtr);
-		mem_heap_free(heap);
-		return(ret);
+
+	if (level == 0) {
+		/* Leaf pages are managed in their own file segment. */
+		seg -= PAGE_BTR_SEG_TOP - PAGE_BTR_SEG_LEAF;
 	}
+
+loop:
 	mem_heap_empty(heap);
 	offsets = offsets2 = NULL;
 	mtr_x_lock(dict_index_get_lock(index), &mtr);
@@ -4184,20 +4453,35 @@ loop:
 	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 
-	/* Check ordering etc. of records */
+	ut_a(block->page.space == space);
+
+	if (fseg_page_is_free(seg, block->page.space, block->page.offset)) {
 
-	if (!page_validate(page, index)) {
 		btr_validate_report1(index, level, block);
 
-		ret = FALSE;
-	} else if (level == 0) {
+		ib_logf(IB_LOG_LEVEL_WARN, "Page is marked as free");
+		ret = false;
+
+	} else if (btr_page_get_index_id(page) != index->id) {
+
+		ib_logf(IB_LOG_LEVEL_ERROR,
+			"Page index id " IB_ID_FMT " != data dictionary "
+			"index id " IB_ID_FMT,
+			btr_page_get_index_id(page), index->id);
+
+		ret = false;
+
+	} else if (!page_validate(page, index)) {
+
+		btr_validate_report1(index, level, block);
+		ret = false;
+
+	} else if (level == 0 && !btr_index_page_validate(block, index)) {
+
 		/* We are on level 0. Check that the records have the right
 		number of fields, and field lengths are right. */
 
-		if (!btr_index_page_validate(block, index)) {
-
-			ret = FALSE;
-		}
+		ret = false;
 	}
 
 	ut_a(btr_page_get_level(page, &mtr) == level);
@@ -4223,7 +4507,7 @@ loop:
 		buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
 		buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
 
-		ret = FALSE;
+		ret = false;
 	}
 
 	if (page_is_comp(right_page) != page_is_comp(page)) {
@@ -4232,7 +4516,7 @@ loop:
 		buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
 		buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
 
-		ret = FALSE;
+		ret = false;
 
 		goto node_ptr_fails;
 	}
@@ -4265,7 +4549,7 @@ loop:
 			rec_print(stderr, rec, index);
 			putc('\n', stderr);
 
-			ret = FALSE;
+			ret = false;
 		}
 	}
@@ -4316,7 +4600,7 @@ loop:
 		fputs("InnoDB: record on page ", stderr);
 		rec_print_new(stderr, rec, offsets);
 		putc('\n', stderr);
-		ret = FALSE;
+		ret = false;
 
 		goto node_ptr_fails;
 	}
@@ -4346,7 +4630,7 @@ loop:
 			fputs("InnoDB: first rec ", stderr);
 			rec_print(stderr, first_rec, index);
 			putc('\n', stderr);
-			ret = FALSE;
+			ret = false;
 
 			goto node_ptr_fails;
 		}
@@ -4374,7 +4658,7 @@ loop:
 			if (btr_cur_get_rec(&right_node_cur)
 			    != right_node_ptr) {
-				ret = FALSE;
+				ret = false;
 				fputs("InnoDB: node pointer to"
 				      " the right page is wrong\n", stderr);
@@ -4400,7 +4684,7 @@ loop:
 				    != page_rec_get_next(
 					    page_get_infimum_rec(
						    right_father_page))) {
-					ret = FALSE;
+					ret = false;
 					fputs("InnoDB: node pointer 2 to"
 					      " the right page is wrong\n",
					      stderr);
@@ -4425,7 +4709,7 @@ loop:
 				if (page_get_page_no(right_father_page)
 				    != btr_page_get_next(father_page, &mtr)) {
-					ret = FALSE;
+					ret = false;
 					fputs("InnoDB: node pointer 3 to"
 					      " the right page is wrong\n",
					      stderr);
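The page-by-page walk in btr_validate_level() follows right siblings until FIL_NULL, records failures without stopping, and, new in this patch, ends gracefully when the transaction is interrupted instead of returning from the middle of the loop. A simplified stand-alone model with hypothetical helpers:

    #include <cassert>
    #include <vector>

    constexpr unsigned kFilNull = 0xFFFFFFFFu;

    struct PageNode {
        unsigned right_sibling;     // kFilNull at the end of the level
        bool     valid;
    };

    bool validate_level(const std::vector<PageNode>& pages, unsigned first,
                        bool (*interrupted)()) {
        bool ok = true;
        for (unsigned no = first; no != kFilNull; no = pages[no].right_sibling) {
            if (!pages[no].valid) ok = false;   // keep scanning, like ret = false
            if (interrupted()) break;           // return the status gathered so far
        }
        return ok;
    }

    int main() {
        std::vector<PageNode> level = {{1, true}, {2, false}, {kFilNull, true}};
        assert(!validate_level(level, 0, [] { return false; }));
    }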
@@ -4456,17 +4740,23 @@ node_ptr_fails:
 	on the next loop.  The page has already been checked. */
 	mtr_commit(&mtr);
 
-	if (right_page_no != FIL_NULL) {
+	if (trx_is_interrupted(trx)) {
+		/* On interrupt, return the current status. */
+	} else if (right_page_no != FIL_NULL) {
+
 		mtr_start(&mtr);
 
-		block = btr_block_get(space, zip_size, right_page_no,
-				      RW_X_LATCH, index, &mtr);
+		block = btr_block_get(
+			space, zip_size, right_page_no,
+			RW_X_LATCH, index, &mtr);
+
 		page = buf_block_get_frame(block);
 
 		goto loop;
 	}
 
 	mem_heap_free(heap);
+
 	return(ret);
 }
@@ -4474,40 +4764,39 @@ node_ptr_fails:
 Checks the consistency of an index tree.
 @return TRUE if ok */
 UNIV_INTERN
-ibool
+bool
 btr_validate_index(
 /*===============*/
 	dict_index_t*	index,	/*!< in: index */
-	trx_t*		trx)	/*!< in: transaction or NULL */
+	const trx_t*	trx)	/*!< in: transaction or NULL */
 {
-	mtr_t	mtr;
-	page_t*	root;
-	ulint	i;
-	ulint	n;
-
 	/* Full Text index are implemented by auxiliary tables,
 	not the B-tree */
-	if (index->type & DICT_FTS) {
-		return(TRUE);
+	if (dict_index_is_online_ddl(index) || (index->type & DICT_FTS)) {
+		return(true);
 	}
 
+	mtr_t		mtr;
+
 	mtr_start(&mtr);
 
-	mtr_x_lock(dict_index_get_lock(index), &mtr);
-	root = btr_root_get(index, &mtr);
-	n = btr_page_get_level(root, &mtr);
+	mtr_x_lock(dict_index_get_lock(index), &mtr);
 
-	for (i = 0; i <= n && !trx_is_interrupted(trx); i++) {
-		if (!btr_validate_level(index, trx, n - i)) {
+	bool	ok = true;
+	page_t*	root = btr_root_get(index, &mtr);
+	ulint	n = btr_page_get_level(root, &mtr);
 
-			mtr_commit(&mtr);
+	for (ulint i = 0; i <= n; ++i) {
 
-			return(FALSE);
+		if (!btr_validate_level(index, trx, n - i)) {
+			ok = false;
+			break;
 		}
 	}
 
 	mtr_commit(&mtr);
 
-	return(TRUE);
+	return(ok);
 }
+
 #endif /* !UNIV_HOTBACKUP */
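Finally, the rewritten btr_validate_index() drives that walk once per level, from the root level n down to the leaves, stopping at the first level that fails. A sketch of the same loop shape with a stubbed level check:

    #include <cassert>

    // Stub standing in for btr_validate_level(); level 0 is the leaf level.
    static bool level_ok[3] = {true, false, true};   // pretend level 1 is broken
    bool validate_level_stub(unsigned level) { return level_ok[level]; }

    bool validate_index(unsigned root_level) {
        bool ok = true;
        for (unsigned i = 0; i <= root_level; ++i) {
            if (!validate_level_stub(root_level - i)) {  // n - i: top-down
                ok = false;
                break;
            }
        }
        return ok;
    }

    int main() {
        assert(!validate_index(2));   // fails when it reaches level 1
    }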