summary refs log tree commit diff
path: root/storage/innobase/btr
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/btr')
-rw-r--r-- storage/innobase/btr/btr0btr.cc 559
-rw-r--r-- storage/innobase/btr/btr0cur.cc 720
-rw-r--r-- storage/innobase/btr/btr0pcur.cc 73
-rw-r--r-- storage/innobase/btr/btr0sea.cc 36
4 files changed, 919 insertions, 469 deletions
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 8eae3c7e3bc..e3e127c3ace 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -697,14 +698,16 @@ btr_root_fseg_validate(
#endif /* UNIV_BTR_DEBUG */
/**************************************************************//**
-Gets the root node of a tree and x-latches it.
-@return root page, x-latched */
+Gets the root node of a tree and x- or s-latches it.
+@return root page, x- or s-latched */
static
buf_block_t*
btr_root_block_get(
/*===============*/
- dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr) /*!< in: mtr */
+ const dict_index_t* index, /*!< in: index tree */
+ ulint mode, /*!< in: either RW_S_LATCH
+ or RW_X_LATCH */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint space;
ulint zip_size;
@@ -715,8 +718,7 @@ btr_root_block_get(
zip_size = dict_table_zip_size(index->table);
root_page_no = dict_index_get_page(index);
- block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH,
- index, mtr);
+ block = btr_block_get(space, zip_size, root_page_no, mode, index, mtr);
btr_assert_not_corrupted(block, index);
#ifdef UNIV_BTR_DEBUG
if (!dict_index_is_ibuf(index)) {
@@ -739,10 +741,162 @@ UNIV_INTERN
page_t*
btr_root_get(
/*=========*/
+ const dict_index_t* index, /*!< in: index tree */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ return(buf_block_get_frame(btr_root_block_get(index, RW_X_LATCH,
+ mtr)));
+}
+
+/**************************************************************//**
+Gets the height of the B-tree (the level of the root, when the leaf
+level is assumed to be 0). The caller must hold an S or X latch on
+the index.
+@return tree height (level of the root) */
+UNIV_INTERN
+ulint
+btr_height_get(
+/*===========*/
dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr) /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ ulint height;
+ buf_block_t* root_block;
+
+ ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_S_LOCK)
+ || mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK));
+
+ /* S latches the page */
+ root_block = btr_root_block_get(index, RW_S_LATCH, mtr);
+
+ height = btr_page_get_level(buf_block_get_frame(root_block), mtr);
+
+ /* Release the S latch on the root page. */
+ mtr_memo_release(mtr, root_block, MTR_MEMO_PAGE_S_FIX);
+#ifdef UNIV_SYNC_DEBUG
+ sync_thread_reset_level(&root_block->lock);
+#endif /* UNIV_SYNC_DEBUG */
+
+ return(height);
+}
+
+/**************************************************************//**
+Checks a file segment header within a B-tree root page and updates
+the segment header space id.
+@return TRUE if valid */
+static
+bool
+btr_root_fseg_adjust_on_import(
+/*===========================*/
+ fseg_header_t* seg_header, /*!< in/out: segment header */
+ page_zip_des_t* page_zip, /*!< in/out: compressed page,
+ or NULL */
+ ulint space, /*!< in: tablespace identifier */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ ulint offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET);
+
+ if (offset < FIL_PAGE_DATA
+ || offset > UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) {
+
+ return(FALSE);
+
+ } else if (page_zip) {
+ mach_write_to_4(seg_header + FSEG_HDR_SPACE, space);
+ page_zip_write_header(page_zip, seg_header + FSEG_HDR_SPACE,
+ 4, mtr);
+ } else {
+ mlog_write_ulint(seg_header + FSEG_HDR_SPACE,
+ space, MLOG_4BYTES, mtr);
+ }
+
+ return(TRUE);
+}
+
+/**************************************************************//**
+Checks and adjusts the root node of a tree during IMPORT TABLESPACE.
+@return error code, or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+btr_root_adjust_on_import(
+/*======================*/
+ const dict_index_t* index) /*!< in: index tree */
{
- return(buf_block_get_frame(btr_root_block_get(index, mtr)));
+ dberr_t err;
+ mtr_t mtr;
+ page_t* page;
+ buf_block_t* block;
+ page_zip_des_t* page_zip;
+ dict_table_t* table = index->table;
+ ulint space_id = dict_index_get_space(index);
+ ulint zip_size = dict_table_zip_size(table);
+ ulint root_page_no = dict_index_get_page(index);
+
+ mtr_start(&mtr);
+
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+
+ DBUG_EXECUTE_IF("ib_import_trigger_corruption_3",
+ return(DB_CORRUPTION););
+
+ block = btr_block_get(
+ space_id, zip_size, root_page_no, RW_X_LATCH, index, &mtr);
+
+ page = buf_block_get_frame(block);
+ page_zip = buf_block_get_page_zip(block);
+
+ /* Check that this is a B-tree page and both the PREV and NEXT
+ pointers are FIL_NULL, because the root page does not have any
+ siblings. */
+ if (fil_page_get_type(page) != FIL_PAGE_INDEX
+ || fil_page_get_prev(page) != FIL_NULL
+ || fil_page_get_next(page) != FIL_NULL) {
+
+ err = DB_CORRUPTION;
+
+ } else if (dict_index_is_clust(index)) {
+ bool page_is_compact_format;
+
+ page_is_compact_format = page_is_comp(page) > 0;
+
+ /* Check if the page format and table format agree. */
+ if (page_is_compact_format != dict_table_is_comp(table)) {
+ err = DB_CORRUPTION;
+ } else {
+
+ /* Check that the table flags and the tablespace
+ flags match. */
+ ulint flags = fil_space_get_flags(table->space);
+
+ if (flags
+ && flags != dict_tf_to_fsp_flags(table->flags)) {
+
+ err = DB_CORRUPTION;
+ } else {
+ err = DB_SUCCESS;
+ }
+ }
+ } else {
+ err = DB_SUCCESS;
+ }
+
+ /* Check and adjust the file segment headers, if all OK so far. */
+ if (err == DB_SUCCESS
+ && (!btr_root_fseg_adjust_on_import(
+ FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+ + page, page_zip, space_id, &mtr)
+ || !btr_root_fseg_adjust_on_import(
+ FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+ + page, page_zip, space_id, &mtr))) {
+
+ err = DB_CORRUPTION;
+ }
+
+ mtr_commit(&mtr);
+
+ return(err);
}
/*************************************************************//**
@@ -1033,8 +1187,7 @@ btr_get_size(
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
MTR_MEMO_S_LOCK));
- if (index->page == FIL_NULL
- || index->to_be_dropped
+ if (index->page == FIL_NULL || dict_index_is_online_ddl(index)
|| *index->name == TEMP_INDEX_PREFIX) {
return(ULINT_UNDEFINED);
}
@@ -1584,6 +1737,8 @@ btr_page_reorganize_low(
there cannot exist locks on the
page, and a hash index should not be
dropped: it cannot exist */
+ ulint compression_level,/*!< in: compression level to be used
+ if dealing with compressed page */
buf_block_t* block, /*!< in: page to be reorganized */
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr) /*!< in: mtr */
@@ -1601,6 +1756,8 @@ btr_page_reorganize_low(
ulint max_ins_size1;
ulint max_ins_size2;
ibool success = FALSE;
+ byte type;
+ byte* log_ptr;
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
btr_assert_not_corrupted(block, index);
@@ -1612,9 +1769,23 @@ btr_page_reorganize_low(
#ifndef UNIV_HOTBACKUP
/* Write the log record */
- mlog_open_and_write_index(mtr, page, index, page_is_comp(page)
- ? MLOG_COMP_PAGE_REORGANIZE
- : MLOG_PAGE_REORGANIZE, 0);
+ if (page_zip) {
+ type = MLOG_ZIP_PAGE_REORGANIZE;
+ } else if (page_is_comp(page)) {
+ type = MLOG_COMP_PAGE_REORGANIZE;
+ } else {
+ type = MLOG_PAGE_REORGANIZE;
+ }
+
+ log_ptr = mlog_open_and_write_index(
+ mtr, page, index, type, page_zip ? 1 : 0);
+
+ /* For compressed pages write the compression level. */
+ if (log_ptr && page_zip) {
+ mach_write_to_1(log_ptr, compression_level);
+ mlog_close(mtr, log_ptr + 1);
+ }
+
#endif /* !UNIV_HOTBACKUP */
/* Turn logging off */
@@ -1662,7 +1833,9 @@ btr_page_reorganize_low(
ut_ad(max_trx_id != 0 || recovery);
}
- if (page_zip && !page_zip_compress(page_zip, page, index, NULL)) {
+ if (page_zip
+ && !page_zip_compress(page_zip, page, index,
+ compression_level, NULL)) {
/* Restore the old page and exit. */
btr_blob_dbg_restore(page, temp_page, index,
@@ -1750,7 +1923,8 @@ btr_page_reorganize(
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr) /*!< in: mtr */
{
- return(btr_page_reorganize_low(FALSE, block, index, mtr));
+ return(btr_page_reorganize_low(FALSE, page_compression_level,
+ block, index, mtr));
}
#endif /* !UNIV_HOTBACKUP */
@@ -1762,18 +1936,32 @@ byte*
btr_parse_page_reorganize(
/*======================*/
byte* ptr, /*!< in: buffer */
- byte* end_ptr __attribute__((unused)),
- /*!< in: buffer end */
+ byte* end_ptr,/*!< in: buffer end */
dict_index_t* index, /*!< in: record descriptor */
+ bool compressed,/*!< in: true if compressed page */
buf_block_t* block, /*!< in: page to be reorganized, or NULL */
mtr_t* mtr) /*!< in: mtr or NULL */
{
+ ulint level = page_compression_level;
+
ut_ad(ptr && end_ptr);
- /* The record is empty, except for the record initial part */
+ /* For a compressed page, the record carries the compression
+ level that was used for the original compression, written in
+ one byte. Otherwise the record is empty. */
+ if (compressed) {
+ if (ptr == end_ptr) {
+ return(NULL);
+ }
+
+ level = (ulint)mach_read_from_1(ptr);
+
+ ut_a(level <= 9);
+ ++ptr;
+ }
if (block != NULL) {
- btr_page_reorganize_low(TRUE, block, index, mtr);
+ btr_page_reorganize_low(TRUE, level, block, index, mtr);
}
return(ptr);
@@ -1827,10 +2015,13 @@ UNIV_INTERN
rec_t*
btr_root_raise_and_insert(
/*======================*/
+ ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
on the root page; when the function returns,
the cursor is positioned on the predecessor
of the inserted record */
+ ulint** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr) /*!< in: mtr */
@@ -1840,7 +2031,6 @@ btr_root_raise_and_insert(
page_t* new_page;
ulint new_page_no;
rec_t* rec;
- mem_heap_t* heap;
dtuple_t* node_ptr;
ulint level;
rec_t* node_ptr_rec;
@@ -1926,7 +2116,9 @@ btr_root_raise_and_insert(
lock_update_root_raise(new_block, root_block);
/* Create a memory heap where the node pointer is stored */
- heap = mem_heap_create(100);
+ if (!*heap) {
+ *heap = mem_heap_create(1000);
+ }
rec = page_rec_get_next(page_get_infimum_rec(new_page));
new_page_no = buf_block_get_page_no(new_block);
@@ -1934,8 +2126,8 @@ btr_root_raise_and_insert(
/* Build the node pointer (= node key and page address) for the
child */
- node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
- level);
+ node_ptr = dict_index_build_node_ptr(
+ index, rec, new_page_no, *heap, level);
/* The node pointer must be marked as the predefined minimum record,
as there is no lower alphabetical limit to records in the leftmost
node of a level: */
@@ -1961,15 +2153,12 @@ btr_root_raise_and_insert(
page_cur_set_before_first(root_block, page_cursor);
node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
- index, 0, mtr);
+ index, offsets, heap, 0, mtr);
/* The root page should only contain the node pointer
to new_page at this point. Thus, the data should fit. */
ut_a(node_ptr_rec);
- /* Free the memory heap */
- mem_heap_free(heap);
-
/* We play safe and reset the free bits for the new page */
#if 0
@@ -1985,7 +2174,8 @@ btr_root_raise_and_insert(
PAGE_CUR_LE, page_cursor);
/* Split the child and insert tuple */
- return(btr_page_split_and_insert(cursor, tuple, n_ext, mtr));
+ return(btr_page_split_and_insert(flags, cursor, offsets, heap,
+ tuple, n_ext, mtr));
}
/*************************************************************//**
@@ -2213,9 +2403,9 @@ func_exit:
/*************************************************************//**
Returns TRUE if the insert fits on the appropriate half-page with the
chosen split_rec.
-@return TRUE if fits */
-static
-ibool
+@return true if fits */
+static __attribute__((nonnull(1,3,4,6), warn_unused_result))
+bool
btr_page_insert_fits(
/*=================*/
btr_cur_t* cursor, /*!< in: cursor at which insert
@@ -2223,11 +2413,11 @@ btr_page_insert_fits(
const rec_t* split_rec,/*!< in: suggestion for first record
on upper half-page, or NULL if
tuple to be inserted should be first */
- const ulint* offsets,/*!< in: rec_get_offsets(
- split_rec, cursor->index) */
+ ulint** offsets,/*!< in: rec_get_offsets(
+ split_rec, cursor->index); out: garbage */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
- mem_heap_t* heap) /*!< in: temporary memory heap */
+ mem_heap_t** heap) /*!< in: temporary memory heap */
{
page_t* page;
ulint insert_size;
@@ -2236,15 +2426,13 @@ btr_page_insert_fits(
ulint total_n_recs;
const rec_t* rec;
const rec_t* end_rec;
- ulint* offs;
page = btr_cur_get_page(cursor);
- ut_ad(!split_rec == !offsets);
- ut_ad(!offsets
- || !page_is_comp(page) == !rec_offs_comp(offsets));
- ut_ad(!offsets
- || rec_offs_validate(split_rec, cursor->index, offsets));
+ ut_ad(!split_rec
+ || !page_is_comp(page) == !rec_offs_comp(*offsets));
+ ut_ad(!split_rec
+ || rec_offs_validate(split_rec, cursor->index, *offsets));
insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
free_space = page_get_free_space_of_empty(page_is_comp(page));
@@ -2262,7 +2450,7 @@ btr_page_insert_fits(
rec = page_rec_get_next(page_get_infimum_rec(page));
end_rec = page_rec_get_next(btr_cur_get_rec(cursor));
- } else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) {
+ } else if (cmp_dtuple_rec(tuple, split_rec, *offsets) >= 0) {
rec = page_rec_get_next(page_get_infimum_rec(page));
end_rec = split_rec;
@@ -2277,19 +2465,17 @@ btr_page_insert_fits(
/* Ok, there will be enough available space on the
half page where the tuple is inserted */
- return(TRUE);
+ return(true);
}
- offs = NULL;
-
while (rec != end_rec) {
/* In this loop we calculate the amount of reserved
space after rec is removed from page. */
- offs = rec_get_offsets(rec, cursor->index, offs,
- ULINT_UNDEFINED, &heap);
+ *offsets = rec_get_offsets(rec, cursor->index, *offsets,
+ ULINT_UNDEFINED, heap);
- total_data -= rec_offs_size(offs);
+ total_data -= rec_offs_size(*offsets);
total_n_recs--;
if (total_data + page_dir_calc_reserved_space(total_n_recs)
@@ -2298,13 +2484,13 @@ btr_page_insert_fits(
/* Ok, there will be enough available space on the
half page where the tuple is inserted */
- return(TRUE);
+ return(true);
}
rec = page_rec_get_next_const(rec);
}
- return(FALSE);
+ return(false);
}
/*******************************************************//**
@@ -2314,6 +2500,7 @@ UNIV_INTERN
void
btr_insert_on_non_leaf_level_func(
/*==============================*/
+ ulint flags, /*!< in: undo logging and locking flags */
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: level, must be > 0 */
dtuple_t* tuple, /*!< in: the record to be inserted */
@@ -2323,8 +2510,10 @@ btr_insert_on_non_leaf_level_func(
{
big_rec_t* dummy_big_rec;
btr_cur_t cursor;
- ulint err;
+ dberr_t err;
rec_t* rec;
+ ulint* offsets = NULL;
+ mem_heap_t* heap = NULL;
ut_ad(level > 0);
@@ -2335,26 +2524,35 @@ btr_insert_on_non_leaf_level_func(
ut_ad(cursor.flag == BTR_CUR_BINARY);
err = btr_cur_optimistic_insert(
- BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG
- | BTR_NO_UNDO_LOG_FLAG, &cursor, tuple, &rec,
- &dummy_big_rec, 0, NULL, mtr);
+ flags
+ | BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG
+ | BTR_NO_UNDO_LOG_FLAG,
+ &cursor, &offsets, &heap,
+ tuple, &rec, &dummy_big_rec, 0, NULL, mtr);
if (err == DB_FAIL) {
- err = btr_cur_pessimistic_insert(
- BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG
- | BTR_NO_UNDO_LOG_FLAG,
- &cursor, tuple, &rec, &dummy_big_rec, 0, NULL, mtr);
+ err = btr_cur_pessimistic_insert(flags
+ | BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG
+ | BTR_NO_UNDO_LOG_FLAG,
+ &cursor, &offsets, &heap,
+ tuple, &rec,
+ &dummy_big_rec, 0, NULL, mtr);
ut_a(err == DB_SUCCESS);
}
+ mem_heap_free(heap);
}
/**************************************************************//**
Attaches the halves of an index page on the appropriate level in an
index tree. */
-static
+static __attribute__((nonnull))
void
btr_attach_half_pages(
/*==================*/
+ ulint flags, /*!< in: undo logging and
+ locking flags */
dict_index_t* index, /*!< in: the index tree */
buf_block_t* block, /*!< in/out: page to be split */
const rec_t* split_rec, /*!< in: first record on upper
@@ -2432,7 +2630,8 @@ btr_attach_half_pages(
/* Insert it next to the pointer to the lower half. Note that this
may generate recursion leading to a split on the higher level. */
- btr_insert_on_non_leaf_level(index, level + 1, node_ptr_upper, mtr);
+ btr_insert_on_non_leaf_level(flags, index, level + 1,
+ node_ptr_upper, mtr);
/* Free the memory heap */
mem_heap_free(heap);
@@ -2484,13 +2683,13 @@ btr_attach_half_pages(
/*************************************************************//**
Determine if a tuple is smaller than any record on the page.
@return TRUE if smaller */
-static
-ibool
+static __attribute__((nonnull, warn_unused_result))
+bool
btr_page_tuple_smaller(
/*===================*/
btr_cur_t* cursor, /*!< in: b-tree cursor */
const dtuple_t* tuple, /*!< in: tuple to consider */
- ulint* offsets,/*!< in/out: temporary storage */
+ ulint** offsets,/*!< in/out: temporary storage */
ulint n_uniq, /*!< in: number of unique fields
in the index page records */
mem_heap_t** heap) /*!< in/out: heap for offsets */
@@ -2505,11 +2704,11 @@ btr_page_tuple_smaller(
page_cur_move_to_next(&pcur);
first_rec = page_cur_get_rec(&pcur);
- offsets = rec_get_offsets(
- first_rec, cursor->index, offsets,
+ *offsets = rec_get_offsets(
+ first_rec, cursor->index, *offsets,
n_uniq, heap);
- return(cmp_dtuple_rec(tuple, first_rec, offsets) < 0);
+ return(cmp_dtuple_rec(tuple, first_rec, *offsets) < 0);
}
/*************************************************************//**
@@ -2525,9 +2724,12 @@ UNIV_INTERN
rec_t*
btr_page_split_and_insert(
/*======================*/
+ ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
function returns, the cursor is positioned
on the predecessor of the inserted record */
+ ulint** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr) /*!< in: mtr */
@@ -2553,18 +2755,21 @@ btr_page_split_and_insert(
ibool insert_left;
ulint n_iterations = 0;
rec_t* rec;
- mem_heap_t* heap;
ulint n_uniq;
- ulint* offsets;
- heap = mem_heap_create(1024);
+ if (!*heap) {
+ *heap = mem_heap_create(1024);
+ }
n_uniq = dict_index_get_n_unique_in_tree(cursor->index);
func_start:
- mem_heap_empty(heap);
- offsets = NULL;
+ mem_heap_empty(*heap);
+ *offsets = NULL;
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
MTR_MEMO_X_LOCK));
+ ut_ad(!dict_index_is_online_ddl(cursor->index)
+ || (flags & BTR_CREATE_FLAG)
+ || dict_index_is_clust(cursor->index));
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
@@ -2590,7 +2795,7 @@ func_start:
if (split_rec == NULL) {
insert_left = btr_page_tuple_smaller(
- cursor, tuple, offsets, n_uniq, &heap);
+ cursor, tuple, offsets, n_uniq, heap);
}
} else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
direction = FSP_UP;
@@ -2612,7 +2817,7 @@ func_start:
if (page_get_n_recs(page) > 1) {
split_rec = page_get_middle_rec(page);
} else if (btr_page_tuple_smaller(cursor, tuple,
- offsets, n_uniq, &heap)) {
+ offsets, n_uniq, heap)) {
split_rec = page_rec_get_next(
page_get_infimum_rec(page));
} else {
@@ -2635,10 +2840,10 @@ func_start:
if (split_rec) {
first_rec = move_limit = split_rec;
- offsets = rec_get_offsets(split_rec, cursor->index, offsets,
- n_uniq, &heap);
+ *offsets = rec_get_offsets(split_rec, cursor->index, *offsets,
+ n_uniq, heap);
- insert_left = cmp_dtuple_rec(tuple, split_rec, offsets) < 0;
+ insert_left = cmp_dtuple_rec(tuple, split_rec, *offsets) < 0;
if (!insert_left && new_page_zip && n_iterations > 0) {
/* If a compressed page has already been split,
@@ -2665,7 +2870,7 @@ insert_empty:
/* 4. Do first the modifications in the tree structure */
- btr_attach_half_pages(cursor->index, block,
+ btr_attach_half_pages(flags, cursor->index, block,
first_rec, new_block, direction, mtr);
/* If the split is made on the leaf level and the insert will fit
@@ -2685,10 +2890,11 @@ insert_empty:
insert_will_fit = !new_page_zip
&& btr_page_insert_fits(cursor, NULL,
- NULL, tuple, n_ext, heap);
+ offsets, tuple, n_ext, heap);
}
- if (insert_will_fit && page_is_leaf(page)) {
+ if (insert_will_fit && page_is_leaf(page)
+ && !dict_index_is_online_ddl(cursor->index)) {
mtr_memo_release(mtr, dict_index_get_lock(cursor->index),
MTR_MEMO_X_LOCK);
@@ -2805,8 +3011,8 @@ insert_empty:
page_cur_search(insert_block, cursor->index, tuple,
PAGE_CUR_LE, page_cursor);
- rec = page_cur_tuple_insert(page_cursor, tuple,
- cursor->index, n_ext, mtr);
+ rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
+ offsets, heap, n_ext, mtr);
#ifdef UNIV_ZIP_DEBUG
{
@@ -2837,7 +3043,7 @@ insert_empty:
page_cur_search(insert_block, cursor->index, tuple,
PAGE_CUR_LE, page_cursor);
rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
- n_ext, mtr);
+ offsets, heap, n_ext, mtr);
if (rec == NULL) {
/* The insert did not fit on the page: loop back to the
@@ -2878,7 +3084,7 @@ func_exit:
ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));
- mem_heap_free(heap);
+ ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets));
return(rec);
}
@@ -3058,15 +3264,15 @@ btr_node_ptr_delete(
{
btr_cur_t cursor;
ibool compressed;
- ulint err;
+ dberr_t err;
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
/* Delete node pointer on father page */
btr_page_get_father(index, block, mtr, &cursor);
- compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, RB_NONE,
- mtr);
+ compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor,
+ BTR_CREATE_FLAG, RB_NONE, mtr);
ut_a(err == DB_SUCCESS);
if (!compressed) {
@@ -3098,7 +3304,7 @@ btr_lift_page_up(
buf_block_t* blocks[BTR_MAX_LEVELS];
ulint n_blocks; /*!< last used index in blocks[] */
ulint i;
- ibool lift_father_up = FALSE;
+ bool lift_father_up;
buf_block_t* block_orig = block;
ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
@@ -3140,7 +3346,8 @@ btr_lift_page_up(
blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
}
- if (n_blocks && page_level == 0) {
+ lift_father_up = (n_blocks && page_level == 0);
+ if (lift_father_up) {
/* The father page also should be the only on its level (not
root). We should lift up the father page at first.
Because the leaf page should be lifted up only for root page.
@@ -3149,7 +3356,6 @@ btr_lift_page_up(
later freeing of the page doesn't find the page allocation
to be freed.*/
- lift_father_up = TRUE;
block = father_block;
page = buf_block_get_frame(block);
page_level = btr_page_get_level(page, mtr);
@@ -3295,6 +3501,7 @@ btr_compress(
if (adjust) {
nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor));
+ ut_ad(nth_rec > 0);
}
/* Decide the page to which we try to merge and which will inherit
@@ -3351,6 +3558,16 @@ err_exit:
return(FALSE);
}
+ /* If compression padding tells us that merging will result in
+ a page that is too tightly packed, i.e. one that is likely to
+ fail compression, then do not merge the pages. */
+ if (zip_size && page_is_leaf(merge_page)
+ && (page_get_data_size(merge_page) + data_size
+ >= dict_index_zip_pad_optimal_page_size(index))) {
+
+ goto err_exit;
+ }
+
ut_ad(page_validate(merge_page, index));
max_ins_size = page_get_max_insert_size(merge_page, n_recs);
@@ -3530,6 +3747,7 @@ func_exit:
mem_heap_free(heap);
if (adjust) {
+ ut_ad(nth_rec > 0);
btr_cur_position(
index,
page_rec_get_nth(merge_block->frame, nth_rec),
@@ -3846,7 +4064,7 @@ btr_print_index(
mtr_start(&mtr);
- root = btr_root_block_get(index, &mtr);
+ root = btr_root_block_get(index, RW_X_LATCH, &mtr);
btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
if (heap) {
@@ -3855,7 +4073,7 @@ btr_print_index(
mtr_commit(&mtr);
- btr_validate_index(index, NULL);
+ btr_validate_index(index, 0);
}
#endif /* UNIV_BTR_PRINT */
@@ -4041,8 +4259,22 @@ btr_index_page_validate(
{
page_cur_t cur;
ibool ret = TRUE;
+#ifndef DBUG_OFF
+ ulint nth = 1;
+#endif /* !DBUG_OFF */
page_cur_set_before_first(block, &cur);
+
+ /* Directory slot 0 should only contain the infimum record. */
+ DBUG_EXECUTE_IF("check_table_rec_next",
+ ut_a(page_rec_get_nth_const(
+ page_cur_get_page(&cur), 0)
+ == cur.rec);
+ ut_a(page_dir_slot_get_n_owned(
+ page_dir_get_nth_slot(
+ page_cur_get_page(&cur), 0))
+ == 1););
+
page_cur_move_to_next(&cur);
for (;;) {
@@ -4056,6 +4288,16 @@ btr_index_page_validate(
return(FALSE);
}
+ /* Verify that page_rec_get_nth_const() is correctly
+ retrieving each record. */
+ DBUG_EXECUTE_IF("check_table_rec_next",
+ ut_a(cur.rec == page_rec_get_nth_const(
+ page_cur_get_page(&cur),
+ page_rec_get_n_recs_before(
+ cur.rec)));
+ ut_a(nth++ == page_rec_get_n_recs_before(
+ cur.rec)););
+
page_cur_move_to_next(&cur);
}
@@ -4106,14 +4348,15 @@ btr_validate_report2(
Validates index tree level.
@return TRUE if ok */
static
-ibool
+bool
btr_validate_level(
/*===============*/
dict_index_t* index, /*!< in: index tree */
- trx_t* trx, /*!< in: transaction or NULL */
+ const trx_t* trx, /*!< in: transaction or NULL */
ulint level) /*!< in: level number */
{
ulint space;
+ ulint space_flags;
ulint zip_size;
buf_block_t* block;
page_t* page;
@@ -4127,9 +4370,10 @@ btr_validate_level(
ulint left_page_no;
page_cur_t cursor;
dtuple_t* node_ptr_tuple;
- ibool ret = TRUE;
+ bool ret = true;
mtr_t mtr;
mem_heap_t* heap = mem_heap_create(256);
+ fseg_header_t* seg;
ulint* offsets = NULL;
ulint* offsets2= NULL;
#ifdef UNIV_ZIP_DEBUG
@@ -4140,15 +4384,39 @@ btr_validate_level(
mtr_x_lock(dict_index_get_lock(index), &mtr);
- block = btr_root_block_get(index, &mtr);
+ block = btr_root_block_get(index, RW_X_LATCH, &mtr);
page = buf_block_get_frame(block);
+ seg = page + PAGE_HEADER + PAGE_BTR_SEG_TOP;
space = dict_index_get_space(index);
zip_size = dict_table_zip_size(index->table);
+ fil_space_get_latch(space, &space_flags);
+
+ if (zip_size != dict_tf_get_zip_size(space_flags)) {
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Flags mismatch: table=%lu, tablespace=%lu",
+ (ulint) index->table->flags, (ulint) space_flags);
+
+ mtr_commit(&mtr);
+
+ return(false);
+ }
+
while (level != btr_page_get_level(page, &mtr)) {
const rec_t* node_ptr;
+ if (fseg_page_is_free(seg,
+ block->page.space, block->page.offset)) {
+
+ btr_validate_report1(index, level, block);
+
+ ib_logf(IB_LOG_LEVEL_WARN, "page is free");
+
+ ret = false;
+ }
+
ut_a(space == buf_block_get_space(block));
ut_a(space == page_get_space_id(page));
#ifdef UNIV_ZIP_DEBUG
@@ -4169,12 +4437,13 @@ btr_validate_level(
/* Now we are on the desired level. Loop through the pages on that
level. */
-loop:
- if (trx_is_interrupted(trx)) {
- mtr_commit(&mtr);
- mem_heap_free(heap);
- return(ret);
+
+ if (level == 0) {
+ /* Leaf pages are managed in their own file segment. */
+ seg -= PAGE_BTR_SEG_TOP - PAGE_BTR_SEG_LEAF;
}
+
+loop:
mem_heap_empty(heap);
offsets = offsets2 = NULL;
mtr_x_lock(dict_index_get_lock(index), &mtr);
@@ -4184,20 +4453,35 @@ loop:
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
- /* Check ordering etc. of records */
+ ut_a(block->page.space == space);
+
+ if (fseg_page_is_free(seg, block->page.space, block->page.offset)) {
- if (!page_validate(page, index)) {
btr_validate_report1(index, level, block);
- ret = FALSE;
- } else if (level == 0) {
+ ib_logf(IB_LOG_LEVEL_WARN, "Page is marked as free");
+ ret = false;
+
+ } else if (btr_page_get_index_id(page) != index->id) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Page index id " IB_ID_FMT " != data dictionary "
+ "index id " IB_ID_FMT,
+ btr_page_get_index_id(page), index->id);
+
+ ret = false;
+
+ } else if (!page_validate(page, index)) {
+
+ btr_validate_report1(index, level, block);
+ ret = false;
+
+ } else if (level == 0 && !btr_index_page_validate(block, index)) {
+
/* We are on level 0. Check that the records have the right
number of fields, and field lengths are right. */
- if (!btr_index_page_validate(block, index)) {
-
- ret = FALSE;
- }
+ ret = false;
}
ut_a(btr_page_get_level(page, &mtr) == level);
@@ -4223,7 +4507,7 @@ loop:
buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
- ret = FALSE;
+ ret = false;
}
if (page_is_comp(right_page) != page_is_comp(page)) {
@@ -4232,7 +4516,7 @@ loop:
buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
- ret = FALSE;
+ ret = false;
goto node_ptr_fails;
}
@@ -4265,7 +4549,7 @@ loop:
rec_print(stderr, rec, index);
putc('\n', stderr);
- ret = FALSE;
+ ret = false;
}
}
@@ -4316,7 +4600,7 @@ loop:
fputs("InnoDB: record on page ", stderr);
rec_print_new(stderr, rec, offsets);
putc('\n', stderr);
- ret = FALSE;
+ ret = false;
goto node_ptr_fails;
}
@@ -4346,7 +4630,7 @@ loop:
fputs("InnoDB: first rec ", stderr);
rec_print(stderr, first_rec, index);
putc('\n', stderr);
- ret = FALSE;
+ ret = false;
goto node_ptr_fails;
}
@@ -4374,7 +4658,7 @@ loop:
if (btr_cur_get_rec(&right_node_cur)
!= right_node_ptr) {
- ret = FALSE;
+ ret = false;
fputs("InnoDB: node pointer to"
" the right page is wrong\n",
stderr);
@@ -4400,7 +4684,7 @@ loop:
!= page_rec_get_next(
page_get_infimum_rec(
right_father_page))) {
- ret = FALSE;
+ ret = false;
fputs("InnoDB: node pointer 2 to"
" the right page is wrong\n",
stderr);
@@ -4425,7 +4709,7 @@ loop:
if (page_get_page_no(right_father_page)
!= btr_page_get_next(father_page, &mtr)) {
- ret = FALSE;
+ ret = false;
fputs("InnoDB: node pointer 3 to"
" the right page is wrong\n",
stderr);
@@ -4456,17 +4740,23 @@ node_ptr_fails:
on the next loop. The page has already been checked. */
mtr_commit(&mtr);
- if (right_page_no != FIL_NULL) {
+ if (trx_is_interrupted(trx)) {
+ /* On interrupt, return the current status. */
+ } else if (right_page_no != FIL_NULL) {
+
mtr_start(&mtr);
- block = btr_block_get(space, zip_size, right_page_no,
- RW_X_LATCH, index, &mtr);
+ block = btr_block_get(
+ space, zip_size, right_page_no,
+ RW_X_LATCH, index, &mtr);
+
page = buf_block_get_frame(block);
goto loop;
}
mem_heap_free(heap);
+
return(ret);
}
@@ -4474,40 +4764,39 @@ node_ptr_fails:
Checks the consistency of an index tree.
@return TRUE if ok */
UNIV_INTERN
-ibool
+bool
btr_validate_index(
/*===============*/
dict_index_t* index, /*!< in: index */
- trx_t* trx) /*!< in: transaction or NULL */
+ const trx_t* trx) /*!< in: transaction or NULL */
{
- mtr_t mtr;
- page_t* root;
- ulint i;
- ulint n;
-
/* Full Text index are implemented by auxiliary tables,
not the B-tree */
- if (index->type & DICT_FTS) {
- return(TRUE);
+ if (dict_index_is_online_ddl(index) || (index->type & DICT_FTS)) {
+ return(true);
}
+ mtr_t mtr;
+
mtr_start(&mtr);
- mtr_x_lock(dict_index_get_lock(index), &mtr);
- root = btr_root_get(index, &mtr);
- n = btr_page_get_level(root, &mtr);
+ mtr_x_lock(dict_index_get_lock(index), &mtr);
- for (i = 0; i <= n && !trx_is_interrupted(trx); i++) {
- if (!btr_validate_level(index, trx, n - i)) {
+ bool ok = true;
+ page_t* root = btr_root_get(index, &mtr);
+ ulint n = btr_page_get_level(root, &mtr);
- mtr_commit(&mtr);
+ for (ulint i = 0; i <= n; ++i) {
- return(FALSE);
+ if (!btr_validate_level(index, trx, n - i)) {
+ ok = false;
+ break;
}
}
mtr_commit(&mtr);
- return(TRUE);
+ return(ok);
}
+
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index 56cce411bba..913b2088f24 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -2,6 +2,7 @@
Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
+Copyright (c) 2012, Facebook Inc.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -57,6 +58,7 @@ Created 10/16/1994 Heikki Tuuri
#include "buf0lru.h"
#include "btr0btr.h"
#include "btr0sea.h"
+#include "row0log.h"
#include "row0purge.h"
#include "row0upd.h"
#include "trx0rec.h"
@@ -69,13 +71,13 @@ Created 10/16/1994 Heikki Tuuri
#include "zlib.h"
/** Buffered B-tree operation types, introduced as part of delete buffering. */
-typedef enum btr_op_enum {
+enum btr_op_t {
BTR_NO_OP = 0, /*!< Not buffered */
BTR_INSERT_OP, /*!< Insert, do not ignore UNIQUE */
BTR_INSERT_IGNORE_UNIQUE_OP, /*!< Insert, ignoring UNIQUE */
BTR_DELETE_OP, /*!< Purge a delete-marked record */
BTR_DELMARK_OP /*!< Mark a record for deletion */
-} btr_op_t;
+};
#ifdef UNIV_DEBUG
/** If the following is set to TRUE, this module prints a lot of
@@ -430,6 +432,14 @@ btr_cur_search_to_nth_level(
cursor->low_match = ULINT_UNDEFINED;
#endif
+ ibool s_latch_by_caller;
+
+ s_latch_by_caller = latch_mode & BTR_ALREADY_S_LATCHED;
+
+ ut_ad(!s_latch_by_caller
+ || mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_S_LOCK));
+
/* These flags are mutually exclusive, they are lumped together
with the latch mode for historical reasons. It's possible for
none of the flags to be set. */
@@ -465,11 +475,11 @@ btr_cur_search_to_nth_level(
estimate = latch_mode & BTR_ESTIMATE;
/* Turn the flags unrelated to the latch mode off. */
- latch_mode &= ~(BTR_INSERT
- | BTR_DELETE_MARK
- | BTR_DELETE
- | BTR_ESTIMATE
- | BTR_IGNORE_SEC_UNIQUE);
+ latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
+
+ ut_ad(!s_latch_by_caller
+ || latch_mode == BTR_SEARCH_LEAF
+ || latch_mode == BTR_MODIFY_LEAF);
cursor->flag = BTR_CUR_BINARY;
cursor->index = index;
@@ -483,16 +493,16 @@ btr_cur_search_to_nth_level(
#ifdef BTR_CUR_HASH_ADAPT
-#ifdef UNIV_SEARCH_PERF_STAT
+# ifdef UNIV_SEARCH_PERF_STAT
info->n_searches++;
-#endif
+# endif
if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
&& latch_mode <= BTR_MODIFY_LEAF
&& info->last_hash_succ
&& !estimate
-#ifdef PAGE_CUR_LE_OR_EXTENDS
+# ifdef PAGE_CUR_LE_OR_EXTENDS
&& mode != PAGE_CUR_LE_OR_EXTENDS
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
+# endif /* PAGE_CUR_LE_OR_EXTENDS */
/* If !has_search_latch, we do a dirty read of
btr_search_enabled below, and btr_search_guess_on_hash()
will have to check it again. */
@@ -513,7 +523,7 @@ btr_cur_search_to_nth_level(
return;
}
-#endif /* BTR_CUR_HASH_ADAPT */
+# endif /* BTR_CUR_HASH_ADAPT */
#endif /* BTR_CUR_ADAPT */
btr_cur_n_non_sea++;
@@ -530,15 +540,19 @@ btr_cur_search_to_nth_level(
savepoint = mtr_set_savepoint(mtr);
- if (latch_mode == BTR_MODIFY_TREE) {
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
mtr_x_lock(dict_index_get_lock(index), mtr);
-
- } else if (latch_mode == BTR_CONT_MODIFY_TREE) {
+ break;
+ case BTR_CONT_MODIFY_TREE:
/* Do nothing */
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
- } else {
- mtr_s_lock(dict_index_get_lock(index), mtr);
+ break;
+ default:
+ if (!s_latch_by_caller) {
+ mtr_s_lock(dict_index_get_lock(index), mtr);
+ }
}
page_cursor = btr_cur_get_page_cur(cursor);
@@ -692,6 +706,7 @@ retry_page_get:
? SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE);
}
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
ut_ad(index->id == btr_page_get_index_id(page));
if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
@@ -716,13 +731,17 @@ retry_page_get:
cursor, mtr);
}
- if (latch_mode != BTR_MODIFY_TREE
- && latch_mode != BTR_CONT_MODIFY_TREE) {
-
- /* Release the tree s-latch */
-
- mtr_release_s_latch_at_savepoint(
- mtr, savepoint, dict_index_get_lock(index));
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
+ case BTR_CONT_MODIFY_TREE:
+ break;
+ default:
+ if (!s_latch_by_caller) {
+ /* Release the tree s-latch */
+ mtr_release_s_latch_at_savepoint(
+ mtr, savepoint,
+ dict_index_get_lock(index));
+ }
}
page_mode = mode;
@@ -789,8 +808,7 @@ retry_page_get:
will properly check btr_search_enabled again in
btr_search_build_page_hash_index() before building a
page hash index, while holding btr_search_latch. */
- if (UNIV_LIKELY(btr_search_enabled)) {
-
+ if (btr_search_enabled) {
btr_search_info_update(index, cursor);
}
#endif
@@ -820,14 +838,16 @@ UNIV_INTERN
void
btr_cur_open_at_index_side_func(
/*============================*/
- ibool from_left, /*!< in: TRUE if open to the low end,
- FALSE if to the high end */
+ bool from_left, /*!< in: true if open to the low end,
+ false if to the high end */
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: latch mode */
- btr_cur_t* cursor, /*!< in: cursor */
+ btr_cur_t* cursor, /*!< in/out: cursor */
+ ulint level, /*!< in: level to search for
+ (0=leaf). */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
page_cur_t* page_cursor;
ulint page_no;
@@ -844,16 +864,27 @@ btr_cur_open_at_index_side_func(
rec_offs_init(offsets_);
estimate = latch_mode & BTR_ESTIMATE;
- latch_mode = latch_mode & ~BTR_ESTIMATE;
+ latch_mode &= ~BTR_ESTIMATE;
+
+ ut_ad(level != ULINT_UNDEFINED);
/* Store the position of the tree latch we push to mtr so that we
know how to release it when we have latched the leaf node */
savepoint = mtr_set_savepoint(mtr);
- if (latch_mode == BTR_MODIFY_TREE) {
+ switch (latch_mode) {
+ case BTR_CONT_MODIFY_TREE:
+ break;
+ case BTR_MODIFY_TREE:
mtr_x_lock(dict_index_get_lock(index), mtr);
- } else {
+ break;
+ case BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED:
+ case BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED:
+ ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_S_LOCK));
+ break;
+ default:
mtr_s_lock(dict_index_get_lock(index), mtr);
}
@@ -873,6 +904,7 @@ btr_cur_open_at_index_side_func(
RW_NO_LATCH, NULL, BUF_GET,
file, line, mtr);
page = buf_block_get_frame(block);
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
ut_ad(index->id == btr_page_get_index_id(page));
block->check_index_page_at_flush = TRUE;
@@ -882,26 +914,40 @@ btr_cur_open_at_index_side_func(
height = btr_page_get_level(page, mtr);
root_height = height;
+ ut_a(height >= level);
+ } else {
+ /* TODO: flag the index corrupted if this fails */
+ ut_ad(height == btr_page_get_level(page, mtr));
}
- if (height == 0) {
- btr_cur_latch_leaves(page, space, zip_size, page_no,
- latch_mode, cursor, mtr);
-
- /* In versions <= 3.23.52 we had forgotten to
- release the tree latch here. If in an index scan
- we had to scan far to find a record visible to the
- current transaction, that could starve others
- waiting for the tree latch. */
-
- if ((latch_mode != BTR_MODIFY_TREE)
- && (latch_mode != BTR_CONT_MODIFY_TREE)) {
+ if (height == level) {
+ btr_cur_latch_leaves(
+ page, space, zip_size, page_no,
+ latch_mode & ~BTR_ALREADY_S_LATCHED,
+ cursor, mtr);
- /* Release the tree s-latch */
+ if (height == 0) {
+ /* In versions <= 3.23.52 we had
+ forgotten to release the tree latch
+ here. If in an index scan we had to
+ scan far to find a record visible to
+ the current transaction, that could
+ starve others waiting for the tree
+ latch. */
+
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
+ case BTR_CONT_MODIFY_TREE:
+ case BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED:
+ case BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED:
+ break;
+ default:
+ /* Release the tree s-latch */
- mtr_release_s_latch_at_savepoint(
- mtr, savepoint,
- dict_index_get_lock(index));
+ mtr_release_s_latch_at_savepoint(
+ mtr, savepoint,
+ dict_index_get_lock(index));
+ }
}
}
@@ -911,7 +957,7 @@ btr_cur_open_at_index_side_func(
page_cur_set_after_last(block, page_cursor);
}
- if (height == 0) {
+ if (height == level) {
if (estimate) {
btr_cur_add_path_info(cursor, height,
root_height);
@@ -970,9 +1016,12 @@ btr_cur_open_at_rnd_pos_func(
ulint* offsets = offsets_;
rec_offs_init(offsets_);
- if (latch_mode == BTR_MODIFY_TREE) {
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
mtr_x_lock(dict_index_get_lock(index), mtr);
- } else {
+ break;
+ default:
+ ut_ad(latch_mode != BTR_CONT_MODIFY_TREE);
mtr_s_lock(dict_index_get_lock(index), mtr);
}
@@ -993,6 +1042,7 @@ btr_cur_open_at_rnd_pos_func(
RW_NO_LATCH, NULL, BUF_GET,
file, line, mtr);
page = buf_block_get_frame(block);
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
ut_ad(index->id == btr_page_get_index_id(page));
if (height == ULINT_UNDEFINED) {
@@ -1037,7 +1087,7 @@ be freed by reorganizing. Differs from btr_cur_optimistic_insert because
no heuristics is applied to whether it pays to use CPU time for
reorganizing the page or not.
@return pointer to inserted record if succeed, else NULL */
-static
+static __attribute__((nonnull, warn_unused_result))
rec_t*
btr_cur_insert_if_possible(
/*=======================*/
@@ -1045,6 +1095,8 @@ btr_cur_insert_if_possible(
cursor stays valid */
const dtuple_t* tuple, /*!< in: tuple to insert; the size info need not
have been stored to tuple */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr) /*!< in: mtr */
{
@@ -1060,8 +1112,8 @@ btr_cur_insert_if_possible(
page_cursor = btr_cur_get_page_cur(cursor);
/* Now, try the insert */
- rec = page_cur_tuple_insert(page_cursor, tuple,
- cursor->index, n_ext, mtr);
+ rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
+ offsets, heap, n_ext, mtr);
if (UNIV_UNLIKELY(!rec)) {
/* If record did not fit, reorganize */
@@ -1071,19 +1123,21 @@ btr_cur_insert_if_possible(
page_cur_search(block, cursor->index, tuple,
PAGE_CUR_LE, page_cursor);
- rec = page_cur_tuple_insert(page_cursor, tuple,
- cursor->index, n_ext, mtr);
+ rec = page_cur_tuple_insert(
+ page_cursor, tuple, cursor->index,
+ offsets, heap, n_ext, mtr);
}
}
+ ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets));
return(rec);
}
/*************************************************************//**
For an insert, checks the locks and does the undo logging if desired.
@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
-UNIV_INLINE
-ulint
+UNIV_INLINE __attribute__((warn_unused_result, nonnull(2,3,5,6)))
+dberr_t
btr_cur_ins_lock_and_undo(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags: if
@@ -1098,7 +1152,7 @@ btr_cur_ins_lock_and_undo(
successor record */
{
dict_index_t* index;
- ulint err;
+ dberr_t err;
rec_t* rec;
roll_ptr_t roll_ptr;
@@ -1108,6 +1162,10 @@ btr_cur_ins_lock_and_undo(
rec = btr_cur_get_rec(cursor);
index = cursor->index;
+ ut_ad(!dict_index_is_online_ddl(index)
+ || dict_index_is_clust(index)
+ || (flags & BTR_CREATE_FLAG));
+
err = lock_rec_insert_check_and_lock(flags, rec,
btr_cur_get_block(cursor),
index, thr, mtr, inherit);
@@ -1120,7 +1178,7 @@ btr_cur_ins_lock_and_undo(
err = trx_undo_report_row_operation(flags, TRX_UNDO_INSERT_OP,
thr, index, entry,
- NULL, 0, NULL,
+ NULL, 0, NULL, NULL,
&roll_ptr);
if (err != DB_SUCCESS) {
@@ -1145,13 +1203,13 @@ static
void
btr_cur_trx_report(
/*===============*/
- trx_t* trx, /*!< in: transaction */
+ trx_id_t trx_id, /*!< in: transaction id */
const dict_index_t* index, /*!< in: index */
const char* op) /*!< in: operation */
{
- fprintf(stderr, "Trx with id " TRX_ID_FMT " going to ", trx->id);
+ fprintf(stderr, "Trx with id " TRX_ID_FMT " going to ", trx_id);
fputs(op, stderr);
- dict_index_name_print(stderr, trx, index);
+ dict_index_name_print(stderr, NULL, index);
putc('\n', stderr);
}
#endif /* UNIV_DEBUG */
@@ -1164,7 +1222,7 @@ one record on the page, the insert will always succeed; this is to
prevent trying to split a page with just one record.
@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_optimistic_insert(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags: if not
@@ -1172,6 +1230,8 @@ btr_cur_optimistic_insert(
specified */
btr_cur_t* cursor, /*!< in: cursor on page after which to insert;
cursor stays valid */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
dtuple_t* entry, /*!< in/out: entry to insert */
rec_t** rec, /*!< out: pointer to inserted record if
succeed */
@@ -1198,13 +1258,16 @@ btr_cur_optimistic_insert(
ibool inherit;
ulint zip_size;
ulint rec_size;
- ulint err;
+ dberr_t err;
*big_rec = NULL;
block = btr_cur_get_block(cursor);
page = buf_block_get_frame(block);
index = cursor->index;
+ ut_ad(!dict_index_is_online_ddl(index)
+ || dict_index_is_clust(index)
+ || (flags & BTR_CREATE_FLAG));
zip_size = buf_block_get_zip_size(block);
#ifdef UNIV_DEBUG_VALGRIND
if (zip_size) {
@@ -1219,7 +1282,7 @@ btr_cur_optimistic_insert(
}
#ifdef UNIV_DEBUG
if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(thr_get_trx(thr), index, "insert into ");
+ btr_cur_trx_report(thr_get_trx(thr)->id, index, "insert ");
dtuple_print(stderr, entry);
}
#endif /* UNIV_DEBUG */
@@ -1313,6 +1376,15 @@ fail_err:
goto fail;
}
+ /* If compression padding tells us that the insertion would leave
+ the page too densely packed, i.e. likely to cause a compression
+ failure, then do not attempt an optimistic insertion. */
+ if (zip_size && leaf
+ && (page_get_data_size(page) + rec_size
+ >= dict_index_zip_pad_optimal_page_size(index))) {
+
+ goto fail;
+ }
/* Check locks and write to the undo log, if specified */
err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
thr, mtr, &inherit);
@@ -1329,7 +1401,7 @@ fail_err:
{
const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor);
*rec = page_cur_tuple_insert(page_cursor, entry, index,
- n_ext, mtr);
+ offsets, heap, n_ext, mtr);
reorg = page_cursor_rec != page_cur_get_rec(page_cursor);
if (UNIV_UNLIKELY(reorg)) {
@@ -1359,7 +1431,7 @@ fail_err:
page_cur_search(block, index, entry, PAGE_CUR_LE, page_cursor);
*rec = page_cur_tuple_insert(page_cursor, entry, index,
- n_ext, mtr);
+ offsets, heap, n_ext, mtr);
if (UNIV_UNLIKELY(!*rec)) {
if (zip_size != 0) {
@@ -1434,7 +1506,7 @@ made on the leaf level, to avoid deadlocks, mtr must also own x-latches
to brothers of page, if those brothers exist.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_pessimistic_insert(
/*=======================*/
ulint flags, /*!< in: undo logging and locking flags: if not
@@ -1445,6 +1517,9 @@ btr_cur_pessimistic_insert(
insertion will certainly succeed */
btr_cur_t* cursor, /*!< in: cursor after which to insert;
cursor stays valid */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
dtuple_t* entry, /*!< in/out: entry to insert */
rec_t** rec, /*!< out: pointer to inserted record if
succeed */
@@ -1458,8 +1533,7 @@ btr_cur_pessimistic_insert(
dict_index_t* index = cursor->index;
ulint zip_size = dict_table_zip_size(index->table);
big_rec_t* big_rec_vec = NULL;
- mem_heap_t* heap = NULL;
- ulint err;
+ dberr_t err;
ibool dummy_inh;
ibool success;
ulint n_extents = 0;
@@ -1474,6 +1548,9 @@ btr_cur_pessimistic_insert(
MTR_MEMO_X_LOCK));
ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
MTR_MEMO_PAGE_X_FIX));
+ ut_ad(!dict_index_is_online_ddl(index)
+ || dict_index_is_clust(index)
+ || (flags & BTR_CREATE_FLAG));
cursor->flag = BTR_CUR_BINARY;
@@ -1531,13 +1608,11 @@ btr_cur_pessimistic_insert(
== buf_block_get_page_no(btr_cur_get_block(cursor))) {
/* The page is the root page */
- *rec = btr_root_raise_and_insert(cursor, entry, n_ext, mtr);
+ *rec = btr_root_raise_and_insert(
+ flags, cursor, offsets, heap, entry, n_ext, mtr);
} else {
- *rec = btr_page_split_and_insert(cursor, entry, n_ext, mtr);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
+ *rec = btr_page_split_and_insert(
+ flags, cursor, offsets, heap, entry, n_ext, mtr);
}
ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec);
@@ -1564,29 +1639,36 @@ btr_cur_pessimistic_insert(
/*************************************************************//**
For an update, checks the locks and does the undo logging.
@return DB_SUCCESS, DB_WAIT_LOCK, or error number */
-UNIV_INLINE
-ulint
+UNIV_INLINE __attribute__((warn_unused_result, nonnull(2,3,6,7)))
+dberr_t
btr_cur_upd_lock_and_undo(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on record to update */
+ const ulint* offsets,/*!< in: rec_get_offsets() on cursor */
const upd_t* update, /*!< in: update vector */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread
+ (can be NULL if BTR_NO_LOCKING_FLAG) */
mtr_t* mtr, /*!< in/out: mini-transaction */
roll_ptr_t* roll_ptr)/*!< out: roll pointer */
{
dict_index_t* index;
- rec_t* rec;
- ulint err;
+ const rec_t* rec;
+ dberr_t err;
- ut_ad(cursor && update && thr && roll_ptr);
+ ut_ad(thr || (flags & BTR_NO_LOCKING_FLAG));
rec = btr_cur_get_rec(cursor);
index = cursor->index;
+ ut_ad(rec_offs_validate(rec, index, offsets));
+
if (!dict_index_is_clust(index)) {
+ ut_ad(dict_index_is_online_ddl(index)
+ == !!(flags & BTR_CREATE_FLAG));
+
/* We do undo logging only when we update a clustered index
record */
return(lock_sec_rec_modify_check_and_lock(
@@ -1597,50 +1679,39 @@ btr_cur_upd_lock_and_undo(
/* Check if we have to wait for a lock: enqueue an explicit lock
request if yes */
- err = DB_SUCCESS;
-
if (!(flags & BTR_NO_LOCKING_FLAG)) {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs_init(offsets_);
-
err = lock_clust_rec_modify_check_and_lock(
flags, btr_cur_get_block(cursor), rec, index,
- rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap), thr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
+ offsets, thr);
if (err != DB_SUCCESS) {
-
return(err);
}
}
/* Append the info about the update in the undo log */
- err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
- index, NULL, update,
- cmpl_info, rec, roll_ptr);
- return(err);
+ return(trx_undo_report_row_operation(
+ flags, TRX_UNDO_MODIFY_OP, thr,
+ index, NULL, update,
+ cmpl_info, rec, offsets, roll_ptr));
}
/***********************************************************//**
Writes a redo log record of updating a record in-place. */
-UNIV_INLINE
+UNIV_INLINE __attribute__((nonnull))
void
btr_cur_update_in_place_log(
/*========================*/
ulint flags, /*!< in: flags */
- rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: index where cursor positioned */
+ const rec_t* rec, /*!< in: record */
+ dict_index_t* index, /*!< in: index of the record */
const upd_t* update, /*!< in: update vector */
- trx_t* trx, /*!< in: transaction */
+ trx_id_t trx_id, /*!< in: transaction id */
roll_ptr_t roll_ptr, /*!< in: roll ptr */
mtr_t* mtr) /*!< in: mtr */
{
- byte* log_ptr;
- page_t* page = page_align(rec);
+ byte* log_ptr;
+ const page_t* page = page_align(rec);
ut_ad(flags < 256);
ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
@@ -1665,8 +1736,8 @@ btr_cur_update_in_place_log(
mach_write_to_1(log_ptr, flags);
log_ptr++;
- log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
- mtr);
+ log_ptr = row_upd_write_sys_vals_to_log(
+ index, trx_id, roll_ptr, log_ptr, mtr);
mach_write_to_2(log_ptr, page_offset(rec));
log_ptr += 2;
@@ -1769,6 +1840,13 @@ btr_cur_update_alloc_zip(
FALSE=update-in-place */
mtr_t* mtr) /*!< in: mini-transaction */
{
+
+ /* Take local copies of these variables, because their values can
+ change dynamically. */
+ bool log_compressed = page_log_compressed_pages;
+ ulint compression_level = page_compression_level;
+ page_t* page = buf_block_get_frame(block);
+
ut_a(page_zip == buf_block_get_page_zip(block));
ut_ad(page_zip);
ut_ad(!dict_index_is_ibuf(index));
@@ -1784,12 +1862,27 @@ btr_cur_update_alloc_zip(
return(FALSE);
}
- if (!page_zip_compress(page_zip, buf_block_get_frame(block),
- index, mtr)) {
+ page = buf_block_get_frame(block);
+
+ if (create && page_is_leaf(page)
+ && (length + page_get_data_size(page)
+ >= dict_index_zip_pad_optimal_page_size(index))) {
+
+ return(FALSE);
+ }
+
+ if (!page_zip_compress(
+ page_zip, page, index, compression_level,
+ log_compressed ? mtr : NULL)) {
/* Unable to compress the page */
return(FALSE);
}
+ if (mtr && !log_compressed) {
+ page_zip_compress_write_log_no_data(
+ compression_level, page, index, mtr);
+ }
+
/* After recompressing a page, we must make sure that the free
bits in the insert buffer bitmap will not exceed the free
space on the page. Because this function will not attempt
@@ -1803,8 +1896,7 @@ btr_cur_update_alloc_zip(
if (!page_zip_available(page_zip, dict_index_is_clust(index),
length, create)) {
/* Out of space: reset the free bits. */
- if (!dict_index_is_clust(index)
- && page_is_leaf(buf_block_get_frame(block))) {
+ if (!dict_index_is_clust(index) && page_is_leaf(page)) {
ibuf_reset_free_bits(block);
}
return(FALSE);
@@ -1818,45 +1910,50 @@ Updates a record when the update causes no size changes in its fields.
We assume here that the ordering fields of the record do not change.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_update_in_place(
/*====================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
+ const ulint* offsets,/*!< in: offsets on cursor->page_cur.rec */
const upd_t* update, /*!< in: update vector */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread, or NULL if
+ appropriate flags are set */
+ trx_id_t trx_id, /*!< in: transaction id */
mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
{
dict_index_t* index;
buf_block_t* block;
page_zip_des_t* page_zip;
- ulint err;
+ dberr_t err;
rec_t* rec;
roll_ptr_t roll_ptr = 0;
- trx_t* trx;
ulint was_delete_marked;
ibool is_hashed;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
rec = btr_cur_get_rec(cursor);
index = cursor->index;
+ ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
/* The insert buffer tree should never be updated in place. */
ut_ad(!dict_index_is_ibuf(index));
+ ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
+ || dict_index_is_clust(index));
+ ut_ad(!thr || thr_get_trx(thr)->id == trx_id);
+ ut_ad(thr || (flags & ~BTR_KEEP_POS_FLAG)
+ == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
+ | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
+ ut_ad(fil_page_get_type(btr_cur_get_page(cursor)) == FIL_PAGE_INDEX);
+ ut_ad(btr_page_get_index_id(btr_cur_get_page(cursor)) == index->id);
- trx = thr_get_trx(thr);
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(trx, index, "update ");
+ if (btr_cur_print_record_ops) {
+ btr_cur_trx_report(trx_id, index, "update ");
rec_print_new(stderr, rec, offsets);
}
#endif /* UNIV_DEBUG */
@@ -1872,19 +1969,17 @@ btr_cur_update_in_place(
}
/* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
+ err = btr_cur_upd_lock_and_undo(flags, cursor, offsets,
+ update, cmpl_info,
thr, mtr, &roll_ptr);
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
return(err);
}
if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_rec_sys_fields(rec, NULL,
- index, offsets, trx, roll_ptr);
+ row_upd_rec_sys_fields(rec, NULL, index, offsets,
+ thr_get_trx(thr), roll_ptr);
}
was_delete_marked = rec_get_deleted_flag(
@@ -1925,7 +2020,7 @@ btr_cur_update_in_place(
}
btr_cur_update_in_place_log(flags, rec, index, update,
- trx, roll_ptr, mtr);
+ trx_id, roll_ptr, mtr);
if (was_delete_marked
&& !rec_get_deleted_flag(
@@ -1937,9 +2032,6 @@ btr_cur_update_in_place(
rec, index, offsets, mtr);
}
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
return(DB_SUCCESS);
}
@@ -1953,24 +2045,28 @@ fields of the record do not change.
DB_UNDERFLOW if the page would become too empty, or DB_ZIP_OVERFLOW if
there is not enough space left on the compressed page */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_optimistic_update(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
+ ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
const upd_t* update, /*!< in: update vector; this must also
contain trx id and roll ptr fields */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread, or NULL if
+ appropriate flags are set */
+ trx_id_t trx_id, /*!< in: transaction id */
mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
{
dict_index_t* index;
page_cur_t* page_cursor;
- ulint err;
+ dberr_t err;
buf_block_t* block;
page_t* page;
page_zip_des_t* page_zip;
@@ -1980,10 +2076,8 @@ btr_cur_optimistic_update(
ulint old_rec_size;
dtuple_t* new_entry;
roll_ptr_t roll_ptr;
- mem_heap_t* heap;
ulint i;
ulint n_ext;
- ulint* offsets;
block = btr_cur_get_block(cursor);
page = buf_block_get_frame(block);
@@ -1993,39 +2087,46 @@ btr_cur_optimistic_update(
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
/* The insert buffer tree should never be updated in place. */
ut_ad(!dict_index_is_ibuf(index));
-
- heap = mem_heap_create(1024);
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
+ ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
+ || dict_index_is_clust(index));
+ ut_ad(!thr || thr_get_trx(thr)->id == trx_id);
+ ut_ad(thr || (flags & ~BTR_KEEP_POS_FLAG)
+ == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
+ | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_ad(btr_page_get_index_id(page) == index->id);
+
+ *offsets = rec_get_offsets(rec, index, *offsets,
+ ULINT_UNDEFINED, heap);
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- ut_a(!rec_offs_any_null_extern(rec, offsets)
+ ut_a(!rec_offs_any_null_extern(rec, *offsets)
|| trx_is_recv(thr_get_trx(thr)));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(thr_get_trx(thr), index, "update ");
- rec_print_new(stderr, rec, offsets);
+ if (btr_cur_print_record_ops) {
+ btr_cur_trx_report(trx_id, index, "update ");
+ rec_print_new(stderr, rec, *offsets);
}
#endif /* UNIV_DEBUG */
- if (!row_upd_changes_field_size_or_external(index, offsets, update)) {
+ if (!row_upd_changes_field_size_or_external(index, *offsets, update)) {
/* The simplest and the most common case: the update does not
change the size of any field and none of the updated fields is
externally stored in rec or update, and there is enough space
on the compressed page to log the update. */
- mem_heap_free(heap);
- return(btr_cur_update_in_place(flags, cursor, update,
- cmpl_info, thr, mtr));
+ return(btr_cur_update_in_place(
+ flags, cursor, *offsets, update,
+ cmpl_info, thr, trx_id, mtr));
}
- if (rec_offs_any_extern(offsets)) {
+ if (rec_offs_any_extern(*offsets)) {
any_extern:
/* Externally stored fields are treated in pessimistic
update */
- mem_heap_free(heap);
return(DB_OVERFLOW);
}
@@ -2038,8 +2139,14 @@ any_extern:
page_cursor = btr_cur_get_page_cur(cursor);
- new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
- &n_ext, heap);
+ if (!*heap) {
+ *heap = mem_heap_create(
+ rec_offs_size(*offsets)
+ + DTUPLE_EST_ALLOC(rec_offs_n_fields(*offsets)));
+ }
+
+ new_entry = row_rec_to_index_entry(rec, index, *offsets,
+ &n_ext, *heap);
/* We checked above that there are no externally stored fields. */
ut_a(!n_ext);
@@ -2047,8 +2154,8 @@ any_extern:
corresponding to new_entry is latched in mtr.
Thus the following call is safe. */
row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
- FALSE, heap);
- old_rec_size = rec_offs_size(offsets);
+ FALSE, *heap);
+ old_rec_size = rec_offs_size(*offsets);
new_rec_size = rec_get_converted_size(index, new_entry, 0);
page_zip = buf_block_get_page_zip(block);
@@ -2059,16 +2166,14 @@ any_extern:
if (page_zip
&& !btr_cur_update_alloc_zip(page_zip, block, index,
new_rec_size, TRUE, mtr)) {
- err = DB_ZIP_OVERFLOW;
- goto err_exit;
+ return(DB_ZIP_OVERFLOW);
}
if (UNIV_UNLIKELY(new_rec_size
>= (page_get_free_space_of_empty(page_is_comp(page))
/ 2))) {
- err = DB_OVERFLOW;
- goto err_exit;
+ return(DB_OVERFLOW);
}
if (UNIV_UNLIKELY(page_get_data_size(page)
@@ -2077,8 +2182,7 @@ any_extern:
/* The page would become too empty */
- err = DB_UNDERFLOW;
- goto err_exit;
+ return(DB_UNDERFLOW);
}
/* We do not attempt to reorganize if the page is compressed.
@@ -2096,16 +2200,16 @@ any_extern:
reorganize: for simplicity, we decide what to do assuming a
reorganization is needed, though it might not be necessary */
- err = DB_OVERFLOW;
- goto err_exit;
+ return(DB_OVERFLOW);
}
/* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
+ err = btr_cur_upd_lock_and_undo(flags, cursor, *offsets,
+ update, cmpl_info,
thr, mtr, &roll_ptr);
if (err != DB_SUCCESS) {
- goto err_exit;
+ return(err);
}
/* Ok, we may do the replacement. Store on the page infimum the
@@ -2116,13 +2220,7 @@ any_extern:
btr_search_update_hash_on_delete(cursor);
- /* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
- invokes rec_offs_make_valid() to point to the copied record that
- the fields of new_entry point to. We have to undo it here. */
- ut_ad(rec_offs_validate(NULL, index, offsets));
- rec_offs_make_valid(page_cur_get_rec(page_cursor), index, offsets);
-
- page_cur_delete_rec(page_cursor, index, offsets, mtr);
+ page_cur_delete_rec(page_cursor, index, *offsets, mtr);
page_cur_move_to_prev(page_cursor);
@@ -2130,11 +2228,12 @@ any_extern:
row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
roll_ptr);
row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
- thr_get_trx(thr)->id);
+ trx_id);
}
/* There are no externally stored columns in new_entry */
- rec = btr_cur_insert_if_possible(cursor, new_entry, 0/*n_ext*/, mtr);
+ rec = btr_cur_insert_if_possible(
+ cursor, new_entry, offsets, heap, 0/*n_ext*/, mtr);
ut_a(rec); /* <- We calculated above the insert would fit */
if (page_zip && !dict_index_is_clust(index)
@@ -2149,10 +2248,7 @@ any_extern:
page_cur_move_to_next(page_cursor);
- err = DB_SUCCESS;
-err_exit:
- mem_heap_free(heap);
- return(err);
+ return(DB_SUCCESS);
}
/*************************************************************//**
@@ -2211,7 +2307,7 @@ own x-latches to brothers of page, if those brothers exist. We assume
here that the ordering fields of the record do not change.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_pessimistic_update(
/*=======================*/
ulint flags, /*!< in: undo logging, locking, and rollback
@@ -2219,7 +2315,13 @@ btr_cur_pessimistic_update(
btr_cur_t* cursor, /*!< in/out: cursor on the record to update;
cursor may become invalid if *big_rec == NULL
|| !(flags & BTR_KEEP_POS_FLAG) */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
+ ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
+ mem_heap_t** offsets_heap,
+ /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
+ mem_heap_t* entry_heap,
+ /*!< in/out: memory heap for allocating
+ big_rec and the index tuple */
big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller, or NULL */
const upd_t* update, /*!< in: update vector; this is allowed also
@@ -2227,7 +2329,9 @@ btr_cur_pessimistic_update(
the values in update vector have no effect */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread, or NULL if
+ appropriate flags are set */
+ trx_id_t trx_id, /*!< in: transaction id */
mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
{
@@ -2239,17 +2343,15 @@ btr_cur_pessimistic_update(
page_zip_des_t* page_zip;
rec_t* rec;
page_cur_t* page_cursor;
- dtuple_t* new_entry;
- ulint err;
- ulint optim_err;
+ dberr_t err;
+ dberr_t optim_err;
roll_ptr_t roll_ptr;
- trx_t* trx;
ibool was_first;
ulint n_extents = 0;
ulint n_reserved;
ulint n_ext;
- ulint* offsets = NULL;
+ *offsets = NULL;
*big_rec = NULL;
block = btr_cur_get_block(cursor);
@@ -2266,9 +2368,16 @@ btr_cur_pessimistic_update(
#endif /* UNIV_ZIP_DEBUG */
/* The insert buffer tree should never be updated in place. */
ut_ad(!dict_index_is_ibuf(index));
+ ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
+ || dict_index_is_clust(index));
+ ut_ad(!thr || thr_get_trx(thr)->id == trx_id);
+ ut_ad(thr || (flags & ~BTR_KEEP_POS_FLAG)
+ == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
+ | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
- optim_err = btr_cur_optimistic_update(flags, cursor, update,
- cmpl_info, thr, mtr);
+ optim_err = btr_cur_optimistic_update(
+ flags, cursor, offsets, offsets_heap, update,
+ cmpl_info, thr, trx_id, mtr);
switch (optim_err) {
case DB_UNDERFLOW:
@@ -2280,7 +2389,8 @@ btr_cur_pessimistic_update(
}
/* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
+ err = btr_cur_upd_lock_and_undo(flags, cursor, *offsets,
+ update, cmpl_info,
thr, mtr, &roll_ptr);
if (err != DB_SUCCESS) {
@@ -2308,20 +2418,11 @@ btr_cur_pessimistic_update(
}
}
- if (!*heap) {
- *heap = mem_heap_create(1024);
- }
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, heap);
-
- trx = thr_get_trx(thr);
+ *offsets = rec_get_offsets(
+ rec, index, *offsets, ULINT_UNDEFINED, offsets_heap);
- new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
- &n_ext, *heap);
- /* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
- invokes rec_offs_make_valid() to point to the copied record that
- the fields of new_entry point to. We have to undo it here. */
- ut_ad(rec_offs_validate(NULL, index, offsets));
- rec_offs_make_valid(rec, index, offsets);
+ dtuple_t* new_entry = row_rec_to_index_entry(
+ rec, index, *offsets, &n_ext, entry_heap);
/* The page containing the clustered index record
corresponding to new_entry is latched in mtr. If the
@@ -2330,15 +2431,15 @@ btr_cur_pessimistic_update(
purge would also have removed the clustered index record
itself. Thus the following call is safe. */
row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
- FALSE, *heap);
+ FALSE, entry_heap);
if (!(flags & BTR_KEEP_SYS_FLAG)) {
row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
roll_ptr);
row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
- trx->id);
+ trx_id);
}
- if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(offsets)) {
+ if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(*offsets)) {
/* We are in a transaction rollback undoing a row
update: we must free possible externally stored fields
which got new values in the update, if they are not
@@ -2349,16 +2450,17 @@ btr_cur_pessimistic_update(
ut_ad(big_rec_vec == NULL);
btr_rec_free_updated_extern_fields(
- index, rec, page_zip, offsets, update,
- trx_is_recv(trx) ? RB_RECOVERY : RB_NORMAL, mtr);
+ index, rec, page_zip, *offsets, update,
+ trx_is_recv(thr_get_trx(thr))
+ ? RB_RECOVERY : RB_NORMAL, mtr);
}
/* We have to set appropriate extern storage bits in the new
record to be inserted: we have to remember which fields were such */
ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
- ut_ad(rec_offs_validate(rec, index, offsets));
- n_ext += btr_push_update_extern_fields(new_entry, update, *heap);
+ ut_ad(rec_offs_validate(rec, index, *offsets));
+ n_ext += btr_push_update_extern_fields(new_entry, update, entry_heap);
if (page_zip) {
ut_ad(page_is_comp(page));
@@ -2404,11 +2506,12 @@ make_external:
#endif /* UNIV_ZIP_DEBUG */
page_cursor = btr_cur_get_page_cur(cursor);
- page_cur_delete_rec(page_cursor, index, offsets, mtr);
+ page_cur_delete_rec(page_cursor, index, *offsets, mtr);
page_cur_move_to_prev(page_cursor);
- rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr);
+ rec = btr_cur_insert_if_possible(cursor, new_entry,
+ offsets, offsets_heap, n_ext, mtr);
if (rec) {
page_cursor->rec = rec;
@@ -2416,20 +2519,19 @@ make_external:
lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
rec, block);
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, heap);
-
- if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
+ if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) {
/* The new inserted record owns its possible externally
stored fields */
- btr_cur_unmark_extern_fields(page_zip,
- rec, index, offsets, mtr);
+ btr_cur_unmark_extern_fields(
+ page_zip, rec, index, *offsets, mtr);
}
- btr_cur_compress_if_useful(
- cursor,
- big_rec_vec != NULL && (flags & BTR_KEEP_POS_FLAG),
- mtr);
+ bool adjust = big_rec_vec && (flags & BTR_KEEP_POS_FLAG);
+
+ if (btr_cur_compress_if_useful(cursor, adjust, mtr)
+ && adjust) {
+ rec_offs_make_valid(page_cursor->rec, index, *offsets);
+ }
if (page_zip && !dict_index_is_clust(index)
&& page_is_leaf(page)) {
@@ -2448,8 +2550,7 @@ make_external:
ut_a(page_zip || optim_err != DB_UNDERFLOW);
/* Out of space: reset the free bits. */
- if (!dict_index_is_clust(index)
- && page_is_leaf(page)) {
+ if (!dict_index_is_clust(index) && page_is_leaf(page)) {
ibuf_reset_free_bits(block);
}
}
@@ -2481,11 +2582,13 @@ make_external:
err = btr_cur_pessimistic_insert(BTR_NO_UNDO_LOG_FLAG
| BTR_NO_LOCKING_FLAG
| BTR_KEEP_SYS_FLAG,
- cursor, new_entry, &rec,
+ cursor, offsets, offsets_heap,
+ new_entry, &rec,
&dummy_big_rec, n_ext, NULL, mtr);
ut_a(rec);
ut_a(err == DB_SUCCESS);
ut_a(dummy_big_rec == NULL);
+ ut_ad(rec_offs_validate(rec, cursor->index, *offsets));
page_cursor->rec = rec;
if (dict_index_is_sec_or_ibuf(index)) {
@@ -2498,10 +2601,10 @@ make_external:
page_update_max_trx_id(rec_block,
buf_block_get_page_zip(rec_block),
- trx->id, mtr);
+ trx_id, mtr);
}
- if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
+ if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) {
/* The new inserted record owns its possible externally
stored fields */
buf_block_t* rec_block = btr_cur_get_block(cursor);
@@ -2512,10 +2615,8 @@ make_external:
#endif /* UNIV_ZIP_DEBUG */
page_zip = buf_block_get_page_zip(rec_block);
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, heap);
btr_cur_unmark_extern_fields(page_zip,
- rec, index, offsets, mtr);
+ rec, index, *offsets, mtr);
}
lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
@@ -2554,17 +2655,13 @@ UNIV_INLINE
void
btr_cur_del_mark_set_clust_rec_log(
/*===============================*/
- ulint flags, /*!< in: flags */
rec_t* rec, /*!< in: record */
dict_index_t* index, /*!< in: index of the record */
- ibool val, /*!< in: value to set */
- trx_t* trx, /*!< in: deleting transaction */
+ trx_id_t trx_id, /*!< in: transaction id */
roll_ptr_t roll_ptr,/*!< in: roll ptr to the undo log record */
mtr_t* mtr) /*!< in: mtr */
{
byte* log_ptr;
- ut_ad(flags < 256);
- ut_ad(val <= 1);
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
@@ -2580,13 +2677,11 @@ btr_cur_del_mark_set_clust_rec_log(
return;
}
- mach_write_to_1(log_ptr, flags);
- log_ptr++;
- mach_write_to_1(log_ptr, val);
- log_ptr++;
+ *log_ptr++ = 0;
+ *log_ptr++ = 1;
- log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
- mtr);
+ log_ptr = row_upd_write_sys_vals_to_log(
+ index, trx_id, roll_ptr, log_ptr, mtr);
mach_write_to_2(log_ptr, page_offset(rec));
log_ptr += 2;
@@ -2683,20 +2778,18 @@ of the deleting transaction, and in the roll ptr field pointer to the
undo log record created.
@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_del_mark_set_clust_rec(
/*===========================*/
- ulint flags, /*!< in: undo logging and locking flags */
buf_block_t* block, /*!< in/out: buffer block of the record */
rec_t* rec, /*!< in/out: record */
dict_index_t* index, /*!< in: clustered index of the record */
const ulint* offsets,/*!< in: rec_get_offsets(rec) */
- ibool val, /*!< in: value to set */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr) /*!< in: mtr */
{
roll_ptr_t roll_ptr;
- ulint err;
+ dberr_t err;
page_zip_des_t* page_zip;
trx_t* trx;
@@ -2708,7 +2801,7 @@ btr_cur_del_mark_set_clust_rec(
#ifdef UNIV_DEBUG
if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(thr_get_trx(thr), index, "del mark ");
+ btr_cur_trx_report(thr_get_trx(thr)->id, index, "del mark ");
rec_print_new(stderr, rec, offsets);
}
#endif /* UNIV_DEBUG */
@@ -2716,7 +2809,7 @@ btr_cur_del_mark_set_clust_rec(
ut_ad(dict_index_is_clust(index));
ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
- err = lock_clust_rec_modify_check_and_lock(flags, block,
+ err = lock_clust_rec_modify_check_and_lock(BTR_NO_LOCKING_FLAG, block,
rec, index, offsets, thr);
if (err != DB_SUCCESS) {
@@ -2724,8 +2817,8 @@ btr_cur_del_mark_set_clust_rec(
return(err);
}
- err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
- index, NULL, NULL, 0, rec,
+ err = trx_undo_report_row_operation(0, TRX_UNDO_MODIFY_OP, thr,
+ index, NULL, NULL, 0, rec, offsets,
&roll_ptr);
if (err != DB_SUCCESS) {
@@ -2738,17 +2831,21 @@ btr_cur_del_mark_set_clust_rec(
page_zip = buf_block_get_page_zip(block);
- btr_blob_dbg_set_deleted_flag(rec, index, offsets, val);
- btr_rec_set_deleted_flag(rec, page_zip, val);
+ btr_blob_dbg_set_deleted_flag(rec, index, offsets, TRUE);
+ btr_rec_set_deleted_flag(rec, page_zip, TRUE);
trx = thr_get_trx(thr);
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_rec_sys_fields(rec, page_zip,
- index, offsets, trx, roll_ptr);
+ if (dict_index_is_online_ddl(index)) {
+ row_log_table_delete(
+ rec, index, offsets,
+ trx_read_trx_id(row_get_trx_id_offset(index, offsets)
+ + rec));
}
- btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx,
+ row_upd_rec_sys_fields(rec, page_zip, index, offsets, trx, roll_ptr);
+
+ btr_cur_del_mark_set_clust_rec_log(rec, index, trx->id,
roll_ptr, mtr);
return(err);
@@ -2837,7 +2934,7 @@ btr_cur_parse_del_mark_set_sec_rec(
Sets a secondary index record delete mark to TRUE or FALSE.
@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_del_mark_set_sec_rec(
/*=========================*/
ulint flags, /*!< in: locking flag */
@@ -2848,14 +2945,14 @@ btr_cur_del_mark_set_sec_rec(
{
buf_block_t* block;
rec_t* rec;
- ulint err;
+ dberr_t err;
block = btr_cur_get_block(cursor);
rec = btr_cur_get_rec(cursor);
#ifdef UNIV_DEBUG
if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(thr_get_trx(thr), cursor->index,
+ btr_cur_trx_report(thr_get_trx(thr)->id, cursor->index,
"del mark ");
rec_print(stderr, rec, cursor->index);
}
@@ -2945,12 +3042,15 @@ positioned, but no latch on the whole tree.
@return TRUE if success, i.e., the page did not become too empty */
UNIV_INTERN
ibool
-btr_cur_optimistic_delete(
-/*======================*/
+btr_cur_optimistic_delete_func(
+/*===========================*/
btr_cur_t* cursor, /*!< in: cursor on leaf page, on the record to
delete; cursor stays valid: if deletion
succeeds, on function exit it points to the
successor of the deleted record */
+#ifdef UNIV_DEBUG
+ ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
+#endif /* UNIV_DEBUG */
mtr_t* mtr) /*!< in: mtr; if this function returns
TRUE on a leaf page of a secondary
index, the mtr must be committed
@@ -2964,6 +3064,7 @@ btr_cur_optimistic_delete(
ibool no_compress_needed;
rec_offs_init(offsets_);
+ ut_ad(flags == 0 || flags == BTR_CREATE_FLAG);
ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
MTR_MEMO_PAGE_X_FIX));
/* This is intended only for leaf page deletions */
@@ -2971,6 +3072,9 @@ btr_cur_optimistic_delete(
block = btr_cur_get_block(cursor);
ut_ad(page_is_leaf(buf_block_get_frame(block)));
+ ut_ad(!dict_index_is_online_ddl(cursor->index)
+ || dict_index_is_clust(cursor->index)
+ || (flags & BTR_CREATE_FLAG));
rec = btr_cur_get_rec(cursor);
offsets = rec_get_offsets(rec, cursor->index, offsets,
@@ -3038,7 +3142,7 @@ UNIV_INTERN
ibool
btr_cur_pessimistic_delete(
/*=======================*/
- ulint* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
+ dberr_t* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
the latter may occur because we may have
to update node pointers on upper levels,
and in the case of variable length keys
@@ -3051,6 +3155,7 @@ btr_cur_pessimistic_delete(
if compression does not occur, the cursor
stays valid: it points to successor of
deleted record on function exit */
+ ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
mtr_t* mtr) /*!< in: mtr */
{
@@ -3059,7 +3164,6 @@ btr_cur_pessimistic_delete(
page_zip_des_t* page_zip;
dict_index_t* index;
rec_t* rec;
- dtuple_t* node_ptr;
ulint n_extents = 0;
ulint n_reserved;
ibool success;
@@ -3072,6 +3176,10 @@ btr_cur_pessimistic_delete(
page = buf_block_get_frame(block);
index = btr_cur_get_index(cursor);
+ ut_ad(flags == 0 || flags == BTR_CREATE_FLAG);
+ ut_ad(!dict_index_is_online_ddl(index)
+ || dict_index_is_clust(index)
+ || (flags & BTR_CREATE_FLAG));
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
@@ -3120,13 +3228,15 @@ btr_cur_pessimistic_delete(
btr_discard_page(cursor, mtr);
- *err = DB_SUCCESS;
ret = TRUE;
goto return_after_reservations;
}
- lock_update_delete(block, rec);
+ if (flags == 0) {
+ lock_update_delete(block, rec);
+ }
+
level = btr_page_get_level(page, mtr);
if (level > 0
@@ -3155,12 +3265,12 @@ btr_cur_pessimistic_delete(
btr_node_ptr_delete(index, block, mtr);
- node_ptr = dict_index_build_node_ptr(
+ dtuple_t* node_ptr = dict_index_build_node_ptr(
index, next_rec, buf_block_get_page_no(block),
heap, level);
- btr_insert_on_non_leaf_level(index,
- level + 1, node_ptr, mtr);
+ btr_insert_on_non_leaf_level(
+ flags, index, level + 1, node_ptr, mtr);
}
}
@@ -3173,9 +3283,9 @@ btr_cur_pessimistic_delete(
ut_ad(btr_check_node_ptr(index, block, mtr));
+return_after_reservations:
*err = DB_SUCCESS;
-return_after_reservations:
mem_heap_free(heap);
if (ret == FALSE) {
@@ -3202,8 +3312,8 @@ btr_cur_add_path_info(
ulint root_height) /*!< in: root node height in tree */
{
btr_path_t* slot;
- rec_t* rec;
- page_t* page;
+ const rec_t* rec;
+ const page_t* page;
ut_a(cursor->path_arr);
@@ -3415,6 +3525,9 @@ btr_estimate_n_rows_in_range(
ibool is_n_rows_exact;
ulint i;
mtr_t mtr;
+ ib_int64_t table_n_rows;
+
+ table_n_rows = dict_table_get_n_rows(index->table);
mtr_start(&mtr);
@@ -3427,9 +3540,9 @@ btr_estimate_n_rows_in_range(
&cursor, 0,
__FILE__, __LINE__, &mtr);
} else {
- btr_cur_open_at_index_side(TRUE, index,
+ btr_cur_open_at_index_side(true, index,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, &mtr);
+ &cursor, 0, &mtr);
}
mtr_commit(&mtr);
@@ -3445,9 +3558,9 @@ btr_estimate_n_rows_in_range(
&cursor, 0,
__FILE__, __LINE__, &mtr);
} else {
- btr_cur_open_at_index_side(FALSE, index,
+ btr_cur_open_at_index_side(false, index,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, &mtr);
+ &cursor, 0, &mtr);
}
mtr_commit(&mtr);
@@ -3479,20 +3592,21 @@ btr_estimate_n_rows_in_range(
n_rows = n_rows * 2;
}
+ DBUG_EXECUTE_IF("bug14007649", return(n_rows););
+
/* Do not estimate the number of rows in the range
to over 1 / 2 of the estimated rows in the whole
table */
- if (n_rows > index->table->stat_n_rows / 2
- && !is_n_rows_exact) {
+ if (n_rows > table_n_rows / 2 && !is_n_rows_exact) {
- n_rows = index->table->stat_n_rows / 2;
+ n_rows = table_n_rows / 2;
/* If there are just 0 or 1 rows in the table,
then we estimate all rows are in the range */
if (n_rows == 0) {
- n_rows = index->table->stat_n_rows;
+ n_rows = table_n_rows;
}
}
@@ -3552,9 +3666,9 @@ btr_estimate_n_rows_in_range(
/*******************************************************************//**
Record the number of non_null key values in a given index for
-each n-column prefix of the index where n < dict_index_get_n_unique(index).
+each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
The estimates are eventually stored in the array:
-index->stat_n_non_null_key_vals. */
+index->stat_n_non_null_key_vals[], which is indexed from 0 to n-1. */
static
void
btr_record_not_null_field_in_rec(
@@ -3565,7 +3679,7 @@ btr_record_not_null_field_in_rec(
const ulint* offsets, /*!< in: rec_get_offsets(rec, index),
its size could be for all fields or
that of "n_unique" */
- ib_int64_t* n_not_null) /*!< in/out: array to record number of
+ ib_uint64_t* n_not_null) /*!< in/out: array to record number of
not null rows for n-column prefix */
{
ulint i;
@@ -3587,11 +3701,12 @@ btr_record_not_null_field_in_rec(
/*******************************************************************//**
Estimates the number of different key values in a given index, for
-each n-column prefix of the index where n <= dict_index_get_n_unique(index).
-The estimates are stored in the array index->stat_n_diff_key_vals[] and
-the number of pages that were sampled is saved in index->stat_n_sample_sizes[].
-If innodb_stats_method is "nulls_ignored", we also record the number of
-non-null values for each prefix and store the estimates in
+each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
+The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed
+0..n_uniq-1) and the number of pages that were sampled is saved in
+index->stat_n_sample_sizes[].
+If innodb_stats_method is nulls_ignored, we also record the number of
+non-null values for each prefix and store the estimates in
array index->stat_n_non_null_key_vals. */
UNIV_INTERN
void
@@ -3605,8 +3720,8 @@ btr_estimate_number_of_different_key_vals(
ulint n_cols;
ulint matched_fields;
ulint matched_bytes;
- ib_int64_t* n_diff;
- ib_int64_t* n_not_null;
+ ib_uint64_t* n_diff;
+ ib_uint64_t* n_not_null;
ibool stats_null_not_equal;
ullint n_sample_pages; /* number of pages to sample */
ulint not_empty_flag = 0;
@@ -3622,13 +3737,13 @@ btr_estimate_number_of_different_key_vals(
n_cols = dict_index_get_n_unique(index);
heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null)
- * (n_cols + 1)
+ * n_cols
+ dict_index_get_n_fields(index)
* (sizeof *offsets_rec
+ sizeof *offsets_next_rec));
- n_diff = (ib_int64_t*) mem_heap_zalloc(heap, (n_cols + 1)
- * sizeof(ib_int64_t));
+ n_diff = (ib_uint64_t*) mem_heap_zalloc(
+ heap, n_cols * sizeof(ib_int64_t));
n_not_null = NULL;
@@ -3637,8 +3752,8 @@ btr_estimate_number_of_different_key_vals(
considered equal (by setting stats_null_not_equal value) */
switch (srv_innodb_stats_method) {
case SRV_STATS_NULLS_IGNORED:
- n_not_null = (ib_int64_t*) mem_heap_zalloc(heap, (n_cols + 1)
- * sizeof *n_not_null);
+ n_not_null = (ib_uint64_t*) mem_heap_zalloc(
+ heap, n_cols * sizeof *n_not_null);
/* fall through */
case SRV_STATS_NULLS_UNEQUAL:
@@ -3689,7 +3804,7 @@ btr_estimate_number_of_different_key_vals(
offsets_rec = rec_get_offsets(rec, index, offsets_rec,
ULINT_UNDEFINED, &heap);
- if (n_not_null) {
+ if (n_not_null != NULL) {
btr_record_not_null_field_in_rec(
n_cols, offsets_rec, n_not_null);
}
@@ -3717,14 +3832,14 @@ btr_estimate_number_of_different_key_vals(
&matched_fields,
&matched_bytes);
- for (j = matched_fields + 1; j <= n_cols; j++) {
+ for (j = matched_fields; j < n_cols; j++) {
/* We add one if this index record has
a different prefix from the previous */
n_diff[j]++;
}
- if (n_not_null) {
+ if (n_not_null != NULL) {
btr_record_not_null_field_in_rec(
n_cols, offsets_next_rec, n_not_null);
}
@@ -3759,7 +3874,7 @@ btr_estimate_number_of_different_key_vals(
if (btr_page_get_prev(page, &mtr) != FIL_NULL
|| btr_page_get_next(page, &mtr) != FIL_NULL) {
- n_diff[n_cols]++;
+ n_diff[n_cols - 1]++;
}
}
@@ -3774,7 +3889,7 @@ btr_estimate_number_of_different_key_vals(
also the pages used for external storage of fields (those pages are
included in index->stat_n_leaf_pages) */
- for (j = 0; j <= n_cols; j++) {
+ for (j = 0; j < n_cols; j++) {
index->stat_n_diff_key_vals[j]
= BTR_TABLE_STATS_FROM_SAMPLE(
n_diff[j], index, n_sample_pages,
@@ -3804,7 +3919,7 @@ btr_estimate_number_of_different_key_vals(
sampled result. stat_n_non_null_key_vals[] is created
and initialized to zero in dict_index_add_to_cache(),
along with stat_n_diff_key_vals[] array */
- if (n_not_null != NULL && (j < n_cols)) {
+ if (n_not_null != NULL) {
index->stat_n_non_null_key_vals[j] =
BTR_TABLE_STATS_FROM_SAMPLE(
n_not_null[j], index, n_sample_pages,
@@ -4154,7 +4269,7 @@ The fields are stored on pages allocated from leaf node
file segment of the index tree.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
UNIV_INTERN
-enum db_err
+dberr_t
btr_store_big_rec_extern_fields(
/*============================*/
dict_index_t* index, /*!< in: index of rec; the index tree
@@ -4188,7 +4303,7 @@ btr_store_big_rec_extern_fields(
z_stream c_stream;
buf_block_t** freed_pages = NULL;
ulint n_freed_pages = 0;
- enum db_err error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(rec_offs_any_extern(offsets));
@@ -4219,7 +4334,7 @@ btr_store_big_rec_extern_fields(
heap = mem_heap_create(250000);
page_zip_set_alloc(&c_stream, heap);
- err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
+ err = deflateInit2(&c_stream, page_compression_level,
Z_DEFLATED, 15, 7, Z_DEFAULT_STRATEGY);
ut_a(err == Z_OK);
}
@@ -5091,6 +5206,7 @@ btr_copy_zblob_prefix(
" page %lu space %lu\n",
(ulong) fil_page_get_type(bpage->zip.data),
(ulong) page_no, (ulong) space_id);
+ ut_ad(0);
goto end_of_blob;
}
diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc
index 5a67afc7e69..aceb6bd1d41 100644
--- a/storage/innobase/btr/btr0pcur.cc
+++ b/storage/innobase/btr/btr0pcur.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -133,6 +133,8 @@ btr_pcur_store_position(
ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
+ ut_ad(page_is_leaf(page));
+ ut_ad(page_get_page_no(page) == index->page);
cursor->old_stored = BTR_PCUR_OLD_STORED;
@@ -258,7 +260,8 @@ btr_pcur_restore_position_func(
btr_cur_open_at_index_side(
cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE,
- index, latch_mode, btr_pcur_get_btr_cur(cursor), mtr);
+ index, latch_mode,
+ btr_pcur_get_btr_cur(cursor), 0, mtr);
cursor->latch_mode = latch_mode;
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
@@ -326,13 +329,19 @@ btr_pcur_restore_position_func(
/* Save the old search mode of the cursor */
old_mode = cursor->search_mode;
- if (UNIV_LIKELY(cursor->rel_pos == BTR_PCUR_ON)) {
+ switch (cursor->rel_pos) {
+ case BTR_PCUR_ON:
mode = PAGE_CUR_LE;
- } else if (cursor->rel_pos == BTR_PCUR_AFTER) {
+ break;
+ case BTR_PCUR_AFTER:
mode = PAGE_CUR_G;
- } else {
- ut_ad(cursor->rel_pos == BTR_PCUR_BEFORE);
+ break;
+ case BTR_PCUR_BEFORE:
mode = PAGE_CUR_L;
+ break;
+ default:
+ ut_error;
+ mode = 0;
}
btr_pcur_open_with_no_init_func(index, tuple, mode, latch_mode,
@@ -341,25 +350,39 @@ btr_pcur_restore_position_func(
/* Restore the old search mode */
cursor->search_mode = old_mode;
- if (cursor->rel_pos == BTR_PCUR_ON
- && btr_pcur_is_on_user_rec(cursor)
- && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor),
- rec_get_offsets(
- btr_pcur_get_rec(cursor), index,
- NULL, ULINT_UNDEFINED, &heap))) {
-
- /* We have to store the NEW value for the modify clock, since
- the cursor can now be on a different page! But we can retain
- the value of old_rec */
-
- cursor->block_when_stored = btr_pcur_get_block(cursor);
- cursor->modify_clock = buf_block_get_modify_clock(
- cursor->block_when_stored);
- cursor->old_stored = BTR_PCUR_OLD_STORED;
-
- mem_heap_free(heap);
-
- return(TRUE);
+ switch (cursor->rel_pos) {
+ case BTR_PCUR_ON:
+ if (btr_pcur_is_on_user_rec(cursor)
+ && !cmp_dtuple_rec(
+ tuple, btr_pcur_get_rec(cursor),
+ rec_get_offsets(btr_pcur_get_rec(cursor),
+ index, NULL,
+ ULINT_UNDEFINED, &heap))) {
+
+ /* We have to store the NEW value for
+ the modify clock, since the cursor can
+ now be on a different page! But we can
+ retain the value of old_rec */
+
+ cursor->block_when_stored =
+ btr_pcur_get_block(cursor);
+ cursor->modify_clock =
+ buf_block_get_modify_clock(
+ cursor->block_when_stored);
+ cursor->old_stored = BTR_PCUR_OLD_STORED;
+
+ mem_heap_free(heap);
+
+ return(TRUE);
+ }
+#ifdef UNIV_DEBUG
+ /* fall through */
+ case BTR_PCUR_BEFORE:
+ case BTR_PCUR_AFTER:
+ break;
+ default:
+ ut_error;
+#endif /* UNIV_DEBUG */
}
mem_heap_free(heap);
diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc
index 7e6e2ef1cb1..dcb508a7f29 100644
--- a/storage/innobase/btr/btr0sea.cc
+++ b/storage/innobase/btr/btr0sea.cc
@@ -42,7 +42,6 @@ Created 2/17/1996 Heikki Tuuri
#include "btr0pcur.h"
#include "btr0btr.h"
#include "ha0ha.h"
-#include "srv0mon.h"
/** Flag: has the search system been enabled?
Protected by btr_search_latch. */
@@ -352,7 +351,7 @@ void
btr_search_info_update_hash(
/*========================*/
btr_search_t* info, /*!< in/out: search info */
- btr_cur_t* cursor) /*!< in: cursor which was just positioned */
+ const btr_cur_t* cursor)/*!< in: cursor which was just positioned */
{
dict_index_t* index;
ulint n_unique;
@@ -621,7 +620,7 @@ void
btr_search_info_update_slow(
/*========================*/
btr_search_t* info, /*!< in/out: search info */
- btr_cur_t* cursor) /*!< in: cursor which was just positioned */
+ btr_cur_t* cursor) /*!< in: cursor which was just positioned */
{
buf_block_t* block;
ibool build_index;
@@ -865,7 +864,7 @@ btr_search_guess_on_hash(
{
buf_pool_t* buf_pool;
buf_block_t* block;
- rec_t* rec;
+ const rec_t* rec;
ulint fold;
index_id_t index_id;
#ifdef notdefined
@@ -951,7 +950,7 @@ btr_search_guess_on_hash(
ut_ad(page_rec_is_user_rec(rec));
- btr_cur_position(index, rec, block, cursor);
+ btr_cur_position(index, (rec_t*) rec, block, cursor);
/* Check the validity of the guess within the page */
@@ -1077,6 +1076,7 @@ btr_search_drop_page_hash_index(
mem_heap_t* heap;
const dict_index_t* index;
ulint* offsets;
+ btr_search_t* info;
#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
@@ -1102,6 +1102,27 @@ retry:
}
ut_a(!dict_index_is_ibuf(index));
+#ifdef UNIV_DEBUG
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_CREATION:
+ /* The index is being created (bulk loaded). */
+ case ONLINE_INDEX_COMPLETE:
+ /* The index has been published. */
+ case ONLINE_INDEX_ABORTED:
+ /* Either the index creation was aborted due to an
+ error observed by InnoDB (in which case there should
+ not be any adaptive hash index entries), or it was
+ completed and then flagged aborted in
+ rollback_inplace_alter_table(). */
+ break;
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ /* The index should have been dropped from the tablespace
+ already, and the adaptive hash index entries should have
+ been dropped as well. */
+ ut_error;
+ }
+#endif /* UNIV_DEBUG */
+
table = btr_search_sys->hash_index;
#ifdef UNIV_SYNC_DEBUG
@@ -1196,8 +1217,9 @@ next_rec:
ha_remove_all_nodes_to_page(table, folds[i], page);
}
- ut_a(index->search_info->ref_count > 0);
- index->search_info->ref_count--;
+ info = btr_search_get_info(block->index);
+ ut_a(info->ref_count > 0);
+ info->ref_count--;
block->index = NULL;