summary | refs | log | tree | commit | diff
path: root/storage/innobase/btr/btr0cur.cc
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/btr/btr0cur.cc')
-rw-r--r--  storage/innobase/btr/btr0cur.cc  249
1 files changed, 143 insertions, 106 deletions
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index e3ac8fb7605..c5463e50b48 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -3,7 +3,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2015, 2017, MariaDB Corporation.
+Copyright (c) 2015, 2018, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -404,7 +404,7 @@ static
dberr_t
btr_cur_instant_init_low(dict_index_t* index, mtr_t* mtr)
{
- ut_ad(index->is_clust());
+ ut_ad(index->is_primary());
ut_ad(index->n_core_null_bytes == dict_index_t::NO_CORE_NULL_BYTES);
ut_ad(index->table->supports_instant());
ut_ad(index->table->is_readable());
@@ -482,10 +482,11 @@ inconsistent:
/* In fact, because we only ever append fields to the 'default
value' record, it is also OK to perform READ UNCOMMITTED and
then ignore any extra fields, provided that
- trx_rw_is_active(DB_TRX_ID). */
+ trx_sys.is_registered(DB_TRX_ID). */
if (rec_offs_n_fields(offsets) > index->n_fields
- && !trx_rw_is_active(row_get_rec_trx_id(rec, index, offsets),
- NULL, false)) {
+ && !trx_sys.is_registered(current_trx(),
+ row_get_rec_trx_id(rec, index,
+ offsets))) {
goto inconsistent;
}
@@ -553,7 +554,7 @@ btr_cur_instant_root_init(dict_index_t* index, const page_t* page)
{
ut_ad(page_is_root(page));
ut_ad(!page_is_comp(page) == !dict_table_is_comp(index->table));
- ut_ad(index->is_clust());
+ ut_ad(index->is_primary());
ut_ad(!index->is_instant());
ut_ad(index->table->supports_instant());
/* This is normally executed as part of btr_cur_instant_init()
@@ -803,10 +804,10 @@ btr_cur_will_modify_tree(
/* is first, 2nd or last record */
if (page_rec_is_first(rec, page)
- || (mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL
+ || (page_has_next(page)
&& (page_rec_is_last(rec, page)
|| page_rec_is_second_last(rec, page)))
- || (mach_read_from_4(page + FIL_PAGE_PREV) != FIL_NULL
+ || (page_has_prev(page)
&& page_rec_is_second(rec, page))) {
return(true);
}
@@ -891,13 +892,10 @@ btr_cur_need_opposite_intention(
{
switch (lock_intention) {
case BTR_INTENTION_DELETE:
- return((mach_read_from_4(page + FIL_PAGE_PREV) != FIL_NULL
- && page_rec_is_first(rec, page))
- || (mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL
- && page_rec_is_last(rec, page)));
+ return (page_has_prev(page) && page_rec_is_first(rec, page)) ||
+ (page_has_next(page) && page_rec_is_last(rec, page));
case BTR_INTENTION_INSERT:
- return(mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL
- && page_rec_is_last(rec, page));
+ return page_has_next(page) && page_rec_is_last(rec, page);
case BTR_INTENTION_BOTH:
return(false);
}
@@ -919,8 +917,7 @@ search tuple should be performed in the B-tree. InnoDB does an insert
immediately after the cursor. Thus, the cursor may end up on a user record,
or on a page infimum record. */
dberr_t
-btr_cur_search_to_nth_level(
-/*========================*/
+btr_cur_search_to_nth_level_func(
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: the tree level of search */
const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
@@ -935,17 +932,16 @@ btr_cur_search_to_nth_level(
cursor->left_block is used to store a pointer
to the left neighbor page, in the cases
BTR_SEARCH_PREV and BTR_MODIFY_PREV;
- NOTE that if has_search_latch
- is != 0, we maybe do not have a latch set
- on the cursor page, we assume
- the caller uses his search latch
- to protect the record! */
+ NOTE that if ahi_latch, we might not have a
+ cursor page latch, we assume that ahi_latch
+ protects the record! */
btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is
s- or x-latched, but see also above! */
- ulint has_search_latch,
- /*!< in: info on the latch mode the
- caller currently has on search system:
- RW_S_LATCH, or 0 */
+#ifdef BTR_CUR_HASH_ADAPT
+ rw_lock_t* ahi_latch,
+ /*!< in: currently held btr_search_latch
+ (in RW_S_LATCH mode), or NULL */
+#endif /* BTR_CUR_HASH_ADAPT */
const char* file, /*!< in: file name */
unsigned line, /*!< in: line where called */
mtr_t* mtr, /*!< in: mtr */
@@ -1123,17 +1119,15 @@ btr_cur_search_to_nth_level(
&& mode != PAGE_CUR_LE_OR_EXTENDS
# endif /* PAGE_CUR_LE_OR_EXTENDS */
&& !dict_index_is_spatial(index)
- /* If !has_search_latch, we do a dirty read of
+ /* If !ahi_latch, we do a dirty read of
btr_search_enabled below, and btr_search_guess_on_hash()
will have to check it again. */
&& btr_search_enabled
&& !modify_external
&& !(tuple->info_bits & REC_INFO_MIN_REC_FLAG)
- && rw_lock_get_writer(btr_get_search_latch(index))
- == RW_LOCK_NOT_LOCKED
&& btr_search_guess_on_hash(index, info, tuple, mode,
latch_mode, cursor,
- has_search_latch, mtr)) {
+ ahi_latch, mtr)) {
/* Search using the hash index succeeded */
@@ -1154,10 +1148,12 @@ btr_cur_search_to_nth_level(
/* If the hash search did not succeed, do binary search down the
tree */
- if (has_search_latch) {
+#ifdef BTR_CUR_HASH_ADAPT
+ if (ahi_latch) {
/* Release possible search latch to obey latching order */
- btr_search_s_unlock(index);
+ rw_lock_s_unlock(ahi_latch);
}
+#endif /* BTR_CUR_HASH_ADAPT */
/* Store the position of the tree latch we push to mtr so that we
know how to release it when we have latched leaf node(s) */
@@ -1170,7 +1166,7 @@ btr_cur_search_to_nth_level(
Free blocks and read IO bandwidth should be prior
for them, when the history list is glowing huge. */
if (lock_intention == BTR_INTENTION_DELETE
- && trx_sys->rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
+ && trx_sys.history_size() > BTR_CUR_FINE_HISTORY_LENGTH
&& buf_get_n_pending_read_ios()) {
mtr_x_lock(dict_index_get_lock(index), mtr);
} else if (dict_index_is_spatial(index)
@@ -1517,7 +1513,7 @@ retry_page_get:
if (height == ULINT_UNDEFINED) {
/* We are in the root node */
- height = btr_page_get_level(page, mtr);
+ height = btr_page_get_level(page);
root_height = height;
cursor->tree_height = root_height + 1;
@@ -1711,8 +1707,7 @@ retry_page_get:
/* If this is the desired level, leave the loop */
- ut_ad(height == btr_page_get_level(page_cur_get_page(page_cursor),
- mtr));
+ ut_ad(height == btr_page_get_level(page_cur_get_page(page_cursor)));
/* Add Predicate lock if it is serializable isolation
and only if it is in the search case */
@@ -1971,7 +1966,7 @@ need_opposite_intention:
MTR_MEMO_PAGE_S_FIX
| MTR_MEMO_PAGE_X_FIX));
- if (btr_page_get_prev(page, mtr) != FIL_NULL
+ if (page_has_prev(page)
&& page_rec_is_first(node_ptr, page)) {
if (leftmost_from_level == 0) {
@@ -2088,7 +2083,7 @@ need_opposite_intention:
} else if (!dict_index_is_spatial(index)
&& latch_mode == BTR_MODIFY_TREE
&& lock_intention == BTR_INTENTION_INSERT
- && mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL
+ && page_has_next(page)
&& page_rec_is_last(page_cur_get_rec(page_cursor), page)) {
/* btr_insert_into_right_sibling() might cause
@@ -2224,15 +2219,17 @@ func_exit:
ut_free(prev_tree_savepoints);
}
- if (has_search_latch) {
- btr_search_s_lock(index);
- }
-
if (mbr_adj) {
/* remember that we will need to adjust parent MBR */
cursor->rtr_info->mbr_adj = true;
}
+#ifdef BTR_CUR_HASH_ADAPT
+ if (ahi_latch) {
+ rw_lock_s_lock(ahi_latch);
+ }
+#endif /* BTR_CUR_HASH_ADAPT */
+
DBUG_RETURN(err);
}
@@ -2308,7 +2305,7 @@ btr_cur_open_at_index_side_func(
Free blocks and read IO bandwidth should be prior
for them, when the history list is glowing huge. */
if (lock_intention == BTR_INTENTION_DELETE
- && trx_sys->rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
+ && trx_sys.history_size() > BTR_CUR_FINE_HISTORY_LENGTH
&& buf_get_n_pending_read_ios()) {
mtr_x_lock(dict_index_get_lock(index), mtr);
} else {
@@ -2410,12 +2407,12 @@ btr_cur_open_at_index_side_func(
if (height == ULINT_UNDEFINED) {
/* We are in the root node */
- height = btr_page_get_level(page, mtr);
+ height = btr_page_get_level(page);
root_height = height;
ut_a(height >= level);
} else {
/* TODO: flag the index corrupted if this fails */
- ut_ad(height == btr_page_get_level(page, mtr));
+ ut_ad(height == btr_page_get_level(page));
}
if (height == level) {
@@ -2654,7 +2651,7 @@ btr_cur_open_at_rnd_pos_func(
Free blocks and read IO bandwidth should be prior
for them, when the history list is glowing huge. */
if (lock_intention == BTR_INTENTION_DELETE
- && trx_sys->rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
+ && trx_sys.history_size() > BTR_CUR_FINE_HISTORY_LENGTH
&& buf_get_n_pending_read_ios()) {
mtr_x_lock(dict_index_get_lock(index), mtr);
} else {
@@ -2770,7 +2767,7 @@ btr_cur_open_at_rnd_pos_func(
if (height == ULINT_UNDEFINED) {
/* We are in the root node */
- height = btr_page_get_level(page, mtr);
+ height = btr_page_get_level(page);
}
if (height == 0) {
@@ -2975,7 +2972,7 @@ btr_cur_ins_lock_and_undo(
dtuple_t* entry, /*!< in/out: entry to insert */
que_thr_t* thr, /*!< in: query thread or NULL */
mtr_t* mtr, /*!< in/out: mini-transaction */
- ibool* inherit)/*!< out: TRUE if the inserted new record maybe
+ bool* inherit)/*!< out: true if the inserted new record maybe
should inherit LOCK_GAP type locks from the
successor record */
{
@@ -3028,24 +3025,22 @@ btr_cur_ins_lock_and_undo(
}
if (flags & BTR_NO_UNDO_LOG_FLAG) {
- roll_ptr = 0;
+ roll_ptr = roll_ptr_t(1) << ROLL_PTR_INSERT_FLAG_POS;
+ if (!(flags & BTR_KEEP_SYS_FLAG)) {
+upd_sys:
+ row_upd_index_entry_sys_field(entry, index,
+ DATA_ROLL_PTR, roll_ptr);
+ }
} else {
err = trx_undo_report_row_operation(thr, index, entry,
NULL, 0, NULL, NULL,
&roll_ptr);
- if (err != DB_SUCCESS) {
- return(err);
+ if (err == DB_SUCCESS) {
+ goto upd_sys;
}
}
- /* Now we can fill in the roll ptr field in entry */
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
-
- row_upd_index_entry_sys_field(entry, index,
- DATA_ROLL_PTR, roll_ptr);
- }
-
- return(DB_SUCCESS);
+ return(err);
}
/**
@@ -3119,9 +3114,9 @@ btr_cur_optimistic_insert(
buf_block_t* block;
page_t* page;
rec_t* dummy;
- ibool leaf;
- ibool reorg;
- ibool inherit = TRUE;
+ bool leaf;
+ bool reorg;
+ bool inherit = true;
ulint rec_size;
dberr_t err;
@@ -3233,7 +3228,7 @@ fail_err:
DBUG_LOG("ib_cur",
"insert " << index->name << " (" << index->id << ") by "
- << ib::hex(thr ? trx_get_id_for_print(thr_get_trx(thr)) : 0)
+ << ib::hex(thr ? thr->graph->trx->id : 0)
<< ' ' << rec_printer(entry).str());
DBUG_EXECUTE_IF("do_page_reorganize",
btr_page_reorganize(page_cursor, index, mtr););
@@ -3250,6 +3245,32 @@ fail_err:
goto fail_err;
}
+#ifdef UNIV_DEBUG
+ if (!(flags & BTR_CREATE_FLAG)
+ && index->is_primary() && page_is_leaf(page)) {
+ const dfield_t* trx_id = dtuple_get_nth_field(
+ entry, dict_col_get_clust_pos(
+ dict_table_get_sys_col(index->table,
+ DATA_TRX_ID),
+ index));
+
+ ut_ad(trx_id->len == DATA_TRX_ID_LEN);
+ ut_ad(trx_id[1].len == DATA_ROLL_PTR_LEN);
+ ut_ad(*static_cast<const byte*>
+ (trx_id[1].data) & 0x80);
+ if (flags & BTR_NO_UNDO_LOG_FLAG) {
+ ut_ad(!memcmp(trx_id->data, reset_trx_id,
+ DATA_TRX_ID_LEN));
+ } else {
+ ut_ad(thr->graph->trx->id);
+ ut_ad(thr->graph->trx->id
+ == trx_read_trx_id(
+ static_cast<const byte*>(
+ trx_id->data)));
+ }
+ }
+#endif
+
*rec = page_cur_tuple_insert(
page_cursor, entry, index, offsets, heap,
n_ext, mtr);
@@ -3302,10 +3323,14 @@ fail_err:
ut_ad(entry->info_bits == REC_INFO_DEFAULT_ROW);
ut_ad(index->is_instant());
ut_ad(flags == BTR_NO_LOCKING_FLAG);
- } else if (!reorg && cursor->flag == BTR_CUR_HASH) {
- btr_search_update_hash_node_on_insert(cursor);
} else {
- btr_search_update_hash_on_insert(cursor);
+ rw_lock_t* ahi_latch = btr_get_search_latch(index);
+ if (!reorg && cursor->flag == BTR_CUR_HASH) {
+ btr_search_update_hash_node_on_insert(
+ cursor, ahi_latch);
+ } else {
+ btr_search_update_hash_on_insert(cursor, ahi_latch);
+ }
}
#endif /* BTR_CUR_HASH_ADAPT */
@@ -3382,7 +3407,7 @@ btr_cur_pessimistic_insert(
dict_index_t* index = cursor->index;
big_rec_t* big_rec_vec = NULL;
dberr_t err;
- ibool inherit = FALSE;
+ bool inherit = false;
bool success;
ulint n_reserved = 0;
@@ -3493,7 +3518,7 @@ btr_cur_pessimistic_insert(
== FIL_NULL) {
/* split and inserted need to call
lock_update_insert() always. */
- inherit = TRUE;
+ inherit = true;
}
}
}
@@ -3511,7 +3536,8 @@ btr_cur_pessimistic_insert(
ut_ad((flags & ~BTR_KEEP_IBUF_BITMAP)
== BTR_NO_LOCKING_FLAG);
} else {
- btr_search_update_hash_on_insert(cursor);
+ btr_search_update_hash_on_insert(
+ cursor, btr_get_search_latch(index));
}
#endif /* BTR_CUR_HASH_ADAPT */
if (inherit && !(flags & BTR_NO_LOCKING_FLAG)) {
@@ -3713,7 +3739,15 @@ btr_cur_parse_update_in_place(
/* We do not need to reserve search latch, as the page is only
being recovered, and there cannot be a hash index to it. */
- offsets = rec_get_offsets(rec, index, NULL, true,
+ /* The function rtr_update_mbr_field_in_place() is generating
+ these records on node pointer pages; therefore we have to
+ check if this is a leaf page. */
+
+ offsets = rec_get_offsets(rec, index, NULL,
+ flags != (BTR_NO_UNDO_LOG_FLAG
+ | BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG)
+ || page_is_leaf(page),
ULINT_UNDEFINED, &heap);
if (!(flags & BTR_KEEP_SYS_FLAG)) {
@@ -3849,6 +3883,7 @@ btr_cur_update_in_place(
roll_ptr_t roll_ptr = 0;
ulint was_delete_marked;
+ ut_ad(page_is_leaf(cursor->page_cur.block->frame));
rec = btr_cur_get_rec(cursor);
index = cursor->index;
ut_ad(rec_offs_validate(rec, index, offsets));
@@ -3913,34 +3948,39 @@ btr_cur_update_in_place(
|| row_get_rec_trx_id(rec, index, offsets));
#ifdef BTR_CUR_HASH_ADAPT
- if (block->index) {
- /* TO DO: Can we skip this if none of the fields
- index->search_info->curr_n_fields
- are being updated? */
-
- /* The function row_upd_changes_ord_field_binary works only
- if the update vector was built for a clustered index, we must
- NOT call it if index is secondary */
+ {
+ rw_lock_t* ahi_latch = block->index
+ ? btr_get_search_latch(index) : NULL;
+ if (ahi_latch) {
+ /* TO DO: Can we skip this if none of the fields
+ index->search_info->curr_n_fields
+ are being updated? */
+
+ /* The function row_upd_changes_ord_field_binary
+ does not work on a secondary index. */
+
+ if (!dict_index_is_clust(index)
+ || row_upd_changes_ord_field_binary(
+ index, update, thr, NULL, NULL)) {
+ ut_ad(!(update->info_bits
+ & REC_INFO_MIN_REC_FLAG));
+ /* Remove possible hash index pointer
+ to this record */
+ btr_search_update_hash_on_delete(cursor);
+ }
- if (!dict_index_is_clust(index)
- || row_upd_changes_ord_field_binary(index, update, thr,
- NULL, NULL)) {
- ut_ad(!(update->info_bits & REC_INFO_MIN_REC_FLAG));
- /* Remove possible hash index pointer to this record */
- btr_search_update_hash_on_delete(cursor);
+ rw_lock_x_lock(ahi_latch);
}
- btr_search_x_lock(index);
- }
-
- assert_block_ahi_valid(block);
+ assert_block_ahi_valid(block);
#endif /* BTR_CUR_HASH_ADAPT */
- row_upd_rec_in_place(rec, index, offsets, update, page_zip);
+ row_upd_rec_in_place(rec, index, offsets, update, page_zip);
#ifdef BTR_CUR_HASH_ADAPT
- if (block->index) {
- btr_search_x_unlock(index);
+ if (ahi_latch) {
+ rw_lock_x_unlock(ahi_latch);
+ }
}
#endif /* BTR_CUR_HASH_ADAPT */
@@ -5315,7 +5355,7 @@ btr_cur_optimistic_delete_func(
index->is_instant() will hold until the
insert into SYS_COLUMNS is rolled back. */
ut_ad(index->table->supports_instant());
- ut_ad(index->is_clust());
+ ut_ad(index->is_primary());
} else {
lock_update_delete(block, rec);
}
@@ -5323,7 +5363,7 @@ btr_cur_optimistic_delete_func(
index, 0, mtr);
page_cur_set_after_last(block, btr_cur_get_page_cur(cursor));
- if (index->is_clust()) {
+ if (index->is_primary()) {
/* Concurrent access is prevented by
root_block->lock X-latch, so this should be
safe. */
@@ -5352,7 +5392,7 @@ btr_cur_optimistic_delete_func(
index->is_instant() will hold until the
insert into SYS_COLUMNS is rolled back. */
ut_ad(cursor->index->table->supports_instant());
- ut_ad(cursor->index->is_clust());
+ ut_ad(cursor->index->is_primary());
ut_ad(!page_zip);
page_cur_delete_rec(btr_cur_get_page_cur(cursor),
cursor->index, offsets, mtr);
@@ -5519,7 +5559,7 @@ btr_cur_pessimistic_delete(
insert into SYS_COLUMNS is rolled back. */
ut_ad(rollback);
ut_ad(index->table->supports_instant());
- ut_ad(index->is_clust());
+ ut_ad(index->is_primary());
} else if (flags == 0) {
lock_update_delete(block, rec);
}
@@ -5545,7 +5585,7 @@ btr_cur_pessimistic_delete(
btr_page_empty(block, page_zip, index, 0, mtr);
page_cur_set_after_last(block,
btr_cur_get_page_cur(cursor));
- if (index->is_clust()) {
+ if (index->is_primary()) {
/* Concurrent access is prevented by
index->lock and root_block->lock
X-latch, so this should be safe. */
@@ -5584,7 +5624,7 @@ discard_page:
rec_t* next_rec = page_rec_get_next(rec);
- if (btr_page_get_prev(page, mtr) == FIL_NULL) {
+ if (!page_has_prev(page)) {
/* If we delete the leftmost node pointer on a
non-leaf level, we must mark the new leftmost node
@@ -5636,7 +5676,7 @@ discard_page:
on the page */
btr_node_ptr_delete(index, block, mtr);
- const ulint level = btr_page_get_level(page, mtr);
+ const ulint level = btr_page_get_level(page);
dtuple_t* node_ptr = dict_index_build_node_ptr(
index, next_rec, block->page.id.page_no(),
@@ -5726,7 +5766,7 @@ btr_cur_add_path_info(
slot->nth_rec = page_rec_get_n_recs_before(rec);
slot->n_recs = page_get_n_recs(page);
slot->page_no = page_get_page_no(page);
- slot->page_level = btr_page_get_level_low(page);
+ slot->page_level = btr_page_get_level(page);
}
/*******************************************************************//**
@@ -5843,7 +5883,7 @@ btr_estimate_n_rows_in_range_on_level(
reuses them. */
if (!fil_page_index_page_check(page)
|| btr_page_get_index_id(page) != index->id
- || btr_page_get_level_low(page) != level) {
+ || btr_page_get_level(page) != level) {
/* The page got reused for something else */
mtr_commit(&mtr);
@@ -6566,7 +6606,8 @@ btr_estimate_number_of_different_key_vals(
}
}
- if (n_cols == dict_index_get_n_unique_in_tree(index)) {
+ if (n_cols == dict_index_get_n_unique_in_tree(index)
+ && page_has_siblings(page)) {
/* If there is more than one leaf page in the tree,
we add one because we know that the first record
@@ -6577,11 +6618,7 @@ btr_estimate_number_of_different_key_vals(
algorithm grossly underestimated the number of rows
in the table. */
- if (btr_page_get_prev(page, &mtr) != FIL_NULL
- || btr_page_get_next(page, &mtr) != FIL_NULL) {
-
- n_diff[n_cols - 1]++;
- }
+ n_diff[n_cols - 1]++;
}
mtr_commit(&mtr);
@@ -7763,7 +7800,7 @@ btr_free_externally_stored_field(
MLOG_4BYTES, &mtr);
/* Zero out the BLOB length. If the server
crashes during the execution of this function,
- trx_rollback_or_clean_all_recovered() could
+ trx_rollback_all_recovered() could
dereference the half-deleted BLOB, fetching a
wrong prefix for the BLOB. */
mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4,