diff options
author | Vlad Lesin <vlad_lesin@mail.ru> | 2022-02-21 14:10:27 +0300 |
---|---|---|
committer | Vlad Lesin <vlad_lesin@mail.ru> | 2022-02-21 14:10:27 +0300 |
commit | f6f055a19145dc2dbd9efde26dbdd9fe1c60e28f (patch) | |
tree | 4cc4c241ce0aa7a6d23ea34ebb515cb6b53478fb | |
parent | fa557986acaf8d55bbf03fd0ed076f95ef794d1c (diff) | |
parent | a6f258e47f425a3ebecf6aaba87bdfcc241dc416 (diff) | |
download | mariadb-git-f6f055a19145dc2dbd9efde26dbdd9fe1c60e28f.tar.gz |
Merge 10.3 into 10.4
30 files changed, 771 insertions, 398 deletions
diff --git a/include/mysql/plugin.h b/include/mysql/plugin.h index 484287db98d..0f9f130cb8e 100644 --- a/include/mysql/plugin.h +++ b/include/mysql/plugin.h @@ -229,6 +229,7 @@ typedef int (*mysql_show_var_func)(MYSQL_THD, struct st_mysql_show_var*, void *, #define PLUGIN_VAR_NOCMDARG 0x1000 /* No argument for cmd line */ #define PLUGIN_VAR_RQCMDARG 0x0000 /* Argument required for cmd line */ #define PLUGIN_VAR_OPCMDARG 0x2000 /* Argument optional for cmd line */ +#define PLUGIN_VAR_DEPRECATED 0x4000 /* Server variable is deprecated */ #define PLUGIN_VAR_MEMALLOC 0x8000 /* String needs memory allocated */ struct st_mysql_sys_var; @@ -282,7 +283,8 @@ typedef void (*mysql_var_update_func)(MYSQL_THD thd, #define PLUGIN_VAR_MASK \ (PLUGIN_VAR_READONLY | PLUGIN_VAR_NOSYSVAR | \ PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_NOCMDARG | \ - PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC) + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_RQCMDARG | \ + PLUGIN_VAR_DEPRECATED | PLUGIN_VAR_MEMALLOC) #define MYSQL_PLUGIN_VAR_HEADER \ int flags; \ diff --git a/mysql-test/main/plugin.result b/mysql-test/main/plugin.result index a0c7eef818b..6cc6afd38ff 100644 --- a/mysql-test/main/plugin.result +++ b/mysql-test/main/plugin.result @@ -40,11 +40,15 @@ a set global example_ulong_var=500; set global example_enum_var= e1; set session example_int_var= -1; +set global example_deprecated_var=1; +Warnings: +Warning 1287 '@@example_deprecated_var' is deprecated and will be removed in a future release show status like 'example%'; Variable_name Value Example_func_example enum_var is 0, ulong_var is 500, int_var is -1, double_var is 8.500000, really show variables like 'example%'; Variable_name Value +example_deprecated_var 0 example_double_thdvar 8.500000 example_double_var 8.500000 example_enum_var e1 diff --git a/mysql-test/main/plugin.test b/mysql-test/main/plugin.test index 19199f767f5..60773c3e190 100644 --- a/mysql-test/main/plugin.test +++ b/mysql-test/main/plugin.test @@ -27,6 +27,7 @@ SELECT * 
FROM t1; set global example_ulong_var=500; set global example_enum_var= e1; set session example_int_var= -1; +set global example_deprecated_var=1; show status like 'example%'; show variables like 'example%'; diff --git a/mysql-test/suite/galera/disabled.def b/mysql-test/suite/galera/disabled.def index d6cbf1a2f7f..6f21f5af53c 100644 --- a/mysql-test/suite/galera/disabled.def +++ b/mysql-test/suite/galera/disabled.def @@ -14,6 +14,7 @@ GCF-1081 : MDEV-18283 Galera test failure on galera.GCF-1081 GCF-939 : MDEV-21520 galera.GCF-939 MW-328A : MDEV-22666 galera.MW-328A MTR failed: "Semaphore wait has lasted > 600 seconds" and do not release port 16002 MW-328B : MDEV-22666 galera.MW-328A MTR failed: "Semaphore wait has lasted > 600 seconds" and do not release port 16002 +MW-328D : MDEV-27550 ER_LOCK_DEADLOCK is gone after MDEV-27025 MW-329 : MDEV-19962 Galera test failure on MW-329 galera_as_slave_replication_bundle : MDEV-15785 OPTION_GTID_BEGIN is set in Gtid_log_event::do_apply_event() galera_bf_abort_group_commit : MDEV-18282 Galera test failure on galera.galera_bf_abort_group_commit diff --git a/mysql-test/suite/innodb/r/cursor-restore-locking.result b/mysql-test/suite/innodb/r/cursor-restore-locking.result new file mode 100644 index 00000000000..bc1127f57b3 --- /dev/null +++ b/mysql-test/suite/innodb/r/cursor-restore-locking.result @@ -0,0 +1,35 @@ +CREATE TABLE t (a int PRIMARY KEY, b int NOT NULL UNIQUE) engine = InnoDB; +connect prevent_purge,localhost,root,,; +start transaction with consistent snapshot; +connect con_del_1,localhost,root,,; +INSERT INTO t VALUES (20,20); +SET DEBUG_SYNC = 'innodb_row_search_for_mysql_exit SIGNAL first_del_row_search_mvcc_finished WAIT_FOR first_del_cont'; +DELETE FROM t WHERE b = 20; +connect con_ins_1,localhost,root,,; +SET DEBUG_SYNC = 'now WAIT_FOR first_del_row_search_mvcc_finished'; +SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL first_ins_locked'; +SET DEBUG_SYNC = 'ib_after_row_insert SIGNAL 
first_ins_row_inserted WAIT_FOR first_ins_cont'; +INSERT INTO t VALUES(10, 20); +connect con_del_2,localhost,root,,; +SET DEBUG_SYNC = 'now WAIT_FOR first_ins_locked'; +SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL second_del_locked'; +DELETE FROM t WHERE b = 20; +connection default; +SET DEBUG_SYNC = 'now WAIT_FOR second_del_locked'; +SET DEBUG_SYNC = 'now SIGNAL first_del_cont'; +SET DEBUG_SYNC = 'now WAIT_FOR first_ins_row_inserted'; +connection con_del_1; +connection default; +disconnect prevent_purge; +InnoDB 0 transactions not purged +SET DEBUG_SYNC = 'now SIGNAL first_ins_cont'; +connection con_del_2; +connection con_ins_1; +connection default; +INSERT INTO t VALUES(30, 20); +disconnect con_ins_1; +disconnect con_del_1; +disconnect con_del_2; +connection default; +SET DEBUG_SYNC = 'RESET'; +DROP TABLE t; diff --git a/mysql-test/suite/innodb/r/lock_wait_conflict.result b/mysql-test/suite/innodb/r/lock_wait_conflict.result new file mode 100644 index 00000000000..25d18c03ea1 --- /dev/null +++ b/mysql-test/suite/innodb/r/lock_wait_conflict.result @@ -0,0 +1,27 @@ +# +# MDEV-27025 insert-intention lock conflicts with waiting ORDINARY lock +# +CREATE TABLE t (a INT PRIMARY KEY, b INT NOT NULL UNIQUE) ENGINE=InnoDB; +connect prevent_purge,localhost,root,,; +start transaction with consistent snapshot; +connection default; +INSERT INTO t VALUES (20,20); +DELETE FROM t WHERE b = 20; +connect con_ins,localhost,root,,; +SET DEBUG_SYNC = 'row_ins_sec_index_entry_dup_locks_created SIGNAL ins_set_locks WAIT_FOR ins_cont'; +INSERT INTO t VALUES(10, 20); +connect con_del,localhost,root,,; +SET DEBUG_SYNC = 'now WAIT_FOR ins_set_locks'; +SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL del_locked'; +DELETE FROM t WHERE b = 20; +connection default; +SET DEBUG_SYNC = 'now WAIT_FOR del_locked'; +SET DEBUG_SYNC = 'now SIGNAL ins_cont'; +connection con_ins; +disconnect con_ins; +connection con_del; +disconnect con_del; +disconnect prevent_purge; +connection 
default; +SET DEBUG_SYNC = 'RESET'; +DROP TABLE t; diff --git a/mysql-test/suite/innodb/t/cursor-restore-locking.test b/mysql-test/suite/innodb/t/cursor-restore-locking.test new file mode 100644 index 00000000000..d032d8a8def --- /dev/null +++ b/mysql-test/suite/innodb/t/cursor-restore-locking.test @@ -0,0 +1,79 @@ +--source include/have_innodb.inc +--source include/count_sessions.inc +source include/have_debug.inc; +source include/have_debug_sync.inc; + +CREATE TABLE t (a int PRIMARY KEY, b int NOT NULL UNIQUE) engine = InnoDB; + +--connect(prevent_purge,localhost,root,,) +start transaction with consistent snapshot; + +--connect(con_del_1,localhost,root,,) +INSERT INTO t VALUES (20,20); +SET DEBUG_SYNC = 'innodb_row_search_for_mysql_exit SIGNAL first_del_row_search_mvcc_finished WAIT_FOR first_del_cont'; +--send DELETE FROM t WHERE b = 20 + +--connect(con_ins_1,localhost,root,,) +SET DEBUG_SYNC = 'now WAIT_FOR first_del_row_search_mvcc_finished'; +# It's supposed the following INSERT will be suspended just after +# lock_wait_suspend_thread_enter syncpoint, and will be awakened +# after the previous DELETE commits. ib_after_row_insert will be executed +# after the INSERT is woken up. The previous DELETE will wait for +# first_del_cont signal before commit, and this signal will be sent later. +# So it's safe to use two signals in a row here, it's guaranteed the first +# signal will be received before the second signal is sent. 
+SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL first_ins_locked'; +SET DEBUG_SYNC = 'ib_after_row_insert SIGNAL first_ins_row_inserted WAIT_FOR first_ins_cont'; +--send INSERT INTO t VALUES(10, 20) + +--connect(con_del_2,localhost,root,,) +SET DEBUG_SYNC = 'now WAIT_FOR first_ins_locked'; +SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL second_del_locked'; +############################################################################### +# This DELETE is locked by the previous DELETE, after that DELETE is +# committed, it will still be locked by the next INSERT on delete-marked +# heap_no 2 record. After that INSERT inserted the record with heap_no 3, +# and after heap_no 2 record is purged, this DELETE will be unlocked and +# must restore persistent cursor position at heap_no 3 record, as it has the +# same secondary key value as former heap_no 2 record. Then it must be blocked +# by the previous INSERT, and after the INSERT is committed, it must +# delete the record, inserted by the previous INSERT, and the last INSERT(see +# below) must be finished without error. But instead this DELETE restores +# persistent cursor position to supremum, as a result, it does not delete the +# record, inserted by the previous INSERT, and the last INSERT is finished with +# duplicate key check error. 
+############################################################################### +--send DELETE FROM t WHERE b = 20 + +--connection default +SET DEBUG_SYNC = 'now WAIT_FOR second_del_locked'; +SET DEBUG_SYNC = 'now SIGNAL first_del_cont'; +SET DEBUG_SYNC = 'now WAIT_FOR first_ins_row_inserted'; +--connection con_del_1 +--reap + +--connection default +--disconnect prevent_purge +--source include/wait_all_purged.inc +SET DEBUG_SYNC = 'now SIGNAL first_ins_cont'; + +--connection con_del_2 +--reap + +--connection con_ins_1 +--reap + +--connection default +############################################################################### +# Duplicate key error is expected if the bug is not fixed. +############################################################################### +INSERT INTO t VALUES(30, 20); + +--disconnect con_ins_1 +--disconnect con_del_1 +--disconnect con_del_2 +--connection default + +SET DEBUG_SYNC = 'RESET'; +DROP TABLE t; +--source include/wait_until_count_sessions.inc diff --git a/mysql-test/suite/innodb/t/lock_wait_conflict.test b/mysql-test/suite/innodb/t/lock_wait_conflict.test new file mode 100644 index 00000000000..46a29e14b43 --- /dev/null +++ b/mysql-test/suite/innodb/t/lock_wait_conflict.test @@ -0,0 +1,60 @@ +--source include/have_innodb.inc +--source include/count_sessions.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc + +--echo # +--echo # MDEV-27025 insert-intention lock conflicts with waiting ORDINARY lock +--echo # + +# The test checks the ability to acquire exclusive record lock if the acquiring +# transaction already holds a shared lock on the record and another transaction +# is waiting for a lock. 
+ +CREATE TABLE t (a INT PRIMARY KEY, b INT NOT NULL UNIQUE) ENGINE=InnoDB; + +--connect(prevent_purge,localhost,root,,) +start transaction with consistent snapshot; + +--connection default +INSERT INTO t VALUES (20,20); +DELETE FROM t WHERE b = 20; + +--connect(con_ins,localhost,root,,) +SET DEBUG_SYNC = 'row_ins_sec_index_entry_dup_locks_created SIGNAL ins_set_locks WAIT_FOR ins_cont'; +send +INSERT INTO t VALUES(10, 20); + +--connect(con_del,localhost,root,,) +SET DEBUG_SYNC = 'now WAIT_FOR ins_set_locks'; +SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL del_locked'; +############################################################################### +# This DELETE creates waiting ORDINARY X-lock for heap_no 2 as the record is +# delete-marked, this lock conflicts with ORDINARY S-lock set by the last +# INSERT. After the last INSERT creates insert-intention lock on +# heap_no 2, this lock will conflict with waiting ORDINARY X-lock of this +# DELETE, which causes DEADLOCK error for this DELETE. +############################################################################### +send +DELETE FROM t WHERE b = 20; + +--connection default +SET DEBUG_SYNC = 'now WAIT_FOR del_locked'; +SET DEBUG_SYNC = 'now SIGNAL ins_cont'; + +--connection con_ins +--reap +--disconnect con_ins + +--connection con_del +# Without the fix, ER_LOCK_DEADLOCK would be reported here. 
+--reap +--disconnect con_del + +--disconnect prevent_purge + +--connection default + +SET DEBUG_SYNC = 'RESET'; +DROP TABLE t; +--source include/wait_until_count_sessions.inc diff --git a/mysql-test/suite/versioning/r/update.result b/mysql-test/suite/versioning/r/update.result index a01d51f62cd..addf1cf4577 100644 --- a/mysql-test/suite/versioning/r/update.result +++ b/mysql-test/suite/versioning/r/update.result @@ -283,7 +283,6 @@ connection default; update t1 set b = 'foo'; connection con1; update t1 set a = 'bar'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction disconnect con1; connection default; drop table t1; diff --git a/mysql-test/suite/versioning/t/update.test b/mysql-test/suite/versioning/t/update.test index 478f5ad231f..add65cf0463 100644 --- a/mysql-test/suite/versioning/t/update.test +++ b/mysql-test/suite/versioning/t/update.test @@ -186,7 +186,9 @@ send update t1 set b = 'foo'; connection con1; let $wait_condition= select count(*) from information_schema.innodb_lock_waits; source include/wait_condition.inc; -error ER_LOCK_DEADLOCK; +# There must not be a DEADLOCK here as con1 transaction already holds locks, and +# default's transaction lock is waiting, so the locks of the following "UPDATE" +# must not conflict with waiting lock. 
update t1 set a = 'bar'; disconnect con1; connection default; diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc index 0d7b34327fb..87b4d241028 100644 --- a/sql/sql_plugin.cc +++ b/sql/sql_plugin.cc @@ -292,7 +292,8 @@ public: struct st_mysql_sys_var *plugin_var; sys_var_pluginvar(sys_var_chain *chain, const char *name_arg, - st_plugin_int *p, st_mysql_sys_var *plugin_var_arg); + st_plugin_int *p, st_mysql_sys_var *plugin_var_arg, + const char *substitute); sys_var_pluginvar *cast_pluginvar() { return this; } uchar* real_value_ptr(THD *thd, enum_var_type type) const; TYPELIB* plugin_var_typelib(void) const; @@ -3377,11 +3378,11 @@ static int pluginvar_sysvar_flags(const st_mysql_sys_var *p) } sys_var_pluginvar::sys_var_pluginvar(sys_var_chain *chain, const char *name_arg, - st_plugin_int *p, st_mysql_sys_var *pv) + st_plugin_int *p, st_mysql_sys_var *pv, const char *substitute) : sys_var(chain, name_arg, pv->comment, pluginvar_sysvar_flags(pv), 0, pv->flags & PLUGIN_VAR_NOCMDOPT ? -1 : 0, NO_ARG, pluginvar_show_type(pv), 0, - NULL, VARIABLE_NOT_IN_BINLOG, NULL, NULL, NULL), + NULL, VARIABLE_NOT_IN_BINLOG, NULL, NULL, substitute), plugin(p), plugin_var(pv) { plugin_var->name= name_arg; @@ -4115,7 +4116,8 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp, my_casedn_str(&my_charset_latin1, varname); convert_dash_to_underscore(varname, len-1); } - v= new (mem_root) sys_var_pluginvar(&chain, varname, tmp, o); + const char *s= o->flags & PLUGIN_VAR_DEPRECATED ? "" : NULL; + v= new (mem_root) sys_var_pluginvar(&chain, varname, tmp, o, s); v->test_load= (var ? 
&var->loaded : &static_unload); DBUG_ASSERT(static_unload == FALSE); diff --git a/storage/example/ha_example.cc b/storage/example/ha_example.cc index e873837694b..fd7df7ee06f 100644 --- a/storage/example/ha_example.cc +++ b/storage/example/ha_example.cc @@ -1054,12 +1054,17 @@ static MYSQL_THDVAR_DOUBLE( 1000.5, 0); +static MYSQL_THDVAR_INT( + deprecated_var, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_DEPRECATED, "-1..1", + NULL, NULL, 0, -1, 1, 0); + static struct st_mysql_sys_var* example_system_variables[]= { MYSQL_SYSVAR(enum_var), MYSQL_SYSVAR(ulong_var), MYSQL_SYSVAR(int_var), MYSQL_SYSVAR(double_var), MYSQL_SYSVAR(double_thdvar), + MYSQL_SYSVAR(deprecated_var), MYSQL_SYSVAR(varopt_default), NULL }; diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 3f03b7a13e6..667f3d6bd37 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -7491,11 +7491,9 @@ struct btr_blob_log_check_t { buf_block_buf_fix_dec(m_pcur->btr_cur.page_cur.block); } else { ut_ad(m_pcur->rel_pos == BTR_PCUR_ON); - bool ret = btr_pcur_restore_position( - BTR_MODIFY_LEAF | BTR_MODIFY_EXTERNAL, - m_pcur, m_mtr); - - ut_a(ret); + ut_a(btr_pcur_restore_position( + BTR_MODIFY_LEAF | BTR_MODIFY_EXTERNAL, m_pcur, + m_mtr) == btr_pcur_t::SAME_ALL); } *m_block = btr_pcur_get_block(m_pcur); diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc index 2c3f06da111..c31db34186c 100644 --- a/storage/innobase/btr/btr0pcur.cc +++ b/storage/innobase/btr/btr0pcur.cc @@ -253,29 +253,32 @@ struct optimistic_latch_leaves } }; -/**************************************************************//** -Restores the stored position of a persistent cursor bufferfixing the page and -obtaining the specified latches. 
If the cursor position was saved when the -(1) cursor was positioned on a user record: this function restores the position -to the last record LESS OR EQUAL to the stored record; -(2) cursor was positioned on a page infimum record: restores the position to -the last record LESS than the user record which was the successor of the page -infimum; -(3) cursor was positioned on the page supremum: restores to the first record -GREATER than the user record which was the predecessor of the supremum. -(4) cursor was positioned before the first or after the last in an empty tree: -restores to before first or after the last in the tree. -@return TRUE if the cursor position was stored when it was on a user -record and it can be restored on a user record whose ordering fields -are identical to the ones of the original user record */ -ibool -btr_pcur_restore_position_func( -/*===========================*/ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /*!< in: detached persistent cursor */ - const char* file, /*!< in: file name */ - unsigned line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mtr */ +/** Restores the stored position of a persistent cursor bufferfixing +the page and obtaining the specified latches. If the cursor position +was saved when the +(1) cursor was positioned on a user record: this function restores the +position to the last record LESS OR EQUAL to the stored record; +(2) cursor was positioned on a page infimum record: restores the +position to the last record LESS than the user record which was the +successor of the page infimum; +(3) cursor was positioned on the page supremum: restores to the first +record GREATER than the user record which was the predecessor of the +supremum. +(4) cursor was positioned before the first or after the last in an +empty tree: restores to before first or after the last in the tree. +@param latch_mode BTR_SEARCH_LEAF, ... 
+@param file file name +@param line line where called +@param mtr mtr +@return btr_pcur_t::SAME_ALL cursor position on user rec and points on +the record with the same field values as in the stored record, +btr_pcur_t::SAME_UNIQ cursor position is on user rec and points on the +record with the same unique field values as in the stored record, +btr_pcur_t::NOT_SAME cursor position is not on user rec or points on +the record with not the same uniq field values as in the stored record */ +btr_pcur_t::restore_status +btr_pcur_t::restore_position(ulint restore_latch_mode, const char *file, + unsigned line, mtr_t *mtr) { dict_index_t* index; dtuple_t* tuple; @@ -285,104 +288,104 @@ btr_pcur_restore_position_func( ut_ad(mtr->is_active()); //ut_ad(cursor->old_stored); - ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED - || cursor->pos_state == BTR_PCUR_IS_POSITIONED); + ut_ad(pos_state == BTR_PCUR_WAS_POSITIONED + || pos_state == BTR_PCUR_IS_POSITIONED); - index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor)); + index = btr_cur_get_index(&btr_cur); if (UNIV_UNLIKELY - (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE - || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) { + (rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE + || rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) { dberr_t err = DB_SUCCESS; /* In these cases we do not try an optimistic restoration, but always do a search */ err = btr_cur_open_at_index_side( - cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE, - index, latch_mode, - btr_pcur_get_btr_cur(cursor), 0, mtr); + rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE, + index, restore_latch_mode, + &btr_cur, 0, mtr); if (err != DB_SUCCESS) { ib::warn() << " Error code: " << err - << " btr_pcur_restore_position_func " + << " btr_pcur_t::restore_position " << " called from file: " << file << " line: " << line << " table: " << index->table->name << " index: " << index->name; } - cursor->latch_mode = - BTR_LATCH_MODE_WITHOUT_INTENTION(latch_mode); - cursor->pos_state = 
BTR_PCUR_IS_POSITIONED; - cursor->block_when_stored.clear(); + latch_mode = + BTR_LATCH_MODE_WITHOUT_INTENTION(restore_latch_mode); + pos_state = BTR_PCUR_IS_POSITIONED; + block_when_stored.clear(); - return(FALSE); + return NOT_SAME; } - ut_a(cursor->old_rec); - ut_a(cursor->old_n_core_fields); - ut_a(cursor->old_n_core_fields <= index->n_core_fields); - ut_a(cursor->old_n_fields); + ut_a(old_rec); + ut_a(old_n_core_fields); + ut_a(old_n_core_fields <= index->n_core_fields); + ut_a(old_n_fields); - switch (latch_mode) { + switch (restore_latch_mode) { case BTR_SEARCH_LEAF: case BTR_MODIFY_LEAF: case BTR_SEARCH_PREV: case BTR_MODIFY_PREV: /* Try optimistic restoration. */ - if (cursor->block_when_stored.run_with_hint( - optimistic_latch_leaves(cursor, &latch_mode, + if (block_when_stored.run_with_hint( + optimistic_latch_leaves(this, &restore_latch_mode, mtr))) { - cursor->pos_state = BTR_PCUR_IS_POSITIONED; - cursor->latch_mode = latch_mode; + pos_state = BTR_PCUR_IS_POSITIONED; + latch_mode = restore_latch_mode; buf_block_dbg_add_level( - btr_pcur_get_block(cursor), + btr_pcur_get_block(this), dict_index_is_ibuf(index) ? 
SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE); - if (cursor->rel_pos == BTR_PCUR_ON) { + if (rel_pos == BTR_PCUR_ON) { #ifdef UNIV_DEBUG const rec_t* rec; rec_offs offsets1_[REC_OFFS_NORMAL_SIZE]; rec_offs offsets2_[REC_OFFS_NORMAL_SIZE]; rec_offs* offsets1 = offsets1_; rec_offs* offsets2 = offsets2_; - rec = btr_pcur_get_rec(cursor); + rec = btr_pcur_get_rec(this); rec_offs_init(offsets1_); rec_offs_init(offsets2_); heap = mem_heap_create(256); - ut_ad(cursor->old_n_core_fields + ut_ad(old_n_core_fields == index->n_core_fields); offsets1 = rec_get_offsets( - cursor->old_rec, index, offsets1, - cursor->old_n_core_fields, - cursor->old_n_fields, &heap); + old_rec, index, offsets1, + old_n_core_fields, + old_n_fields, &heap); offsets2 = rec_get_offsets( rec, index, offsets2, index->n_core_fields, - cursor->old_n_fields, &heap); + old_n_fields, &heap); - ut_ad(!cmp_rec_rec(cursor->old_rec, + ut_ad(!cmp_rec_rec(old_rec, rec, offsets1, offsets2, index)); mem_heap_free(heap); #endif /* UNIV_DEBUG */ - return(TRUE); + return SAME_ALL; } /* This is the same record as stored, may need to be adjusted for BTR_PCUR_BEFORE/AFTER, depending on search mode and direction. 
*/ - if (btr_pcur_is_on_user_rec(cursor)) { - cursor->pos_state + if (btr_pcur_is_on_user_rec(this)) { + pos_state = BTR_PCUR_IS_POSITIONED_OPTIMISTIC; } - return(FALSE); + return NOT_SAME; } } @@ -390,19 +393,19 @@ btr_pcur_restore_position_func( heap = mem_heap_create(256); - tuple = dtuple_create(heap, cursor->old_n_fields); + tuple = dtuple_create(heap, old_n_fields); - dict_index_copy_types(tuple, index, cursor->old_n_fields); + dict_index_copy_types(tuple, index, old_n_fields); - rec_copy_prefix_to_dtuple(tuple, cursor->old_rec, index, - cursor->old_n_core_fields, - cursor->old_n_fields, heap); + rec_copy_prefix_to_dtuple(tuple, old_rec, index, + old_n_core_fields, + old_n_fields, heap); ut_ad(dtuple_check_typed(tuple)); /* Save the old search mode of the cursor */ - old_mode = cursor->search_mode; + old_mode = search_mode; - switch (cursor->rel_pos) { + switch (rel_pos) { case BTR_PCUR_ON: mode = PAGE_CUR_LE; break; @@ -417,41 +420,45 @@ btr_pcur_restore_position_func( mode = PAGE_CUR_UNSUPP; } - btr_pcur_open_with_no_init_func(index, tuple, mode, latch_mode, - cursor, + btr_pcur_open_with_no_init_func(index, tuple, mode, restore_latch_mode, + this, #ifdef BTR_CUR_HASH_ADAPT NULL, #endif /* BTR_CUR_HASH_ADAPT */ file, line, mtr); /* Restore the old search mode */ - cursor->search_mode = old_mode; + search_mode = old_mode; - ut_ad(cursor->rel_pos == BTR_PCUR_ON - || cursor->rel_pos == BTR_PCUR_BEFORE - || cursor->rel_pos == BTR_PCUR_AFTER); + ut_ad(rel_pos == BTR_PCUR_ON + || rel_pos == BTR_PCUR_BEFORE + || rel_pos == BTR_PCUR_AFTER); rec_offs offsets[REC_OFFS_NORMAL_SIZE]; rec_offs_init(offsets); - if (cursor->rel_pos == BTR_PCUR_ON - && btr_pcur_is_on_user_rec(cursor) - && !cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor), - rec_get_offsets(btr_pcur_get_rec(cursor), - index, offsets, - index->n_core_fields, - ULINT_UNDEFINED, &heap))) { - - /* We have to store the NEW value for the modify clock, - since the cursor can now be on a different page! 
- But we can retain the value of old_rec */ - - cursor->block_when_stored.store(btr_pcur_get_block(cursor)); - cursor->modify_clock = buf_block_get_modify_clock( - cursor->block_when_stored.block()); - cursor->old_stored = true; - - mem_heap_free(heap); - - return(TRUE); + restore_status ret_val= NOT_SAME; + if (rel_pos == BTR_PCUR_ON && btr_pcur_is_on_user_rec(this)) { + ulint n_matched_fields= 0; + if (!cmp_dtuple_rec_with_match( + tuple, btr_pcur_get_rec(this), + rec_get_offsets(btr_pcur_get_rec(this), index, offsets, + index->n_core_fields, ULINT_UNDEFINED, &heap), + &n_matched_fields)) { + + /* We have to store the NEW value for the modify clock, + since the cursor can now be on a different page! + But we can retain the value of old_rec */ + + block_when_stored.store(btr_pcur_get_block(this)); + modify_clock= buf_block_get_modify_clock( + block_when_stored.block()); + old_stored= true; + + mem_heap_free(heap); + + return SAME_ALL; + } + if (n_matched_fields >= index->n_uniq) + ret_val= SAME_UNIQ; } mem_heap_free(heap); @@ -460,9 +467,9 @@ btr_pcur_restore_position_func( to the cursor because it can now be on a different page, the record under it may have been removed, etc. 
*/ - btr_pcur_store_position(cursor, mtr); + btr_pcur_store_position(this, mtr); - return(FALSE); + return ret_val; } /*********************************************************//** diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index 1d42e889538..156dd8e25df 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -3515,7 +3515,6 @@ fts_add_doc_by_id( get_doc, clust_index, doc_pcur, offsets, &doc); if (doc.found) { - ibool success MY_ATTRIBUTE((unused)); btr_pcur_store_position(doc_pcur, &mtr); mtr_commit(&mtr); @@ -3569,12 +3568,10 @@ fts_add_doc_by_id( mtr_start(&mtr); if (i < num_idx - 1) { - - success = btr_pcur_restore_position( - BTR_SEARCH_LEAF, doc_pcur, - &mtr); - - ut_ad(success); + ut_d(btr_pcur_t::restore_status status=) + btr_pcur_restore_position( + BTR_SEARCH_LEAF, doc_pcur, &mtr); + ut_ad(status == btr_pcur_t::SAME_ALL); } } diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index fccd87ab416..77481dd8ad8 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -4139,7 +4139,8 @@ ibuf_restore_pos( ut_ad(mode == BTR_MODIFY_LEAF || BTR_LATCH_MODE_WITHOUT_INTENTION(mode) == BTR_MODIFY_TREE); - if (btr_pcur_restore_position(mode, pcur, mtr)) { + if (btr_pcur_restore_position(mode, pcur, mtr) == + btr_pcur_t::SAME_ALL) { return(TRUE); } diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h index d8f9a0961e8..bbb9831ae93 100644 --- a/storage/innobase/include/btr0pcur.h +++ b/storage/innobase/include/btr0pcur.h @@ -253,31 +253,9 @@ btr_pcur_store_position( /*====================*/ btr_pcur_t* cursor, /*!< in: persistent cursor */ mtr_t* mtr); /*!< in: mtr */ -/**************************************************************//** -Restores the stored position of a persistent cursor bufferfixing the page and -obtaining the specified latches. 
If the cursor position was saved when the -(1) cursor was positioned on a user record: this function restores the position -to the last record LESS OR EQUAL to the stored record; -(2) cursor was positioned on a page infimum record: restores the position to -the last record LESS than the user record which was the successor of the page -infimum; -(3) cursor was positioned on the page supremum: restores to the first record -GREATER than the user record which was the predecessor of the supremum. -(4) cursor was positioned before the first or after the last in an empty tree: -restores to before first or after the last in the tree. -@return TRUE if the cursor position was stored when it was on a user -record and it can be restored on a user record whose ordering fields -are identical to the ones of the original user record */ -ibool -btr_pcur_restore_position_func( -/*===========================*/ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /*!< in: detached persistent cursor */ - const char* file, /*!< in: file name */ - unsigned line, /*!< in: line where called */ - mtr_t* mtr); /*!< in: mtr */ + #define btr_pcur_restore_position(l,cur,mtr) \ - btr_pcur_restore_position_func(l,cur,__FILE__,__LINE__,mtr) + (cur)->restore_position(l,__FILE__,__LINE__,mtr) /*********************************************************//** Gets the rel_pos field for a cursor whose position has been stored. @return BTR_PCUR_ON, ... */ @@ -482,6 +460,18 @@ enum pcur_pos_t { selects, updates, and deletes. 
*/ struct btr_pcur_t{ + /** Return value of restore_position() */ + enum restore_status { + /** cursor position on user rec and points on the record with + the same field values as in the stored record */ + SAME_ALL, + /** cursor position is on user rec and points on the record with + the same unique field values as in the stored record */ + SAME_UNIQ, + /** cursor position is not on user rec or points on the record + with not the same uniq field values as in the stored record */ + NOT_SAME + }; /** a B-tree cursor */ btr_cur_t btr_cur; /** see TODO note below! @@ -538,6 +528,31 @@ struct btr_pcur_t{ /** Return the index of this persistent cursor */ dict_index_t* index() const { return(btr_cur.index); } + /** Restores the stored position of a persistent cursor bufferfixing + the page and obtaining the specified latches. If the cursor position + was saved when the + (1) cursor was positioned on a user record: this function restores the + position to the last record LESS OR EQUAL to the stored record; + (2) cursor was positioned on a page infimum record: restores the + position to the last record LESS than the user record which was the + successor of the page infimum; + (3) cursor was positioned on the page supremum: restores to the first + record GREATER than the user record which was the predecessor of the + supremum. + (4) cursor was positioned before the first or after the last in an + empty tree: restores to before first or after the last in the tree. + @param latch_mode BTR_SEARCH_LEAF, ... 
+ @param file file name + @param line line where called + @param mtr mtr + @return btr_pcur_t::SAME_ALL cursor position on user rec and points on + the record with the same field values as in the stored record, + btr_pcur_t::SAME_UNIQ cursor position is on user rec and points on the + record with the same unique field values as in the stored record, + btr_pcur_t::NOT_SAME cursor position is not on user rec or points on + the record with not the same uniq field values as in the stored record */ + restore_status restore_position(ulint latch_mode, const char *file, + unsigned line, mtr_t *mtr); }; #include "btr0pcur.inl" diff --git a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h index e2565c62169..696c58cf1c7 100644 --- a/storage/innobase/include/hash0hash.h +++ b/storage/innobase/include/hash0hash.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, MariaDB Corporation. +Copyright (c) 2018, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -31,7 +31,31 @@ Created 5/20/1997 Heikki Tuuri #include "sync0rw.h" struct hash_table_t; -struct hash_cell_t; + +struct hash_cell_t +{ + /** singly-linked, nullptr terminated list of hash buckets */ + void *node; + + /** Insert an element after another. 
+ @tparam T type of the element + @param after the element after which to insert + @param insert the being-inserted element + @param next the next-element pointer in T */ + template<typename T> + void insert_after(T &after, T &insert, T *T::*next) + { +#ifdef UNIV_DEBUG + for (const T *c= static_cast<const T*>(node); c; c= c->*next) + if (c == &after) + goto found; + ut_error; + found: +#endif + insert.*next= after.*next; + after.*next= &insert; + } +}; typedef void* hash_node_t; @@ -477,10 +501,6 @@ hash_unlock_x_all_but( hash_table_t* table, /*!< in: hash table */ rw_lock_t* keep_lock); /*!< in: lock to keep */ -struct hash_cell_t{ - void* node; /*!< hash chain node, NULL if none */ -}; - /* The hash table structure */ struct hash_table_t { enum hash_table_sync_t type; /*<! type of hash_table. */ diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index 9c3f5d57f01..e94e6b73590 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2020, MariaDB Corporation. +Copyright (c) 2017, 2020, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -888,26 +888,29 @@ public: /*********************************************************************//** Creates a new record lock and inserts it to the lock queue. Does NOT check for deadlocks or lock compatibility! 
+@param[in] c_lock conflicting lock +@param[in] thr thread owning trx +@param[in] type_mode lock mode and wait flag, type is ignored and replaced by +LOCK_REC +@param[in] block buffer block containing the record +@param[in] heap_no heap number of the record +@param[in] index index of record +@param[in,out] trx transaction +@param[in] caller_owns_trx_mutex TRUE if caller owns trx mutex +@param[in] insert_before_waiting if true, inserts new B-tree record lock +just after the last non-waiting lock of the current transaction which is +located before the first waiting for the current transaction lock, otherwise +the lock is inserted at the end of the queue @return created lock */ UNIV_INLINE -lock_t* -lock_rec_create( -/*============*/ +lock_t *lock_rec_create(lock_t *c_lock, #ifdef WITH_WSREP - lock_t* c_lock, /*!< conflicting lock */ - que_thr_t* thr, /*!< thread owning trx */ + que_thr_t *thr, #endif - ulint type_mode,/*!< in: lock mode and wait - flag, type is ignored and - replaced by LOCK_REC */ - const buf_block_t* block, /*!< in: buffer block containing - the record */ - ulint heap_no,/*!< in: heap number of the record */ - dict_index_t* index, /*!< in: index of record */ - trx_t* trx, /*!< in,out: transaction */ - bool caller_owns_trx_mutex); - /*!< in: true if caller owns - trx mutex */ + ulint type_mode, const buf_block_t *block, + ulint heap_no, dict_index_t *index, trx_t *trx, + bool caller_owns_trx_mutex, + bool insert_before_waiting= false); /*************************************************************//** Removes a record lock request, waiting or granted, from the queue. */ @@ -920,6 +923,7 @@ lock_rec_discard( /** Create a new record lock and inserts it to the lock queue, without checking for deadlocks or conflicts. +@param[in] c_lock conflicting lock @param[in] type_mode lock mode and wait flag; type will be replaced with LOCK_REC @param[in] space tablespace id @@ -929,11 +933,15 @@ without checking for deadlocks or conflicts. 
@param[in] index the index tree @param[in,out] trx transaction @param[in] holds_trx_mutex whether the caller holds trx->mutex +@param[in] insert_before_waiting if true, inserts new B-tree record lock +just after the last non-waiting lock of the current transaction which is +located before the first waiting for the current transaction lock, otherwise +the lock is inserted at the end of the queue @return created lock */ lock_t* lock_rec_create_low( + lock_t* c_lock, #ifdef WITH_WSREP - lock_t* c_lock, /*!< conflicting lock */ que_thr_t* thr, /*!< thread owning trx */ #endif ulint type_mode, @@ -943,9 +951,12 @@ lock_rec_create_low( ulint heap_no, dict_index_t* index, trx_t* trx, - bool holds_trx_mutex); + bool holds_trx_mutex, + bool insert_before_waiting = false); + /** Enqueue a waiting request for a lock which cannot be granted immediately. Check for deadlocks. +@param[in] c_lock conflicting lock @param[in] type_mode the requested lock mode (LOCK_S or LOCK_X) possibly ORed with LOCK_GAP or LOCK_REC_NOT_GAP, ORed with @@ -964,9 +975,7 @@ Check for deadlocks. (or it happened to commit) */ dberr_t lock_rec_enqueue_waiting( -#ifdef WITH_WSREP - lock_t* c_lock, /*!< conflicting lock */ -#endif + lock_t* c_lock, ulint type_mode, const buf_block_t* block, ulint heap_no, diff --git a/storage/innobase/include/lock0lock.inl b/storage/innobase/include/lock0lock.inl index abe5052627b..c6719946b79 100644 --- a/storage/innobase/include/lock0lock.inl +++ b/storage/innobase/include/lock0lock.inl @@ -101,34 +101,37 @@ lock_hash_get( /*********************************************************************//** Creates a new record lock and inserts it to the lock queue. Does NOT check for deadlocks or lock compatibility! 
+@param[in] c_lock conflicting lock +@param[in] thr thread owning trx +@param[in] type_mode lock mode and wait flag, type is ignored and replaced by +LOCK_REC +@param[in] block buffer block containing the record +@param[in] heap_no heap number of the record +@param[in] index index of record +@param[in,out] trx transaction +@param[in] caller_owns_trx_mutex TRUE if caller owns trx mutex +@param[in] insert_before_waiting if true, inserts new B-tree record lock +just after the last non-waiting lock of the current transaction which is +located before the first waiting for the current transaction lock, otherwise +the lock is inserted at the end of the queue @return created lock */ UNIV_INLINE -lock_t* -lock_rec_create( -/*============*/ +lock_t *lock_rec_create(lock_t *c_lock, #ifdef WITH_WSREP - lock_t* c_lock, /*!< conflicting lock */ - que_thr_t* thr, /*!< thread owning trx */ + que_thr_t *thr, #endif - ulint type_mode,/*!< in: lock mode and wait - flag, type is ignored and - replaced by LOCK_REC */ - const buf_block_t* block, /*!< in: buffer block containing - the record */ - ulint heap_no,/*!< in: heap number of the record */ - dict_index_t* index, /*!< in: index of record */ - trx_t* trx, /*!< in,out: transaction */ - bool caller_owns_trx_mutex) - /*!< in: TRUE if caller owns - trx mutex */ + ulint type_mode, const buf_block_t *block, + ulint heap_no, dict_index_t *index, trx_t *trx, + bool caller_owns_trx_mutex, + bool insert_before_waiting) { btr_assert_not_corrupted(block, index); - return lock_rec_create_low( + return lock_rec_create_low(c_lock, #ifdef WITH_WSREP - c_lock, thr, + thr, #endif type_mode, block->page.id.space(), block->page.id.page_no(), - block->frame, heap_no, - index, trx, caller_owns_trx_mutex); + block->frame, heap_no, index, trx, + caller_owns_trx_mutex, insert_before_waiting); } diff --git a/storage/innobase/include/lock0priv.h b/storage/innobase/include/lock0priv.h index b7dcbfa2b86..db3689a2281 100644 --- 
a/storage/innobase/include/lock0priv.h +++ b/storage/innobase/include/lock0priv.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2018, MariaDB Corporation. +Copyright (c) 2015, 2018, 2022 MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -585,6 +585,9 @@ lock_rec_get_next_const( /*********************************************************************//** Gets the first explicit lock request on a record. +@param[in] hash hash chain the lock on +@param[in] page_id page id +@param[in] heap_no heap number of the record @return first lock, NULL if none exists */ UNIV_INLINE lock_t* @@ -660,15 +663,26 @@ lock_table_has( /** Set the wait status of a lock. @param[in,out] lock lock that will be waited for -@param[in,out] trx transaction that will wait for the lock */ -inline void lock_set_lock_and_trx_wait(lock_t* lock, trx_t* trx) +@param[in,out] trx transaction that will wait for the lock +@param[in] c_lock conflicting lock */ +inline void lock_set_lock_and_trx_wait(lock_t* lock, trx_t* trx, + const lock_t *c_lock) { ut_ad(lock); ut_ad(lock->trx == trx); - ut_ad(trx->lock.wait_lock == NULL); ut_ad(lock_mutex_own()); ut_ad(trx_mutex_own(trx)); + if (trx->lock.wait_trx) { + ut_ad(!c_lock || trx->lock.wait_trx == c_lock->trx); + ut_ad(trx->lock.wait_lock); + ut_ad((*trx->lock.wait_lock).trx == trx); + } else { + ut_ad(c_lock); + trx->lock.wait_trx = c_lock->trx; + ut_ad(!trx->lock.wait_lock); + } + trx->lock.wait_lock = lock; lock->type_mode |= LOCK_WAIT; } @@ -681,6 +695,7 @@ inline void lock_reset_lock_and_trx_wait(lock_t* lock) ut_ad(lock_mutex_own()); ut_ad(lock->trx->lock.wait_lock == NULL || lock->trx->lock.wait_lock == lock); + lock->trx->lock.wait_trx= NULL; lock->trx->lock.wait_lock = NULL; 
lock->type_mode &= ~LOCK_WAIT; } diff --git a/storage/innobase/include/lock0priv.inl b/storage/innobase/include/lock0priv.inl index 8bb145e41fc..61e8ff18ab1 100644 --- a/storage/innobase/include/lock0priv.inl +++ b/storage/innobase/include/lock0priv.inl @@ -145,22 +145,19 @@ lock_rec_get_first_on_page_addr( return(NULL); } -/*********************************************************************//** -Gets the first record lock on a page, where the page is identified by a +/** Gets the first record lock on a page, where the page is identified by a pointer to it. +@param[in] lock_hash lock hash table +@param[in] space page's space id +@param[in] page_no page number +@param[in] hash page's hash value in records hash table @return first lock, NULL if none exists */ UNIV_INLINE -lock_t* -lock_rec_get_first_on_page( -/*=======================*/ - hash_table_t* lock_hash, /*!< in: lock hash table */ - const buf_block_t* block) /*!< in: buffer block */ +lock_t *lock_rec_get_first_on_page(hash_table_t *lock_hash, ulint space, + ulint page_no, ulint hash) { ut_ad(lock_mutex_own()); - ulint space = block->page.id.space(); - ulint page_no = block->page.id.page_no(); - ulint hash = buf_block_get_lock_hash_val(block); for (lock_t* lock = static_cast<lock_t*>( HASH_GET_FIRST(lock_hash, hash)); @@ -177,6 +174,20 @@ lock_rec_get_first_on_page( return(NULL); } +/** Gets the first record lock on a page, where the page is identified by a +pointer to it. +@param[in] lock_hash lock hash table +@param[in] block buffer block +@return first lock, NULL if none exists */ +UNIV_INLINE +lock_t *lock_rec_get_first_on_page(hash_table_t *lock_hash, + const buf_block_t *block) +{ + return lock_rec_get_first_on_page(lock_hash, block->page.id.space(), + block->page.id.page_no(), + buf_block_get_lock_hash_val(block)); +} + /*********************************************************************//** Gets the next explicit lock request on a record. 
@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */ @@ -210,21 +221,21 @@ lock_rec_get_next_const( return(lock_rec_get_next(heap_no, (lock_t*) lock)); } -/*********************************************************************//** -Gets the first explicit lock request on a record. +/** Gets the first explicit lock request on a record. +@param[in] hash hash chain the lock on +@param[in] space record's space id +@param[in] page_no record's page number +@param[in] lock_hash_val page's hash value in records hash table +@param[in] heap_no heap number of the record @return first lock, NULL if none exists */ UNIV_INLINE -lock_t* -lock_rec_get_first( -/*===============*/ - hash_table_t* hash, /*!< in: hash chain the lock on */ - const buf_block_t* block, /*!< in: block containing the record */ - ulint heap_no)/*!< in: heap number of the record */ +lock_t *lock_rec_get_first(hash_table_t *hash, ulint space, ulint page_no, + uint32_t lock_hash_val, ulint heap_no) { ut_ad(lock_mutex_own()); - for (lock_t* lock = lock_rec_get_first_on_page(hash, block); lock; - lock = lock_rec_get_next_on_page(lock)) { + for (lock_t* lock = lock_rec_get_first_on_page(hash, space, page_no, + lock_hash_val); lock; lock = lock_rec_get_next_on_page(lock)) { if (lock_rec_get_nth_bit(lock, heap_no)) { return(lock); } @@ -233,6 +244,20 @@ lock_rec_get_first( return(NULL); } +/** Gets the first explicit lock request on a record. +@param[in] hash hash chain the lock on +@param[in] block block containing the record +@param[in] heap_no heap number of the record +@return first lock, NULL if none exists */ +UNIV_INLINE +lock_t *lock_rec_get_first(hash_table_t *hash, const buf_block_t *block, + ulint heap_no) +{ + return lock_rec_get_first(hash, block->page.id.space(), + block->page.id.page_no(), + buf_block_get_lock_hash_val(block), heap_no); +} + /*********************************************************************//** Gets the nth bit of a record lock. 
@return TRUE if bit set also if i == ULINT_UNDEFINED return FALSE*/ diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index b84f458374e..8e51d3b45af 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -425,7 +425,9 @@ struct trx_lock_t { trx_que_t que_state; /*!< valid when trx->state == TRX_STATE_ACTIVE: TRX_QUE_RUNNING, TRX_QUE_LOCK_WAIT, ... */ - + /** Transaction being waited for; protected by the same mutexes as + wait_lock */ + trx_t* wait_trx; lock_t* wait_lock; /*!< if trx execution state is TRX_QUE_LOCK_WAIT, this points to the lock request, otherwise this is diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 9949ebded8e..969b1679ff4 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2021, MariaDB Corporation. +Copyright (c) 2014, 2021, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1141,19 +1141,18 @@ static void wsrep_kill_victim(const trx_t * const trx, const lock_t *lock) /*********************************************************************//** Checks if some other transaction has a conflicting explicit lock request in the queue, so that we have to wait. 
+@param[in] mode LOCK_S or LOCK_X, possibly ORed to LOCK_GAP or LOCK_REC_NOT_GAP, +LOCK_INSERT_INTENTION +@param[in] block buffer block containing the record +@param[in] heap_no heap number of the record +@param[in] trx our transaction +@param[out] was_ignored true if conflicting locks waiting for the current +transaction were ignored @return lock or NULL */ -static -lock_t* -lock_rec_other_has_conflicting( -/*===========================*/ - ulint mode, /*!< in: LOCK_S or LOCK_X, - possibly ORed to LOCK_GAP or - LOC_REC_NOT_GAP, - LOCK_INSERT_INTENTION */ - const buf_block_t* block, /*!< in: buffer block containing - the record */ - ulint heap_no,/*!< in: heap number of the record */ - const trx_t* trx) /*!< in: our transaction */ +static lock_t *lock_rec_other_has_conflicting(ulint mode, + const buf_block_t *block, + ulint heap_no, const trx_t *trx, + bool *was_ignored= NULL) { lock_t* lock; @@ -1165,6 +1164,16 @@ lock_rec_other_has_conflicting( lock != NULL; lock = lock_rec_get_next(heap_no, lock)) { + /* There can't be lock loops for one record, because + all waiting locks of the record will always wait for the same + lock of the record in a cell array, and check for + conflicting lock will always start with the first lock for the + heap_no, and go ahead with the same order (the order of the + locks in the cell array) */ + if (lock_get_wait(lock) && lock->trx->lock.wait_trx == trx) { + if (was_ignored) *was_ignored= true; + continue; + } if (lock_rec_has_to_wait(true, trx, mode, lock, is_supremum)) { #ifdef WITH_WSREP if (trx->is_wsrep()) { @@ -1321,6 +1330,7 @@ static void check_trx_state(const trx_t *trx) /** Create a new record lock and inserts it to the lock queue, without checking for deadlocks or conflicts. +@param[in] c_lock conflicting lock @param[in] type_mode lock mode and wait flag; type will be replaced with LOCK_REC @param[in] space tablespace id @@ -1330,11 +1340,15 @@ without checking for deadlocks or conflicts.
@param[in] index the index tree @param[in,out] trx transaction @param[in] holds_trx_mutex whether the caller holds trx->mutex +@param[in] insert_before_waiting if true, inserts new B-tree record lock +just after the last non-waiting lock of the current transaction which is +located before the first waiting for the current transaction lock, otherwise +the lock is inserted at the end of the queue @return created lock */ lock_t* lock_rec_create_low( + lock_t* c_lock, #ifdef WITH_WSREP - lock_t* c_lock, /*!< conflicting lock */ que_thr_t* thr, /*!< thread owning trx */ #endif ulint type_mode, @@ -1344,7 +1358,8 @@ lock_rec_create_low( ulint heap_no, dict_index_t* index, trx_t* trx, - bool holds_trx_mutex) + bool holds_trx_mutex, + bool insert_before_waiting) { lock_t* lock; ulint n_bits; @@ -1454,7 +1469,7 @@ lock_rec_create_low( } trx->lock.que_state = TRX_QUE_LOCK_WAIT; - lock_set_lock_and_trx_wait(lock, trx); + lock_set_lock_and_trx_wait(lock, trx, c_lock); UT_LIST_ADD_LAST(trx->lock.trx_locks, lock); trx->lock.wait_thr = thr; @@ -1482,15 +1497,46 @@ lock_rec_create_low( trx_mutex_exit(c_lock->trx); } else #endif /* WITH_WSREP */ - if (!(type_mode & (LOCK_WAIT | LOCK_PREDICATE | LOCK_PRDT_PAGE)) - && innodb_lock_schedule_algorithm - == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS - && !thd_is_replication_slave_thread(trx->mysql_thd)) { - HASH_PREPEND(lock_t, hash, lock_sys.rec_hash, - lock_rec_fold(space, page_no), lock); - } else { - HASH_INSERT(lock_t, hash, lock_hash_get(type_mode), - lock_rec_fold(space, page_no), lock); + if (insert_before_waiting + && !(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE))) { + /* Try to insert the lock just after the last non-waiting + lock of the current transaction which immediately + precedes the first waiting lock request. 
*/ + uint32_t lock_hash_val = lock_rec_hash(space, page_no); + hash_cell_t& cell = lock_sys.rec_hash->array[lock_hash_val]; + + lock_t* last_non_waiting = NULL; + + for (lock_t* l = lock_rec_get_first(lock_sys.rec_hash, space, + page_no, lock_hash_val, heap_no); l; + l = lock_rec_get_next(heap_no, l)) { + if (lock_get_wait(lock) + && l->trx->lock.wait_trx == trx) { + break; + } + if (l->trx == trx) { + last_non_waiting = l; + } + } + + if (!last_non_waiting) { + goto append_last; + } + + cell.insert_after(*last_non_waiting, *lock, &lock_t::hash); + } + else { +append_last: + if (!(type_mode & (LOCK_WAIT | LOCK_PREDICATE | LOCK_PRDT_PAGE)) + && innodb_lock_schedule_algorithm + == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS + && !thd_is_replication_slave_thread(trx->mysql_thd)) { + HASH_PREPEND(lock_t, hash, lock_sys.rec_hash, + lock_rec_fold(space, page_no), lock); + } else { + HASH_INSERT(lock_t, hash, lock_hash_get(type_mode), + lock_rec_fold(space, page_no), lock); + } } if (!holds_trx_mutex) { @@ -1498,7 +1544,7 @@ lock_rec_create_low( } ut_ad(trx_mutex_own(trx)); if (type_mode & LOCK_WAIT) { - lock_set_lock_and_trx_wait(lock, trx); + lock_set_lock_and_trx_wait(lock, trx, c_lock); } UT_LIST_ADD_LAST(trx->lock.trx_locks, lock); if (!holds_trx_mutex) { @@ -1658,6 +1704,7 @@ lock_rec_insert_to_head( /** Enqueue a waiting request for a lock which cannot be granted immediately. Check for deadlocks. +@param[in] c_lock conflicting lock @param[in] type_mode the requested lock mode (LOCK_S or LOCK_X) possibly ORed with LOCK_GAP or LOCK_REC_NOT_GAP, ORed with @@ -1676,9 +1723,7 @@ Check for deadlocks. (or it happened to commit) */ dberr_t lock_rec_enqueue_waiting( -#ifdef WITH_WSREP - lock_t* c_lock, /*!< conflicting lock */ -#endif + lock_t* c_lock, ulint type_mode, const buf_block_t* block, ulint heap_no, @@ -1716,9 +1761,9 @@ lock_rec_enqueue_waiting( /* Enqueue the lock request that will wait to be granted, note that we already own the trx mutex. 
*/ - lock_t* lock = lock_rec_create( + lock_t* lock = lock_rec_create(c_lock, #ifdef WITH_WSREP - c_lock, thr, + thr, #endif type_mode | LOCK_WAIT, block, heap_no, index, trx, TRUE); @@ -1782,22 +1827,20 @@ on the record, and the request to be added is not a waiting request, we can reuse a suitable record lock object already existing on the same page, just setting the appropriate bit in its bitmap. This is a low-level function which does NOT check for deadlocks or lock compatibility! +@param[in] type_mode lock mode, wait, gap etc. flags; type is ignored and +replaced by LOCK_REC +@param[in] block buffer block containing the record +@param[in] heap_no heap number of the record +@param[in] index index of record +@param[in,out] trx transaction +@param[in] caller_owns_trx_mutex, TRUE if caller owns the transaction mutex +@param[in] insert_before_waiting true=insert B-tree record lock right before +a waiting lock request; false=insert the lock at the end of the queue @return lock where the bit was set */ -static -void -lock_rec_add_to_queue( -/*==================*/ - ulint type_mode,/*!< in: lock mode, wait, gap - etc. flags; type is ignored - and replaced by LOCK_REC */ - const buf_block_t* block, /*!< in: buffer block containing - the record */ - ulint heap_no,/*!< in: heap number of the record */ - dict_index_t* index, /*!< in: index of record */ - trx_t* trx, /*!< in/out: transaction */ - bool caller_owns_trx_mutex) - /*!< in: TRUE if caller owns the - transaction mutex */ +static void lock_rec_add_to_queue(ulint type_mode, const buf_block_t *block, + ulint heap_no, dict_index_t *index, + trx_t *trx, bool caller_owns_trx_mutex, + bool insert_before_waiting= false) { #ifdef UNIV_DEBUG ut_ad(lock_mutex_own()); @@ -1886,11 +1929,16 @@ lock_rec_add_to_queue( } } - lock_rec_create( + /* Note: We will not pass any conflicting lock to lock_rec_create(), + because we should be moving an existing waiting lock request. 
*/ + ut_ad(!(type_mode & LOCK_WAIT) || trx->lock.wait_trx); + + lock_rec_create(NULL, #ifdef WITH_WSREP - NULL, NULL, + NULL, #endif - type_mode, block, heap_no, index, trx, caller_owns_trx_mutex); + type_mode, block, heap_no, index, trx, caller_owns_trx_mutex, + insert_before_waiting); } /*********************************************************************//** @@ -1946,28 +1994,23 @@ lock_rec_lock( /* Do nothing if the trx already has a strong enough lock on rec */ if (!lock_rec_has_expl(mode, block, heap_no, trx)) { - if ( -#ifdef WITH_WSREP - lock_t *c_lock= -#endif - lock_rec_other_has_conflicting(mode, block, heap_no, trx)) + bool was_ignored = false; + if (lock_t *c_lock= lock_rec_other_has_conflicting( + mode, block, heap_no, trx, &was_ignored)) { /* If another transaction has a non-gap conflicting request in the queue, as this transaction does not have a lock strong enough already granted on the record, we have to wait. */ - err = lock_rec_enqueue_waiting( -#ifdef WITH_WSREP - c_lock, -#endif /* WITH_WSREP */ - mode, block, heap_no, index, thr, NULL); + err = lock_rec_enqueue_waiting(c_lock, mode, block, heap_no, index, + thr, NULL); } else if (!impl) { /* Set the requested lock on the record. */ lock_rec_add_to_queue(LOCK_REC | mode, block, heap_no, index, trx, - true); + true, was_ignored); err= DB_SUCCESS_LOCKED_REC; } } @@ -1993,9 +2036,9 @@ lock_rec_lock( Note that we don't own the trx mutex. 
*/ if (!impl) - lock_rec_create( + lock_rec_create(NULL, #ifdef WITH_WSREP - NULL, NULL, + NULL, #endif mode, block, heap_no, index, trx, false); @@ -2234,8 +2277,17 @@ static void lock_rec_dequeue_from_page(lock_t* in_lock) if (!lock_get_wait(lock)) { continue; } - const lock_t* c = lock_rec_has_to_wait_in_queue(lock); - if (!c) { + + ut_ad(lock->trx->lock.wait_trx); + ut_ad(lock->trx->lock.wait_lock); + + if (const lock_t* c = lock_rec_has_to_wait_in_queue( + lock)) { + trx_mutex_enter(lock->trx); + lock->trx->lock.wait_trx = c->trx; + trx_mutex_exit(lock->trx); + } + else { /* Grant the lock */ ut_ad(lock->trx != in_lock->trx); lock_grant(lock); @@ -2509,7 +2561,8 @@ lock_rec_move_low( lock_rec_reset_nth_bit(lock, donator_heap_no); if (type_mode & LOCK_WAIT) { - lock_reset_lock_and_trx_wait(lock); + ut_ad(lock->trx->lock.wait_lock == lock); + lock->type_mode &= ~LOCK_WAIT; } /* Note that we FIRST reset the bit, and then set the lock: @@ -2626,8 +2679,8 @@ lock_move_reorganize_page( lock_rec_bitmap_reset(lock); if (lock_get_wait(lock)) { - - lock_reset_lock_and_trx_wait(lock); + ut_ad(lock->trx->lock.wait_lock == lock); + lock->type_mode&= ~LOCK_WAIT; } lock = lock_rec_get_next_on_page(lock); @@ -2802,7 +2855,9 @@ lock_move_rec_list_end( ut_ad(!page_rec_is_metadata(orec)); if (type_mode & LOCK_WAIT) { - lock_reset_lock_and_trx_wait(lock); + ut_ad(lock->trx->lock.wait_lock == + lock); + lock->type_mode&= ~LOCK_WAIT; } lock_rec_add_to_queue( @@ -2899,7 +2954,9 @@ lock_move_rec_list_start( ut_ad(!page_rec_is_metadata(prev)); if (type_mode & LOCK_WAIT) { - lock_reset_lock_and_trx_wait(lock); + ut_ad(lock->trx->lock.wait_lock + == lock); + lock->type_mode&= ~LOCK_WAIT; } lock_rec_add_to_queue( @@ -2994,7 +3051,9 @@ lock_rtr_move_rec_list( if (rec1_heap_no < lock->un_member.rec_lock.n_bits && lock_rec_reset_nth_bit(lock, rec1_heap_no)) { if (type_mode & LOCK_WAIT) { - lock_reset_lock_and_trx_wait(lock); + ut_ad(lock->trx->lock.wait_lock + == lock); + lock->type_mode&= 
~LOCK_WAIT; } lock_rec_add_to_queue( @@ -3448,10 +3507,8 @@ lock_table_create( in dictionary cache */ ulint type_mode,/*!< in: lock mode possibly ORed with LOCK_WAIT */ - trx_t* trx /*!< in: trx */ -#ifdef WITH_WSREP - , lock_t* c_lock = NULL /*!< in: conflicting lock */ -#endif + trx_t* trx, /*!< in: trx */ + lock_t* c_lock = NULL /*!< in: conflicting lock */ ) { lock_t* lock; @@ -3534,8 +3591,7 @@ lock_table_create( ut_list_append(table->locks, lock, TableLockGetNode()); if (type_mode & LOCK_WAIT) { - - lock_set_lock_and_trx_wait(lock, trx); + lock_set_lock_and_trx_wait(lock, trx, c_lock); } lock->trx->lock.table_locks.push_back(lock); @@ -3690,10 +3746,8 @@ lock_table_enqueue_waiting( ulint mode, /*!< in: lock mode this transaction is requesting */ dict_table_t* table, /*!< in/out: table */ - que_thr_t* thr /*!< in: query thread */ -#ifdef WITH_WSREP - , lock_t* c_lock /*!< in: conflicting lock or NULL */ -#endif + que_thr_t* thr, /*!< in: query thread */ + lock_t* c_lock /*!< in: conflicting lock or NULL */ ) { trx_t* trx; @@ -3724,11 +3778,7 @@ lock_table_enqueue_waiting( #endif /* WITH_WSREP */ /* Enqueue the lock request that will wait to be granted */ - lock = lock_table_create(table, ulint(mode) | LOCK_WAIT, trx -#ifdef WITH_WSREP - , c_lock -#endif - ); + lock = lock_table_create(table, ulint(mode) | LOCK_WAIT, trx, c_lock); const trx_t* victim_trx = DeadlockChecker::check_and_resolve(lock, trx); @@ -3884,11 +3934,7 @@ lock_table( if (wait_for != NULL) { err = lock_table_enqueue_waiting(ulint(mode) | flags, table, - thr -#ifdef WITH_WSREP - , wait_for -#endif - ); + thr, wait_for); } else { lock_table_create(table, ulint(mode) | flags, trx); @@ -3936,7 +3982,7 @@ lock_table_ix_resurrect( Checks if a waiting table lock request still has to wait in a queue. 
@return TRUE if still has to wait */ static -bool +const lock_t* lock_table_has_to_wait_in_queue( /*============================*/ const lock_t* wait_lock) /*!< in: waiting table lock */ @@ -3955,11 +4001,11 @@ lock_table_has_to_wait_in_queue( if (lock_has_to_wait(wait_lock, lock)) { - return(true); + return(lock); } } - return(false); + return(NULL); } /*************************************************************//** @@ -3988,9 +4034,17 @@ lock_table_dequeue( lock != NULL; lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) { - if (lock_get_wait(lock) - && !lock_table_has_to_wait_in_queue(lock)) { + if (!lock_get_wait(lock)) + continue; + + ut_ad(lock->trx->lock.wait_trx); + ut_ad(lock->trx->lock.wait_lock); + if (const lock_t *c = lock_table_has_to_wait_in_queue(lock)) { + trx_mutex_enter(lock->trx); + lock->trx->lock.wait_trx = c->trx; + trx_mutex_exit(lock->trx); + } else { /* Grant the lock */ ut_ad(in_lock->trx != lock->trx); lock_grant(lock); @@ -4186,8 +4240,16 @@ released: if (!lock_get_wait(lock)) { continue; } - const lock_t* c = lock_rec_has_to_wait_in_queue(lock); - if (!c) { + ut_ad(lock->trx->lock.wait_trx); + ut_ad(lock->trx->lock.wait_lock); + if (const lock_t* c = lock_rec_has_to_wait_in_queue( + lock)) { + if (lock->trx != trx) + trx_mutex_enter(lock->trx); + lock->trx->lock.wait_trx = c->trx; + if (lock->trx != trx) + trx_mutex_exit(lock->trx); + } else { /* Grant the lock */ ut_ad(trx != lock->trx); lock_grant(lock); @@ -4916,7 +4978,7 @@ func_exit: wsrep_report_bf_lock_wait(impl_trx->mysql_thd, impl_trx->id); wsrep_report_bf_lock_wait(other_lock->trx->mysql_thd, other_lock->trx->id); - if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, + if (!lock_rec_has_expl(LOCK_S | LOCK_REC_NOT_GAP, block, heap_no, impl_trx)) { ib::info() << "WSREP impl BF lock conflict"; @@ -4925,7 +4987,20 @@ func_exit: #endif /* WITH_WSREP */ { ut_ad(lock_get_wait(other_lock)); - ut_ad(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, + /* After MDEV-27025 fix the 
following case is + possible: + 1. trx 1 acquires S-lock; + 2. trx 2 creates X-lock waiting for trx 1; + 3. trx 1 creates implicit lock, as + lock_rec_other_has_conflicting() returns no + conflicting trx 2 X-lock, the explicit lock + will not be created; + 4. trx 3 creates waiting X-lock, + it will wait for S-lock of trx 1. + That is why we relaxing the condition here and + check only for S-lock. + */ + ut_ad(lock_rec_has_expl(LOCK_S | LOCK_REC_NOT_GAP, block, heap_no, impl_trx)); } } @@ -5331,19 +5406,13 @@ lock_rec_insert_check_and_lock( const ulint type_mode = LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION; - if ( -#ifdef WITH_WSREP - lock_t* c_lock = -#endif /* WITH_WSREP */ + if (lock_t* c_lock = lock_rec_other_has_conflicting(type_mode, block, heap_no, trx)) { /* Note that we may get DB_SUCCESS also here! */ trx_mutex_enter(trx); - err = lock_rec_enqueue_waiting( -#ifdef WITH_WSREP - c_lock, -#endif /* WITH_WSREP */ - type_mode, block, heap_no, index, thr, NULL); + err = lock_rec_enqueue_waiting(c_lock, type_mode, block, + heap_no, index, thr, NULL); trx_mutex_exit(trx); } else { @@ -5420,7 +5489,7 @@ lock_rec_convert_impl_to_expl_for_trx( && !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block, heap_no, trx)) { lock_rec_add_to_queue(LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP, - block, heap_no, index, trx, true); + block, heap_no, index, trx, true, true); } lock_mutex_exit(); diff --git a/storage/innobase/lock/lock0prdt.cc b/storage/innobase/lock/lock0prdt.cc index 9827243177d..15624cf79af 100644 --- a/storage/innobase/lock/lock0prdt.cc +++ b/storage/innobase/lock/lock0prdt.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, MariaDB Corporation. +Copyright (c) 2018, 2022 MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -487,9 +487,13 @@ lock_prdt_add_to_queue( } } - lock = lock_rec_create( + /* Note: We will not pass any conflicting lock to lock_rec_create(), + because we should be moving an existing waiting lock request. */ + ut_ad(!(type_mode & LOCK_WAIT) || trx->lock.wait_trx); + + lock = lock_rec_create(NULL, #ifdef WITH_WSREP - NULL, NULL, /* FIXME: replicate SPATIAL INDEX locks */ + NULL, /* FIXME: replicate SPATIAL INDEX locks */ #endif type_mode, block, PRDT_HEAPNO, index, trx, caller_owns_trx_mutex); @@ -579,9 +583,7 @@ lock_prdt_insert_check_and_lock( trx_mutex_enter(trx); err = lock_rec_enqueue_waiting( -#ifdef WITH_WSREP NULL, /* FIXME: replicate SPATIAL INDEX locks */ -#endif LOCK_X | LOCK_PREDICATE | LOCK_INSERT_INTENTION, block, PRDT_HEAPNO, index, thr, prdt); @@ -829,9 +831,9 @@ lock_prdt_lock( lock_t* lock = lock_rec_get_first_on_page(hash, block); if (lock == NULL) { - lock = lock_rec_create( + lock = lock_rec_create(NULL, #ifdef WITH_WSREP - NULL, NULL, /* FIXME: replicate SPATIAL INDEX locks */ + NULL, /* FIXME: replicate SPATIAL INDEX locks */ #endif ulint(mode) | type_mode, block, PRDT_HEAPNO, index, trx, FALSE); @@ -861,10 +863,8 @@ lock_prdt_lock( if (wait_for != NULL) { err = lock_rec_enqueue_waiting( -#ifdef WITH_WSREP NULL, /* FIXME: replicate SPATIAL INDEX locks */ -#endif ulint(mode) | type_mode, block, PRDT_HEAPNO, index, thr, prdt); @@ -948,9 +948,9 @@ lock_place_prdt_page_lock( } if (lock == NULL) { - lock = lock_rec_create_low( + lock = lock_rec_create_low(NULL, #ifdef WITH_WSREP - NULL, NULL, /* FIXME: replicate SPATIAL INDEX locks */ + NULL, /* FIXME: replicate SPATIAL INDEX locks */ #endif mode, space, page_no, NULL, PRDT_HEAPNO, index, trx, FALSE); diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index a4fc2db1529..d61dc2c2280 100644 --- 
a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -73,7 +73,9 @@ row_purge_reposition_pcur( if (node->found_clust) { ut_ad(node->validate_pcur()); - node->found_clust = btr_pcur_restore_position(mode, &node->pcur, mtr); + node->found_clust = + btr_pcur_restore_position(mode, &node->pcur, mtr) == + btr_pcur_t::SAME_ALL; } else { node->found_clust = row_search_on_row_ref( @@ -256,7 +258,7 @@ static bool row_purge_restore_vsec_cur( return btr_pcur_restore_position( is_tree ? BTR_PURGE_TREE : BTR_PURGE_LEAF, - sec_pcur, sec_mtr); + sec_pcur, sec_mtr) == btr_pcur_t::SAME_ALL; } /** Determines if it is possible to remove a secondary index entry. diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index 705e714d216..781769dbf04 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -1434,8 +1434,9 @@ row_sel_restore_pcur_pos( relative_position = btr_pcur_get_rel_pos(&(plan->pcur)); - equal_position = btr_pcur_restore_position(BTR_SEARCH_LEAF, - &(plan->pcur), mtr); + equal_position = + btr_pcur_restore_position(BTR_SEARCH_LEAF, &plan->pcur, mtr) == + btr_pcur_t::SAME_ALL; /* If the cursor is traveling upwards, and relative_position is @@ -3579,36 +3580,29 @@ err_exit: return(err); } -/********************************************************************//** -Restores cursor position after it has been stored. We have to take into +/** Restores cursor position after it has been stored. We have to take into account that the record cursor was positioned on may have been deleted. Then we may have to move the cursor one step up or down. 
+@param[out] same_user_rec true if we were able to restore the cursor on a user +record with the same ordering prefix in in the B-tree index +@param[in] latch_mode latch mode wished in restoration +@param[in] pcur cursor whose position has been stored +@param[in] moves_up true if the cursor moves up in the index +@param[in] mtr mtr; CAUTION: may commit mtr temporarily! +@param[in] select_lock_type select lock type: LOCK_NONE, LOCK_S, or LOCK_X @return true if we may need to process the record the cursor is now positioned on (i.e. we should not go to the next record yet) */ -static -bool -sel_restore_position_for_mysql( -/*===========================*/ - ibool* same_user_rec, /*!< out: TRUE if we were able to restore - the cursor on a user record with the - same ordering prefix in in the - B-tree index */ - ulint latch_mode, /*!< in: latch mode wished in - restoration */ - btr_pcur_t* pcur, /*!< in: cursor whose position - has been stored */ - ibool moves_up, /*!< in: TRUE if the cursor moves up - in the index */ - mtr_t* mtr) /*!< in: mtr; CAUTION: may commit - mtr temporarily! 
*/ +static bool sel_restore_position_for_mysql(bool *same_user_rec, + ulint latch_mode, btr_pcur_t *pcur, + bool moves_up, mtr_t *mtr, + ulint select_lock_type) { - ibool success; - - success = btr_pcur_restore_position(latch_mode, pcur, mtr); + btr_pcur_t::restore_status status = btr_pcur_restore_position( + latch_mode, pcur, mtr); - *same_user_rec = success; + *same_user_rec = status == btr_pcur_t::SAME_ALL; - ut_ad(!success || pcur->rel_pos == BTR_PCUR_ON); + ut_ad(!*same_user_rec || pcur->rel_pos == BTR_PCUR_ON); #ifdef UNIV_DEBUG if (pcur->pos_state == BTR_PCUR_IS_POSITIONED_OPTIMISTIC) { ut_ad(pcur->rel_pos == BTR_PCUR_BEFORE @@ -3624,7 +3618,10 @@ sel_restore_position_for_mysql( switch (pcur->rel_pos) { case BTR_PCUR_ON: - if (!success && moves_up) { + if (!*same_user_rec && moves_up) { + if (status == btr_pcur_t::SAME_UNIQ + && select_lock_type != LOCK_NONE) + return true; next: if (btr_pcur_move_to_next(pcur, mtr) && rec_is_metadata(btr_pcur_get_rec(pcur), @@ -3634,7 +3631,7 @@ next: return true; } - return(!success); + return(!*same_user_rec); case BTR_PCUR_AFTER_LAST_IN_TREE: case BTR_PCUR_BEFORE_FIRST_IN_TREE: return true; @@ -4303,7 +4300,7 @@ row_search_mvcc( dberr_t err = DB_SUCCESS; ibool unique_search = FALSE; ibool mtr_has_extra_clust_latch = FALSE; - ibool moves_up = FALSE; + bool moves_up = false; ibool set_also_gap_locks = TRUE; /* if the query is a plain locking SELECT, and the isolation level is <= TRX_ISO_READ_COMMITTED, then this is set to FALSE */ @@ -4312,7 +4309,7 @@ row_search_mvcc( read (fetch the newest committed version), then this is set to TRUE */ ulint next_offs; - ibool same_user_rec; + bool same_user_rec; mtr_t mtr; mem_heap_t* heap = NULL; rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; @@ -4623,10 +4620,10 @@ row_search_mvcc( if (UNIV_UNLIKELY(direction == 0)) { if (mode == PAGE_CUR_GE || mode == PAGE_CUR_G || mode >= PAGE_CUR_CONTAIN) { - moves_up = TRUE; + moves_up = true; } } else if (direction == ROW_SEL_NEXT) { - moves_up = 
TRUE; + moves_up = true; } thr = que_fork_get_first_thr(prebuilt->sel_graph); @@ -4676,7 +4673,7 @@ wait_table_again: bool need_to_process = sel_restore_position_for_mysql( &same_user_rec, BTR_SEARCH_LEAF, - pcur, moves_up, &mtr); + pcur, moves_up, &mtr, prebuilt->select_lock_type); if (UNIV_UNLIKELY(need_to_process)) { if (UNIV_UNLIKELY(prebuilt->row_read_type @@ -4898,7 +4895,7 @@ rec_loop: if (UNIV_UNLIKELY(next_offs >= srv_page_size - PAGE_DIR)) { wrong_offs: - if (srv_force_recovery == 0 || moves_up == FALSE) { + if (srv_force_recovery == 0 || moves_up == false) { ib::error() << "Rec address " << static_cast<const void*>(rec) << ", buf block fix count " @@ -5701,7 +5698,9 @@ next_rec: if (sel_restore_position_for_mysql(&same_user_rec, BTR_SEARCH_LEAF, - pcur, moves_up, &mtr)) { + pcur, moves_up, &mtr, + prebuilt->select_lock_type) + ) { goto rec_loop; } } @@ -5792,7 +5791,7 @@ lock_table_wait: if (!dict_index_is_spatial(index)) { sel_restore_position_for_mysql( &same_user_rec, BTR_SEARCH_LEAF, pcur, - moves_up, &mtr); + moves_up, &mtr, prebuilt->select_lock_type); } if ((srv_locks_unsafe_for_binlog diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc index 617fcf68c20..6e3fb31808d 100644 --- a/storage/innobase/row/row0uins.cc +++ b/storage/innobase/row/row0uins.cc @@ -65,7 +65,6 @@ row_undo_ins_remove_clust_rec( /*==========================*/ undo_node_t* node) /*!< in: undo node */ { - ibool success; dberr_t err; ulint n_tries = 0; mtr_t mtr; @@ -102,12 +101,12 @@ row_undo_ins_remove_clust_rec( purged. However, we can log the removal out of sync with the B-tree modification. */ - success = btr_pcur_restore_position( + ut_a(btr_pcur_restore_position( online ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED : (node->rec_type == TRX_UNDO_INSERT_METADATA) - ? BTR_MODIFY_TREE : BTR_MODIFY_LEAF, &node->pcur, &mtr); - ut_a(success); + ? 
BTR_MODIFY_TREE : BTR_MODIFY_LEAF, &node->pcur, &mtr) + == btr_pcur_t::SAME_ALL); rec_t* rec = btr_pcur_get_rec(&node->pcur); @@ -138,9 +137,9 @@ row_undo_ins_remove_clust_rec( mtr.commit(); mtr.start(); - success = btr_pcur_restore_position( - BTR_MODIFY_LEAF, &node->pcur, &mtr); - ut_a(success); + ut_a(btr_pcur_restore_position( + BTR_MODIFY_LEAF, &node->pcur, &mtr) + == btr_pcur_t::SAME_ALL); break; case DICT_COLUMNS_ID: /* This is rolling back an INSERT into SYS_COLUMNS. @@ -181,11 +180,8 @@ retry: } else { index->set_modified(mtr); } - - success = btr_pcur_restore_position( - BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, - &node->pcur, &mtr); - ut_a(success); + ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, + &node->pcur, &mtr) == btr_pcur_t::SAME_ALL); btr_cur_pessimistic_delete(&err, FALSE, &node->pcur.btr_cur, 0, true, &mtr); diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc index ae2a710d24b..9f08c92a4e5 100644 --- a/storage/innobase/row/row0umod.cc +++ b/storage/innobase/row/row0umod.cc @@ -95,19 +95,14 @@ row_undo_mod_clust_low( btr_pcur_t* pcur; btr_cur_t* btr_cur; dberr_t err; -#ifdef UNIV_DEBUG - ibool success; -#endif /* UNIV_DEBUG */ pcur = &node->pcur; btr_cur = btr_pcur_get_btr_cur(pcur); -#ifdef UNIV_DEBUG - success = -#endif /* UNIV_DEBUG */ + ut_d(btr_pcur_t::restore_status pcur_restore_result =) btr_pcur_restore_position(mode, pcur, mtr); - ut_ad(success); + ut_ad(pcur_restore_result == btr_pcur_t::SAME_ALL); ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur_get_index(btr_cur)) == thr_get_trx(thr)->id @@ -388,7 +383,8 @@ row_undo_mod_clust( ut_ad(node->new_trx_id); mtr.start(); - if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr)) { + if (btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr) != + btr_pcur_t::SAME_ALL) { goto mtr_commit_exit; } @@ -410,9 +406,9 @@ row_undo_mod_clust( btr_pcur_commit_specify_mtr(pcur, &mtr); mtr.start(); - if (!btr_pcur_restore_position( + if 
(btr_pcur_restore_position( BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, - pcur, &mtr)) { + pcur, &mtr) != btr_pcur_t::SAME_ALL) { goto mtr_commit_exit; } @@ -444,7 +440,8 @@ row_undo_mod_clust( longer accessible by any active read view. */ mtr.start(); - if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr)) { + if (btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr) + != btr_pcur_t::SAME_ALL) { goto mtr_commit_exit; } rec_t* rec = btr_pcur_get_rec(pcur); @@ -541,7 +538,6 @@ row_undo_mod_del_mark_or_remove_sec_low( { btr_pcur_t pcur; btr_cur_t* btr_cur; - ibool success; dberr_t err = DB_SUCCESS; mtr_t mtr; mtr_t mtr_vers; @@ -613,9 +609,8 @@ row_undo_mod_del_mark_or_remove_sec_low( mtr_vers.start(); - success = btr_pcur_restore_position(BTR_SEARCH_LEAF, &(node->pcur), - &mtr_vers); - ut_a(success); + ut_a(btr_pcur_restore_position(BTR_SEARCH_LEAF, &node->pcur, &mtr_vers) + == btr_pcur_t::SAME_ALL); /* For temporary table, we can skip to check older version of clustered index entry, because there is no MVCC or purge. */ diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc index 9f631128b08..193a728e502 100644 --- a/storage/innobase/row/row0upd.cc +++ b/storage/innobase/row/row0upd.cc @@ -2919,7 +2919,8 @@ row_upd_clust_rec( the same transaction do not modify the record in the meantime. Therefore we can assert that the restoration of the cursor succeeds. 
*/ - ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr)); + ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr) == + btr_pcur_t::SAME_ALL); ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur), dict_table_is_comp(index->table))); @@ -3122,7 +3123,8 @@ row_upd_clust_step( mode = BTR_MODIFY_LEAF; } - if (!btr_pcur_restore_position(mode, pcur, &mtr)) { + if (btr_pcur_restore_position(mode, pcur, &mtr) != + btr_pcur_t::SAME_ALL) { err = DB_RECORD_NOT_FOUND; goto exit_func; } @@ -3144,7 +3146,8 @@ row_upd_clust_step( mtr.start(); index->set_modified(mtr); - if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr)) { + if (btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr) != + btr_pcur_t::SAME_ALL) { err = DB_ERROR; goto exit_func; } |