diff options
-rw-r--r-- | mysql-test/suite/galera/r/galera_UK_conflict.result | 131 | ||||
-rw-r--r-- | mysql-test/suite/galera/t/galera_UK_conflict.test | 286 | ||||
-rw-r--r-- | sql/mdl.cc | 8 | ||||
-rw-r--r-- | sql/sql_class.cc | 12 | ||||
-rw-r--r-- | sql/wsrep_thd.cc | 14 | ||||
-rw-r--r-- | storage/innobase/btr/btr0cur.cc | 29 | ||||
-rw-r--r-- | storage/innobase/include/trx0trx.h | 5 | ||||
-rw-r--r-- | storage/innobase/lock/lock0lock.cc | 44 | ||||
-rw-r--r-- | storage/innobase/trx/trx0trx.cc | 10 |
9 files changed, 529 insertions, 10 deletions
diff --git a/mysql-test/suite/galera/r/galera_UK_conflict.result b/mysql-test/suite/galera/r/galera_UK_conflict.result new file mode 100644 index 00000000000..44bb64c9d63 --- /dev/null +++ b/mysql-test/suite/galera/r/galera_UK_conflict.result @@ -0,0 +1,131 @@ +connection node_2; +connection node_1; +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 int, unique key keyj (f2)); +INSERT INTO t1 VALUES (1, 1, 0); +INSERT INTO t1 VALUES (3, 3, 0); +INSERT INTO t1 VALUES (10, 10, 0); +SET GLOBAL wsrep_slave_threads = 3; +SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_cb"; +connection node_1; +SET SESSION wsrep_sync_wait=0; +START TRANSACTION; +DELETE FROM t1 WHERE f2 = 3; +INSERT INTO t1 VALUES (3, 3, 1); +connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; +connection node_1a; +SET SESSION wsrep_sync_wait=0; +connection node_2; +INSERT INTO t1 VALUES (5, 5, 2); +connection node_1a; +SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached"; +SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync'; +connection node_2; +INSERT INTO t1 VALUES (4, 4, 2); +connection node_1a; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync'; +connection node_1; +COMMIT; +connection node_1a; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_master_enter_sync'; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL DEBUG_DBUG = ""; +SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb"; +SET GLOBAL debug_dbug = NULL; +SET debug_sync='RESET'; +SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_cb"; +SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync'; +SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached"; +SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_slave_enter_sync'; +SET 
GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL DEBUG_DBUG = ""; +SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb"; +SET GLOBAL debug_dbug = NULL; +SET debug_sync='RESET'; +SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_slave_enter_sync'; +SET GLOBAL wsrep_provider_options = 'dbug='; +connection node_1; +SELECT * FROM t1; +f1 f2 f3 +1 1 0 +3 3 1 +4 4 2 +5 5 2 +10 10 0 +SET GLOBAL wsrep_slave_threads = DEFAULT; +connection node_2; +SELECT * FROM t1; +f1 f2 f3 +1 1 0 +3 3 1 +4 4 2 +5 5 2 +10 10 0 +INSERT INTO t1 VALUES (7,7,7); +INSERT INTO t1 VALUES (8,8,8); +DROP TABLE t1; +test scenario 2 +connection node_1; +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 int, unique key keyj (f2)); +INSERT INTO t1 VALUES (1, 1, 0); +INSERT INTO t1 VALUES (3, 3, 0); +INSERT INTO t1 VALUES (10, 10, 0); +SET GLOBAL wsrep_slave_threads = 3; +SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync'; +connection node_1; +SET SESSION wsrep_sync_wait=0; +START TRANSACTION; +DELETE FROM t1 WHERE f2 = 3; +INSERT INTO t1 VALUES (3, 3, 1); +connection node_1a; +SET SESSION wsrep_sync_wait=0; +connection node_2; +INSERT INTO t1 VALUES (5, 5, 2); +connection node_1a; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync'; +connection node_1; +COMMIT; +connection node_1a; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_replay_cb"; +SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_master_enter_sync'; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync'; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_replay_cb_reached"; +SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_slave_enter_sync'; +connection 
node_2; +INSERT INTO t1 VALUES (4, 4, 2); +connection node_1a; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_slave_enter_sync'; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL DEBUG_DBUG = ""; +SET DEBUG_SYNC = "now SIGNAL signal.wsrep_replay_cb"; +SET GLOBAL debug_dbug = NULL; +SET debug_sync='RESET'; +connection node_1; +SET GLOBAL wsrep_slave_threads = DEFAULT; +connection node_2; +SELECT * FROM t1; +f1 f2 f3 +1 1 0 +3 3 1 +4 4 2 +5 5 2 +10 10 0 +INSERT INTO t1 VALUES (7,7,7); +INSERT INTO t1 VALUES (8,8,8); +DROP TABLE t1; diff --git a/mysql-test/suite/galera/t/galera_UK_conflict.test b/mysql-test/suite/galera/t/galera_UK_conflict.test new file mode 100644 index 00000000000..bef7edd0e53 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_UK_conflict.test @@ -0,0 +1,286 @@ +# +# This test tests the operation of transaction replay with a scenario +# where two subsequent write sets in applying conflict with local transaction +# in commit phase. The conflict is "false positive" conflict on GAP lock in +# secondary unique index. +# The first applier will cause BF abort for the local committer, which +# starts replaying because of positive certification. +# In buggy version, scenario continues so that while the local transaction +# is replaying, the latter applier experiences similar UK GAP lock conflict +# and forces the replayer to abort second time. +# In fixed version, this latter BF abort should not happen. 
+# + +--source include/galera_cluster.inc +--source include/have_innodb.inc +--source include/have_debug_sync.inc +--source include/galera_have_debug_sync.inc + + +--let $wsrep_local_replays_old = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'` + +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 int, unique key keyj (f2)); +INSERT INTO t1 VALUES (1, 1, 0); +INSERT INTO t1 VALUES (3, 3, 0); +INSERT INTO t1 VALUES (10, 10, 0); + +# we will need 2 appliers threads for applyin two write sets in parallel in node1 +# and 1 applier thread for handling replaying +SET GLOBAL wsrep_slave_threads = 3; +SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_cb"; + +--connection node_1 +# starting a transaction, which deletes and inserts the middle row in test table +# this will be victim of false positive conflict with appliers +SET SESSION wsrep_sync_wait=0; +START TRANSACTION; + +DELETE FROM t1 WHERE f2 = 3; +INSERT INTO t1 VALUES (3, 3, 1); + +# Control connection to manage sync points for appliers +--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1 +--connection node_1a +SET SESSION wsrep_sync_wait=0; + +# send from node 2 first INSERT transaction, which will conflict on GAP lock in node 1 +--connection node_2 +INSERT INTO t1 VALUES (5, 5, 2); + +--connection node_1a +# wait to see the INSERT in apply_cb sync point +SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached"; + +# first applier seen in wait point, set sync point for the second INSERT +--let $galera_sync_point = apply_monitor_slave_enter_sync +--source include/galera_set_sync_point.inc + +--connection node_2 +# send second insert into same GAP in test table +INSERT INTO t1 VALUES (4, 4, 2); + +--connection node_1a +# wait for the second insert to arrive in his sync point +--let $galera_sync_point = apply_monitor_slave_enter_sync +--source include/galera_wait_sync_point.inc +--source include/galera_clear_sync_point.inc + +# both appliers are 
now waiting in separate sync points + +# Block the local commit, send the COMMIT and wait until it gets blocked +--let $galera_sync_point = commit_monitor_enter_sync +--source include/galera_set_sync_point.inc + +--connection node_1 +--send COMMIT + +--connection node_1a +# wait for the local commit to enter in commit monitor wait state +--let $galera_sync_point = apply_monitor_slave_enter_sync commit_monitor_enter_sync +--source include/galera_wait_sync_point.inc +--source include/galera_clear_sync_point.inc + +# release the local transaction to continue with commit +--let $galera_sync_point = commit_monitor_enter_sync +--source include/galera_signal_sync_point.inc +--source include/galera_clear_sync_point.inc + +# and now release the first applier, it should force local trx to abort +SET GLOBAL DEBUG_DBUG = ""; +SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb"; +SET GLOBAL debug_dbug = NULL; +SET debug_sync='RESET'; + +# wait for BF abort to happen and replaying begin +--let $wait_condition = SELECT COUNT(*)=1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE STATE= 'wsrep replaying trx' +--source include/wait_condition.inc + + +# set another sync point for second applier +SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_cb"; + +# letting the second appier to move forward +--let $galera_sync_point = apply_monitor_slave_enter_sync +--source include/galera_signal_sync_point.inc + +# waiting until second applier is in wait +SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached"; + +# stopping second applier before commit +--let $galera_sync_point = commit_monitor_enter_sync +--source include/galera_set_sync_point.inc +--source include/galera_clear_sync_point.inc + +# releasing the second insert, with buggy version it will conflict with +# replayer +SET GLOBAL DEBUG_DBUG = ""; +SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb"; +SET GLOBAL debug_dbug = NULL; +SET debug_sync='RESET'; + +# with fixed version, second applier has reached commit monitor, and we can 
+# release it to complete +--let $galera_sync_point = commit_monitor_enter_sync +--source include/galera_signal_sync_point.inc +--source include/galera_clear_sync_point.inc + +# local commit should succeed +--connection node_1 +--reap + +SELECT * FROM t1; + +# wsrep_local_replays has increased by 1 +--let $wsrep_local_replays_new = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'` +--disable_query_log +--eval SELECT $wsrep_local_replays_new - $wsrep_local_replays_old = 1 AS wsrep_local_replays; +--enable_query_log + +# returning original slave thread count +SET GLOBAL wsrep_slave_threads = DEFAULT; + +--connection node_2 +SELECT * FROM t1; + +# replicate some transactions, so that wsrep slave thread count can reach +# original state in node 1 +INSERT INTO t1 VALUES (7,7,7); +INSERT INTO t1 VALUES (8,8,8); + +DROP TABLE t1; + +################################################################################## +# test scenario 2 +# +# commit order is now: INSERT-1, local COMMIT, INSERT-2 +# while local trx is replaying, the latter applier has applied and is waiting +# for commit. 
+# The point in this scenario is to verify that replayer does not try to abort +# the latter applier +################################################################################# + +--echo Test scenario 2 + +--connection node_1 +--let $wsrep_local_replays_old = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'` + +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 int, unique key keyj (f2)); +INSERT INTO t1 VALUES (1, 1, 0); +INSERT INTO t1 VALUES (3, 3, 0); +INSERT INTO t1 VALUES (10, 10, 0); + +# we will need 2 appliers threads for applyin two writes sets in parallel in node1 +# and 1 applier thread for handling replaying +SET GLOBAL wsrep_slave_threads = 3; + +# set sync point for the first INSERT applier +--let $galera_sync_point = apply_monitor_slave_enter_sync +--source include/galera_set_sync_point.inc + +--connection node_1 +# starting a transaction, which deletes and inserts the middle row in test table +# this will be victim of false positive conflict with appliers +SET SESSION wsrep_sync_wait=0; +START TRANSACTION; + +DELETE FROM t1 WHERE f2 = 3; +INSERT INTO t1 VALUES (3, 3, 1); + +# Control connection to manage sync points for appliers +--connection node_1a +SET SESSION wsrep_sync_wait=0; + +# send from node 2 first an INSERT transaction, which will conflict on GAP lock in node 1 +--connection node_2 +INSERT INTO t1 VALUES (5, 5, 2); + +--connection node_1a +# wait to see the INSERT in apply_cb sync point +--let $galera_sync_point = apply_monitor_slave_enter_sync +--source include/galera_wait_sync_point.inc +--source include/galera_clear_sync_point.inc + +# Block the local commit, send the COMMIT and wait until it gets blocked +--let $galera_sync_point = commit_monitor_enter_sync +--source include/galera_set_sync_point.inc + +--connection node_1 +--send COMMIT + +--connection node_1a +# wait for the local commit to enter in commit monitor wait state +--let $galera_sync_point = 
apply_monitor_slave_enter_sync commit_monitor_enter_sync +--source include/galera_wait_sync_point.inc +--source include/galera_clear_sync_point.inc + +# first applier is now waiting in before commit, and local trx in commit monitor + +# set sync point before replaying +SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_replay_cb"; + +# release the local transaction to continue with commit +# it should advance and end up waiting in commit monitor for his turn +--let $galera_sync_point = commit_monitor_enter_sync +--source include/galera_signal_sync_point.inc +--source include/galera_clear_sync_point.inc + +# and now release the first applier, it should force local trx to abort +--let $galera_sync_point = apply_monitor_slave_enter_sync +--source include/galera_signal_sync_point.inc +--source include/galera_clear_sync_point.inc + +# waiting for local replayer to reach sync point +SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_replay_cb_reached"; + +# set sync point before commit for the second INSERT +--let $galera_sync_point = commit_monitor_enter_sync +--source include/galera_set_sync_point.inc + +--connection node_2 +# send second insert into same GAP in test table +INSERT INTO t1 VALUES (4, 4, 2); + +--connection node_1a +# wait for the second applier to enter in commit monitor wait state +--let $galera_sync_point = commit_monitor_enter_sync +--source include/galera_wait_sync_point.inc +--source include/galera_clear_sync_point.inc + +# and, letting the second appier to move forward, it will stop naturally +# to wait for commit order after replayer's commit +--let $galera_sync_point = commit_monitor_enter_sync +--source include/galera_signal_sync_point.inc +--source include/galera_clear_sync_point.inc + +# and now release the replayer, if all is good,it will commit before the second applier +SET GLOBAL debug_dbug = ""; +SET DEBUG_SYNC = "now SIGNAL signal.wsrep_replay_cb"; +SET GLOBAL debug_dbug = NULL; +SET debug_sync='RESET'; + +# local commit should succeed +--connection 
node_1 +--reap + +--let $wait_condition = SELECT COUNT(*)=5 FROM t1; +--source include/wait_condition.inc + +# returning original slave thread count +SET GLOBAL wsrep_slave_threads = DEFAULT; + +--connection node_2 +SELECT * FROM t1; + +# wsrep_local_replays has increased by 1 +--let $wsrep_local_replays_new = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'` +--disable_query_log +--eval SELECT $wsrep_local_replays_new - $wsrep_local_replays_old = 1 AS wsrep_local_replays; +--enable_query_log + +# replicate some transactions, so that wsrep slave thread count can reach +# original state in node 1 +INSERT INTO t1 VALUES (7,7,7); +INSERT INTO t1 VALUES (8,8,8); + +DROP TABLE t1; + diff --git a/sql/mdl.cc b/sql/mdl.cc index f2b205a86f2..8cb21771991 100644 --- a/sql/mdl.cc +++ b/sql/mdl.cc @@ -1082,7 +1082,7 @@ MDL_wait::timed_wait(MDL_context_owner *owner, struct timespec *abs_timeout, DBUG_ASSERT(!debug_sync_set_action((owner->get_thd()), STRING_WITH_LEN(act))); };); - if (wsrep_thd_is_BF(owner->get_thd(), false)) + if (WSREP_ON && wsrep_thd_is_BF(owner->get_thd(), false)) { wait_result= mysql_cond_wait(&m_COND_wait_status, &m_LOCK_wait_status); } @@ -1155,7 +1155,7 @@ void MDL_lock::Ticket_list::add_ticket(MDL_ticket *ticket) */ DBUG_ASSERT(ticket->get_lock()); #ifdef WITH_WSREP - if ((this == &(ticket->get_lock()->m_waiting)) && + if (WSREP_ON && (this == &(ticket->get_lock()->m_waiting)) && wsrep_thd_is_BF(ticket->get_ctx()->get_thd(), false)) { Ticket_iterator itw(ticket->get_lock()->m_waiting); @@ -1581,7 +1581,7 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg, ticket->is_incompatible_when_granted(type_arg)) { #ifdef WITH_WSREP - if (wsrep_thd_is_BF(requestor_ctx->get_thd(),false) && + if (WSREP_ON && wsrep_thd_is_BF(requestor_ctx->get_thd(),false) && key.mdl_namespace() == MDL_key::GLOBAL) { WSREP_DEBUG("global lock granted for BF: %lu %s", @@ -1615,7 +1615,7 @@ MDL_lock::can_grant_lock(enum_mdl_type 
type_arg, } else { - if (wsrep_thd_is_BF(requestor_ctx->get_thd(), false) && + if (WSREP_ON && wsrep_thd_is_BF(requestor_ctx->get_thd(), false) && key.mdl_namespace() == MDL_key::GLOBAL) { WSREP_DEBUG("global lock granted for BF (waiting queue): %lu %s", diff --git a/sql/sql_class.cc b/sql/sql_class.cc index c3274ae9b82..92736eacee2 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1,6 +1,6 @@ /* Copyright (c) 2000, 2015, Oracle and/or its affiliates. - Copyright (c) 2008, 2020, MariaDB Corporation. + Copyright (c) 2008, 2021, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -4730,6 +4730,16 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd) DBUG_EXECUTE_IF("disable_thd_need_ordering_with", return 1;); if (!thd || !other_thd) return 1; +#ifdef WITH_WSREP + /* wsrep applier, replayer and TOI processing threads are ordered + by replication provider, relaxed GAP locking protocol can be used + between high priority wsrep threads + */ + if (WSREP_ON && + wsrep_thd_is_BF(const_cast<THD *>(thd), false) && + wsrep_thd_is_BF(const_cast<THD *>(other_thd), true)) + return 0; +#endif /* WITH_WSREP */ rgi= thd->rgi_slave; other_rgi= other_thd->rgi_slave; if (!rgi || !other_rgi) diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc index 1c43aeaaead..d8ca70d1cbe 100644 --- a/sql/wsrep_thd.cc +++ b/sql/wsrep_thd.cc @@ -22,6 +22,7 @@ //#include "global_threads.h" // LOCK_thread_count, etc. 
#include "sql_base.h" // close_thread_tables() #include "mysqld.h" // start_wsrep_THD(); +#include "debug_sync.h" #include "slave.h" // opt_log_slave_updates #include "rpl_filter.h" @@ -371,6 +372,19 @@ void wsrep_replay_transaction(THD *thd) thd->variables.option_bits|= OPTION_BEGIN; thd->server_status|= SERVER_STATUS_IN_TRANS; + /* Allow tests to block the replayer thread using the DBUG facilities */ +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("sync.wsrep_replay_cb", + { + const char act[]= + "now " + "SIGNAL sync.wsrep_replay_cb_reached " + "WAIT_FOR signal.wsrep_replay_cb"; + DBUG_ASSERT(!debug_sync_set_action(thd, + STRING_WITH_LEN(act))); + };); +#endif /* ENABLED_DEBUG_SYNC */ + int rcode = wsrep->replay_trx(wsrep, &thd->wsrep_ws_handle, (void *)thd); diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 7fd34c5d652..3d03c55bf15 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -3,7 +3,7 @@ Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2015, 2020, MariaDB Corporation. +Copyright (c) 2015, 2021, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -67,6 +67,9 @@ Created 10/16/1994 Heikki Tuuri #include "srv0start.h" #include "mysql_com.h" #include "dict0stats.h" +#ifdef WITH_WSREP +#include "mysql/service_wsrep.h" +#endif /* WITH_WSREP */ /** Buffered B-tree operation types, introduced as part of delete buffering. 
*/ enum btr_op_t { @@ -2941,7 +2944,8 @@ btr_cur_ins_lock_and_undo( /* Check if there is predicate or GAP lock preventing the insertion */ if (!(flags & BTR_NO_LOCKING_FLAG)) { - if (dict_index_is_spatial(index)) { + const unsigned type = index->type; + if (UNIV_UNLIKELY(type & DICT_SPATIAL)) { lock_prdt_t prdt; rtr_mbr_t mbr; @@ -2958,9 +2962,30 @@ btr_cur_ins_lock_and_undo( index, thr, mtr, &prdt); *inherit = false; } else { +#ifdef WITH_WSREP + trx_t* trx= thr_get_trx(thr); + /* If transaction scanning an unique secondary + key is wsrep high priority thread (brute + force) this scanning may involve GAP-locking + in the index. As this locking happens also + when applying replication events in high + priority applier threads, there is a + probability for lock conflicts between two + wsrep high priority threads. To avoid this + GAP-locking we mark that this transaction + is using unique key scan here. */ + if ((type & (DICT_CLUSTERED | DICT_UNIQUE)) == DICT_UNIQUE + && trx->is_wsrep() + && wsrep_thd_is_BF(trx->mysql_thd, false)) { + trx->wsrep_UK_scan= true; + } +#endif /* WITH_WSREP */ err = lock_rec_insert_check_and_lock( flags, rec, btr_cur_get_block(cursor), index, thr, mtr, inherit); +#ifdef WITH_WSREP + trx->wsrep_UK_scan= false; +#endif /* WITH_WSREP */ } } diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 048a050a28d..5354c77db25 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2020, MariaDB Corporation. +Copyright (c) 2015, 2021, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -875,6 +875,9 @@ public: /** whether wsrep_on(mysql_thd) held at the start of transaction */ bool wsrep; bool is_wsrep() const { return UNIV_UNLIKELY(wsrep); } + /** true, if BF thread is performing unique secondary index scanning */ + bool wsrep_UK_scan; + bool is_wsrep_UK_scan() const { return UNIV_UNLIKELY(wsrep_UK_scan); } #else /* WITH_WSREP */ bool is_wsrep() const { return false; } #endif /* WITH_WSREP */ diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index c95506abc39..edd29066c97 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2020, MariaDB Corporation. +Copyright (c) 2014, 2021, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -816,6 +816,17 @@ lock_rec_has_to_wait( } #ifdef WITH_WSREP + /* New lock request from a transaction is using unique key + scan and this transaction is a wsrep high priority transaction + (brute force). If conflicting transaction is also wsrep high + priority transaction we should avoid lock conflict because + ordering of these transactions is already decided and + conflicting transaction will be later replayed. */ + if (trx->is_wsrep_UK_scan() + && wsrep_thd_is_BF(lock2->trx->mysql_thd, true)) { + return (FALSE); + } + /* There should not be two conflicting locks that are brute force. If there is it is a bug. 
*/ wsrep_assert_no_bf_bf_wait(NULL, lock2, trx); @@ -5928,6 +5939,19 @@ lock_sec_rec_modify_check_and_lock( heap_no = page_rec_get_heap_no(rec); +#ifdef WITH_WSREP + trx_t *trx= thr_get_trx(thr); + /* If transaction scanning an unique secondary key is wsrep + high priority thread (brute force) this scanning may involve + GAP-locking in the index. As this locking happens also when + applying replication events in high priority applier threads, + there is a probability for lock conflicts between two wsrep + high priority threads. To avoid this GAP-locking we mark that + this transaction is using unique key scan here. */ + if (trx->is_wsrep() && wsrep_thd_is_BF(trx->mysql_thd, false)) + trx->wsrep_UK_scan= true; +#endif /* WITH_WSREP */ + /* Another transaction cannot have an implicit lock on the record, because when we come here, we already have modified the clustered index record, and this would not have been possible if another active @@ -5943,6 +5967,9 @@ lock_sec_rec_modify_check_and_lock( MONITOR_INC(MONITOR_NUM_RECLOCK_REQ); lock_mutex_exit(); +#ifdef WITH_WSREP + trx->wsrep_UK_scan= false; +#endif /* WITH_WSREP */ #ifdef UNIV_DEBUG { @@ -6032,6 +6059,18 @@ lock_sec_rec_read_check_and_lock( lock_rec_convert_impl_to_expl(block, rec, index, offsets); } +#ifdef WITH_WSREP + trx_t *trx= thr_get_trx(thr); + /* If transaction scanning an unique secondary key is wsrep + high priority thread (brute force) this scanning may involve + GAP-locking in the index. As this locking happens also when + applying replication events in high priority applier threads, + there is a probability for lock conflicts between two wsrep + high priority threads. To avoid this GAP-locking we mark that + this transaction is using unique key scan here. 
*/ + if (trx->is_wsrep() && wsrep_thd_is_BF(trx->mysql_thd, false)) + trx->wsrep_UK_scan= true; +#endif /* WITH_WSREP */ lock_mutex_enter(); ut_ad(mode != LOCK_X @@ -6045,6 +6084,9 @@ lock_sec_rec_read_check_and_lock( MONITOR_INC(MONITOR_NUM_RECLOCK_REQ); lock_mutex_exit(); +#ifdef WITH_WSREP + trx->wsrep_UK_scan= false; +#endif /* WITH_WSREP */ ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets)); diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 60e534e0f43..42bd67cb24b 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2020, MariaDB Corporation. +Copyright (c) 2015, 2021, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -154,6 +154,11 @@ trx_init( trx->lock.rec_cached = 0; trx->lock.table_cached = 0; +#ifdef WITH_WSREP + ut_ad(!trx->wsrep); + ut_ad(!trx->wsrep_event); + ut_ad(!trx->wsrep_UK_scan); +#endif /* WITH_WSREP */ ut_ad(trx->get_flush_observer() == NULL); } @@ -355,6 +360,7 @@ trx_t *trx_allocate_for_background() #ifdef WITH_WSREP trx->wsrep_event = NULL; + ut_ad(!trx->wsrep_UK_scan); #endif /* WITH_WSREP */ return(trx); @@ -466,6 +472,8 @@ inline void trx_t::free() MEM_NOACCESS(&flush_observer, sizeof flush_observer); #ifdef WITH_WSREP MEM_NOACCESS(&wsrep_event, sizeof wsrep_event); + ut_ad(!wsrep_UK_scan); + MEM_NOACCESS(&wsrep_UK_scan, sizeof wsrep_UK_scan); #endif /* WITH_WSREP */ MEM_NOACCESS(&magic_n, sizeof magic_n); trx_pools->mem_free(this); |