From 66c05326d2a756329ce6fe2c5abb21230b424b4e Mon Sep 17 00:00:00 2001 From: sjaakola Date: Thu, 6 Oct 2022 15:16:06 +0300 Subject: MDEV-29684 Fixes for cluster wide write conflict resolving MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cluster conflict victim's THD is marked with wsrep_aborter. THD::wsrep_aorter holds the thread ID of the hight priority tread, which is currently carrying out BF aborting for this victim. However, the BF abort operation is not always successful, and in such case the wsrep_aborter mark should be removed. In the old code, this wsrep_aborter resetting did not happen, and this could lead to a situation where the sticky wsrep_aborter mark prevents any further attempt to BF abort this transaction. This commit fixes this issue, and resets wsrep_aborter after unsuccesful BF abort attempt. Reviewed-by: Jan Lindström --- sql/service_wsrep.cc | 11 +++++++++-- sql/wsrep_thd.cc | 21 +++++++++++++-------- sql/wsrep_thd.h | 6 ++++-- storage/innobase/handler/ha_innodb.cc | 10 ++++++++++ 4 files changed, 36 insertions(+), 12 deletions(-) diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc index 291d8dfbef8..7b0a1e5495e 100644 --- a/sql/service_wsrep.cc +++ b/sql/service_wsrep.cc @@ -349,13 +349,20 @@ extern "C" void wsrep_commit_ordered(THD *thd) extern "C" bool wsrep_thd_set_wsrep_aborter(THD *bf_thd, THD *victim_thd) { - WSREP_DEBUG("wsrep_thd_set_wsrep_aborter called"); mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + if (!bf_thd) + { + victim_thd->wsrep_aborter= 0; + WSREP_DEBUG("wsrep_thd_set_wsrep_aborter resetting wsrep_aborter"); + return false; + } if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id) { return true; } - victim_thd->wsrep_aborter = bf_thd->thread_id; + victim_thd->wsrep_aborter= bf_thd->thread_id; + WSREP_DEBUG("wsrep_thd_set_wsrep_aborter setting wsrep_aborter %u", + victim_thd->wsrep_aborter); return false; } diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc index 6900efa8bc9..05c96491906 100644 --- a/sql/wsrep_thd.cc +++ b/sql/wsrep_thd.cc @@ -1,4 +1,4 @@ -/* Copyright (C) 2013-2022 Codership Oy +/* Copyright (C) 2013-2023 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -308,11 +308,11 @@ void wsrep_fire_rollbacker(THD *thd) } -int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal) +int wsrep_abort_thd(THD *bf_thd, + THD *victim_thd, + my_bool signal) { DBUG_ENTER("wsrep_abort_thd"); - THD *victim_thd= (THD *) victim_thd_ptr; - THD *bf_thd= (THD *) bf_thd_ptr; mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); @@ -323,16 +323,21 @@ int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal) if ((WSREP(bf_thd) || ((WSREP_ON || bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU) && wsrep_thd_is_toi(bf_thd))) && - victim_thd && !wsrep_thd_is_aborting(victim_thd)) { - WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", (bf_thd) ? - (long long)bf_thd->real_id : 0, (long long)victim_thd->real_id); + WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", + (long long)bf_thd->real_id, (long long)victim_thd->real_id); ha_abort_transaction(bf_thd, victim_thd, signal); } else { - WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd); + WSREP_DEBUG("wsrep_abort_thd not effective: bf %llu victim %llu " + "wsrep %d wsrep_on %d RSU %d TOI %d aborting %d", + (long long)bf_thd->real_id, (long long)victim_thd->real_id, + WSREP_NNULL(bf_thd), WSREP_ON, + bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU, + wsrep_thd_is_toi(bf_thd), + wsrep_thd_is_aborting(victim_thd)); wsrep_thd_UNLOCK(victim_thd); } diff --git a/sql/wsrep_thd.h b/sql/wsrep_thd.h index e9add662e3f..cf8528c3165 100644 --- a/sql/wsrep_thd.h +++ b/sql/wsrep_thd.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2013-2022 Codership Oy +/* Copyright (C) 2013-2023 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -88,7 +88,9 @@ bool wsrep_create_appliers(long threads, bool mutex_protected=false); void wsrep_create_rollbacker(); bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd); -int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal); +int wsrep_abort_thd(THD *bf_thd, + THD *victim_thd, + my_bool signal) __attribute__((nonnull(1,2))); /* Helper methods to deal with thread local storage. diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 6f781a1f291..b8e2aea204b 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -18748,6 +18748,16 @@ wsrep_kill_victim( lock_cancel_waiting_and_release(wait_lock); } } + else + { + wsrep_thd_LOCK(thd); + victim_trx->lock.was_chosen_as_wsrep_victim= false; + wsrep_thd_set_wsrep_aborter(NULL, thd); + wsrep_thd_UNLOCK(thd); + + WSREP_DEBUG("wsrep_thd_bf_abort has failed, victim %lu will survive", + thd_get_thread_id(thd)); + } DBUG_VOID_RETURN; } -- cgit v1.2.1 From cd97523dcff4a1a4b1d751d505bd2325aa29b074 Mon Sep 17 00:00:00 2001 From: sjaakola Date: Thu, 29 Dec 2022 12:59:34 +0200 Subject: MDEV-30317 Transaction savepoint may cause failure in galera replaying MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created mtr test for reproducing the crash Developed actual fix for the issue. Setting THD::system_thread_info.rpl_sql_info for replayer thread, same way as it is handled for appliers. Recorded test result, with the fix Reviewed-by: Jan Lindström --- .../suite/galera/r/galera_savepoint_replay.result | 53 +++++++++++++ .../suite/galera/t/galera_savepoint_replay.test | 86 ++++++++++++++++++++++ sql/wsrep_high_priority_service.cc | 35 +++++---- 3 files changed, 158 insertions(+), 16 deletions(-) create mode 100644 mysql-test/suite/galera/r/galera_savepoint_replay.result create mode 100644 mysql-test/suite/galera/t/galera_savepoint_replay.test diff --git a/mysql-test/suite/galera/r/galera_savepoint_replay.result b/mysql-test/suite/galera/r/galera_savepoint_replay.result new file mode 100644 index 00000000000..afea5f82e3c --- /dev/null +++ b/mysql-test/suite/galera/r/galera_savepoint_replay.result @@ -0,0 +1,53 @@ +connection node_2; +connection node_1; +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 CHAR(1)); +INSERT INTO t1 VALUES (1, 'a'); +INSERT INTO t1 VALUES (2, 'a'); +connection node_1; +SET AUTOCOMMIT=ON; +START TRANSACTION; +UPDATE t1 SET f2 = 'b' WHERE f1 = 1; +SELECT * FROM t1 WHERE f1 = 2 FOR UPDATE; +f1 f2 +2 a +SAVEPOINT my_sp; +connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; +SET SESSION wsrep_sync_wait=0; +SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync'; +connection node_2; +UPDATE t1 SET f2 = 'c' WHERE f1 = 2; +connection node_1a; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync'; +connection node_1; +COMMIT; +connection node_1a; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL wsrep_provider_options = 'dbug=d,abort_trx_end'; +SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync'; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; +SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_master_enter_sync'; +connection node_1; +SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'b'; +COUNT(*) = 1 +1 +SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'c'; +COUNT(*) = 1 +1 +wsrep_local_replays +1 +connection node_2; +SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'b'; +COUNT(*) = 1 +1 +SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'c'; +COUNT(*) = 1 +1 +DROP TABLE t1; diff --git a/mysql-test/suite/galera/t/galera_savepoint_replay.test b/mysql-test/suite/galera/t/galera_savepoint_replay.test new file mode 100644 index 00000000000..cff26f4a94f --- /dev/null +++ b/mysql-test/suite/galera/t/galera_savepoint_replay.test @@ -0,0 +1,86 @@ +# +# This test tests replaying a transaction with savepoint +# + +--source include/galera_cluster.inc +--source include/have_innodb.inc +--source include/have_debug_sync.inc +--source include/galera_have_debug_sync.inc + +--let $wsrep_local_replays_old = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'` + +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 CHAR(1)); +INSERT INTO t1 VALUES (1, 'a'); +INSERT INTO t1 VALUES (2, 'a'); + +--connection node_1 +SET AUTOCOMMIT=ON; +START TRANSACTION; + +UPDATE t1 SET f2 = 'b' WHERE f1 = 1; +SELECT * FROM t1 WHERE f1 = 2 FOR UPDATE; +SAVEPOINT my_sp; + +# Block the applier on node #1 and issue a conflicting update on node #2 +--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1 +SET SESSION wsrep_sync_wait=0; +--let $galera_sync_point = apply_monitor_slave_enter_sync +--source include/galera_set_sync_point.inc + +--connection node_2 +UPDATE t1 SET f2 = 'c' WHERE f1 = 2; + +--connection node_1a +--source include/galera_wait_sync_point.inc +--source include/galera_clear_sync_point.inc + +# Block the commit, send the COMMIT and wait until it gets blocked + +--let $galera_sync_point = commit_monitor_master_enter_sync +--source include/galera_set_sync_point.inc + +--connection node_1 +--send COMMIT + +--connection node_1a + +--let $galera_sync_point = apply_monitor_slave_enter_sync commit_monitor_master_enter_sync +--source include/galera_wait_sync_point.inc +--source include/galera_clear_sync_point.inc + +# Let the conflicting UPDATE proceed and wait until it hits abort_trx_end. +# The victim transaction still sits in commit_monitor_master_sync_point. + +--let $galera_sync_point = abort_trx_end +--source include/galera_set_sync_point.inc +--let $galera_sync_point = apply_monitor_slave_enter_sync +--source include/galera_signal_sync_point.inc +--let $galera_sync_point = abort_trx_end commit_monitor_master_enter_sync +--source include/galera_wait_sync_point.inc + +# Let the transactions proceed +--source include/galera_clear_sync_point.inc +--let $galera_sync_point = abort_trx_end +--source include/galera_signal_sync_point.inc +--let $galera_sync_point = commit_monitor_master_enter_sync +--source include/galera_signal_sync_point.inc + +# Commit succeeds +--connection node_1 +--reap + +SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'b'; +SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'c'; + +# wsrep_local_replays has increased by 1 +--let $wsrep_local_replays_new = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'` +--disable_query_log +--eval SELECT $wsrep_local_replays_new - $wsrep_local_replays_old = 1 AS wsrep_local_replays; +--enable_query_log + +--connection node_2 +SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'b'; +SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'c'; + +DROP TABLE t1; + diff --git a/sql/wsrep_high_priority_service.cc b/sql/wsrep_high_priority_service.cc index 700ac599cee..c396a9eeae5 100644 --- a/sql/wsrep_high_priority_service.cc +++ b/sql/wsrep_high_priority_service.cc @@ -1,4 +1,4 @@ -/* Copyright 2018-2021 Codership Oy +/* Copyright 2018-2023 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -619,6 +619,9 @@ Wsrep_replayer_service::Wsrep_replayer_service(THD* replayer_thd, THD* orig_thd) transactional locks */ DBUG_ASSERT(!orig_thd->mdl_context.has_transactional_locks()); + replayer_thd->system_thread_info.rpl_sql_info= + new rpl_sql_thread_info(replayer_thd->wsrep_rgi->rli->mi->rpl_filter); + /* Make a shadow copy of diagnostics area and reset */ m_da_shadow.status= orig_thd->get_stmt_da()->status(); if (m_da_shadow.status == Diagnostics_area::DA_OK) @@ -657,35 +660,35 @@ Wsrep_replayer_service::Wsrep_replayer_service(THD* replayer_thd, THD* orig_thd) Wsrep_replayer_service::~Wsrep_replayer_service() { - THD* replayer_thd= m_thd; - THD* orig_thd= m_orig_thd; - /* Switch execution context back to original. */ - wsrep_after_apply(replayer_thd); - wsrep_after_command_ignore_result(replayer_thd); - wsrep_close(replayer_thd); - wsrep_reset_threadvars(replayer_thd); - wsrep_store_threadvars(orig_thd); + wsrep_after_apply(m_thd); + wsrep_after_command_ignore_result(m_thd); + wsrep_close(m_thd); + wsrep_reset_threadvars(m_thd); + wsrep_store_threadvars(m_orig_thd); - DBUG_ASSERT(!orig_thd->get_stmt_da()->is_sent()); - DBUG_ASSERT(!orig_thd->get_stmt_da()->is_set()); + DBUG_ASSERT(!m_orig_thd->get_stmt_da()->is_sent()); + DBUG_ASSERT(!m_orig_thd->get_stmt_da()->is_set()); + + delete m_thd->system_thread_info.rpl_sql_info; + m_thd->system_thread_info.rpl_sql_info= nullptr; if (m_replay_status == wsrep::provider::success) { - DBUG_ASSERT(replayer_thd->wsrep_cs().current_error() == wsrep::e_success); - orig_thd->reset_kill_query(); - my_ok(orig_thd, m_da_shadow.affected_rows, m_da_shadow.last_insert_id); + DBUG_ASSERT(m_thd->wsrep_cs().current_error() == wsrep::e_success); + m_orig_thd->reset_kill_query(); + my_ok(m_orig_thd, m_da_shadow.affected_rows, m_da_shadow.last_insert_id); } else if (m_replay_status == wsrep::provider::error_certification_failed) { - wsrep_override_error(orig_thd, ER_LOCK_DEADLOCK); + wsrep_override_error(m_orig_thd, ER_LOCK_DEADLOCK); } else { DBUG_ASSERT(0); WSREP_ERROR("trx_replay failed for: %d, schema: %s, query: %s", m_replay_status, - orig_thd->db.str, wsrep_thd_query(orig_thd)); + m_orig_thd->db.str, wsrep_thd_query(m_orig_thd)); unireg_abort(1); } } -- cgit v1.2.1 From 68cfcf9cb6821c3d333b97b213b44627e433861c Mon Sep 17 00:00:00 2001 From: sjaakola Date: Tue, 8 Nov 2022 16:36:34 +0200 Subject: MDEV-29512 deadlock between commit monitor and THD::LOCK_thd_data mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit contains only a mtr test for reproducing the issue in MDEV-29512 The actual fix will be pushed in wsrep-lib repository The hanging in MDEV-29512 happens when binlog purging is attempted, and there is one local BF aborted transaction waiting for commit monitor. The test will launch two node cluster and enable binlogging with expire log days, to force binlog purging to happen. A local transaction is executed so that will become BF abort victim, and has advanced to replication stage waiting for commit monitor for final cleanup (to mark position in innodb) after that, applier is released to complete the BF abort and due to binlog configuration, starting the binlog purging. This is where the hanging would occur, if code is buggy Reviewed-by: Jan Lindström --- mysql-test/suite/galera/r/galera_MDEV-29512.result | 40 ++++++++++ mysql-test/suite/galera/t/galera_MDEV-29512.cnf | 15 ++++ mysql-test/suite/galera/t/galera_MDEV-29512.test | 91 ++++++++++++++++++++++ 3 files changed, 146 insertions(+) create mode 100644 mysql-test/suite/galera/r/galera_MDEV-29512.result create mode 100644 mysql-test/suite/galera/t/galera_MDEV-29512.cnf create mode 100644 mysql-test/suite/galera/t/galera_MDEV-29512.test diff --git a/mysql-test/suite/galera/r/galera_MDEV-29512.result b/mysql-test/suite/galera/r/galera_MDEV-29512.result new file mode 100644 index 00000000000..aaf24df920e --- /dev/null +++ b/mysql-test/suite/galera/r/galera_MDEV-29512.result @@ -0,0 +1,40 @@ +connection node_2; +connection node_1; +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 varchar(2000)); +INSERT INTO t1 VALUES (1, 0, REPEAT('1234567890', 200)); +INSERT INTO t1 VALUES (3, 3, REPEAT('1234567890', 200)); +SET SESSION wsrep_sync_wait=0; +SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_cb"; +connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; +connection node_1a; +SET SESSION wsrep_sync_wait=0; +connection node_1; +begin; +select f1,f2 from t1; +f1 f2 +1 0 +3 3 +connection node_2; +UPDATE t1 SET f2=2 WHERE f1=3; +connection node_1a; +SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached"; +connection node_1; +UPDATE t1 SET f2=1 WHERE f1=3; +SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync'; +COMMIT; +connection node_1a; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_master_enter_sync'; +SET GLOBAL DEBUG_DBUG = ""; +SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb"; +SET GLOBAL debug_dbug = NULL; +SET debug_sync='RESET'; +connection node_1; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +select f1,f2 from t1; +f1 f2 +1 0 +3 2 +DROP TABLE t1; diff --git a/mysql-test/suite/galera/t/galera_MDEV-29512.cnf b/mysql-test/suite/galera/t/galera_MDEV-29512.cnf new file mode 100644 index 00000000000..bf8e0c37984 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_MDEV-29512.cnf @@ -0,0 +1,15 @@ +!include ../galera_2nodes.cnf + +[mysqld] +log-bin +log-slave-updates + +[mysqld.1] +log_bin +log_slave_updates +max-binlog-size=4096 +expire-logs-days=1 + + +[mysqld.2] + diff --git a/mysql-test/suite/galera/t/galera_MDEV-29512.test b/mysql-test/suite/galera/t/galera_MDEV-29512.test new file mode 100644 index 00000000000..ffcef792f85 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_MDEV-29512.test @@ -0,0 +1,91 @@ +# +# This test is for reproducing the issue in: +# https://jira.mariadb.org/browse/MDEV-29512 +# +# The hanging in MDEV-29512 happens when binlog purging is attempted, and there is +# one local BF aborted transaction waiting for commit monitor. +# +# The test will launch two node cluster and enable binlogging with expire log days, +# to force binlog purging to happen. +# A local transaction is executed so that will become BF abort victim, and has advanced +# to replication stage waiting for commit monitor for final cleanup (to mark position in innodb) +# after that, applier is released to complete the BF abort and due to binlog configuration, +# starting the binlog purging. This is where the hanging would occur, if code is buggy +# +--source include/galera_cluster.inc +--source include/have_innodb.inc +--source include/have_debug_sync.inc +--source include/galera_have_debug_sync.inc + +# +# binlog size is limited to 4096 bytes, we will create enough events to +# cause binlog rotation +# +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 varchar(2000)); +INSERT INTO t1 VALUES (1, 0, REPEAT('1234567890', 200)); +INSERT INTO t1 VALUES (3, 3, REPEAT('1234567890', 200)); + +SET SESSION wsrep_sync_wait=0; + +# set sync point for replication applier +SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_cb"; + +# Control connection to manage sync points for appliers +--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1 +--connection node_1a +SET SESSION wsrep_sync_wait=0; + +# starting local transaction, only select so far, +# write will happen later and this will be ordered after the transaction in node_2 +--connection node_1 +begin; +select f1,f2 from t1; + +# send from node 2 an UPDATE transaction, which will BF abort the transaction in node_1 +--connection node_2 +--let $wait_condition=select count(*)=2 from t1 +--source include/wait_condition.inc + +UPDATE t1 SET f2=2 WHERE f1=3; + +--connection node_1a +# wait to see the UPDATE from node_2 in apply_cb sync point +SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached"; + +--connection node_1 +# now issuing conflicting update +UPDATE t1 SET f2=1 WHERE f1=3; + +# Block the local commit, send final COMMIT and wait until it gets blocked +--let $galera_sync_point = commit_monitor_master_enter_sync +--source include/galera_set_sync_point.inc +--send COMMIT + +--connection node_1a +# wait for the local commit to enter in commit monitor wait state +--let $galera_sync_point = commit_monitor_master_enter_sync +--source include/galera_wait_sync_point.inc +--source include/galera_clear_sync_point.inc + +# release the local transaction to continue with commit +--let $galera_sync_point = commit_monitor_master_enter_sync +--source include/galera_signal_sync_point.inc + +# and now release the applier, it should force local trx to abort +SET GLOBAL DEBUG_DBUG = ""; +SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb"; +SET GLOBAL debug_dbug = NULL; +SET debug_sync='RESET'; + +--connection node_1 +--error ER_LOCK_DEADLOCK +--reap + +# wait until applying is complete +--let $wait_condition = SELECT COUNT(*)=1 FROM t1 WHERE f2=2 +--source include/wait_condition.inc + +# final read to verify what we got +select f1,f2 from t1; + +DROP TABLE t1; -- cgit v1.2.1 From 0ff7f33c7b5d0b5373472f4706aff4d19dc84258 Mon Sep 17 00:00:00 2001 From: sjaakola Date: Wed, 12 Oct 2022 15:07:20 +0300 Subject: 10.4-MDEV-29684 Fixes for cluster wide write conflict resolving MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rather recent thd_need_ordering_with() function does not take high priority transactions' order in consideration. Chaged this funtion to compare also transaction seqnos and favor earlier transaction. Reviewed-by: Jan Lindström --- sql/sql_class.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 10bee9e7aae..b516791b6da 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5230,8 +5230,9 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd) (e.g. InnoDB does it by keeping lock_sys.mutex locked) */ if (WSREP_ON && - wsrep_thd_is_BF(const_cast(thd), false) && - wsrep_thd_is_BF(const_cast(other_thd), false)) + wsrep_thd_is_BF(thd, false) && + wsrep_thd_is_BF(other_thd, false) && + wsrep_thd_order_before(thd, other_thd)) return 0; #endif /* WITH_WSREP */ rgi= thd->rgi_slave; -- cgit v1.2.1 From a44d896f98f2d2a3ebf0f1393bf84fd659ecd225 Mon Sep 17 00:00:00 2001 From: sjaakola Date: Wed, 12 Oct 2022 14:13:49 +0300 Subject: 10.4-MDEV-29684 Fixes for cluster wide write conflict resolving MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If two high priority threads have lock conflict, we look at the order of these transactions and honor the earlier transaction. for_locking parameter in lock_rec_has_to_wait() has become obsolete and it is now removed from the code . Reviewed-by: Jan Lindström --- sql/service_wsrep.cc | 6 +++-- sql/sql_class.cc | 2 -- storage/innobase/lock/lock0lock.cc | 53 +++++++++++++++++++++++++------------- 3 files changed, 39 insertions(+), 22 deletions(-) diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc index 7b0a1e5495e..722c22809de 100644 --- a/sql/service_wsrep.cc +++ b/sql/service_wsrep.cc @@ -1,4 +1,4 @@ -/* Copyright 2018-2021 Codership Oy +/* Copyright 2018-2023 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -274,7 +274,9 @@ extern "C" my_bool wsrep_thd_skip_locking(const THD *thd) extern "C" my_bool wsrep_thd_order_before(const THD *left, const THD *right) { - if (wsrep_thd_trx_seqno(left) < wsrep_thd_trx_seqno(right)) { + if (wsrep_thd_is_BF(left, false) && + wsrep_thd_is_BF(right, false) && + wsrep_thd_trx_seqno(left) < wsrep_thd_trx_seqno(right)) { WSREP_DEBUG("BF conflict, order: %lld %lld\n", (long long)wsrep_thd_trx_seqno(left), (long long)wsrep_thd_trx_seqno(right)); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index b516791b6da..464d64db73b 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5230,8 +5230,6 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd) (e.g. InnoDB does it by keeping lock_sys.mutex locked) */ if (WSREP_ON && - wsrep_thd_is_BF(thd, false) && - wsrep_thd_is_BF(other_thd, false) && wsrep_thd_order_before(thd, other_thd)) return 0; #endif /* WITH_WSREP */ diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index e10d0c9f2b5..26388ad95e2 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -819,25 +819,34 @@ lock_rec_has_to_wait( } #ifdef WITH_WSREP - /* New lock request from a transaction is using unique key - scan and this transaction is a wsrep high priority transaction - (brute force). If conflicting transaction is also wsrep high - priority transaction we should avoid lock conflict because - ordering of these transactions is already decided and - conflicting transaction will be later replayed. Note - that thread holding conflicting lock can't be - committed or rolled back while we hold - lock_sys->mutex. */ - if (trx->is_wsrep_UK_scan() - && wsrep_thd_is_BF(lock2->trx->mysql_thd, false)) { - return false; - } + /* New lock request from a transaction is using unique key + scan and this transaction is a wsrep high priority transaction + (brute force). If conflicting transaction is also wsrep high + priority transaction we should avoid lock conflict because + ordering of these transactions is already decided and + conflicting transaction will be later replayed. Note + that thread holding conflicting lock can't be + committed or rolled back while we hold + lock_sys->mutex. */ + if (trx->is_wsrep_UK_scan() + && wsrep_thd_is_BF(lock2->trx->mysql_thd, false)) { + return false; + } - /* We very well can let bf to wait normally as other - BF will be replayed in case of conflict. For debug - builds we will do additional sanity checks to catch - unsupported bf wait if any. */ - ut_d(wsrep_assert_no_bf_bf_wait(lock2, trx)); + /* If BF-BF conflict, we have to look at write set order */ + if (trx->is_wsrep() + && (type_mode & LOCK_MODE_MASK) == LOCK_X + && (lock2->type_mode & LOCK_MODE_MASK) == LOCK_X + && wsrep_thd_order_before(trx->mysql_thd, + lock2->trx->mysql_thd)) { + return false; + } + + /* We very well can let bf to wait normally as other + BF will be replayed in case of conflict. For debug + builds we will do additional sanity checks to catch + unsupported bf wait if any. */ + ut_d(wsrep_assert_no_bf_bf_wait(lock2, trx)); #endif /* WITH_WSREP */ return true; @@ -2043,6 +2052,14 @@ lock_rec_has_to_wait_in_queue( if (heap_no < lock_rec_get_n_bits(lock) && (p[bit_offset] & bit_mask) && lock_has_to_wait(wait_lock, lock)) { +#ifdef WITH_WSREP + if (lock->trx->is_wsrep() + && wsrep_thd_order_before(wait_lock->trx->mysql_thd, + lock->trx->mysql_thd)) { + /* don't wait for another BF lock */ + continue; + } +#endif return(lock); } } -- cgit v1.2.1