diff options
36 files changed, 816 insertions, 211 deletions
diff --git a/config.h.cmake b/config.h.cmake index 170bb80b691..45e3a6d3ee1 100644 --- a/config.h.cmake +++ b/config.h.cmake @@ -189,9 +189,6 @@ #cmakedefine HAVE_LINUX_FALLOC_H 1 #cmakedefine HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE 1 #cmakedefine HAVE_PREAD 1 -#cmakedefine HAVE_PAUSE_INSTRUCTION 1 -#cmakedefine HAVE_FAKE_PAUSE_INSTRUCTION 1 -#cmakedefine HAVE_HMT_PRIORITY_INSTRUCTION 1 #cmakedefine HAVE_RDTSCLL 1 #cmakedefine HAVE_READ_REAL_TIME 1 #cmakedefine HAVE_PTHREAD_ATTR_CREATE 1 diff --git a/configure.cmake b/configure.cmake index dec28f47d72..e385bec81dc 100644 --- a/configure.cmake +++ b/configure.cmake @@ -759,43 +759,6 @@ IF(NOT C_HAS_inline) ENDIF() ENDIF() -IF(NOT CMAKE_CROSSCOMPILING AND NOT MSVC) - STRING(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} processor) - IF(processor MATCHES "86" OR processor MATCHES "amd64" OR processor MATCHES "x64") - #Check for x86 PAUSE instruction - # We have to actually try running the test program, because of a bug - # in Solaris on x86_64, where it wrongly reports that PAUSE is not - # supported when trying to run an application. 
See - # http://bugs.opensolaris.org/bugdatabase/printableBug.do?bug_id=6478684 - CHECK_C_SOURCE_RUNS(" - int main() - { - __asm__ __volatile__ (\"pause\"); - return 0; - }" HAVE_PAUSE_INSTRUCTION) - ENDIF() - IF (NOT HAVE_PAUSE_INSTRUCTION) - CHECK_C_SOURCE_COMPILES(" - int main() - { - __asm__ __volatile__ (\"rep; nop\"); - return 0; - } - " HAVE_FAKE_PAUSE_INSTRUCTION) - ENDIF() - IF (NOT HAVE_PAUSE_INSTRUCTION) - CHECK_C_SOURCE_COMPILES(" - #include <sys/platform/ppc.h> - int main() - { - __ppc_set_ppr_low(); - __ppc_set_ppr_med(); - return 0; - } - " HAVE_HMT_PRIORITY_INSTRUCTION) - ENDIF() -ENDIF() - CHECK_SYMBOL_EXISTS(tcgetattr "termios.h" HAVE_TCGETATTR 1) # diff --git a/include/atomic/generic-msvc.h b/include/atomic/generic-msvc.h index 754f0bfa8b4..c812aebae90 100644 --- a/include/atomic/generic-msvc.h +++ b/include/atomic/generic-msvc.h @@ -120,7 +120,6 @@ static __inline int my_yield_processor() return 1; } -#define LF_BACKOFF my_yield_processor() #else /* cleanup */ #undef IL_EXCHG_ADD32 diff --git a/include/my_atomic.h b/include/my_atomic.h index aa5c617e593..d6aee503dbe 100644 --- a/include/my_atomic.h +++ b/include/my_atomic.h @@ -346,15 +346,6 @@ make_atomic_store(ptr) #undef make_atomic_fas_body #undef intptr -/* - the macro below defines (as an expression) the code that - will be run in spin-loops. Intel manuals recummend to have PAUSE there. 
- It is expected to be defined in include/atomic/ *.h files -*/ -#ifndef LF_BACKOFF -#define LF_BACKOFF (1) -#endif - #define MY_ATOMIC_OK 0 #define MY_ATOMIC_NOT_1CPU 1 extern int my_atomic_initialize(); diff --git a/include/my_cpu.h b/include/my_cpu.h index ebabe6c7202..aff92d22131 100644 --- a/include/my_cpu.h +++ b/include/my_cpu.h @@ -1,3 +1,5 @@ +#ifndef MY_CPU_INCLUDED +#define MY_CPU_INCLUDED /* Copyright (c) 2013, MariaDB foundation Ab and SkySQL This program is free software; you can redistribute it and/or modify @@ -42,3 +44,94 @@ #define HMT_medium_high() #define HMT_high() #endif + +#if defined __i386__ || defined __x86_64__ || defined _WIN32 +# define HAVE_PAUSE_INSTRUCTION /* added in Intel Pentium 4 */ +#endif + +#ifdef _WIN32 +#elif defined HAVE_PAUSE_INSTRUCTION +#elif defined(_ARCH_PWR8) +#else +# include "my_global.h" +# include "my_atomic.h" +#endif + +static inline void MY_RELAX_CPU(void) +{ +#ifdef _WIN32 + /* + In the Win32 API, the x86 PAUSE instruction is executed by calling + the YieldProcessor macro defined in WinNT.h. It is a CPU architecture- + independent way by using YieldProcessor. + */ + YieldProcessor(); +#elif defined HAVE_PAUSE_INSTRUCTION + /* + According to the gcc info page, asm volatile means that the + instruction has important side-effects and must not be removed. + Also asm volatile may trigger a memory barrier (spilling all registers + to memory). 
+ */ +#ifdef __SUNPRO_CC + asm ("pause" ); +#else + __asm__ __volatile__ ("pause"); +#endif +#elif defined(_ARCH_PWR8) + __ppc_get_timebase(); +#else + int32 var, oldval = 0; + my_atomic_cas32_strong_explicit(&var, &oldval, 1, MY_MEMORY_ORDER_RELAXED, + MY_MEMORY_ORDER_RELAXED); +#endif +} + + +#ifdef HAVE_PAUSE_INSTRUCTION +# ifdef __cplusplus +extern "C" { +# endif +extern unsigned my_cpu_relax_multiplier; +void my_cpu_init(void); +# ifdef __cplusplus +} +# endif +#else +# define my_cpu_relax_multiplier 200 +# define my_cpu_init() /* nothing */ +#endif + +/* + LF_BACKOFF should be used to improve performance on hyperthreaded CPUs. Intel + recommends to use it in spin loops also on non-HT machines to reduce power + consumption (see e.g http://softwarecommunity.intel.com/articles/eng/2004.htm) + + Running benchmarks for spinlocks implemented with InterlockedCompareExchange + and YieldProcessor shows that much better performance is achieved by calling + YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting + loop count in the range 200-300 brought best results. +*/ + +static inline int LF_BACKOFF(void) +{ + unsigned i= my_cpu_relax_multiplier; + while (i--) + MY_RELAX_CPU(); + return 1; +} +/** + Run a delay loop while waiting for a shared resource to be released. 
+ @param delay originally, roughly microseconds on 100 MHz Intel Pentium +*/ + +static inline void ut_delay(unsigned delay) +{ + unsigned i= my_cpu_relax_multiplier / 4 * delay; + HMT_low(); + while (i--) + MY_RELAX_CPU(); + HMT_medium(); +} + +#endif diff --git a/include/mysql/service_wsrep.h b/include/mysql/service_wsrep.h index 923ba57fcdc..1ce7ffd0991 100644 --- a/include/mysql/service_wsrep.h +++ b/include/mysql/service_wsrep.h @@ -97,7 +97,8 @@ extern struct wsrep_service_st { enum wsrep_exec_mode (*wsrep_thd_exec_mode_func)(THD *thd); const char * (*wsrep_thd_exec_mode_str_func)(THD *thd); enum wsrep_conflict_state (*wsrep_thd_get_conflict_state_func)(MYSQL_THD); - my_bool (*wsrep_thd_is_BF_func)(MYSQL_THD , my_bool); + my_bool (*wsrep_thd_is_aborting_func)(const MYSQL_THD thd); + my_bool (*wsrep_thd_is_BF_func)(MYSQL_THD, my_bool); my_bool (*wsrep_thd_is_wsrep_func)(MYSQL_THD thd); const char * (*wsrep_thd_query_func)(THD *thd); enum wsrep_query_state (*wsrep_thd_query_state_func)(THD *thd); @@ -111,10 +112,14 @@ extern struct wsrep_service_st { void (*wsrep_set_load_multi_commit_func)(THD *thd, bool split); bool (*wsrep_is_load_multi_commit_func)(THD *thd); int (*wsrep_trx_is_aborting_func)(MYSQL_THD thd); + my_bool (*wsrep_thd_bf_abort_func)(MYSQL_THD bf_thd, + MYSQL_THD victim_thd, + my_bool signal); int (*wsrep_trx_order_before_func)(MYSQL_THD, MYSQL_THD); void (*wsrep_unlock_rollback_func)(); void (*wsrep_set_data_home_dir_func)(const char *data_dir); my_bool (*wsrep_thd_is_applier_func)(MYSQL_THD); + bool (*wsrep_thd_set_wsrep_aborter_func)(MYSQL_THD bf_thd, MYSQL_THD thd); } *wsrep_service; #ifdef MYSQL_DYNAMIC_PLUGIN @@ -143,6 +148,7 @@ extern struct wsrep_service_st { #define wsrep_thd_exec_mode(T) wsrep_service->wsrep_thd_exec_mode_func(T) #define wsrep_thd_exec_mode_str(T) wsrep_service->wsrep_thd_exec_mode_str_func(T) #define wsrep_thd_get_conflict_state(T) wsrep_service->wsrep_thd_get_conflict_state_func(T) +#define wsrep_thd_is_aborting(T) 
wsrep_service->wsrep_thd_is_aborting_func(T) #define wsrep_thd_is_BF(T,S) wsrep_service->wsrep_thd_is_BF_func(T,S) #define wsrep_thd_is_wsrep(T) wsrep_service->wsrep_thd_is_wsrep_func(T) #define wsrep_thd_query(T) wsrep_service->wsrep_thd_query_func(T) @@ -157,10 +163,12 @@ extern struct wsrep_service_st { #define wsrep_set_load_multi_commit(T,S) wsrep_service->wsrep_set_load_multi_commit_func(T,S) #define wsrep_is_load_multi_commit(T) wsrep_service->wsrep_is_load_multi_commit_func(T) #define wsrep_trx_is_aborting(T) wsrep_service->wsrep_trx_is_aborting_func(T) +#define wsrep_thd_bf_abort(T,T2,S) wsrep_service->wsrep_thd_bf_abort_func(T,T2,S) #define wsrep_trx_order_before(T1,T2) wsrep_service->wsrep_trx_order_before_func(T1,T2) #define wsrep_unlock_rollback() wsrep_service->wsrep_unlock_rollback_func() #define wsrep_set_data_home_dir(A) wsrep_service->wsrep_set_data_home_dir_func(A) #define wsrep_thd_is_applier(T) wsrep_service->wsrep_thd_is_applier_func(T) +#define wsrep_thd_set_wsrep_aborter(T1, T2) wsrep_service->wsrep_thd_set_wsrep_aborter_func(T1, T2) #define wsrep_debug get_wsrep_debug() #define wsrep_log_conflicts get_wsrep_log_conflicts() @@ -195,6 +203,9 @@ int wsrep_is_wsrep_xid(const struct xid_t* xid); int wsrep_on(MYSQL_THD thd); int wsrep_thd_retry_counter(THD *thd); int wsrep_trx_is_aborting(MYSQL_THD thd); +my_bool wsrep_thd_bf_abort(MYSQL_THD bf_thd, + MYSQL_THD victim_thd, + my_bool signal); int wsrep_trx_order_before(MYSQL_THD thd1, MYSQL_THD thd2); long get_wsrep_protocol_version(); long long wsrep_thd_trx_seqno(THD *thd); @@ -205,6 +216,7 @@ my_bool get_wsrep_recovery(); my_bool get_wsrep_load_data_splitting(); my_bool get_wsrep_log_conflicts(); my_bool wsrep_aborting_thd_contains(THD *thd); +my_bool wsrep_thd_is_aborting(const MYSQL_THD thd); my_bool wsrep_thd_is_BF(MYSQL_THD thd, my_bool sync); my_bool wsrep_thd_is_wsrep(MYSQL_THD thd); struct wsrep *get_wsrep(); @@ -223,6 +235,8 @@ bool wsrep_thd_ignore_table(THD *thd); void 
wsrep_unlock_rollback(); void wsrep_set_data_home_dir(const char *data_dir); my_bool wsrep_thd_is_applier(MYSQL_THD thd); +bool wsrep_thd_set_wsrep_aborter(MYSQL_THD bf_thd, MYSQL_THD victim_thd); + #endif #ifdef __cplusplus diff --git a/mysql-test/suite/galera/r/galera_bf_kill.result b/mysql-test/suite/galera/r/galera_bf_kill.result new file mode 100644 index 00000000000..3738e8c9684 --- /dev/null +++ b/mysql-test/suite/galera/r/galera_bf_kill.result @@ -0,0 +1,73 @@ +connection node_2; +connection node_1; +connection node_2; +CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB; +insert into t1 values (NULL,1); +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2a; +begin; +update t1 set a = 5; +connection node_2; +select * from t1; +a b +2 1 +disconnect node_2a; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2a; +begin; +update t1 set a =5; +connection node_2; +select * from t1; +a b +2 1 +disconnect node_2a; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2a; +begin; +update t1 set a =5, b=2; +connection node_2; +ALTER TABLE t1 ADD UNIQUE KEY b1(b); +ALTER TABLE t1 DROP KEY b1; +select * from t1; +a b +2 1 +disconnect node_2a; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2a; +begin; +update t1 set a =5, b=2; +connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2b; +begin; +update t1 set a =6, b=7; +connection node_2; +ALTER TABLE t1 ADD UNIQUE KEY b2(b); +ALTER TABLE t1 DROP KEY b2; +select * from t1; +a b +2 1 +disconnect node_2a; +disconnect node_2b; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2a; +SET SESSION wsrep_on=OFF; +begin; +update t1 set a =5, b=2; +connection node_2; +ALTER TABLE t1 ADD UNIQUE KEY b3(b); +select * from t1; +a b +2 1 +disconnect node_2a; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2a; +SET SESSION wsrep_on=OFF; 
+begin; +update t1 set a =5, b=2; +connection node_2; +select * from t1; +a b +2 1 +disconnect node_2a; +connection node_1; +drop table t1; diff --git a/mysql-test/suite/galera/r/galera_bf_kill_debug.result b/mysql-test/suite/galera/r/galera_bf_kill_debug.result new file mode 100644 index 00000000000..c3eae243f47 --- /dev/null +++ b/mysql-test/suite/galera/r/galera_bf_kill_debug.result @@ -0,0 +1,54 @@ +connection node_2; +connection node_1; +connection node_2; +CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB; +insert into t1 values (NULL,1); +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2a; +truncate t1; +insert into t1 values (1,0); +begin; +update t1 set b=2 where a=1; +connection node_2; +set session wsrep_sync_wait=0; +connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2b; +SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort"; +connection node_1; +select * from t1; +a b +1 0 +update t1 set b= 1 where a=1; +connection node_2b; +SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached"; +connection node_2; +SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill'; +connection node_2b; +SET DEBUG_SYNC='now WAIT_FOR awake_reached'; +SET GLOBAL debug_dbug = ""; +SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort"; +SET DEBUG_SYNC = "now SIGNAL continue_kill"; +connection node_2; +connection node_2a; +select * from t1; +connection node_2; +SET DEBUG_SYNC = "RESET"; +drop table t1; +disconnect node_2a; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2a; +CREATE TABLE t1 (i int primary key); +SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue"; +INSERT INTO t1 VALUES (1); +connection node_2; +SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached"; +SET DEBUG_SYNC = "now SIGNAL bwoc_continue"; +SET DEBUG_SYNC='RESET'; +connection node_2a; +connection node_2; +select * from 
t1; +i +1 +disconnect node_2a; +connection node_1; +drop table t1; diff --git a/mysql-test/suite/galera/r/galera_bf_lock_wait.result b/mysql-test/suite/galera/r/galera_bf_lock_wait.result index 7ec524da888..ec294ada3bc 100644 --- a/mysql-test/suite/galera/r/galera_bf_lock_wait.result +++ b/mysql-test/suite/galera/r/galera_bf_lock_wait.result @@ -1,3 +1,7 @@ +connection node_2; +call mtr.add_suppression("WSREP: Trying to continue unpaused monitor"); +connection node_1; +call mtr.add_suppression("WSREP: Trying to continue unpaused monitor"); CREATE TABLE t1 ENGINE=InnoDB select 1 as a, 1 as b union select 2, 2; ALTER TABLE t1 add primary key(a); CREATE PROCEDURE p1() @@ -17,7 +21,7 @@ connect node_2_p1, 127.0.0.1, root, , test, $NODE_MYPORT_2; call p1; connect node_2_p2, 127.0.0.1, root, , test, $NODE_MYPORT_2; call p1; -connection default; +connection node_1; checking error log for 'BF lock wait long' message for 10 times every 10 seconds ... drop table t1; drop procedure p1; diff --git a/mysql-test/suite/galera/t/galera_bf_kill.cnf b/mysql-test/suite/galera/t/galera_bf_kill.cnf new file mode 100644 index 00000000000..e68f891792c --- /dev/null +++ b/mysql-test/suite/galera/t/galera_bf_kill.cnf @@ -0,0 +1,7 @@ +!include ../galera_2nodes.cnf + +[mysqld.1] +wsrep-debug=SERVER + +[mysqld.2] +wsrep-debug=SERVER diff --git a/mysql-test/suite/galera/t/galera_bf_kill.test b/mysql-test/suite/galera/t/galera_bf_kill.test new file mode 100644 index 00000000000..c8564bc9219 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_bf_kill.test @@ -0,0 +1,141 @@ +--source include/galera_cluster.inc +--source include/have_innodb.inc + +# +# Test case 1: Start a transaction on node_2a and kill it +# from other connection on same node +# + +--connection node_2 +CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB; +insert into t1 values (NULL,1); + +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--connection node_2a +begin; +update t1 set a = 5; + 
+--connection node_2 + +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1 +--source include/wait_condition.inc + +--let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1` + +--disable_query_log +--eval KILL $k_thread +--enable_query_log + +select * from t1; +--disconnect node_2a + +# +# Test case 2: Start a transaction on node_2a and use +# kill query from other connection on same node +# + +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--connection node_2a +begin; +update t1 set a =5; + +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1 +--source include/wait_condition.inc + +--let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1` + +--disable_query_log +--eval KILL QUERY $k_thread +--enable_query_log + +select * from t1; +--disconnect node_2a +# +# Test case 3: Start a transaction on node_2a and start a DDL on other transaction +# that will then abort node_2a transaction +# +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--connection node_2a +begin; +update t1 set a =5, b=2; + +--connection node_2 +ALTER TABLE t1 ADD UNIQUE KEY b1(b); +ALTER TABLE t1 DROP KEY b1; + +select * from t1; + +--disconnect node_2a + +# +# Test case 4: Start a transaction on node_2a and conflicting transaction on node_2b +# and start a DDL on other transaction that will then abort node_2a and node_2b +# transactions +# + +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--connection node_2a +begin; +update t1 set a =5, b=2; + +--connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--connection node_2b +begin; +send update t1 set a =6, b=7; + +--connection node_2 +ALTER TABLE t1 ADD UNIQUE KEY b2(b); +ALTER TABLE t1 DROP KEY b2; + +select * from t1; + 
+--disconnect node_2a +--disconnect node_2b + +# +# Test case 5: Start a transaction on node_2a with wsrep disabled +# and start a DDL on other transaction that will then abort node_2a +# transactions +# + +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--connection node_2a +SET SESSION wsrep_on=OFF; +begin; +update t1 set a =5, b=2; + +--connection node_2 +ALTER TABLE t1 ADD UNIQUE KEY b3(b); + +select * from t1; + +--disconnect node_2a + +# +# Test case 6: Start a transaction on node_2a with wsrep disabled +# and kill it from other connection on same node +# + +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--connection node_2a +SET SESSION wsrep_on=OFF; +begin; +update t1 set a =5, b=2; + +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1 +--source include/wait_condition.inc + +--let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1` + +--disable_query_log +--eval KILL $k_thread +--enable_query_log + +select * from t1; + +--disconnect node_2a + +--connection node_1 +drop table t1; diff --git a/mysql-test/suite/galera/t/galera_bf_kill_debug.cnf b/mysql-test/suite/galera/t/galera_bf_kill_debug.cnf new file mode 100644 index 00000000000..e68f891792c --- /dev/null +++ b/mysql-test/suite/galera/t/galera_bf_kill_debug.cnf @@ -0,0 +1,7 @@ +!include ../galera_2nodes.cnf + +[mysqld.1] +wsrep-debug=SERVER + +[mysqld.2] +wsrep-debug=SERVER diff --git a/mysql-test/suite/galera/t/galera_bf_kill_debug.test b/mysql-test/suite/galera/t/galera_bf_kill_debug.test new file mode 100644 index 00000000000..c02dd28d40d --- /dev/null +++ b/mysql-test/suite/galera/t/galera_bf_kill_debug.test @@ -0,0 +1,139 @@ +--source include/galera_cluster.inc +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc + +# +# Test case 7: +# 1. 
Start a transaction on node_2, +# and leave it pending while holding a row locked +# 2. set sync point pause applier +# 3. send a conflicting write on node_1, it will pause +# at the sync point +# 4. through another connection to node_2, kill the local +# transaction +# + +--connection node_2 +CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB; +insert into t1 values (NULL,1); + +# +# connection node_2a runs a local transaction, that is victim of BF abort +# and victim of KILL command by connection node_2 +# +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--connection node_2a +truncate t1; +insert into t1 values (1,0); + +# start a transaction that will conflict with later applier +begin; +update t1 set b=2 where a=1; + +--connection node_2 +set session wsrep_sync_wait=0; +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1 +--source include/wait_condition.inc + +--let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1` + +# connection node_2b is for controlling debug sync points +# first set a sync point for applier, to pause during BF aborting +# and before THD::awake would be called +# +--connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--connection node_2b +SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort"; + +# +# replicate an update, which will BF abort the victim node_2a +# however, while applier in node 2 is handling the abort, +# it will pause in sync point set by node_2b +# +--connection node_1 +select * from t1; +update t1 set b= 1 where a=1; + +# +# wait until the applying of above update has reached the sync point +# in node 2 +# +--connection node_2b +SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached"; + +--connection node_2 +# +# pause KILL execution before awake +# +SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR 
continue_kill'; +--disable_query_log +--send_eval KILL $k_thread +--enable_query_log + + +--connection node_2b +SET DEBUG_SYNC='now WAIT_FOR awake_reached'; + +# release applier and KILL operator +SET GLOBAL debug_dbug = ""; +SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort"; +SET DEBUG_SYNC = "now SIGNAL continue_kill"; + +--connection node_2 +--reap + +--connection node_2a +--error 0,1213 +select * from t1; + +--connection node_2 +SET DEBUG_SYNC = "RESET"; + +drop table t1; + +--disconnect node_2a +# +# Test case 8: +# run a transaction in node 2, and set a sync point to pause the transaction +# in commit phase. +# Through another connection to node 2, kill the committing transaction by +# KILL QUERY command +# + +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--connection node_2a +--let $connection_id = `SELECT CONNECTION_ID()` + +CREATE TABLE t1 (i int primary key); + +# Set up sync point +SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue"; + +# Send insert which will block in the sync point above +--send INSERT INTO t1 VALUES (1) + +--connection node_2 +SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached"; + +--disable_query_log +--disable_result_log +# victim has passed the point of no return, kill is not possible anymore +--eval KILL QUERY $connection_id +--enable_result_log +--enable_query_log + +SET DEBUG_SYNC = "now SIGNAL bwoc_continue"; +SET DEBUG_SYNC='RESET'; +--connection node_2a +--error 0,1213 +--reap + +--connection node_2 +# victim was able to complete the INSERT +select * from t1; + +--disconnect node_2a + +--connection node_1 +drop table t1; diff --git a/mysql-test/suite/galera/t/galera_bf_lock_wait.test b/mysql-test/suite/galera/t/galera_bf_lock_wait.test index e3a9077a888..a78a94eb1db 100644 --- a/mysql-test/suite/galera/t/galera_bf_lock_wait.test +++ b/mysql-test/suite/galera/t/galera_bf_lock_wait.test @@ -1,6 +1,13 @@ --source include/galera_cluster.inc +--source include/have_innodb.inc 
--source include/big_test.inc - + +--connection node_2 +call mtr.add_suppression("WSREP: Trying to continue unpaused monitor"); + +--connection node_1 +call mtr.add_suppression("WSREP: Trying to continue unpaused monitor"); + CREATE TABLE t1 ENGINE=InnoDB select 1 as a, 1 as b union select 2, 2; ALTER TABLE t1 add primary key(a); @@ -28,7 +35,7 @@ send call p1; --connect node_2_p2, 127.0.0.1, root, , test, $NODE_MYPORT_2 send call p1; -connection default; +connection node_1; let $counter=10; let $sleep_period=10; diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt index 22d758d45a5..9a20bf0291a 100644 --- a/mysys/CMakeLists.txt +++ b/mysys/CMakeLists.txt @@ -43,7 +43,7 @@ SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c my_default.c my_atomic.c my_getncpus.c my_safehash.c my_chmod.c my_rnd.c my_uuid.c wqueue.c waiting_threads.c ma_dyncol.c ../sql-common/my_time.c my_rdtsc.c my_context.c psi_noop.c - my_atomic_writes.c + my_atomic_writes.c my_cpu.c file_logger.c my_dlerror.c) IF (WIN32) diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index 0dc524be336..1e7219b3296 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -102,6 +102,7 @@ #include <my_global.h> #include <my_sys.h> #include <lf.h> +#include "my_cpu.h" /* when using alloca() leave at least that many bytes of the stack - @@ -430,7 +431,7 @@ static void alloc_free(uchar *first, { anext_node(last)= tmp.node; } while (!my_atomic_casptr((void **)(char *)&allocator->top, - (void **)&tmp.ptr, first) && LF_BACKOFF); + (void **)&tmp.ptr, first) && LF_BACKOFF()); } /* @@ -501,7 +502,7 @@ void *lf_alloc_new(LF_PINS *pins) { node= allocator->top; lf_pin(pins, 0, node); - } while (node != allocator->top && LF_BACKOFF); + } while (node != allocator->top && LF_BACKOFF()); if (!node) { node= (void *)my_malloc(allocator->element_size, MYF(MY_WME)); diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 0dd0c1a94c6..4975c69453d 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -28,6 
+28,7 @@ #include <mysys_err.h> #include <my_bit.h> #include <lf.h> +#include "my_cpu.h" /* An element of the list */ typedef struct { @@ -103,7 +104,7 @@ retry: cursor->curr= (LF_SLIST *)(*cursor->prev); lf_pin(pins, 1, cursor->curr); } while (my_atomic_loadptr((void**)cursor->prev) != cursor->curr && - LF_BACKOFF); + LF_BACKOFF()); for (;;) { if (unlikely(!cursor->curr)) @@ -117,7 +118,7 @@ retry: link= cursor->curr->link; cursor->next= PTR(link); lf_pin(pins, 0, cursor->next); - } while (link != cursor->curr->link && LF_BACKOFF); + } while (link != cursor->curr->link && LF_BACKOFF()); if (!DELETED(link)) { @@ -145,7 +146,7 @@ retry: and remove this deleted node */ if (my_atomic_casptr((void **) cursor->prev, - (void **) &cursor->curr, cursor->next) && LF_BACKOFF) + (void **) &cursor->curr, cursor->next) && LF_BACKOFF()) lf_alloc_free(pins, cursor->curr); else goto retry; diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c index 35f19aa7edb..cf67e8291cb 100644 --- a/mysys/waiting_threads.c +++ b/mysys/waiting_threads.c @@ -174,6 +174,7 @@ #include <waiting_threads.h> #include <m_string.h> +#include "my_cpu.h" /* status variables */ @@ -617,7 +618,7 @@ retry: { rc= *shared_ptr; lf_pin(arg->thd->pins, 0, rc); - } while (rc != *shared_ptr && LF_BACKOFF); + } while (rc != *shared_ptr && LF_BACKOFF()); if (rc == 0) { diff --git a/sql/event_scheduler.cc b/sql/event_scheduler.cc index 2a5399fb94a..41cf8aba324 100644 --- a/sql/event_scheduler.cc +++ b/sql/event_scheduler.cc @@ -651,13 +651,11 @@ Event_scheduler::stop() DBUG_PRINT("info", ("Scheduler thread has id %lu", (ulong) scheduler_thd->thread_id)); /* Lock from delete */ - mysql_mutex_lock(&scheduler_thd->LOCK_thd_data); /* This will wake up the thread if it waits on Queue's conditional */ sql_print_information("Event Scheduler: Killing the scheduler thread, " "thread id %lu", (ulong) scheduler_thd->thread_id); scheduler_thd->awake(KILL_CONNECTION); - mysql_mutex_unlock(&scheduler_thd->LOCK_thd_data); /* 
thd could be 0x0, when shutting down */ sql_print_information("Event Scheduler: " diff --git a/sql/slave.cc b/sql/slave.cc index 1bf83aa9652..f83a733da71 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -346,9 +346,7 @@ handle_slave_background(void *arg __attribute__((unused))) THD *to_kill= p->to_kill; kill_list= p->next; - mysql_mutex_lock(&to_kill->LOCK_thd_data); to_kill->awake(KILL_CONNECTION); - mysql_mutex_unlock(&to_kill->LOCK_thd_data); mysql_mutex_lock(&to_kill->LOCK_wakeup_ready); to_kill->rgi_slave->killed_for_retry= rpl_group_info::RETRY_KILL_KILLED; @@ -859,7 +857,7 @@ terminate_slave_thread(THD *thd, int error __attribute__((unused)); DBUG_PRINT("loop", ("killing slave thread")); - mysql_mutex_lock(&thd->LOCK_thd_data); + mysql_mutex_lock(&thd->LOCK_thd_kill); #ifndef DONT_USE_THR_ALARM /* Error codes from pthread_kill are: @@ -869,9 +867,9 @@ terminate_slave_thread(THD *thd, int err __attribute__((unused))= pthread_kill(thd->real_id, thr_client_alarm); DBUG_ASSERT(err != EINVAL); #endif - thd->awake(NOT_KILLED); + thd->awake_no_mutex(NOT_KILLED); - mysql_mutex_unlock(&thd->LOCK_thd_data); + mysql_mutex_unlock(&thd->LOCK_thd_kill); /* There is a small chance that slave thread might miss the first diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 0a8c136e556..e7934f887dd 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -590,10 +590,8 @@ handle_condition(THD *thd, extern "C" void thd_kill_timeout(THD* thd) { thd->status_var.max_statement_time_exceeded++; - mysql_mutex_lock(&thd->LOCK_thd_data); /* Kill queries that can't cause data corruptions */ thd->awake(KILL_TIMEOUT); - mysql_mutex_unlock(&thd->LOCK_thd_data); } @@ -652,7 +650,8 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) wsrep_po_handle(WSREP_PO_INITIALIZER), wsrep_po_cnt(0), wsrep_apply_format(0), - wsrep_ignore_table(false) + wsrep_ignore_table(false), + wsrep_aborter(0) #endif { ulong tmp; @@ -790,6 +789,7 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) 
wsrep_replicate_GTID = false; wsrep_skip_wsrep_GTID = false; wsrep_split_flag = false; + wsrep_aborter = 0; #endif /* Call to init() below requires fully initialized Open_tables_state. */ reset_open_tables_state(this); @@ -1247,7 +1247,7 @@ void THD::init(void) session_tracker.enable(this); #endif //EMBEDDED_LIBRARY - apc_target.init(&LOCK_thd_data); + apc_target.init(&LOCK_thd_kill); DBUG_VOID_RETURN; } @@ -1508,9 +1508,15 @@ THD::~THD() if (!status_in_global) add_status_to_global(); - /* Ensure that no one is using THD */ - mysql_mutex_lock(&LOCK_thd_data); - mysql_mutex_unlock(&LOCK_thd_data); + /* + Other threads may have a lock on LOCK_thd_kill to ensure that this + THD is not deleted while they access it. The following mutex_lock + ensures that no one else is using this THD and it's now safe to delete + */ + if (WSREP(this)) mysql_mutex_lock(&LOCK_thd_data); + mysql_mutex_lock(&LOCK_thd_kill); + mysql_mutex_unlock(&LOCK_thd_kill); + if (WSREP(this)) mysql_mutex_unlock(&LOCK_thd_data); #ifdef WITH_WSREP delete wsrep_rgi; @@ -1680,17 +1686,18 @@ void add_diff_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var, This is normally called from another thread's THD object. - @note Do always call this while holding LOCK_thd_data. + @note Do always call this while holding LOCK_thd_kill. NOT_KILLED is used to awake a thread for a slave */ -void THD::awake(killed_state state_to_set) +void THD::awake_no_mutex(killed_state state_to_set) { DBUG_ENTER("THD::awake"); DBUG_PRINT("enter", ("this: %p current_thd: %p state: %d", this, current_thd, (int) state_to_set)); THD_CHECK_SENTRY(this); - mysql_mutex_assert_owner(&LOCK_thd_data); + if (WSREP(this)) mysql_mutex_assert_owner(&LOCK_thd_data); + mysql_mutex_assert_owner(&LOCK_thd_kill); print_aborted_warning(3, "KILLED"); @@ -1702,7 +1709,6 @@ void THD::awake(killed_state state_to_set) state_to_set= killed; /* Set the 'killed' flag of 'this', which is the target THD object. 
*/ - mysql_mutex_lock(&LOCK_thd_kill); set_killed_no_mutex(state_to_set); if (state_to_set >= KILL_CONNECTION || state_to_set == NOT_KILLED) @@ -1789,7 +1795,6 @@ void THD::awake(killed_state state_to_set) } mysql_mutex_unlock(&mysys_var->mutex); } - mysql_mutex_unlock(&LOCK_thd_kill); DBUG_VOID_RETURN; } @@ -1841,7 +1846,7 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use, { /* This code is similar to kill_delayed_threads() */ DBUG_PRINT("info", ("kill delayed thread")); - mysql_mutex_lock(&in_use->LOCK_thd_data); + mysql_mutex_lock(&in_use->LOCK_thd_kill); if (in_use->killed < KILL_CONNECTION) in_use->set_killed(KILL_CONNECTION); if (in_use->mysys_var) @@ -1854,7 +1859,7 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use, in_use->mysys_var->abort= 1; mysql_mutex_unlock(&in_use->mysys_var->mutex); } - mysql_mutex_unlock(&in_use->LOCK_thd_data); + mysql_mutex_unlock(&in_use->LOCK_thd_kill); signalled= TRUE; } @@ -1962,7 +1967,7 @@ bool THD::store_globals() return 1; /* mysys_var is concurrently readable by a killer thread. - It is protected by LOCK_thd_data, it is not needed to lock while the + It is protected by LOCK_thd_kill, it is not needed to lock while the pointer is changing from NULL not non-NULL. If the kill thread reads NULL it doesn't refer to anything, but if it is non-NULL we need to ensure that the thread doesn't proceed to assign another thread to @@ -2013,9 +2018,9 @@ bool THD::store_globals() void THD::reset_globals() { - mysql_mutex_lock(&LOCK_thd_data); + mysql_mutex_lock(&LOCK_thd_kill); mysys_var= 0; - mysql_mutex_unlock(&LOCK_thd_data); + mysql_mutex_unlock(&LOCK_thd_kill); /* Undocking the thread specific data. */ set_current_thd(0); @@ -5240,9 +5245,9 @@ void THD::set_query_and_id(char *query_arg, uint32 query_length_arg, /** Assign a new value to thd->mysys_var. 
*/ void THD::set_mysys_var(struct st_my_thread_var *new_mysys_var) { - mysql_mutex_lock(&LOCK_thd_data); + mysql_mutex_lock(&LOCK_thd_kill); mysys_var= new_mysys_var; - mysql_mutex_unlock(&LOCK_thd_data); + mysql_mutex_unlock(&LOCK_thd_kill); } /** diff --git a/sql/sql_class.h b/sql/sql_class.h index 8d8ab779d56..a091f532bb2 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -3214,7 +3214,28 @@ public: } void close_active_vio(); #endif - void awake(killed_state state_to_set); + void awake_no_mutex(killed_state state_to_set); + void awake(killed_state state_to_set) + { +/* + bool wsrep_on_local= WSREP(this); +*/ + /* + mutex locking order (LOCK_thd_data - LOCK_thd_kill)) requires + to grab LOCK_thd_data here + */ +/* + if (wsrep_on_local) + mysql_mutex_lock(&LOCK_thd_data); +*/ + mysql_mutex_lock(&LOCK_thd_kill); + awake_no_mutex(state_to_set); + mysql_mutex_unlock(&LOCK_thd_kill); +/* + if (wsrep_on_local) + mysql_mutex_unlock(&LOCK_thd_data); +*/ + } /** Disconnect the associated communication endpoint. 
*/ void disconnect(); @@ -3755,11 +3776,18 @@ public: */ if (killed != NOT_KILLED) { + mysql_mutex_assert_not_owner(&LOCK_thd_kill); mysql_mutex_lock(&LOCK_thd_kill); killed= NOT_KILLED; killed_err= 0; mysql_mutex_unlock(&LOCK_thd_kill); } +#ifdef WITH_WSREP + mysql_mutex_assert_not_owner(&LOCK_thd_data); + mysql_mutex_lock(&LOCK_thd_data); + wsrep_aborter= 0; + mysql_mutex_unlock(&LOCK_thd_data); +#endif /* WITH_WSREP */ } inline void reset_kill_query() { @@ -4470,6 +4498,8 @@ public: registered again, but replication of last chunk of rows is skipped by the innodb engine: */ bool wsrep_split_flag; + /* thread who has started kill for this THD protected by LOCK_thd_data*/ + my_thread_id wsrep_aborter; #endif /* WITH_WSREP */ /* Handling of timeouts for commands */ diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index ec784bc6df4..faf40e1b928 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -2753,7 +2753,7 @@ void kill_delayed_threads(void) Delayed_insert *di; while ((di= it++)) { - mysql_mutex_lock(&di->thd.LOCK_thd_data); + mysql_mutex_lock(&di->thd.LOCK_thd_kill); if (di->thd.killed < KILL_CONNECTION) di->thd.set_killed(KILL_CONNECTION); if (di->thd.mysys_var) @@ -2773,7 +2773,7 @@ void kill_delayed_threads(void) } mysql_mutex_unlock(&di->thd.mysys_var->mutex); } - mysql_mutex_unlock(&di->thd.LOCK_thd_data); + mysql_mutex_unlock(&di->thd.LOCK_thd_kill); } mysql_mutex_unlock(&LOCK_delayed_insert); // For unlink from list DBUG_VOID_RETURN; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 543c877b7f1..f34e182942d 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -2355,11 +2355,11 @@ com_multi_end: DBUG_ASSERT((command != COM_QUIT && command != COM_STMT_CLOSE) || thd->get_stmt_da()->is_disabled()); /* wsrep BF abort in query exec phase */ - mysql_mutex_lock(&thd->LOCK_thd_data); + mysql_mutex_lock(&thd->LOCK_thd_kill); do_end_of_statement= thd->wsrep_conflict_state != REPLAYING && thd->wsrep_conflict_state != RETRY_AUTOCOMMIT && !thd->killed; 
- mysql_mutex_unlock(&thd->LOCK_thd_data); + mysql_mutex_unlock(&thd->LOCK_thd_kill); } else do_end_of_statement= true; @@ -8820,7 +8820,7 @@ void add_join_natural(TABLE_LIST *a, TABLE_LIST *b, List<String> *using_fields, /** - Find a thread by id and return it, locking it LOCK_thd_data + Find a thread by id and return it, locking it LOCK_thd_kill @param id Identifier of the thread we're looking for @param query_id If true, search by query_id instead of thread_id @@ -8840,7 +8840,7 @@ THD *find_thread_by_id(longlong id, bool query_id) continue; if (id == (query_id ? tmp->query_id : (longlong) tmp->thread_id)) { - mysql_mutex_lock(&tmp->LOCK_thd_data); // Lock from delete + mysql_mutex_lock(&tmp->LOCK_thd_kill); // Lock from delete break; } } @@ -8896,13 +8896,30 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ thd->security_ctx->user_matches(tmp->security_ctx)) && !wsrep_thd_is_BF(tmp, false)) { - tmp->awake(kill_signal); - error=0; +#ifdef WITH_WSREP + DEBUG_SYNC(thd, "before_awake_no_mutex"); + if (tmp->wsrep_aborter && tmp->wsrep_aborter != thd->thread_id) + { + /* victim is in hit list already, bail out */ + WSREP_DEBUG("victim has wsrep aborter: %lu, skipping awake()", + tmp->wsrep_aborter); + error= 0; + } + else +#endif /* WITH_WSREP */ + { + WSREP_DEBUG("kill_one_thread %llu, victim: %llu wsrep_aborter %llu by signal %d", + thd->thread_id, id, tmp->wsrep_aborter, kill_signal); + tmp->awake_no_mutex(kill_signal); + WSREP_DEBUG("victim: %llu taken care of", id); + error= 0; + } } else error= (type == KILL_TYPE_QUERY ? 
ER_KILL_QUERY_DENIED_ERROR : ER_KILL_DENIED_ERROR); - mysql_mutex_unlock(&tmp->LOCK_thd_data); + if (WSREP(tmp)) mysql_mutex_unlock(&tmp->LOCK_thd_data); + mysql_mutex_unlock(&tmp->LOCK_thd_kill); } DBUG_PRINT("exit", ("%d", error)); DBUG_RETURN(error); @@ -8971,17 +8988,18 @@ static uint kill_threads_for_user(THD *thd, LEX_USER *user, THD *ptr= it2++; do { - ptr->awake(kill_signal); + ptr->awake_no_mutex(kill_signal); /* Careful here: The list nodes are allocated on the memroots of the THDs to be awakened. But those THDs may be terminated and deleted as soon as we release - LOCK_thd_data, which will make the list nodes invalid. + LOCK_thd_kill, which will make the list nodes invalid. Since the operation "it++" dereferences the "next" pointer of the previous list node, we need to do this while holding LOCK_thd_data. */ next_ptr= it2++; - mysql_mutex_unlock(&ptr->LOCK_thd_data); + mysql_mutex_unlock(&ptr->LOCK_thd_kill); + if (WSREP(ptr)) mysql_mutex_unlock(&ptr->LOCK_thd_data); (*rows)++; } while ((ptr= next_ptr)); } diff --git a/sql/sql_plugin_services.ic b/sql/sql_plugin_services.ic index 20113444b64..cc28c38d85b 100644 --- a/sql/sql_plugin_services.ic +++ b/sql/sql_plugin_services.ic @@ -167,6 +167,7 @@ static struct wsrep_service_st wsrep_handler = { wsrep_thd_exec_mode, wsrep_thd_exec_mode_str, wsrep_thd_get_conflict_state, + wsrep_thd_is_aborting, wsrep_thd_is_BF, wsrep_thd_is_wsrep, wsrep_thd_query, @@ -181,10 +182,12 @@ static struct wsrep_service_st wsrep_handler = { wsrep_set_load_multi_commit, wsrep_is_load_multi_commit, wsrep_trx_is_aborting, + wsrep_thd_bf_abort, wsrep_trx_order_before, wsrep_unlock_rollback, wsrep_set_data_home_dir, - wsrep_thd_is_applier + wsrep_thd_is_applier, + wsrep_thd_set_wsrep_aborter }; static struct thd_specifics_service_st thd_specifics_handler= diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 721b6799ed3..e944ea1bc54 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -3375,7 +3375,8 @@ void 
kill_zombie_dump_threads(uint32 slave_server_id) if (tmp->get_command() == COM_BINLOG_DUMP && tmp->variables.server_id == slave_server_id) { - mysql_mutex_lock(&tmp->LOCK_thd_data); // Lock from delete + if (WSREP(tmp)) mysql_mutex_lock(&tmp->LOCK_thd_data); + mysql_mutex_lock(&tmp->LOCK_thd_kill); // Lock from delete break; } } @@ -3387,8 +3388,9 @@ void kill_zombie_dump_threads(uint32 slave_server_id) it will be slow because it will iterate through the list again. We just to do kill the thread ourselves. */ - tmp->awake(KILL_SLAVE_SAME_ID); - mysql_mutex_unlock(&tmp->LOCK_thd_data); + tmp->awake_no_mutex(KILL_SLAVE_SAME_ID); + mysql_mutex_unlock(&tmp->LOCK_thd_kill); + if (WSREP(tmp)) mysql_mutex_unlock(&tmp->LOCK_thd_data); } } diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 18b7e92bca5..bbe217c44d3 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -2556,6 +2556,28 @@ public: double progress; }; +/** + Try to lock a mutex, but give up after a short while to not cause deadlocks + + The loop is short, as the mutex we are trying to lock are mutex the should + never be locked a long time, just over a few instructions. 
+ + @return 0 ok + @return 1 error +*/ + +static bool trylock_short(mysql_mutex_t *mutex) +{ + uint i; + for (i= 0 ; i < 100 ; i++) + { + if (!mysql_mutex_trylock(mutex)) + return 0; + LF_BACKOFF(); + } + return 1; +} + static const char *thread_state_info(THD *tmp) { #ifndef EMBEDDED_LIBRARY @@ -2574,10 +2596,17 @@ static const char *thread_state_info(THD *tmp) #endif if (tmp->proc_info) return tmp->proc_info; - else if (tmp->mysys_var && tmp->mysys_var->current_cond) - return "Waiting on cond"; - else - return NULL; + + /* Check if we are waiting on a condition */ + if (!trylock_short(&tmp->LOCK_thd_kill)) + { + /* mysys_var is protected by above mutex */ + bool cond= tmp->mysys_var && tmp->mysys_var->current_cond; + mysql_mutex_unlock(&tmp->LOCK_thd_kill); + if (cond) + return "Waiting on cond"; + } + return NULL; } void mysqld_list_processes(THD *thd,const char *user, bool verbose) @@ -2921,13 +2950,13 @@ int fill_show_explain(THD *thd, TABLE_LIST *table, COND *cond) tmp_sctx->user))) { my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), "PROCESS"); - mysql_mutex_unlock(&tmp->LOCK_thd_data); + mysql_mutex_unlock(&tmp->LOCK_thd_kill); DBUG_RETURN(1); } if (tmp == thd) { - mysql_mutex_unlock(&tmp->LOCK_thd_data); + mysql_mutex_unlock(&tmp->LOCK_thd_kill); my_error(ER_TARGET_NOT_EXPLAINABLE, MYF(0)); DBUG_RETURN(1); } @@ -2935,7 +2964,7 @@ int fill_show_explain(THD *thd, TABLE_LIST *table, COND *cond) bool bres; /* Ok we've found the thread of interest and it won't go away because - we're holding its LOCK_thd data. Post it a SHOW EXPLAIN request. + we're holding its LOCK_thd_kill. Post it a SHOW EXPLAIN request. 
*/ bool timed_out; int timeout_sec= 30; @@ -2949,7 +2978,7 @@ int fill_show_explain(THD *thd, TABLE_LIST *table, COND *cond) explain_req.request_thd= thd; explain_req.failed_to_produce= FALSE; - /* Ok, we have a lock on target->LOCK_thd_data, can call: */ + /* Ok, we have a lock on target->LOCK_thd_kill, can call: */ bres= tmp->apc_target.make_apc_call(thd, &explain_req, timeout_sec, &timed_out); if (bres || explain_req.failed_to_produce) diff --git a/sql/table.h b/sql/table.h index f3a7f278604..ff8f7d9a692 100644 --- a/sql/table.h +++ b/sql/table.h @@ -17,6 +17,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ #include "my_global.h" /* NO_EMBEDDED_ACCESS_CHECKS */ +#include "my_cpu.h" #include "sql_plist.h" #include "sql_list.h" /* Sql_alloc */ #include "mdl.h" @@ -610,7 +611,7 @@ class TABLE_STATISTICS_CB return true; if (expected == READY) return false; - (void) LF_BACKOFF; + (void) LF_BACKOFF(); } } diff --git a/sql/threadpool_common.cc b/sql/threadpool_common.cc index b0438770aae..bf8a1323ef4 100644 --- a/sql/threadpool_common.cc +++ b/sql/threadpool_common.cc @@ -477,11 +477,11 @@ void tp_timeout_handler(TP_connection *c) if (c->state != TP_STATE_IDLE) return; THD *thd=c->thd; - mysql_mutex_lock(&thd->LOCK_thd_data); + mysql_mutex_lock(&thd->LOCK_thd_kill); thd->set_killed(KILL_WAIT_TIMEOUT); c->priority= TP_PRIORITY_HIGH; post_kill_notification(thd); - mysql_mutex_unlock(&thd->LOCK_thd_data); + mysql_mutex_unlock(&thd->LOCK_thd_kill); } diff --git a/sql/wsrep_dummy.cc b/sql/wsrep_dummy.cc index d8ab86c25f2..9b01dba57a9 100644 --- a/sql/wsrep_dummy.cc +++ b/sql/wsrep_dummy.cc @@ -17,9 +17,15 @@ #include <sql_class.h> #include <mysql/service_wsrep.h> +my_bool wsrep_thd_is_aborting(const THD *) +{ return 0; } + my_bool wsrep_thd_is_BF(THD *, my_bool) { return 0; } +my_bool wsrep_thd_bf_abort(THD *, THD *, my_bool) +{ return 0; } + int wsrep_trx_order_before(THD *, THD *) { return 0; } @@ -154,3 +160,6 @@ void wsrep_log(void 
(*)(const char *, ...), const char *, ...) my_bool wsrep_thd_is_applier(MYSQL_THD thd) { return false; } + +bool wsrep_thd_set_wsrep_aborter(THD*, THD*) +{ return false; } diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc index 1e60088c5f1..37f7ab8cb2e 100644 --- a/sql/wsrep_thd.cc +++ b/sql/wsrep_thd.cc @@ -27,6 +27,7 @@ #include "rpl_filter.h" #include "rpl_rli.h" #include "rpl_mi.h" +#include "debug_sync.h" #if (__LP64__) static volatile int64 wsrep_bf_aborts_counter(0); @@ -793,10 +794,13 @@ int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal) THD *bf_thd = (THD *) bf_thd_ptr; DBUG_ENTER("wsrep_abort_thd"); + mysql_mutex_lock(&victim_thd->LOCK_thd_data); + if ( (WSREP(bf_thd) || ( (WSREP_ON || bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU) && bf_thd->wsrep_exec_mode == TOTAL_ORDER) ) && - victim_thd) + victim_thd && + !wsrep_thd_is_aborting(victim_thd)) { if ((victim_thd->wsrep_conflict_state == MUST_ABORT) || (victim_thd->wsrep_conflict_state == ABORTED) || @@ -811,13 +815,16 @@ int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal) WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", (bf_thd) ? 
(long long)bf_thd->real_id : 0, (long long)victim_thd->real_id); + mysql_mutex_unlock(&victim_thd->LOCK_thd_data); ha_abort_transaction(bf_thd, victim_thd, signal); + mysql_mutex_lock(&victim_thd->LOCK_thd_data); } else { WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd); } + mysql_mutex_unlock(&victim_thd->LOCK_thd_data); DBUG_RETURN(1); } @@ -876,3 +883,74 @@ bool wsrep_is_load_multi_commit(THD *thd) { return thd->wsrep_split_flag; } + +extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd, + my_bool signal) +{ + DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort", + { + const char act[]= + "now " + "SIGNAL sync.before_wsrep_thd_abort_reached " + "WAIT_FOR signal.before_wsrep_thd_abort"; + DBUG_ASSERT(!debug_sync_set_action(bf_thd, + STRING_WITH_LEN(act))); + };); + + my_bool ret= true; // wsrep_bf_abort(bf_thd, victim_thd); + /* + Send awake signal if victim was BF aborted or does not + have wsrep on. Note that this should never interrupt RSU + as RSU has paused the provider. 
+ */ + if ((ret || !wsrep_on(victim_thd)) && signal) + { + mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_data); + mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_kill); + mysql_mutex_lock(&victim_thd->LOCK_thd_data); + + if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id) + { + WSREP_DEBUG("victim is killed already by %llu, skipping awake", + victim_thd->wsrep_aborter); + mysql_mutex_unlock(&victim_thd->LOCK_thd_data); + return false; + } + + mysql_mutex_lock(&victim_thd->LOCK_thd_kill); + victim_thd->wsrep_aborter= bf_thd->thread_id; + victim_thd->awake_no_mutex(KILL_QUERY); + mysql_mutex_unlock(&victim_thd->LOCK_thd_kill); + mysql_mutex_unlock(&victim_thd->LOCK_thd_data); + } else { + WSREP_DEBUG("wsrep_thd_bf_abort skipped awake"); + } + return ret; +} + +extern "C" bool wsrep_thd_set_wsrep_aborter(THD *bf_thd, THD *victim_thd) +{ + WSREP_DEBUG("wsrep_thd_set_wsrep_aborter called"); + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id) + { + return true; + } + victim_thd->wsrep_aborter = bf_thd->thread_id; + return false; +} + +extern "C" my_bool wsrep_thd_is_aborting(const MYSQL_THD thd) +{ + mysql_mutex_assert_owner(&thd->LOCK_thd_data); + if (thd != 0) + { + if ((thd->wsrep_conflict_state == MUST_ABORT) || + (thd->wsrep_conflict_state == ABORTED) || + (thd->wsrep_conflict_state == ABORTING)) + { + return true; + } + } + return false; +} diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index bb648d99777..e9be5278bda 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -19726,35 +19726,41 @@ wsrep_innobase_kill_one_trx( victim_trx->id); victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; + if (wsrep_thd_set_wsrep_aborter(bf_thd, thd)) + { + WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set"); + wsrep_thd_UNLOCK(thd); + DBUG_RETURN(0); 
+ } - if (victim_trx->lock.wait_lock) { - WSREP_DEBUG("victim has wait flag: %ld", - thd_get_thread_id(thd)); - lock_t* wait_lock = victim_trx->lock.wait_lock; + wsrep_thd_UNLOCK(thd); + DEBUG_SYNC(bf_thd, "before_wsrep_thd_abort"); + + if (wsrep_thd_bf_abort(bf_thd, thd, signal)) { + wsrep_thd_LOCK(thd); + lock_t* wait_lock = victim_trx->lock.wait_lock; if (wait_lock) { + WSREP_DEBUG("victim has wait flag: %ld", + thd_get_thread_id(thd)); WSREP_DEBUG("canceling wait lock"); - victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; lock_cancel_waiting_and_release(wait_lock); - } - - wsrep_thd_UNLOCK(thd); - wsrep_thd_awake(thd, signal); - } else { - /* abort currently executing query */ - DBUG_PRINT("wsrep",("sending KILL_QUERY to: %lu", - thd_get_thread_id(thd))); - WSREP_DEBUG("kill query for: %ld", - thd_get_thread_id(thd)); - /* Note that innobase_kill_query will take lock_mutex - and trx_mutex */ - wsrep_thd_UNLOCK(thd); - wsrep_thd_awake(thd, signal); + wsrep_thd_UNLOCK(thd); + } else { + /* abort currently executing query */ + DBUG_PRINT("wsrep",("sending KILL_QUERY to: %lu", + thd_get_thread_id(thd))); + WSREP_DEBUG("kill query for: %ld", + thd_get_thread_id(thd)); + /* Note that innobase_kill_query will take lock_mutex + and trx_mutex */ + wsrep_thd_UNLOCK(thd); - /* for BF thd, we need to prevent him from committing */ - if (wsrep_thd_exec_mode(thd) == REPL_RECV) { - wsrep_abort_slave_trx(bf_seqno, - wsrep_thd_trx_seqno(thd)); + /* for BF thd, we need to prevent him from committing */ + if (wsrep_thd_exec_mode(thd) == REPL_RECV) { + wsrep_abort_slave_trx(bf_seqno, + wsrep_thd_trx_seqno(thd)); + } } } break; diff --git a/storage/innobase/include/os0once.h b/storage/innobase/include/os0once.h index ad72370eefa..b7470901297 100644 --- a/storage/innobase/include/os0once.h +++ b/storage/innobase/include/os0once.h @@ -27,6 +27,7 @@ Created Feb 20, 2014 Vasil Dimov #ifndef os0once_h #define os0once_h +#include "my_cpu.h" #include "univ.i" #include "ut0ut.h" @@ 
-109,7 +110,7 @@ public: ut_error; } - UT_RELAX_CPU(); + MY_RELAX_CPU(); } } } diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h index a19f3db188d..a22324be630 100644 --- a/storage/innobase/include/ut0ut.h +++ b/storage/innobase/include/ut0ut.h @@ -32,6 +32,7 @@ Created 1/20/1994 Heikki Tuuri #include <ostream> #include <sstream> #include <string.h> +#include "my_cpu.h" #ifndef UNIV_INNOCHECKSUM @@ -50,35 +51,6 @@ Created 1/20/1994 Heikki Tuuri /** Index name prefix in fast index creation, as a string constant */ #define TEMP_INDEX_PREFIX_STR "\377" -#ifdef HAVE_PAUSE_INSTRUCTION - /* According to the gcc info page, asm volatile means that the - instruction has important side-effects and must not be removed. - Also asm volatile may trigger a memory barrier (spilling all registers - to memory). */ -# ifdef __SUNPRO_CC -# define UT_RELAX_CPU() asm ("pause" ) -# else -# define UT_RELAX_CPU() __asm__ __volatile__ ("pause") -# endif /* __SUNPRO_CC */ - -#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION) -# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop") -#elif defined _WIN32 - /* In the Win32 API, the x86 PAUSE instruction is executed by calling - the YieldProcessor macro defined in WinNT.h. It is a CPU architecture- - independent way by using YieldProcessor. 
*/ -# define UT_RELAX_CPU() YieldProcessor() -#elif defined(__powerpc__) && defined __GLIBC__ -# include <sys/platform/ppc.h> -# define UT_RELAX_CPU() __ppc_get_timebase() -#else -# define UT_RELAX_CPU() do { \ - volatile int32 volatile_var; \ - int32 oldval= 0; \ - my_atomic_cas32(&volatile_var, &oldval, 1); \ - } while (0) -#endif - #if defined (__GNUC__) # define UT_COMPILER_BARRIER() __asm__ __volatile__ ("":::"memory") #elif defined (_MSC_VER) @@ -87,15 +59,6 @@ Created 1/20/1994 Heikki Tuuri # define UT_COMPILER_BARRIER() #endif -#if defined(HAVE_HMT_PRIORITY_INSTRUCTION) -# include <sys/platform/ppc.h> -# define UT_LOW_PRIORITY_CPU() __ppc_set_ppr_low() -# define UT_RESUME_PRIORITY_CPU() __ppc_set_ppr_med() -#else -# define UT_LOW_PRIORITY_CPU() ((void)0) -# define UT_RESUME_PRIORITY_CPU() ((void)0) -#endif - #define ut_max std::max #define ut_min std::min @@ -241,14 +204,6 @@ ut_sprintf_timestamp( /*=================*/ char* buf); /*!< in: buffer where to sprintf */ /*************************************************************//** -Runs an idle loop on CPU. The argument gives the desired delay -in microseconds on 100 MHz Pentium + Visual C++. -@return dummy value */ -void -ut_delay( -/*=====*/ - ulint delay); /*!< in: delay in microseconds on 100 MHz Pentium */ -/*************************************************************//** Prints the contents of a memory buffer in hex and ascii. */ void ut_print_buf( diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc index 1b3d100f9ee..9ae90fe37d1 100644 --- a/storage/innobase/ut/ut0ut.cc +++ b/storage/innobase/ut/ut0ut.cc @@ -136,27 +136,6 @@ ut_sprintf_timestamp( } /*************************************************************//** -Runs an idle loop on CPU. The argument gives the desired delay -in microseconds on 100 MHz Pentium + Visual C++. 
-@return dummy value */ -void -ut_delay( -/*=====*/ - ulint delay) /*!< in: delay in microseconds on 100 MHz Pentium */ -{ - ulint i; - - UT_LOW_PRIORITY_CPU(); - - for (i = 0; i < delay * 50; i++) { - UT_RELAX_CPU(); - UT_COMPILER_BARRIER(); - } - - UT_RESUME_PRIORITY_CPU(); -} - -/*************************************************************//** Prints the contents of a memory buffer in hex and ascii. */ void ut_print_buf( diff --git a/storage/maria/lockman.c b/storage/maria/lockman.c index a23558e46dd..71b34d8b8b6 100644 --- a/storage/maria/lockman.c +++ b/storage/maria/lockman.c @@ -110,6 +110,7 @@ #include <my_bit.h> #include <lf.h> #include "lockman.h" +#include "my_cpu.h" /* Lock compatibility matrix. @@ -268,7 +269,7 @@ retry: do { cursor->curr= PTR(*cursor->prev); lf_pin(pins, 1, cursor->curr); - } while(*cursor->prev != (intptr)cursor->curr && LF_BACKOFF); + } while(*cursor->prev != (intptr)cursor->curr && LF_BACKOFF()); for (;;) { if (!cursor->curr) @@ -277,7 +278,7 @@ retry: cur_link= cursor->curr->link; cursor->next= PTR(cur_link); lf_pin(pins, 0, cursor->next); - } while (cur_link != cursor->curr->link && LF_BACKOFF); + } while (cur_link != cursor->curr->link && LF_BACKOFF()); cur_hashnr= cursor->curr->hashnr; cur_resource= cursor->curr->resource; cur_lock= cursor->curr->lock; @@ -285,7 +286,7 @@ retry: cur_flags= cursor->curr->flags; if (*cursor->prev != (intptr)cursor->curr) { - (void)LF_BACKOFF; + (void)LF_BACKOFF(); goto retry; } if (!DELETED(cur_link)) @@ -362,7 +363,7 @@ retry: lf_alloc_free(pins, cursor->curr); else { - (void)LF_BACKOFF; + (void)LF_BACKOFF(); goto retry; } } |