diff options
author | Jan Lindström <jan.lindstrom@mariadb.com> | 2020-08-26 14:11:17 +0300 |
---|---|---|
committer | Jan Lindström <jan.lindstrom@mariadb.com> | 2020-09-10 13:18:12 +0300 |
commit | 224c950462a22e09f4e2e37d19218c9129bccba6 (patch) | |
tree | 526c75cb247c81ef05c4d76b7e8f9367248ca630 | |
parent | 75e82f71f16c3248387e00bc6e4fe4da02555325 (diff) | |
download | mariadb-git-224c950462a22e09f4e2e37d19218c9129bccba6.tar.gz |
MDEV-23101 : SIGSEGV in lock_rec_unlock() when Galera is enabled
Remove incorrect BF (brute force) handling from lock_rec_has_to_wait_in_queue
and move condition to correct callers. Add a function to report
BF lock waits and assert if incorrect BF-BF lock wait happens.
wsrep_report_bf_lock_wait
Add a new function to report BF lock wait.
wsrep_assert_no_bf_bf_wait
Add a new function to check do we have a
BF-BF wait and if we have report this case
and assert as it is a bug.
lock_rec_has_to_wait
Use new wsrep_assert_bf_wait to check BF-BF wait.
lock_rec_create_low
lock_table_create
Use new function to report BF lock waits.
lock_rec_insert_by_trx_age
lock_grant_and_move_on_page
lock_grant_and_move_on_rec
Assert that trx is not Galera as VATS is not compatible
with Galera.
lock_rec_add_to_queue
If there is conflicting lock in a queue make sure that
transaction is BF.
lock_rec_has_to_wait_in_queue
Remove incorrect BF handling. If there is conflicting
locks in a queue all transactions must wait.
lock_rec_dequeue_from_page
lock_rec_unlock
If there is conflicting lock make sure it is not
BF-BF case.
lock_rec_queue_validate
Add Galera record locking rules comment and use
new function to report BF lock waits.
All attempts to reproduce the original assertion have been
failed. Therefore, there is no test case on this commit.
-rw-r--r-- | include/mysql/service_wsrep.h | 5 | ||||
-rw-r--r-- | sql/sql_plugin_services.ic | 3 | ||||
-rw-r--r-- | sql/wsrep_dummy.cc | 6 | ||||
-rw-r--r-- | sql/wsrep_thd.cc | 20 | ||||
-rw-r--r-- | sql/wsrep_thd.h | 3 | ||||
-rw-r--r-- | storage/innobase/lock/lock0lock.cc | 287 |
6 files changed, 168 insertions, 156 deletions
diff --git a/include/mysql/service_wsrep.h b/include/mysql/service_wsrep.h index 923ba57fcdc..54b9eb9250c 100644 --- a/include/mysql/service_wsrep.h +++ b/include/mysql/service_wsrep.h @@ -115,6 +115,8 @@ extern struct wsrep_service_st { void (*wsrep_unlock_rollback_func)(); void (*wsrep_set_data_home_dir_func)(const char *data_dir); my_bool (*wsrep_thd_is_applier_func)(MYSQL_THD); + void (*wsrep_report_bf_lock_wait_func)(MYSQL_THD thd, + unsigned long long trx_id); } *wsrep_service; #ifdef MYSQL_DYNAMIC_PLUGIN @@ -161,6 +163,7 @@ extern struct wsrep_service_st { #define wsrep_unlock_rollback() wsrep_service->wsrep_unlock_rollback_func() #define wsrep_set_data_home_dir(A) wsrep_service->wsrep_set_data_home_dir_func(A) #define wsrep_thd_is_applier(T) wsrep_service->wsrep_thd_is_applier_func(T) +#define wsrep_report_bf_lock_wait(T,I) wsrep_service->wsrep_report_bf_lock_wait_func(T,I) #define wsrep_debug get_wsrep_debug() #define wsrep_log_conflicts get_wsrep_log_conflicts() @@ -223,6 +226,8 @@ bool wsrep_thd_ignore_table(THD *thd); void wsrep_unlock_rollback(); void wsrep_set_data_home_dir(const char *data_dir); my_bool wsrep_thd_is_applier(MYSQL_THD thd); +void wsrep_report_bf_lock_wait(THD *thd, + unsigned long long trx_id); #endif #ifdef __cplusplus diff --git a/sql/sql_plugin_services.ic b/sql/sql_plugin_services.ic index 20113444b64..43fe540731e 100644 --- a/sql/sql_plugin_services.ic +++ b/sql/sql_plugin_services.ic @@ -184,7 +184,8 @@ static struct wsrep_service_st wsrep_handler = { wsrep_trx_order_before, wsrep_unlock_rollback, wsrep_set_data_home_dir, - wsrep_thd_is_applier + wsrep_thd_is_applier, + wsrep_report_bf_lock_wait }; static struct thd_specifics_service_st thd_specifics_handler= diff --git a/sql/wsrep_dummy.cc b/sql/wsrep_dummy.cc index d8ab86c25f2..53941c06892 100644 --- a/sql/wsrep_dummy.cc +++ b/sql/wsrep_dummy.cc @@ -1,4 +1,4 @@ -/* Copyright (C) 2014 SkySQL Ab. +/* Copyright (C) 2014, 2020, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -154,3 +154,7 @@ void wsrep_log(void (*)(const char *, ...), const char *, ...) my_bool wsrep_thd_is_applier(MYSQL_THD thd) { return false; } + +void wsrep_report_bf_lock_wait(MYSQL_THD thd, + unsigned long long id) +{} diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc index 1e60088c5f1..4dddb399bd1 100644 --- a/sql/wsrep_thd.cc +++ b/sql/wsrep_thd.cc @@ -876,3 +876,23 @@ bool wsrep_is_load_multi_commit(THD *thd) { return thd->wsrep_split_flag; } + +void wsrep_report_bf_lock_wait(THD *thd, + unsigned long long trx_id) +{ + if (thd) + { + WSREP_ERROR("Thread %s trx_id: %llu thread: %ld " + "seqno: %lld query_state: %s conf_state: %s exec_mode: %s " + "applier: %d query: %s", + wsrep_thd_is_BF(thd, false) ? "BF" : "normal", + trx_id, + thd_get_thread_id(thd), + wsrep_thd_trx_seqno(thd), + wsrep_thd_query_state_str(thd), + wsrep_thd_conflict_state_str(thd), + wsrep_thd_exec_mode_str(thd), + thd->wsrep_applier, + wsrep_thd_query(thd)); + } +} diff --git a/sql/wsrep_thd.h b/sql/wsrep_thd.h index 10efcbefbf6..46bc08a466a 100644 --- a/sql/wsrep_thd.h +++ b/sql/wsrep_thd.h @@ -45,6 +45,9 @@ extern "C" my_bool wsrep_thd_is_BF_or_commit(void *thd_ptr, my_bool sync); extern "C" my_bool wsrep_thd_is_local(void *thd_ptr, my_bool sync); extern "C" int wsrep_thd_in_locking_session(void *thd_ptr); +extern void wsrep_report_bf_lock_wait(THD *thd, + unsigned long long trx_id); + #else /* WITH_WSREP */ #define wsrep_thd_is_BF(T, S) (0) diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 4730ae53133..e48ac6bcc92 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -644,6 +644,57 @@ lock_rec_get_insert_intention( return(lock->type_mode & LOCK_INSERT_INTENTION); } +#ifdef WITH_WSREP +/** Check if both conflicting lock and other record lock are brute force +(BF). This case is a bug so report lock information and wsrep state. +@param[in] lock_rec1 conflicting waiting record lock or NULL +@param[in] lock_rec2 other waiting record lock +@param[in] trx1 lock_rec1 can be NULL, trx +*/ +static void wsrep_assert_no_bf_bf_wait( + const lock_t* lock_rec1, + const lock_t* lock_rec2, + const trx_t* trx1) +{ + ut_ad(!lock_rec1 || lock_get_type_low(lock_rec1) == LOCK_REC); + ut_ad(lock_get_type_low(lock_rec2) == LOCK_REC); + + if (!trx1->is_wsrep() || !lock_rec2->trx->is_wsrep()) + return; + if (UNIV_LIKELY(!wsrep_thd_is_BF(trx1->mysql_thd, FALSE))) + return; + if (UNIV_LIKELY(!wsrep_thd_is_BF(lock_rec2->trx->mysql_thd, FALSE))) + return; + + mtr_t mtr; + + if (lock_rec1) { + ib::error() << "Waiting lock on table: " + << lock_rec1->index->table->name + << " index: " + << lock_rec1->index->name() + << " that has conflicting lock "; + lock_rec_print(stderr, lock_rec1, mtr); + } + + ib::error() << "Conflicting lock on table: " + << lock_rec2->index->table->name + << " index: " + << lock_rec2->index->name() + << " that has lock "; + lock_rec_print(stderr, lock_rec2, mtr); + + ib::error() << "WSREP state: "; + + wsrep_report_bf_lock_wait(trx1->mysql_thd, + trx1->id); + wsrep_report_bf_lock_wait(lock_rec2->trx->mysql_thd, + lock_rec2->trx->id); + /* BF-BF wait is a bug */ + ut_error; +} +#endif /* WITH_WSREP */ + /*********************************************************************//** Checks if a lock request for a new lock has to wait for request lock2. @return TRUE if new lock has to wait for lock2 to be removed */ @@ -751,69 +802,9 @@ lock_rec_has_to_wait( } #ifdef WITH_WSREP - /* if BF thread is locking and has conflict with another BF - thread, we need to look at trx ordering and lock types */ - if (wsrep_thd_is_BF(trx->mysql_thd, FALSE) && - wsrep_thd_is_BF(lock2->trx->mysql_thd, FALSE)) { - mtr_t mtr; - - if (UNIV_UNLIKELY(wsrep_debug)) { - ib::info() << - "BF-BF lock conflict, locking: " << for_locking; - lock_rec_print(stderr, lock2, mtr); - ib::info() << " SQL1: " - << wsrep_thd_query(trx->mysql_thd); - ib::info() << " SQL2: " - << wsrep_thd_query(lock2->trx->mysql_thd); - } - - if (wsrep_trx_order_before(trx->mysql_thd, - lock2->trx->mysql_thd) && - (type_mode & LOCK_MODE_MASK) == LOCK_X && - (lock2->type_mode & LOCK_MODE_MASK) == LOCK_X) { - if (UNIV_UNLIKELY(for_locking || wsrep_debug)) { - /* exclusive lock conflicts are not - accepted */ - ib::info() << - "BF-BF X lock conflict," - "mode: " << type_mode << - " supremum: " << lock_is_on_supremum; - ib::info() << - "conflicts states: my " - << wsrep_thd_conflict_state(trx->mysql_thd, FALSE) - << " locked " - << wsrep_thd_conflict_state(lock2->trx->mysql_thd, FALSE); - lock_rec_print(stderr, lock2, mtr); - ib::info() << " SQL1: " - << wsrep_thd_query(trx->mysql_thd); - ib::info() << " SQL2: " - << wsrep_thd_query(lock2->trx->mysql_thd); - - if (for_locking) { - return FALSE; - } - } - } else { - /* if lock2->index->n_uniq <= - lock2->index->n_user_defined_cols - operation is on uniq index - */ - if (UNIV_UNLIKELY(wsrep_debug)) { - ib::info() << - "BF conflict, modes: " - << type_mode << ":" << lock2->type_mode - << " idx: " << lock2->index->name() - << " table: " << lock2->index->table->name.m_name - << " n_uniq: " << lock2->index->n_uniq - << " n_user: " << lock2->index->n_user_defined_cols; - ib::info() << " SQL1: " - << wsrep_thd_query(trx->mysql_thd); - ib::info() << " SQL2: " - << wsrep_thd_query(lock2->trx->mysql_thd); - } - return FALSE; - } - } + /* There should not be two conflicting locks that are + brute force. If there is it is a bug. */ + wsrep_assert_no_bf_bf_wait(NULL, lock2, trx); #endif /* WITH_WSREP */ return(TRUE); @@ -1532,11 +1523,8 @@ lock_rec_create_low( trx_mutex_exit(c_lock->trx); if (UNIV_UNLIKELY(wsrep_debug)) { - ib::info() << "WSREP: c_lock canceled " - << ib::hex(c_lock->trx->id) - << " SQL: " - << wsrep_thd_query( - c_lock->trx->mysql_thd); + wsrep_report_bf_lock_wait(trx->mysql_thd, trx->id); + wsrep_report_bf_lock_wait(c_lock->trx->mysql_thd, c_lock->trx->id); } /* have to bail out here to avoid lock_set_lock... */ @@ -1615,6 +1603,7 @@ lock_rec_insert_by_trx_age( hash_table_t* hash; hash_cell_t* cell; + ut_ad(!in_lock->trx->is_wsrep()); space = in_lock->un_member.rec_lock.space; page_no = in_lock->un_member.rec_lock.page_no; rec_fold = lock_rec_fold(space, page_no); @@ -1877,27 +1866,19 @@ lock_rec_add_to_queue( = lock_rec_other_has_expl_req( mode, block, false, heap_no, trx); #ifdef WITH_WSREP - if (other_lock && trx->is_wsrep() && - !wsrep_thd_is_BF(trx->mysql_thd, FALSE) && - !wsrep_thd_is_BF(other_lock->trx->mysql_thd, FALSE)) { - - ib::info() << "WSREP BF lock conflict for my lock:\n BF:" << - ((wsrep_thd_is_BF(trx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " << - wsrep_thd_exec_mode(trx->mysql_thd) << " conflict: " << - wsrep_thd_conflict_state(trx->mysql_thd, false) << " seqno: " << - wsrep_thd_trx_seqno(trx->mysql_thd) << " SQL: " << - wsrep_thd_query(trx->mysql_thd); - trx_t* otrx = other_lock->trx; - ib::info() << "WSREP other lock:\n BF:" << - ((wsrep_thd_is_BF(otrx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " << - wsrep_thd_exec_mode(otrx->mysql_thd) << " conflict: " << - wsrep_thd_conflict_state(otrx->mysql_thd, false) << " seqno: " << - wsrep_thd_trx_seqno(otrx->mysql_thd) << " SQL: " << - wsrep_thd_query(otrx->mysql_thd); - } -#else - ut_a(!other_lock); + if (UNIV_UNLIKELY(other_lock && trx->is_wsrep())) { + /* Only BF transaction may be granted lock + before other conflicting lock request. */ + if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE) + && !wsrep_thd_is_BF(other_lock->trx->mysql_thd, FALSE)) { + /* If it is not BF, this case is a bug. */ + wsrep_report_bf_lock_wait(trx->mysql_thd, trx->id); + wsrep_report_bf_lock_wait(other_lock->trx->mysql_thd, other_lock->trx->id); + ut_error; + } + } else #endif /* WITH_WSREP */ + ut_ad(!other_lock); } #endif /* UNIV_DEBUG */ @@ -2203,9 +2184,6 @@ lock_rec_has_to_wait_in_queue( hash = lock_hash_get(wait_lock->type_mode); for (lock = lock_rec_get_first_on_page_addr(hash, space, page_no); -#ifdef WITH_WSREP - lock && -#endif lock != wait_lock; lock = lock_rec_get_next_on_page_const(lock)) { const byte* p = (const byte*) &lock[1]; @@ -2213,24 +2191,6 @@ lock_rec_has_to_wait_in_queue( if (heap_no < lock_rec_get_n_bits(lock) && (p[bit_offset] & bit_mask) && lock_has_to_wait(wait_lock, lock)) { -#ifdef WITH_WSREP - if (wsrep_thd_is_BF(wait_lock->trx->mysql_thd, FALSE) && - wsrep_thd_is_BF(lock->trx->mysql_thd, FALSE)) { - - if (UNIV_UNLIKELY(wsrep_debug)) { - mtr_t mtr; - ib::info() << "WSREP: waiting BF trx: " << ib::hex(wait_lock->trx->id) - << " query: " << wsrep_thd_query(wait_lock->trx->mysql_thd); - lock_rec_print(stderr, wait_lock, mtr); - ib::info() << "WSREP: do not wait another BF trx: " << ib::hex(lock->trx->id) - << " query: " << wsrep_thd_query(lock->trx->mysql_thd); - lock_rec_print(stderr, lock, mtr); - } - /* don't wait for another BF lock */ - continue; - } -#endif /* WITH_WSREP */ - return(lock); } } @@ -2346,6 +2306,7 @@ lock_grant_and_move_on_page(ulint rec_fold, ulint space, ulint page_no) lock = previous->hash; } + ut_ad(!lock->trx->is_wsrep()); ut_ad(previous->hash == lock || previous == lock); /* Grant locks if there are no conflicting locks ahead. Move granted locks to the head of the list. */ @@ -2416,11 +2377,18 @@ static void lock_rec_dequeue_from_page(lock_t* in_lock) lock != NULL; lock = lock_rec_get_next_on_page(lock)) { - if (lock_get_wait(lock) - && !lock_rec_has_to_wait_in_queue(lock)) { + if (!lock_get_wait(lock)) { + continue; + } + const lock_t* c = lock_rec_has_to_wait_in_queue(lock); + if (!c) { /* Grant the lock */ ut_ad(lock->trx != in_lock->trx); lock_grant(lock); +#ifdef WITH_WSREP + } else { + wsrep_assert_no_bf_bf_wait(c, lock, c->trx); +#endif /* WITH_WSREP */ } } } else { @@ -3662,11 +3630,8 @@ lock_table_create( ut_list_insert(table->locks, c_lock, lock, TableLockGetNode()); if (UNIV_UNLIKELY(wsrep_debug)) { - ib::info() << "table lock BF conflict for " - << ib::hex(c_lock->trx->id) - << " SQL: " - << wsrep_thd_query( - c_lock->trx->mysql_thd); + wsrep_report_bf_lock_wait(trx->mysql_thd, trx->id); + wsrep_report_bf_lock_wait(c_lock->trx->mysql_thd, c_lock->trx->id); } } else { ut_list_append(table->locks, lock, TableLockGetNode()); @@ -3678,6 +3643,8 @@ lock_table_create( c_lock->trx->lock.was_chosen_as_deadlock_victim = TRUE; if (UNIV_UNLIKELY(wsrep_debug)) { + wsrep_report_bf_lock_wait(trx->mysql_thd, trx->id); + wsrep_report_bf_lock_wait(c_lock->trx->mysql_thd, c_lock->trx->id); wsrep_print_wait_locks(c_lock); } @@ -3687,14 +3654,6 @@ lock_table_create( lock_cancel_waiting_and_release( c_lock->trx->lock.wait_lock); trx_mutex_enter(trx); - - if (UNIV_UNLIKELY(wsrep_debug)) { - ib::info() << "WSREP: c_lock canceled " - << ib::hex(c_lock->trx->id) - << " SQL: " - << wsrep_thd_query( - c_lock->trx->mysql_thd); - } } trx_mutex_exit(c_lock->trx); @@ -4252,6 +4211,7 @@ lock_grant_and_move_on_rec( } lock = previous->hash; } + ut_ad(!lock->trx->is_wsrep()); /* Grant locks if there are no conflicting locks ahead. Move granted locks to the head of the list. */ for (;lock != NULL;) { @@ -4350,12 +4310,18 @@ released: for (lock = first_lock; lock != NULL; lock = lock_rec_get_next(heap_no, lock)) { - if (lock_get_wait(lock) - && !lock_rec_has_to_wait_in_queue(lock)) { - + if (!lock_get_wait(lock)) { + continue; + } + const lock_t* c = lock_rec_has_to_wait_in_queue(lock); + if (!c) { /* Grant the lock */ ut_ad(trx != lock->trx); lock_grant(lock); +#ifdef WITH_WSREP + } else { + wsrep_assert_no_bf_bf_wait(c, lock, c->trx); +#endif /* WITH_WSREP */ } } } else { @@ -5254,24 +5220,28 @@ lock_rec_queue_validate( explicit granted lock. */ #ifdef WITH_WSREP - if (other_lock->trx->is_wsrep()) { - if (!lock_get_wait(other_lock) ) { - ib::info() << "WSREP impl BF lock conflict for my impl lock:\n BF:" << - ((wsrep_thd_is_BF(impl_trx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " << - wsrep_thd_exec_mode(impl_trx->mysql_thd) << " conflict: " << - wsrep_thd_conflict_state(impl_trx->mysql_thd, false) << " seqno: " << - wsrep_thd_trx_seqno(impl_trx->mysql_thd) << " SQL: " << - wsrep_thd_query(impl_trx->mysql_thd); - - trx_t* otrx = other_lock->trx; - - ib::info() << "WSREP other lock:\n BF:" << - ((wsrep_thd_is_BF(otrx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " << - wsrep_thd_exec_mode(otrx->mysql_thd) << " conflict: " << - wsrep_thd_conflict_state(otrx->mysql_thd, false) << " seqno: " << - wsrep_thd_trx_seqno(otrx->mysql_thd) << " SQL: " << - wsrep_thd_query(otrx->mysql_thd); - } + /** Galera record locking rules: + * If there is no other record lock to the same record, we may grant + the lock request. + * If there is other record lock but this requested record lock is + compatible, we may grant the lock request. + * If there is other record lock and it is not compatible with + requested lock, all normal transactions must wait. + * BF (brute force) additional exceptions : + ** If BF already holds record lock for requested record, we may + grant new record lock even if there is conflicting record lock(s) + waiting on a queue. + ** If conflicting transaction holds requested record lock, + we will cancel this record lock and select conflicting transaction + for BF abort or kill victim. + ** If conflicting transaction is waiting for requested record lock + we will cancel this wait and select conflicting transaction + for BF abort or kill victim. + ** There should not be two BF transactions waiting for same record lock + */ + if (other_lock->trx->is_wsrep() && !lock_get_wait(other_lock)) { + wsrep_report_bf_lock_wait(impl_trx->mysql_thd, impl_trx->id); + wsrep_report_bf_lock_wait(other_lock->trx->mysql_thd, other_lock->trx->id); if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block, heap_no, @@ -5280,9 +5250,11 @@ lock_rec_queue_validate( } } else #endif /* WITH_WSREP */ - ut_ad(lock_get_wait(other_lock)); - ut_ad(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, - block, heap_no, impl_trx)); + { + ut_ad(lock_get_wait(other_lock)); + ut_ad(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, + block, heap_no, impl_trx)); + } } mutex_exit(&impl_trx->mutex); @@ -5313,13 +5285,20 @@ lock_rec_queue_validate( mode, block, false, heap_no, lock->trx); #ifdef WITH_WSREP - ut_a(!other_lock - || wsrep_thd_is_BF(lock->trx->mysql_thd, FALSE) - || wsrep_thd_is_BF(other_lock->trx->mysql_thd, FALSE)); - -#else - ut_a(!other_lock); + if (UNIV_UNLIKELY(other_lock && lock->trx->is_wsrep())) { + /* Only BF transaction may be granted + lock before other conflicting lock + request. */ + if (!wsrep_thd_is_BF(lock->trx->mysql_thd, FALSE) + && !wsrep_thd_is_BF(other_lock->trx->mysql_thd, FALSE)) { + /* If no BF, this case is a bug. */ + wsrep_report_bf_lock_wait(lock->trx->mysql_thd, lock->trx->id); + wsrep_report_bf_lock_wait(other_lock->trx->mysql_thd, other_lock->trx->id); + ut_error; + } + } else #endif /* WITH_WSREP */ + ut_ad(!other_lock); } else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) { ut_a(lock_rec_has_to_wait_in_queue(lock)); |