summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Lindström <jan.lindstrom@mariadb.com>2021-03-03 12:14:23 +0200
committerJan Lindström <jan.lindstrom@mariadb.com>2021-03-30 08:58:10 +0300
commitd217a925b267727936bd0a08dce1863ab6d1ed19 (patch)
tree1cd2d1a00faf051cfc7f51f1f2cc1b06b3f40ac0
parentc44273329ed7292e2fdd4039aa4ad20035583f89 (diff)
downloadmariadb-git-d217a925b267727936bd0a08dce1863ab6d1ed19.tar.gz
MDEV-24923 : Port selected Galera conflict resolution changes from 10.6
Add condition on trx->state == TRX_STATE_COMMITTED_IN_MEMORY in order to avoid unnecessary work. If a transaction has already been committed or rolled back, it will release its locks in lock_release() and let the waiting thread(s) continue execution. Let BF wait on lock_rec_has_to_wait and if necessary other BF is replayed. wsrep_trx_order_before If BF is not even replicated yet then they are ordered correctly. bg_wsrep_kill_trx Make sure victim_trx is found and check also its state. If state is TRX_STATE_COMMITTED_IN_MEMORY transaction is already committed or rolled back and will release it locks soon. wsrep_assert_no_bf_bf_wait Transaction requesting new record lock should be TRX_STATE_ACTIVE Conflicting transaction can be in states TRX_STATE_ACTIVE, TRX_STATE_COMMITTED_IN_MEMORY or in TRX_STATE_PREPARED. If conflicting transaction is already committed in memory or prepared we should wait. When transaction is committed in memory we held trx mutex, but not lock_sys->mutex. Therefore, we could end here before transaction has time to do lock_release() that is protected with lock_sys->mutex. lock_rec_has_to_wait We very well can let bf to wait normally as other BF will be replayed in case of conflict. For debug builds we will do additional sanity checks to catch unsupported bf wait if any. wsrep_kill_victim Check is victim already in TRX_STATE_COMMITTED_IN_MEMORY state and if it is we can return. lock_rec_dequeue_from_page lock_rec_unlock Remove unnecessary wsrep_assert_no_bf_bf_wait function calls. We can very well let BF wait here.
-rw-r--r--sql/wsrep_mysqld.cc22
-rw-r--r--storage/innobase/handler/ha_innodb.cc33
-rw-r--r--storage/innobase/lock/lock0lock.cc173
3 files changed, 107 insertions, 121 deletions
diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc
index c033f7e1464..f22d8bf0f5a 100644
--- a/sql/wsrep_mysqld.cc
+++ b/sql/wsrep_mysqld.cc
@@ -2788,16 +2788,18 @@ extern "C" bool wsrep_thd_ignore_table(THD *thd)
extern int
wsrep_trx_order_before(THD *thd1, THD *thd2)
{
- if (wsrep_thd_trx_seqno(thd1) < wsrep_thd_trx_seqno(thd2)) {
- WSREP_DEBUG("BF conflict, order: %lld %lld\n",
- (long long)wsrep_thd_trx_seqno(thd1),
- (long long)wsrep_thd_trx_seqno(thd2));
- return 1;
- }
- WSREP_DEBUG("waiting for BF, trx order: %lld %lld\n",
- (long long)wsrep_thd_trx_seqno(thd1),
- (long long)wsrep_thd_trx_seqno(thd2));
- return 0;
+ const longlong trx1_seqno= wsrep_thd_trx_seqno(thd1);
+ const longlong trx2_seqno= wsrep_thd_trx_seqno(thd2);
+ WSREP_DEBUG("BF conflict, order: %lld %lld\n",
+ trx1_seqno, trx2_seqno);
+
+ if (trx1_seqno == WSREP_SEQNO_UNDEFINED ||
+ trx2_seqno == WSREP_SEQNO_UNDEFINED)
+ return 1; /* trx is not yet replicated */
+ else if (trx1_seqno < trx2_seqno)
+ return 1;
+
+ return 0;
}
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index af6fc4d5b48..445103e0550 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -19596,21 +19596,32 @@ static void bg_wsrep_kill_trx(
DBUG_ENTER("bg_wsrep_kill_trx");
if (thd) {
- victim_trx = thd_to_trx(thd);
- lock_mutex_enter();
- trx_mutex_enter(victim_trx);
- if (victim_trx->id != arg->trx_id)
- {
- trx_mutex_exit(victim_trx);
- lock_mutex_exit();
+ victim_trx= thd_to_trx(thd);
+ /* Victim trx might not exist e.g. on MDL-conflict. */
+ if (victim_trx) {
+ lock_mutex_enter();
+ trx_mutex_enter(victim_trx);
+ if (victim_trx->id != arg->trx_id ||
+ victim_trx->state == TRX_STATE_COMMITTED_IN_MEMORY)
+ {
+ /* Victim was meanwhile rolled back or
+ committed */
+ trx_mutex_exit(victim_trx);
+ lock_mutex_exit();
+ wsrep_thd_UNLOCK(thd);
+ victim_trx= NULL;
+ }
+ } else {
+ /* find_thread_by_id locked
+ THD::LOCK_thd_data */
wsrep_thd_UNLOCK(thd);
- victim_trx = NULL;
}
}
if (!victim_trx) {
- /* it can happen that trx_id was meanwhile rolled back */
- DBUG_PRINT("wsrep", ("no thd for conflicting lock"));
+ /* Victim trx might not exist (MDL-conflict) or victim
+ was meanwhile rolled back or committed because of
+ a KILL statement or a disconnect. */
goto ret;
}
@@ -19766,7 +19777,7 @@ static void bg_wsrep_kill_trx(
}
ret_awake:
- awake = true;
+ awake= true;
ret_unlock:
trx_mutex_exit(victim_trx);
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index 4e5c8fbf0ec..506106a2269 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -644,71 +644,81 @@ lock_rec_get_insert_intention(
return(lock->type_mode & LOCK_INSERT_INTENTION);
}
+#ifdef UNIV_DEBUG
#ifdef WITH_WSREP
-/** Check if both conflicting lock and other record lock are brute force
-(BF). This case is a bug so report lock information and wsrep state.
-@param[in] lock_rec1 conflicting waiting record lock or NULL
-@param[in] lock_rec2 other waiting record lock
-@param[in] trx1 lock_rec1 can be NULL, trx
+/** Check if both conflicting lock transaction and other transaction
+requesting record lock are brute force (BF). If they are check is
+this BF-BF wait correct and if not report BF wait and assert.
+
+@param[in] lock_rec other waiting record lock
+@param[in] trx trx requesting conflicting record lock
*/
-static void wsrep_assert_no_bf_bf_wait(
- const lock_t* lock_rec1,
- const lock_t* lock_rec2,
- const trx_t* trx1)
+static void wsrep_assert_no_bf_bf_wait(const lock_t *lock, const trx_t *trx)
{
- ut_ad(!lock_rec1 || lock_get_type_low(lock_rec1) == LOCK_REC);
- ut_ad(lock_get_type_low(lock_rec2) == LOCK_REC);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
ut_ad(lock_mutex_own());
+ trx_t* lock_trx= lock->trx;
/* Note that we are holding lock_sys->mutex, thus we should
not acquire THD::LOCK_thd_data mutex below to avoid mutexing
order violation. */
- if (!trx1->is_wsrep() || !lock_rec2->trx->is_wsrep())
+ if (!trx->is_wsrep() || !lock_trx->is_wsrep())
return;
- if (UNIV_LIKELY(!wsrep_thd_is_BF(trx1->mysql_thd, FALSE)))
- return;
- if (UNIV_LIKELY(!wsrep_thd_is_BF(lock_rec2->trx->mysql_thd, FALSE)))
+ if (UNIV_LIKELY(!wsrep_thd_is_BF(trx->mysql_thd, FALSE))
+ || UNIV_LIKELY(!wsrep_thd_is_BF(lock_trx->mysql_thd, FALSE)))
return;
- /* if BF - BF order is honored, we can keep trx1 waiting for the lock */
- if (wsrep_trx_order_before(trx1->mysql_thd, lock_rec2->trx->mysql_thd))
+ ut_ad(trx->state == TRX_STATE_ACTIVE);
+
+ trx_mutex_enter(lock_trx);
+ const trx_state_t trx2_state= lock_trx->state;
+ trx_mutex_exit(lock_trx);
+
+ /* If transaction is already committed in memory or
+ prepared we should wait. When transaction is committed in
+ memory we held trx mutex, but not lock_sys->mutex. Therefore,
+ we could end here before transaction has time to do
+ lock_release() that is protected with lock_sys->mutex. */
+ switch (trx2_state) {
+ case TRX_STATE_COMMITTED_IN_MEMORY:
+ case TRX_STATE_PREPARED:
return;
+ case TRX_STATE_ACTIVE:
+ break;
+ default:
+ ut_ad("invalid state" == 0);
+ }
- /* avoiding BF-BF conflict assert, if victim is already aborting
- or rolling back for replaying
- */
- if (wsrep_trx_is_aborting(lock_rec2->trx->mysql_thd))
+ /* If BF - BF order is honored, i.e. trx already holding
+ record lock should be ordered before this new lock request
+ we can keep trx waiting for the lock. If conflicting
+ transaction is already aborting or rolling back for replaying
+ we can also let new transaction waiting. */
+ if (wsrep_trx_order_before(lock_trx->mysql_thd, trx->mysql_thd)
+ || wsrep_trx_is_aborting(lock_trx->mysql_thd))
return;
mtr_t mtr;
- if (lock_rec1) {
- ib::error() << "Waiting lock on table: "
- << lock_rec1->index->table->name
- << " index: "
- << lock_rec1->index->name()
- << " that has conflicting lock ";
- lock_rec_print(stderr, lock_rec1, mtr);
- }
-
ib::error() << "Conflicting lock on table: "
- << lock_rec2->index->table->name
+ << lock->index->table->name
<< " index: "
- << lock_rec2->index->name()
+ << lock->index->name()
<< " that has lock ";
- lock_rec_print(stderr, lock_rec2, mtr);
+ lock_rec_print(stderr, lock, mtr);
ib::error() << "WSREP state: ";
- wsrep_report_bf_lock_wait(trx1->mysql_thd,
- trx1->id);
- wsrep_report_bf_lock_wait(lock_rec2->trx->mysql_thd,
- lock_rec2->trx->id);
+ wsrep_report_bf_lock_wait(trx->mysql_thd,
+ trx->id);
+ wsrep_report_bf_lock_wait(lock_trx->mysql_thd,
+ lock_trx->id);
/* BF-BF wait is a bug */
ut_error;
}
#endif /* WITH_WSREP */
+#endif /* UNIV_DEBUG */
/*********************************************************************//**
Checks if a lock request for a new lock has to wait for request lock2.
@@ -832,9 +842,11 @@ lock_rec_has_to_wait(
return (FALSE);
}
- /* There should not be two conflicting locks that are
- brute force. If there is it is a bug. */
- wsrep_assert_no_bf_bf_wait(NULL, lock2, trx);
+ /* We very well can let bf to wait normally as other
+ BF will be replayed in case of conflict. For debug
+ builds we will do additional sanity checks to catch
+ unsupported bf wait if any. */
+ ut_d(wsrep_assert_no_bf_bf_wait(lock2, trx));
#endif /* WITH_WSREP */
return(TRUE);
@@ -1111,66 +1123,35 @@ lock_rec_other_has_expl_req(
#endif /* UNIV_DEBUG */
#ifdef WITH_WSREP
-static
-void
-wsrep_kill_victim(
-/*==============*/
- const trx_t * const trx,
- const lock_t *lock)
+static void wsrep_kill_victim(const trx_t * const trx, const lock_t *lock)
{
ut_ad(lock_mutex_own());
- ut_ad(trx_mutex_own(lock->trx));
+ ut_ad(trx->is_wsrep());
+ trx_t* lock_trx = lock->trx;
+ ut_ad(trx_mutex_own(lock_trx));
+ ut_ad(lock_trx != trx);
- /* quit for native mysql */
- if (!trx->is_wsrep()) return;
+ if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE))
+ return;
- my_bool bf_this = wsrep_thd_is_BF(trx->mysql_thd, FALSE);
- my_bool bf_other = wsrep_thd_is_BF(lock->trx->mysql_thd, FALSE);
- mtr_t mtr;
+ if (lock_trx->state == TRX_STATE_COMMITTED_IN_MEMORY
+ || lock_trx->lock.was_chosen_as_deadlock_victim)
+ return;
- if ((bf_this && !bf_other) ||
- (bf_this && bf_other && wsrep_trx_order_before(
- trx->mysql_thd, lock->trx->mysql_thd))) {
+ my_bool bf_other = wsrep_thd_is_BF(lock_trx->mysql_thd, FALSE);
- if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
- if (UNIV_UNLIKELY(wsrep_debug)) {
- ib::info() << "WSREP: BF victim waiting\n";
- }
+ if (!bf_other
+ || wsrep_trx_order_before(trx->mysql_thd,
+ lock_trx->mysql_thd)) {
+
+ if (lock_trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
+ if (UNIV_UNLIKELY(wsrep_debug))
+ WSREP_INFO("BF victim waiting");
/* cannot release lock, until our lock
is in the queue*/
- } else if (lock->trx != trx) {
- if (wsrep_log_conflicts) {
- if (bf_this) {
- ib::info() << "*** Priority TRANSACTION:";
- } else {
- ib::info() << "*** Victim TRANSACTION:";
- }
-
- wsrep_trx_print_locking(stderr, trx, 3000);
-
- if (bf_other) {
- ib::info() << "*** Priority TRANSACTION:";
- } else {
- ib::info() << "*** Victim TRANSACTION:";
- }
- wsrep_trx_print_locking(stderr, lock->trx, 3000);
-
- ib::info() << "*** WAITING FOR THIS LOCK TO BE GRANTED:";
-
- if (lock_get_type(lock) == LOCK_REC) {
- lock_rec_print(stderr, lock, mtr);
- } else {
- lock_table_print(stderr, lock);
- }
-
- ib::info() << " SQL1: "
- << wsrep_thd_query(trx->mysql_thd);
- ib::info() << " SQL2: "
- << wsrep_thd_query(lock->trx->mysql_thd);
- }
-
- wsrep_innobase_kill_one_trx(trx->mysql_thd,
- trx, lock->trx, TRUE);
+ } else {
+ wsrep_innobase_kill_one_trx(trx->mysql_thd, trx,
+ lock_trx, true);
}
}
}
@@ -2410,10 +2391,6 @@ static void lock_rec_dequeue_from_page(lock_t* in_lock)
/* Grant the lock */
ut_ad(lock->trx != in_lock->trx);
lock_grant(lock);
-#ifdef WITH_WSREP
- } else {
- wsrep_assert_no_bf_bf_wait(c, lock, c->trx);
-#endif /* WITH_WSREP */
}
}
} else {
@@ -4342,10 +4319,6 @@ released:
/* Grant the lock */
ut_ad(trx != lock->trx);
lock_grant(lock);
-#ifdef WITH_WSREP
- } else {
- wsrep_assert_no_bf_bf_wait(c, lock, c->trx);
-#endif /* WITH_WSREP */
}
}
} else {