summaryrefslogtreecommitdiff
path: root/sql/rpl_rli.cc
diff options
context:
space:
mode:
author: Kristian Nielsen <knielsen@knielsen-hq.org> 2015-01-07 14:45:39 +0100
committer: Kristian Nielsen <knielsen@knielsen-hq.org> 2015-01-07 14:45:39 +0100
commit: f27817c1d0e6d81392470e9086624e88ae08b11f (patch)
tree: 07143fafd819462ef1baf0d451d5537f1a60610b /sql/rpl_rli.cc
parent: 4a3251595cc697bfdb15b67c07514bd3c4779e37 (diff)
download: mariadb-git-f27817c1d0e6d81392470e9086624e88ae08b11f.tar.gz
MDEV-7326: Server deadlock in connection with parallel replication

The bug occurs when a transaction does a retry after all transactions have done mark_start_commit() in a batch of group commit from the master. In this case, the retrying transaction can unmark_start_commit() after the following batch has already started running and de-allocated the GCO.

Then, after the retry, the transaction will re-do mark_start_commit() on a de-allocated GCO, and the wakeup of later GCOs can also be lost.

This was seen "in the wild" by a user, even though it is not known exactly what circumstances can lead to a retry of one transaction after all transactions in a group have reached the commit phase.

The lifetime handling around the GCO was somewhat clunky anyway. With this patch, a GCO lives until rpl_parallel_entry::last_committed_sub_id has reached the last transaction in the GCO. This guarantees that the GCO will still be alive when a transaction does mark_start_commit(). Also, we now loop over the list of active GCOs for wakeup, to ensure we do not lose a wakeup even in the problematic case.

Diffstat (limited to 'sql/rpl_rli.cc')
-rw-r--r-- sql/rpl_rli.cc 19
1 files changed, 14 insertions, 5 deletions
diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index 629e046ed0a..a751dd16650 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -1849,11 +1849,20 @@ void rpl_group_info::slave_close_thread_tables(THD *thd)
static void
-mark_start_commit_inner(rpl_parallel_entry *e, group_commit_orderer *gco)
+mark_start_commit_inner(rpl_parallel_entry *e, group_commit_orderer *gco,
+ rpl_group_info *rgi)
{
+ group_commit_orderer *tmp;
uint64 count= ++e->count_committing_event_groups;
- if (gco->next_gco && gco->next_gco->wait_count == count)
- mysql_cond_broadcast(&gco->next_gco->COND_group_commit_orderer);
+ /* Signal any following GCO whose wait_count has been reached now. */
+ tmp= gco;
+ while ((tmp= tmp->next_gco))
+ {
+ uint64 wait_count= tmp->wait_count;
+ if (wait_count > count)
+ break;
+ mysql_cond_broadcast(&tmp->COND_group_commit_orderer);
+ }
}
@@ -1862,7 +1871,7 @@ rpl_group_info::mark_start_commit_no_lock()
{
if (did_mark_start_commit)
return;
- mark_start_commit_inner(parallel_entry, gco);
+ mark_start_commit_inner(parallel_entry, gco, this);
did_mark_start_commit= true;
}
@@ -1877,7 +1886,7 @@ rpl_group_info::mark_start_commit()
e= this->parallel_entry;
mysql_mutex_lock(&e->LOCK_parallel_entry);
- mark_start_commit_inner(e, gco);
+ mark_start_commit_inner(e, gco, this);
mysql_mutex_unlock(&e->LOCK_parallel_entry);
did_mark_start_commit= true;
}