summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Leandro Pacheco <leandro.pacheco@galeracluster.com> 2021-07-20 12:45:00 -0300
committer: Jan Lindström <jan.lindstrom@mariadb.com> 2021-07-27 08:11:41 +0300
commit: 2b84e1c9667df5ff9a7b1f4da934e57490b32630 (patch)
tree: f6221fd502e3f2fbe57fe5ba7d012666908739a0
parent: 389f5cf76fa2bd5e3dacd074395db4d0ef59c9f2 (diff)
download: mariadb-git-2b84e1c9667df5ff9a7b1f4da934e57490b32630.tar.gz
MDEV-23080: desync and pause node on BACKUP STAGE BLOCK_DDL
Make BACKUP STAGE behave like FTWRL: BACKUP STAGE BLOCK_DDL now desyncs and pauses the node, preventing BF threads (appliers) from interfering with the blocking stages. This is needed because BF threads do not respect BACKUP MDL locks. Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
-rw-r--r-- extra/mariabackup/backup_mysql.cc | 2
-rw-r--r-- mysql-test/suite/galera/r/MDEV-22051.result | 6
-rw-r--r-- mysql-test/suite/galera/r/galera_backup_stage.result | 78
-rw-r--r-- mysql-test/suite/galera/t/galera_backup_stage.test | 120
-rw-r--r-- sql/backup.cc | 24
-rw-r--r-- sql/sql_class.cc | 1
-rw-r--r-- sql/sql_class.h | 3
-rw-r--r-- sql/wsrep_mysqld.cc | 2
8 files changed, 231 insertions, 5 deletions
diff --git a/extra/mariabackup/backup_mysql.cc b/extra/mariabackup/backup_mysql.cc
index 162980acb21..04a4fb22e14 100644
--- a/extra/mariabackup/backup_mysql.cc
+++ b/extra/mariabackup/backup_mysql.cc
@@ -930,7 +930,7 @@ bool lock_tables(MYSQL *connection)
if (have_galera_enabled)
{
- xb_mysql_query(connection, "SET SESSION wsrep_causal_reads=0", false);
+ xb_mysql_query(connection, "SET SESSION wsrep_sync_wait=0", false);
}
xb_mysql_query(connection, "BACKUP STAGE START", true);
diff --git a/mysql-test/suite/galera/r/MDEV-22051.result b/mysql-test/suite/galera/r/MDEV-22051.result
index 9f5394637c2..0e9756dd20e 100644
--- a/mysql-test/suite/galera/r/MDEV-22051.result
+++ b/mysql-test/suite/galera/r/MDEV-22051.result
@@ -2,14 +2,14 @@ connection node_2;
connection node_1;
FLUSH TABLES WITH READ LOCK;
CREATE TABLE t1 (a INT) ENGINE=InnoDB;
-ERROR 08S01: Aborting TOI: Global Read-Lock (FTWRL) in place.
+ERROR 08S01: Aborting TOI: Replication paused on node for FTWRL/BACKUP STAGE.
SET wsrep_OSU_method=RSU;
CREATE TABLE t1 (a INT) ENGINE=InnoDB;
-ERROR 08S01: Aborting TOI: Global Read-Lock (FTWRL) in place.
+ERROR 08S01: Aborting TOI: Replication paused on node for FTWRL/BACKUP STAGE.
SET wsrep_OSU_method=TOI;
connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
CREATE TABLE t1 (a INT) ENGINE=InnoDB;
-ERROR 08S01: Aborting TOI: Global Read-Lock (FTWRL) in place.
+ERROR 08S01: Aborting TOI: Replication paused on node for FTWRL/BACKUP STAGE.
connection node_1;
UNLOCK TABLES;
CREATE TABLE t1 (a INT) ENGINE=InnoDB;
diff --git a/mysql-test/suite/galera/r/galera_backup_stage.result b/mysql-test/suite/galera/r/galera_backup_stage.result
new file mode 100644
index 00000000000..6fb7d1643cd
--- /dev/null
+++ b/mysql-test/suite/galera/r/galera_backup_stage.result
@@ -0,0 +1,78 @@
+connection node_2;
+connection node_1;
+connection node_1;
+CREATE TABLE t1 (f1 varchar(10)) ENGINE=InnoDB;
+BACKUP STAGE START;
+BACKUP STAGE FLUSH;
+BACKUP STAGE END;
+BACKUP STAGE START;
+BACKUP STAGE FLUSH;
+connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
+connection node_1a;
+SET SESSION wsrep_sync_wait=0;
+SET SESSION wsrep_retry_autocommit=0;
+INSERT INTO t1 (f1) values ("node1_1");
+ALTER TABLE t1 ADD COLUMN (f2 int(10));
+connection node_2;
+INSERT INTO t1 (f1) values ("node2_1");
+ALTER TABLE t1 ADD COLUMN (f3 int(10));
+connection node_1;
+BACKUP STAGE BLOCK_DDL;
+connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1;
+connection node_1c;
+SET SESSION wsrep_sync_wait=0;
+connection node_2;
+INSERT INTO t1 (f1) values("node2_2");
+ALTER TABLE t1 ADD COLUMN (f5 int(10));
+connection node_1a;
+ALTER TABLE t1 ADD COLUMN (f4 int(10));
+ERROR 08S01: Aborting TOI: Replication paused on node for FTWRL/BACKUP STAGE.
+INSERT INTO t1 (f1) values("node1a");;
+connection node_1c;
+connection node_1;
+BACKUP STAGE BLOCK_COMMIT;
+connection node_1c;
+SELECT variable_value="Donor/Desynced" FROM information_schema.global_status WHERE variable_name="wsrep_local_state_comment";
+variable_value="Donor/Desynced"
+1
+connection node_2;
+INSERT INTO t1 (f1) values("node2_3");
+ALTER TABLE t1 ADD COLUMN (f6 int(10));
+connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1;
+connection node_1b;
+SET SESSION wsrep_sync_wait=0;
+SET SESSION wsrep_retry_autocommit=0;
+ALTER TABLE t1 ADD COLUMN (f4 int(10));
+ERROR 08S01: Aborting TOI: Replication paused on node for FTWRL/BACKUP STAGE.
+INSERT INTO t1 (f1) values("node1b");;
+connection node_1c;
+SELECT COUNT(*)=2 FROM t1;
+COUNT(*)=2
+1
+SELECT COUNT(*)=3 FROM information_schema.columns WHERE table_name = 't1';
+COUNT(*)=3
+1
+connection node_1;
+BACKUP STAGE END;
+connection node_1a;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+connection node_1b;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+connection node_1;
+SELECT COUNT(*)=4 FROM t1;
+COUNT(*)=4
+1
+SELECT COUNT(*)=5 FROM information_schema.columns WHERE table_name = 't1';
+COUNT(*)=5
+1
+connection node_2;
+SELECT COUNT(*)=4 FROM t1;
+COUNT(*)=4
+1
+SELECT COUNT(*)=5 FROM information_schema.columns WHERE table_name = 't1';
+COUNT(*)=5
+1
+connection node_1;
+DROP TABLE t1;
+call mtr.add_suppression("WSREP: ALTER TABLE isolation failure");
+call mtr.add_suppression("greater than drain seqno");
diff --git a/mysql-test/suite/galera/t/galera_backup_stage.test b/mysql-test/suite/galera/t/galera_backup_stage.test
new file mode 100644
index 00000000000..31d76816355
--- /dev/null
+++ b/mysql-test/suite/galera/t/galera_backup_stage.test
@@ -0,0 +1,120 @@
+#
+# Check that BACKUP STAGE BLOCK_DDL desyncs and pauses the node until BACKUP STAGE END:
+# - Local DDLs will fail immediately
+# - Local DMLs will block until resync
+# - Remote txns will be applied after resync (STAGE END).
+#
+
+--source include/galera_cluster.inc
+--source include/have_innodb.inc
+--source include/have_metadata_lock_info.inc
+
+--connection node_1
+CREATE TABLE t1 (f1 varchar(10)) ENGINE=InnoDB;
+
+# First, check that BACKUP STAGE END works when the desyncing stages (BLOCK_DDL onward) were skipped
+BACKUP STAGE START;
+BACKUP STAGE FLUSH;
+BACKUP STAGE END;
+
+BACKUP STAGE START;
+BACKUP STAGE FLUSH;
+
+--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1
+--connection node_1a
+SET SESSION wsrep_sync_wait=0;
+SET SESSION wsrep_retry_autocommit=0;
+INSERT INTO t1 (f1) values ("node1_1");
+ALTER TABLE t1 ADD COLUMN (f2 int(10));
+
+--connection node_2
+INSERT INTO t1 (f1) values ("node2_1");
+ALTER TABLE t1 ADD COLUMN (f3 int(10));
+
+# BLOCK_DDL desyncs and pauses the node
+--connection node_1
+BACKUP STAGE BLOCK_DDL;
+
+--connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1
+--connection node_1c
+SET SESSION wsrep_sync_wait=0;
+--let $wait_condition = SELECT variable_value="Donor/Desynced" FROM information_schema.global_status WHERE variable_name="wsrep_local_state_comment"
+--source include/wait_condition.inc
+
+--connection node_2
+INSERT INTO t1 (f1) values("node2_2");
+ALTER TABLE t1 ADD COLUMN (f5 int(10));
+
+--connection node_1a
+--error ER_UNKNOWN_COM_ERROR
+ALTER TABLE t1 ADD COLUMN (f4 int(10));
+--let $insert_id = `SELECT CONNECTION_ID()`
+--send INSERT INTO t1 (f1) values("node1a");
+
+# the insert will block during commit inside the provider, in certify. We can't
+# check for sure it is blocked there, so we wait for the thread to at least
+# reach commit stage. In the unlikely case the interleaving is different, the
+# result of the test should not change.
+--connection node_1c
+--let $wait_condition = SELECT COUNT(*)=1 FROM information_schema.processlist WHERE State='Commit' AND ID=$insert_id
+--source include/wait_condition.inc
+--let $wait_condition = SELECT COUNT(*)=1 FROM information_schema.metadata_lock_info WHERE TABLE_NAME='t1' AND THREAD_ID=$insert_id
+--source include/wait_condition.inc
+
+--connection node_1
+BACKUP STAGE BLOCK_COMMIT;
+
+# node only resumes/resyncs upon STAGE END
+--connection node_1c
+SELECT variable_value="Donor/Desynced" FROM information_schema.global_status WHERE variable_name="wsrep_local_state_comment";
+
+--connection node_2
+INSERT INTO t1 (f1) values("node2_3");
+ALTER TABLE t1 ADD COLUMN (f6 int(10));
+
+--connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1
+--connection node_1b
+SET SESSION wsrep_sync_wait=0;
+SET SESSION wsrep_retry_autocommit=0;
+--error ER_UNKNOWN_COM_ERROR
+ALTER TABLE t1 ADD COLUMN (f4 int(10));
+--let $insert_id = `SELECT CONNECTION_ID()`
+--send INSERT INTO t1 (f1) values("node1b");
+
+# wait for insert to get blocked
+--connection node_1c
+--let $wait_condition = SELECT COUNT(*)=1 FROM information_schema.processlist WHERE State='Commit' AND ID=$insert_id
+--source include/wait_condition.inc
+--let $wait_condition = SELECT COUNT(*)=1 FROM information_schema.metadata_lock_info WHERE TABLE_NAME='t1' AND THREAD_ID=$insert_id
+--source include/wait_condition.inc
+--let $wait_condition = SELECT COUNT(*)=2 FROM information_schema.processlist WHERE Info like 'INSERT INTO t1 (f1) values("node1%")' AND State = 'Commit'
+--source include/wait_condition.inc
+
+# nothing after BLOCK_DDL is applied
+SELECT COUNT(*)=2 FROM t1;
+SELECT COUNT(*)=3 FROM information_schema.columns WHERE table_name = 't1';
+
+# STAGE END resumes and resyncs the node
+--connection node_1
+BACKUP STAGE END;
+
+# Upon resume, the blocked inserts continue but conflict with the ALTERs being applied
+--connection node_1a
+--error ER_LOCK_DEADLOCK
+--reap
+--connection node_1b
+--error ER_LOCK_DEADLOCK
+--reap
+
+--connection node_1
+SELECT COUNT(*)=4 FROM t1;
+SELECT COUNT(*)=5 FROM information_schema.columns WHERE table_name = 't1';
+
+--connection node_2
+SELECT COUNT(*)=4 FROM t1;
+SELECT COUNT(*)=5 FROM information_schema.columns WHERE table_name = 't1';
+
+--connection node_1
+DROP TABLE t1;
+call mtr.add_suppression("WSREP: ALTER TABLE isolation failure");
+call mtr.add_suppression("greater than drain seqno");
diff --git a/sql/backup.cc b/sql/backup.cc
index cff14415d96..c021d0fc552 100644
--- a/sql/backup.cc
+++ b/sql/backup.cc
@@ -34,6 +34,7 @@
#include "sql_insert.h" // kill_delayed_threads
#include "sql_handler.h" // mysql_ha_cleanup_no_free
#include <my_sys.h>
+#include "wsrep_mysqld.h"
static const char *stage_names[]=
{"START", "FLUSH", "BLOCK_DDL", "BLOCK_COMMIT", "END", 0};
@@ -254,6 +255,21 @@ static bool backup_block_ddl(THD *thd)
(void) flush_tables(thd, FLUSH_NON_TRANS_TABLES);
thd->clear_error();
+#ifdef WITH_WSREP
+ /*
+ We desync the node for BACKUP STAGE because applier threads
+ bypass backup MDL locks (see MDL_lock::can_grant_lock)
+ */
+ if (WSREP_NNULL(thd))
+ {
+ Wsrep_server_state &server_state= Wsrep_server_state::instance();
+ if (server_state.desync_and_pause().is_undefined()) {
+ DBUG_RETURN(1);
+ }
+ thd->wsrep_desynced_backup_stage= true;
+ }
+#endif /* WITH_WSREP */
+
/*
block new DDL's, in addition to all previous blocks
We didn't do this lock above, as we wanted DDL's to be executed while
@@ -318,6 +334,14 @@ bool backup_end(THD *thd)
ha_end_backup();
thd->current_backup_stage= BACKUP_FINISHED;
thd->mdl_context.release_lock(backup_flush_ticket);
+#ifdef WITH_WSREP
+ if (WSREP_NNULL(thd) && thd->wsrep_desynced_backup_stage)
+ {
+ Wsrep_server_state &server_state= Wsrep_server_state::instance();
+ server_state.resume_and_resync();
+ thd->wsrep_desynced_backup_stage= false;
+ }
+#endif /* WITH_WSREP */
}
DBUG_RETURN(0);
}
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 7e43605b047..8aaa0ebbe71 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -1281,6 +1281,7 @@ void THD::init()
m_wsrep_next_trx_id = WSREP_UNDEFINED_TRX_ID;
wsrep_replicate_GTID = false;
wsrep_aborter = 0;
+ wsrep_desynced_backup_stage= false;
#endif /* WITH_WSREP */
if (variables.sql_log_bin)
diff --git a/sql/sql_class.h b/sql/sql_class.h
index a439da53a7d..8e2b6eed338 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -3011,6 +3011,9 @@ public:
uint server_status,open_options;
enum enum_thread_type system_thread;
enum backup_stages current_backup_stage;
+#ifdef WITH_WSREP
+ bool wsrep_desynced_backup_stage;
+#endif /* WITH_WSREP */
/*
Current or next transaction isolation level.
When a connection is established, the value is taken from
diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc
index 193c06725cf..cabf066abf4 100644
--- a/sql/wsrep_mysqld.cc
+++ b/sql/wsrep_mysqld.cc
@@ -2168,7 +2168,7 @@ int wsrep_to_isolation_begin(THD *thd, const char *db_, const char *table_,
if (Wsrep_server_state::instance().desynced_on_pause())
{
my_message(ER_UNKNOWN_COM_ERROR,
- "Aborting TOI: Global Read-Lock (FTWRL) in place.", MYF(0));
+ "Aborting TOI: Replication paused on node for FTWRL/BACKUP STAGE.", MYF(0));
return -1;
}