summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrandon Nesterenko <brandon.nesterenko@mariadb.com>2021-10-13 07:31:32 -0600
committerBrandon Nesterenko <brandon.nesterenko@mariadb.com>2021-10-18 10:43:51 -0600
commit2291f8ef73489fb8ed79768484df1ee4db3583a7 (patch)
tree2243d2e502e7bc615ae29e60fae6c47e563ec324
parent5f63f5dc60a48105d739f606cbf0a575925029d1 (diff)
downloadmariadb-git-2291f8ef73489fb8ed79768484df1ee4db3583a7.tar.gz
MDEV-25284: Assertion `info->type == READ_CACHE || info->type == WRITE_CACHE' failedbb-10.2-MDEV-25284
Problem: ======== This patch addresses two issues. First, if a CHANGE MASTER command is issued and an error happens while locating the replica’s relay logs, the logs can be put into an invalid state where future updates fail and future CHANGE MASTER calls crash the server. More specifically, right before a replica purges the relay logs (part of the `CHANGE MASTER TO` logic), the relay log is temporarily closed with state LOG_TO_BE_OPENED. If the server errors in-between the temporary log closure and purge, i.e. during the function find_log_pos, the log should be closed. MDEV-25284 reveals the log is not properly closed. Second, upon issuing a RESET SLAVE ALL command, a slave’s GTID filters are not cleared (DO_DOMAIN_IDS, IGNORE_DOMIAN_IDS, IGNORE_SERVER_IDS). MySQL had a similar bug report, Bug #18816897, which fixed this issue to clear IGNORE_SERVER_IDS after issuing RESET SLAVE ALL in version 5.7. Solution: ========= To fix the first problem, the CHANGE MASTER error handling logic was extended to transition the relay log state to LOG_CLOSED from LOG_TO_BE_OPENED. To fix the second problem, the RESET SLAVE ALL logic is extended to clear the domain_id filter and ignore_server_ids. Reviewed By: ============ Andrei Elkin <andrei.elkin@mariadb.com>
-rw-r--r--mysql-test/suite/rpl/include/rpl_reset_slave_all_check.inc48
-rw-r--r--mysql-test/suite/rpl/r/rpl_change_master_find_log_pos_err.result43
-rw-r--r--mysql-test/suite/rpl/r/rpl_reset_slave_all_clears_filters.result54
-rw-r--r--mysql-test/suite/rpl/t/rpl_change_master_find_log_pos_err.test93
-rw-r--r--mysql-test/suite/rpl/t/rpl_reset_slave_all_clears_filters.test72
-rw-r--r--sql/log.h14
-rw-r--r--sql/rpl_mi.cc8
-rw-r--r--sql/rpl_mi.h5
-rw-r--r--sql/sql_repl.cc10
9 files changed, 347 insertions, 0 deletions
diff --git a/mysql-test/suite/rpl/include/rpl_reset_slave_all_check.inc b/mysql-test/suite/rpl/include/rpl_reset_slave_all_check.inc
new file mode 100644
index 00000000000..adbaf32ebd7
--- /dev/null
+++ b/mysql-test/suite/rpl/include/rpl_reset_slave_all_check.inc
@@ -0,0 +1,48 @@
+# This file ensures that a slave's id filtering variables (i.e. DO_DOMAIN_IDS,
+# IGNORE_DOMAIN_IDS, and IGNORE_SERVER_IDS) are cleared after issuing
+# `RESET SLAVE ALL`.
+#
+# param $_do_domain_ids Integer list of values to use for DO_DOMAIN_IDS
+# param $_ignore_domain_ids Integer list of values to use for IGNORE_DOMAIN_IDS
+# param $_ignore_server_ids Integer list of values to use for IGNORE_SERVER_IDS
+#
+
+--echo # Id filtering variable values should be empty initially
+let $do_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1);
+let $ignore_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1);
+let $ignore_server_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Server_Ids, 1);
+
+if (`SELECT "$do_domain_ids_before" != "" OR
+ "$ignore_domain_ids_before" != "" OR
+ "$ignore_server_ids_before" != ""`)
+{
+ die("CHANGE MASTER TO id filter variables are not empty initially");
+}
+
+
+--echo # Set id filtering variables
+eval CHANGE MASTER TO DO_DOMAIN_IDS=$_do_domain_ids, IGNORE_DOMAIN_IDS=$_ignore_domain_ids, IGNORE_SERVER_IDS=$_ignore_server_ids, MASTER_USE_GTID=SLAVE_POS;
+let $do_domain_ids_set= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1);
+let $ignore_domain_ids_set= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1);
+let $ignore_server_ids_set= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Server_Ids, 1);
+--echo # do domain id list: $do_domain_ids_set
+--echo # ignore domain id list: $ignore_domain_ids_set
+--echo # ignore server id list: $ignore_server_ids_set
+
+
+--echo # RESET SLAVE ALL should clear values for all id filtering variables
+RESET SLAVE ALL;
+--replace_result $MASTER_MYPORT MASTER_MYPORT
+eval change master to master_port=$MASTER_MYPORT, master_host='127.0.0.1', master_user='root';
+--source include/start_slave.inc
+--source include/stop_slave.inc
+
+let $do_domain_ids_cleared= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1);
+let $ignore_domain_ids_cleared= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1);
+let $ignore_server_ids_cleared= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Server_Ids, 1);
+if (`SELECT "$do_domain_ids_cleared" != "" OR
+ "$ignore_domain_ids_cleared" != "" OR
+ "$ignore_server_ids_cleared" != ""`)
+{
+ die("RESET SLAVE ALL did not clear id filtering variables");
+}
diff --git a/mysql-test/suite/rpl/r/rpl_change_master_find_log_pos_err.result b/mysql-test/suite/rpl/r/rpl_change_master_find_log_pos_err.result
new file mode 100644
index 00000000000..0ff76b5b60f
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_change_master_find_log_pos_err.result
@@ -0,0 +1,43 @@
+include/master-slave.inc
+[connection master]
+#
+# Failed CHANGE MASTER TO should not change relay log status
+#
+connection slave;
+include/stop_slave.inc
+SET @@debug_dbug="d,simulate_find_log_pos_error";
+CHANGE MASTER TO IGNORE_DOMAIN_IDS=(1), MASTER_USE_GTID=SLAVE_POS;
+ERROR HY000: Target log not found in binlog index
+SET @@debug_dbug="";
+include/start_slave.inc
+#
+# Ensure relay log can be updated after a failed CHANGE MASTER
+#
+FLUSH RELAY LOGS;
+include/wait_for_slave_param.inc [Relay_Log_File]
+#
+# Slave should continue to receive data from old master after failed
+# CHANGE MASTER TO
+#
+connection master;
+CREATE TABLE t1 (a int);
+insert into t1 values (1);
+connection slave;
+connection slave;
+#
+# Future CHANGE MASTER calls should succeed
+#
+include/stop_slave.inc
+CHANGE MASTER TO MASTER_USE_GTID=SLAVE_POS;
+include/start_slave.inc
+########################
+# Cleanup
+########################
+connection master;
+DROP TABLE t1;
+connection slave;
+include/stop_slave.inc
+RESET SLAVE ALL;
+change master to master_port=MASTER_MYPORT, master_host='127.0.0.1', master_user='root';
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/r/rpl_reset_slave_all_clears_filters.result b/mysql-test/suite/rpl/r/rpl_reset_slave_all_clears_filters.result
new file mode 100644
index 00000000000..a273aeaa678
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_reset_slave_all_clears_filters.result
@@ -0,0 +1,54 @@
+include/master-slave.inc
+[connection master]
+connection slave;
+include/stop_slave.inc
+#
+# Category 1) DO_DOMAIN_IDS and IGNORE_SERVER_IDS specified together
+#
+# Id filtering variable values should be empty initially
+# Set id filtering variables
+CHANGE MASTER TO DO_DOMAIN_IDS=(1), IGNORE_DOMAIN_IDS=(), IGNORE_SERVER_IDS=(3), MASTER_USE_GTID=SLAVE_POS;
+# do domain id list: 1
+# ignore domain id list:
+# ignore server id list: 3
+# RESET SLAVE ALL should clear values for all id filtering variables
+RESET SLAVE ALL;
+change master to master_port=MASTER_MYPORT, master_host='127.0.0.1', master_user='root';
+include/start_slave.inc
+include/stop_slave.inc
+#
+# Category 2) IGNORE_DOMAIN_IDS and IGNORE_SERVER_IDS specified together
+#
+# Id filtering variable values should be empty initially
+# Set id filtering variables
+CHANGE MASTER TO DO_DOMAIN_IDS=(), IGNORE_DOMAIN_IDS=(2), IGNORE_SERVER_IDS=(3), MASTER_USE_GTID=SLAVE_POS;
+# do domain id list:
+# ignore domain id list: 2
+# ignore server id list: 3
+# RESET SLAVE ALL should clear values for all id filtering variables
+RESET SLAVE ALL;
+change master to master_port=MASTER_MYPORT, master_host='127.0.0.1', master_user='root';
+include/start_slave.inc
+include/stop_slave.inc
+#
+# Category 3) Null check - edge case with all empty lists to ensure a
+# lack of specification doesn't break anything
+#
+# Id filtering variable values should be empty initially
+# Set id filtering variables
+CHANGE MASTER TO DO_DOMAIN_IDS=(), IGNORE_DOMAIN_IDS=(), IGNORE_SERVER_IDS=(), MASTER_USE_GTID=SLAVE_POS;
+# do domain id list:
+# ignore domain id list:
+# ignore server id list:
+# RESET SLAVE ALL should clear values for all id filtering variables
+RESET SLAVE ALL;
+change master to master_port=MASTER_MYPORT, master_host='127.0.0.1', master_user='root';
+include/start_slave.inc
+include/stop_slave.inc
+############################
+# Cleanup
+############################
+connection slave;
+change master to master_port=MASTER_MYPORT, master_host='127.0.0.1', master_user='root';
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_change_master_find_log_pos_err.test b/mysql-test/suite/rpl/t/rpl_change_master_find_log_pos_err.test
new file mode 100644
index 00000000000..d1c2c03f010
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_change_master_find_log_pos_err.test
@@ -0,0 +1,93 @@
+#
+# Purpose:
+# This test ensures that issuing a CHANGE MASTER will not put a replica into
+# an inconsistent state if the slave cannot find the log files (i.e. the call to
+# find_log_pos in reset_logs fails). More specifically, right before a replica
+# purges the relay logs (part of the `CHANGE MASTER TO` logic), the relay log is
+# temporarily closed with state LOG_TO_BE_OPENED. If the server is issued a
+# CHANGE MASTER and it errors in-between the temporary log closure and purge,
+# i.e. during the function find_log_pos, the log should be closed. The bug
+# reported by MDEV-25284 revealed the log is not properly closed, such that
+# future relay log updates fail, and future CHANGE MASTER calls crash the
+# server.
+#
+# Methodology:
+# This test ensures that the relay log is properly closed by ensuring future
+# updates and CHANGE MASTER calls succeed.
+#
+# References:
+# MDEV-25284: Assertion `info->type == READ_CACHE ||
+# info->type == WRITE_CACHE' failed
+#
+--source include/master-slave.inc
+--source include/have_debug.inc
+
+--echo #
+--echo # Failed CHANGE MASTER TO should not change relay log status
+--echo #
+
+--connection slave
+--source include/stop_slave.inc
+SET @@debug_dbug="d,simulate_find_log_pos_error";
+error 1373;
+CHANGE MASTER TO IGNORE_DOMAIN_IDS=(1), MASTER_USE_GTID=SLAVE_POS;
+SET @@debug_dbug="";
+--source include/start_slave.inc
+
+
+--echo #
+--echo # Ensure relay log can be updated after a failed CHANGE MASTER
+--echo #
+
+FLUSH RELAY LOGS;
+--let $slave_param= Relay_Log_File
+--let $slave_param_value= slave-relay-bin.000003
+--source include/wait_for_slave_param.inc
+
+
+--echo #
+--echo # Slave should continue to receive data from old master after failed
+--echo # CHANGE MASTER TO
+--echo #
+
+--connection master
+CREATE TABLE t1 (a int);
+insert into t1 values (1);
+--let $master_checksum= `CHECKSUM TABLE t1`
+--sync_slave_with_master
+
+--connection slave
+if ($master_checksum != `CHECKSUM TABLE t1`)
+{
+ die("Replica failed to pull data from primary after failed CHANGE MASTER TO");
+}
+
+
+--echo #
+--echo # Future CHANGE MASTER calls should succeed
+--echo #
+
+--source include/stop_slave.inc
+CHANGE MASTER TO MASTER_USE_GTID=SLAVE_POS;
+--source include/start_slave.inc
+
+
+--echo ########################
+--echo # Cleanup
+--echo ########################
+
+--connection master
+DROP TABLE t1;
+
+--connection slave
+--source include/stop_slave.inc
+RESET SLAVE ALL;
+--replace_result $MASTER_MYPORT MASTER_MYPORT
+eval change master to master_port=$MASTER_MYPORT, master_host='127.0.0.1', master_user='root';
+--source include/start_slave.inc
+
+--disable_query_log
+call mtr.add_suppression("Failed to locate old binlog or relay log files");
+--enable_query_log
+
+--source include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_reset_slave_all_clears_filters.test b/mysql-test/suite/rpl/t/rpl_reset_slave_all_clears_filters.test
new file mode 100644
index 00000000000..7c01ce16586
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_reset_slave_all_clears_filters.test
@@ -0,0 +1,72 @@
+#
+# Purpose:
+# This test validates that after issuing the `SLAVE RESET ALL` command,
+# any corresponding IGNORE_DOMAIN_IDS/DO_DOMAIN_IDS and IGNORE_SERVER_IDS
+# values are cleared.
+#
+#
+# Methodology:
+# To ensure the filtering variables are properly cleared after issuing
+# SLAVE RESET ALL, we categorize different combinations of allowable input
+# into three different options, and ensure that the variables are cleared for
+# each category. The categories are as follows:
+# Category 1) DO_DOMAIN_IDS and IGNORE_SERVER_IDS specified together
+# Category 2) IGNORE_DOMAIN_IDS and IGNORE_SERVER_IDS specified together
+# Category 3) Null check - edge case with all empty lists to ensure a lack
+# of specification doesn't break anything
+#
+# To specify the values, the variables are set in `CHANGE MASTER TO`. To
+# ensure the slave state is correct, we test the domain/server id filtering
+# variable values at the following times while testing each category.
+#
+# Before CHANGE MASTER TO the filtering variables are tested to all be
+# empty.
+#
+# After CHANGE MASTER TO the variables are tested to ensure they reflect
+# those set in the CHANGE MASTER command.
+#
+# After RESET SLAVE ALL the filtering variables are tested to all be
+# empty.
+#
+
+--source include/master-slave.inc
+--source include/have_debug.inc
+
+--connection slave
+--source include/stop_slave.inc
+
+--echo #
+--echo # Category 1) DO_DOMAIN_IDS and IGNORE_SERVER_IDS specified together
+--echo #
+--let $_do_domain_ids= (1)
+--let $_ignore_domain_ids= ()
+--let $_ignore_server_ids= (3)
+--source include/rpl_reset_slave_all_check.inc
+
+--echo #
+--echo # Category 2) IGNORE_DOMAIN_IDS and IGNORE_SERVER_IDS specified together
+--echo #
+--let $_do_domain_ids= ()
+--let $_ignore_domain_ids= (2)
+--let $_ignore_server_ids= (3)
+--source include/rpl_reset_slave_all_check.inc
+
+--echo #
+--echo # Category 3) Null check - edge case with all empty lists to ensure a
+--echo # lack of specification doesn't break anything
+--echo #
+--let $_do_domain_ids= ()
+--let $_ignore_domain_ids= ()
+--let $_ignore_server_ids= ()
+--source include/rpl_reset_slave_all_check.inc
+
+
+--echo ############################
+--echo # Cleanup
+--echo ############################
+--connection slave
+--replace_result $MASTER_MYPORT MASTER_MYPORT
+eval change master to master_port=$MASTER_MYPORT, master_host='127.0.0.1', master_user='root';
+--source include/start_slave.inc
+
+--source include/rpl_end.inc
diff --git a/sql/log.h b/sql/log.h
index 0770861fe01..6896a4ff550 100644
--- a/sql/log.h
+++ b/sql/log.h
@@ -896,6 +896,20 @@ public:
void unlock_binlog_end_pos() { mysql_mutex_unlock(&LOCK_binlog_end_pos); }
mysql_mutex_t* get_binlog_end_pos_lock() { return &LOCK_binlog_end_pos; }
+ /*
+ Ensures the log's state is either LOG_OPEN or LOG_CLOSED. If something
+ failed along the desired path and left the log in invalid state, i.e.
+ LOG_TO_BE_OPENED, forces the state to be LOG_CLOSED.
+ */
+ void try_fix_log_state()
+ {
+ mysql_mutex_lock(get_log_lock());
+ /* Only change the log state if it is LOG_TO_BE_OPENED */
+ if (log_state == LOG_TO_BE_OPENED)
+ log_state= LOG_CLOSED;
+ mysql_mutex_unlock(get_log_lock());
+ }
+
int wait_for_update_binlog_end_pos(THD* thd, struct timespec * timeout);
/*
diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc
index 82a462d742b..8ed14962dd9 100644
--- a/sql/rpl_mi.cc
+++ b/sql/rpl_mi.cc
@@ -170,6 +170,8 @@ void Master_info::clear_in_memory_info(bool all)
{
port= MYSQL_PORT;
host[0] = 0; user[0] = 0; password[0] = 0;
+ domain_id_filter.clear_ids();
+ reset_dynamic(&ignore_server_ids);
}
}
@@ -1788,6 +1790,12 @@ void Domain_id_filter::reset_filter()
m_filter= false;
}
+void Domain_id_filter::clear_ids()
+{
+ reset_dynamic(&m_domain_ids[DO_DOMAIN_IDS]);
+ reset_dynamic(&m_domain_ids[IGNORE_DOMAIN_IDS]);
+}
+
/**
Update the do/ignore domain id filter lists.
diff --git a/sql/rpl_mi.h b/sql/rpl_mi.h
index 12574285de0..e80c14fc340 100644
--- a/sql/rpl_mi.h
+++ b/sql/rpl_mi.h
@@ -79,6 +79,11 @@ public:
void reset_filter();
/*
+ Clear do_ids and ignore_ids to disable domain id filtering
+ */
+ void clear_ids();
+
+ /*
Update the do/ignore domain id filter lists.
@param do_ids [IN] domain ids to be kept
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index 7ff0e27b008..d6d2dbc0d39 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -3840,6 +3840,16 @@ err:
mi->unlock_slave_threads();
if (ret == FALSE)
my_ok(thd);
+ else
+ {
+ /*
+ Depending on where CHANGE MASTER failed, the logs may be waiting to be
+ reopened. This would break future log updates and CHANGE MASTER calls.
+ `try_fix_log_state()` allows the relay log to fix its state to no longer
+ expect to be reopened.
+ */
+ mi->rli.relay_log.try_fix_log_state();
+ }
DBUG_RETURN(ret);
}