summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMonty <monty@mariadb.org>2021-02-08 21:03:04 +0200
committerMonty <monty@mariadb.org>2021-02-08 21:03:04 +0200
commitffc5d064895cadbc42711efd7dbb6ae1b323f050 (patch)
tree2c795b9d5f1c5e572ebbca5aea6531f328391156
parentbd5ac03896fa6e126690c76c1a9d2246c723f786 (diff)
downloadmariadb-git-ffc5d064895cadbc42711efd7dbb6ae1b323f050.tar.gz
MDEV-24087 s3.replication_partition fails in buildbot with replication failure
A few of the failures were caused by missing sync_slave_with_master calls in the test suite. However, the main reason for most of the failures was that, for ALTER PARTITION, the master wrote the query to the binary log before it had updated the .frm and .par files. This is a problem for an S3 slave: it starts executing the ALTER PARTITION but gets the old .frm and .par files from S3, which causes "open table" to fail, either with an error or, in some cases, with a crash. Fixed by writing the query to the binary log only after the new .frm and .par files have been installed and the ddl log entries have been marked as completed.
-rw-r--r--mysql-test/suite/s3/replication.inc2
-rw-r--r--mysql-test/suite/s3/replication_mixed.result2
-rw-r--r--mysql-test/suite/s3/replication_partition.result6
-rw-r--r--mysql-test/suite/s3/replication_partition.test6
-rw-r--r--mysql-test/suite/s3/replication_stmt.result2
-rw-r--r--sql/sql_partition.cc53
6 files changed, 46 insertions, 25 deletions
diff --git a/mysql-test/suite/s3/replication.inc b/mysql-test/suite/s3/replication.inc
index d30733a4396..cfa38d93f98 100644
--- a/mysql-test/suite/s3/replication.inc
+++ b/mysql-test/suite/s3/replication.inc
@@ -81,6 +81,8 @@ show create table t2;
connection master;
drop table t2;
+sync_slave_with_master;
+connection master;
--echo #
--echo # Test RENAME
diff --git a/mysql-test/suite/s3/replication_mixed.result b/mysql-test/suite/s3/replication_mixed.result
index 66ed24d2626..6b9b87a7891 100644
--- a/mysql-test/suite/s3/replication_mixed.result
+++ b/mysql-test/suite/s3/replication_mixed.result
@@ -105,6 +105,8 @@ t2 CREATE TABLE `t2` (
) ENGINE=Aria DEFAULT CHARSET=latin1 PAGE_CHECKSUM=1
connection master;
drop table t2;
+connection slave;
+connection master;
#
# Test RENAME
#
diff --git a/mysql-test/suite/s3/replication_partition.result b/mysql-test/suite/s3/replication_partition.result
index 2b9297ea9a0..0a57a1b872d 100644
--- a/mysql-test/suite/s3/replication_partition.result
+++ b/mysql-test/suite/s3/replication_partition.result
@@ -13,10 +13,14 @@ PARTITION BY HASH (c1)
PARTITIONS 3;
INSERT INTO t1 VALUE (1), (2), (101), (102), (201), (202);
ALTER TABLE t1 ENGINE=S3;
+connection slave;
+connection master;
ALTER TABLE t1 ADD PARTITION PARTITIONS 6;
select sum(c1) from t1;
sum(c1)
609
+connection slave;
+connection master;
ALTER TABLE t1 ADD COLUMN c INT;
select sum(c1) from t1;
sum(c1)
@@ -108,6 +112,8 @@ select sum(c1) from t1;
ERROR 42S02: Table 'database.t1' doesn't exist
start slave;
connection master;
+connection slave;
+connection master;
#
# Check altering partitioned table to S3 and back
# Checks also rename partitoned table and drop partition
diff --git a/mysql-test/suite/s3/replication_partition.test b/mysql-test/suite/s3/replication_partition.test
index 8a8699920bf..254924f9051 100644
--- a/mysql-test/suite/s3/replication_partition.test
+++ b/mysql-test/suite/s3/replication_partition.test
@@ -29,8 +29,12 @@ CREATE TABLE t1 (
PARTITIONS 3;
INSERT INTO t1 VALUE (1), (2), (101), (102), (201), (202);
ALTER TABLE t1 ENGINE=S3;
+sync_slave_with_master;
+connection master;
ALTER TABLE t1 ADD PARTITION PARTITIONS 6;
select sum(c1) from t1;
+sync_slave_with_master;
+connection master;
ALTER TABLE t1 ADD COLUMN c INT;
select sum(c1) from t1;
sync_slave_with_master;
@@ -85,6 +89,8 @@ select sum(c1) from t1;
--file_exists $MYSQLD_DATADIR/$database/t1.par
start slave;
connection master;
+sync_slave_with_master;
+connection master;
--echo #
--echo # Check altering partitioned table to S3 and back
diff --git a/mysql-test/suite/s3/replication_stmt.result b/mysql-test/suite/s3/replication_stmt.result
index 077029e2d7d..4b8306687db 100644
--- a/mysql-test/suite/s3/replication_stmt.result
+++ b/mysql-test/suite/s3/replication_stmt.result
@@ -105,6 +105,8 @@ t2 CREATE TABLE `t2` (
) ENGINE=Aria DEFAULT CHARSET=latin1 PAGE_CHECKSUM=1
connection master;
drop table t2;
+connection slave;
+connection master;
#
# Test RENAME
#
diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc
index 9051484b6b8..3d33731da5c 100644
--- a/sql/sql_partition.cc
+++ b/sql/sql_partition.cc
@@ -7174,23 +7174,24 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
4) Close the table that have already been opened but didn't stumble on
the abort locked previously. This is done as part of the
alter_close_table call.
- 5) Write the bin log
- Unfortunately the writing of the binlog is not synchronised with
- other logging activities. So no matter in which order the binlog
- is written compared to other activities there will always be cases
- where crashes make strange things occur. In this placement it can
- happen that the ALTER TABLE DROP PARTITION gets performed in the
- master but not in the slaves if we have a crash, after writing the
- ddl log but before writing the binlog. A solution to this would
- require writing the statement first in the ddl log and then
- when recovering from the crash read the binlog and insert it into
- the binlog if not written already.
+ 5) Old place for binary logging
6) Install the previously written shadow frm file
7) Prepare handlers for drop of partitions
8) Drop the partitions
9) Remove entries from ddl log
10) Reopen table if under lock tables
- 11) Complete query
+ 11) Write the bin log
+ Unfortunately the writing of the binlog is not synchronised with
+ other logging activities. So no matter in which order the binlog
+ is written compared to other activities there will always be cases
+ where crashes make strange things occur. In this placement it can
+ happen that the ALTER TABLE DROP PARTITION gets performed in the
+ master but not in the slaves if we have a crash, after writing the
+ ddl log but before writing the binlog. A solution to this would
+ require writing the statement first in the ddl log and then
+ when recovering from the crash read the binlog and insert it into
+ the binlog if not written already.
+ 12) Complete query
We insert Error injections at all places where it could be interesting
to test if recovery is properly done.
@@ -7211,9 +7212,6 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
alter_close_table(lpt) ||
ERROR_INJECT_CRASH("crash_drop_partition_5") ||
ERROR_INJECT_ERROR("fail_drop_partition_5") ||
- ((!thd->lex->no_write_to_binlog) &&
- (write_bin_log(thd, FALSE,
- thd->query(), thd->query_length()), FALSE)) ||
ERROR_INJECT_CRASH("crash_drop_partition_6") ||
ERROR_INJECT_ERROR("fail_drop_partition_6") ||
(frm_install= TRUE, FALSE) ||
@@ -7225,6 +7223,9 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
ERROR_INJECT_CRASH("crash_drop_partition_8") ||
ERROR_INJECT_ERROR("fail_drop_partition_8") ||
(write_log_completed(lpt, FALSE), FALSE) ||
+ ((!thd->lex->no_write_to_binlog) &&
+ (write_bin_log(thd, FALSE,
+ thd->query(), thd->query_length()), FALSE)) ||
ERROR_INJECT_CRASH("crash_drop_partition_9") ||
ERROR_INJECT_ERROR("fail_drop_partition_9"))
{
@@ -7257,7 +7258,7 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
3) Write an entry to remove the new parttions if crash occurs
4) Add the new partitions.
5) Close all instances of the table and remove them from the table cache.
- 6) Write binlog
+ 6) Old place for write binlog
7) Now the change is completed except for the installation of the
new frm file. We thus write an action in the log to change to
the shadow frm file
@@ -7265,7 +7266,8 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
added to the table.
9) Remove entries from ddl log
10)Reopen tables if under lock tables
- 11)Complete query
+ 11)Write to binlog
+ 12)Complete query
*/
if (write_log_drop_shadow_frm(lpt) ||
ERROR_INJECT_CRASH("crash_add_partition_1") ||
@@ -7285,9 +7287,6 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
alter_close_table(lpt) ||
ERROR_INJECT_CRASH("crash_add_partition_6") ||
ERROR_INJECT_ERROR("fail_add_partition_6") ||
- ((!thd->lex->no_write_to_binlog) &&
- (write_bin_log(thd, FALSE,
- thd->query(), thd->query_length()), FALSE)) ||
ERROR_INJECT_CRASH("crash_add_partition_7") ||
ERROR_INJECT_ERROR("fail_add_partition_7") ||
write_log_rename_frm(lpt) ||
@@ -7300,6 +7299,9 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
ERROR_INJECT_CRASH("crash_add_partition_9") ||
ERROR_INJECT_ERROR("fail_add_partition_9") ||
(write_log_completed(lpt, FALSE), FALSE) ||
+ ((!thd->lex->no_write_to_binlog) &&
+ (write_bin_log(thd, FALSE,
+ thd->query(), thd->query_length()), FALSE)) ||
ERROR_INJECT_CRASH("crash_add_partition_10") ||
ERROR_INJECT_ERROR("fail_add_partition_10"))
{
@@ -7356,13 +7358,14 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
5) Close the table.
6) Log that operation is completed and log all complete actions
needed to complete operation from here.
- 7) Write bin log.
+ 7) Old place for write bin log.
8) Prepare handlers for rename and delete of partitions.
9) Rename and drop the reorged partitions such that they are no
longer used and rename those added to their real new names.
10) Install the shadow frm file.
11) Reopen the table if under lock tables.
- 12) Complete query.
+ 12) Write to binlog
+ 13) Complete query.
*/
if (write_log_drop_shadow_frm(lpt) ||
ERROR_INJECT_CRASH("crash_change_partition_1") ||
@@ -7386,9 +7389,6 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
(action_completed= TRUE, FALSE) ||
ERROR_INJECT_CRASH("crash_change_partition_7") ||
ERROR_INJECT_ERROR("fail_change_partition_7") ||
- ((!thd->lex->no_write_to_binlog) &&
- (write_bin_log(thd, FALSE,
- thd->query(), thd->query_length()), FALSE)) ||
ERROR_INJECT_CRASH("crash_change_partition_8") ||
ERROR_INJECT_ERROR("fail_change_partition_8") ||
((frm_install= TRUE), FALSE) ||
@@ -7403,6 +7403,9 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
ERROR_INJECT_CRASH("crash_change_partition_11") ||
ERROR_INJECT_ERROR("fail_change_partition_11") ||
(write_log_completed(lpt, FALSE), FALSE) ||
+ ((!thd->lex->no_write_to_binlog) &&
+ (write_bin_log(thd, FALSE,
+ thd->query(), thd->query_length()), FALSE)) ||
ERROR_INJECT_CRASH("crash_change_partition_12") ||
ERROR_INJECT_ERROR("fail_change_partition_12"))
{