author    | Monty <monty@mariadb.org> | 2020-03-30 14:50:03 +0300
committer | Monty <monty@mariadb.org> | 2020-04-19 17:33:51 +0300
commit    | eca5c2c67ff1854b186b0e1b8dd342cb988e94d2 (patch)
tree      | 7a6f35362dfc38469f0842cc98c0c28fd02236e4
parent    | 78357796e8069f8cd041ea0bb0f5802234bdd2bc (diff)
download  | mariadb-git-eca5c2c67ff1854b186b0e1b8dd342cb988e94d2.tar.gz
Added support for more functions when using partitioned S3 tables
MDEV-22088 S3 partitioning support
All ALTER PARTITION commands should now work on S3 tables except:
- REBUILD PARTITION
- TRUNCATE PARTITION
- REORGANIZE PARTITION
In addition, partitioned S3 tables can also be replicated. This is
achieved by storing the partitioned table's .frm and .par files on S3
for partitioned shared (S3) tables.
The discovery methods are enhanced by allowing engines that support
discovery to also discover the partitioned table's .frm and .par files.
Things in more detail:
- The .frm and .par files of partitioned tables are stored in S3 and kept
in sync.
- Added hton callback create_partitioning_metadata to inform the handler
  that the metadata for a partitioned table has changed (see the callback
  sketch after this list).
- Added back handler::discover_check_version() to be able to check if
  a table's or a partition table's definition has changed (sketched below).
- Added handler::check_if_updates_are_ignored(), needed for partitioning
  (sketched below).
- Renamed rebind() -> rebind_psi(), as it was before.
- Changed the CHF_xxx handler flags to an enum (chf_create_flags, shown
  below).
- Changed some checks from using table->file->ht to use
table->file->partition_ht() to get discovery to work with partitioning.
- If TABLE_SHARE::init_from_binary_frm_image() fails, ensure that we
don't leave any .frm or .par files around.
- Fixed writefrm() so that it doesn't leave unusable .frm files around
  (see the writefile() sketch below).
- Appended the extension to the path given to writefrm() so that the
  function can be reused for creating .par files.
- Added DBUG_PUSH("") to a few functions that caused a lot of
  non-critical tracing.
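
The new flag enum and handlerton callback mentioned above appear in the
handler.h hunk further down; they are condensed here for reference, together
with a simplified sketch (error paths trimmed, illustration only) of how
ha_partition forwards a metadata change to the engine of the first partition:

    /* Flags for create_partitioning_metadata(), previously #defines */
    enum chf_create_flags {
      CHF_CREATE_FLAG,
      CHF_DELETE_FLAG,
      CHF_RENAME_FLAG,
      CHF_INDEX_FLAG
    };

    /* New handlerton member: lets an engine (e.g. S3) keep its copy of
       the .frm and .par files in sync when partitioning changes them */
    int (*create_partitioning_metadata)(const char *path,
                                        const char *old_path,
                                        chf_create_flags action_flag);

    /* Simplified from the ha_partition.cc hunk below: after maintaining
       its own .par file, ha_partition notifies the partitions' engine */
    if (m_part_info)            /* NULL only if creating the table failed */
    {
      partition_element *part= m_part_info->partitions.head();
      if (part->engine_type->create_partitioning_metadata &&
          part->engine_type->create_partitioning_metadata(path, old_path,
                                                          action_flag))
      {
        my_error(ER_CANT_CREATE_HANDLER_FILE, MYF(0));
        DBUG_RETURN(1);
      }
    }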
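
discover_check_version() replaces the earlier trick of letting rebind()
return HA_ERR_TABLE_DEF_CHANGED. A condensed view of the new open-table
path, taken from the sql_base.cc hunk below (surrounding code omitted):

    if (table->file->discover_check_version())
    {
      /* Definition changed (e.g. the .frm/.par on S3 was replaced):
         drop the cached TABLE and re-run discovery */
      tc_release_table(table);
      (void) ot_ctx->request_backoff_action(Open_table_context::OT_DISCOVER,
                                            table_list);
      DBUG_RETURN(TRUE);
    }
    table->file->rebind_psi();    /* PSI bookkeeping only; returns void */

ha_partition delegates the version check to its first partition:

    int ha_partition::discover_check_version()
    {
      return m_file[0]->discover_check_version();
    }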
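
check_if_updates_are_ignored() is a handler-level wrapper around
ha_check_if_updates_are_ignored(); ha_partition overrides it so the check
is done against the engine of the partitions (partition_ht()), not only
the partitioning wrapper. Condensed from the handler.cc and
ha_partition.cc hunks below:

    bool handler::check_if_updates_are_ignored(const char *op) const
    {
      return ha_check_if_updates_are_ignored(table->in_use, ht, op);
    }

    bool ha_partition::check_if_updates_are_ignored(const char *op) const
    {
      return (handler::check_if_updates_are_ignored(op) ||
              ha_check_if_updates_are_ignored(table->in_use,
                                              partition_ht(), op));
    }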
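
writefrm() is renamed to writefile() and now receives the full path
including the extension, so the same code can write both .frm and .par
images; on any write error the half-written file is deleted. A simplified
sketch of the new function (the opt_sync_frm fsync handling present in the
real patch is omitted here):

    int writefile(const char *path, const char *db, const char *table,
                  bool tmp_table, const uchar *data, size_t len)
    {
      int create_flags= O_RDWR | O_TRUNC;
      if (tmp_table)
        create_flags|= O_EXCL | O_NOFOLLOW;

      File file= mysql_file_create(key_file_frm, path, CREATE_MODE,
                                   create_flags, MYF(0));
      if (file < 0)
        return 1;                          /* Could not create the file */

      int error= (int) mysql_file_write(file, data, len,
                                        MYF(MY_WME | MY_NABP));
      error|= mysql_file_close(file, MYF(MY_WME));
      if (error)
        my_delete(path, MYF(0));           /* Never leave a broken file */
      return error;
    }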
48 files changed, 1665 insertions, 311 deletions
diff --git a/mysql-test/main/partition_myisam.result b/mysql-test/main/partition_myisam.result index 8f35075261a..3a549060d8e 100644 --- a/mysql-test/main/partition_myisam.result +++ b/mysql-test/main/partition_myisam.result @@ -87,10 +87,9 @@ test.t1 check Error Failed to read from the .par file test.t1 check error Corrupt SELECT * FROM t1; ERROR HY000: Failed to read from the .par file -# Note that it is currently impossible to drop a partitioned table -# without the .par file DROP TABLE t1; -ERROR HY000: Got error 1 "Operation not permitted" from storage engine partition +Warnings: +Warning 1017 Can't find file: './test/t1.par' (errno: 2 "No such file or directory") # # Bug#50392: insert_id is not reset for partitioned tables # auto_increment on duplicate entry diff --git a/mysql-test/main/partition_myisam.test b/mysql-test/main/partition_myisam.test index 4d083c37b68..b26b619a958 100644 --- a/mysql-test/main/partition_myisam.test +++ b/mysql-test/main/partition_myisam.test @@ -121,12 +121,8 @@ FLUSH TABLES; CHECK TABLE t1; --error ER_FAILED_READ_FROM_PAR_FILE SELECT * FROM t1; ---echo # Note that it is currently impossible to drop a partitioned table ---echo # without the .par file ---replace_result "Not owner" "Operation not permitted" ---error ER_GET_ERRNO +--replace_result $MYSQLD_DATADIR ./ DROP TABLE t1; ---remove_file $MYSQLD_DATADIR/test/t1.frm --remove_file $MYSQLD_DATADIR/test/t1#P#p0.MYI --remove_file $MYSQLD_DATADIR/test/t1#P#p0.MYD diff --git a/mysql-test/main/partition_not_blackhole.result b/mysql-test/main/partition_not_blackhole.result index ff1e51df892..6cb8dea80c8 100644 --- a/mysql-test/main/partition_not_blackhole.result +++ b/mysql-test/main/partition_not_blackhole.result @@ -11,6 +11,6 @@ t1 SHOW CREATE TABLE t1; ERROR HY000: Failed to read from the .par file DROP TABLE t1; -ERROR HY000: Got error 1 "Operation not permitted" from storage engine partition +ERROR HY000: Got error 175 "File too short; Expected more data in file" from storage engine partition t1.frm t1.par diff --git a/mysql-test/suite/parts/t/partition_debug_innodb.test b/mysql-test/suite/parts/t/partition_debug_innodb.test index 0d065d0e3f2..13dbefd59b9 100644 --- a/mysql-test/suite/parts/t/partition_debug_innodb.test +++ b/mysql-test/suite/parts/t/partition_debug_innodb.test @@ -4,7 +4,8 @@ --source include/have_debug.inc --source include/have_innodb.inc --source include/have_partition.inc -# Don't test this under valgrind, memory leaks will occur +# Don't test this under valgrind, memory leaks will occur. Also SIGKILL may +# not get trough and the test will hang. --source include/not_valgrind.inc # Crash tests don't work with embedded --source include/not_embedded.inc diff --git a/mysql-test/suite/s3/my.cnf b/mysql-test/suite/s3/my.cnf index cb0f837a128..1958a04343f 100644 --- a/mysql-test/suite/s3/my.cnf +++ b/mysql-test/suite/s3/my.cnf @@ -9,12 +9,3 @@ s3=ON #s3-access-key=... #s3-secret-key=... #s3-region=eu-north-1 - -[mysqld.2] -s3=ON -#s3-host-name=s3.amazonaws.com -#s3-protocol-version=Amazon -#s3-bucket=MariaDB -#s3-access-key=... -#s3-secret-key=... 
-#s3-region=eu-north-1 diff --git a/mysql-test/suite/s3/partitions.result b/mysql-test/suite/s3/partition.result index c7f9a9d8cc7..7020ce88483 100644 --- a/mysql-test/suite/s3/partitions.result +++ b/mysql-test/suite/s3/partition.result @@ -10,10 +10,17 @@ ALTER TABLE t1 ENGINE=S3; SELECT count(*) FROM t1; count(*) 6 +SHOW TABLES; +Tables_in_s3 +t1 ALTER TABLE t1 COALESCE PARTITION 2; -ERROR HY000: Storage engine S3 of the table `s3`.`t1` doesn't have this option +ERROR 42000: Table 't1' uses an extension that doesn't exist in this MariaDB version +SHOW WARNINGS; +Level Code Message +Error 1112 Table 't1' uses an extension that doesn't exist in this MariaDB version +Error 6 Error on delete of './s3/t1#P#p0#TMP#.MAI' (Errcode: 2 "No such file or directory") +Error 6 Error on delete of './s3/t1#P#p0#TMP#.MAD' (Errcode: 2 "No such file or directory") ALTER TABLE t1 ADD PARTITION PARTITIONS 6; -ERROR HY000: Storage engine S3 of the table `s3`.`t1` doesn't have this option SELECT count(*) FROM t1; count(*) 6 @@ -61,7 +68,7 @@ SELECT count(*) FROM t2; count(*) 6 ALTER TABLE t2 REBUILD PARTITION p0, p1; -ERROR HY000: Storage engine S3 of the table `s3`.`t2` doesn't have this option +ERROR 42000: Table 't2' uses an extension that doesn't exist in this MariaDB version ALTER TABLE t2 OPTIMIZE PARTITION p0, p1; Table Op Msg_type Msg_text s3.t2 optimize Error Table 't2' is read only @@ -77,14 +84,38 @@ SELECT count(*) FROM t2; count(*) 6 ALTER TABLE t2 ADD PARTITION (PARTITION p4 VALUES LESS THAN (400)); -ERROR HY000: Storage engine S3 of the table `s3`.`t2` doesn't have this option +SHOW CREATE TABLE t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `c1` int(11) DEFAULT NULL, + `c2` int(11) DEFAULT NULL +) ENGINE=S3 DEFAULT CHARSET=latin1 + PARTITION BY RANGE (`c1`) +SUBPARTITION BY HASH (`c2`) +SUBPARTITIONS 2 +(PARTITION `p0` VALUES LESS THAN (100) ENGINE = S3, + PARTITION `p1` VALUES LESS THAN (200) ENGINE = S3, + PARTITION `p3` VALUES LESS THAN (300) ENGINE = S3, + PARTITION `p4` VALUES LESS THAN (400) ENGINE = S3) ALTER TABLE t2 -REORGANIZE PARTITION p3 INTO ( +REORGANIZE PARTITION p4 INTO ( PARTITION n0 VALUES LESS THAN (500), PARTITION n1 VALUES LESS THAN (600) ); -ERROR HY000: Storage engine S3 of the table `s3`.`t2` doesn't have this option +ERROR 42000: Table 't2' uses an extension that doesn't exist in this MariaDB version ALTER TABLE t2 DROP PARTITION p3; +SHOW CREATE TABLE t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `c1` int(11) DEFAULT NULL, + `c2` int(11) DEFAULT NULL +) ENGINE=S3 DEFAULT CHARSET=latin1 + PARTITION BY RANGE (`c1`) +SUBPARTITION BY HASH (`c2`) +SUBPARTITIONS 2 +(PARTITION `p0` VALUES LESS THAN (100) ENGINE = S3, + PARTITION `p1` VALUES LESS THAN (200) ENGINE = S3, + PARTITION `p4` VALUES LESS THAN (400) ENGINE = S3) SELECT count(*) from t2; count(*) 4 @@ -99,6 +130,12 @@ count(*) 4 # Test for REMOVE PARTITIONING ALTER TABLE t2 REMOVE PARTITIONING; +SHOW CREATE TABLE t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `c1` int(11) DEFAULT NULL, + `c2` int(11) DEFAULT NULL +) ENGINE=S3 DEFAULT CHARSET=latin1 PAGE_CHECKSUM=1 SELECT count(*) FROM t2; count(*) 4 diff --git a/mysql-test/suite/s3/partitions.test b/mysql-test/suite/s3/partition.test index 196a72e2826..03bbc2f0da9 100644 --- a/mysql-test/suite/s3/partitions.test +++ b/mysql-test/suite/s3/partition.test @@ -2,7 +2,6 @@ --source include/have_s3.inc --source create_database.inc - --echo # Test for COALESCE PARTITION, ALTER TABLE and ADD PARTITIONS --echo # for tables with HASH partitions CREATE TABLE t1 ( @@ 
-13,11 +12,15 @@ CREATE TABLE t1 ( INSERT INTO t1 VALUE (1), (2), (101), (102), (201), (202); ALTER TABLE t1 ENGINE=S3; SELECT count(*) FROM t1; +# Check that partition tables are not shown; +--replace_result $database s3 +SHOW TABLES; + --replace_result $database s3 ---error ER_ILLEGAL_HA +--error ER_UNSUPPORTED_EXTENSION ALTER TABLE t1 COALESCE PARTITION 2; --replace_result $database s3 ---error ER_ILLEGAL_HA +SHOW WARNINGS; ALTER TABLE t1 ADD PARTITION PARTITIONS 6; SELECT count(*) FROM t1; ALTER TABLE t1 ADD COLUMN c INT; @@ -55,7 +58,7 @@ SELECT count(*) FROM t2; ALTER TABLE t2 CHECK PARTITION p3; SELECT count(*) FROM t2; --replace_result $database s3 ---error ER_ILLEGAL_HA +--error ER_UNSUPPORTED_EXTENSION ALTER TABLE t2 REBUILD PARTITION p0, p1; --replace_result $database s3 ALTER TABLE t2 OPTIMIZE PARTITION p0, p1; @@ -64,16 +67,17 @@ SELECT count(*) FROM t2; ALTER TABLE t2 REPAIR PARTITION p0, p1; SELECT count(*) FROM t2; --replace_result $database s3 ---error ER_ILLEGAL_HA ALTER TABLE t2 ADD PARTITION (PARTITION p4 VALUES LESS THAN (400)); +SHOW CREATE TABLE t2; --replace_result $database s3 ---error ER_ILLEGAL_HA +--error ER_UNSUPPORTED_EXTENSION ALTER TABLE t2 - REORGANIZE PARTITION p3 INTO ( + REORGANIZE PARTITION p4 INTO ( PARTITION n0 VALUES LESS THAN (500), PARTITION n1 VALUES LESS THAN (600) ); ALTER TABLE t2 DROP PARTITION p3; +SHOW CREATE TABLE t2; SELECT count(*) from t2; --echo # Test for ALTER TABLE @@ -84,6 +88,7 @@ SELECT count(*) FROM t2; --echo # Test for REMOVE PARTITIONING ALTER TABLE t2 REMOVE PARTITIONING; +SHOW CREATE TABLE t2; SELECT count(*) FROM t2; DROP TABLE t2; diff --git a/mysql-test/suite/s3/partition_create_fail.result b/mysql-test/suite/s3/partition_create_fail.result new file mode 100644 index 00000000000..923a54b5971 --- /dev/null +++ b/mysql-test/suite/s3/partition_create_fail.result @@ -0,0 +1,15 @@ +SET @saved_dbug = @@debug_dbug; +CREATE TABLE p0 ( +c1 int primary key, +c2 int DEFAULT NULL +) ENGINE=InnoDB; +insert into p0 select seq,seq from seq_1_to_10; +SET debug_dbug='+d,failed_create_partitioning_metadata'; +alter table p0 engine=s3 +PARTITION BY RANGE (c1) +(PARTITION p0 VALUES LESS THAN (100)); +ERROR HY000: Simulated crash +SET debug_dbug=@saved_dbug; +drop table p0; +drop table p0; +ERROR 42S02: Unknown table 's3.p0' diff --git a/mysql-test/suite/s3/partition_create_fail.test b/mysql-test/suite/s3/partition_create_fail.test new file mode 100644 index 00000000000..ed77a43e336 --- /dev/null +++ b/mysql-test/suite/s3/partition_create_fail.test @@ -0,0 +1,39 @@ +--source include/have_partition.inc +--source include/have_s3.inc +--source include/have_debug.inc +--source include/have_innodb.inc +--source include/have_sequence.inc +--source create_database.inc + +SET @saved_dbug = @@debug_dbug; + +# Test failure in create of partition table + +CREATE TABLE p0 ( + c1 int primary key, + c2 int DEFAULT NULL +) ENGINE=InnoDB; +insert into p0 select seq,seq from seq_1_to_10; + +SET debug_dbug='+d,failed_create_partitioning_metadata'; + +--error 1041 +alter table p0 engine=s3 + PARTITION BY RANGE (c1) +(PARTITION p0 VALUES LESS THAN (100)); + +SET debug_dbug=@saved_dbug; + +drop table p0; + +# If something went wrong, then we have a copy of the .frm file in S3 and +# the following drop table will not fail + +--replace_result $database s3 +--error ER_BAD_TABLE_ERROR +drop table p0; + +# +# clean up +# +--source drop_database.inc diff --git a/mysql-test/suite/s3/partition_move.result b/mysql-test/suite/s3/partition_move.result new file mode 100644 
index 00000000000..e183334f083 --- /dev/null +++ b/mysql-test/suite/s3/partition_move.result @@ -0,0 +1,74 @@ +CREATE TABLE p0 ( +c1 int primary key, +c2 int DEFAULT NULL +) ENGINE=InnoDB; +insert into p0 select seq,seq from seq_1_to_99; +alter table p0 engine=s3 , rename to archive +PARTITION BY RANGE (c1) +(PARTITION p0 VALUES LESS THAN (100)); +show create table archive; +Table Create Table +archive CREATE TABLE `archive` ( + `c1` int(11) NOT NULL, + `c2` int(11) DEFAULT NULL, + PRIMARY KEY (`c1`) +) ENGINE=S3 DEFAULT CHARSET=latin1 + PARTITION BY RANGE (`c1`) +(PARTITION `p0` VALUES LESS THAN (100) ENGINE = S3) +CREATE TABLE t1 ( +c1 int primary key, +c2 int DEFAULT NULL +) ENGINE=InnoDB +PARTITION BY RANGE (c1) +(PARTITION p1 VALUES LESS THAN (200), +PARTITION p2 VALUES LESS THAN (300), +PARTITION p3 VALUES LESS THAN (400)); +insert into t1 select seq,seq from seq_100_to_399; +create table p1 like t1; +alter table p1 remove partitioning; +alter table t1 exchange partition p1 with table p1; +alter table t1 drop partition p1; +show create table p1; +Table Create Table +p1 CREATE TABLE `p1` ( + `c1` int(11) NOT NULL, + `c2` int(11) DEFAULT NULL, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +select count(*) from p1; +count(*) +100 +alter table p1 engine=s3; +alter table archive add partition (partition p1 values less than (200)); +alter table archive exchange partition p1 with table p1; +select count(*) from p1; +count(*) +0 +drop table p1; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `c1` int(11) NOT NULL, + `c2` int(11) DEFAULT NULL, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 + PARTITION BY RANGE (`c1`) +(PARTITION `p2` VALUES LESS THAN (300) ENGINE = InnoDB, + PARTITION `p3` VALUES LESS THAN (400) ENGINE = InnoDB) +show create table archive; +Table Create Table +archive CREATE TABLE `archive` ( + `c1` int(11) NOT NULL, + `c2` int(11) DEFAULT NULL, + PRIMARY KEY (`c1`) +) ENGINE=S3 DEFAULT CHARSET=latin1 + PARTITION BY RANGE (`c1`) +(PARTITION `p0` VALUES LESS THAN (100) ENGINE = S3, + PARTITION `p1` VALUES LESS THAN (200) ENGINE = S3) +select count(*) from t1; +count(*) +200 +select count(*) from archive; +count(*) +199 +drop table t1,archive; diff --git a/mysql-test/suite/s3/partition_move.test b/mysql-test/suite/s3/partition_move.test new file mode 100644 index 00000000000..35edbd75b5a --- /dev/null +++ b/mysql-test/suite/s3/partition_move.test @@ -0,0 +1,80 @@ +--source include/have_partition.inc +--source include/have_innodb.inc +--source include/have_s3.inc +--source include/have_sequence.inc +--source create_database.inc + +# +# The purpose of this test is to show how to move an partition from an existing +# InnoDB partitioned table (t1) to a partitioned table in S3 (archive) +# + +# +# We start by creating a partioned table in S3 with one existing partion p0 +# + +CREATE TABLE p0 ( + c1 int primary key, + c2 int DEFAULT NULL +) ENGINE=InnoDB; +insert into p0 select seq,seq from seq_1_to_99; + +alter table p0 engine=s3 , rename to archive + PARTITION BY RANGE (c1) +(PARTITION p0 VALUES LESS THAN (100)); + +show create table archive; + +# +# Then we create the table t1 that contains multiple partitions. 
+# Partition p1 is the one that we want to move to 'archive' +# + +CREATE TABLE t1 ( + c1 int primary key, + c2 int DEFAULT NULL +) ENGINE=InnoDB + PARTITION BY RANGE (c1) + (PARTITION p1 VALUES LESS THAN (200), + PARTITION p2 VALUES LESS THAN (300), + PARTITION p3 VALUES LESS THAN (400)); +insert into t1 select seq,seq from seq_100_to_399; + +# +# Then it's time to do the real work. +# + +# First we move partition p1 to a normal InnoDB table + +create table p1 like t1; +alter table p1 remove partitioning; +alter table t1 exchange partition p1 with table p1; +alter table t1 drop partition p1; + +show create table p1; +select count(*) from p1; + +# Then change the table engine to s3 and move it into archive + +alter table p1 engine=s3; +alter table archive add partition (partition p1 values less than (200)); +alter table archive exchange partition p1 with table p1; + +# p1 will be empty as this was the new partition that we just created +select count(*) from p1; +drop table p1; + +# +# The p1 partition has now been moved from t1 to archive. Check the result +# + +show create table t1; +show create table archive; +select count(*) from t1; +select count(*) from archive; +drop table t1,archive; + +# +# clean up +# +--source drop_database.inc diff --git a/mysql-test/suite/s3/partitions-master.opt b/mysql-test/suite/s3/partitions-master.opt deleted file mode 100644 index bbb6d7f9ff4..00000000000 --- a/mysql-test/suite/s3/partitions-master.opt +++ /dev/null @@ -1 +0,0 @@ ---loose-partition diff --git a/mysql-test/suite/s3/replication_mixed.cnf b/mysql-test/suite/s3/replication_mixed.cnf index 0c5b0629cbf..9313546f637 100644 --- a/mysql-test/suite/s3/replication_mixed.cnf +++ b/mysql-test/suite/s3/replication_mixed.cnf @@ -1,2 +1,3 @@ !include ../rpl/my.cnf !include ./my.cnf +!include ./slave.cnf diff --git a/mysql-test/suite/s3/replication_mixed.test b/mysql-test/suite/s3/replication_mixed.test index b2dbc3958bd..d10d586dab9 100644 --- a/mysql-test/suite/s3/replication_mixed.test +++ b/mysql-test/suite/s3/replication_mixed.test @@ -1,5 +1,5 @@ ---source include/master-slave.inc --source include/have_binlog_format_mixed.inc +--source include/master-slave.inc set binlog_format=mixed; RESET MASTER; diff --git a/mysql-test/suite/s3/replication_partition.cnf b/mysql-test/suite/s3/replication_partition.cnf new file mode 100644 index 00000000000..9313546f637 --- /dev/null +++ b/mysql-test/suite/s3/replication_partition.cnf @@ -0,0 +1,3 @@ +!include ../rpl/my.cnf +!include ./my.cnf +!include ./slave.cnf diff --git a/mysql-test/suite/s3/replication_partition.result b/mysql-test/suite/s3/replication_partition.result new file mode 100644 index 00000000000..9df6216ab2c --- /dev/null +++ b/mysql-test/suite/s3/replication_partition.result @@ -0,0 +1,280 @@ +include/master-slave.inc +[connection master] +connection slave; +use database; +connection master; +# +# Check replication of parititioned S3 tables +# +CREATE TABLE t1 ( +c1 INT DEFAULT NULL +) ENGINE=Aria +PARTITION BY HASH (c1) +PARTITIONS 3; +INSERT INTO t1 VALUE (1), (2), (101), (102), (201), (202); +ALTER TABLE t1 ENGINE=S3; +ALTER TABLE t1 ADD PARTITION PARTITIONS 6; +select sum(c1) from t1; +sum(c1) +609 +ALTER TABLE t1 ADD COLUMN c INT; +select sum(c1) from t1; +sum(c1) +609 +connection slave; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `c1` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL +) ENGINE=S3 DEFAULT CHARSET=latin1 + PARTITION BY HASH (`c1`) +PARTITIONS 9 +select sum(c1) from t1; +sum(c1) +609 +connection master; 
+drop table t1; +# +# Checking that the slave is keeping in sync with changed partitions +# +CREATE TABLE t1 ( +c1 int primary key, +c2 int DEFAULT NULL +) ENGINE=InnoDB +PARTITION BY RANGE (c1) +(PARTITION p1 VALUES LESS THAN (200), +PARTITION p2 VALUES LESS THAN (300), +PARTITION p3 VALUES LESS THAN (400)); +insert into t1 select seq*100,seq*100 from seq_1_to_3; +alter table t1 engine=S3; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `c1` int(11) NOT NULL, + `c2` int(11) DEFAULT NULL, + PRIMARY KEY (`c1`) +) ENGINE=S3 DEFAULT CHARSET=latin1 + PARTITION BY RANGE (`c1`) +(PARTITION `p1` VALUES LESS THAN (200) ENGINE = S3, + PARTITION `p2` VALUES LESS THAN (300) ENGINE = S3, + PARTITION `p3` VALUES LESS THAN (400) ENGINE = S3) +connection slave; +select sum(c1) from t1; +sum(c1) +600 +stop slave; +connection master; +ALTER TABLE t1 ADD PARTITION (PARTITION p4 VALUES LESS THAN (500)); +connection slave; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `c1` int(11) NOT NULL, + `c2` int(11) DEFAULT NULL, + PRIMARY KEY (`c1`) +) ENGINE=S3 DEFAULT CHARSET=latin1 + PARTITION BY RANGE (`c1`) +(PARTITION `p1` VALUES LESS THAN (200) ENGINE = S3, + PARTITION `p2` VALUES LESS THAN (300) ENGINE = S3, + PARTITION `p3` VALUES LESS THAN (400) ENGINE = S3, + PARTITION `p4` VALUES LESS THAN (500) ENGINE = S3) +select sum(c1) from t1; +sum(c1) +600 +start slave; +connection master; +connection slave; +select sum(c1)+0 from t1; +sum(c1)+0 +600 +stop slave; +flush tables; +select sum(c1)+0 from t1; +sum(c1)+0 +600 +connection master; +drop table t1; +connection slave; +select sum(c1) from t1; +ERROR 42S02: Table 'database.t1' doesn't exist +start slave; +connection master; +# +# Check altering partitioned table to S3 and back +# Checks also rename partitoned table and drop partition +# +CREATE TABLE t2 ( +c1 int primary key, +c2 int DEFAULT NULL +) ENGINE=InnoDB +PARTITION BY RANGE (c1) +(PARTITION p1 VALUES LESS THAN (200), +PARTITION p2 VALUES LESS THAN (300), +PARTITION p3 VALUES LESS THAN (400)); +insert into t2 select seq*100,seq*100 from seq_1_to_3; +alter table t2 engine=S3; +rename table t2 to t1; +alter table t1 drop partition p1; +connection slave; +select sum(c1) from t1; +sum(c1) +500 +connection master; +alter table t1 engine=innodb; +connection slave; +select sum(c1) from t1; +sum(c1) +500 +connection master; +drop table t1; +# +# Check that slaves ignores changes to S3 tables. 
+# +connection master; +CREATE TABLE t1 ( +c1 int primary key, +c2 int DEFAULT NULL +) ENGINE=InnoDB +PARTITION BY RANGE (c1) +(PARTITION p1 VALUES LESS THAN (200), +PARTITION p2 VALUES LESS THAN (300), +PARTITION p3 VALUES LESS THAN (400)); +insert into t1 select seq*100,seq*100 from seq_1_to_3; +create table t2 like t1; +alter table t2 remove partitioning; +insert into t2 values (450,450); +connection slave; +stop slave; +connection master; +alter table t1 engine=s3; +alter table t2 engine=s3; +ALTER TABLE t1 ADD PARTITION (PARTITION p4 VALUES LESS THAN (500)); +alter table t1 exchange partition p4 with table t2; +select count(*) from t1; +count(*) +4 +drop table t1,t2; +connection slave; +start slave; +connection master; +connection slave; +select sum(c1) from t1; +ERROR 42S02: Table 'database.t1' doesn't exist +connection master; +# +# Check slave binary log +# +connection slave; +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # create database database +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; CREATE TABLE t1 ( +c1 INT DEFAULT NULL +) ENGINE=Aria +PARTITION BY HASH (c1) +PARTITIONS 3 +slave-bin.000001 # Gtid # # BEGIN GTID #-#-# +slave-bin.000001 # Query # # use `database`; INSERT INTO t1 VALUE (1), (2), (101), (102), (201), (202) +slave-bin.000001 # Query # # COMMIT +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; ALTER TABLE t1 ENGINE=S3 +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; set @@sql_if_exists=1; ALTER TABLE t1 ADD PARTITION PARTITIONS 6 +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; set @@sql_if_exists=1; ALTER TABLE t1 ADD COLUMN c INT +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; DROP TABLE IF EXISTS `t1` /* generated by server */ +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; CREATE TABLE t1 ( +c1 int primary key, +c2 int DEFAULT NULL +) ENGINE=InnoDB +PARTITION BY RANGE (c1) +(PARTITION p1 VALUES LESS THAN (200), +PARTITION p2 VALUES LESS THAN (300), +PARTITION p3 VALUES LESS THAN (400)) +slave-bin.000001 # Gtid # # BEGIN GTID #-#-# +slave-bin.000001 # Query # # use `database`; insert into t1 select seq*100,seq*100 from seq_1_to_3 +slave-bin.000001 # Xid # # COMMIT /* XID */ +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; alter table t1 engine=S3 +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; set @@sql_if_exists=1; ALTER TABLE t1 ADD PARTITION (PARTITION p4 VALUES LESS THAN (500)) +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; flush tables +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; DROP TABLE IF EXISTS `t1` /* generated by server */ +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; CREATE TABLE t2 ( +c1 int primary key, +c2 int DEFAULT NULL +) ENGINE=InnoDB +PARTITION BY RANGE (c1) +(PARTITION p1 VALUES LESS THAN (200), +PARTITION p2 VALUES LESS THAN (300), +PARTITION p3 VALUES LESS THAN (400)) +slave-bin.000001 # Gtid # # BEGIN GTID #-#-# +slave-bin.000001 # Query # # use `database`; insert into t2 select seq*100,seq*100 from seq_1_to_3 +slave-bin.000001 # Xid # # COMMIT /* XID */ +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # 
Query # # use `database`; alter table t2 engine=S3 +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; set @@sql_if_exists=1; rename table t2 to t1 +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; set @@sql_if_exists=1; alter table t1 drop partition p1 +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; DROP TABLE IF EXISTS `t1` /* generated by server */ +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; CREATE TABLE `t1` ( + `c1` int(11) NOT NULL, + `c2` int(11) DEFAULT NULL, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 + PARTITION BY RANGE (`c1`) +(PARTITION `p2` VALUES LESS THAN (300) ENGINE = InnoDB, + PARTITION `p3` VALUES LESS THAN (400) ENGINE = InnoDB) +slave-bin.000001 # Gtid # # BEGIN GTID #-#-# +slave-bin.000001 # Annotate_rows # # alter table t1 engine=innodb +slave-bin.000001 # Table_map # # table_id: # (database.t1) +slave-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F +slave-bin.000001 # Xid # # COMMIT /* XID */ +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; DROP TABLE IF EXISTS `t1` /* generated by server */ +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; CREATE TABLE t1 ( +c1 int primary key, +c2 int DEFAULT NULL +) ENGINE=InnoDB +PARTITION BY RANGE (c1) +(PARTITION p1 VALUES LESS THAN (200), +PARTITION p2 VALUES LESS THAN (300), +PARTITION p3 VALUES LESS THAN (400)) +slave-bin.000001 # Gtid # # BEGIN GTID #-#-# +slave-bin.000001 # Query # # use `database`; insert into t1 select seq*100,seq*100 from seq_1_to_3 +slave-bin.000001 # Xid # # COMMIT /* XID */ +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; create table t2 like t1 +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; alter table t2 remove partitioning +slave-bin.000001 # Gtid # # BEGIN GTID #-#-# +slave-bin.000001 # Query # # use `database`; insert into t2 values (450,450) +slave-bin.000001 # Xid # # COMMIT /* XID */ +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; alter table t1 engine=s3 +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; alter table t2 engine=s3 +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; set @@sql_if_exists=1; ALTER TABLE t1 ADD PARTITION (PARTITION p4 VALUES LESS THAN (500)) +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; set @@sql_if_exists=1; alter table t1 exchange partition p4 with table t2 +slave-bin.000001 # Gtid # # GTID #-#-# +slave-bin.000001 # Query # # use `database`; DROP TABLE IF EXISTS `t1`,`t2` /* generated by server */ +connection master; +# +# clean up +# +connection slave; +include/rpl_end.inc diff --git a/mysql-test/suite/s3/replication_partition.test b/mysql-test/suite/s3/replication_partition.test new file mode 100644 index 00000000000..8751ab33730 --- /dev/null +++ b/mysql-test/suite/s3/replication_partition.test @@ -0,0 +1,170 @@ +--source include/have_s3.inc +--source include/have_partition.inc +--source include/have_binlog_format_mixed.inc +--source include/have_innodb.inc +--source include/have_sequence.inc +--source include/master-slave.inc +--source create_database.inc + +connection slave; +let $MYSQLD_DATADIR= `select @@datadir`; +--replace_result $database database +--eval use $database +connection 
master; + +--echo # +--echo # Check replication of parititioned S3 tables +--echo # + +CREATE TABLE t1 ( + c1 INT DEFAULT NULL +) ENGINE=Aria + PARTITION BY HASH (c1) + PARTITIONS 3; +INSERT INTO t1 VALUE (1), (2), (101), (102), (201), (202); +ALTER TABLE t1 ENGINE=S3; +ALTER TABLE t1 ADD PARTITION PARTITIONS 6; +select sum(c1) from t1; +ALTER TABLE t1 ADD COLUMN c INT; +select sum(c1) from t1; +sync_slave_with_master; +show create table t1; +select sum(c1) from t1; +connection master; +drop table t1; + +--echo # +--echo # Checking that the slave is keeping in sync with changed partitions +--echo # + +CREATE TABLE t1 ( + c1 int primary key, + c2 int DEFAULT NULL +) ENGINE=InnoDB + PARTITION BY RANGE (c1) + (PARTITION p1 VALUES LESS THAN (200), + PARTITION p2 VALUES LESS THAN (300), + PARTITION p3 VALUES LESS THAN (400)); +insert into t1 select seq*100,seq*100 from seq_1_to_3; +alter table t1 engine=S3; +show create table t1; + +sync_slave_with_master; +select sum(c1) from t1; +--file_exists $MYSQLD_DATADIR/$database/t1.frm +--file_exists $MYSQLD_DATADIR/$database/t1.par +stop slave; +connection master; +ALTER TABLE t1 ADD PARTITION (PARTITION p4 VALUES LESS THAN (500)); +connection slave; +show create table t1; +select sum(c1) from t1; +start slave; +connection master; +sync_slave_with_master; +select sum(c1)+0 from t1; +stop slave; + +# .frm amd .par files should not exists on the salve as it has just seen the +# ALTER TABLE which cased the removal of the .frm and .par files. The table +# from the above "select sum()" came from table cache and was used as it's +# id matches the one in S3 +--error 1 +--file_exists $MYSQLD_DATADIR/$database/t1.frm +--error 1 +--file_exists $MYSQLD_DATADIR/$database/t1.par +# Flushing the table cache will force the .frm and .par files to be +# re-generated +flush tables; +select sum(c1)+0 from t1; +--file_exists $MYSQLD_DATADIR/$database/t1.frm +--file_exists $MYSQLD_DATADIR/$database/t1.par + +connection master; +drop table t1; +connection slave; +--file_exists $MYSQLD_DATADIR/$database/t1.par +--replace_result $database database +--error ER_NO_SUCH_TABLE +select sum(c1) from t1; +--error 1 +--file_exists $MYSQLD_DATADIR/$database/t1.par +start slave; +connection master; + +--echo # +--echo # Check altering partitioned table to S3 and back +--echo # Checks also rename partitoned table and drop partition +--echo # + +CREATE TABLE t2 ( + c1 int primary key, + c2 int DEFAULT NULL +) ENGINE=InnoDB + PARTITION BY RANGE (c1) + (PARTITION p1 VALUES LESS THAN (200), + PARTITION p2 VALUES LESS THAN (300), + PARTITION p3 VALUES LESS THAN (400)); +insert into t2 select seq*100,seq*100 from seq_1_to_3; +alter table t2 engine=S3; +rename table t2 to t1; +alter table t1 drop partition p1; +sync_slave_with_master; +select sum(c1) from t1; +connection master; +alter table t1 engine=innodb; +sync_slave_with_master; +select sum(c1) from t1; +connection master; +drop table t1; + +--echo # +--echo # Check that slaves ignores changes to S3 tables. 
+--echo # + +connection master; +CREATE TABLE t1 ( + c1 int primary key, + c2 int DEFAULT NULL +) ENGINE=InnoDB + PARTITION BY RANGE (c1) + (PARTITION p1 VALUES LESS THAN (200), + PARTITION p2 VALUES LESS THAN (300), + PARTITION p3 VALUES LESS THAN (400)); +insert into t1 select seq*100,seq*100 from seq_1_to_3; +create table t2 like t1; +alter table t2 remove partitioning; +insert into t2 values (450,450); +sync_slave_with_master; +stop slave; +connection master; +alter table t1 engine=s3; +alter table t2 engine=s3; +ALTER TABLE t1 ADD PARTITION (PARTITION p4 VALUES LESS THAN (500)); +alter table t1 exchange partition p4 with table t2; +select count(*) from t1; +drop table t1,t2; +connection slave; +start slave; +connection master; +sync_slave_with_master; +--replace_result $database database +--error ER_NO_SUCH_TABLE +select sum(c1) from t1; +connection master; + +--echo # +--echo # Check slave binary log +--echo # + +sync_slave_with_master; +--let $binlog_database=$database +--source include/show_binlog_events.inc +connection master; + +--echo # +--echo # clean up +--echo # +--source drop_database.inc +sync_slave_with_master; +--source include/rpl_end.inc diff --git a/mysql-test/suite/s3/replication_stmt.cnf b/mysql-test/suite/s3/replication_stmt.cnf index 0c5b0629cbf..9313546f637 100644 --- a/mysql-test/suite/s3/replication_stmt.cnf +++ b/mysql-test/suite/s3/replication_stmt.cnf @@ -1,2 +1,3 @@ !include ../rpl/my.cnf !include ./my.cnf +!include ./slave.cnf diff --git a/mysql-test/suite/s3/replication_stmt.test b/mysql-test/suite/s3/replication_stmt.test index 0d0de166c64..aba5d155e6c 100644 --- a/mysql-test/suite/s3/replication_stmt.test +++ b/mysql-test/suite/s3/replication_stmt.test @@ -1,5 +1,5 @@ ---source include/master-slave.inc --source include/have_binlog_format_statement.inc +--source include/master-slave.inc set binlog_format=statement; RESET MASTER; diff --git a/mysql-test/suite/s3/slave.cnf b/mysql-test/suite/s3/slave.cnf new file mode 100644 index 00000000000..250a46e6322 --- /dev/null +++ b/mysql-test/suite/s3/slave.cnf @@ -0,0 +1,8 @@ +[mysqld.2] +s3=ON +#s3-host-name=s3.amazonaws.com +#s3-protocol-version=Amazon +#s3-bucket=MariaDB +#s3-access-key=... +#s3-secret-key=... +#s3-region=eu-north-1 diff --git a/mysys/my_symlink.c b/mysys/my_symlink.c index cbee78a7f5c..323ae69a39c 100644 --- a/mysys/my_symlink.c +++ b/mysys/my_symlink.c @@ -154,7 +154,8 @@ int my_realpath(char *to, const char *filename, myf MyFlags) original name but will at least be able to resolve paths that starts with '.'. */ - DBUG_PRINT("error",("realpath failed with errno: %d", errno)); + if (MyFlags) + DBUG_PRINT("error",("realpath failed with errno: %d", errno)); my_errno=errno; if (MyFlags & MY_WME) my_error(EE_REALPATH, MYF(0), filename, my_errno); diff --git a/sql/discover.cc b/sql/discover.cc index e49a2a3b0c0..4267f97cf59 100644 --- a/sql/discover.cc +++ b/sql/discover.cc @@ -99,34 +99,32 @@ int readfrm(const char *name, const uchar **frmdata, size_t *len) /* - Write the content of a frm data pointer - to a frm file. + Write the content of a frm data pointer to a frm or par file. - @param path path to table-file "db/name" - @param frmdata frm data - @param len length of the frmdata + @param path full path to table-file "db/name.frm" or .par + @param db Database name. Only used for my_error() + @param table Table name. Only used for my_error() + @param data data to write to file + @param len length of the data @retval 0 ok @retval - 2 Could not write file + <> 0 Could not write file. 
In this case the file is not created */ -int writefrm(const char *path, const char *db, const char *table, - bool tmp_table, const uchar *frmdata, size_t len) +int writefile(const char *path, const char *db, const char *table, + bool tmp_table, const uchar *data, size_t len) { - char file_name[FN_REFLEN+1]; int error; int create_flags= O_RDWR | O_TRUNC; - DBUG_ENTER("writefrm"); + DBUG_ENTER("writefile"); DBUG_PRINT("enter",("name: '%s' len: %lu ",path, (ulong) len)); if (tmp_table) create_flags|= O_EXCL | O_NOFOLLOW; - strxnmov(file_name, sizeof(file_name)-1, path, reg_ext, NullS); - - File file= mysql_file_create(key_file_frm, file_name, + File file= mysql_file_create(key_file_frm, path, CREATE_MODE, create_flags, MYF(0)); if (unlikely((error= file < 0))) @@ -138,16 +136,19 @@ int writefrm(const char *path, const char *db, const char *table, } else { - error= (int)mysql_file_write(file, frmdata, len, MYF(MY_WME | MY_NABP)); + error= (int)mysql_file_write(file, data, len, MYF(MY_WME | MY_NABP)); if (!error && !tmp_table && opt_sync_frm) error= mysql_file_sync(file, MYF(MY_WME)) || - my_sync_dir_by_file(file_name, MYF(MY_WME)); + my_sync_dir_by_file(path, MYF(MY_WME)); error|= mysql_file_close(file, MYF(MY_WME)); + if (error) + my_delete(path, MYF(0)); } DBUG_RETURN(error); -} /* writefrm */ +} /* writefile */ + static inline void advance(FILEINFO* &from, FILEINFO* &to, FILEINFO* cur, bool &skip) @@ -155,7 +156,7 @@ static inline void advance(FILEINFO* &from, FILEINFO* &to, if (skip) // if not copying from= cur; // just advance the start pointer else // if copying - if (to == from) // but to the same place (not shifting the data) + if (to == from) // but to the same place, not shifting the data from= to= cur; // advance both pointers else // otherwise while (from < cur) // have to copy [from...cur) to [to...) 
diff --git a/sql/discover.h b/sql/discover.h index f14be662dbc..1775f5d6551 100644 --- a/sql/discover.h +++ b/sql/discover.h @@ -21,8 +21,8 @@ int extension_based_table_discovery(MY_DIR *dirp, const char *ext, #ifdef MYSQL_SERVER int readfrm(const char *name, const uchar **data, size_t *length); -int writefrm(const char *path, const char *db, const char *table, - bool tmp_table, const uchar *frmdata, size_t len); +int writefile(const char *path, const char *db, const char *table, + bool tmp_table, const uchar *frmdata, size_t len); /* a helper to delete an frm file, given a path w/o .frm extension */ inline void deletefrm(const char *path) diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index 0ec1f2138ab..4052b647f4d 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -80,7 +80,7 @@ HA_READ_BEFORE_WRITE_REMOVAL |\ HA_CAN_TABLES_WITHOUT_ROLLBACK) -static const char *ha_par_ext= ".par"; +static const char *ha_par_ext= PAR_EXT; /**************************************************************************** MODULE create/delete handler object @@ -629,7 +629,8 @@ int ha_partition::rename_table(const char *from, const char *to) SYNOPSIS create_partitioning_metadata() - name Full path of table name + path Path to the new frm file (without ext) + old_p Path to the old frm file (without ext) create_info Create info generated for CREATE TABLE RETURN VALUE @@ -645,9 +646,10 @@ int ha_partition::rename_table(const char *from, const char *to) */ int ha_partition::create_partitioning_metadata(const char *path, - const char *old_path, - int action_flag) + const char *old_path, + chf_create_flags action_flag) { + partition_element *part; DBUG_ENTER("ha_partition::create_partitioning_metadata"); /* @@ -665,7 +667,8 @@ int ha_partition::create_partitioning_metadata(const char *path, if ((action_flag == CHF_DELETE_FLAG && mysql_file_delete(key_file_ha_partition_par, name, MYF(MY_WME))) || (action_flag == CHF_RENAME_FLAG && - mysql_file_rename(key_file_ha_partition_par, old_name, name, MYF(MY_WME)))) + mysql_file_rename(key_file_ha_partition_par, old_name, name, + MYF(MY_WME)))) { DBUG_RETURN(TRUE); } @@ -678,6 +681,19 @@ int ha_partition::create_partitioning_metadata(const char *path, DBUG_RETURN(1); } } + + /* m_part_info is only NULL when we failed to create a partition table */ + if (m_part_info) + { + part= m_part_info->partitions.head(); + if ((part->engine_type)->create_partitioning_metadata && + ((part->engine_type)->create_partitioning_metadata)(path, old_path, + action_flag)) + { + my_error(ER_CANT_CREATE_HANDLER_FILE, MYF(0)); + DBUG_RETURN(1); + } + } DBUG_RETURN(0); } @@ -1604,6 +1620,7 @@ int ha_partition::prepare_new_partition(TABLE *tbl, if (!(file->ht->flags & HTON_CAN_READ_CONNECT_STRING_IN_PARTITION)) tbl->s->connect_string= p_elem->connect_string; + create_info->options|= HA_CREATE_TMP_ALTER; if ((error= file->ha_create(part_name, tbl, create_info))) { /* @@ -1619,7 +1636,8 @@ int ha_partition::prepare_new_partition(TABLE *tbl, } DBUG_PRINT("info", ("partition %s created", part_name)); if (unlikely((error= file->ha_open(tbl, part_name, m_mode, - m_open_test_lock | HA_OPEN_NO_PSI_CALL)))) + m_open_test_lock | HA_OPEN_NO_PSI_CALL | + HA_OPEN_FOR_CREATE)))) goto error_open; DBUG_PRINT("info", ("partition %s opened", part_name)); @@ -2336,7 +2354,7 @@ char *ha_partition::update_table_comment(const char *comment) Handle delete and rename table @param from Full path of old table - @param to Full path of new table + @param to Full path of new table. 
May be NULL in case of delete @return Operation status @retval >0 Error @@ -2361,14 +2379,20 @@ uint ha_partition::del_ren_table(const char *from, const char *to) const char *to_path= NULL; uint i; handler **file, **abort_file; + THD *thd= ha_thd(); DBUG_ENTER("ha_partition::del_ren_table"); - if (get_from_handler_file(from, ha_thd()->mem_root, false)) - DBUG_RETURN(TRUE); + if (get_from_handler_file(from, thd->mem_root, false)) + DBUG_RETURN(my_errno ? my_errno : ENOENT); DBUG_ASSERT(m_file_buffer); DBUG_PRINT("enter", ("from: (%s) to: (%s)", from, to ? to : "(nil)")); name_buffer_ptr= m_name_buffer_ptr; + file= m_file; + /* The command should be logged with IF EXISTS if using a shared table */ + if (m_file[0]->ht->flags & HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE) + thd->replication_flags|= OPTION_IF_EXISTS; + if (to == NULL) { /* @@ -2378,6 +2402,11 @@ uint ha_partition::del_ren_table(const char *from, const char *to) if (unlikely((error= handler::delete_table(from)))) DBUG_RETURN(error); } + + if (ha_check_if_updates_are_ignored(thd, partition_ht(), + to ? "RENAME" : "DROP")) + DBUG_RETURN(0); + /* Since ha_partition has HA_FILE_BASED, it must alter underlying table names if they do not have HA_FILE_BASED and lower_case_table_names == 2. @@ -2424,7 +2453,33 @@ uint ha_partition::del_ren_table(const char *from, const char *to) goto rename_error; } } + + /* Update .par file in the handlers that supports it */ + if ((*m_file)->ht->create_partitioning_metadata) + { + error= (*m_file)->ht->create_partitioning_metadata(to, from, + to == NULL ? + CHF_DELETE_FLAG : + CHF_RENAME_FLAG); + DBUG_EXECUTE_IF("failed_create_partitioning_metadata", + { my_message_sql(ER_OUT_OF_RESOURCES,"Simulated crash",MYF(0)); + error= 1; + }); + if (error) + { + if (to) + { + (void) handler::rename_table(to, from); + (void) (*m_file)->ht->create_partitioning_metadata(from, to, + CHF_RENAME_FLAG); + goto rename_error; + } + else + save_error=error; + } + } DBUG_RETURN(save_error); + rename_error: name_buffer_ptr= m_name_buffer_ptr; for (abort_file= file, file= m_file; file < abort_file; file++) @@ -3691,6 +3746,7 @@ err_alloc: statement which uses a table from the table cache. Will also use as many PSI_tables as there are partitions. */ + #ifdef HAVE_M_PSI_PER_PARTITION void ha_partition::unbind_psi() { @@ -3729,6 +3785,16 @@ int ha_partition::rebind() #endif /* HAVE_M_PSI_PER_PARTITION */ +/* + Check if the table definition has changed for the part tables + We use the first partition for the check. +*/ + +int ha_partition::discover_check_version() +{ + return m_file[0]->discover_check_version(); +} + /** Clone the open and locked partitioning handler. @@ -11382,6 +11448,12 @@ int ha_partition::end_bulk_delete() } +bool ha_partition::check_if_updates_are_ignored(const char *op) const +{ + return (handler::check_if_updates_are_ignored(op) || + ha_check_if_updates_are_ignored(table->in_use, partition_ht(), op)); +} + /** Perform initialization for a direct update request. 
diff --git a/sql/ha_partition.h b/sql/ha_partition.h index eb10cf84e76..225d69b1a56 100644 --- a/sql/ha_partition.h +++ b/sql/ha_partition.h @@ -22,7 +22,7 @@ #include "queues.h" /* QUEUE */ #define PARTITION_BYTES_IN_POS 2 - +#define PAR_EXT ".par" /** Struct used for partition_name_hash */ typedef struct st_part_name_def @@ -555,8 +555,10 @@ public: int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info) override; int create_partitioning_metadata(const char *name, - const char *old_name, int action_flag) + const char *old_name, + chf_create_flags action_flag) override; + bool check_if_updates_are_ignored(const char *op) const override; void update_create_info(HA_CREATE_INFO *create_info) override; char *update_table_comment(const char *comment) override; int change_partitions(HA_CREATE_INFO *create_info, const char *path, @@ -686,6 +688,7 @@ public: virtual void unbind_psi(); virtual int rebind(); #endif + int discover_check_version() override; /* ------------------------------------------------------------------------- MODULE change record diff --git a/sql/ha_sequence.h b/sql/ha_sequence.h index bc799cca9e5..72e59a40479 100644 --- a/sql/ha_sequence.h +++ b/sql/ha_sequence.h @@ -140,9 +140,9 @@ public: int rename_table(const char *from, const char *to) { return file->rename_table(from, to); } void unbind_psi() - { return file->unbind_psi(); } - int rebind() - { return file->rebind(); } + { file->unbind_psi(); } + void rebind_psi() + { file->rebind_psi(); } bool auto_repair(int error) const { return file->auto_repair(error); } diff --git a/sql/handler.cc b/sql/handler.cc index 2d3f64a7d2b..893453ba1c2 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -2830,14 +2830,13 @@ void handler::unbind_psi() PSI_CALL_unbind_table(m_psi); } -int handler::rebind() +void handler::rebind_psi() { /* Notify the instrumentation that this table is now owned by this thread. 
*/ m_psi= PSI_CALL_rebind_table(ha_table_share_psi(), this, m_psi); - return 0; } @@ -4870,7 +4869,7 @@ void handler::ha_drop_table(const char *name) { DBUG_ASSERT(m_lock_type == F_UNLCK); - if (ha_check_if_updates_are_ignored(ha_thd(), ht, "DROP")) + if (check_if_updates_are_ignored("DROP")) return; mark_trx_read_write(); @@ -4906,7 +4905,7 @@ handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info_arg) int handler::ha_create_partitioning_metadata(const char *name, const char *old_name, - int action_flag) + chf_create_flags action_flag) { /* Normally this is done when unlocked, but in fast_alter_partition_table, @@ -5760,6 +5759,12 @@ bool ha_table_exists(THD *thd, const LEX_CSTRING *db, If statement is ignored, write a note */ +bool handler::check_if_updates_are_ignored(const char *op) const +{ + return ha_check_if_updates_are_ignored(table->in_use, ht, op); +} + + bool ha_check_if_updates_are_ignored(THD *thd, handlerton *hton, const char *op) { diff --git a/sql/handler.h b/sql/handler.h index 5a7d886b394..316f7d3c796 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -90,6 +90,15 @@ enum enum_alter_inplace_result { HA_ALTER_INPLACE_NO_LOCK }; +/* Flags for create_partitioning_metadata() */ + +enum chf_create_flags { + CHF_CREATE_FLAG, + CHF_DELETE_FLAG, + CHF_RENAME_FLAG, + CHF_INDEX_FLAG +}; + /* Bits in table_flags() to show what database can do */ #define HA_NO_TRANSACTIONS (1ULL << 0) /* Doesn't support transactions */ @@ -1670,6 +1679,14 @@ struct handlerton /* Server shutdown early notification.*/ void (*pre_shutdown)(void); + + /* + Inform handler that partitioning engine has changed the .frm and the .par + files + */ + int (*create_partitioning_metadata)(const char *path, + const char *old_path, + chf_create_flags action_flag); }; @@ -3183,7 +3200,10 @@ private: public: virtual void unbind_psi(); - virtual int rebind(); + virtual void rebind_psi(); + /* Return error if definition doesn't match for already opened table */ + virtual int discover_check_version() { return 0; } + /** Put the handler in 'batch' mode when collecting table io instrumented events. @@ -3416,7 +3436,7 @@ public: int ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info); int ha_create_partitioning_metadata(const char *name, const char *old_name, - int action_flag); + chf_create_flags action_flag); int ha_change_partitions(HA_CREATE_INFO *create_info, const char *path, @@ -3901,6 +3921,7 @@ public: virtual void get_dynamic_partition_info(PARTITION_STATS *stat_info, uint part_id); virtual void set_partitions_to_open(List<String> *partition_names) {} + virtual bool check_if_updates_are_ignored(const char *op) const; virtual int change_partitions_to_open(List<String> *partition_names) { return 0; } virtual int extra(enum ha_extra_function operation) @@ -4081,11 +4102,6 @@ public: void update_global_table_stats(); void update_global_index_stats(); -#define CHF_CREATE_FLAG 0 -#define CHF_DELETE_FLAG 1 -#define CHF_RENAME_FLAG 2 -#define CHF_INDEX_FLAG 3 - /** @note lock_count() can return > 1 if the table is MERGE or partitioned. 
*/ @@ -4918,8 +4934,9 @@ public: virtual void drop_table(const char *name); virtual int create(const char *name, TABLE *form, HA_CREATE_INFO *info)=0; - virtual int create_partitioning_metadata(const char *name, const char *old_name, - int action_flag) + virtual int create_partitioning_metadata(const char *name, + const char *old_name, + chf_create_flags action_flag) { return FALSE; } virtual int change_partitions(HA_CREATE_INFO *create_info, @@ -5123,9 +5140,11 @@ public: int ha_discover_table(THD *thd, TABLE_SHARE *share); int ha_discover_table_names(THD *thd, LEX_CSTRING *db, MY_DIR *dirp, Discovered_table_list *result, bool reusable); -bool ha_table_exists(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *table_name, +bool ha_table_exists(THD *thd, const LEX_CSTRING *db, + const LEX_CSTRING *table_name, handlerton **hton= 0, bool *is_sequence= 0); -bool ha_check_if_updates_are_ignored(THD *thd, handlerton *hton, const char *op); +bool ha_check_if_updates_are_ignored(THD *thd, handlerton *hton, + const char *op); #endif /* MYSQL_SERVER */ /* key cache */ diff --git a/sql/sql_alter.cc b/sql/sql_alter.cc index 9ac0ac7a551..131f74c2753 100644 --- a/sql/sql_alter.cc +++ b/sql/sql_alter.cc @@ -376,9 +376,6 @@ bool Sql_cmd_alter_table::execute(THD *thd) /* first table of first SELECT_LEX */ TABLE_LIST *first_table= (TABLE_LIST*) select_lex->table_list.first; - if (thd->variables.option_bits & OPTION_IF_EXISTS) - lex->create_info.set(DDL_options_st::OPT_IF_EXISTS); - const bool used_engine= lex->create_info.used_fields & HA_CREATE_USED_ENGINE; DBUG_ASSERT((m_storage_engine_name.str != NULL) == used_engine); if (used_engine) diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 13dc9aca177..1606e8cb7eb 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -1957,13 +1957,14 @@ retry_share: if (table) { DBUG_ASSERT(table->file != NULL); - if (table->file->rebind() == HA_ERR_TABLE_DEF_CHANGED) + if (table->file->discover_check_version()) { tc_release_table(table); (void) ot_ctx->request_backoff_action(Open_table_context::OT_DISCOVER, table_list); DBUG_RETURN(TRUE); } + table->file->rebind_psi(); #ifdef WITH_PARTITION_STORAGE_ENGINE part_names_error= set_partitions_as_used(table_list, table); #endif diff --git a/sql/sql_class.h b/sql/sql_class.h index 08a1a4cf2c8..07136c10e1d 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -2500,7 +2500,8 @@ public: // track down slow pthread_create ulonglong prior_thr_create_utime, thr_create_utime; ulonglong start_utime, utime_after_lock, utime_after_query; - + /* This can be used by handlers to send signals to the SQL level */ + ulonglong replication_flags; // Process indicator struct { /* diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 53ccdb5d4c3..b1756b83056 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -5906,6 +5906,8 @@ mysql_execute_command(THD *thd) case SQLCOM_CALL: case SQLCOM_REVOKE: case SQLCOM_GRANT: + if (thd->variables.option_bits & OPTION_IF_EXISTS) + lex->create_info.set(DDL_options_st::OPT_IF_EXISTS); DBUG_ASSERT(lex->m_sql_cmd != NULL); res= lex->m_sql_cmd->execute(thd); DBUG_PRINT("result", ("res: %d killed: %d is_error: %d", diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc index d5915f65998..5d89307ba31 100644 --- a/sql/sql_partition.cc +++ b/sql/sql_partition.cc @@ -7033,6 +7033,8 @@ uint fast_alter_partition_table(THD *thd, TABLE *table, bool action_completed= FALSE; bool frm_install= FALSE; MDL_ticket *mdl_ticket= table->mdl_ticket; + /* option_bits is used to mark if we should log the query with IF EXISTS 
*/ + ulonglong save_option_bits= thd->variables.option_bits; DBUG_ENTER("fast_alter_partition_table"); DBUG_ASSERT(table->m_needs_reopen); @@ -7053,6 +7055,10 @@ uint fast_alter_partition_table(THD *thd, TABLE *table, lpt->pack_frm_data= NULL; lpt->pack_frm_len= 0; + /* Add IF EXISTS to binlog if shared table */ + if (table->file->partition_ht()->flags & HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE) + thd->variables.option_bits|= OPTION_IF_EXISTS; + if (table->file->alter_table_flags(alter_info->flags) & HA_PARTITION_ONE_PHASE) { @@ -7377,6 +7383,7 @@ uint fast_alter_partition_table(THD *thd, TABLE *table, if (alter_partition_lock_handling(lpt)) goto err; } + thd->variables.option_bits= save_option_bits; downgrade_mdl_if_lock_tables_mode(thd, mdl_ticket, MDL_SHARED_NO_READ_WRITE); /* A final step is to write the query to the binlog and send ok to the @@ -7384,6 +7391,7 @@ uint fast_alter_partition_table(THD *thd, TABLE *table, */ DBUG_RETURN(fast_end_partition(thd, lpt->copied, lpt->deleted, table_list)); err: + thd->variables.option_bits= save_option_bits; downgrade_mdl_if_lock_tables_mode(thd, mdl_ticket, MDL_SHARED_NO_READ_WRITE); DBUG_RETURN(TRUE); } diff --git a/sql/sql_partition_admin.cc b/sql/sql_partition_admin.cc index ed77c0938f3..13547a0a8be 100644 --- a/sql/sql_partition_admin.cc +++ b/sql/sql_partition_admin.cc @@ -46,6 +46,16 @@ bool Sql_cmd_partition_unsupported::execute(THD *) #else +static bool return_with_logging(THD *thd) +{ + if (thd->slave_thread && + write_bin_log_with_if_exists(thd, true, false, true)) + return(true); + my_ok(thd); + return(false); +} + + bool Sql_cmd_alter_table_exchange_partition::execute(THD *thd) { /* Moved from mysql_execute_command */ @@ -501,7 +511,8 @@ bool Sql_cmd_alter_table_exchange_partition:: MDL_ticket *swap_table_mdl_ticket= NULL; MDL_ticket *part_table_mdl_ticket= NULL; uint table_counter; - bool error= TRUE; + bool error= TRUE, force_if_exists= 0; + ulonglong save_option_bits= thd->variables.option_bits; DBUG_ENTER("mysql_exchange_partition"); DBUG_ASSERT(alter_info->partition_flags & ALTER_PARTITION_EXCHANGE); @@ -529,14 +540,40 @@ bool Sql_cmd_alter_table_exchange_partition:: table_list->mdl_request.set_type(MDL_SHARED_NO_WRITE); if (unlikely(open_tables(thd, &table_list, &table_counter, 0, &alter_prelocking_strategy))) + { + if (thd->lex->if_exists() && + thd->get_stmt_da()->sql_errno() == ER_NO_SUCH_TABLE) + { + /* + ALTER TABLE IF EXISTS was used on not existing table + We have to log the query on a slave as the table may be a shared one + from the master and we need to ensure that the next slave can see + the statement as this slave may not have the table shared + */ + thd->clear_error(); + if (thd->slave_thread && + write_bin_log(thd, true, thd->query(), thd->query_length())) + DBUG_RETURN(true); + my_ok(thd); + DBUG_RETURN(false); + } DBUG_RETURN(true); + } part_table= table_list->table; swap_table= swap_table_list->table; + if (part_table->file->check_if_updates_are_ignored("ALTER")) + DBUG_RETURN(return_with_logging(thd)); + if (unlikely(check_exchange_partition(swap_table, part_table))) DBUG_RETURN(TRUE); + /* Add IF EXISTS to binlog if shared table */ + if (part_table->file->partition_ht()->flags & + HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE) + force_if_exists= 1; + /* set lock pruning on first table */ partition_name= alter_info->partition_names.head(); if (unlikely(table_list->table->part_info-> @@ -638,6 +675,9 @@ bool Sql_cmd_alter_table_exchange_partition:: */ (void) thd->locked_tables_list.reopen_tables(thd, false); + if 
(force_if_exists) + thd->variables.option_bits|= OPTION_IF_EXISTS; + if (unlikely((error= write_bin_log(thd, TRUE, thd->query(), thd->query_length())))) { @@ -648,6 +688,7 @@ bool Sql_cmd_alter_table_exchange_partition:: (void) exchange_name_with_ddl_log(thd, part_file_name, swap_file_name, temp_file_name, table_hton); } + thd->variables.option_bits= save_option_bits; err: if (thd->locked_tables_mode) @@ -746,7 +787,7 @@ bool Sql_cmd_alter_table_truncate_partition::execute(THD *thd) Alter_info *alter_info= &thd->lex->alter_info; uint table_counter, i; List<String> partition_names_list; - bool binlog_stmt; + bool binlog_stmt, force_if_exists= 0; DBUG_ENTER("Sql_cmd_alter_table_truncate_partition::execute"); /* @@ -784,16 +825,41 @@ bool Sql_cmd_alter_table_truncate_partition::execute(THD *thd) #endif /* WITH_WSREP */ if (open_tables(thd, &first_table, &table_counter, 0)) - DBUG_RETURN(true); + { + if (thd->lex->if_exists() && + thd->get_stmt_da()->sql_errno() == ER_NO_SUCH_TABLE) + { + /* + ALTER TABLE IF EXISTS was used on not existing table + We have to log the query on a slave as the table may be a shared one + from the master and we need to ensure that the next slave can see + the statement as this slave may not have the table shared + */ + thd->clear_error(); + DBUG_RETURN(return_with_logging(thd)); + } + DBUG_RETURN(TRUE); + } - if (!first_table->table || first_table->view || - first_table->table->s->db_type() != partition_hton) + if (!first_table->table || first_table->view) { my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0)); DBUG_RETURN(TRUE); } - + if (first_table->table->file->check_if_updates_are_ignored("ALTER")) + DBUG_RETURN(return_with_logging(thd)); + + if (first_table->table->s->db_type() != partition_hton) + { + my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0)); + DBUG_RETURN(TRUE); + } + + if (first_table->table->file->partition_ht()->flags & + HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE) + force_if_exists= 1; + /* Prune all, but named partitions, to avoid excessive calls to external_lock(). @@ -845,9 +911,14 @@ bool Sql_cmd_alter_table_truncate_partition::execute(THD *thd) */ if (likely(error != HA_ERR_WRONG_COMMAND)) { + ulonglong save_option_bits= thd->variables.option_bits; + if (force_if_exists) + thd->variables.option_bits|= OPTION_IF_EXISTS; + query_cache_invalidate3(thd, first_table, FALSE); if (binlog_stmt) error|= write_bin_log(thd, !error, thd->query(), thd->query_length()); + thd->variables.option_bits= save_option_bits; } /* diff --git a/sql/sql_rename.cc b/sql/sql_rename.cc index 0200113deae..5fafd9fa28a 100644 --- a/sql/sql_rename.cc +++ b/sql/sql_rename.cc @@ -335,6 +335,7 @@ do_rename(THD *thd, TABLE_LIST *ren_table, const LEX_CSTRING *new_db, if (hton->flags & HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE) *force_if_exists= 1; + thd->replication_flags= 0; if (!(rc= mysql_rename_table(hton, &ren_table->db, &old_alias, new_db, &new_alias, 0))) { @@ -357,6 +358,8 @@ do_rename(THD *thd, TABLE_LIST *ren_table, const LEX_CSTRING *new_db, &ren_table->db, &old_alias, NO_FK_CHECKS); } } + if (thd->replication_flags & OPTION_IF_EXISTS) + *force_if_exists= 1; } else { @@ -398,8 +401,8 @@ do_rename(THD *thd, TABLE_LIST *ren_table, const LEX_CSTRING *new_db, empty. 
   RETURN
-    false   Ok
-    true    rename failed
+    0       Ok
+    table   pointer to the table list element for which the rename failed
 */
 static TABLE_LIST *
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 10f5e1f397e..0e80ac1d4f1 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -29,7 +29,8 @@
 #include "lock.h"       // mysql_unlock_tables
 #include "strfunc.h"    // find_type2, find_set
 #include "sql_truncate.h"                       // regenerate_locked_table
-#include "sql_partition.h"                      // mem_alloc_error,
+#include "ha_partition.h"                       // PAR_EXT
+                                                // mem_alloc_error,
                                                 // partition_info
                                                 // NOT_A_PARTITION_ID
 #include "sql_db.h"             // load_db_opt_by_name
@@ -1106,9 +1107,6 @@ static int execute_ddl_log_action(THD *thd, DDL_LOG_ENTRY *ddl_log_entry)
   int error= TRUE;
   char to_path[FN_REFLEN];
   char from_path[FN_REFLEN];
-#ifdef WITH_PARTITION_STORAGE_ENGINE
-  char *par_ext= (char*)".par";
-#endif
   handlerton *hton;
   DBUG_ENTER("execute_ddl_log_action");
@@ -1162,7 +1160,7 @@ static int execute_ddl_log_action(THD *thd, DDL_LOG_ENTRY *ddl_log_entry)
         break;
       }
 #ifdef WITH_PARTITION_STORAGE_ENGINE
-      strxmov(to_path, ddl_log_entry->name, par_ext, NullS);
+      strxmov(to_path, ddl_log_entry->name, PAR_EXT, NullS);
       (void) mysql_file_delete(key_file_partition_ddl_log, to_path, MYF(MY_WME));
 #endif
     }
@@ -1199,8 +1197,8 @@ static int execute_ddl_log_action(THD *thd, DDL_LOG_ENTRY *ddl_log_entry)
       if (mysql_file_rename(key_file_frm, from_path, to_path, MYF(MY_WME)))
         break;
 #ifdef WITH_PARTITION_STORAGE_ENGINE
-      strxmov(to_path, ddl_log_entry->name, par_ext, NullS);
-      strxmov(from_path, ddl_log_entry->from_name, par_ext, NullS);
+      strxmov(to_path, ddl_log_entry->name, PAR_EXT, NullS);
+      strxmov(from_path, ddl_log_entry->from_name, PAR_EXT, NullS);
       (void) mysql_file_rename(key_file_partition_ddl_log, from_path, to_path, MYF(MY_WME));
 #endif
     }
@@ -1857,8 +1855,8 @@ bool mysql_write_frm(ALTER_PARTITION_PARAM_TYPE *lpt, uint flags)
       goto end;
     }
-    int error= writefrm(shadow_path, lpt->db.str, lpt->table_name.str,
-                        lpt->create_info->tmp_table(), frm.str, frm.length);
+    int error= writefile(shadow_frm_name, lpt->db.str, lpt->table_name.str,
+                         lpt->create_info->tmp_table(), frm.str, frm.length);
     my_free(const_cast<uchar*>(frm.str));
     if (unlikely(error) ||
@@ -1895,13 +1893,13 @@ bool mysql_write_frm(ALTER_PARTITION_PARAM_TYPE *lpt, uint flags)
     if (mysql_file_delete(key_file_frm, frm_name, MYF(MY_WME)) ||
 #ifdef WITH_PARTITION_STORAGE_ENGINE
         lpt->table->file->ha_create_partitioning_metadata(path, shadow_path,
-                                                          CHF_DELETE_FLAG) ||
+                                                           CHF_DELETE_FLAG) ||
         deactivate_ddl_log_entry(part_info->frm_log_entry->entry_pos) ||
         (sync_ddl_log(), FALSE) ||
         mysql_file_rename(key_file_frm, shadow_frm_name, frm_name, MYF(MY_WME)) ||
         lpt->table->file->ha_create_partitioning_metadata(path, shadow_path,
-                                                          CHF_RENAME_FLAG))
+                                                           CHF_RENAME_FLAG))
 #else
         mysql_file_rename(key_file_frm, shadow_frm_name, frm_name, MYF(MY_WME)))
 #endif
@@ -2499,6 +2497,7 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists,
         table_type->flags & HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE)
       log_if_exists= 1;
+    thd->replication_flags= 0;
     if ((error= ha_delete_table(thd, table_type, path, &db, &table->table_name,
                                 !dont_log_query)))
     {
@@ -2528,6 +2527,8 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists,
         DBUG_ASSERT(frm_delete_error);
       }
     }
+    if (thd->replication_flags & OPTION_IF_EXISTS)
+      log_if_exists= 1;
     if (likely(!error))
     {
@@ -2769,7 +2770,7 @@ bool log_drop_table(THD *thd, const LEX_CSTRING *db_name,
 /**
-  Quickly remove a table.
+ Quickly remove a table without bin logging @param thd Thread context. @param base The handlerton handle. @@ -5880,7 +5881,7 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, when the slave executes the command. */ force_generated_create= - (((src_table->table->s->db_type()->flags & + (((src_table->table->file->partition_ht()->flags & HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE) && src_table->table->s->db_type() != local_create_info.db_type)); #endif @@ -7929,7 +7930,7 @@ static bool mysql_inplace_alter_table(THD *thd, /* Notify the engine that the table definition has changed */ - hton= table->file->ht; + hton= table->file->partition_ht(); if (hton->notify_tabledef_changed) { char db_buff[FN_REFLEN], table_buff[FN_REFLEN]; @@ -9815,7 +9816,7 @@ bool mysql_alter_table(THD *thd, const LEX_CSTRING *new_db, Alter_table_ctx alter_ctx(thd, table_list, tables_opened, new_db, new_name); mdl_ticket= table->mdl_ticket; - if (ha_check_if_updates_are_ignored(thd, table->s->db_type(), "ALTER")) + if (table->file->check_if_updates_are_ignored("ALTER")) { /* Table is a shared table. Remove the .frm file. Discovery will create @@ -9825,12 +9826,14 @@ bool mysql_alter_table(THD *thd, const LEX_CSTRING *new_db, MDL_EXCLUSIVE, thd->variables.lock_wait_timeout)) DBUG_RETURN(1); - quick_rm_table(thd, 0, &table_list->db, &table_list->table_name, - FRM_ONLY, 0); + quick_rm_table(thd, table->file->ht, &table_list->db, + &table_list->table_name, + NO_HA_TABLE, 0); goto end_inplace; } if (!if_exists && - (table->s->db_type()->flags & HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE)) + (table->file->partition_ht()->flags & + HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE)) { /* Table is a shared table that may not exist on the slave. @@ -10551,8 +10554,9 @@ do_continue:; write the CREATE TABLE statement for the new table to the log and log all inserted rows to the table. 
*/ - if ((table->s->db_type()->flags & HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE) && - (table->s->db_type() != new_table->s->db_type()) && + if ((table->file->partition_ht()->flags & + HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE) && + (table->file->partition_ht() != new_table->file->partition_ht()) && (mysql_bin_log.is_open() && (thd->variables.option_bits & OPTION_BIN_LOG))) { diff --git a/sql/sql_table.h b/sql/sql_table.h index b9dada41e80..d67ceb6ebdd 100644 --- a/sql/sql_table.h +++ b/sql/sql_table.h @@ -264,6 +264,8 @@ bool mysql_write_frm(ALTER_PARTITION_PARAM_TYPE *lpt, uint flags); int write_bin_log(THD *thd, bool clear_error, char const *query, ulong query_length, bool is_trans= FALSE); +int write_bin_log_with_if_exists(THD *thd, bool clear_error, + bool is_trans, bool add_if_exists); bool write_ddl_log_entry(DDL_LOG_ENTRY *ddl_log_entry, DDL_LOG_MEMORY_ENTRY **active_entry); bool write_execute_ddl_log_entry(uint first_entry, diff --git a/sql/table.cc b/sql/table.cc index fe096835144..92e3d2e4800 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -25,7 +25,8 @@ // primary_key_name #include "sql_parse.h" // free_items #include "strfunc.h" // unhex_type2 -#include "sql_partition.h" // mysql_unpack_partition, +#include "ha_partition.h" // PART_EXT + // mysql_unpack_partition, // fix_partition_func, partition_info #include "sql_base.h" #include "create_options.h" @@ -618,9 +619,13 @@ enum open_frm_error open_table_def(THD *thd, TABLE_SHARE *share, uint flags) path); if (flags & GTS_FORCE_DISCOVERY) { + const char *path2= share->normalized_path.str; DBUG_ASSERT(flags & GTS_TABLE); DBUG_ASSERT(flags & GTS_USE_DISCOVERY); - mysql_file_delete_with_symlink(key_file_frm, path, "", MYF(0)); + /* Delete .frm and .par files */ + mysql_file_delete_with_symlink(key_file_frm, path2, reg_ext, MYF(0)); + mysql_file_delete_with_symlink(key_file_partition_ddl_log, path2, PAR_EXT, + MYF(0)); file= -1; } else @@ -1669,6 +1674,9 @@ public: /** Read data from a binary .frm file image into a TABLE_SHARE + @param write Write the .frm and .par file. These are not created if + the function returns an error. + @note frm bytes at the following offsets are unused in MariaDB 10.0: @@ -1679,12 +1687,13 @@ public: 42..46 are unused since 5.0 (were for RAID support) Also, there're few unused bytes in forminfo. - */ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, const uchar *frm_image, - size_t frm_length) + size_t frm_length, + const uchar *par_image, + size_t par_length) { TABLE_SHARE *share= this; uint new_frm_ver, field_pack_length, new_field_pack_flag; @@ -1715,24 +1724,31 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, uint len; uint ext_key_parts= 0; plugin_ref se_plugin= 0; - bool vers_can_native= false; + bool vers_can_native= false, frm_created= 0; Field_data_type_info_array field_data_type_info_array; - MEM_ROOT *old_root= thd->mem_root; Virtual_column_info **table_check_constraints; extra2_fields extra2; - DBUG_ENTER("TABLE_SHARE::init_from_binary_frm_image"); keyinfo= &first_keyinfo; thd->mem_root= &share->mem_root; - if (write && write_frm_image(frm_image, frm_length)) - goto err; - if (frm_length < FRM_HEADER_SIZE + FRM_FORMINFO_SIZE) goto err; + if (write) + { + frm_created= 1; + if (write_frm_image(frm_image, frm_length)) + goto err; +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (par_image) + if (write_par_image(par_image, par_length)) + goto err; +#endif + } + share->frm_version= frm_image[2]; /* Check if .frm file created by MySQL 5.0. 
In this case we want to @@ -2069,6 +2085,17 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, if (keyinfo->algorithm == HA_KEY_ALG_LONG_HASH) hash_fields++; +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (par_image && plugin_data(se_plugin, handlerton*) == partition_hton) + { + /* + Discovery returned a partition plugin. Change to use it. The partition + engine will then use discovery to find the rest of the plugin tables, + which may be in the original engine used for discovery + */ + share->db_plugin= se_plugin; + } +#endif if (share->db_plugin && !plugin_equals(share->db_plugin, se_plugin)) goto err; // wrong engine (someone changed the frm under our feet?) @@ -3196,6 +3223,19 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, DBUG_RETURN(0); err: + if (frm_created) + { + char path[FN_REFLEN+1]; + strxnmov(path, FN_REFLEN, normalized_path.str, reg_ext, NullS); + my_delete(path, MYF(0)); +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (par_image) + { + strxnmov(path, FN_REFLEN, normalized_path.str, PAR_EXT, NullS); + my_delete(path, MYF(0)); + } +#endif + } share->db_plugin= NULL; share->error= OPEN_FRM_CORRUPTED; share->open_errno= my_errno; @@ -3361,7 +3401,19 @@ ret: bool TABLE_SHARE::write_frm_image(const uchar *frm, size_t len) { - return writefrm(normalized_path.str, db.str, table_name.str, false, frm, len); + char file_name[FN_REFLEN+1]; + strxnmov(file_name, sizeof(file_name)-1, normalized_path.str, reg_ext, + NullS); + return writefile(file_name, db.str, table_name.str, false, + frm, len); +} + +bool TABLE_SHARE::write_par_image(const uchar *par, size_t len) +{ + char file_name[FN_REFLEN+1]; + strxnmov(file_name, sizeof(file_name)-1, normalized_path.str, PAR_EXT, + NullS); + return writefile(file_name, db.str, table_name.str, false, par, len); } @@ -4136,7 +4188,7 @@ partititon_err: the fact that table doesn't in fact exist and remove the stray .frm file. */ - if (share->db_type()->discover_table && + if (outparam->file->partition_ht()->discover_table && (ha_err == ENOENT || ha_err == HA_ERR_NO_SUCH_TABLE)) error= OPEN_FRM_DISCOVER; diff --git a/sql/table.h b/sql/table.h index ddd33142fd7..d93f466077f 100644 --- a/sql/table.h +++ b/sql/table.h @@ -1038,7 +1038,9 @@ struct TABLE_SHARE discovering the table over and over again */ int init_from_binary_frm_image(THD *thd, bool write, - const uchar *frm_image, size_t frm_length); + const uchar *frm_image, size_t frm_length, + const uchar *par_image=0, + size_t par_length=0); /* populates TABLE_SHARE from the table description, specified as the @@ -1053,7 +1055,9 @@ struct TABLE_SHARE writes the frm image to an frm file, corresponding to this table */ bool write_frm_image(const uchar *frm_image, size_t frm_length); + bool write_par_image(const uchar *par_image, size_t par_length); + /* Only used by tokudb */ bool write_frm_image(void) { return frm_image ? 
write_frm_image(frm_image->str, frm_image->length) : 0; } diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 10952192be8..724d4022f16 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -2724,7 +2724,7 @@ void ha_maria::drop_table(const char *name) { DBUG_ASSERT(file->s->temporary); (void) ha_close(); - (void) maria_delete_table_files(name, 1, 0); + (void) maria_delete_table_files(name, 1, MY_WME); } diff --git a/storage/maria/ha_s3.cc b/storage/maria/ha_s3.cc index 668530a7157..c86e2e4f816 100644 --- a/storage/maria/ha_s3.cc +++ b/storage/maria/ha_s3.cc @@ -218,7 +218,7 @@ ha_create_table_option s3_table_option_list[]= ha_s3::ha_s3(handlerton *hton, TABLE_SHARE *table_arg) - :ha_maria(hton, table_arg), in_alter_table(0) + :ha_maria(hton, table_arg), in_alter_table(S3_NO_ALTER) { /* Remove things that S3 doesn't support */ int_table_flags&= ~(HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE | @@ -255,9 +255,10 @@ void ha_s3::register_handler(MARIA_HA *file) int ha_s3::write_row(const uchar *buf) { + DBUG_ENTER("ha_s3::write_row"); if (in_alter_table) - return ha_maria::write_row(buf); - return HA_ERR_WRONG_COMMAND; + DBUG_RETURN(ha_maria::write_row(buf)); + DBUG_RETURN(HA_ERR_WRONG_COMMAND); } /* Return true if S3 can be used */ @@ -282,6 +283,7 @@ static my_bool s3_info_init(S3_INFO *info) return 0; } + /** Fill information in S3_INFO including paths to table and database @@ -298,9 +300,26 @@ static my_bool s3_info_init(S3_INFO *s3_info, const char *path, strmake(database_buff, s3_info->database.str, MY_MIN(database_length, s3_info->database.length)); s3_info->database.str= database_buff; + s3_info->base_table= s3_info->table; return s3_info_init(s3_info); } +/* + Check if table is a temporary table that is stored in Aria +*/ + +static int is_mariadb_internal_tmp_table(const char *table_name) +{ + int length; + /* Temporary table from ALTER TABLE */ + if (!strncmp(table_name, "#sql-", 5)) + return 1; + length= strlen(table_name); + if (length > 5 && !strncmp(table_name + length - 5, "#TMP#", 5)) + return 1; + return 0; +} + /** Drop S3 table @@ -317,21 +336,47 @@ int ha_s3::delete_table(const char *name) error= s3_info_init(&s3_info, name, database, sizeof(database)-1); /* If internal on disk temporary table, let Aria take care of it */ - if (!strncmp(s3_info.table.str, "#sql-", 5)) + if (is_mariadb_internal_tmp_table(s3_info.table.str)) DBUG_RETURN(ha_maria::delete_table(name)); if (error) DBUG_RETURN(HA_ERR_UNSUPPORTED); if (!(s3_client= s3_open_connection(&s3_info))) - DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); + DBUG_RETURN(HA_ERR_NO_CONNECTION); error= aria_delete_from_s3(s3_client, s3_info.bucket.str, s3_info.database.str, s3_info.table.str,0); - ms3_deinit(s3_client); + s3_deinit(s3_client); DBUG_RETURN(error); } +/* + The table is a temporary table as part of ALTER TABLE. + + Copy the on disk 'temporary' Aria table to S3 and delete the Aria table +*/ + +static int move_table_to_s3(ms3_st *s3_client, + S3_INFO *to_s3_info, + const char *local_name, + bool is_partition) +{ + int error; + DBUG_ASSERT(!is_mariadb_internal_tmp_table(to_s3_info->table.str)); + + if (!(error= aria_copy_to_s3(s3_client, to_s3_info->bucket.str, local_name, + to_s3_info->database.str, + to_s3_info->table.str, + 0, 0, 1, 0, !is_partition))) + { + /* Table now in S3. 
     Remove original table files, keep .frm */
+    error= maria_delete_table_files(local_name, 1, 0);
+  }
+  return error;
+}
+
+
 /**
   Copy an Aria table to S3 or rename a table in S3
@@ -343,8 +388,8 @@ int ha_s3::delete_table(const char *name)
 int ha_s3::rename_table(const char *from, const char *to)
 {
-  S3_INFO to_s3_info, from_s3_info;
-  char to_name[FN_REFLEN], from_name[FN_REFLEN], frm_name[FN_REFLEN];
+  S3_INFO to_s3_info;
+  char to_name[NAME_LEN+1], frm_name[FN_REFLEN];
   ms3_st *s3_client;
   MY_STAT stat_info;
   int error;
@@ -352,7 +397,7 @@ int ha_s3::rename_table(const char *from, const char *to)
                       (strstr(to, "#P#") != NULL);
   DBUG_ENTER("ha_s3::rename_table");
-  if (s3_info_init(&to_s3_info, to, to_name, NAME_LEN))
+  if (s3_info_init(&to_s3_info, to, to_name, sizeof(to_name)-1))
     DBUG_RETURN(HA_ERR_UNSUPPORTED);
   if (!(s3_client= s3_open_connection(&to_s3_info)))
     DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
@@ -363,34 +408,35 @@ int ha_s3::rename_table(const char *from, const char *to)
     and the .MAI file for the table is on disk
   */
   fn_format(frm_name, from, "", reg_ext, MYF(0));
-  if (!strncmp(from + dirname_length(from), "#sql-", 5) &&
+  if (is_mariadb_internal_tmp_table(from + dirname_length(from)) &&
       (is_partition || my_stat(frm_name, &stat_info, MYF(0))))
   {
-    /*
-      The table is a temporary table as part of ALTER TABLE.
-      Copy the on disk temporary Aria table to S3.
-    */
-    error= aria_copy_to_s3(s3_client, to_s3_info.bucket.str, from,
-                           to_s3_info.database.str,
-                           to_s3_info.table.str,
-                           0, 0, 0, 0, !is_partition);
-    if (!error)
-    {
-      /* Remove original files table files, keep .frm */
-      fn_format(from_name, from, "", MARIA_NAME_DEXT,
-                MY_APPEND_EXT|MY_UNPACK_FILENAME);
-      my_delete(from_name, MYF(MY_WME | ME_WARNING));
-      fn_format(from_name, from, "", MARIA_NAME_IEXT,
-                MY_APPEND_EXT|MY_UNPACK_FILENAME);
-      my_delete(from_name, MYF(MY_WME | ME_WARNING));
-    }
+    error= move_table_to_s3(s3_client, &to_s3_info, from, is_partition);
   }
   else
   {
+    char from_name[NAME_LEN+1];
+    S3_INFO from_s3_info;
     /* The table is an internal S3 table. Do the renames */
-    s3_info_init(&from_s3_info, from, from_name, NAME_LEN);
+    s3_info_init(&from_s3_info, from, from_name, sizeof(from_name)-1);
-    error= aria_rename_s3(s3_client, to_s3_info.bucket.str,
+    if (is_mariadb_internal_tmp_table(to + dirname_length(to)))
+    {
+      /*
+        The table is renamed to a temporary table. This only happens
+        in the case of an ALTER PARTITION failure, and a delete will soon
+        be issued for the temporary table. The only thing we can do
+        is to remove the 'from' table. We will get extra errors for the
+        upcoming delete, but we will ignore this minor problem for now as
+        this is an unlikely event and the extra warnings are just annoying,
+        not critical.
+      */
+      error= aria_delete_from_s3(s3_client, from_s3_info.bucket.str,
+                                 from_s3_info.database.str,
+                                 from_s3_info.table.str,0);
+    }
+    else
+      error= aria_rename_s3(s3_client, to_s3_info.bucket.str,
                           from_s3_info.database.str,
                           from_s3_info.table.str,
                           to_s3_info.database.str,
@@ -398,7 +444,7 @@ int ha_s3::rename_table(const char *from, const char *to)
                           !is_partition &&
                           !current_thd->lex->alter_info.partition_flags);
   }
-  ms3_deinit(s3_client);
+  s3_deinit(s3_client);
   DBUG_RETURN(error);
 }
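Both delete_table() and rename_table() above hinge on is_mariadb_internal_tmp_table() to decide whether a name refers to a real S3 table or to one of the server's internal temporary tables, which must stay on local disk in Aria format. A self-contained sketch of that classification; is_internal_tmp_name() is a hypothetical stand-in for the static helper shown earlier in this file:

#include <cstring>
#include <cstdio>

static int is_internal_tmp_name(const char *table_name)
{
  size_t length;
  if (!strncmp(table_name, "#sql-", 5))            // ALTER TABLE temp copy
    return 1;
  length= strlen(table_name);
  if (length > 5 && !strcmp(table_name + length - 5, "#TMP#"))
    return 1;                                      // ALTER PARTITION temp name
  return 0;
}

int main()
{
  printf("%d %d %d\n",
         is_internal_tmp_name("#sql-1234_1"),      // 1: stays in Aria
         is_internal_tmp_name("t1#TMP#"),          // 1: stays in Aria
         is_internal_tmp_name("t1"));              // 0: goes to S3
  return 0;
}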
@@ -428,7 +474,9 @@ int ha_s3::create(const char *name, TABLE *table_arg,
   if (share->table_type == TABLE_TYPE_SEQUENCE)
     DBUG_RETURN(HA_ERR_UNSUPPORTED);
-  if (ha_create_info->tmp_table())
+  /* When using partitions, S3 only supports adding and removing partitions */
+  if ((table_arg->in_use->lex->alter_info.partition_flags &
+       ~(ALTER_PARTITION_REMOVE | ALTER_PARTITION_ADD | ALTER_PARTITION_INFO)))
     DBUG_RETURN(HA_ERR_UNSUPPORTED);
   if (!s3_usable())
@@ -441,20 +489,50 @@ int ha_s3::create(const char *name, TABLE *table_arg,
   if (error)
     DBUG_RETURN(error);
-  /* Create the .frm file. Needed for ha_s3::rename_table() later */
-  if (!table_arg->s->read_frm_image((const uchar**) &frm_ptr, &frm_len))
+#ifdef MOVE_FILES_TO_S3_ON_CREATE
+  /*
+    If we are in ADD PARTITION and we created a new table (not a
+    temporary table, which will be moved as part of the final rename),
+    we should move it to S3 right away. The other option would be to move
+    it as part of close(). We prefer to do this here as there is no error
+    checking with close(), which would leave incomplete tables around in
+    case of failures. The downside is that we can't move rows around as
+    part of changing partitions, but that is not a big problem with S3
+    as it's read-only anyway.
+  */
+  if (!is_mariadb_internal_tmp_table(name + dirname_length(name)) &&
+      strstr(name, "#P#"))
+  {
+    S3_INFO to_s3_info;
+    char database[NAME_LEN+1];
+    ms3_st *s3_client;
+
+    if (s3_info_init(&to_s3_info, name, database, sizeof(database)-1))
+      DBUG_RETURN(HA_ERR_UNSUPPORTED);
+    if (!(s3_client= s3_open_connection(&to_s3_info)))
+      DBUG_RETURN(HA_ERR_NO_CONNECTION);
+
+    /* Note that if error is set, then the empty temp table was not removed */
+    error= move_table_to_s3(s3_client, &to_s3_info, name, 1);
+    s3_deinit(s3_client);
+    if (error)
+      maria_delete_table_files(name, 1, 0);
+    else
+#endif /* MOVE_FILES_TO_S3_ON_CREATE */
  {
-    table_arg->s->write_frm_image(frm_ptr, frm_len);
-    table_arg->s->free_frm_image(frm_ptr);
+    /* Create the .frm file. Needed for ha_s3::rename_table() later */
+    if (!table_arg->s->read_frm_image((const uchar**) &frm_ptr, &frm_len))
+    {
+      table_arg->s->write_frm_image(frm_ptr, frm_len);
+      table_arg->s->free_frm_image(frm_ptr);
+    }
  }
-
-  DBUG_RETURN(0);
+  DBUG_RETURN(error);
 }
 /**
   Open table
   @notes
   Table is read only, except if opened by ALTER as in this case we are
   creating the S3 table.
@@ -462,6 +540,7 @@ int ha_s3::create(const char *name, TABLE *table_arg, int ha_s3::open(const char *name, int mode, uint open_flags) { + bool internal_tmp_table= 0; int res; S3_INFO s3_info; DBUG_ENTER("ha_s3:open"); @@ -473,24 +552,39 @@ int ha_s3::open(const char *name, int mode, uint open_flags) DBUG_RETURN(EACCES); open_args= 0; - if (!(open_flags & HA_OPEN_FOR_CREATE)) + internal_tmp_table= is_mariadb_internal_tmp_table(name + + dirname_length(name)); + + if (!(open_flags & HA_OPEN_FOR_CREATE) && !internal_tmp_table) { (void) s3_info_init(&s3_info); s3_info.tabledef_version= table->s->tabledef_version; - + s3_info.base_table= table->s->table_name; + /* Pass the above arguments to maria_open() */ open_args= &s3_info; + in_alter_table= S3_NO_ALTER; } + else + { + /* + Table was created as an Aria table that will be moved to S3 either + by rename_table() or external_lock() + */ + bool is_partition= (strstr(name, "#P#") != NULL); + in_alter_table= (!is_partition ? S3_ALTER_TABLE : + internal_tmp_table ? S3_ADD_TMP_PARTITION : + S3_ADD_PARTITION); + } + DBUG_PRINT("info", ("in_alter_table: %d", in_alter_table)); if (!(res= ha_maria::open(name, mode, open_flags))) { - if ((open_flags & HA_OPEN_FOR_CREATE)) - in_alter_table= 1; - else + if (open_args) { /* - We have to modify the pagecache callbacks for the data file, - index file and for bitmap handling + Table is in S3. We have to modify the pagecache callbacks for the + data file, index file and for bitmap handling. */ file->s->pagecache= &s3_pagecache; file->dfile.big_block_size= file->s->kfile.big_block_size= @@ -503,6 +597,63 @@ int ha_s3::open(const char *name, int mode, uint open_flags) } +int ha_s3::external_lock(THD * thd, int lock_type) +{ + int error; + DBUG_ENTER("ha_s3::external_lock"); + + error= ha_maria::external_lock(thd, lock_type); + if (in_alter_table == S3_ADD_PARTITION && !error && lock_type == F_UNLCK) + { + /* + This was a new partition. 
All data is now copied to the table,
+      so it's time to move it to S3.
+    */
+
+    MARIA_SHARE *share= file->s;
+    uint org_open_count;
+
+    /* First, flush all data to the Aria table */
+    if (flush_pagecache_blocks(share->pagecache, &share->kfile,
+                               FLUSH_RELEASE))
+      error= my_errno;
+    if (flush_pagecache_blocks(share->pagecache, &share->bitmap.file,
+                               FLUSH_RELEASE))
+      error= my_errno;
+    org_open_count= share->state.open_count;
+    if (share->global_changed)
+      share->state.open_count--;
+    if (_ma_state_info_write(share, MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
+                             MA_STATE_INFO_WRITE_LOCK))
+      error= my_errno;
+    share->state.open_count= org_open_count;
+
+    if (!error)
+    {
+      S3_INFO to_s3_info;
+      char database[NAME_LEN+1], *name= file->s->open_file_name.str;
+      ms3_st *s3_client;
+
+      /* Copy data to S3 */
+      if (s3_info_init(&to_s3_info, name, database, sizeof(database)-1))
+        DBUG_RETURN(HA_ERR_UNSUPPORTED);
+      if (!(s3_client= s3_open_connection(&to_s3_info)))
+        DBUG_RETURN(HA_ERR_NO_CONNECTION);
+
+      /*
+        Note that if error is set, then the empty temp table was not
+        removed
+      */
+      error= move_table_to_s3(s3_client, &to_s3_info, name, 1);
+      s3_deinit(s3_client);
+
+      maria_delete_table_files(name, 1, 0);
+    }
+  }
+  DBUG_RETURN(error);
+}
+
+
 /******************************************************************************
  Storage engine handler definitions
 ******************************************************************************/
@@ -541,7 +692,7 @@ static int s3_hton_panic(handlerton *hton, ha_panic_function flag)
 static int s3_discover_table(handlerton *hton, THD* thd, TABLE_SHARE *share)
 {
   S3_INFO s3_info;
-  S3_BLOCK block;
+  S3_BLOCK frm_block, par_block;
   ms3_st *s3_client;
   int error;
   DBUG_ENTER("s3_discover_table");
@@ -549,21 +700,26 @@ static int s3_discover_table(handlerton *hton, THD* thd, TABLE_SHARE *share)
   if (s3_info_init(&s3_info))
     DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
   if (!(s3_client= s3_open_connection(&s3_info)))
-    DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
+    DBUG_RETURN(HA_ERR_NO_CONNECTION);
   s3_info.database= share->db;
   s3_info.table= share->table_name;
+  s3_info.base_table= share->table_name;
-  if (s3_get_frm(s3_client, &s3_info, &block))
+  if (s3_get_def(s3_client, &s3_info, &frm_block, "frm"))
   {
-    s3_free(&block);
-    ms3_deinit(s3_client);
+    s3_free(&frm_block);
+    s3_deinit(s3_client);
     DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
   }
+  (void) s3_get_def(s3_client, &s3_info, &par_block, "par");
+
   error= share->init_from_binary_frm_image(thd, 1,
-                                           block.str, block.length);
-  s3_free(&block);
-  ms3_deinit(s3_client);
+                                           frm_block.str, frm_block.length,
+                                           par_block.str, par_block.length);
+  s3_free(&frm_block);
+  s3_free(&par_block);
+  s3_deinit(s3_client);
   DBUG_RETURN((my_errno= error));
 }
@@ -598,13 +754,15 @@ static int s3_discover_table_existance(handlerton *hton, const char *db,
   s3_info.table.length= strlen(table_name);
   res= s3_frm_exists(s3_client, &s3_info);
-  ms3_deinit(s3_client);
+  s3_deinit(s3_client);
   DBUG_RETURN(res == 0);                        // Return 1 if exists
 }
 /**
   Return a list of all S3 tables in a database
+
+  Partitioned tables are not shown
 */
 static int s3_discover_table_names(handlerton *hton __attribute__((unused)),
@@ -632,17 +790,20 @@ static int s3_discover_table_names(handlerton *hton __attribute__((unused)),
   if ((error= ms3_list_dir(s3_client, s3_info.bucket.str, aws_path, &org_list)))
     goto end;
-
+
   for (list= org_list ; list ; list= list->next)
   {
-    const char *name= list->key + db->length + 1;   // Skip database and /
-    size_t name_length= strlen(name)-1;             // Remove end /
-    result->add_table(name, name_length);
+    const char *name= list->key + db->length + 1;   // Skip database and '/'
+    if (!strstr(name, "#P#"))
+    {
+      size_t name_length= strlen(name)-1;           // Remove end '/'
+      result->add_table(name, name_length);
+    }
   }
   if (org_list)
     ms3_list_free(org_list);
 end:
-  ms3_deinit(s3_client);
+  s3_deinit(s3_client);
   DBUG_RETURN(0);
 }
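s3_discover_table_names() now hides per-partition objects: S3 keys such as "t1#P#p0" belong to individual partitions, so only base table names are reported. A small standalone sketch of that filter; report_name() is a hypothetical helper, and the trailing '/' handling mirrors how directory listings arrive from ms3_list_dir():

#include <cstring>
#include <cstdio>

static bool report_name(const char *key_after_db, char *out, size_t out_size)
{
  if (strstr(key_after_db, "#P#"))
    return false;                          // per-partition object: hide it
  size_t len= strlen(key_after_db);
  if (len && key_after_db[len - 1] == '/')
    len--;                                 // drop the trailing '/'
  if (len >= out_size)
    return false;
  memcpy(out, key_after_db, len);
  out[len]= 0;
  return true;
}

int main()
{
  char name[64];
  if (report_name("t1/", name, sizeof(name)))
    printf("table: %s\n", name);           // prints "table: t1"
  if (!report_name("t1#P#p0/", name, sizeof(name)))
    printf("t1#P#p0 skipped\n");           // partition objects are not listed
  return 0;
}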
@@ -660,20 +821,14 @@ int ha_s3::discover_check_version()
 {
   S3_INFO s3_info= *file->s->s3_path;
   s3_info.tabledef_version= table->s->tabledef_version;
-  return s3_check_frm_version(file->s3, &s3_info);
-}
-
-
-int ha_s3::rebind()
-{
-  if (int error= handler::rebind())
-    return error;
-  if (discover_check_version())
-  {
-    handler::unbind_psi();
-    return HA_ERR_TABLE_DEF_CHANGED;
-  }
-  return 0;
+  /*
+    We have to change the database and table as the table may be part of a
+    partitioned table. In this case we want to check the frm file for the
+    partitioned table, not the part table.
+  */
+  s3_info.base_table= table->s->table_name;
+  return (s3_check_frm_version(file->s3, &s3_info) ?
+          HA_ERR_TABLE_DEF_CHANGED : 0);
 }
@@ -698,14 +853,14 @@ static int s3_notify_tabledef_changed(handlerton *hton __attribute__((unused)),
     DBUG_RETURN(0);
   s3_info.database= *db;
-  s3_info.table= *table;
+  s3_info.base_table= *table;
   s3_info.tabledef_version= *org_tabledef_version;
   if (s3_check_frm_version(s3_client, &s3_info))
   {
     error= 1;
     goto err;
   }
-
+
   strxnmov(aws_path, sizeof(aws_path)-1, db->str, "/", table->str, "/frm",
            NullS);
@@ -714,10 +869,80 @@ static int s3_notify_tabledef_changed(handlerton *hton __attribute__((unused)),
     error= 2;
 err:
-  ms3_deinit(s3_client);
+  s3_deinit(s3_client);
+  DBUG_RETURN(error);
+}
+
+
+/**
+  Update the .frm and .par file of a partitioned table stored in s3
+
+  Logic is:
+  - Skip temporary tables used internally by ALTER TABLE and ALTER PARTITION
+  - In case of delete, delete the .frm and .par file from S3
+  - In case of create, copy the .frm and .par files to S3
+  - In case of rename:
+    - Delete from old_path if not internal temporary file and if exists
+    - Copy new .frm and .par file to S3
+
+  To ensure that this works with the replay logic from ALTER PARTITION
+  there should be no errors, only notes, for deletes.
+*/
+
+static int s3_create_partitioning_metadata(const char *path,
+                                           const char *old_path,
+                                           chf_create_flags action_flag)
+{
+  ms3_st *s3_client;
+  S3_INFO s3_info;
+  int error= 0;
+  char database[NAME_LEN+1];
+  const char *tmp_path;
+  DBUG_ENTER("s3_create_partitioning_metadata");
+
+  /* Path is empty in case of delete */
+  tmp_path= path ? path : old_path;
+
+  if (s3_info_init(&s3_info, tmp_path, database, sizeof(database)-1))
+    DBUG_RETURN(HA_ERR_UNSUPPORTED);
+  if (!(s3_client= s3_open_connection(&s3_info)))
+    DBUG_RETURN(HA_ERR_NO_CONNECTION);
+
+  switch (action_flag) {
+  case CHF_DELETE_FLAG:
+  case CHF_RENAME_FLAG:
+  {
+    if (!is_mariadb_internal_tmp_table(old_path + dirname_length(old_path)))
+    {
+      S3_INFO s3_info2;
+      char database2[NAME_LEN+1];
+      s3_info_init(&s3_info2, old_path, database2, sizeof(database2)-1);
+
+      partition_delete_from_s3(s3_client, s3_info2.bucket.str,
+                               s3_info2.database.str, s3_info2.table.str,
+                               MYF(ME_NOTE));
+    }
+    if (action_flag == CHF_DELETE_FLAG)
+      break;
+  }
+  /* Fall through */
+  case CHF_CREATE_FLAG:
+    if (!is_mariadb_internal_tmp_table(path + dirname_length(path)))
+      error= partition_copy_to_s3(s3_client, s3_info.bucket.str,
+                                  path, old_path,
+                                  s3_info.database.str, s3_info.table.str);
+    break;
+  case CHF_INDEX_FLAG:
+    break;
+  }
+  s3_deinit(s3_client);
   DBUG_RETURN(error);
 }
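The switch above relies on a deliberate fall-through: a rename first deletes the old metadata and then continues into the create branch to upload the new .frm/.par pair, while a plain delete stops after the first step. A condensed sketch of that dispatch, with stand-in helpers instead of the real S3 calls (only the CHF_xxx enum names match the server):

#include <cstdio>

enum chf_flags { CHF_CREATE_FLAG, CHF_DELETE_FLAG, CHF_RENAME_FLAG, CHF_INDEX_FLAG };

static void delete_metadata(const char *path) { printf("delete %s\n", path); }
static void upload_metadata(const char *path) { printf("upload %s\n", path); }

static void dispatch(chf_flags action, const char *path, const char *old_path)
{
  switch (action) {
  case CHF_DELETE_FLAG:
  case CHF_RENAME_FLAG:
    delete_metadata(old_path);       // drop stale .frm/.par from S3
    if (action == CHF_DELETE_FLAG)
      break;
    /* fall through */
  case CHF_CREATE_FLAG:
    upload_metadata(path);           // push the new .frm/.par pair
    break;
  case CHF_INDEX_FLAG:
    break;                           // nothing to do for index files
  }
}

int main()
{
  dispatch(CHF_RENAME_FLAG, "test/t2", "test/t1"); // deletes t1, uploads t2
  return 0;
}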
-
+
+
+/**
+  Initialize the S3 plugin
+*/
 static int ha_s3_init(void *p)
 {
@@ -740,6 +965,7 @@ static int ha_s3_init(void *p)
   s3_hton->discover_table_names= s3_discover_table_names;
   s3_hton->discover_table_existence= s3_discover_table_existance;
   s3_hton->notify_tabledef_changed= s3_notify_tabledef_changed;
+  s3_hton->create_partitioning_metadata= s3_create_partitioning_metadata;
   s3_hton->tablefile_extensions= no_exts;
   s3_hton->commit= 0;
   s3_hton->rollback= 0;
diff --git a/storage/maria/ha_s3.h b/storage/maria/ha_s3.h
index 61502449dac..57fdeb03e6b 100644
--- a/storage/maria/ha_s3.h
+++ b/storage/maria/ha_s3.h
@@ -21,7 +21,9 @@
 class ha_s3 :public ha_maria
 {
-  bool in_alter_table;
+  enum alter_table_op
+  { S3_NO_ALTER, S3_ALTER_TABLE, S3_ADD_PARTITION, S3_ADD_TMP_PARTITION };
+  alter_table_op in_alter_table;
   S3_INFO *open_args;
 public:
@@ -33,28 +35,35 @@ public:
   int write_row(const uchar *buf);
   int update_row(const uchar * old_data, const uchar * new_data)
   {
-    return HA_ERR_WRONG_COMMAND;
+    DBUG_ENTER("update_row");
+    DBUG_RETURN(HA_ERR_WRONG_COMMAND);
   }
   int delete_row(const uchar * buf)
   {
-    return HA_ERR_WRONG_COMMAND;
+    DBUG_ENTER("delete_row");
+    DBUG_RETURN(HA_ERR_WRONG_COMMAND);
   }
   int check(THD * thd, HA_CHECK_OPT * check_opt)
   {
-    return HA_ERR_WRONG_COMMAND;
+    DBUG_ENTER("check");
+    DBUG_RETURN(HA_ERR_WRONG_COMMAND);
   }
   int analyze(THD * thd, HA_CHECK_OPT * check_opt)
   {
-    return HA_ERR_WRONG_COMMAND;
+    DBUG_ENTER("analyze");
+    DBUG_RETURN(HA_ERR_WRONG_COMMAND);
   }
   int repair(THD * thd, HA_CHECK_OPT * check_opt)
   {
-    return HA_ERR_WRONG_COMMAND;
+    DBUG_ENTER("repair");
+    DBUG_RETURN(HA_ERR_WRONG_COMMAND);
   }
   int preload_keys(THD * thd, HA_CHECK_OPT * check_opt)
   {
-    return HA_ERR_WRONG_COMMAND;
+    DBUG_ENTER("preload_keys");
+    DBUG_RETURN(HA_ERR_WRONG_COMMAND);
   }
+  int external_lock(THD * thd, int lock_type);
   /*
     drop_table() is only used for internal temporary tables,
     not applicable for s3
   */
@@ -64,7 +73,7 @@
   }
   int delete_table(const char *name);
   int rename_table(const char *from, const char *to);
-  int discover_check_version();
+  int discover_check_version() override;
   int rebind();
   S3_INFO *s3_open_args() { return open_args; }
   void register_handler(MARIA_HA *file);
diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c
index d66e3bda4de..2885b788e6b 100644
--- a/storage/maria/ma_close.c
+++ b/storage/maria/ma_close.c
@@ -107,6 +107,7 @@ int maria_close(register MARIA_HA *info)
     /* Avoid _ma_mark_file_changed() when flushing pages */
     share->global_changed= 1;
+    /* Flush page cache if BLOCK format */
     if ((*share->once_end)(share))
       error= my_errno;
     /*
diff --git a/storage/maria/ma_delete_table.c b/storage/maria/ma_delete_table.c
index 01d9c4c4ec2..90e6b5250c1 100644
--- a/storage/maria/ma_delete_table.c
+++ b/storage/maria/ma_delete_table.c
@@ -78,22 +78,32 @@ int maria_delete_table(const char *name)
     DBUG_RETURN(1);
   }
-  DBUG_RETURN(maria_delete_table_files(name, 0, sync_dir));
+  DBUG_RETURN(maria_delete_table_files(name, 0, sync_dir | MY_WME));
 }
+/**
+  Delete all files related to an Aria table
+*/
-int maria_delete_table_files(const char *name, my_bool temporary, myf sync_dir)
+int maria_delete_table_files(const char *name, my_bool temporary, myf flags)
 {
+  int error= 0;
   DBUG_ENTER("maria_delete_table_files");
-  if (mysql_file_delete_with_symlink(key_file_kfile, name, MARIA_NAME_IEXT, MYF(MY_WME | sync_dir)) ||
-      mysql_file_delete_with_symlink(key_file_dfile, name, MARIA_NAME_DEXT, MYF(MY_WME | sync_dir)))
-    DBUG_RETURN(my_errno);
-
+  if (mysql_file_delete_with_symlink(key_file_kfile, name, MARIA_NAME_IEXT,
+                                     flags))
+    error= my_errno;
+  if (mysql_file_delete_with_symlink(key_file_dfile, name, MARIA_NAME_DEXT,
+                                     flags))
+    error= my_errno;
   if (!temporary)
   {
-    mysql_file_delete_with_symlink(key_file_dfile, name, ".TMD", MYF(0));
+    /* Delete a possible temporary file from aria_chk */
+    mysql_file_delete_with_symlink(key_file_dfile, name, DATA_TMP_EXT, MYF(0));
+#ifdef SUPPORT_ARIA_PACK
+    /* Delete a possible temporary file from aria_pack */
     mysql_file_delete_with_symlink(key_file_dfile, name, ".OLD", MYF(0));
+#endif
   }
-  DBUG_RETURN(0);
+  DBUG_RETURN(error);
 }
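The rewritten maria_delete_table_files() no longer stops at the first failed unlink: every file is attempted and the most recent errno is kept, so a missing index file no longer strands the data file on disk. A minimal sketch of that pattern, with the standard remove() standing in for mysql_file_delete_with_symlink():

#include <cstdio>
#include <cerrno>

static int delete_table_files(const char *index_file, const char *data_file)
{
  int error= 0;
  if (remove(index_file))
    error= errno;          // remember the failure, but keep going
  if (remove(data_file))
    error= errno;          // a later failure overrides the earlier one
  return error;            // 0 only if both files were removed
}

int main()
{
  // With the old code, a missing index file aborted before the data file
  // was even attempted; now both deletes always run.
  int err= delete_table_files("/tmp/t1.MAI", "/tmp/t1.MAD");
  printf("delete_table_files: %d\n", err);
  return 0;
}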
diff --git a/storage/maria/s3_func.c b/storage/maria/s3_func.c
index 6497a0f090a..f1493455d48 100644
--- a/storage/maria/s3_func.c
+++ b/storage/maria/s3_func.c
@@ -34,8 +34,8 @@ static void convert_index_to_s3_format(uchar *header, ulong block_size,
 static void convert_index_to_disk_format(uchar *header);
 static void convert_frm_to_s3_format(uchar *header);
 static void convert_frm_to_disk_format(uchar *header);
-static int s3_read_frm_from_disk(const char *filename, uchar **to,
-                                 size_t *to_size);
+static int s3_read_file_from_disk(const char *filename, uchar **to,
+                                  size_t *to_size, my_bool print_error);
 /* Used by ha_s3.cc and tools to define different protocol options */
@@ -120,6 +120,7 @@ S3_INFO *s3_info_copy(S3_INFO *old)
                        &tmp.bucket.str, old->bucket.length+1,
                        &tmp.database.str, old->database.length+1,
                        &tmp.table.str, old->table.length+1,
+                       &tmp.base_table.str, old->base_table.length+1,
                        NullS))
     return 0;
   /* Copy lengths and new pointers to to */
@@ -132,6 +133,7 @@ S3_INFO *s3_info_copy(S3_INFO *old)
   /* Database may not be null terminated */
   strmake((char*) to->database.str, old->database.str, old->database.length);
   strmov((char*) to->table.str, old->table.str);
+  strmov((char*) to->base_table.str, old->base_table.str);
   return to;
 }
@@ -158,6 +160,17 @@ ms3_st *s3_open_connection(S3_INFO *s3)
   return s3_client;
 }
+/**
+  Close a connection to S3
+*/
+
+void s3_deinit(ms3_st *s3_client)
+{
+  DBUG_PUSH("");                          /* Avoid tracing free calls */
+  ms3_deinit(s3_client);
+  DBUG_POP();
+}
+
 /******************************************************************************
  High level functions to copy tables to and from S3
@@ -190,7 +203,7 @@ static void fix_suffix(char *to_end, ulong nr)
 */
 static my_bool copy_from_file(ms3_st *s3_client, const char *aws_bucket,
-                              const char *aws_path,
+                              char *aws_path,
                               File file, my_off_t start, my_off_t file_end,
                               uchar *block, size_t block_size,
                               my_bool compression, my_bool display)
@@ -327,7 +340,7 @@ int aria_copy_to_s3(ms3_st *s3_client, const char *aws_bucket,
     ensure that discovery of the table will work.
   */
   fn_format(filename, path, "", ".frm", MY_REPLACE_EXT);
-  if (!s3_read_frm_from_disk(filename, &alloc_block, &frm_length))
+  if (!s3_read_file_from_disk(filename, &alloc_block, &frm_length,0))
   {
     if (display)
       printf("Copying frm file %s\n", filename);
@@ -455,7 +468,7 @@ err:
   if (frm_created)
   {
     end= strmov(aws_path_end,"/frm");
-    (void) s3_delete_object(s3_client, aws_bucket, aws_path, 0);
+    (void) s3_delete_object(s3_client, aws_bucket, aws_path, MYF(ME_NOTE));
   }
   if (file >= 0)
     my_close(file, MYF(0));
@@ -710,7 +723,7 @@ int aria_delete_from_s3(ms3_st *s3_client, const char *aws_bucket,
     printf("Delete of base information and frm\n");
   strmov(aws_path_end,"/aria");
-  if (s3_delete_object(s3_client, aws_bucket, aws_path, 1))
+  if (s3_delete_object(s3_client, aws_bucket, aws_path, MYF(MY_WME)))
     error= 1;
   /*
@@ -719,7 +732,7 @@ int aria_delete_from_s3(ms3_st *s3_client, const char *aws_bucket,
   */
   strmov(aws_path_end,"/frm");
   /* Ignore error if .frm file doesn't exist */
-  s3_delete_object(s3_client, aws_bucket, aws_path, 0);
+  s3_delete_object(s3_client, aws_bucket, aws_path, MYF(ME_NOTE));
   DBUG_RETURN(error);
 }
@@ -729,7 +742,6 @@ int aria_delete_from_s3(ms3_st *s3_client, const char *aws_bucket,
   Rename a table in s3
 */
-
 int aria_rename_s3(ms3_st *s3_client, const char *aws_bucket,
                    const char *from_database, const char *from_table,
                    const char *to_database, const char *to_table,
@@ -759,28 +771,149 @@ int aria_rename_s3(ms3_st *s3_client, const char *aws_bucket,
   strmov(to_aws_path_end,"/index");
   error= s3_rename_directory(s3_client, aws_bucket, from_aws_path, to_aws_path,
-                             1);
+                             MYF(MY_WME));
   strmov(from_aws_path_end,"/data");
   strmov(to_aws_path_end,"/data");
   error|= s3_rename_directory(s3_client, aws_bucket, from_aws_path,
-                              to_aws_path, 1);
+                              to_aws_path, MYF(MY_WME));
   if (rename_frm)
   {
     strmov(from_aws_path_end, "/frm");
     strmov(to_aws_path_end, "/frm");
-    s3_rename_object(s3_client, aws_bucket, from_aws_path, to_aws_path, 1);
+    s3_rename_object(s3_client, aws_bucket, from_aws_path, to_aws_path,
+                     MYF(MY_WME));
   }
   strmov(from_aws_path_end,"/aria");
   strmov(to_aws_path_end,"/aria");
-  if (s3_rename_object(s3_client, aws_bucket, from_aws_path, to_aws_path, 1))
+  if (s3_rename_object(s3_client, aws_bucket, from_aws_path, to_aws_path,
+                       MYF(MY_WME)))
     error= 1;
   DBUG_RETURN(error);
 }
+/**
+  Copy all partition files related to a table to S3 (.frm and .par)
+
+  @param s3_client   s3 client connection
+  @param aws_bucket  bucket to use
+  @param path        The path to the partitioned table files (no extension)
+  @param old_path    In some cases the partitioned files are not yet renamed.
+                     This points to the temporary files that will later
+                     be renamed to the partitioned table
+  @param database    Database for the partitioned table
+  @param table_name  Table name for the partitioned table
+*/
+
+int partition_copy_to_s3(ms3_st *s3_client, const char *aws_bucket,
+                         const char *path, const char *old_path,
+                         const char *database, const char *table_name)
+{
+  char aws_path[FN_REFLEN+100];
+  char filename[FN_REFLEN];
+  char *aws_path_end;
+  uchar *alloc_block= 0;
+  ms3_status_st status;
+  size_t frm_length;
+  int error;
+  DBUG_ENTER("partition_copy_to_s3");
+  DBUG_PRINT("enter",("from: %s  database: %s  table: %s",
+                      path, database, table_name));
+
+  if (!old_path)
+    old_path= path;
+
+  aws_path_end= strxmov(aws_path, database, "/", table_name, "/", NullS);
+  strmov(aws_path_end, "frm");
+  fn_format(filename, old_path, "", ".frm", MY_REPLACE_EXT);
+
+  /* Just to be safe, delete any conflicting object */
+  if (!ms3_status(s3_client, aws_bucket, aws_path, &status))
+  {
+    if ((error= s3_delete_object(s3_client, aws_bucket, aws_path,
+                                 MYF(ME_FATAL))))
+      DBUG_RETURN(error);
+  }
+  if ((error= s3_read_file_from_disk(filename, &alloc_block, &frm_length, 0)))
+  {
+    /*
+      In case of ADD PARTITION the .frm file is already renamed.
+      Copy the renamed file if it exists.
+    */
+    fn_format(filename, path, "", ".frm", MY_REPLACE_EXT);
+    if ((error= s3_read_file_from_disk(filename, &alloc_block, &frm_length,
+                                       1)))
+      goto err;
+  }
+  if ((error= s3_put_object(s3_client, aws_bucket, aws_path, alloc_block,
+                            frm_length, 0)))
+    goto err;
+
+  /*
+    Note that because ha_partition::rename_table() is called before
+    this function, the .par table already has its final name!
+  */
+  fn_format(filename, path, "", ".par", MY_REPLACE_EXT);
+  strmov(aws_path_end, "par");
+  if (!ms3_status(s3_client, aws_bucket, aws_path, &status))
+  {
+    if ((error= s3_delete_object(s3_client, aws_bucket, aws_path,
+                                 MYF(ME_FATAL))))
+      goto err;
+  }
+
+  my_free(alloc_block);
+  alloc_block= 0;
+  if ((error=s3_read_file_from_disk(filename, &alloc_block, &frm_length, 1)))
+    goto err;
+  if ((error= s3_put_object(s3_client, aws_bucket, aws_path, alloc_block,
+                            frm_length, 0)))
+  {
+    /* Delete the .frm file created above */
+    strmov(aws_path_end, "frm");
+    (void) s3_delete_object(s3_client, aws_bucket, aws_path,
+                            MYF(ME_FATAL));
+    goto err;
+  }
+  error= 0;
+
+err:
+  my_free(alloc_block);
+  DBUG_RETURN(error);
+}
+
+
+/**
+  Drop all partition files related to a table from S3
+*/
+
+int partition_delete_from_s3(ms3_st *s3_client, const char *aws_bucket,
+                             const char *database, const char *table,
+                             myf error_flags)
+{
+  char aws_path[FN_REFLEN+100];
+  char *aws_path_end;
+  int error=0, res;
+  DBUG_ENTER("partition_delete_from_s3");
+
+  aws_path_end= strxmov(aws_path, database, "/", table, NullS);
+  strmov(aws_path_end, "/par");
+
+  if ((res= s3_delete_object(s3_client, aws_bucket, aws_path, error_flags)))
+    error= res;
+  /*
+    Delete .frm last as this is used by discovery to check if an S3 table
+    exists
+  */
+  strmov(aws_path_end, "/frm");
+  if ((res= s3_delete_object(s3_client, aws_bucket, aws_path, error_flags)))
+    error= res;
+
+  DBUG_RETURN(error);
+}
 /******************************************************************************
  Low level functions interfacing with libmarias3
@@ -794,9 +927,9 @@ int aria_rename_s3(ms3_st *s3_client, const char *aws_bucket,
 */
-my_bool s3_put_object(ms3_st *s3_client,
const char *aws_bucket, + const char *name, uchar *data, size_t length, + my_bool compression) { uint8_t error; const char *errmsg; @@ -816,14 +949,14 @@ my_bool s3_put_object(ms3_st *s3_client, const char *aws_bucket, } if (likely(!(error= ms3_put(s3_client, aws_bucket, name, data, length)))) - DBUG_RETURN(FALSE); + DBUG_RETURN(0); if (!(errmsg= ms3_server_error(s3_client))) errmsg= ms3_error(error); my_printf_error(EE_WRITE, "Got error from put_object(%s): %d %s", MYF(0), name, error, errmsg); - DBUG_RETURN(TRUE); + DBUG_RETURN(EE_WRITE); } @@ -835,11 +968,12 @@ my_bool s3_put_object(ms3_st *s3_client, const char *aws_bucket, @param print_error 2 Print error that table doesn't exists */ -my_bool s3_get_object(ms3_st *s3_client, const char *aws_bucket, - const char *name, S3_BLOCK *block, - my_bool compression, int print_error) +int s3_get_object(ms3_st *s3_client, const char *aws_bucket, + const char *name, S3_BLOCK *block, + my_bool compression, int print_error) { uint8_t error; + int result= 0; uchar *data; DBUG_ENTER("s3_get_object"); DBUG_PRINT("enter", ("name: %s compression: %d", name, compression)); @@ -866,9 +1000,9 @@ my_bool s3_get_object(ms3_st *s3_client, const char *aws_bucket, s3_free(block); my_printf_error(HA_ERR_NOT_A_TABLE, "Block '%s' is not compressed", MYF(0), name); - DBUG_RETURN(TRUE); + DBUG_RETURN(HA_ERR_NOT_A_TABLE); } - DBUG_RETURN(FALSE); + DBUG_RETURN(0); } if (((uchar*)block->str)[0] > 1) @@ -876,7 +1010,7 @@ my_bool s3_get_object(ms3_st *s3_client, const char *aws_bucket, s3_free(block); my_printf_error(HA_ERR_NOT_A_TABLE, "Block '%s' is not compressed", MYF(0), name); - DBUG_RETURN(TRUE); + DBUG_RETURN(HA_ERR_NOT_A_TABLE); } length= uint3korr(block->str+1); @@ -885,7 +1019,7 @@ my_bool s3_get_object(ms3_st *s3_client, const char *aws_bucket, length, MYF(MY_WME | MY_THREAD_SPECIFIC)))) { s3_free(block); - DBUG_RETURN(TRUE); + DBUG_RETURN(EE_OUTOFMEMORY); } if (uncompress(data, &length, block->str + COMPRESS_HEADER, block->length - COMPRESS_HEADER)) @@ -894,23 +1028,27 @@ my_bool s3_get_object(ms3_st *s3_client, const char *aws_bucket, "Got error uncompressing s3 packet", MYF(0)); s3_free(block); my_free(data); - DBUG_RETURN(TRUE); + DBUG_RETURN(ER_NET_UNCOMPRESS_ERROR); } s3_free(block); block->str= block->alloc_ptr= data; block->length= length; } - DBUG_RETURN(FALSE); + DBUG_RETURN(0); } - if (print_error) + + if (error == 9) { - if (error == 9) - { - my_errno= print_error == 1 ? EE_FILENOTFOUND : HA_ERR_NO_SUCH_TABLE; + result= my_errno= (print_error == 1 ? 
+                              EE_FILENOTFOUND :
+                              HA_ERR_NO_SUCH_TABLE);
+    if (print_error)
       my_printf_error(my_errno, "Expected object '%s' didn't exist",
                       MYF(0), name);
-  }
-  else
+  }
+  else
+  {
+    result= my_errno= EE_READ;
+    if (print_error)
     {
       const char *errmsg;
       if (!(errmsg= ms3_server_error(s3_client)))
@@ -918,40 +1056,43 @@ my_bool s3_get_object(ms3_st *s3_client, const char *aws_bucket,
       my_printf_error(EE_READ, "Got error from get_object(%s): %d %s",
                       MYF(0), name, error, errmsg);
-      my_errno= EE_READ;
     }
   }
   s3_free(block);
-  DBUG_RETURN(TRUE);
+  DBUG_RETURN(result);
 }
-my_bool s3_delete_object(ms3_st *s3_client, const char *aws_bucket,
-                         const char *name, my_bool print_error)
+int s3_delete_object(ms3_st *s3_client, const char *aws_bucket,
+                     const char *name, myf error_flags)
 {
   uint8_t error;
+  int result= 1;
   DBUG_ENTER("s3_delete_object");
   DBUG_PRINT("enter", ("name: %s", name));
   if (likely(!(error= ms3_delete(s3_client, aws_bucket, name))))
-    DBUG_RETURN(FALSE);
+    DBUG_RETURN(0);
-  if (print_error)
+  if (error_flags)
   {
+    error_flags&= ~MY_WME;
     if (error == 9)
-      my_printf_error(EE_FILENOTFOUND, "Expected object '%s' didn't exist",
-                      MYF(0), name);
+      my_printf_error(result= EE_FILENOTFOUND,
+                      "Expected object '%s' didn't exist",
+                      error_flags, name);
     else
     {
       const char *errmsg;
       if (!(errmsg= ms3_server_error(s3_client)))
         errmsg= ms3_error(error);
-      my_printf_error(EE_READ, "Got error from delete_object(%s): %d %s",
-                      MYF(0), name, error, errmsg);
+      my_printf_error(result= EE_READ,
+                      "Got error from delete_object(%s): %d %s",
+                      error_flags, name, error, errmsg);
     }
   }
-  DBUG_RETURN(TRUE);
+  DBUG_RETURN(result);
 }
@@ -980,7 +1121,7 @@ int s3_delete_directory(ms3_st *s3_client, const char *aws_bucket,
   }
   for (list= org_list ; list ; list= list->next)
-    if (s3_delete_object(s3_client, aws_bucket, list->key, 1))
+    if (s3_delete_object(s3_client, aws_bucket, list->key, MYF(MY_WME)))
       error= 1;
   if (org_list)
     ms3_list_free(org_list);
@@ -990,7 +1131,7 @@
 my_bool s3_rename_object(ms3_st *s3_client, const char *aws_bucket,
                          const char *from_name, const char *to_name,
-                         my_bool print_error)
+                         myf error_flags)
 {
   uint8_t error;
   DBUG_ENTER("s3_rename_object");
@@ -1001,12 +1142,13 @@ my_bool s3_rename_object(ms3_st *s3_client, const char *aws_bucket,
                               aws_bucket, to_name))))
     DBUG_RETURN(FALSE);
-  if (print_error)
+  if (error_flags)
   {
+    error_flags&= ~MY_WME;
     if (error == 9)
     {
       my_printf_error(EE_FILENOTFOUND, "Expected object '%s' didn't exist",
-                      MYF(0), from_name);
+                      error_flags, from_name);
     }
     else
     {
@@ -1015,7 +1157,7 @@ my_bool s3_rename_object(ms3_st *s3_client, const char *aws_bucket,
         errmsg= ms3_error(error);
       my_printf_error(EE_READ,
                       "Got error from move_object(%s -> %s): %d %",
-                      MYF(0),
+                      error_flags,
                       from_name, to_name, error, errmsg);
     }
   }
@@ -1025,7 +1167,7 @@
 int s3_rename_directory(ms3_st *s3_client, const char *aws_bucket,
                         const char *from_name, const char *to_name,
-                        my_bool print_error)
+                        myf error_flags)
 {
   ms3_list_st *list, *org_list= 0;
   my_bool error= 0;
@@ -1039,7 +1181,8 @@ int s3_rename_directory(ms3_st *s3_client, const char *aws_bucket,
       errmsg= ms3_error(error);
     my_printf_error(EE_FILENOTFOUND,
-                    "Can't get list of files from %s. Error: %d %s", MYF(0),
+                    "Can't get list of files from %s. Error: %d %s",
Error: %d %s", + MYF(error_flags & ~MY_WME), from_name, error, errmsg); DBUG_RETURN(EE_FILENOTFOUND); } @@ -1052,7 +1195,7 @@ int s3_rename_directory(ms3_st *s3_client, const char *aws_bucket, { strmake(end, sep, (sizeof(name) - (end-name) - 1)); if (s3_rename_object(s3_client, aws_bucket, list->key, name, - print_error)) + error_flags)) error= 1; } } @@ -1180,18 +1323,19 @@ my_bool set_database_and_table_from_path(S3_INFO *s3, const char *path) Read frm from the disk */ -static int s3_read_frm_from_disk(const char *filename, uchar **to, - size_t *to_size) +static int s3_read_file_from_disk(const char *filename, uchar **to, + size_t *to_size, my_bool print_error) { File file; uchar *alloc_block; size_t file_size; + int error; *to= 0; if ((file= my_open(filename, O_RDONLY | O_SHARE | O_NOFOLLOW | O_CLOEXEC, - MYF(MY_WME))) < 0) - return(1); + MYF(print_error ? MY_WME: 0))) < 0) + return(my_errno); file_size= (size_t) my_seek(file, 0L, MY_SEEK_END, MYF(0)); if (!(alloc_block= my_malloc(PSI_NOT_INSTRUMENTED, file_size, MYF(MY_WME)))) @@ -1206,25 +1350,27 @@ static int s3_read_frm_from_disk(const char *filename, uchar **to, return 0; err: + error= my_errno; my_free(alloc_block); my_close(file, MYF(0)); - return 1; + return error; } /** - Get .frm from S3 + Get .frm or par from S3 @return 0 ok @return 1 error */ -my_bool s3_get_frm(ms3_st *s3_client, S3_INFO *s3_info, S3_BLOCK *block) +my_bool s3_get_def(ms3_st *s3_client, S3_INFO *s3_info, S3_BLOCK *block, + const char *ext) { char aws_path[AWS_PATH_LENGTH]; strxnmov(aws_path, sizeof(aws_path)-1, s3_info->database.str, "/", - s3_info->table.str, "/frm", NullS); + s3_info->table.str, "/", ext, NullS); return s3_get_object(s3_client, s3_info->bucket.str, aws_path, block, 0, 0); @@ -1333,10 +1479,13 @@ int s3_check_frm_version(ms3_st *s3_client, S3_INFO *s3_info) DBUG_ENTER("s3_check_frm_version"); strxnmov(aws_path, sizeof(aws_path)-1, s3_info->database.str, "/", - s3_info->table.str, "/frm", NullS); + s3_info->base_table.str, "/frm", NullS); if (s3_get_object(s3_client, s3_info->bucket.str, aws_path, &block, 0, 0)) + { + DBUG_PRINT("exit", ("No object found")); DBUG_RETURN(2); /* Ignore check, use old frm */ + } if (get_tabledef_version_from_frm(uuid, (uchar*) block.str, block.length) || s3_info->tabledef_version.length != MY_UUID_SIZE) @@ -1350,6 +1499,8 @@ int s3_check_frm_version(ms3_st *s3_client, S3_INFO *s3_info) s3_free(&block); if (res) DBUG_PRINT("error", ("Wrong table version")); + else + DBUG_PRINT("error", ("Version strings matches")); DBUG_RETURN(res); } diff --git a/storage/maria/s3_func.h b/storage/maria/s3_func.h index cfba29cd11f..09fb6802cca 100644 --- a/storage/maria/s3_func.h +++ b/storage/maria/s3_func.h @@ -31,11 +31,20 @@ extern TYPELIB s3_protocol_typelib; typedef struct s3_info { + /* Connection strings */ LEX_CSTRING access_key, secret_key, region, bucket, host_name; - /* The following will be filled in by maria_open() */ + + /* Will be set by caller or by ma_open() */ LEX_CSTRING database, table; + /* + Name of the partition table if the table is partitioned. If not, it's set + to be same as table. This is used to know which frm file to read to + check table version. 
+ */ + LEX_CSTRING base_table; + /* Sent to open to verify version */ LEX_CUSTRING tabledef_version; @@ -68,17 +77,18 @@ int aria_rename_s3(ms3_st *s3_client, const char *aws_bucket, const char *to_database, const char *to_table, my_bool rename_frm); ms3_st *s3_open_connection(S3_INFO *s3); -my_bool s3_put_object(ms3_st *s3_client, const char *aws_bucket, - const char *name, uchar *data, size_t length, - my_bool compression); -my_bool s3_get_object(ms3_st *s3_client, const char *aws_bucket, - const char *name, S3_BLOCK *block, my_bool compression, - int print_error); -my_bool s3_delete_object(ms3_st *s3_client, const char *aws_bucket, - const char *name, my_bool print_error); +void s3_deinit(ms3_st *s3_client); +int s3_put_object(ms3_st *s3_client, const char *aws_bucket, + const char *name, uchar *data, size_t length, + my_bool compression); +int s3_get_object(ms3_st *s3_client, const char *aws_bucket, + const char *name, S3_BLOCK *block, my_bool compression, + int print_error); +int s3_delete_object(ms3_st *s3_client, const char *aws_bucket, + const char *name, myf error_flags); my_bool s3_rename_object(ms3_st *s3_client, const char *aws_bucket, const char *from_name, const char *to_name, - my_bool print_error); + myf error_flags); void s3_free(S3_BLOCK *data); my_bool s3_copy_from_file(ms3_st *s3_client, const char *aws_bucket, char *aws_path, File file, my_off_t start, @@ -92,11 +102,18 @@ int s3_delete_directory(ms3_st *s3_client, const char *aws_bucket, const char *path); int s3_rename_directory(ms3_st *s3_client, const char *aws_bucket, const char *from_name, const char *to_name, - my_bool print_error); + myf error_flags); +int partition_delete_from_s3(ms3_st *s3_client, const char *aws_bucket, + const char *database, const char *table, + myf error_flags); +int partition_copy_to_s3(ms3_st *s3_client, const char *aws_bucket, + const char *path, const char *old_path, + const char *database, const char *table_name); S3_INFO *s3_info_copy(S3_INFO *old); my_bool set_database_and_table_from_path(S3_INFO *s3, const char *path); -my_bool s3_get_frm(ms3_st *s3_client, S3_INFO *S3_info, S3_BLOCK *block); +my_bool s3_get_def(ms3_st *s3_client, S3_INFO *S3_info, S3_BLOCK *block, + const char *ext); my_bool s3_frm_exists(ms3_st *s3_client, S3_INFO *s3_info); int s3_check_frm_version(ms3_st *s3_client, S3_INFO *s3_info); my_bool read_index_header(ms3_st *client, S3_INFO *s3, S3_BLOCK *block); diff --git a/storage/mroonga/ha_mroonga.cpp b/storage/mroonga/ha_mroonga.cpp index c58c3c39125..6f302e89213 100644 --- a/storage/mroonga/ha_mroonga.cpp +++ b/storage/mroonga/ha_mroonga.cpp @@ -16975,15 +16975,15 @@ void ha_mroonga::unbind_psi() DBUG_VOID_RETURN; } -int ha_mroonga::wrapper_rebind() +void ha_mroonga::wrapper_rebind() { MRN_DBUG_ENTER_METHOD(); MRN_SET_WRAP_SHARE_KEY(share, table->s); MRN_SET_WRAP_TABLE_KEY(this, table); - int error= wrap_handler->rebind(); + wrap_handler->rebind_psi(); MRN_SET_BASE_SHARE_KEY(share, table->s); MRN_SET_BASE_TABLE_KEY(this, table); - DBUG_RETURN(error); + DBUG_VOID_RETURN; } void ha_mroonga::storage_rebind() @@ -16992,22 +16992,17 @@ void ha_mroonga::storage_rebind() DBUG_VOID_RETURN; } -int ha_mroonga::rebind() +void ha_mroonga::rebind_psi() { MRN_DBUG_ENTER_METHOD(); - if (int error= handler::rebind()) - DBUG_RETURN(error); + handler::rebind_psi(); if (share->wrapper_mode) { - if (int error= wrapper_rebind()) - { - handler::unbind_psi(); - DBUG_RETURN(error); - } + wrapper_rebind(); } else { storage_rebind(); } - DBUG_RETURN(0); + DBUG_VOID_RETURN; } #endif 
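The mroonga hunk above restores the pre-existing rebind_psi() contract: performance-schema rebinding is a void operation again, and the wrapper simply delegates to the wrapped engine instead of propagating an error code. A minimal sketch of that delegation, with simplified stand-in class names:

struct handler
{
  virtual ~handler() {}
  virtual void rebind_psi() { /* re-attach PSI instrumentation */ }
  virtual void unbind_psi() { /* detach PSI instrumentation */ }
};

struct wrapper_handler : handler
{
  handler *wrap_handler;
  wrapper_handler(handler *wrapped) : wrap_handler(wrapped) {}

  void rebind_psi() override
  {
    handler::rebind_psi();       // rebind this handler first
    wrap_handler->rebind_psi();  // then delegate to the wrapped engine
  }
};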
diff --git a/storage/mroonga/ha_mroonga.hpp b/storage/mroonga/ha_mroonga.hpp index 1304cce4504..f129ab74aa2 100644 --- a/storage/mroonga/ha_mroonga.hpp +++ b/storage/mroonga/ha_mroonga.hpp @@ -631,7 +631,7 @@ protected: void free_foreign_key_create_info(char* str) mrn_override; #ifdef MRN_HAVE_HA_REBIND_PSI void unbind_psi() mrn_override; - int rebind() mrn_override; + void rebind_psi() mrn_override; #endif my_bool register_query_cache_table(THD *thd, const char *table_key, @@ -1290,7 +1290,7 @@ private: #ifdef MRN_HAVE_HA_REBIND_PSI void wrapper_unbind_psi(); void storage_unbind_psi(); - int wrapper_rebind(); + void wrapper_rebind(); void storage_rebind(); #endif my_bool wrapper_register_query_cache_table(THD *thd, |