summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2018-03-21 16:18:21 +0200
committerMarko Mäkelä <marko.makela@mariadb.com>2018-03-21 22:58:52 +0200
commit3d7915f000b15ad296e3ff18d750f36a4f17de0e (patch)
treea5abe2dd6e6b8a633130597aa08937fcc92c965b
parent82aeb6b59640b9733c4026bda71887720153b70a (diff)
parent4629db0dd6442ea7c2d3ecd636060bc4d21f2d19 (diff)
downloadmariadb-git-3d7915f000b15ad296e3ff18d750f36a4f17de0e.tar.gz
Merge 10.1 into 10.2
-rw-r--r--include/my_valgrind.h2
-rw-r--r--mysql-test/r/having.result14
-rw-r--r--mysql-test/suite/engines/iuds/r/update_time.result2
-rw-r--r--mysql-test/suite/engines/iuds/t/update_time.test1
-rw-r--r--mysql-test/suite/galera/disabled.def1
-rw-r--r--mysql-test/suite/galera/t/galera_var_reject_queries.test2
-rw-r--r--mysql-test/suite/innodb/r/default_row_format_compatibility.result3
-rw-r--r--mysql-test/suite/innodb/t/default_row_format_compatibility.test7
-rw-r--r--mysql-test/suite/plugins/r/disks.result14
-rw-r--r--mysql-test/suite/plugins/t/disks.test11
-rw-r--r--mysql-test/t/having.test18
-rw-r--r--plugin/information_schema_disks/CMakeLists.txt5
-rw-r--r--plugin/information_schema_disks/README.txt101
-rw-r--r--plugin/information_schema_disks/information_schema_disks.cc154
-rw-r--r--storage/innobase/fil/fil0fil.cc494
-rw-r--r--storage/innobase/include/fil0fil.h84
-rw-r--r--storage/innobase/mem/mem0mem.cc5
-rw-r--r--storage/innobase/os/os0file.cc128
-rw-r--r--storage/innobase/row/row0import.cc713
-rw-r--r--storage/xtradb/fil/fil0fil.cc487
-rw-r--r--storage/xtradb/include/fil0fil.h101
-rw-r--r--storage/xtradb/mem/mem0mem.cc5
-rw-r--r--storage/xtradb/os/os0file.cc46
-rw-r--r--storage/xtradb/row/row0import.cc760
24 files changed, 1538 insertions, 1620 deletions
diff --git a/include/my_valgrind.h b/include/my_valgrind.h
index 870fb453179..cb0886bda46 100644
--- a/include/my_valgrind.h
+++ b/include/my_valgrind.h
@@ -35,6 +35,8 @@
# define MEM_CHECK_DEFINED(a,len) VALGRIND_CHECK_MEM_IS_DEFINED(a,len)
#elif defined(__SANITIZE_ADDRESS__)
# include <sanitizer/asan_interface.h>
+/* How to do manual poisoning:
+https://github.com/google/sanitizers/wiki/AddressSanitizerManualPoisoning */
# define MEM_UNDEFINED(a,len) ASAN_UNPOISON_MEMORY_REGION(a,len)
# define MEM_NOACCESS(a,len) ASAN_POISON_MEMORY_REGION(a,len)
# define MEM_CHECK_ADDRESSABLE(a,len) ((void) 0)
diff --git a/mysql-test/r/having.result b/mysql-test/r/having.result
index fca4c43ed20..c64d0579962 100644
--- a/mysql-test/r/having.result
+++ b/mysql-test/r/having.result
@@ -721,6 +721,20 @@ SELECT * FROM t1 JOIN t2 ON c1 = c2 HAVING c2 > 'a' ORDER BY c2 LIMIT 1;
c1 c2
x x
DROP TABLE t1,t2;
+#
+# MDEV-6736: Valgrind warnings 'Invalid read' in subselect_engine::calc_const_tables with SQ
+# in WHERE and HAVING, ORDER BY, materialization+semijoin
+#
+CREATE TABLE t1 (a INT) ENGINE=MyISAM;
+INSERT INTO t1 VALUES (3),(8);
+CREATE TABLE t2 (b INT) ENGINE=MyISAM;
+INSERT INTO t2 VALUES (2),(1);
+SELECT a FROM t1
+WHERE 9 IN ( SELECT MIN( a ) FROM t1 )
+HAVING a <> ( SELECT COUNT(*) FROM t2 )
+ORDER BY a;
+a
+DROP TABLE t1,t2;
End of 10.0 tests
#
# MDEV-10716: Assertion `real_type() != FIELD_ITEM' failed in
diff --git a/mysql-test/suite/engines/iuds/r/update_time.result b/mysql-test/suite/engines/iuds/r/update_time.result
index 48ddb82d521..131780059dd 100644
--- a/mysql-test/suite/engines/iuds/r/update_time.result
+++ b/mysql-test/suite/engines/iuds/r/update_time.result
@@ -1215,7 +1215,7 @@ c1 c2
838:59:59 838:59:59
UPDATE IGNORE t1 SET t1.c2='99999.99999' WHERE c1 BETWEEN 080000 AND 100000;
Warnings:
-Warning 1265 Data truncated for column 'c2' at row 1
+Warning 1265 Data truncated for column 'c2' at row N
SELECT * FROM t1;
c1 c2
-12:12:12 12:12:12
diff --git a/mysql-test/suite/engines/iuds/t/update_time.test b/mysql-test/suite/engines/iuds/t/update_time.test
index 5ce69b46986..bdfe81d3c7c 100644
--- a/mysql-test/suite/engines/iuds/t/update_time.test
+++ b/mysql-test/suite/engines/iuds/t/update_time.test
@@ -172,6 +172,7 @@ SELECT * FROM t1;
# Update using range
# EXPLAIN SELECT * FROM t1 WHERE c1 BETWEEN 080000 AND 100000;
+--replace_regex /(Data truncated for column 'c2' at row) [1-9][0-9]*/\1 N/
UPDATE IGNORE t1 SET t1.c2='99999.99999' WHERE c1 BETWEEN 080000 AND 100000;
--sorted_result
SELECT * FROM t1;
diff --git a/mysql-test/suite/galera/disabled.def b/mysql-test/suite/galera/disabled.def
index f6bf945ed1f..29673ce5884 100644
--- a/mysql-test/suite/galera/disabled.def
+++ b/mysql-test/suite/galera/disabled.def
@@ -56,3 +56,4 @@ galera_ist_progress: MDEV-15236 galera_ist_progress fails when trying to read tr
galera_gtid : MDEV-13549 Galera test failures 10.1
galera_gtid_slave : MDEV-13549 Galera test failures 10.1
galera_unicode_identifiers : MDEV-13549 Galera test failures 10.1
+galera.galera_gcs_fc_limit : MDEV-13549 Galera test failures 10.1
diff --git a/mysql-test/suite/galera/t/galera_var_reject_queries.test b/mysql-test/suite/galera/t/galera_var_reject_queries.test
index b1af9d8aa2b..6859855c35f 100644
--- a/mysql-test/suite/galera/t/galera_var_reject_queries.test
+++ b/mysql-test/suite/galera/t/galera_var_reject_queries.test
@@ -23,7 +23,7 @@ SELECT * FROM t1;
SET GLOBAL wsrep_reject_queries = ALL_KILL;
--connection node_1a
---error ER_CONNECTION_KILLED,2013
+--error ER_CONNECTION_KILLED,2013,2006
SELECT * FROM t1;
--connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1
diff --git a/mysql-test/suite/innodb/r/default_row_format_compatibility.result b/mysql-test/suite/innodb/r/default_row_format_compatibility.result
index c0fb8f029a5..d63a3955d0d 100644
--- a/mysql-test/suite/innodb/r/default_row_format_compatibility.result
+++ b/mysql-test/suite/innodb/r/default_row_format_compatibility.result
@@ -41,6 +41,9 @@ SHOW TABLE STATUS LIKE 'tab';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
tab InnoDB # Compact # # # # # # NULL # NULL NULL latin1_swedish_ci NULL
ALTER TABLE tab DISCARD TABLESPACE;
+call mtr.add_suppression("InnoDB: Tried to read .* bytes at offset 0");
+ALTER TABLE tab IMPORT TABLESPACE;
+ERROR HY000: Internal error: Cannot reset LSNs in table `test`.`tab` : I/O error
ALTER TABLE tab IMPORT TABLESPACE;
SELECT * FROM tab;
a
diff --git a/mysql-test/suite/innodb/t/default_row_format_compatibility.test b/mysql-test/suite/innodb/t/default_row_format_compatibility.test
index 17ab21ca06b..0f433b1fcfe 100644
--- a/mysql-test/suite/innodb/t/default_row_format_compatibility.test
+++ b/mysql-test/suite/innodb/t/default_row_format_compatibility.test
@@ -81,7 +81,14 @@ SHOW TABLE STATUS LIKE 'tab';
ALTER TABLE tab DISCARD TABLESPACE;
# Move the *ibd,*.cfg file into orginal location
+--copy_file $MYSQLD_DATADIR/tab.cfg $MYSQLD_DATADIR/test/tab.ibd
--move_file $MYSQLD_DATADIR/tab.cfg $MYSQLD_DATADIR/test/tab.cfg
+
+call mtr.add_suppression("InnoDB: Tried to read .* bytes at offset 0");
+
+--error ER_INTERNAL_ERROR
+ALTER TABLE tab IMPORT TABLESPACE;
+--remove_file $MYSQLD_DATADIR/test/tab.ibd
--move_file $MYSQLD_DATADIR/tab.ibd $MYSQLD_DATADIR/test/tab.ibd
# Check import is successful (because same row_format)
diff --git a/mysql-test/suite/plugins/r/disks.result b/mysql-test/suite/plugins/r/disks.result
new file mode 100644
index 00000000000..dba8db046d3
--- /dev/null
+++ b/mysql-test/suite/plugins/r/disks.result
@@ -0,0 +1,14 @@
+install plugin DISKS soname 'disks';
+show create table information_schema.disks;
+Table Create Table
+DISKS CREATE TEMPORARY TABLE `DISKS` (
+ `Disk` varchar(4096) NOT NULL DEFAULT '',
+ `Path` varchar(4096) NOT NULL DEFAULT '',
+ `Total` int(32) NOT NULL DEFAULT 0,
+ `Used` int(32) NOT NULL DEFAULT 0,
+ `Available` int(32) NOT NULL DEFAULT 0
+) ENGINE=MEMORY DEFAULT CHARSET=utf8
+select sum(Total) > sum(Available), sum(Total)>sum(Used) from information_schema.disks;
+sum(Total) > sum(Available) sum(Total)>sum(Used)
+1 1
+uninstall plugin DISKS;
diff --git a/mysql-test/suite/plugins/t/disks.test b/mysql-test/suite/plugins/t/disks.test
new file mode 100644
index 00000000000..a2371b97584
--- /dev/null
+++ b/mysql-test/suite/plugins/t/disks.test
@@ -0,0 +1,11 @@
+--source include/not_windows.inc
+
+if (!$DISKS_SO) {
+ skip No DISKS plugin;
+}
+
+install plugin DISKS soname 'disks';
+show create table information_schema.disks;
+select sum(Total) > sum(Available), sum(Total)>sum(Used) from information_schema.disks;
+
+uninstall plugin DISKS;
diff --git a/mysql-test/t/having.test b/mysql-test/t/having.test
index 3675a09d82f..c9231fef3be 100644
--- a/mysql-test/t/having.test
+++ b/mysql-test/t/having.test
@@ -759,6 +759,24 @@ SELECT * FROM t1 JOIN t2 ON c1 = c2 HAVING c2 > 'a' ORDER BY c2 LIMIT 1;
DROP TABLE t1,t2;
+--echo #
+--echo # MDEV-6736: Valgrind warnings 'Invalid read' in subselect_engine::calc_const_tables with SQ
+--echo # in WHERE and HAVING, ORDER BY, materialization+semijoin
+--echo #
+
+CREATE TABLE t1 (a INT) ENGINE=MyISAM;
+INSERT INTO t1 VALUES (3),(8);
+
+CREATE TABLE t2 (b INT) ENGINE=MyISAM;
+INSERT INTO t2 VALUES (2),(1);
+
+SELECT a FROM t1
+WHERE 9 IN ( SELECT MIN( a ) FROM t1 )
+HAVING a <> ( SELECT COUNT(*) FROM t2 )
+ORDER BY a;
+
+DROP TABLE t1,t2;
+
--echo End of 10.0 tests
--echo #
diff --git a/plugin/information_schema_disks/CMakeLists.txt b/plugin/information_schema_disks/CMakeLists.txt
new file mode 100644
index 00000000000..a0ed929c62c
--- /dev/null
+++ b/plugin/information_schema_disks/CMakeLists.txt
@@ -0,0 +1,5 @@
+IF(NOT WIN32)
+ INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/sql)
+ MYSQL_ADD_PLUGIN(DISKS information_schema_disks.cc MODULE_ONLY RECOMPILE_FOR_EMBEDDED)
+ENDIF()
+
diff --git a/plugin/information_schema_disks/README.txt b/plugin/information_schema_disks/README.txt
new file mode 100644
index 00000000000..1c9b8fb6283
--- /dev/null
+++ b/plugin/information_schema_disks/README.txt
@@ -0,0 +1,101 @@
+Information Schema Disks
+------------------------
+This is a proof-of-concept information schema plugin that allows the
+disk space situation to be monitored. When installed, it can be used
+as follows:
+
+ > select * from information_schema.disks;
+ +-----------+-----------------------+-----------+----------+-----------+
+ | Disk | Path | Total | Used | Available |
+ +-----------+-----------------------+-----------+----------+-----------+
+ | /dev/sda3 | / | 47929956 | 30666304 | 14805864 |
+ | /dev/sda1 | /boot/efi | 191551 | 3461 | 188090 |
+ | /dev/sda4 | /home | 174679768 | 80335392 | 85448120 |
+ | /dev/sdb1 | /mnt/hdd | 961301832 | 83764 | 912363644 |
+ | /dev/sdb1 | /home/wikman/Music | 961301832 | 83764 | 912363644 |
+ | /dev/sdb1 | /home/wikman/Videos | 961301832 | 83764 | 912363644 |
+ | /dev/sdb1 | /home/wikman/hdd | 961301832 | 83764 | 912363644 |
+ | /dev/sdb1 | /home/wikman/Pictures | 961301832 | 83764 | 912363644 |
+ | /dev/sda3 | /var/lib/docker/aufs | 47929956 | 30666304 | 14805864 |
+ +-----------+-----------------------+-----------+----------+-----------+
+ 9 rows in set (0.00 sec)
+
+- 'Disk' is the name of the disk itself.
+- 'Path' is the mount point of the disk.
+- 'Total' is the total space in KiB.
+- 'Used' is the used amount of space in KiB, and
+- 'Available' is the amount of space in KiB available to non-root users.
+
+Note that as the amount of space available to root may be more than what
+is available to non-root users, 'available' + 'used' may be less than 'total'.
+
+All paths to which a particular disk has been mounted are reported. The
+rationale is that someone might want to take different action e.g. depending
+on which disk is relevant for a particular path. This leads to the same disk
+being reported multiple times. An alternative to this would be to have two
+tables; disks and mounts.
+
+ > select * from information_schema.disks;
+ +-----------+-----------+----------+-----------+
+ | Disk | Total | Used | Available |
+ +-----------+-----------+----------+-----------+
+ | /dev/sda3 | 47929956 | 30666304 | 14805864 |
+ | /dev/sda1 | 191551 | 3461 | 188090 |
+ | /dev/sda4 | 174679768 | 80335392 | 85448120 |
+ | /dev/sdb1 | 961301832 | 83764 | 912363644 |
+ +-----------+-----------+----------+-----------+
+
+ > select * from information_schema.mounts;
+ +-----------------------+-----------+
+ | Path | Disk |
+ +-----------------------+-----------+
+ | / | /dev/sda3 |
+ | /boot/efi | /dev/sda1 |
+ | /home | /dev/sda4 |
+ | /mnt/hdd | /dev/sdb1 |
+ | /home/wikman/Music | /dev/sdb1 |
+ ...
+
+
+Building
+--------
+- Ensure that the directory information_schema_disks is in the top-level
+ directory of the server.
+- Add
+
+ ADD_SUBDIRECTORY(information_schema_disks)
+
+ to the top-level CMakeLists.txt
+
+- Invoke make
+
+ $ make
+
+Installation
+------------
+- Copy information_schema_disks/libinformation_schema_disks.so to the plugin
+ directory of the server:
+
+ $ cd information_schema_disks
+ $ sudo cp libinformation_schema_disks.so plugin-directory-of-server
+
+- Using mysql, install the plugin:
+
+ MariaDB [(none)]> install plugin disks soname 'libinformation_schema_disks.so';
+
+Usage
+-----
+The plugin appears as the table 'disks' in 'information_schema'.
+
+ MariaDB [(none)]> select * from information_schema.disks;
+ +-----------+-----------------------+-----------+----------+-----------+
+ | Disk | Path | Total | Used | Available |
+ +-----------+-----------------------+-----------+----------+-----------+
+ | /dev/sda3 | / | 47929956 | 30666308 | 14805860 |
+ | /dev/sda1 | /boot/efi | 191551 | 3461 | 188090 |
+ | /dev/sda4 | /home | 174679768 | 80348148 | 85435364 |
+ | /dev/sdb1 | /mnt/hdd | 961301832 | 83764 | 912363644 |
+ | /dev/sdb1 | /home/wikman/Music | 961301832 | 83764 | 912363644 |
+ | /dev/sdb1 | /home/wikman/Videos | 961301832 | 83764 | 912363644 |
+ ...
+
diff --git a/plugin/information_schema_disks/information_schema_disks.cc b/plugin/information_schema_disks/information_schema_disks.cc
new file mode 100644
index 00000000000..b5e3a6dc728
--- /dev/null
+++ b/plugin/information_schema_disks/information_schema_disks.cc
@@ -0,0 +1,154 @@
+/*
+ Copyright (c) 2017, MariaDB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
+
+#include <sys/statvfs.h>
+#include <sys/types.h>
+#include <mntent.h>
+#include <sql_class.h>
+#include <table.h>
+#include <innodb_priv.h>
+
+namespace
+{
+
+struct st_mysql_information_schema disks_table_info = { MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION };
+
+ST_FIELD_INFO disks_table_fields[]=
+{
+ { "Disk", PATH_MAX, MYSQL_TYPE_STRING, 0, 0 ,0, 0 },
+ { "Path", PATH_MAX, MYSQL_TYPE_STRING, 0, 0 ,0, 0 },
+ { "Total", 32, MYSQL_TYPE_LONG, 0, 0 ,0 ,0 }, // Total amount available
+ { "Used", 32, MYSQL_TYPE_LONG, 0, 0 ,0 ,0 }, // Amount of space used
+ { "Available", 32, MYSQL_TYPE_LONG, 0, 0 ,0 ,0 }, // Amount available to users other than root.
+ { 0, 0, MYSQL_TYPE_NULL, 0, 0, 0, 0 }
+};
+
+int disks_table_add_row(THD* pThd,
+ TABLE* pTable,
+ const char* zDisk,
+ const char* zPath,
+ const struct statvfs& info)
+{
+ // From: http://pubs.opengroup.org/onlinepubs/009695399/basedefs/sys/statvfs.h.html
+ //
+ // f_frsize Fundamental file system block size.
+ // f_blocks Total number of blocks on file system in units of f_frsize.
+ // f_bfree Total number of free blocks.
+ // f_bavail Number of free blocks available to non-privileged process.
+
+ size_t total = (info.f_frsize * info.f_blocks) / 1024;
+ size_t used = (info.f_frsize * (info.f_blocks - info.f_bfree)) / 1024;
+ size_t avail = (info.f_frsize * info.f_bavail) / 1024;
+
+ pTable->field[0]->store(zDisk, strlen(zDisk), system_charset_info);
+ pTable->field[1]->store(zPath, strlen(zPath), system_charset_info);
+ pTable->field[2]->store(total);
+ pTable->field[3]->store(used);
+ pTable->field[4]->store(avail);
+
+ // 0 means success.
+ return (schema_table_store_record(pThd, pTable) != 0) ? 1 : 0;
+}
+
+int disks_table_add_row(THD* pThd, TABLE* pTable, const char* zDisk, const char* zPath)
+{
+ int rv = 0;
+
+ struct statvfs info;
+
+ if (statvfs(zPath, &info) == 0) // We ignore failures.
+ {
+ rv = disks_table_add_row(pThd, pTable, zDisk, zPath, info);
+ }
+
+ return rv;
+}
+
+int disks_fill_table(THD* pThd, TABLE_LIST* pTables, Item* pCond)
+{
+ int rv = 1;
+ TABLE* pTable = pTables->table;
+
+ FILE* pFile = setmntent("/etc/mtab", "r");
+
+ if (pFile)
+ {
+ const size_t BUFFER_SIZE = 4096; // 4K should be sufficient.
+
+ char* pBuffer = new (std::nothrow) char [BUFFER_SIZE];
+
+ if (pBuffer)
+ {
+ rv = 0;
+
+ struct mntent ent;
+ struct mntent* pEnt;
+
+ while ((rv == 0) && (pEnt = getmntent_r(pFile, &ent, pBuffer, BUFFER_SIZE)))
+ {
+ // We only report the ones that refer to physical disks.
+ if (pEnt->mnt_fsname[0] == '/')
+ {
+ rv = disks_table_add_row(pThd, pTable, pEnt->mnt_fsname, pEnt->mnt_dir);
+ }
+ }
+
+ delete [] pBuffer;
+ }
+ else
+ {
+ rv = 1;
+ }
+
+ endmntent(pFile);
+ }
+
+ return rv;
+}
+
+int disks_table_init(void *ptr)
+{
+ ST_SCHEMA_TABLE* pSchema_table = (ST_SCHEMA_TABLE*)ptr;
+
+ pSchema_table->fields_info = disks_table_fields;
+ pSchema_table->fill_table = disks_fill_table;
+ return 0;
+}
+
+}
+
+extern "C"
+{
+
+mysql_declare_plugin(disks_library)
+{
+ MYSQL_INFORMATION_SCHEMA_PLUGIN,
+ &disks_table_info, /* type-specific descriptor */
+ "DISKS", /* table name */
+ "MariaDB", /* author */
+ "Disk space information", /* description */
+ PLUGIN_LICENSE_GPL, /* license type */
+ disks_table_init, /* init function */
+ NULL,
+ 0x0100, /* version = 1.0 */
+ NULL, /* no status variables */
+ NULL, /* no system variables */
+ NULL, /* no reserved information */
+ 0 /* no flags */
+}
+mysql_declare_plugin_end;
+
+}
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index dc8952f8bdf..3b4bb51aba1 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -24,9 +24,7 @@ The tablespace memory cache
Created 10/25/1995 Heikki Tuuri
*******************************************************/
-#include "ha_prototypes.h"
-#include "fil0pagecompress.h"
-#include "fsp0pagecompress.h"
+#include "fil0fil.h"
#include "fil0crypt.h"
#include "btr0btr.h"
@@ -5682,496 +5680,6 @@ fil_close(void)
}
/********************************************************************//**
-Initializes a buffer control block when the buf_pool is created. */
-static
-void
-fil_buf_block_init(
-/*===============*/
- buf_block_t* block, /*!< in: pointer to control block */
- byte* frame) /*!< in: pointer to buffer frame */
-{
- UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);
-
- block->frame = frame;
-
- block->page.io_fix = BUF_IO_NONE;
- /* There are assertions that check for this. */
- block->page.buf_fix_count = 1;
- block->page.state = BUF_BLOCK_READY_FOR_USE;
-
- page_zip_des_init(&block->page.zip);
-}
-
-struct fil_iterator_t {
- pfs_os_file_t file; /*!< File handle */
- const char* filepath; /*!< File path name */
- os_offset_t start; /*!< From where to start */
- os_offset_t end; /*!< Where to stop */
- os_offset_t file_size; /*!< File size in bytes */
- ulint page_size; /*!< Page size */
- ulint n_io_buffers; /*!< Number of pages to use
- for IO */
- byte* io_buffer; /*!< Buffer to use for IO */
- fil_space_crypt_t *crypt_data; /*!< MariaDB Crypt data (if encrypted) */
- byte* crypt_io_buffer; /*!< MariaDB IO buffer when
- encrypted */
- dict_table_t* table; /*!< Imported table */
-};
-
-/********************************************************************//**
-TODO: This can be made parallel trivially by chunking up the file and creating
-a callback per thread. Main benefit will be to use multiple CPUs for
-checksums and compressed tables. We have to do compressed tables block by
-block right now. Secondly we need to decompress/compress and copy too much
-of data. These are CPU intensive.
-
-Iterate over all the pages in the tablespace.
-@param iter Tablespace iterator
-@param block block to use for IO
-@param callback Callback to inspect and update page contents
-@retval DB_SUCCESS or error code */
-static
-dberr_t
-fil_iterate(
-/*========*/
- const fil_iterator_t& iter,
- buf_block_t* block,
- PageCallback& callback)
-{
- os_offset_t offset;
- ulint page_no = 0;
- ulint space_id = callback.get_space_id();
- ulint n_bytes = iter.n_io_buffers * iter.page_size;
-
- ut_ad(!srv_read_only_mode);
-
- /* TODO: For compressed tables we do a lot of useless
- copying for non-index pages. Unfortunately, it is
- required by buf_zip_decompress() */
- const bool row_compressed
- = callback.get_page_size().is_compressed();
-
- for (offset = iter.start; offset < iter.end; offset += n_bytes) {
-
- byte* io_buffer = iter.io_buffer;
-
- block->frame = io_buffer;
-
- if (row_compressed) {
- page_zip_des_init(&block->page.zip);
- page_zip_set_size(&block->page.zip, iter.page_size);
-
- block->page.size.copy_from(
- page_size_t(iter.page_size,
- univ_page_size.logical(),
- true));
-
- block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
- ut_d(block->page.zip.m_external = true);
- ut_ad(iter.page_size
- == callback.get_page_size().physical());
-
- /* Zip IO is done in the compressed page buffer. */
- io_buffer = block->page.zip.data;
- }
-
- /* We have to read the exact number of bytes. Otherwise the
- InnoDB IO functions croak on failed reads. */
-
- n_bytes = static_cast<ulint>(
- ut_min(static_cast<os_offset_t>(n_bytes),
- iter.end - offset));
-
- ut_ad(n_bytes > 0);
- ut_ad(!(n_bytes % iter.page_size));
-
- const bool encrypted = iter.crypt_data != NULL
- && iter.crypt_data->should_encrypt();
- /* Use additional crypt io buffer if tablespace is encrypted */
- byte* const readptr = encrypted
- ? iter.crypt_io_buffer : io_buffer;
- byte* const writeptr = readptr;
- IORequest read_request(IORequest::READ);
- dberr_t err = os_file_read(
- read_request, iter.file, readptr, offset,
- (ulint) n_bytes);
-
- if (err != DB_SUCCESS) {
-
- ib::error() << "os_file_read() failed";
-
- return(err);
- }
-
- bool updated = false;
- os_offset_t page_off = offset;
- ulint n_pages_read = (ulint) n_bytes / iter.page_size;
- bool decrypted = false;
-
- for (ulint i = 0; i < n_pages_read; ++i) {
- ulint size = iter.page_size;
- dberr_t err = DB_SUCCESS;
- byte* src = readptr + (i * size);
- byte* dst = io_buffer + (i * size);
- bool frame_changed = false;
-
- ulint page_type = mach_read_from_2(src+FIL_PAGE_TYPE);
-
- const bool page_compressed
- = page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
- || page_type == FIL_PAGE_PAGE_COMPRESSED;
-
- /* If tablespace is encrypted, we need to decrypt
- the page. Note that tablespaces are not in
- fil_system during import. */
- if (encrypted) {
- decrypted = fil_space_decrypt(
- iter.crypt_data,
- dst, //dst
- callback.get_page_size(),
- src, // src
- &err); // src
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- if (decrypted) {
- updated = true;
- } else if (!page_compressed
- && !row_compressed) {
- block->frame = src;
- frame_changed = true;
- } else {
- memcpy(dst, src, size);
- }
- }
-
- /* If the original page is page_compressed, we need
- to decompress page before we can update it. */
- if (page_compressed) {
- fil_decompress_page(NULL, dst, ulong(size),
- NULL);
- updated = true;
- }
-
- buf_block_set_file_page(
- block, page_id_t(space_id, page_no++));
-
- if ((err = callback(page_off, block)) != DB_SUCCESS) {
-
- return(err);
-
- } else if (!updated) {
- updated = buf_block_get_state(block)
- == BUF_BLOCK_FILE_PAGE;
- }
-
- buf_block_set_state(block, BUF_BLOCK_NOT_USED);
- buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
-
- /* If tablespace is encrypted we use additional
- temporary scratch area where pages are read
- for decrypting readptr == crypt_io_buffer != io_buffer.
-
- Destination for decryption is a buffer pool block
- block->frame == dst == io_buffer that is updated.
- Pages that did not require decryption even when
- tablespace is marked as encrypted are not copied
- instead block->frame is set to src == readptr.
-
- For encryption we again use temporary scratch area
- writeptr != io_buffer == dst
- that is then written to the tablespace
-
- (1) For normal tables io_buffer == dst == writeptr
- (2) For only page compressed tables
- io_buffer == dst == writeptr
- (3) For encrypted (and page compressed)
- readptr != io_buffer == dst != writeptr
- */
-
- ut_ad(!encrypted && !page_compressed ?
- src == dst && dst == writeptr + (i * size):1);
- ut_ad(page_compressed && !encrypted ?
- src == dst && dst == writeptr + (i * size):1);
- ut_ad(encrypted ?
- src != dst && dst != writeptr + (i * size):1);
-
- if (encrypted) {
- memcpy(writeptr + (i * size),
- row_compressed ? block->page.zip.data :
- block->frame, size);
- }
-
- if (frame_changed) {
- block->frame = dst;
- }
-
- src = io_buffer + (i * size);
-
- if (page_compressed) {
- ulint len = 0;
-
- byte * res = fil_compress_page(
- NULL,
- src,
- NULL,
- size,
- dict_table_page_compression_level(iter.table),
- 512,/* FIXME: use proper block size */
- encrypted,
- &len);
-
- if (len != size) {
- memset(res+len, 0, size-len);
- }
-
- updated = true;
- }
-
- /* If tablespace is encrypted, encrypt page before we
- write it back. Note that we should not encrypt the
- buffer that is in buffer pool. */
- /* NOTE: At this stage of IMPORT the
- buffer pool is not being used at all! */
- if (decrypted && encrypted) {
- byte *dest = writeptr + (i * size);
- ulint space = mach_read_from_4(
- src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- ulint offset = mach_read_from_4(src + FIL_PAGE_OFFSET);
- ib_uint64_t lsn = mach_read_from_8(src + FIL_PAGE_LSN);
-
- byte* tmp = fil_encrypt_buf(
- iter.crypt_data,
- space,
- offset,
- lsn,
- src,
- callback.get_page_size(),
- dest);
-
- if (tmp == src) {
- /* TODO: remove unnecessary memcpy's */
- memcpy(dest, src, iter.page_size);
- }
-
- updated = true;
- }
-
- page_off += iter.page_size;
- block->frame += iter.page_size;
- }
-
- IORequest write_request(IORequest::WRITE);
-
- /* A page was updated in the set, write back to disk.
- Note: We don't have the compression algorithm, we write
- out the imported file as uncompressed. */
-
- if (updated
- && (err = os_file_write(
- write_request,
- iter.filepath, iter.file, writeptr,
- offset, (ulint) n_bytes)) != DB_SUCCESS) {
-
- ib::error() << "os_file_write() failed";
- return(err);
- }
-
- /* Clean up the temporal buffer. */
- memset(writeptr, 0, n_bytes);
- }
-
- return(DB_SUCCESS);
-}
-
-/********************************************************************//**
-Iterate over all the pages in the tablespace.
-@param table the table definiton in the server
-@param n_io_buffers number of blocks to read and write together
-@param callback functor that will do the page updates
-@return DB_SUCCESS or error code */
-dberr_t
-fil_tablespace_iterate(
-/*===================*/
- dict_table_t* table,
- ulint n_io_buffers,
- PageCallback& callback)
-{
- dberr_t err;
- pfs_os_file_t file;
- char* filepath;
- bool success;
-
- ut_a(n_io_buffers > 0);
- ut_ad(!srv_read_only_mode);
-
- DBUG_EXECUTE_IF("ib_import_trigger_corruption_1",
- return(DB_CORRUPTION););
-
- /* Make sure the data_dir_path is set. */
- dict_get_and_save_data_dir_path(table, false);
-
- if (DICT_TF_HAS_DATA_DIR(table->flags)) {
- ut_a(table->data_dir_path);
-
- filepath = fil_make_filepath(
- table->data_dir_path, table->name.m_name, IBD, true);
- } else {
- filepath = fil_make_filepath(
- NULL, table->name.m_name, IBD, false);
- }
-
- if (filepath == NULL) {
- return(DB_OUT_OF_MEMORY);
- }
-
- file = os_file_create_simple_no_error_handling(
- innodb_data_file_key, filepath,
- OS_FILE_OPEN, OS_FILE_READ_WRITE, srv_read_only_mode, &success);
-
- DBUG_EXECUTE_IF("fil_tablespace_iterate_failure",
- {
- static bool once;
-
- if (!once || ut_rnd_interval(0, 10) == 5) {
- once = true;
- success = false;
- os_file_close(file);
- }
- });
-
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(true);
-
- ib::error() << "Trying to import a tablespace, but could not"
- " open the tablespace file " << filepath;
-
- ut_free(filepath);
-
- return(DB_TABLESPACE_NOT_FOUND);
-
- } else {
- err = DB_SUCCESS;
- }
-
- callback.set_file(filepath, file);
-
- os_offset_t file_size = os_file_get_size(file);
- ut_a(file_size != (os_offset_t) -1);
-
- /* The block we will use for every physical page */
- buf_block_t* block;
-
- block = reinterpret_cast<buf_block_t*>(ut_zalloc_nokey(sizeof(*block)));
-
- mutex_create(LATCH_ID_BUF_BLOCK_MUTEX, &block->mutex);
-
- /* Allocate a page to read in the tablespace header, so that we
- can determine the page size and zip size (if it is compressed).
- We allocate an extra page in case it is a compressed table. One
- page is to ensure alignement. */
-
- void* page_ptr = ut_malloc_nokey(3 * UNIV_PAGE_SIZE);
- byte* page = static_cast<byte*>(ut_align(page_ptr, UNIV_PAGE_SIZE));
-
- fil_buf_block_init(block, page);
-
- /* Read the first page and determine the page and zip size. */
-
- IORequest request(IORequest::READ);
-
- err = os_file_read(request, file, page, 0, UNIV_PAGE_SIZE);
-
- if (err != DB_SUCCESS) {
-
- err = DB_IO_ERROR;
-
- } else if ((err = callback.init(file_size, block)) == DB_SUCCESS) {
- fil_iterator_t iter;
-
- iter.file = file;
- iter.start = 0;
- iter.end = file_size;
- iter.filepath = filepath;
- iter.file_size = file_size;
- iter.n_io_buffers = n_io_buffers;
- iter.page_size = callback.get_page_size().physical();
- iter.table = table;
-
- /* read (optional) crypt data */
- iter.crypt_data = fil_space_read_crypt_data(
- callback.get_page_size(), page);
-
- if (err == DB_SUCCESS) {
-
- /* Compressed pages can't be optimised for block IO
- for now. We do the IMPORT page by page. */
-
- if (callback.get_page_size().is_compressed()) {
- iter.n_io_buffers = 1;
- ut_a(iter.page_size
- == callback.get_page_size().physical());
- }
-
- /** Add an extra page for compressed page scratch
- area. */
- void* io_buffer = ut_malloc_nokey(
- (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
-
- iter.io_buffer = static_cast<byte*>(
- ut_align(io_buffer, UNIV_PAGE_SIZE));
-
- void* crypt_io_buffer;
- if (iter.crypt_data) {
- crypt_io_buffer = static_cast<byte*>(
- ut_malloc_nokey((2 + iter.n_io_buffers)
- * UNIV_PAGE_SIZE));
- iter.crypt_io_buffer = static_cast<byte*>(
- ut_align(crypt_io_buffer,
- UNIV_PAGE_SIZE));
- } else {
- crypt_io_buffer = NULL;
- }
-
- err = fil_iterate(iter, block, callback);
-
- if (iter.crypt_data) {
- fil_space_destroy_crypt_data(&iter.crypt_data);
- }
-
- ut_free(io_buffer);
- ut_free(crypt_io_buffer);
- }
- }
-
- if (err == DB_SUCCESS) {
-
- ib::info() << "Sync to disk";
-
- if (!os_file_flush(file)) {
- ib::info() << "os_file_flush() failed!";
- err = DB_IO_ERROR;
- } else {
- ib::info() << "Sync to disk - done!";
- }
- }
-
- os_file_close(file);
-
- ut_free(page_ptr);
- ut_free(filepath);
-
- mutex_free(&block->mutex);
-
- ut_free(block);
-
- return(err);
-}
-
-/********************************************************************//**
Delete the tablespace file and any related files like .cfg.
This should not be called for temporary tables.
@param[in] ibd_filepath File path of the IBD tablespace */
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 249755f1ff6..d79aa0415a6 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -1391,90 +1391,6 @@ fil_delete_file(
/*============*/
const char* path); /*!< in: filepath of the ibd tablespace */
-/** Callback functor. */
-struct PageCallback {
-
- /** Default constructor */
- PageCallback()
- :
- m_page_size(0, 0, false),
- m_filepath() UNIV_NOTHROW {}
-
- virtual ~PageCallback() UNIV_NOTHROW {}
-
- /** Called for page 0 in the tablespace file at the start.
- @param file_size size of the file in bytes
- @param block contents of the first page in the tablespace file
- @retval DB_SUCCESS or error code. */
- virtual dberr_t init(
- os_offset_t file_size,
- const buf_block_t* block) UNIV_NOTHROW = 0;
-
- /** Called for every page in the tablespace. If the page was not
- updated then its state must be set to BUF_PAGE_NOT_USED. For
- compressed tables the page descriptor memory will be at offset:
- block->frame + UNIV_PAGE_SIZE;
- @param offset physical offset within the file
- @param block block read from file, note it is not from the buffer pool
- @retval DB_SUCCESS or error code. */
- virtual dberr_t operator()(
- os_offset_t offset,
- buf_block_t* block) UNIV_NOTHROW = 0;
-
- /** Set the name of the physical file and the file handle that is used
- to open it for the file that is being iterated over.
- @param filename the name of the tablespace file
- @param file OS file handle */
- void set_file(const char* filename, pfs_os_file_t file) UNIV_NOTHROW
- {
- m_file = file;
- m_filepath = filename;
- }
-
- /**
- @return the space id of the tablespace */
- virtual ulint get_space_id() const UNIV_NOTHROW = 0;
-
- /**
- @retval the space flags of the tablespace being iterated over */
- virtual ulint get_space_flags() const UNIV_NOTHROW = 0;
-
- /** The compressed page size
- @return the compressed page size */
- const page_size_t& get_page_size() const
- {
- return(m_page_size);
- }
-
- /** The tablespace page size. */
- page_size_t m_page_size;
-
- /** File handle to the tablespace */
- pfs_os_file_t m_file;
-
- /** Physical file path. */
- const char* m_filepath;
-
-protected:
- // Disable copying
- PageCallback(const PageCallback&);
- PageCallback& operator=(const PageCallback&);
-};
-
-/********************************************************************//**
-Iterate over all the pages in the tablespace.
-@param table the table definiton in the server
-@param n_io_buffers number of blocks to read and write together
-@param callback functor that will do the page updates
-@return DB_SUCCESS or error code */
-dberr_t
-fil_tablespace_iterate(
-/*===================*/
- dict_table_t* table,
- ulint n_io_buffers,
- PageCallback& callback)
- MY_ATTRIBUTE((warn_unused_result));
-
/********************************************************************//**
Looks for a pre-existing fil_space_t with the given tablespace ID
and, if found, returns the name and filepath in newly allocated buffers that the caller must free.
diff --git a/storage/innobase/mem/mem0mem.cc b/storage/innobase/mem/mem0mem.cc
index ca83891d459..c101980d543 100644
--- a/storage/innobase/mem/mem0mem.cc
+++ b/storage/innobase/mem/mem0mem.cc
@@ -348,6 +348,11 @@ mem_heap_create_block_func(
heap->total_size += len;
}
+ /* Poison all available memory. Individual chunks will be unpoisoned on
+ every mem_heap_alloc() call. */
+ compile_time_assert(MEM_BLOCK_HEADER_SIZE >= sizeof *block);
+ UNIV_MEM_FREE(block + 1, len - sizeof *block);
+
ut_ad((ulint)MEM_BLOCK_HEADER_SIZE < len);
return(block);
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index 14bd79a0c5d..699c21b3327 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -701,28 +701,50 @@ static
bool
os_aio_validate();
+/** Handle errors for file operations.
+@param[in] name name of a file or NULL
+@param[in] operation operation
+@param[in] should_abort whether to abort on an unknown error
+@param[in] on_error_silent whether to suppress reports of non-fatal errors
+@return true if we should retry the operation */
+static MY_ATTRIBUTE((warn_unused_result))
+bool
+os_file_handle_error_cond_exit(
+ const char* name,
+ const char* operation,
+ bool should_abort,
+ bool on_error_silent);
+
/** Does error handling when a file operation fails.
-@param[in] name File name or NULL
-@param[in] operation Name of operation e.g., "read", "write"
+@param[in] name name of a file or NULL
+@param[in] operation operation name that failed
@return true if we should retry the operation */
static
bool
os_file_handle_error(
const char* name,
- const char* operation);
+ const char* operation)
+{
+ /* Exit in case of unknown error */
+ return(os_file_handle_error_cond_exit(name, operation, true, false));
+}
-/**
-Does error handling when a file operation fails.
-@param[in] name File name or NULL
-@param[in] operation Name of operation e.g., "read", "write"
-@param[in] silent if true then don't print any message to the log.
+/** Does error handling when a file operation fails.
+@param[in] name name of a file or NULL
+@param[in] operation operation name that failed
+@param[in] on_error_silent if true then don't print any message to the log.
@return true if we should retry the operation */
static
bool
os_file_handle_error_no_exit(
const char* name,
const char* operation,
- bool silent);
+ bool on_error_silent)
+{
+ /* Don't exit in case of unknown error */
+ return(os_file_handle_error_cond_exit(
+ name, operation, false, on_error_silent));
+}
/** Does simulated AIO. This function should be called by an i/o-handler
thread.
@@ -5077,52 +5099,31 @@ os_file_read_page(
ut_ad(type.validate());
ut_ad(n > 0);
- for (;;) {
- ssize_t n_bytes;
-
- n_bytes = os_file_pread(type, file, buf, n, offset, &err);
-
- if (o != NULL) {
- *o = n_bytes;
- }
-
- if (err != DB_SUCCESS && !exit_on_err) {
-
- return(err);
-
- } else if ((ulint) n_bytes == n) {
- return(DB_SUCCESS);
- }
-
- ib::error() << "Tried to read " << n
- << " bytes at offset " << offset
- << ", but was only able to read " << n_bytes;
-
- if (exit_on_err) {
+ ssize_t n_bytes = os_file_pread(type, file, buf, n, offset, &err);
- if (!os_file_handle_error(NULL, "read")) {
- /* Hard error */
- break;
- }
+ if (o) {
+ *o = n_bytes;
+ }
- } else if (!os_file_handle_error_no_exit(NULL, "read", false)) {
+ if (ulint(n_bytes) == n || (err != DB_SUCCESS && !exit_on_err)) {
+ return err;
+ }
- /* Hard error */
- break;
- }
+ ib::error() << "Tried to read " << n << " bytes at offset "
+ << offset << ", but was only able to read " << n_bytes;
- if (n_bytes > 0 && (ulint) n_bytes < n) {
- n -= (ulint) n_bytes;
- offset += (ulint) n_bytes;
- buf = reinterpret_cast<uchar*>(buf) + (ulint) n_bytes;
- }
+ if (!os_file_handle_error_cond_exit(
+ NULL, "read", exit_on_err, false)) {
+ ib::fatal()
+ << "Cannot read from file. OS error number "
+ << errno << ".";
}
- ib::fatal()
- << "Cannot read from file. OS error number "
- << errno << ".";
+ if (err == DB_SUCCESS) {
+ err = DB_IO_ERROR;
+ }
- return(err);
+ return err;
}
/** Retrieves the last error number if an error occurs in a file io function.
@@ -5228,37 +5229,6 @@ os_file_handle_error_cond_exit(
return(false);
}
-/** Does error handling when a file operation fails.
-@param[in] name name of a file or NULL
-@param[in] operation operation name that failed
-@return true if we should retry the operation */
-static
-bool
-os_file_handle_error(
- const char* name,
- const char* operation)
-{
- /* Exit in case of unknown error */
- return(os_file_handle_error_cond_exit(name, operation, true, false));
-}
-
-/** Does error handling when a file operation fails.
-@param[in] name name of a file or NULL
-@param[in] operation operation name that failed
-@param[in] on_error_silent if true then don't print any message to the log.
-@return true if we should retry the operation */
-static
-bool
-os_file_handle_error_no_exit(
- const char* name,
- const char* operation,
- bool on_error_silent)
-{
- /* Don't exit in case of unknown error */
- return(os_file_handle_error_cond_exit(
- name, operation, false, on_error_silent));
-}
-
#ifndef _WIN32
/** Tries to disable OS caching on an opened file descriptor.
@param[in] fd file descriptor to alter
diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc
index 82e35472cb8..4f7813929f2 100644
--- a/storage/innobase/row/row0import.cc
+++ b/storage/innobase/row/row0import.cc
@@ -37,6 +37,7 @@ Created 2012-02-08 by Sunny Bains.
#include "row0mysql.h"
#include "srv0start.h"
#include "row0quiesce.h"
+#include "fil0pagecompress.h"
#include "ut0new.h"
#include <vector>
@@ -45,12 +46,10 @@ Created 2012-02-08 by Sunny Bains.
#include <my_aes.h>
#endif
-/** The size of the buffer to use for IO. Note: os_file_read() doesn't expect
-reads to fail. If you set the buffer size to be greater than a multiple of the
-file size then it will assert. TODO: Fix this limitation of the IO functions.
-@param n page size of the tablespace.
-@retval number of pages */
-#define IO_BUFFER_SIZE(m, n) ((m) / (n))
+/** The size of the buffer to use for IO.
+@param n physical page size
+@return number of pages */
+#define IO_BUFFER_SIZE(n) ((1024 * 1024) / n)
/** For gathering stats on records during phase I */
struct row_stats_t {
@@ -346,12 +345,14 @@ private:
/** Functor that is called for each physical page that is read from the
tablespace file. */
-class AbstractCallback : public PageCallback {
+class AbstractCallback
+{
public:
/** Constructor
@param trx covering transaction */
AbstractCallback(trx_t* trx)
:
+ m_page_size(0, 0, false),
m_trx(trx),
m_space(ULINT_UNDEFINED),
m_xdes(),
@@ -384,31 +385,50 @@ public:
return(m_space_flags);
}
-protected:
- /** Get the data page depending on the table type, compressed or not.
- @param block block read from disk
- @retval the buffer frame */
- buf_frame_t* get_frame(buf_block_t* block) const UNIV_NOTHROW
+ /**
+ Set the name of the physical file and the file handle that is used
+ to open it for the file that is being iterated over.
+ @param filename the physical name of the tablespace file
+ @param file OS file handle */
+ void set_file(const char* filename, pfs_os_file_t file) UNIV_NOTHROW
{
- if (is_compressed_table()) {
- return(block->page.zip.data);
- }
-
- return(buf_block_get_frame(block));
+ m_file = file;
+ m_filepath = filename;
}
- /** Check for session interrupt. If required we could
- even flush to disk here every N pages.
- @retval DB_SUCCESS or error code */
- dberr_t periodic_check() UNIV_NOTHROW
- {
- if (trx_is_interrupted(m_trx)) {
- return(DB_INTERRUPTED);
- }
+ const page_size_t& get_page_size() const { return m_page_size; }
- return(DB_SUCCESS);
+ const char* filename() const { return m_filepath; }
+
+ /**
+ Called for every page in the tablespace. If the page was not
+ updated then its state must be set to BUF_PAGE_NOT_USED. For
+ compressed tables the page descriptor memory will be at offset:
+ block->frame + UNIV_PAGE_SIZE;
+ @param offset - physical offset within the file
+ @param block - block read from file, note it is not from the buffer pool
+ @retval DB_SUCCESS or error code. */
+ virtual dberr_t operator()(
+ os_offset_t offset,
+ buf_block_t* block) UNIV_NOTHROW = 0;
+
+ /**
+ @return the space id of the tablespace */
+ virtual ulint get_space_id() const UNIV_NOTHROW = 0;
+
+ bool is_interrupted() const { return trx_is_interrupted(m_trx); }
+
+ /**
+ Get the data page depending on the table type, compressed or not.
+ @param block - block read from disk
+ @retval the buffer frame */
+ static byte* get_frame(const buf_block_t* block)
+ {
+ return block->page.zip.data
+ ? block->page.zip.data : block->frame;
}
+protected:
/** Get the physical offset of the extent descriptor within the page.
@param page_no page number of the extent descriptor
@param page contents of the page containing the extent descriptor.
@@ -488,6 +508,15 @@ protected:
}
protected:
+ /** The tablespace page size. */
+ page_size_t m_page_size;
+
+ /** File handle to the tablespace */
+ pfs_os_file_t m_file;
+
+ /** Physical file path. */
+ const char* m_filepath;
+
/** Covering transaction. */
trx_t* m_trx;
@@ -564,9 +593,7 @@ AbstractCallback::init(
m_free_limit = mach_read_from_4(page + FSP_FREE_LIMIT);
m_space = mach_read_from_4(page + FSP_HEADER_OFFSET + FSP_SPACE_ID);
- dberr_t err = set_current_xdes(0, page);
-
- return(err);
+ return set_current_xdes(0, page);
}
/**
@@ -637,11 +664,7 @@ FetchIndexRootPages::operator() (
os_offset_t offset,
buf_block_t* block) UNIV_NOTHROW
{
- dberr_t err;
-
- if ((err = periodic_check()) != DB_SUCCESS) {
- return(err);
- }
+ if (is_interrupted()) return DB_INTERRUPTED;
const page_t* page = get_frame(block);
@@ -654,9 +677,9 @@ FetchIndexRootPages::operator() (
<< ", file offset: "
<< (offset / m_page_size.physical());
- err = DB_CORRUPTION;
+ return DB_CORRUPTION;
} else if (page_type == FIL_PAGE_TYPE_XDES) {
- err = set_current_xdes(block->page.id.page_no(), page);
+ return set_current_xdes(block->page.id.page_no(), page);
} else if (fil_page_index_page_check(page)
&& !is_free(block->page.id.page_no())
&& page_is_root(page)) {
@@ -680,7 +703,7 @@ FetchIndexRootPages::operator() (
}
}
- return(err);
+ return DB_SUCCESS;
}
/**
@@ -808,14 +831,6 @@ public:
os_offset_t offset,
buf_block_t* block) UNIV_NOTHROW;
private:
-
- /** Status returned by PageConverter::validate() */
- enum import_page_status_t {
- IMPORT_PAGE_STATUS_OK, /*!< Page is OK */
- IMPORT_PAGE_STATUS_ALL_ZERO, /*!< Page is all zeros */
- IMPORT_PAGE_STATUS_CORRUPTED /*!< Page is corrupted */
- };
-
/** Update the page, set the space id, max trx id and index id.
@param block block read from file
@param page_type type of the page
@@ -824,17 +839,6 @@ private:
buf_block_t* block,
ulint& page_type) UNIV_NOTHROW;
-#ifdef UNIV_DEBUG
- /**
- @return true error condition is enabled. */
- bool trigger_corruption() UNIV_NOTHROW
- {
- return(false);
- }
- #else
-#define trigger_corruption() (false)
-#endif /* UNIV_DEBUG */
-
/** Update the space, index id, trx id.
@param block block to convert
@return DB_SUCCESS or error code */
@@ -846,14 +850,6 @@ private:
@retval DB_SUCCESS or error code */
dberr_t update_records(buf_block_t* block) UNIV_NOTHROW;
- /** Validate the page, check for corruption.
- @param offset physical offset within file.
- @param page page read from file.
- @return 0 on success, 1 if all zero, 2 if corrupted */
- import_page_status_t validate(
- os_offset_t offset,
- buf_block_t* page) UNIV_NOTHROW;
-
/** Validate the space flags and update tablespace header page.
@param block block read from file, not from the buffer pool.
@retval DB_SUCCESS or error code */
@@ -1530,6 +1526,7 @@ IndexPurge::purge() UNIV_NOTHROW
/** Constructor
@param cfg config of table being imported.
@param trx transaction covering the import */
+inline
PageConverter::PageConverter(
row_import* cfg,
trx_t* trx)
@@ -1553,6 +1550,7 @@ PageConverter::PageConverter(
@param offsets column offsets for the record
@param i column ordinal value
@return DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::adjust_cluster_index_blob_column(
rec_t* rec,
@@ -1581,13 +1579,11 @@ PageConverter::adjust_cluster_index_blob_column(
field += BTR_EXTERN_SPACE_ID - BTR_EXTERN_FIELD_REF_SIZE + len;
- if (is_compressed_table()) {
- mach_write_to_4(field, get_space_id());
+ mach_write_to_4(field, get_space_id());
+ if (m_page_zip_ptr) {
page_zip_write_blob_ptr(
m_page_zip_ptr, rec, m_cluster_index, offsets, i, 0);
- } else {
- mlog_write_ulint(field, get_space_id(), MLOG_4BYTES, 0);
}
return(DB_SUCCESS);
@@ -1598,6 +1594,7 @@ stored columns.
@param rec record to update
@param offsets column offsets for the record
@return DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::adjust_cluster_index_blob_columns(
rec_t* rec,
@@ -1630,6 +1627,7 @@ BLOB reference, write the new space id.
@param rec record to update
@param offsets column offsets for the record
@return DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::adjust_cluster_index_blob_ref(
rec_t* rec,
@@ -1652,6 +1650,7 @@ PageConverter::adjust_cluster_index_blob_ref(
re-organising the B+tree.
@param offsets current row offsets.
@return true if purge succeeded */
+inline
bool
PageConverter::purge(const ulint* offsets) UNIV_NOTHROW
{
@@ -1674,6 +1673,7 @@ PageConverter::purge(const ulint* offsets) UNIV_NOTHROW
@param rec record to update
@param offsets column offsets for the record
@return DB_SUCCESS or error code. */
+inline
dberr_t
PageConverter::adjust_cluster_record(
const dict_index_t* index,
@@ -1700,6 +1700,7 @@ PageConverter::adjust_cluster_record(
rows that can't be purged optimistically.
@param block block to update
@retval DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::update_records(
buf_block_t* block) UNIV_NOTHROW
@@ -1763,6 +1764,7 @@ PageConverter::update_records(
/** Update the space, index id, trx id.
@return DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::update_index_page(
buf_block_t* block) UNIV_NOTHROW
@@ -1843,6 +1845,7 @@ PageConverter::update_index_page(
/** Validate the space flags and update tablespace header page.
@param block block read from file, not from the buffer pool.
@retval DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::update_header(
buf_block_t* block) UNIV_NOTHROW
@@ -1879,6 +1882,7 @@ PageConverter::update_header(
/** Update the page, set the space id, max trx id and index id.
@param block block read from file
@retval DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::update_page(
buf_block_t* block,
@@ -1886,6 +1890,14 @@ PageConverter::update_page(
{
dberr_t err = DB_SUCCESS;
+ ut_ad(!block->page.zip.data == !is_compressed_table());
+
+ if (block->page.zip.data) {
+ m_page_zip_ptr = &block->page.zip;
+ } else {
+ ut_ad(!m_page_zip_ptr);
+ }
+
switch (page_type = fil_page_get_type(get_frame(block))) {
case FIL_PAGE_TYPE_FSP_HDR:
ut_a(block->page.id.page_no() == 0);
@@ -1940,117 +1952,41 @@ PageConverter::update_page(
return(DB_CORRUPTION);
}
-/** Validate the page
-@param offset physical offset within file.
-@param page page read from file.
-@return status */
-PageConverter::import_page_status_t
-PageConverter::validate(
- os_offset_t offset,
- buf_block_t* block) UNIV_NOTHROW
-{
- buf_frame_t* page = get_frame(block);
-
- /* Check that the page number corresponds to the offset in
- the file. Flag as corrupt if it doesn't. Disable the check
- for LSN in buf_page_is_corrupted() */
-
- if (buf_page_is_corrupted(
- false, page, get_page_size(), NULL)
- || (page_get_page_no(page) != offset / m_page_size.physical()
- && page_get_page_no(page) != 0)) {
-
- return(IMPORT_PAGE_STATUS_CORRUPTED);
-
- } else if (offset > 0 && page_get_page_no(page) == 0) {
-
- /* The page is all zero: do nothing. We already checked
- for all NULs in buf_page_is_corrupted() */
- return(IMPORT_PAGE_STATUS_ALL_ZERO);
- }
-
- return(IMPORT_PAGE_STATUS_OK);
-}
-
/** Called for every page in the tablespace. If the page was not
updated then its state must be set to BUF_PAGE_NOT_USED.
-@param offset physical offset within the file
@param block block read from file, note it is not from the buffer pool
@retval DB_SUCCESS or error code. */
dberr_t
-PageConverter::operator() (
- os_offset_t offset,
- buf_block_t* block) UNIV_NOTHROW
+PageConverter::operator() (os_offset_t, buf_block_t* block) UNIV_NOTHROW
{
+ /* If we already had an old page with matching number
+ in the buffer pool, evict it now, because
+ we no longer evict the pages on DISCARD TABLESPACE. */
+ buf_page_get_gen(block->page.id, get_page_size(),
+ RW_NO_LATCH, NULL, BUF_EVICT_IF_IN_POOL,
+ __FILE__, __LINE__, NULL, NULL);
+
ulint page_type;
- dberr_t err = DB_SUCCESS;
- if ((err = periodic_check()) != DB_SUCCESS) {
- return(err);
- }
+ dberr_t err = update_page(block, page_type);
+ if (err != DB_SUCCESS) return err;
- if (is_compressed_table()) {
- m_page_zip_ptr = &block->page.zip;
+ if (!block->page.zip.data) {
+ buf_flush_init_for_writing(
+ NULL, block->frame, NULL, m_current_lsn);
+ } else if (fil_page_type_is_index(page_type)) {
+ buf_flush_init_for_writing(
+ NULL, block->page.zip.data, &block->page.zip,
+ m_current_lsn);
} else {
- ut_ad(m_page_zip_ptr == 0);
+ /* Calculate and update the checksum of non-index
+ pages for ROW_FORMAT=COMPRESSED tables. */
+ buf_flush_update_zip_checksum(
+ block->page.zip.data, get_page_size().physical(),
+ m_current_lsn);
}
- switch (validate(offset, block)) {
- case IMPORT_PAGE_STATUS_OK:
-
- /* We have to decompress the compressed pages before
- we can work on them */
-
- if ((err = update_page(block, page_type)) != DB_SUCCESS) {
- break;
- }
-
- /* Note: For compressed pages this function will write to the
- zip descriptor and for uncompressed pages it will write to
- page (ie. the block->frame). Therefore the caller should write
- out the descriptor contents and not block->frame for compressed
- pages. */
-
- if (!is_compressed_table()
- || fil_page_type_is_index(page_type)) {
-
- buf_flush_init_for_writing(
- !is_compressed_table() ? block : NULL,
- !is_compressed_table()
- ? block->frame : block->page.zip.data,
- !is_compressed_table() ? 0 : m_page_zip_ptr,
- m_current_lsn);
- } else {
- /* Calculate and update the checksum of non-btree
- pages for compressed tables explicitly here. */
-
- buf_flush_update_zip_checksum(
- get_frame(block), get_page_size().physical(),
- m_current_lsn);
- }
-
- break;
-
- case IMPORT_PAGE_STATUS_ALL_ZERO:
- /* The page is all zero: leave it as is. */
- break;
-
- case IMPORT_PAGE_STATUS_CORRUPTED:
-
- ib::warn() << "Page " << (offset / m_page_size.physical())
- << " at offset " << offset
- << " looks corrupted in file " << m_filepath;
-
- err = DB_CORRUPTION;
- }
-
- /* If we already had and old page with matching number
- in the buffer pool, evict it now, because
- we no longer evict the pages on DISCARD TABLESPACE. */
- buf_page_get_gen(block->page.id, get_page_size(),
- RW_NO_LATCH, NULL, BUF_EVICT_IF_IN_POOL,
- __FILE__, __LINE__, NULL, NULL);
- return(err);
+ return DB_SUCCESS;
}
/*****************************************************************//**
@@ -3328,6 +3264,441 @@ row_import_update_discarded_flag(
return(err);
}
+struct fil_iterator_t {
+ pfs_os_file_t file; /*!< File handle */
+ const char* filepath; /*!< File path name */
+ os_offset_t start; /*!< From where to start */
+ os_offset_t end; /*!< Where to stop */
+ os_offset_t file_size; /*!< File size in bytes */
+ ulint n_io_buffers; /*!< Number of pages to use
+ for IO */
+ byte* io_buffer; /*!< Buffer to use for IO */
+ fil_space_crypt_t *crypt_data; /*!< Crypt data (if encrypted) */
+ byte* crypt_io_buffer; /*!< IO buffer when encrypted */
+};
+
+/********************************************************************//**
+TODO: This can be made parallel trivially by chunking up the file and creating
+a callback per thread. Main benefit will be to use multiple CPUs for
+checksums and compressed tables. We have to do compressed tables block by
+block right now. Secondly we need to decompress/compress and copy too much
+data. These are CPU intensive.
+
+Iterate over all the pages in the tablespace.
+@param iter - Tablespace iterator
+@param block - block to use for IO
+@param callback - Callback to inspect and update page contents
+@retval DB_SUCCESS or error code */
+static
+dberr_t
+fil_iterate(
+/*========*/
+ const fil_iterator_t& iter,
+ buf_block_t* block,
+ AbstractCallback& callback)
+{
+ os_offset_t offset;
+ const ulint size = callback.get_page_size().physical();
+ ulint n_bytes = iter.n_io_buffers * size;
+
+ ut_ad(!srv_read_only_mode);
+
+ /* TODO: For ROW_FORMAT=COMPRESSED tables we do a lot of useless
+ copying for non-index pages. Unfortunately, it is
+ required by buf_zip_decompress() */
+
+ for (offset = iter.start; offset < iter.end; offset += n_bytes) {
+ if (callback.is_interrupted()) {
+ return DB_INTERRUPTED;
+ }
+
+ byte* io_buffer = iter.io_buffer;
+ block->frame = io_buffer;
+
+ if (block->page.zip.data) {
+ /* Zip IO is done in the compressed page buffer. */
+ io_buffer = block->page.zip.data;
+ }
+
+ /* We have to read the exact number of bytes. Otherwise the
+ InnoDB IO functions croak on failed reads. */
+
+ n_bytes = ulint(ut_min(os_offset_t(n_bytes),
+ iter.end - offset));
+
+ ut_ad(n_bytes > 0);
+ ut_ad(!(n_bytes % size));
+
+ const bool encrypted = iter.crypt_data != NULL
+ && iter.crypt_data->should_encrypt();
+ /* Use additional crypt io buffer if tablespace is encrypted */
+ byte* const readptr = encrypted
+ ? iter.crypt_io_buffer : io_buffer;
+ byte* const writeptr = readptr;
+
+ IORequest read_request(IORequest::READ);
+ read_request.disable_partial_io_warnings();
+
+ dberr_t err = os_file_read_no_error_handling(
+ read_request, iter.file, readptr, offset, n_bytes, 0);
+ if (err != DB_SUCCESS) {
+ ib::error() << iter.filepath
+ << ": os_file_read() failed";
+ }
+
+ bool updated = false;
+ os_offset_t page_off = offset;
+ ulint n_pages_read = n_bytes / size;
+ bool decrypted = false;
+ block->page.id.set_page_no(ulint(page_off / size));
+
+ for (ulint i = 0; i < n_pages_read;
+ block->page.id.set_page_no(block->page.id.page_no() + 1),
+ ++i, page_off += size, block->frame += size) {
+ err = DB_SUCCESS;
+ byte* src = readptr + i * size;
+ byte* dst = io_buffer + i * size;
+ bool frame_changed = false;
+ ulint page_type = mach_read_from_2(src+FIL_PAGE_TYPE);
+ const bool page_compressed
+ = page_type
+ == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
+ || page_type == FIL_PAGE_PAGE_COMPRESSED;
+ const ulint page_no = page_get_page_no(src);
+ if (!page_no && page_off) {
+ const ulint* b = reinterpret_cast<const ulint*>
+ (src);
+ const ulint* const e = b + size / sizeof *b;
+ do {
+ if (*b++) {
+ goto page_corrupted;
+ }
+ } while (b != e);
+
+ /* Proceed to the next page,
+ because this one is all zero. */
+ continue;
+ }
+
+ if (page_no != page_off / size) {
+ goto page_corrupted;
+ }
+
+ if (encrypted) {
+ decrypted = fil_space_decrypt(
+ iter.crypt_data, dst,
+ callback.get_page_size(), src, &err);
+
+ if (err != DB_SUCCESS) {
+ return err;
+ }
+
+ if (decrypted) {
+ updated = true;
+ } else {
+ if (!page_compressed
+ && !block->page.zip.data) {
+ block->frame = src;
+ frame_changed = true;
+ } else {
+ memcpy(dst, src, size);
+ }
+ }
+ }
+
+ /* If the original page is page_compressed, we need
+ to decompress it before adjusting further. */
+ if (page_compressed) {
+ fil_decompress_page(NULL, dst, ulong(size),
+ NULL);
+ updated = true;
+ } else if (buf_page_is_corrupted(
+ false,
+ encrypted && !frame_changed
+ ? dst : src,
+ callback.get_page_size(), NULL)) {
+page_corrupted:
+ ib::warn() << callback.filename()
+ << ": Page " << (offset / size)
+ << " at offset " << offset
+ << " looks corrupted.";
+ return DB_CORRUPTION;
+ }
+
+ if ((err = callback(page_off, block)) != DB_SUCCESS) {
+ return err;
+ } else if (!updated) {
+ updated = buf_block_get_state(block)
+ == BUF_BLOCK_FILE_PAGE;
+ }
+
+ /* If tablespace is encrypted we use additional
+ temporary scratch area where pages are read
+ for decrypting readptr == crypt_io_buffer != io_buffer.
+
+ Destination for decryption is a buffer pool block
+ block->frame == dst == io_buffer that is updated.
+ Pages that did not require decryption even when
+ tablespace is marked as encrypted are not copied
+ instead block->frame is set to src == readptr.
+
+ For encryption we again use temporary scratch area
+ writeptr != io_buffer == dst
+ that is then written to the tablespace
+
+ (1) For normal tables io_buffer == dst == writeptr
+ (2) For only page compressed tables
+ io_buffer == dst == writeptr
+ (3) For encrypted (and page compressed)
+ readptr != io_buffer == dst != writeptr
+ */
+
+ ut_ad(!encrypted && !page_compressed ?
+ src == dst && dst == writeptr + (i * size):1);
+ ut_ad(page_compressed && !encrypted ?
+ src == dst && dst == writeptr + (i * size):1);
+ ut_ad(encrypted ?
+ src != dst && dst != writeptr + (i * size):1);
+
+ if (encrypted) {
+ memcpy(writeptr + (i * size),
+ callback.get_frame(block), size);
+ }
+
+ if (frame_changed) {
+ block->frame = dst;
+ }
+
+ src = io_buffer + (i * size);
+
+ if (page_compressed) {
+ ulint len = 0;
+
+ fil_compress_page(
+ NULL,
+ src,
+ NULL,
+ size,
+ 0,/* FIXME: compression level */
+ 512,/* FIXME: use proper block size */
+ encrypted,
+ &len);
+ ut_ad(len <= size);
+ memset(src + len, 0, size - len);
+ updated = true;
+ }
+
+ /* Encrypt the page if encryption was used. */
+ if (encrypted && decrypted) {
+ byte *dest = writeptr + i * size;
+ byte* tmp = fil_encrypt_buf(
+ iter.crypt_data,
+ block->page.id.space(),
+ block->page.id.page_no(),
+ mach_read_from_8(src + FIL_PAGE_LSN),
+ src, callback.get_page_size(), dest);
+
+ if (tmp == src) {
+ /* TODO: remove unnecessary memcpy's */
+ memcpy(dest, src, size);
+ }
+
+ updated = true;
+ }
+ }
+
+ /* A page was updated in the set, write back to disk. */
+ if (updated) {
+ IORequest write_request(IORequest::WRITE);
+
+ err = os_file_write(write_request,
+ iter.filepath, iter.file,
+ writeptr, offset, n_bytes);
+
+ if (err != DB_SUCCESS) {
+ return err;
+ }
+ }
+ }
+
+ return DB_SUCCESS;
+}
+
+/********************************************************************//**
+Iterate over all the pages in the tablespace.
+@param table - the table definition in the server
+@param n_io_buffers - number of blocks to read and write together
+@param callback - functor that will do the page updates
+@return DB_SUCCESS or error code */
+static
+dberr_t
+fil_tablespace_iterate(
+/*===================*/
+ dict_table_t* table,
+ ulint n_io_buffers,
+ AbstractCallback& callback)
+{
+ dberr_t err;
+ pfs_os_file_t file;
+ char* filepath;
+
+ ut_a(n_io_buffers > 0);
+ ut_ad(!srv_read_only_mode);
+
+ DBUG_EXECUTE_IF("ib_import_trigger_corruption_1",
+ return(DB_CORRUPTION););
+
+ /* Make sure the data_dir_path is set. */
+ dict_get_and_save_data_dir_path(table, false);
+
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ ut_a(table->data_dir_path);
+
+ filepath = fil_make_filepath(
+ table->data_dir_path, table->name.m_name, IBD, true);
+ } else {
+ filepath = fil_make_filepath(
+ NULL, table->name.m_name, IBD, false);
+ }
+
+ if (!filepath) {
+ return(DB_OUT_OF_MEMORY);
+ } else {
+ bool success;
+
+ file = os_file_create_simple_no_error_handling(
+ innodb_data_file_key, filepath,
+ OS_FILE_OPEN, OS_FILE_READ_WRITE, false, &success);
+
+ if (!success) {
+ /* The following call prints an error message */
+ os_file_get_last_error(true);
+ ib::error() << "Trying to import a tablespace,"
+ " but could not open the tablespace file "
+ << filepath;
+ ut_free(filepath);
+ return DB_TABLESPACE_NOT_FOUND;
+ } else {
+ err = DB_SUCCESS;
+ }
+ }
+
+ callback.set_file(filepath, file);
+
+ os_offset_t file_size = os_file_get_size(file);
+ ut_a(file_size != (os_offset_t) -1);
+
+ /* Allocate a page to read in the tablespace header, so that we
+ can determine the page size and zip_size (if it is compressed).
+ We allocate an extra page in case it is a compressed table. One
+ page is to ensure alignment. */
+
+ void* page_ptr = ut_malloc_nokey(3 * UNIV_PAGE_SIZE);
+ byte* page = static_cast<byte*>(ut_align(page_ptr, UNIV_PAGE_SIZE));
+
+ buf_block_t* block = reinterpret_cast<buf_block_t*>
+ (ut_zalloc_nokey(sizeof *block));
+ block->frame = page;
+ block->page.id.copy_from(page_id_t(0, 0));
+ block->page.io_fix = BUF_IO_NONE;
+ block->page.buf_fix_count = 1;
+ block->page.state = BUF_BLOCK_FILE_PAGE;
+
+ /* Read the first page and determine the page and zip size. */
+
+ IORequest request(IORequest::READ);
+ request.disable_partial_io_warnings();
+
+ err = os_file_read_no_error_handling(request, file, page, 0,
+ UNIV_PAGE_SIZE, 0);
+
+ if (err == DB_SUCCESS) {
+ err = callback.init(file_size, block);
+ }
+
+ if (err == DB_SUCCESS) {
+ block->page.id.copy_from(
+ page_id_t(callback.get_space_id(), 0));
+ block->page.size.copy_from(callback.get_page_size());
+ if (block->page.size.is_compressed()) {
+ page_zip_set_size(&block->page.zip,
+ callback.get_page_size().physical());
+ /* ROW_FORMAT=COMPRESSED is not optimised for block IO
+ for now. We do the IMPORT page by page. */
+ n_io_buffers = 1;
+ }
+
+ fil_iterator_t iter;
+
+ /* read (optional) crypt data */
+ iter.crypt_data = fil_space_read_crypt_data(
+ callback.get_page_size(), page);
+
+ /* If tablespace is encrypted, it needs extra buffers */
+ if (iter.crypt_data && n_io_buffers > 1) {
+ /* decrease io buffers so that memory
+ consumption will not double */
+ n_io_buffers /= 2;
+ }
+
+ iter.file = file;
+ iter.start = 0;
+ iter.end = file_size;
+ iter.filepath = filepath;
+ iter.file_size = file_size;
+ iter.n_io_buffers = n_io_buffers;
+
+ /* Add an extra page for compressed page scratch area. */
+ void* io_buffer = ut_malloc_nokey(
+ (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
+
+ iter.io_buffer = static_cast<byte*>(
+ ut_align(io_buffer, UNIV_PAGE_SIZE));
+
+ void* crypt_io_buffer = NULL;
+ if (iter.crypt_data) {
+ crypt_io_buffer = ut_malloc_nokey(
+ (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
+ iter.crypt_io_buffer = static_cast<byte*>(
+ ut_align(crypt_io_buffer, UNIV_PAGE_SIZE));
+ }
+
+ if (block->page.zip.ssize) {
+ ut_ad(iter.n_io_buffers == 1);
+ block->frame = iter.io_buffer;
+ block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
+ }
+
+ err = fil_iterate(iter, block, callback);
+
+ if (iter.crypt_data) {
+ fil_space_destroy_crypt_data(&iter.crypt_data);
+ }
+
+ ut_free(crypt_io_buffer);
+ ut_free(io_buffer);
+ }
+
+ if (err == DB_SUCCESS) {
+ ib::info() << "Sync to disk";
+
+ if (!os_file_flush(file)) {
+ ib::info() << "os_file_flush() failed!";
+ err = DB_IO_ERROR;
+ } else {
+ ib::info() << "Sync to disk - done!";
+ }
+ }
+
+ os_file_close(file);
+
+ ut_free(page_ptr);
+ ut_free(filepath);
+ ut_free(block);
+
+ return(err);
+}
+
/*****************************************************************//**
Imports a tablespace. The space id in the .ibd file must match the space id
of the table in the data dictionary.
@@ -3448,9 +3819,7 @@ row_import_for_mysql(
FetchIndexRootPages fetchIndexRootPages(table, trx);
err = fil_tablespace_iterate(
- table, IO_BUFFER_SIZE(
- cfg.m_page_size.physical(),
- cfg.m_page_size.physical()),
+ table, IO_BUFFER_SIZE(cfg.m_page_size.physical()),
fetchIndexRootPages);
if (err == DB_SUCCESS) {
@@ -3488,9 +3857,7 @@ row_import_for_mysql(
/* Set the IO buffer size in pages. */
err = fil_tablespace_iterate(
- table, IO_BUFFER_SIZE(
- cfg.m_page_size.physical(),
- cfg.m_page_size.physical()), converter);
+ table, IO_BUFFER_SIZE(cfg.m_page_size.physical()), converter);
DBUG_EXECUTE_IF("ib_import_reset_space_and_lsn_failure",
err = DB_TOO_MANY_CONCURRENT_TRXS;);
diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc
index 7985700367d..ef8ace7f864 100644
--- a/storage/xtradb/fil/fil0fil.cc
+++ b/storage/xtradb/fil/fil0fil.cc
@@ -25,8 +25,6 @@ Created 10/25/1995 Heikki Tuuri
*******************************************************/
#include "fil0fil.h"
-#include "fil0pagecompress.h"
-#include "fsp0pagecompress.h"
#include "fil0crypt.h"
#include <debug_sync.h>
@@ -49,12 +47,10 @@ Created 10/25/1995 Heikki Tuuri
#include "page0zip.h"
#include "trx0sys.h"
#include "row0mysql.h"
-#include "os0file.h"
#ifndef UNIV_HOTBACKUP
# include "buf0lru.h"
# include "ibuf0ibuf.h"
# include "sync0sync.h"
-# include "os0sync.h"
#else /* !UNIV_HOTBACKUP */
# include "srv0srv.h"
static ulint srv_data_read, srv_data_written;
@@ -704,7 +700,7 @@ add_size:
space->size += node->size;
}
- ulint atomic_writes = fsp_flags_get_atomic_writes(space->flags);
+ ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(space->flags);
/* printf("Opening file %s\n", node->name); */
@@ -4110,7 +4106,6 @@ fil_open_single_table_tablespace(
fsp_open_info remote;
ulint tablespaces_found = 0;
ulint valid_tablespaces_found = 0;
- ulint atomic_writes = 0;
fil_space_crypt_t* crypt_data = NULL;
#ifdef UNIV_SYNC_DEBUG
@@ -4124,8 +4119,8 @@ fil_open_single_table_tablespace(
return(DB_CORRUPTION);
}
- ut_ad(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK));
- atomic_writes = fsp_flags_get_atomic_writes(flags);
+ ut_ad(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK, id));
+ const ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags);
memset(&def, 0, sizeof(def));
memset(&dict, 0, sizeof(dict));
@@ -6151,7 +6146,8 @@ fil_io(
} else if (type == OS_FILE_WRITE) {
ut_ad(!srv_read_only_mode);
srv_stats.data_written.add(len);
- if (fil_page_is_index_page((byte *)buf)) {
+ if (mach_read_from_2(static_cast<const byte*>(buf)
+ + FIL_PAGE_TYPE) == FIL_PAGE_INDEX) {
srv_stats.index_pages_written.inc();
} else {
srv_stats.non_index_pages_written.inc();
@@ -6684,479 +6680,6 @@ fil_close(void)
}
/********************************************************************//**
-Initializes a buffer control block when the buf_pool is created. */
-static
-void
-fil_buf_block_init(
-/*===============*/
- buf_block_t* block, /*!< in: pointer to control block */
- byte* frame) /*!< in: pointer to buffer frame */
-{
- UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);
-
- block->frame = frame;
-
- block->page.io_fix = BUF_IO_NONE;
- /* There are assertions that check for this. */
- block->page.buf_fix_count = 1;
- block->page.state = BUF_BLOCK_READY_FOR_USE;
-
- page_zip_des_init(&block->page.zip);
-}
-
-struct fil_iterator_t {
- pfs_os_file_t file; /*!< File handle */
- const char* filepath; /*!< File path name */
- os_offset_t start; /*!< From where to start */
- os_offset_t end; /*!< Where to stop */
- os_offset_t file_size; /*!< File size in bytes */
- ulint page_size; /*!< Page size */
- ulint n_io_buffers; /*!< Number of pages to use
- for IO */
- byte* io_buffer; /*!< Buffer to use for IO */
- fil_space_crypt_t *crypt_data; /*!< Crypt data (if encrypted) */
- byte* crypt_io_buffer; /*!< IO buffer when encrypted */
-};
-
-/********************************************************************//**
-TODO: This can be made parallel trivially by chunking up the file and creating
-a callback per thread. . Main benefit will be to use multiple CPUs for
-checksums and compressed tables. We have to do compressed tables block by
-block right now. Secondly we need to decompress/compress and copy too much
-of data. These are CPU intensive.
-
-Iterate over all the pages in the tablespace.
-@param iter - Tablespace iterator
-@param block - block to use for IO
-@param callback - Callback to inspect and update page contents
-@retval DB_SUCCESS or error code */
-static
-dberr_t
-fil_iterate(
-/*========*/
- const fil_iterator_t& iter,
- buf_block_t* block,
- PageCallback& callback)
-{
- os_offset_t offset;
- ulint page_no = 0;
- ulint space_id = callback.get_space_id();
- ulint n_bytes = iter.n_io_buffers * iter.page_size;
-
- ut_ad(!srv_read_only_mode);
-
- /* TODO: For compressed tables we do a lot of useless
- copying for non-index pages. Unfortunately, it is
- required by buf_zip_decompress() */
- const bool row_compressed = callback.get_zip_size() > 0;
-
- for (offset = iter.start; offset < iter.end; offset += n_bytes) {
-
- byte* io_buffer = iter.io_buffer;
-
- block->frame = io_buffer;
-
- if (row_compressed) {
- page_zip_des_init(&block->page.zip);
- page_zip_set_size(&block->page.zip, iter.page_size);
- block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
- ut_d(block->page.zip.m_external = true);
- ut_ad(iter.page_size == callback.get_zip_size());
-
- /* Zip IO is done in the compressed page buffer. */
- io_buffer = block->page.zip.data;
- }
-
- /* We have to read the exact number of bytes. Otherwise the
- InnoDB IO functions croak on failed reads. */
-
- n_bytes = static_cast<ulint>(
- ut_min(static_cast<os_offset_t>(n_bytes),
- iter.end - offset));
-
- ut_ad(n_bytes > 0);
- ut_ad(!(n_bytes % iter.page_size));
-
- const bool encrypted = iter.crypt_data != NULL
- && iter.crypt_data->should_encrypt();
- /* Use additional crypt io buffer if tablespace is encrypted */
- byte* const readptr = encrypted
- ? iter.crypt_io_buffer : io_buffer;
- byte* const writeptr = readptr;
-
- if (!os_file_read(iter.file, readptr, offset, (ulint) n_bytes)) {
-
- ib_logf(IB_LOG_LEVEL_ERROR, "os_file_read() failed");
-
- return(DB_IO_ERROR);
- }
-
- bool updated = false;
- os_offset_t page_off = offset;
- ulint n_pages_read = (ulint) n_bytes / iter.page_size;
- bool decrypted = false;
-
- for (ulint i = 0; i < n_pages_read; ++i) {
- ulint size = iter.page_size;
- dberr_t err = DB_SUCCESS;
- byte* src = readptr + (i * size);
- byte* dst = io_buffer + (i * size);
- bool frame_changed = false;
-
- ulint page_type = mach_read_from_2(src+FIL_PAGE_TYPE);
-
- const bool page_compressed
- = page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
- || page_type == FIL_PAGE_PAGE_COMPRESSED;
-
- /* If tablespace is encrypted, we need to decrypt
- the page. Note that tablespaces are not in
- fil_system during import. */
- if (encrypted) {
- decrypted = fil_space_decrypt(
- iter.crypt_data,
- dst, //dst
- iter.page_size,
- src, // src
- &err); // src
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- if (decrypted) {
- updated = true;
- } else {
- if (!page_compressed && !row_compressed) {
- block->frame = src;
- frame_changed = true;
- } else {
- memcpy(dst, src, size);
- }
- }
- }
-
- /* If the original page is page_compressed, we need
- to decompress page before we can update it. */
- if (page_compressed) {
- fil_decompress_page(NULL, dst, ulong(size),
- NULL);
- updated = true;
- }
-
- buf_block_set_file_page(block, space_id, page_no++);
-
- if ((err = callback(page_off, block)) != DB_SUCCESS) {
-
- return(err);
-
- } else if (!updated) {
- updated = buf_block_get_state(block)
- == BUF_BLOCK_FILE_PAGE;
- }
-
- buf_block_set_state(block, BUF_BLOCK_NOT_USED);
- buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
-
- /* If tablespace is encrypted we use additional
- temporary scratch area where pages are read
- for decrypting readptr == crypt_io_buffer != io_buffer.
-
- Destination for decryption is a buffer pool block
- block->frame == dst == io_buffer that is updated.
- Pages that did not require decryption even when
- tablespace is marked as encrypted are not copied
- instead block->frame is set to src == readptr.
-
- For encryption we again use temporary scratch area
- writeptr != io_buffer == dst
- that is then written to the tablespace
-
- (1) For normal tables io_buffer == dst == writeptr
- (2) For only page compressed tables
- io_buffer == dst == writeptr
- (3) For encrypted (and page compressed)
- readptr != io_buffer == dst != writeptr
- */
-
- ut_ad(!encrypted && !page_compressed ?
- src == dst && dst == writeptr + (i * size):1);
- ut_ad(page_compressed && !encrypted ?
- src == dst && dst == writeptr + (i * size):1);
- ut_ad(encrypted ?
- src != dst && dst != writeptr + (i * size):1);
-
- if (encrypted) {
- memcpy(writeptr + (i * size),
- row_compressed ? block->page.zip.data :
- block->frame, size);
- }
-
- if (frame_changed) {
- block->frame = dst;
- }
-
- src = io_buffer + (i * size);
-
- if (page_compressed) {
- ulint len = 0;
-
- fil_compress_page(
- NULL,
- src,
- NULL,
- size,
- 0,/* FIXME: compression level */
- 512,/* FIXME: use proper block size */
- encrypted,
- &len);
-
- updated = true;
- }
-
- /* If tablespace is encrypted, encrypt page before we
- write it back. Note that we should not encrypt the
- buffer that is in buffer pool. */
- /* NOTE: At this stage of IMPORT the
- buffer pool is not being used at all! */
- if (decrypted && encrypted) {
- byte *dest = writeptr + (i * size);
- ulint space = mach_read_from_4(
- src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- ulint offset = mach_read_from_4(src + FIL_PAGE_OFFSET);
- ib_uint64_t lsn = mach_read_from_8(src + FIL_PAGE_LSN);
-
- byte* tmp = fil_encrypt_buf(
- iter.crypt_data,
- space,
- offset,
- lsn,
- src,
- iter.page_size == UNIV_PAGE_SIZE ? 0 : iter.page_size,
- dest);
-
- if (tmp == src) {
- /* TODO: remove unnecessary memcpy's */
- memcpy(dest, src, size);
- }
-
- updated = true;
- }
-
- page_off += iter.page_size;
- block->frame += iter.page_size;
- }
-
- /* A page was updated in the set, write back to disk. */
- if (updated
- && !os_file_write(
- iter.filepath, iter.file, writeptr,
- offset, (ulint) n_bytes)) {
-
- ib_logf(IB_LOG_LEVEL_ERROR, "os_file_write() failed");
-
- return(DB_IO_ERROR);
- }
- }
-
- return(DB_SUCCESS);
-}
-
-/********************************************************************//**
-Iterate over all the pages in the tablespace.
-@param table - the table definiton in the server
-@param n_io_buffers - number of blocks to read and write together
-@param callback - functor that will do the page updates
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fil_tablespace_iterate(
-/*===================*/
- dict_table_t* table,
- ulint n_io_buffers,
- PageCallback& callback)
-{
- dberr_t err;
- pfs_os_file_t file;
- char* filepath;
-
- ut_a(n_io_buffers > 0);
- ut_ad(!srv_read_only_mode);
-
- DBUG_EXECUTE_IF("ib_import_trigger_corruption_1",
- return(DB_CORRUPTION););
-
- if (DICT_TF_HAS_DATA_DIR(table->flags)) {
- dict_get_and_save_data_dir_path(table, false);
- ut_a(table->data_dir_path);
-
- filepath = os_file_make_remote_pathname(
- table->data_dir_path, table->name, "ibd");
- } else {
- filepath = fil_make_ibd_name(table->name, false);
- }
-
- {
- ibool success;
-
- file = os_file_create_simple_no_error_handling(
- innodb_file_data_key, filepath,
- OS_FILE_OPEN, OS_FILE_READ_WRITE, &success, FALSE);
-
- DBUG_EXECUTE_IF("fil_tablespace_iterate_failure",
- {
- static bool once;
-
- if (!once || ut_rnd_interval(0, 10) == 5) {
- once = true;
- success = FALSE;
- os_file_close(file);
- }
- });
-
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(true);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Trying to import a tablespace, but could not "
- "open the tablespace file %s", filepath);
-
- mem_free(filepath);
-
- return(DB_TABLESPACE_NOT_FOUND);
-
- } else {
- err = DB_SUCCESS;
- }
- }
-
- callback.set_file(filepath, file);
-
- os_offset_t file_size = os_file_get_size(file);
- ut_a(file_size != (os_offset_t) -1);
-
- /* The block we will use for every physical page */
- buf_block_t block;
-
- memset(&block, 0x0, sizeof(block));
-
- /* Allocate a page to read in the tablespace header, so that we
- can determine the page size and zip_size (if it is compressed).
- We allocate an extra page in case it is a compressed table. One
- page is to ensure alignement. */
-
- void* page_ptr = mem_alloc(3 * UNIV_PAGE_SIZE);
- byte* page = static_cast<byte*>(ut_align(page_ptr, UNIV_PAGE_SIZE));
-
- fil_buf_block_init(&block, page);
-
- /* Read the first page and determine the page and zip size. */
-
- if (!os_file_read(file, page, 0, UNIV_PAGE_SIZE)) {
-
- err = DB_IO_ERROR;
-
- } else if ((err = callback.init(file_size, &block)) == DB_SUCCESS) {
- fil_iterator_t iter;
-
- iter.file = file;
- iter.start = 0;
- iter.end = file_size;
- iter.filepath = filepath;
- iter.file_size = file_size;
- iter.n_io_buffers = n_io_buffers;
- iter.page_size = callback.get_page_size();
-
- /* In MariaDB/MySQL 5.6 tablespace does not exist
- during import, therefore we can't use space directly
- here. */
- ulint crypt_data_offset = fsp_header_get_crypt_offset(
- callback.get_zip_size());
-
- /* read (optional) crypt data */
- iter.crypt_data = fil_space_read_crypt_data(
- 0, page, crypt_data_offset);
-
- /* Compressed pages can't be optimised for block IO for now.
- We do the IMPORT page by page. */
-
- if (callback.get_zip_size() > 0) {
- iter.n_io_buffers = 1;
- ut_a(iter.page_size == callback.get_zip_size());
- }
-
- /** If tablespace is encrypted, it needs extra buffers */
- if (iter.crypt_data != NULL) {
- /* decrease io buffers so that memory
- * consumption doesnt double
- * note: the +1 is to avoid n_io_buffers getting down to 0 */
- iter.n_io_buffers = (iter.n_io_buffers + 1) / 2;
- }
-
- /** Add an extra page for compressed page scratch area. */
-
- void* io_buffer = mem_alloc(
- (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
-
- iter.io_buffer = static_cast<byte*>(
- ut_align(io_buffer, UNIV_PAGE_SIZE));
-
- void* crypt_io_buffer = NULL;
- if (iter.crypt_data != NULL) {
- crypt_io_buffer = mem_alloc(
- (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
- iter.crypt_io_buffer = static_cast<byte*>(
- ut_align(crypt_io_buffer, UNIV_PAGE_SIZE));
- }
-
- err = fil_iterate(iter, &block, callback);
-
- mem_free(io_buffer);
-
- if (crypt_io_buffer != NULL) {
- mem_free(crypt_io_buffer);
- iter.crypt_io_buffer = NULL;
- fil_space_destroy_crypt_data(&iter.crypt_data);
- }
- }
-
- if (err == DB_SUCCESS) {
-
- ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk");
-
- if (!os_file_flush(file)) {
- ib_logf(IB_LOG_LEVEL_INFO, "os_file_flush() failed!");
- err = DB_IO_ERROR;
- } else {
- ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk - done!");
- }
- }
-
- os_file_close(file);
-
- mem_free(page_ptr);
- mem_free(filepath);
-
- return(err);
-}
-
-/**
-Set the tablespace compressed table size.
-@return DB_SUCCESS if it is valie or DB_CORRUPTION if not */
-dberr_t
-PageCallback::set_zip_size(const buf_frame_t* page) UNIV_NOTHROW
-{
- m_zip_size = fsp_header_get_zip_size(page);
-
- if (!ut_is_2pow(m_zip_size) || m_zip_size > UNIV_ZIP_SIZE_MAX) {
- return(DB_CORRUPTION);
- }
-
- return(DB_SUCCESS);
-}
-
-/********************************************************************//**
Delete the tablespace file and any related files like .cfg.
This should not be called for temporary tables. */
UNIV_INTERN
diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h
index 8c3bf7d2b06..8a4aa9b7dff 100644
--- a/storage/xtradb/include/fil0fil.h
+++ b/storage/xtradb/include/fil0fil.h
@@ -1309,107 +1309,6 @@ fil_delete_file(
/*============*/
const char* path); /*!< in: filepath of the ibd tablespace */
-/** Callback functor. */
-struct PageCallback {
-
- /**
- Default constructor */
- PageCallback()
- :
- m_zip_size(),
- m_page_size(),
- m_filepath() UNIV_NOTHROW {}
-
- virtual ~PageCallback() UNIV_NOTHROW {}
-
- /**
- Called for page 0 in the tablespace file at the start.
- @param file_size - size of the file in bytes
- @param block - contents of the first page in the tablespace file
- @retval DB_SUCCESS or error code.*/
- virtual dberr_t init(
- os_offset_t file_size,
- const buf_block_t* block) UNIV_NOTHROW = 0;
-
- /**
- Called for every page in the tablespace. If the page was not
- updated then its state must be set to BUF_PAGE_NOT_USED. For
- compressed tables the page descriptor memory will be at offset:
- block->frame + UNIV_PAGE_SIZE;
- @param offset - physical offset within the file
- @param block - block read from file, note it is not from the buffer pool
- @retval DB_SUCCESS or error code. */
- virtual dberr_t operator()(
- os_offset_t offset,
- buf_block_t* block) UNIV_NOTHROW = 0;
-
- /**
- Set the name of the physical file and the file handle that is used
- to open it for the file that is being iterated over.
- @param filename - then physical name of the tablespace file.
- @param file - OS file handle */
- void set_file(const char* filename, pfs_os_file_t file) UNIV_NOTHROW
- {
- m_file = file;
- m_filepath = filename;
- }
-
- /**
- @return the space id of the tablespace */
- virtual ulint get_space_id() const UNIV_NOTHROW = 0;
-
- /** The compressed page size
- @return the compressed page size */
- ulint get_zip_size() const
- {
- return(m_zip_size);
- }
-
- /**
- Set the tablespace compressed table size.
- @return DB_SUCCESS if it is valie or DB_CORRUPTION if not */
- dberr_t set_zip_size(const buf_frame_t* page) UNIV_NOTHROW;
-
- /** The compressed page size
- @return the compressed page size */
- ulint get_page_size() const
- {
- return(m_page_size);
- }
-
- /** Compressed table page size */
- ulint m_zip_size;
-
- /** The tablespace page size. */
- ulint m_page_size;
-
- /** File handle to the tablespace */
- pfs_os_file_t m_file;
-
- /** Physical file path. */
- const char* m_filepath;
-
-protected:
- // Disable copying
- PageCallback(const PageCallback&);
- PageCallback& operator=(const PageCallback&);
-};
-
-/********************************************************************//**
-Iterate over all the pages in the tablespace.
-@param table - the table definiton in the server
-@param n_io_buffers - number of blocks to read and write together
-@param callback - functor that will do the page updates
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fil_tablespace_iterate(
-/*===================*/
- dict_table_t* table,
- ulint n_io_buffers,
- PageCallback& callback)
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
/*******************************************************************//**
Checks if a single-table tablespace for a given table name exists in the
tablespace memory cache.
diff --git a/storage/xtradb/mem/mem0mem.cc b/storage/xtradb/mem/mem0mem.cc
index b9f190509ee..f91126697fc 100644
--- a/storage/xtradb/mem/mem0mem.cc
+++ b/storage/xtradb/mem/mem0mem.cc
@@ -406,6 +406,11 @@ mem_heap_create_block_func(
heap->total_size += len;
}
+ /* Poison all available memory. Individual chunks will be unpoisoned on
+ every mem_heap_alloc() call. */
+ compile_time_assert(MEM_BLOCK_HEADER_SIZE >= sizeof *block);
+ UNIV_MEM_FREE(block + 1, len - sizeof *block);
+
ut_ad((ulint)MEM_BLOCK_HEADER_SIZE < len);
return(block);
diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc
index 634ebb2af49..6a63f31b37a 100644
--- a/storage/xtradb/os/os0file.cc
+++ b/storage/xtradb/os/os0file.cc
@@ -2,7 +2,7 @@
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
+Copyright (c) 2013, 2018, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
@@ -3169,15 +3169,21 @@ try_again:
overlapped.hEvent = win_get_syncio_event();
ret = ReadFile(file, buf, n, NULL, &overlapped);
if (ret) {
- ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE);
- }
- else if(GetLastError() == ERROR_IO_PENDING) {
- ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE);
+ ret = GetOverlappedResult(file, &overlapped, &len, FALSE);
+ } else if (GetLastError() == ERROR_IO_PENDING) {
+ ret = GetOverlappedResult(file, &overlapped, &len, TRUE);
}
MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_READS, monitor);
- if (ret && len == n) {
+ if (!ret) {
+ } else if (len == n) {
return(TRUE);
+ } else {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Tried to read " ULINTPF " bytes at offset "
+ UINT64PF ". Was only able to read %lu.",
+ n, offset, ret);
+ return FALSE;
}
#else /* __WIN__ */
ibool retry;
@@ -3204,6 +3210,7 @@ try_again:
"Tried to read " ULINTPF " bytes at offset "
UINT64PF ". Was only able to read %ld.",
n, offset, (lint) ret);
+ return FALSE;
}
#endif /* __WIN__ */
retry = os_file_handle_error(NULL, "read", __FILE__, __LINE__);
@@ -3272,15 +3279,21 @@ try_again:
overlapped.hEvent = win_get_syncio_event();
ret = ReadFile(file, buf, n, NULL, &overlapped);
if (ret) {
- ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE);
- }
- else if(GetLastError() == ERROR_IO_PENDING) {
- ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE);
+ ret = GetOverlappedResult(file, &overlapped, &len, FALSE);
+ } else if (GetLastError() == ERROR_IO_PENDING) {
+ ret = GetOverlappedResult(file, &overlapped, &len, TRUE);
}
MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_READS, monitor);
- if (ret && len == n) {
+ if (!ret) {
+ } else if (len == n) {
return(TRUE);
+ } else {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Tried to read " ULINTPF " bytes at offset "
+ UINT64PF ". Was only able to read %lu.",
+ n, offset, len);
+ return FALSE;
}
#else /* __WIN__ */
ibool retry;
@@ -3303,6 +3316,7 @@ try_again:
"Tried to read " ULINTPF " bytes at offset "
UINT64PF ". Was only able to read %ld.",
n, offset, (lint) ret);
+ return FALSE;
}
#endif /* __WIN__ */
retry = os_file_handle_error_no_exit(NULL, "read", FALSE, __FILE__, __LINE__);
@@ -3383,10 +3397,9 @@ retry:
overlapped.hEvent = win_get_syncio_event();
ret = WriteFile(file, buf, n, NULL, &overlapped);
if (ret) {
- ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE);
- }
- else if ( GetLastError() == ERROR_IO_PENDING) {
- ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE);
+ ret = GetOverlappedResult(file, &overlapped, &len, FALSE);
+ } else if (GetLastError() == ERROR_IO_PENDING) {
+ ret = GetOverlappedResult(file, &overlapped, &len, TRUE);
}
MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_WRITES, monitor);
@@ -6588,8 +6601,7 @@ os_file_trim(
DWORD tmp;
if (ret) {
ret = GetOverlappedResult(slot->file, &overlapped, &tmp, FALSE);
- }
- else if (GetLastError() == ERROR_IO_PENDING) {
+ } else if (GetLastError() == ERROR_IO_PENDING) {
ret = GetOverlappedResult(slot->file, &overlapped, &tmp, TRUE);
}
if (!ret) {
diff --git a/storage/xtradb/row/row0import.cc b/storage/xtradb/row/row0import.cc
index 18939d4cab9..20fc0045749 100644
--- a/storage/xtradb/row/row0import.cc
+++ b/storage/xtradb/row/row0import.cc
@@ -40,13 +40,11 @@ Created 2012-02-08 by Sunny Bains.
#include "row0mysql.h"
#include "srv0start.h"
#include "row0quiesce.h"
-#include "buf0buf.h"
+#include "fil0pagecompress.h"
#include <vector>
-/** The size of the buffer to use for IO. Note: os_file_read() doesn't expect
-reads to fail. If you set the buffer size to be greater than a multiple of the
-file size then it will assert. TODO: Fix this limitation of the IO functions.
+/** The size of the buffer to use for IO.
@param n - page size of the tablespace.
@retval number of pages */
#define IO_BUFFER_SIZE(n) ((1024 * 1024) / n)
@@ -362,7 +360,8 @@ private:
/** Functor that is called for each physical page that is read from the
tablespace file. */
-class AbstractCallback : public PageCallback {
+class AbstractCallback
+{
public:
/** Constructor
@param trx - covering transaction */
@@ -395,32 +394,62 @@ public:
return(get_zip_size() > 0);
}
-protected:
/**
- Get the data page depending on the table type, compressed or not.
- @param block - block read from disk
- @retval the buffer frame */
- buf_frame_t* get_frame(buf_block_t* block) const UNIV_NOTHROW
+ Set the name of the physical file and the file handle that is used
+ to open it for the file that is being iterated over.
+ @param filename - then physical name of the tablespace file.
+ @param file - OS file handle */
+ void set_file(const char* filename, pfs_os_file_t file) UNIV_NOTHROW
{
- if (is_compressed_table()) {
- return(block->page.zip.data);
- }
+ m_file = file;
+ m_filepath = filename;
+ }
- return(buf_block_get_frame(block));
+ /** The compressed page size
+ @return the compressed page size */
+ ulint get_zip_size() const
+ {
+ return(m_zip_size);
}
- /** Check for session interrupt. If required we could
- even flush to disk here every N pages.
- @retval DB_SUCCESS or error code */
- dberr_t periodic_check() UNIV_NOTHROW
+ /** The compressed page size
+ @return the compressed page size */
+ ulint get_page_size() const
{
- if (trx_is_interrupted(m_trx)) {
- return(DB_INTERRUPTED);
- }
+ return(m_page_size);
+ }
- return(DB_SUCCESS);
+ const char* filename() const { return m_filepath; }
+
+ /**
+ Called for every page in the tablespace. If the page was not
+ updated then its state must be set to BUF_PAGE_NOT_USED. For
+ compressed tables the page descriptor memory will be at offset:
+ block->frame + UNIV_PAGE_SIZE;
+ @param offset - physical offset within the file
+ @param block - block read from file, note it is not from the buffer pool
+ @retval DB_SUCCESS or error code. */
+ virtual dberr_t operator()(
+ os_offset_t offset,
+ buf_block_t* block) UNIV_NOTHROW = 0;
+
+ /**
+ @return the space id of the tablespace */
+ virtual ulint get_space_id() const UNIV_NOTHROW = 0;
+
+ bool is_interrupted() const { return trx_is_interrupted(m_trx); }
+
+ /**
+ Get the data page depending on the table type, compressed or not.
+ @param block - block read from disk
+ @retval the buffer frame */
+ static byte* get_frame(const buf_block_t* block)
+ {
+ return block->page.zip.data
+ ? block->page.zip.data : block->frame;
}
+protected:
/**
Get the physical offset of the extent descriptor within the page.
@param page_no - page number of the extent descriptor
@@ -510,6 +539,18 @@ protected:
}
protected:
+ /** Compressed table page size */
+ ulint m_zip_size;
+
+ /** The tablespace page size. */
+ ulint m_page_size;
+
+ /** File handle to the tablespace */
+ pfs_os_file_t m_file;
+
+ /** Physical file path. */
+ const char* m_filepath;
+
/** Covering transaction. */
trx_t* m_trx;
@@ -566,9 +607,9 @@ AbstractCallback::init(
/* Since we don't know whether it is a compressed table
or not, the data is always read into the block->frame. */
- dberr_t err = set_zip_size(block->frame);
+ m_zip_size = fsp_header_get_zip_size(page);
- if (err != DB_SUCCESS) {
+ if (!ut_is_2pow(m_zip_size) || m_zip_size > UNIV_ZIP_SIZE_MAX) {
return(DB_CORRUPTION);
}
@@ -605,11 +646,7 @@ AbstractCallback::init(
m_free_limit = mach_read_from_4(page + FSP_FREE_LIMIT);
m_space = mach_read_from_4(page + FSP_HEADER_OFFSET + FSP_SPACE_ID);
- if ((err = set_current_xdes(0, page)) != DB_SUCCESS) {
- return(err);
- }
-
- return(DB_SUCCESS);
+ return set_current_xdes(0, page);
}
/**
@@ -682,11 +719,7 @@ FetchIndexRootPages::operator() (
os_offset_t offset,
buf_block_t* block) UNIV_NOTHROW
{
- dberr_t err;
-
- if ((err = periodic_check()) != DB_SUCCESS) {
- return(err);
- }
+ if (is_interrupted()) return DB_INTERRUPTED;
const page_t* page = get_frame(block);
@@ -699,9 +732,9 @@ FetchIndexRootPages::operator() (
block->page.offset,
(ulint) (offset / m_page_size));
- err = DB_CORRUPTION;
+ return DB_CORRUPTION;
} else if (page_type == FIL_PAGE_TYPE_XDES) {
- err = set_current_xdes(block->page.offset, page);
+ return set_current_xdes(block->page.offset, page);
} else if (page_type == FIL_PAGE_INDEX
&& !is_free(block->page.offset)
&& is_root_page(page)) {
@@ -726,7 +759,7 @@ FetchIndexRootPages::operator() (
}
}
- return(err);
+ return DB_SUCCESS;
}
/**
@@ -850,14 +883,6 @@ public:
os_offset_t offset,
buf_block_t* block) UNIV_NOTHROW;
private:
-
- /** Status returned by PageConverter::validate() */
- enum import_page_status_t {
- IMPORT_PAGE_STATUS_OK, /*!< Page is OK */
- IMPORT_PAGE_STATUS_ALL_ZERO, /*!< Page is all zeros */
- IMPORT_PAGE_STATUS_CORRUPTED /*!< Page is corrupted */
- };
-
/**
Update the page, set the space id, max trx id and index id.
@param block - block read from file
@@ -867,17 +892,6 @@ private:
buf_block_t* block,
ulint& page_type) UNIV_NOTHROW;
-#if defined UNIV_DEBUG
- /**
- @return true error condition is enabled. */
- bool trigger_corruption() UNIV_NOTHROW
- {
- return(false);
- }
- #else
-#define trigger_corruption() (false)
-#endif /* UNIV_DEBUG */
-
/**
Update the space, index id, trx id.
@param block - block to convert
@@ -891,15 +905,6 @@ private:
dberr_t update_records(buf_block_t* block) UNIV_NOTHROW;
/**
- Validate the page, check for corruption.
- @param offset - physical offset within file.
- @param page - page read from file.
- @return 0 on success, 1 if all zero, 2 if corrupted */
- import_page_status_t validate(
- os_offset_t offset,
- buf_block_t* page) UNIV_NOTHROW;
-
- /**
Validate the space flags and update tablespace header page.
@param block - block read from file, not from the buffer pool.
@retval DB_SUCCESS or error code */
@@ -1316,8 +1321,8 @@ row_import::match_schema(
return(DB_ERROR);
} else if (m_table->n_cols != m_n_cols) {
ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
- "Number of columns don't match, table has %u "
- "columns but the tablespace meta-data file has "
+ "Number of columns don't match, table has %u"
+ " columns but the tablespace meta-data file has "
ULINTPF " columns",
m_table->n_cols, m_n_cols);
@@ -1597,6 +1602,7 @@ IndexPurge::purge() UNIV_NOTHROW
Constructor
* @param cfg - config of table being imported.
* @param trx - transaction covering the import */
+inline
PageConverter::PageConverter(
row_import* cfg,
trx_t* trx)
@@ -1621,6 +1627,7 @@ Adjust the BLOB reference for a single column that is externally stored
@param offsets - column offsets for the record
@param i - column ordinal value
@return DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::adjust_cluster_index_blob_column(
rec_t* rec,
@@ -1673,6 +1680,7 @@ stored columns.
@param rec - record to update
@param offsets - column offsets for the record
@return DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::adjust_cluster_index_blob_columns(
rec_t* rec,
@@ -1706,6 +1714,7 @@ BLOB reference, write the new space id.
@param rec - record to update
@param offsets - column offsets for the record
@return DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::adjust_cluster_index_blob_ref(
rec_t* rec,
@@ -1729,6 +1738,7 @@ Purge delete-marked records, only if it is possible to do so without
re-organising the B+tree.
@param offsets - current row offsets.
@return true if purge succeeded */
+inline
bool
PageConverter::purge(const ulint* offsets) UNIV_NOTHROW
{
@@ -1753,6 +1763,7 @@ Adjust the BLOB references and sys fields for the current record.
@param offsets - column offsets for the record
@param deleted - true if row is delete marked
@return DB_SUCCESS or error code. */
+inline
dberr_t
PageConverter::adjust_cluster_record(
const dict_index_t* index,
@@ -1781,6 +1792,7 @@ Update the BLOB refrences and write UNDO log entries for
rows that can't be purged optimistically.
@param block - block to update
@retval DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::update_records(
buf_block_t* block) UNIV_NOTHROW
@@ -1846,6 +1858,7 @@ PageConverter::update_records(
/**
Update the space, index id, trx id.
@return DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::update_index_page(
buf_block_t* block) UNIV_NOTHROW
@@ -1915,6 +1928,7 @@ PageConverter::update_index_page(
Validate the space flags and update tablespace header page.
@param block - block read from file, not from the buffer pool.
@retval DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::update_header(
buf_block_t* block) UNIV_NOTHROW
@@ -1954,6 +1968,7 @@ PageConverter::update_header(
Update the page, set the space id, max trx id and index id.
@param block - block read from file
@retval DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::update_page(
buf_block_t* block,
@@ -1961,6 +1976,14 @@ PageConverter::update_page(
{
dberr_t err = DB_SUCCESS;
+ ut_ad(!block->page.zip.data == !is_compressed_table());
+
+ if (block->page.zip.data) {
+ m_page_zip_ptr = &block->page.zip;
+ } else {
+ ut_ad(!m_page_zip_ptr);
+ }
+
switch (page_type = fil_page_get_type(get_frame(block))) {
case FIL_PAGE_TYPE_FSP_HDR:
/* Work directly on the uncompressed page headers. */
@@ -2016,140 +2039,45 @@ PageConverter::update_page(
}
/**
-Validate the page
-@param offset - physical offset within file.
-@param page - page read from file.
-@return status */
-PageConverter::import_page_status_t
-PageConverter::validate(
- os_offset_t offset,
- buf_block_t* block) UNIV_NOTHROW
-{
- buf_frame_t* page = get_frame(block);
-
- /* Check that the page number corresponds to the offset in
- the file. Flag as corrupt if it doesn't. Disable the check
- for LSN in buf_page_is_corrupted() */
-
- if (buf_page_is_corrupted(false, page, get_zip_size(), NULL)
- || (page_get_page_no(page) != offset / m_page_size
- && page_get_page_no(page) != 0)) {
-
- return(IMPORT_PAGE_STATUS_CORRUPTED);
-
- } else if (offset > 0 && page_get_page_no(page) == 0) {
- ulint checksum;
-
- checksum = mach_read_from_4(page + FIL_PAGE_SPACE_OR_CHKSUM);
- if (checksum != 0) {
- /* Checksum check passed in buf_page_is_corrupted(). */
- ib_logf(IB_LOG_LEVEL_WARN,
- "%s: Page %lu checksum " ULINTPF
- " should be zero.",
- m_filepath, (ulong) (offset / m_page_size),
- checksum);
- }
-
- const byte* b = page + FIL_PAGE_OFFSET;
- const byte* e = page + m_page_size
- - FIL_PAGE_END_LSN_OLD_CHKSUM;
-
- /* If the page number is zero and offset > 0 then
- the entire page MUST consist of zeroes. If not then
- we flag it as corrupt. */
-
- while (b != e) {
-
- if (*b++ && !trigger_corruption()) {
- return(IMPORT_PAGE_STATUS_CORRUPTED);
- }
- }
-
- /* The page is all zero: do nothing. */
- return(IMPORT_PAGE_STATUS_ALL_ZERO);
- }
-
- return(IMPORT_PAGE_STATUS_OK);
-}
-
-/**
Called for every page in the tablespace. If the page was not
updated then its state must be set to BUF_PAGE_NOT_USED.
-@param offset - physical offset within the file
@param block - block read from file, note it is not from the buffer pool
@retval DB_SUCCESS or error code. */
dberr_t
-PageConverter::operator() (
- os_offset_t offset,
- buf_block_t* block) UNIV_NOTHROW
+PageConverter::operator() (os_offset_t, buf_block_t* block) UNIV_NOTHROW
{
- ulint page_type;
- dberr_t err = DB_SUCCESS;
-
- if ((err = periodic_check()) != DB_SUCCESS) {
- return(err);
- }
-
- if (is_compressed_table()) {
- m_page_zip_ptr = &block->page.zip;
- } else {
- ut_ad(m_page_zip_ptr == 0);
- }
-
- switch(validate(offset, block)) {
- case IMPORT_PAGE_STATUS_OK:
-
- /* We have to decompress the compressed pages before
- we can work on them */
-
- if ((err = update_page(block, page_type)) != DB_SUCCESS) {
- break;
- }
-
- /* Note: For compressed pages this function will write to the
- zip descriptor and for uncompressed pages it will write to
- page (ie. the block->frame). Therefore the caller should write
- out the descriptor contents and not block->frame for compressed
- pages. */
-
- if (!is_compressed_table() || page_type == FIL_PAGE_INDEX) {
-
- buf_flush_init_for_writing(
- !is_compressed_table()
- ? block->frame : block->page.zip.data,
- !is_compressed_table() ? 0 : m_page_zip_ptr,
- m_current_lsn);
- } else {
- /* Calculate and update the checksum of non-btree
- pages for compressed tables explicitly here. */
-
- buf_flush_update_zip_checksum(
- get_frame(block), get_zip_size(),
- m_current_lsn);
- }
-
- break;
+ /* If we already had an old page with matching number
+ in the buffer pool, evict it now, because
+ we no longer evict the pages on DISCARD TABLESPACE. */
+ buf_page_get_gen(get_space_id(), get_zip_size(), block->page.offset,
+ RW_NO_LATCH, NULL, BUF_EVICT_IF_IN_POOL,
+ __FILE__, __LINE__, NULL);
- case IMPORT_PAGE_STATUS_ALL_ZERO:
- /* The page is all zero: leave it as is. */
- break;
+ ulint page_type;
- case IMPORT_PAGE_STATUS_CORRUPTED:
+ dberr_t err = update_page(block, page_type);
+ if (err != DB_SUCCESS) return err;
- ib_logf(IB_LOG_LEVEL_WARN,
- "%s: Page %lu at offset " UINT64PF " looks corrupted.",
- m_filepath, (ulong) (offset / m_page_size), offset);
+ /* Note: For compressed pages this function will write to the
+ zip descriptor and for uncompressed pages it will write to
+ page (ie. the block->frame). Therefore the caller should write
+ out the descriptor contents and not block->frame for compressed
+ pages. */
- err = DB_CORRUPTION;
+ if (!is_compressed_table() || page_type == FIL_PAGE_INDEX) {
+ buf_flush_init_for_writing(
+ get_frame(block),
+ block->page.zip.data ? &block->page.zip : NULL,
+ m_current_lsn);
+ } else {
+ /* Calculate and update the checksum of non-btree
+ pages for compressed tables explicitly here. */
+ buf_flush_update_zip_checksum(
+ get_frame(block), get_zip_size(),
+ m_current_lsn);
}
- /* If we already had and old page with matching number
- in the buffer pool, evict it now, because
- we no longer evict the pages on DISCARD TABLESPACE. */
- buf_page_get_gen(get_space_id(), get_zip_size(), block->page.offset,
- RW_NO_LATCH, NULL, BUF_EVICT_IF_IN_POOL,
- __FILE__, __LINE__, NULL);
- return(err);
+ return DB_SUCCESS;
}
/*****************************************************************//**
@@ -3424,6 +3352,460 @@ row_import_update_discarded_flag(
return(err);
}
+struct fil_iterator_t {
+ pfs_os_file_t file; /*!< File handle */
+ const char* filepath; /*!< File path name */
+ os_offset_t start; /*!< From where to start */
+ os_offset_t end; /*!< Where to stop */
+ os_offset_t file_size; /*!< File size in bytes */
+ ulint page_size; /*!< Page size */
+ ulint n_io_buffers; /*!< Number of pages to use
+ for IO */
+ byte* io_buffer; /*!< Buffer to use for IO */
+ fil_space_crypt_t *crypt_data; /*!< Crypt data (if encrypted) */
+ byte* crypt_io_buffer; /*!< IO buffer when encrypted */
+};
+
+/********************************************************************//**
+TODO: This can be made parallel trivially by chunking up the file and creating
+a callback per thread. Main benefit will be to use multiple CPUs for
+checksums and compressed tables. We have to do compressed tables block by
+block right now. Secondly we need to decompress/compress and copy too much
+of data. These are CPU intensive.
+
+Iterate over all the pages in the tablespace.
+@param iter - Tablespace iterator
+@param block - block to use for IO
+@param callback - Callback to inspect and update page contents
+@retval DB_SUCCESS or error code */
+static
+dberr_t
+fil_iterate(
+/*========*/
+ const fil_iterator_t& iter,
+ buf_block_t* block,
+ AbstractCallback& callback)
+{
+ os_offset_t offset;
+ ulint n_bytes = iter.n_io_buffers * iter.page_size;
+
+ ut_ad(!srv_read_only_mode);
+
+ /* TODO: For ROW_FORMAT=COMPRESSED tables we do a lot of useless
+ copying for non-index pages. Unfortunately, it is
+ required by buf_zip_decompress() */
+
+ for (offset = iter.start; offset < iter.end; offset += n_bytes) {
+ if (callback.is_interrupted()) {
+ return DB_INTERRUPTED;
+ }
+
+ byte* io_buffer = iter.io_buffer;
+ block->frame = io_buffer;
+
+ if (block->page.zip.data) {
+ /* Zip IO is done in the compressed page buffer. */
+ io_buffer = block->page.zip.data;
+ ut_ad(PAGE_ZIP_MATCH(block->frame, &block->page.zip));
+ }
+
+ /* We have to read the exact number of bytes. Otherwise the
+ InnoDB IO functions croak on failed reads. */
+
+ n_bytes = ulint(ut_min(os_offset_t(n_bytes),
+ iter.end - offset));
+
+ ut_ad(n_bytes > 0);
+ ut_ad(!(n_bytes % iter.page_size));
+
+ const bool encrypted = iter.crypt_data != NULL
+ && iter.crypt_data->should_encrypt();
+ /* Use additional crypt io buffer if tablespace is encrypted */
+ byte* const readptr = encrypted
+ ? iter.crypt_io_buffer : io_buffer;
+ byte* const writeptr = readptr;
+
+ if (!os_file_read_no_error_handling(iter.file, readptr,
+ offset, n_bytes)) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "os_file_read() failed");
+ return DB_IO_ERROR;
+ }
+
+ bool updated = false;
+ os_offset_t page_off = offset;
+ ulint n_pages_read = (ulint) n_bytes / iter.page_size;
+ bool decrypted = false;
+ const ulint size = iter.page_size;
+ block->page.offset = page_off / size;
+
+ for (ulint i = 0; i < n_pages_read;
+ ++i, page_off += size, block->frame += size,
+ block->page.offset++) {
+ dberr_t err = DB_SUCCESS;
+ byte* src = readptr + (i * size);
+ byte* dst = io_buffer + (i * size);
+ bool frame_changed = false;
+ ulint page_type = mach_read_from_2(src+FIL_PAGE_TYPE);
+ const bool page_compressed
+ = page_type
+ == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
+ || page_type == FIL_PAGE_PAGE_COMPRESSED;
+ const ulint page_no = page_get_page_no(src);
+ if (!page_no && page_off) {
+ const ulint* b = reinterpret_cast<const ulint*>
+ (src);
+ const ulint* const e = b + size / sizeof *b;
+ do {
+ if (*b++) {
+ goto page_corrupted;
+ }
+ } while (b != e);
+
+ /* Proceed to the next page,
+ because this one is all zero. */
+ continue;
+ }
+
+ if (page_no != page_off / size) {
+ goto page_corrupted;
+ }
+
+ if (encrypted) {
+ decrypted = fil_space_decrypt(
+ iter.crypt_data, dst,
+ iter.page_size, src, &err);
+
+ if (err != DB_SUCCESS) {
+ return err;
+ }
+
+ if (decrypted) {
+ updated = true;
+ } else {
+ if (!page_compressed
+ && !block->page.zip.data) {
+ block->frame = src;
+ frame_changed = true;
+ } else {
+ memcpy(dst, src, size);
+ }
+ }
+ }
+
+ /* If the original page is page_compressed, we need
+ to decompress it before adjusting further. */
+ if (page_compressed) {
+ fil_decompress_page(NULL, dst, ulong(size),
+ NULL);
+ updated = true;
+ } else if (buf_page_is_corrupted(
+ false,
+ encrypted && !frame_changed
+ ? dst : src,
+ callback.get_zip_size(), NULL)) {
+page_corrupted:
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "%s: Page %lu at offset "
+ UINT64PF " looks corrupted.",
+ callback.filename(),
+ ulong(offset / size), offset);
+ return DB_CORRUPTION;
+ }
+
+ if ((err = callback(page_off, block)) != DB_SUCCESS) {
+ return err;
+ } else if (!updated) {
+ updated = buf_block_get_state(block)
+ == BUF_BLOCK_FILE_PAGE;
+ }
+
+ /* If tablespace is encrypted we use additional
+ temporary scratch area where pages are read
+ for decrypting readptr == crypt_io_buffer != io_buffer.
+
+ Destination for decryption is a buffer pool block
+ block->frame == dst == io_buffer that is updated.
+ Pages that did not require decryption even when
+ tablespace is marked as encrypted are not copied
+ instead block->frame is set to src == readptr.
+
+ For encryption we again use temporary scratch area
+ writeptr != io_buffer == dst
+ that is then written to the tablespace
+
+ (1) For normal tables io_buffer == dst == writeptr
+ (2) For only page compressed tables
+ io_buffer == dst == writeptr
+ (3) For encrypted (and page compressed)
+ readptr != io_buffer == dst != writeptr
+ */
+
+ ut_ad(!encrypted && !page_compressed ?
+ src == dst && dst == writeptr + (i * size):1);
+ ut_ad(page_compressed && !encrypted ?
+ src == dst && dst == writeptr + (i * size):1);
+ ut_ad(encrypted ?
+ src != dst && dst != writeptr + (i * size):1);
+
+ if (encrypted) {
+ memcpy(writeptr + (i * size),
+ callback.get_frame(block), size);
+ }
+
+ if (frame_changed) {
+ block->frame = dst;
+ }
+
+ src = io_buffer + (i * size);
+
+ if (page_compressed) {
+ ulint len = 0;
+
+ fil_compress_page(
+ NULL,
+ src,
+ NULL,
+ size,
+ 0,/* FIXME: compression level */
+ 512,/* FIXME: use proper block size */
+ encrypted,
+ &len);
+
+ updated = true;
+ }
+
+ /* If tablespace is encrypted, encrypt page before we
+ write it back. Note that we should not encrypt the
+ buffer that is in buffer pool. */
+ /* NOTE: At this stage of IMPORT the
+ buffer pool is not being used at all! */
+ if (decrypted && encrypted) {
+ byte *dest = writeptr + (i * size);
+ ulint space = mach_read_from_4(
+ src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+ ulint offset = mach_read_from_4(src + FIL_PAGE_OFFSET);
+ ib_uint64_t lsn = mach_read_from_8(src + FIL_PAGE_LSN);
+
+ byte* tmp = fil_encrypt_buf(
+ iter.crypt_data,
+ space,
+ offset,
+ lsn,
+ src,
+ iter.page_size == UNIV_PAGE_SIZE ? 0 : iter.page_size,
+ dest);
+
+ if (tmp == src) {
+ /* TODO: remove unnecessary memcpy's */
+ memcpy(dest, src, size);
+ }
+
+ updated = true;
+ }
+ }
+
+ /* A page was updated in the set, write back to disk. */
+ if (updated
+ && !os_file_write(
+ iter.filepath, iter.file, writeptr,
+ offset, (ulint) n_bytes)) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR, "os_file_write() failed");
+ return DB_IO_ERROR;
+ }
+ }
+
+ return DB_SUCCESS;
+}
+
+/********************************************************************//**
+Iterate over all the pages in the tablespace.
+@param table - the table definition in the server
+@param n_io_buffers - number of blocks to read and write together
+@param callback - functor that will do the page updates
+@return DB_SUCCESS or error code */
+static
+dberr_t
+fil_tablespace_iterate(
+/*===================*/
+ dict_table_t* table,
+ ulint n_io_buffers,
+ AbstractCallback& callback)
+{
+ dberr_t err;
+ pfs_os_file_t file;
+ char* filepath;
+
+ ut_a(n_io_buffers > 0);
+ ut_ad(!srv_read_only_mode);
+
+ DBUG_EXECUTE_IF("ib_import_trigger_corruption_1",
+ return(DB_CORRUPTION););
+
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ dict_get_and_save_data_dir_path(table, false);
+ ut_a(table->data_dir_path);
+
+ filepath = os_file_make_remote_pathname(
+ table->data_dir_path, table->name, "ibd");
+ } else {
+ filepath = fil_make_ibd_name(table->name, false);
+ }
+
+ {
+ ibool success;
+
+ file = os_file_create_simple_no_error_handling(
+ innodb_file_data_key, filepath,
+ OS_FILE_OPEN, OS_FILE_READ_WRITE, &success, FALSE);
+
+ DBUG_EXECUTE_IF("fil_tablespace_iterate_failure",
+ {
+ static bool once;
+
+ if (!once || ut_rnd_interval(0, 10) == 5) {
+ once = true;
+ success = FALSE;
+ os_file_close(file);
+ }
+ });
+
+ if (!success) {
+ /* The following call prints an error message */
+ os_file_get_last_error(true);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Trying to import a tablespace, but could not "
+ "open the tablespace file %s", filepath);
+
+ mem_free(filepath);
+
+ return(DB_TABLESPACE_NOT_FOUND);
+
+ } else {
+ err = DB_SUCCESS;
+ }
+ }
+
+ callback.set_file(filepath, file);
+
+ os_offset_t file_size = os_file_get_size(file);
+ ut_a(file_size != (os_offset_t) -1);
+
+ /* Allocate a page to read in the tablespace header, so that we
+ can determine the page size and zip_size (if it is compressed).
+ We allocate an extra page in case it is a compressed table. One
+page is to ensure alignment. */
+
+ void* page_ptr = mem_alloc(3 * UNIV_PAGE_SIZE);
+ byte* page = static_cast<byte*>(ut_align(page_ptr, UNIV_PAGE_SIZE));
+
+ /* The block we will use for every physical page */
+ buf_block_t block;
+
+ memset(&block, 0, sizeof block);
+ block.frame = page;
+ block.page.space = callback.get_space_id();
+ block.page.io_fix = BUF_IO_NONE;
+ block.page.buf_fix_count = 1;
+ block.page.state = BUF_BLOCK_FILE_PAGE;
+
+ /* Read the first page and determine the page and zip size. */
+
+ if (!os_file_read_no_error_handling(file, page, 0, UNIV_PAGE_SIZE)) {
+
+ err = DB_IO_ERROR;
+
+ } else if ((err = callback.init(file_size, &block)) == DB_SUCCESS) {
+ if (const ulint zip_size = callback.get_zip_size()) {
+ page_zip_set_size(&block.page.zip, zip_size);
+ /* ROW_FORMAT=COMPRESSED is not optimised for block IO
+ for now. We do the IMPORT page by page. */
+ n_io_buffers = 1;
+ }
+
+ fil_iterator_t iter;
+
+ iter.file = file;
+ iter.start = 0;
+ iter.end = file_size;
+ iter.filepath = filepath;
+ iter.file_size = file_size;
+ iter.n_io_buffers = n_io_buffers;
+ iter.page_size = callback.get_page_size();
+
+ /* In MariaDB/MySQL 5.6 tablespace does not exist
+ during import, therefore we can't use space directly
+ here. */
+ ulint crypt_data_offset = fsp_header_get_crypt_offset(
+ callback.get_zip_size());
+
+ /* read (optional) crypt data */
+ iter.crypt_data = fil_space_read_crypt_data(
+ 0, page, crypt_data_offset);
+
+ /** If tablespace is encrypted, it needs extra buffers */
+ if (iter.crypt_data != NULL) {
+ /* decrease io buffers so that memory
+			 * consumption doesn't double
+ * note: the +1 is to avoid n_io_buffers getting down to 0 */
+ iter.n_io_buffers = (iter.n_io_buffers + 1) / 2;
+ }
+
+ /** Add an extra page for compressed page scratch area. */
+
+ void* io_buffer = mem_alloc(
+ (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
+
+ iter.io_buffer = static_cast<byte*>(
+ ut_align(io_buffer, UNIV_PAGE_SIZE));
+
+ void* crypt_io_buffer = NULL;
+ if (iter.crypt_data != NULL) {
+ crypt_io_buffer = mem_alloc(
+ (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
+ iter.crypt_io_buffer = static_cast<byte*>(
+ ut_align(crypt_io_buffer, UNIV_PAGE_SIZE));
+ }
+
+ if (block.page.zip.ssize) {
+ ut_ad(iter.n_io_buffers == 1);
+ block.frame = iter.io_buffer;
+ block.page.zip.data = block.frame + UNIV_PAGE_SIZE;
+ ut_d(block.page.zip.m_external = true);
+ }
+
+ err = fil_iterate(iter, &block, callback);
+
+ mem_free(io_buffer);
+
+ if (crypt_io_buffer != NULL) {
+ mem_free(crypt_io_buffer);
+ iter.crypt_io_buffer = NULL;
+ fil_space_destroy_crypt_data(&iter.crypt_data);
+ }
+ }
+
+ if (err == DB_SUCCESS) {
+
+ ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk");
+
+ if (!os_file_flush(file)) {
+ ib_logf(IB_LOG_LEVEL_INFO, "os_file_flush() failed!");
+ err = DB_IO_ERROR;
+ } else {
+ ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk - done!");
+ }
+ }
+
+ os_file_close(file);
+
+ mem_free(page_ptr);
+ mem_free(filepath);
+
+ return(err);
+}
+
/*****************************************************************//**
Imports a tablespace. The space id in the .ibd file must match the space id
of the table in the data dictionary.