summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2018-03-21 16:18:21 +0200
committerMarko Mäkelä <marko.makela@mariadb.com>2018-03-21 22:58:52 +0200
commit3d7915f000b15ad296e3ff18d750f36a4f17de0e (patch)
treea5abe2dd6e6b8a633130597aa08937fcc92c965b
parent82aeb6b59640b9733c4026bda71887720153b70a (diff)
parent4629db0dd6442ea7c2d3ecd636060bc4d21f2d19 (diff)
downloadmariadb-git-3d7915f000b15ad296e3ff18d750f36a4f17de0e.tar.gz
Merge 10.1 into 10.2
-rw-r--r--include/my_valgrind.h2
-rw-r--r--mysql-test/r/having.result14
-rw-r--r--mysql-test/suite/engines/iuds/r/update_time.result2
-rw-r--r--mysql-test/suite/engines/iuds/t/update_time.test1
-rw-r--r--mysql-test/suite/galera/disabled.def1
-rw-r--r--mysql-test/suite/galera/t/galera_var_reject_queries.test2
-rw-r--r--mysql-test/suite/innodb/r/default_row_format_compatibility.result3
-rw-r--r--mysql-test/suite/innodb/t/default_row_format_compatibility.test7
-rw-r--r--mysql-test/suite/plugins/r/disks.result14
-rw-r--r--mysql-test/suite/plugins/t/disks.test11
-rw-r--r--mysql-test/t/having.test18
-rw-r--r--plugin/information_schema_disks/CMakeLists.txt5
-rw-r--r--plugin/information_schema_disks/README.txt101
-rw-r--r--plugin/information_schema_disks/information_schema_disks.cc154
-rw-r--r--storage/innobase/fil/fil0fil.cc494
-rw-r--r--storage/innobase/include/fil0fil.h84
-rw-r--r--storage/innobase/mem/mem0mem.cc5
-rw-r--r--storage/innobase/os/os0file.cc128
-rw-r--r--storage/innobase/row/row0import.cc713
-rw-r--r--storage/xtradb/fil/fil0fil.cc487
-rw-r--r--storage/xtradb/include/fil0fil.h101
-rw-r--r--storage/xtradb/mem/mem0mem.cc5
-rw-r--r--storage/xtradb/os/os0file.cc46
-rw-r--r--storage/xtradb/row/row0import.cc760
24 files changed, 1538 insertions, 1620 deletions
diff --git a/include/my_valgrind.h b/include/my_valgrind.h
index 870fb453179..cb0886bda46 100644
--- a/include/my_valgrind.h
+++ b/include/my_valgrind.h
@@ -35,6 +35,8 @@
# define MEM_CHECK_DEFINED(a,len) VALGRIND_CHECK_MEM_IS_DEFINED(a,len)
#elif defined(__SANITIZE_ADDRESS__)
# include <sanitizer/asan_interface.h>
+/* How to do manual poisoning:
+https://github.com/google/sanitizers/wiki/AddressSanitizerManualPoisoning */
# define MEM_UNDEFINED(a,len) ASAN_UNPOISON_MEMORY_REGION(a,len)
# define MEM_NOACCESS(a,len) ASAN_POISON_MEMORY_REGION(a,len)
# define MEM_CHECK_ADDRESSABLE(a,len) ((void) 0)
diff --git a/mysql-test/r/having.result b/mysql-test/r/having.result
index fca4c43ed20..c64d0579962 100644
--- a/mysql-test/r/having.result
+++ b/mysql-test/r/having.result
@@ -721,6 +721,20 @@ SELECT * FROM t1 JOIN t2 ON c1 = c2 HAVING c2 > 'a' ORDER BY c2 LIMIT 1;
c1 c2
x x
DROP TABLE t1,t2;
+#
+# MDEV-6736: Valgrind warnings 'Invalid read' in subselect_engine::calc_const_tables with SQ
+# in WHERE and HAVING, ORDER BY, materialization+semijoin
+#
+CREATE TABLE t1 (a INT) ENGINE=MyISAM;
+INSERT INTO t1 VALUES (3),(8);
+CREATE TABLE t2 (b INT) ENGINE=MyISAM;
+INSERT INTO t2 VALUES (2),(1);
+SELECT a FROM t1
+WHERE 9 IN ( SELECT MIN( a ) FROM t1 )
+HAVING a <> ( SELECT COUNT(*) FROM t2 )
+ORDER BY a;
+a
+DROP TABLE t1,t2;
End of 10.0 tests
#
# MDEV-10716: Assertion `real_type() != FIELD_ITEM' failed in
diff --git a/mysql-test/suite/engines/iuds/r/update_time.result b/mysql-test/suite/engines/iuds/r/update_time.result
index 48ddb82d521..131780059dd 100644
--- a/mysql-test/suite/engines/iuds/r/update_time.result
+++ b/mysql-test/suite/engines/iuds/r/update_time.result
@@ -1215,7 +1215,7 @@ c1 c2
838:59:59 838:59:59
UPDATE IGNORE t1 SET t1.c2='99999.99999' WHERE c1 BETWEEN 080000 AND 100000;
Warnings:
-Warning 1265 Data truncated for column 'c2' at row 1
+Warning 1265 Data truncated for column 'c2' at row N
SELECT * FROM t1;
c1 c2
-12:12:12 12:12:12
diff --git a/mysql-test/suite/engines/iuds/t/update_time.test b/mysql-test/suite/engines/iuds/t/update_time.test
index 5ce69b46986..bdfe81d3c7c 100644
--- a/mysql-test/suite/engines/iuds/t/update_time.test
+++ b/mysql-test/suite/engines/iuds/t/update_time.test
@@ -172,6 +172,7 @@ SELECT * FROM t1;
# Update using range
# EXPLAIN SELECT * FROM t1 WHERE c1 BETWEEN 080000 AND 100000;
+--replace_regex /(Data truncated for column 'c2' at row) [1-9][0-9]*/\1 N/
UPDATE IGNORE t1 SET t1.c2='99999.99999' WHERE c1 BETWEEN 080000 AND 100000;
--sorted_result
SELECT * FROM t1;
diff --git a/mysql-test/suite/galera/disabled.def b/mysql-test/suite/galera/disabled.def
index f6bf945ed1f..29673ce5884 100644
--- a/mysql-test/suite/galera/disabled.def
+++ b/mysql-test/suite/galera/disabled.def
@@ -56,3 +56,4 @@ galera_ist_progress: MDEV-15236 galera_ist_progress fails when trying to read tr
galera_gtid : MDEV-13549 Galera test failures 10.1
galera_gtid_slave : MDEV-13549 Galera test failures 10.1
galera_unicode_identifiers : MDEV-13549 Galera test failures 10.1
+galera.galera_gcs_fc_limit : MDEV-13549 Galera test failures 10.1
diff --git a/mysql-test/suite/galera/t/galera_var_reject_queries.test b/mysql-test/suite/galera/t/galera_var_reject_queries.test
index b1af9d8aa2b..6859855c35f 100644
--- a/mysql-test/suite/galera/t/galera_var_reject_queries.test
+++ b/mysql-test/suite/galera/t/galera_var_reject_queries.test
@@ -23,7 +23,7 @@ SELECT * FROM t1;
SET GLOBAL wsrep_reject_queries = ALL_KILL;
--connection node_1a
---error ER_CONNECTION_KILLED,2013
+--error ER_CONNECTION_KILLED,2013,2006
SELECT * FROM t1;
--connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1
diff --git a/mysql-test/suite/innodb/r/default_row_format_compatibility.result b/mysql-test/suite/innodb/r/default_row_format_compatibility.result
index c0fb8f029a5..d63a3955d0d 100644
--- a/mysql-test/suite/innodb/r/default_row_format_compatibility.result
+++ b/mysql-test/suite/innodb/r/default_row_format_compatibility.result
@@ -41,6 +41,9 @@ SHOW TABLE STATUS LIKE 'tab';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
tab InnoDB # Compact # # # # # # NULL # NULL NULL latin1_swedish_ci NULL
ALTER TABLE tab DISCARD TABLESPACE;
+call mtr.add_suppression("InnoDB: Tried to read .* bytes at offset 0");
+ALTER TABLE tab IMPORT TABLESPACE;
+ERROR HY000: Internal error: Cannot reset LSNs in table `test`.`tab` : I/O error
ALTER TABLE tab IMPORT TABLESPACE;
SELECT * FROM tab;
a
diff --git a/mysql-test/suite/innodb/t/default_row_format_compatibility.test b/mysql-test/suite/innodb/t/default_row_format_compatibility.test
index 17ab21ca06b..0f433b1fcfe 100644
--- a/mysql-test/suite/innodb/t/default_row_format_compatibility.test
+++ b/mysql-test/suite/innodb/t/default_row_format_compatibility.test
@@ -81,7 +81,14 @@ SHOW TABLE STATUS LIKE 'tab';
ALTER TABLE tab DISCARD TABLESPACE;
# Move the *ibd,*.cfg file into orginal location
+--copy_file $MYSQLD_DATADIR/tab.cfg $MYSQLD_DATADIR/test/tab.ibd
--move_file $MYSQLD_DATADIR/tab.cfg $MYSQLD_DATADIR/test/tab.cfg
+
+call mtr.add_suppression("InnoDB: Tried to read .* bytes at offset 0");
+
+--error ER_INTERNAL_ERROR
+ALTER TABLE tab IMPORT TABLESPACE;
+--remove_file $MYSQLD_DATADIR/test/tab.ibd
--move_file $MYSQLD_DATADIR/tab.ibd $MYSQLD_DATADIR/test/tab.ibd
# Check import is successful (because same row_format)
diff --git a/mysql-test/suite/plugins/r/disks.result b/mysql-test/suite/plugins/r/disks.result
new file mode 100644
index 00000000000..dba8db046d3
--- /dev/null
+++ b/mysql-test/suite/plugins/r/disks.result
@@ -0,0 +1,14 @@
+install plugin DISKS soname 'disks';
+show create table information_schema.disks;
+Table Create Table
+DISKS CREATE TEMPORARY TABLE `DISKS` (
+ `Disk` varchar(4096) NOT NULL DEFAULT '',
+ `Path` varchar(4096) NOT NULL DEFAULT '',
+ `Total` int(32) NOT NULL DEFAULT 0,
+ `Used` int(32) NOT NULL DEFAULT 0,
+ `Available` int(32) NOT NULL DEFAULT 0
+) ENGINE=MEMORY DEFAULT CHARSET=utf8
+select sum(Total) > sum(Available), sum(Total)>sum(Used) from information_schema.disks;
+sum(Total) > sum(Available) sum(Total)>sum(Used)
+1 1
+uninstall plugin DISKS;
diff --git a/mysql-test/suite/plugins/t/disks.test b/mysql-test/suite/plugins/t/disks.test
new file mode 100644
index 00000000000..a2371b97584
--- /dev/null
+++ b/mysql-test/suite/plugins/t/disks.test
@@ -0,0 +1,11 @@
+--source include/not_windows.inc
+
+if (!$DISKS_SO) {
+ skip No DISKS plugin;
+}
+
+install plugin DISKS soname 'disks';
+show create table information_schema.disks;
+select sum(Total) > sum(Available), sum(Total)>sum(Used) from information_schema.disks;
+
+uninstall plugin DISKS;
diff --git a/mysql-test/t/having.test b/mysql-test/t/having.test
index 3675a09d82f..c9231fef3be 100644
--- a/mysql-test/t/having.test
+++ b/mysql-test/t/having.test
@@ -759,6 +759,24 @@ SELECT * FROM t1 JOIN t2 ON c1 = c2 HAVING c2 > 'a' ORDER BY c2 LIMIT 1;
DROP TABLE t1,t2;
+--echo #
+--echo # MDEV-6736: Valgrind warnings 'Invalid read' in subselect_engine::calc_const_tables with SQ
+--echo # in WHERE and HAVING, ORDER BY, materialization+semijoin
+--echo #
+
+CREATE TABLE t1 (a INT) ENGINE=MyISAM;
+INSERT INTO t1 VALUES (3),(8);
+
+CREATE TABLE t2 (b INT) ENGINE=MyISAM;
+INSERT INTO t2 VALUES (2),(1);
+
+SELECT a FROM t1
+WHERE 9 IN ( SELECT MIN( a ) FROM t1 )
+HAVING a <> ( SELECT COUNT(*) FROM t2 )
+ORDER BY a;
+
+DROP TABLE t1,t2;
+
--echo End of 10.0 tests
--echo #
diff --git a/plugin/information_schema_disks/CMakeLists.txt b/plugin/information_schema_disks/CMakeLists.txt
new file mode 100644
index 00000000000..a0ed929c62c
--- /dev/null
+++ b/plugin/information_schema_disks/CMakeLists.txt
@@ -0,0 +1,5 @@
+IF(NOT WIN32)
+ INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/sql)
+ MYSQL_ADD_PLUGIN(DISKS information_schema_disks.cc MODULE_ONLY RECOMPILE_FOR_EMBEDDED)
+ENDIF()
+
diff --git a/plugin/information_schema_disks/README.txt b/plugin/information_schema_disks/README.txt
new file mode 100644
index 00000000000..1c9b8fb6283
--- /dev/null
+++ b/plugin/information_schema_disks/README.txt
@@ -0,0 +1,101 @@
+Information Schema Disks
+------------------------
+This is a proof-of-concept information schema plugin that allows the
+disk space situation to be monitored. When installed, it can be used
+as follows:
+
+ > select * from information_schema.disks;
+ +-----------+-----------------------+-----------+----------+-----------+
+ | Disk | Path | Total | Used | Available |
+ +-----------+-----------------------+-----------+----------+-----------+
+ | /dev/sda3 | / | 47929956 | 30666304 | 14805864 |
+ | /dev/sda1 | /boot/efi | 191551 | 3461 | 188090 |
+ | /dev/sda4 | /home | 174679768 | 80335392 | 85448120 |
+ | /dev/sdb1 | /mnt/hdd | 961301832 | 83764 | 912363644 |
+ | /dev/sdb1 | /home/wikman/Music | 961301832 | 83764 | 912363644 |
+ | /dev/sdb1 | /home/wikman/Videos | 961301832 | 83764 | 912363644 |
+ | /dev/sdb1 | /home/wikman/hdd | 961301832 | 83764 | 912363644 |
+ | /dev/sdb1 | /home/wikman/Pictures | 961301832 | 83764 | 912363644 |
+ | /dev/sda3 | /var/lib/docker/aufs | 47929956 | 30666304 | 14805864 |
+ +-----------+-----------------------+-----------+----------+-----------+
+ 9 rows in set (0.00 sec)
+
+- 'Disk' is the name of the disk itself.
+- 'Path' is the mount point of the disk.
+- 'Total' is the total space in KiB.
+- 'Used' is the used amount of space in KiB, and
+- 'Available' is the amount of space in KiB available to non-root users.
+
+Note that as the amount of space available to root may be more than what
+is available to non-root users, 'available' + 'used' may be less than 'total'.
+
+All paths to which a particular disk has been mounted are reported. The
+rationale is that someone might want to take different action e.g. depending
+on which disk is relevant for a particular path. This leads to the same disk
+being reported multiple times. An alternative to this would be to have two
+tables; disks and mounts.
+
+ > select * from information_schema.disks;
+ +-----------+-----------+----------+-----------+
+ | Disk | Total | Used | Available |
+ +-----------+-----------+----------+-----------+
+ | /dev/sda3 | 47929956 | 30666304 | 14805864 |
+ | /dev/sda1 | 191551 | 3461 | 188090 |
+ | /dev/sda4 | 174679768 | 80335392 | 85448120 |
+ | /dev/sdb1 | 961301832 | 83764 | 912363644 |
+ +-----------+-----------+----------+-----------+
+
+ > select * from information_schema.mounts;
+ +-----------------------+-----------+
+ | Path | Disk |
+ +-----------------------+-----------+
+ | / | /dev/sda3 |
+ | /boot/efi | /dev/sda1 |
+ | /home | /dev/sda4 |
+ | /mnt/hdd | /dev/sdb1 |
+ | /home/wikman/Music | /dev/sdb1 |
+ ...
+
+
+Building
+--------
+- Ensure that the directory information_schema_disks is in the top-level
+ directory of the server.
+- Add
+
+ ADD_SUBDIRECTORY(information_schema_disks)
+
+ to the top-level CMakeLists.txt
+
+- Invoke make
+
+ $ make
+
+Installation
+------------
+- Copy information_schema_disks/libinformation_schema_disks.so to the plugin
+ directory of the server:
+
+ $ cd information_schema_disks
+ $ sudo cp libinformation_schema_disks.so plugin-directory-of-server
+
+- Using mysql, install the plugin:
+
+ MariaDB [(none)]> install plugin disks soname 'libinformation_schema_disks.so';
+
+Usage
+-----
+The plugin appears as the table 'disks' in 'information_schema'.
+
+ MariaDB [(none)]> select * from information_schema.disks;
+ +-----------+-----------------------+-----------+----------+-----------+
+ | Disk | Path | Total | Used | Available |
+ +-----------+-----------------------+-----------+----------+-----------+
+ | /dev/sda3 | / | 47929956 | 30666308 | 14805860 |
+ | /dev/sda1 | /boot/efi | 191551 | 3461 | 188090 |
+ | /dev/sda4 | /home | 174679768 | 80348148 | 85435364 |
+ | /dev/sdb1 | /mnt/hdd | 961301832 | 83764 | 912363644 |
+ | /dev/sdb1 | /home/wikman/Music | 961301832 | 83764 | 912363644 |
+ | /dev/sdb1 | /home/wikman/Videos | 961301832 | 83764 | 912363644 |
+ ...
+
diff --git a/plugin/information_schema_disks/information_schema_disks.cc b/plugin/information_schema_disks/information_schema_disks.cc
new file mode 100644
index 00000000000..b5e3a6dc728
--- /dev/null
+++ b/plugin/information_schema_disks/information_schema_disks.cc
@@ -0,0 +1,154 @@
+/*
+ Copyright (c) 2017, MariaDB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
+
+#include <sys/statvfs.h>
+#include <sys/types.h>
+#include <mntent.h>
+#include <sql_class.h>
+#include <table.h>
+#include <innodb_priv.h>
+
+namespace
+{
+
+struct st_mysql_information_schema disks_table_info = { MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION };
+
+ST_FIELD_INFO disks_table_fields[]=
+{
+ { "Disk", PATH_MAX, MYSQL_TYPE_STRING, 0, 0 ,0, 0 },
+ { "Path", PATH_MAX, MYSQL_TYPE_STRING, 0, 0 ,0, 0 },
+ { "Total", 32, MYSQL_TYPE_LONG, 0, 0 ,0 ,0 }, // Total amount available
+ { "Used", 32, MYSQL_TYPE_LONG, 0, 0 ,0 ,0 }, // Amount of space used
+ { "Available", 32, MYSQL_TYPE_LONG, 0, 0 ,0 ,0 }, // Amount available to users other than root.
+ { 0, 0, MYSQL_TYPE_NULL, 0, 0, 0, 0 }
+};
+
+int disks_table_add_row(THD* pThd,
+ TABLE* pTable,
+ const char* zDisk,
+ const char* zPath,
+ const struct statvfs& info)
+{
+ // From: http://pubs.opengroup.org/onlinepubs/009695399/basedefs/sys/statvfs.h.html
+ //
+ // f_frsize Fundamental file system block size.
+ // f_blocks Total number of blocks on file system in units of f_frsize.
+ // f_bfree Total number of free blocks.
+ // f_bavail Number of free blocks available to non-privileged process.
+
+ size_t total = (info.f_frsize * info.f_blocks) / 1024;
+ size_t used = (info.f_frsize * (info.f_blocks - info.f_bfree)) / 1024;
+ size_t avail = (info.f_frsize * info.f_bavail) / 1024;
+
+ pTable->field[0]->store(zDisk, strlen(zDisk), system_charset_info);
+ pTable->field[1]->store(zPath, strlen(zPath), system_charset_info);
+ pTable->field[2]->store(total);
+ pTable->field[3]->store(used);
+ pTable->field[4]->store(avail);
+
+ // 0 means success.
+ return (schema_table_store_record(pThd, pTable) != 0) ? 1 : 0;
+}
+
+int disks_table_add_row(THD* pThd, TABLE* pTable, const char* zDisk, const char* zPath)
+{
+ int rv = 0;
+
+ struct statvfs info;
+
+ if (statvfs(zPath, &info) == 0) // We ignore failures.
+ {
+ rv = disks_table_add_row(pThd, pTable, zDisk, zPath, info);
+ }
+
+ return rv;
+}
+
+int disks_fill_table(THD* pThd, TABLE_LIST* pTables, Item* pCond)
+{
+ int rv = 1;
+ TABLE* pTable = pTables->table;
+
+ FILE* pFile = setmntent("/etc/mtab", "r");
+
+ if (pFile)
+ {
+ const size_t BUFFER_SIZE = 4096; // 4K should be sufficient.
+
+ char* pBuffer = new (std::nothrow) char [BUFFER_SIZE];
+
+ if (pBuffer)
+ {
+ rv = 0;
+
+ struct mntent ent;
+ struct mntent* pEnt;
+
+ while ((rv == 0) && (pEnt = getmntent_r(pFile, &ent, pBuffer, BUFFER_SIZE)))
+ {
+ // We only report the ones that refer to physical disks.
+ if (pEnt->mnt_fsname[0] == '/')
+ {
+ rv = disks_table_add_row(pThd, pTable, pEnt->mnt_fsname, pEnt->mnt_dir);
+ }
+ }
+
+ delete [] pBuffer;
+ }
+ else
+ {
+ rv = 1;
+ }
+
+ endmntent(pFile);
+ }
+
+ return rv;
+}
+
+int disks_table_init(void *ptr)
+{
+ ST_SCHEMA_TABLE* pSchema_table = (ST_SCHEMA_TABLE*)ptr;
+
+ pSchema_table->fields_info = disks_table_fields;
+ pSchema_table->fill_table = disks_fill_table;
+ return 0;
+}
+
+}
+
+extern "C"
+{
+
+mysql_declare_plugin(disks_library)
+{
+ MYSQL_INFORMATION_SCHEMA_PLUGIN,
+ &disks_table_info, /* type-specific descriptor */
+ "DISKS", /* table name */
+ "MariaDB", /* author */
+ "Disk space information", /* description */
+ PLUGIN_LICENSE_GPL, /* license type */
+ disks_table_init, /* init function */
+ NULL,
+ 0x0100, /* version = 1.0 */
+ NULL, /* no status variables */
+ NULL, /* no system variables */
+ NULL, /* no reserved information */
+ 0 /* no flags */
+}
+mysql_declare_plugin_end;
+
+}
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index dc8952f8bdf..3b4bb51aba1 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -24,9 +24,7 @@ The tablespace memory cache
Created 10/25/1995 Heikki Tuuri
*******************************************************/
-#include "ha_prototypes.h"
-#include "fil0pagecompress.h"
-#include "fsp0pagecompress.h"
+#include "fil0fil.h"
#include "fil0crypt.h"
#include "btr0btr.h"
@@ -5682,496 +5680,6 @@ fil_close(void)
}
/********************************************************************//**
-Initializes a buffer control block when the buf_pool is created. */
-static
-void
-fil_buf_block_init(
-/*===============*/
- buf_block_t* block, /*!< in: pointer to control block */
- byte* frame) /*!< in: pointer to buffer frame */
-{
- UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);
-
- block->frame = frame;
-
- block->page.io_fix = BUF_IO_NONE;
- /* There are assertions that check for this. */
- block->page.buf_fix_count = 1;
- block->page.state = BUF_BLOCK_READY_FOR_USE;
-
- page_zip_des_init(&block->page.zip);
-}
-
-struct fil_iterator_t {
- pfs_os_file_t file; /*!< File handle */
- const char* filepath; /*!< File path name */
- os_offset_t start; /*!< From where to start */
- os_offset_t end; /*!< Where to stop */
- os_offset_t file_size; /*!< File size in bytes */
- ulint page_size; /*!< Page size */
- ulint n_io_buffers; /*!< Number of pages to use
- for IO */
- byte* io_buffer; /*!< Buffer to use for IO */
- fil_space_crypt_t *crypt_data; /*!< MariaDB Crypt data (if encrypted) */
- byte* crypt_io_buffer; /*!< MariaDB IO buffer when
- encrypted */
- dict_table_t* table; /*!< Imported table */
-};
-
-/********************************************************************//**
-TODO: This can be made parallel trivially by chunking up the file and creating
-a callback per thread. Main benefit will be to use multiple CPUs for
-checksums and compressed tables. We have to do compressed tables block by
-block right now. Secondly we need to decompress/compress and copy too much
-of data. These are CPU intensive.
-
-Iterate over all the pages in the tablespace.
-@param iter Tablespace iterator
-@param block block to use for IO
-@param callback Callback to inspect and update page contents
-@retval DB_SUCCESS or error code */
-static
-dberr_t
-fil_iterate(
-/*========*/
- const fil_iterator_t& iter,
- buf_block_t* block,
- PageCallback& callback)
-{
- os_offset_t offset;
- ulint page_no = 0;
- ulint space_id = callback.get_space_id();
- ulint n_bytes = iter.n_io_buffers * iter.page_size;
-
- ut_ad(!srv_read_only_mode);
-
- /* TODO: For compressed tables we do a lot of useless
- copying for non-index pages. Unfortunately, it is
- required by buf_zip_decompress() */
- const bool row_compressed
- = callback.get_page_size().is_compressed();
-
- for (offset = iter.start; offset < iter.end; offset += n_bytes) {
-
- byte* io_buffer = iter.io_buffer;
-
- block->frame = io_buffer;
-
- if (row_compressed) {
- page_zip_des_init(&block->page.zip);
- page_zip_set_size(&block->page.zip, iter.page_size);
-
- block->page.size.copy_from(
- page_size_t(iter.page_size,
- univ_page_size.logical(),
- true));
-
- block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
- ut_d(block->page.zip.m_external = true);
- ut_ad(iter.page_size
- == callback.get_page_size().physical());
-
- /* Zip IO is done in the compressed page buffer. */
- io_buffer = block->page.zip.data;
- }
-
- /* We have to read the exact number of bytes. Otherwise the
- InnoDB IO functions croak on failed reads. */
-
- n_bytes = static_cast<ulint>(
- ut_min(static_cast<os_offset_t>(n_bytes),
- iter.end - offset));
-
- ut_ad(n_bytes > 0);
- ut_ad(!(n_bytes % iter.page_size));
-
- const bool encrypted = iter.crypt_data != NULL
- && iter.crypt_data->should_encrypt();
- /* Use additional crypt io buffer if tablespace is encrypted */
- byte* const readptr = encrypted
- ? iter.crypt_io_buffer : io_buffer;
- byte* const writeptr = readptr;
- IORequest read_request(IORequest::READ);
- dberr_t err = os_file_read(
- read_request, iter.file, readptr, offset,
- (ulint) n_bytes);
-
- if (err != DB_SUCCESS) {
-
- ib::error() << "os_file_read() failed";
-
- return(err);
- }
-
- bool updated = false;
- os_offset_t page_off = offset;
- ulint n_pages_read = (ulint) n_bytes / iter.page_size;
- bool decrypted = false;
-
- for (ulint i = 0; i < n_pages_read; ++i) {
- ulint size = iter.page_size;
- dberr_t err = DB_SUCCESS;
- byte* src = readptr + (i * size);
- byte* dst = io_buffer + (i * size);
- bool frame_changed = false;
-
- ulint page_type = mach_read_from_2(src+FIL_PAGE_TYPE);
-
- const bool page_compressed
- = page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
- || page_type == FIL_PAGE_PAGE_COMPRESSED;
-
- /* If tablespace is encrypted, we need to decrypt
- the page. Note that tablespaces are not in
- fil_system during import. */
- if (encrypted) {
- decrypted = fil_space_decrypt(
- iter.crypt_data,
- dst, //dst
- callback.get_page_size(),
- src, // src
- &err); // src
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- if (decrypted) {
- updated = true;
- } else if (!page_compressed
- && !row_compressed) {
- block->frame = src;
- frame_changed = true;
- } else {
- memcpy(dst, src, size);
- }
- }
-
- /* If the original page is page_compressed, we need
- to decompress page before we can update it. */
- if (page_compressed) {
- fil_decompress_page(NULL, dst, ulong(size),
- NULL);
- updated = true;
- }
-
- buf_block_set_file_page(
- block, page_id_t(space_id, page_no++));
-
- if ((err = callback(page_off, block)) != DB_SUCCESS) {
-
- return(err);
-
- } else if (!updated) {
- updated = buf_block_get_state(block)
- == BUF_BLOCK_FILE_PAGE;
- }
-
- buf_block_set_state(block, BUF_BLOCK_NOT_USED);
- buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
-
- /* If tablespace is encrypted we use additional
- temporary scratch area where pages are read
- for decrypting readptr == crypt_io_buffer != io_buffer.
-
- Destination for decryption is a buffer pool block
- block->frame == dst == io_buffer that is updated.
- Pages that did not require decryption even when
- tablespace is marked as encrypted are not copied
- instead block->frame is set to src == readptr.
-
- For encryption we again use temporary scratch area
- writeptr != io_buffer == dst
- that is then written to the tablespace
-
- (1) For normal tables io_buffer == dst == writeptr
- (2) For only page compressed tables
- io_buffer == dst == writeptr
- (3) For encrypted (and page compressed)
- readptr != io_buffer == dst != writeptr
- */
-
- ut_ad(!encrypted && !page_compressed ?
- src == dst && dst == writeptr + (i * size):1);
- ut_ad(page_compressed && !encrypted ?
- src == dst && dst == writeptr + (i * size):1);
- ut_ad(encrypted ?
- src != dst && dst != writeptr + (i * size):1);
-
- if (encrypted) {
- memcpy(writeptr + (i * size),
- row_compressed ? block->page.zip.data :
- block->frame, size);
- }
-
- if (frame_changed) {
- block->frame = dst;
- }
-
- src = io_buffer + (i * size);
-
- if (page_compressed) {
- ulint len = 0;
-
- byte * res = fil_compress_page(
- NULL,
- src,
- NULL,
- size,
- dict_table_page_compression_level(iter.table),
- 512,/* FIXME: use proper block size */
- encrypted,
- &len);
-
- if (len != size) {
- memset(res+len, 0, size-len);
- }
-
- updated = true;
- }
-
- /* If tablespace is encrypted, encrypt page before we
- write it back. Note that we should not encrypt the
- buffer that is in buffer pool. */
- /* NOTE: At this stage of IMPORT the
- buffer pool is not being used at all! */
- if (decrypted && encrypted) {
- byte *dest = writeptr + (i * size);
- ulint space = mach_read_from_4(
- src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- ulint offset = mach_read_from_4(src + FIL_PAGE_OFFSET);
- ib_uint64_t lsn = mach_read_from_8(src + FIL_PAGE_LSN);
-
- byte* tmp = fil_encrypt_buf(
- iter.crypt_data,
- space,
- offset,
- lsn,
- src,
- callback.get_page_size(),
- dest);
-
- if (tmp == src) {
- /* TODO: remove unnecessary memcpy's */
- memcpy(dest, src, iter.page_size);
- }
-
- updated = true;
- }
-
- page_off += iter.page_size;
- block->frame += iter.page_size;
- }
-
- IORequest write_request(IORequest::WRITE);
-
- /* A page was updated in the set, write back to disk.
- Note: We don't have the compression algorithm, we write
- out the imported file as uncompressed. */
-
- if (updated
- && (err = os_file_write(
- write_request,
- iter.filepath, iter.file, writeptr,
- offset, (ulint) n_bytes)) != DB_SUCCESS) {
-
- ib::error() << "os_file_write() failed";
- return(err);
- }
-
- /* Clean up the temporal buffer. */
- memset(writeptr, 0, n_bytes);
- }
-
- return(DB_SUCCESS);
-}
-
-/********************************************************************//**
-Iterate over all the pages in the tablespace.
-@param table the table definiton in the server
-@param n_io_buffers number of blocks to read and write together
-@param callback functor that will do the page updates
-@return DB_SUCCESS or error code */
-dberr_t
-fil_tablespace_iterate(
-/*===================*/
- dict_table_t* table,
- ulint n_io_buffers,
- PageCallback& callback)
-{
- dberr_t err;
- pfs_os_file_t file;
- char* filepath;
- bool success;
-
- ut_a(n_io_buffers > 0);
- ut_ad(!srv_read_only_mode);
-
- DBUG_EXECUTE_IF("ib_import_trigger_corruption_1",
- return(DB_CORRUPTION););
-
- /* Make sure the data_dir_path is set. */
- dict_get_and_save_data_dir_path(table, false);
-
- if (DICT_TF_HAS_DATA_DIR(table->flags)) {
- ut_a(table->data_dir_path);
-
- filepath = fil_make_filepath(
- table->data_dir_path, table->name.m_name, IBD, true);
- } else {
- filepath = fil_make_filepath(
- NULL, table->name.m_name, IBD, false);
- }
-
- if (filepath == NULL) {
- return(DB_OUT_OF_MEMORY);
- }
-
- file = os_file_create_simple_no_error_handling(
- innodb_data_file_key, filepath,
- OS_FILE_OPEN, OS_FILE_READ_WRITE, srv_read_only_mode, &success);
-
- DBUG_EXECUTE_IF("fil_tablespace_iterate_failure",
- {
- static bool once;
-
- if (!once || ut_rnd_interval(0, 10) == 5) {
- once = true;
- success = false;
- os_file_close(file);
- }
- });
-
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(true);
-
- ib::error() << "Trying to import a tablespace, but could not"
- " open the tablespace file " << filepath;
-
- ut_free(filepath);
-
- return(DB_TABLESPACE_NOT_FOUND);
-
- } else {
- err = DB_SUCCESS;
- }
-
- callback.set_file(filepath, file);
-
- os_offset_t file_size = os_file_get_size(file);
- ut_a(file_size != (os_offset_t) -1);
-
- /* The block we will use for every physical page */
- buf_block_t* block;
-
- block = reinterpret_cast<buf_block_t*>(ut_zalloc_nokey(sizeof(*block)));
-
- mutex_create(LATCH_ID_BUF_BLOCK_MUTEX, &block->mutex);
-
- /* Allocate a page to read in the tablespace header, so that we
- can determine the page size and zip size (if it is compressed).
- We allocate an extra page in case it is a compressed table. One
- page is to ensure alignement. */
-
- void* page_ptr = ut_malloc_nokey(3 * UNIV_PAGE_SIZE);
- byte* page = static_cast<byte*>(ut_align(page_ptr, UNIV_PAGE_SIZE));
-
- fil_buf_block_init(block, page);
-
- /* Read the first page and determine the page and zip size. */
-
- IORequest request(IORequest::READ);
-
- err = os_file_read(request, file, page, 0, UNIV_PAGE_SIZE);
-
- if (err != DB_SUCCESS) {
-
- err = DB_IO_ERROR;
-
- } else if ((err = callback.init(file_size, block)) == DB_SUCCESS) {
- fil_iterator_t iter;
-
- iter.file = file;
- iter.start = 0;
- iter.end = file_size;
- iter.filepath = filepath;
- iter.file_size = file_size;
- iter.n_io_buffers = n_io_buffers;
- iter.page_size = callback.get_page_size().physical();
- iter.table = table;
-
- /* read (optional) crypt data */
- iter.crypt_data = fil_space_read_crypt_data(
- callback.get_page_size(), page);
-
- if (err == DB_SUCCESS) {
-
- /* Compressed pages can't be optimised for block IO
- for now. We do the IMPORT page by page. */
-
- if (callback.get_page_size().is_compressed()) {
- iter.n_io_buffers = 1;
- ut_a(iter.page_size
- == callback.get_page_size().physical());
- }
-
- /** Add an extra page for compressed page scratch
- area. */
- void* io_buffer = ut_malloc_nokey(
- (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
-
- iter.io_buffer = static_cast<byte*>(
- ut_align(io_buffer, UNIV_PAGE_SIZE));
-
- void* crypt_io_buffer;
- if (iter.crypt_data) {
- crypt_io_buffer = static_cast<byte*>(
- ut_malloc_nokey((2 + iter.n_io_buffers)
- * UNIV_PAGE_SIZE));
- iter.crypt_io_buffer = static_cast<byte*>(
- ut_align(crypt_io_buffer,
- UNIV_PAGE_SIZE));
- } else {
- crypt_io_buffer = NULL;
- }
-
- err = fil_iterate(iter, block, callback);
-
- if (iter.crypt_data) {
- fil_space_destroy_crypt_data(&iter.crypt_data);
- }
-
- ut_free(io_buffer);
- ut_free(crypt_io_buffer);
- }
- }
-
- if (err == DB_SUCCESS) {
-
- ib::info() << "Sync to disk";
-
- if (!os_file_flush(file)) {
- ib::info() << "os_file_flush() failed!";
- err = DB_IO_ERROR;
- } else {
- ib::info() << "Sync to disk - done!";
- }
- }
-
- os_file_close(file);
-
- ut_free(page_ptr);
- ut_free(filepath);
-
- mutex_free(&block->mutex);
-
- ut_free(block);
-
- return(err);
-}
-
-/********************************************************************//**
Delete the tablespace file and any related files like .cfg.
This should not be called for temporary tables.
@param[in] ibd_filepath File path of the IBD tablespace */
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 249755f1ff6..d79aa0415a6 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -1391,90 +1391,6 @@ fil_delete_file(
/*============*/
const char* path); /*!< in: filepath of the ibd tablespace */
-/** Callback functor. */
-struct PageCallback {
-
- /** Default constructor */
- PageCallback()
- :
- m_page_size(0, 0, false),
- m_filepath() UNIV_NOTHROW {}
-
- virtual ~PageCallback() UNIV_NOTHROW {}
-
- /** Called for page 0 in the tablespace file at the start.
- @param file_size size of the file in bytes
- @param block contents of the first page in the tablespace file
- @retval DB_SUCCESS or error code. */
- virtual dberr_t init(
- os_offset_t file_size,
- const buf_block_t* block) UNIV_NOTHROW = 0;
-
- /** Called for every page in the tablespace. If the page was not
- updated then its state must be set to BUF_PAGE_NOT_USED. For
- compressed tables the page descriptor memory will be at offset:
- block->frame + UNIV_PAGE_SIZE;
- @param offset physical offset within the file
- @param block block read from file, note it is not from the buffer pool
- @retval DB_SUCCESS or error code. */
- virtual dberr_t operator()(
- os_offset_t offset,
- buf_block_t* block) UNIV_NOTHROW = 0;
-
- /** Set the name of the physical file and the file handle that is used
- to open it for the file that is being iterated over.
- @param filename the name of the tablespace file
- @param file OS file handle */
- void set_file(const char* filename, pfs_os_file_t file) UNIV_NOTHROW
- {
- m_file = file;
- m_filepath = filename;
- }
-
- /**
- @return the space id of the tablespace */
- virtual ulint get_space_id() const UNIV_NOTHROW = 0;
-
- /**
- @retval the space flags of the tablespace being iterated over */
- virtual ulint get_space_flags() const UNIV_NOTHROW = 0;
-
- /** The compressed page size
- @return the compressed page size */
- const page_size_t& get_page_size() const
- {
- return(m_page_size);
- }
-
- /** The tablespace page size. */
- page_size_t m_page_size;
-
- /** File handle to the tablespace */
- pfs_os_file_t m_file;
-
- /** Physical file path. */
- const char* m_filepath;
-
-protected:
- // Disable copying
- PageCallback(const PageCallback&);
- PageCallback& operator=(const PageCallback&);
-};
-
-/********************************************************************//**
-Iterate over all the pages in the tablespace.
-@param table the table definiton in the server
-@param n_io_buffers number of blocks to read and write together
-@param callback functor that will do the page updates
-@return DB_SUCCESS or error code */
-dberr_t
-fil_tablespace_iterate(
-/*===================*/
- dict_table_t* table,
- ulint n_io_buffers,
- PageCallback& callback)
- MY_ATTRIBUTE((warn_unused_result));
-
/********************************************************************//**
Looks for a pre-existing fil_space_t with the given tablespace ID
and, if found, returns the name and filepath in newly allocated buffers that the caller must free.
diff --git a/storage/innobase/mem/mem0mem.cc b/storage/innobase/mem/mem0mem.cc
index ca83891d459..c101980d543 100644
--- a/storage/innobase/mem/mem0mem.cc
+++ b/storage/innobase/mem/mem0mem.cc
@@ -348,6 +348,11 @@ mem_heap_create_block_func(
heap->total_size += len;
}
+ /* Poison all available memory. Individual chunks will be unpoisoned on
+ every mem_heap_alloc() call. */
+ compile_time_assert(MEM_BLOCK_HEADER_SIZE >= sizeof *block);
+ UNIV_MEM_FREE(block + 1, len - sizeof *block);
+
ut_ad((ulint)MEM_BLOCK_HEADER_SIZE < len);
return(block);
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index 14bd79a0c5d..699c21b3327 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -701,28 +701,50 @@ static
bool
os_aio_validate();
+/** Handle errors for file operations.
+@param[in] name name of a file or NULL
+@param[in] operation operation
+@param[in] should_abort whether to abort on an unknown error
+@param[in] on_error_silent whether to suppress reports of non-fatal errors
+@return true if we should retry the operation */
+static MY_ATTRIBUTE((warn_unused_result))
+bool
+os_file_handle_error_cond_exit(
+ const char* name,
+ const char* operation,
+ bool should_abort,
+ bool on_error_silent);
+
/** Does error handling when a file operation fails.
-@param[in] name File name or NULL
-@param[in] operation Name of operation e.g., "read", "write"
+@param[in] name name of a file or NULL
+@param[in] operation operation name that failed
@return true if we should retry the operation */
static
bool
os_file_handle_error(
const char* name,
- const char* operation);
+ const char* operation)
+{
+ /* Exit in case of unknown error */
+ return(os_file_handle_error_cond_exit(name, operation, true, false));
+}
-/**
-Does error handling when a file operation fails.
-@param[in] name File name or NULL
-@param[in] operation Name of operation e.g., "read", "write"
-@param[in] silent if true then don't print any message to the log.
+/** Does error handling when a file operation fails.
+@param[in] name name of a file or NULL
+@param[in] operation operation name that failed
+@param[in] on_error_silent if true then don't print any message to the log.
@return true if we should retry the operation */
static
bool
os_file_handle_error_no_exit(
const char* name,
const char* operation,
- bool silent);
+ bool on_error_silent)
+{
+ /* Don't exit in case of unknown error */
+ return(os_file_handle_error_cond_exit(
+ name, operation, false, on_error_silent));
+}
/** Does simulated AIO. This function should be called by an i/o-handler
thread.
@@ -5077,52 +5099,31 @@ os_file_read_page(
ut_ad(type.validate());
ut_ad(n > 0);
- for (;;) {
- ssize_t n_bytes;
-
- n_bytes = os_file_pread(type, file, buf, n, offset, &err);
-
- if (o != NULL) {
- *o = n_bytes;
- }
-
- if (err != DB_SUCCESS && !exit_on_err) {
-
- return(err);
-
- } else if ((ulint) n_bytes == n) {
- return(DB_SUCCESS);
- }
-
- ib::error() << "Tried to read " << n
- << " bytes at offset " << offset
- << ", but was only able to read " << n_bytes;
-
- if (exit_on_err) {
+ ssize_t n_bytes = os_file_pread(type, file, buf, n, offset, &err);
- if (!os_file_handle_error(NULL, "read")) {
- /* Hard error */
- break;
- }
+ if (o) {
+ *o = n_bytes;
+ }
- } else if (!os_file_handle_error_no_exit(NULL, "read", false)) {
+ if (ulint(n_bytes) == n || (err != DB_SUCCESS && !exit_on_err)) {
+ return err;
+ }
- /* Hard error */
- break;
- }
+ ib::error() << "Tried to read " << n << " bytes at offset "
+ << offset << ", but was only able to read " << n_bytes;
- if (n_bytes > 0 && (ulint) n_bytes < n) {
- n -= (ulint) n_bytes;
- offset += (ulint) n_bytes;
- buf = reinterpret_cast<uchar*>(buf) + (ulint) n_bytes;
- }
+ if (!os_file_handle_error_cond_exit(
+ NULL, "read", exit_on_err, false)) {
+ ib::fatal()
+ << "Cannot read from file. OS error number "
+ << errno << ".";
}
- ib::fatal()
- << "Cannot read from file. OS error number "
- << errno << ".";
+ if (err == DB_SUCCESS) {
+ err = DB_IO_ERROR;
+ }
- return(err);
+ return err;
}
/** Retrieves the last error number if an error occurs in a file io function.
@@ -5228,37 +5229,6 @@ os_file_handle_error_cond_exit(
return(false);
}
-/** Does error handling when a file operation fails.
-@param[in] name name of a file or NULL
-@param[in] operation operation name that failed
-@return true if we should retry the operation */
-static
-bool
-os_file_handle_error(
- const char* name,
- const char* operation)
-{
- /* Exit in case of unknown error */
- return(os_file_handle_error_cond_exit(name, operation, true, false));
-}
-
-/** Does error handling when a file operation fails.
-@param[in] name name of a file or NULL
-@param[in] operation operation name that failed
-@param[in] on_error_silent if true then don't print any message to the log.
-@return true if we should retry the operation */
-static
-bool
-os_file_handle_error_no_exit(
- const char* name,
- const char* operation,
- bool on_error_silent)
-{
- /* Don't exit in case of unknown error */
- return(os_file_handle_error_cond_exit(
- name, operation, false, on_error_silent));
-}
-
#ifndef _WIN32
/** Tries to disable OS caching on an opened file descriptor.
@param[in] fd file descriptor to alter
diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc
index 82e35472cb8..4f7813929f2 100644
--- a/storage/innobase/row/row0import.cc
+++ b/storage/innobase/row/row0import.cc
@@ -37,6 +37,7 @@ Created 2012-02-08 by Sunny Bains.
#include "row0mysql.h"
#include "srv0start.h"
#include "row0quiesce.h"
+#include "fil0pagecompress.h"
#include "ut0new.h"
#include <vector>
@@ -45,12 +46,10 @@ Created 2012-02-08 by Sunny Bains.
#include <my_aes.h>
#endif
-/** The size of the buffer to use for IO. Note: os_file_read() doesn't expect
-reads to fail. If you set the buffer size to be greater than a multiple of the
-file size then it will assert. TODO: Fix this limitation of the IO functions.
-@param n page size of the tablespace.
-@retval number of pages */
-#define IO_BUFFER_SIZE(m, n) ((m) / (n))
+/** The size of the buffer to use for IO.
+@param n physical page size
+@return number of pages */
+#define IO_BUFFER_SIZE(n) ((1024 * 1024) / n)
/** For gathering stats on records during phase I */
struct row_stats_t {
@@ -346,12 +345,14 @@ private:
/** Functor that is called for each physical page that is read from the
tablespace file. */
-class AbstractCallback : public PageCallback {
+class AbstractCallback
+{
public:
/** Constructor
@param trx covering transaction */
AbstractCallback(trx_t* trx)
:
+ m_page_size(0, 0, false),
m_trx(trx),
m_space(ULINT_UNDEFINED),
m_xdes(),
@@ -384,31 +385,50 @@ public:
return(m_space_flags);
}
-protected:
- /** Get the data page depending on the table type, compressed or not.
- @param block block read from disk
- @retval the buffer frame */
- buf_frame_t* get_frame(buf_block_t* block) const UNIV_NOTHROW
+ /**
+ Set the name of the physical file and the file handle that is used
+ to open it for the file that is being iterated over.
+ @param filename the physical name of the tablespace file
+ @param file OS file handle */
+ void set_file(const char* filename, pfs_os_file_t file) UNIV_NOTHROW
{
- if (is_compressed_table()) {
- return(block->page.zip.data);
- }
-
- return(buf_block_get_frame(block));
+ m_file = file;
+ m_filepath = filename;
}
- /** Check for session interrupt. If required we could
- even flush to disk here every N pages.
- @retval DB_SUCCESS or error code */
- dberr_t periodic_check() UNIV_NOTHROW
- {
- if (trx_is_interrupted(m_trx)) {
- return(DB_INTERRUPTED);
- }
+ const page_size_t& get_page_size() const { return m_page_size; }
- return(DB_SUCCESS);
+ const char* filename() const { return m_filepath; }
+
+ /**
+ Called for every page in the tablespace. If the page was not
+ updated then its state must be set to BUF_PAGE_NOT_USED. For
+ compressed tables the page descriptor memory will be at offset:
+ block->frame + UNIV_PAGE_SIZE;
+ @param offset - physical offset within the file
+ @param block - block read from file, note it is not from the buffer pool
+ @retval DB_SUCCESS or error code. */
+ virtual dberr_t operator()(
+ os_offset_t offset,
+ buf_block_t* block) UNIV_NOTHROW = 0;
+
+ /**
+ @return the space id of the tablespace */
+ virtual ulint get_space_id() const UNIV_NOTHROW = 0;
+
+ bool is_interrupted() const { return trx_is_interrupted(m_trx); }
+
+ /**
+ Get the data page depending on the table type, compressed or not.
+ @param block - block read from disk
+ @retval the buffer frame */
+ static byte* get_frame(const buf_block_t* block)
+ {
+ return block->page.zip.data
+ ? block->page.zip.data : block->frame;
}
+protected:
/** Get the physical offset of the extent descriptor within the page.
@param page_no page number of the extent descriptor
@param page contents of the page containing the extent descriptor.
@@ -488,6 +508,15 @@ protected:
}
protected:
+ /** The tablespace page size. */
+ page_size_t m_page_size;
+
+ /** File handle to the tablespace */
+ pfs_os_file_t m_file;
+
+ /** Physical file path. */
+ const char* m_filepath;
+
/** Covering transaction. */
trx_t* m_trx;
@@ -564,9 +593,7 @@ AbstractCallback::init(
m_free_limit = mach_read_from_4(page + FSP_FREE_LIMIT);
m_space = mach_read_from_4(page + FSP_HEADER_OFFSET + FSP_SPACE_ID);
- dberr_t err = set_current_xdes(0, page);
-
- return(err);
+ return set_current_xdes(0, page);
}
/**
@@ -637,11 +664,7 @@ FetchIndexRootPages::operator() (
os_offset_t offset,
buf_block_t* block) UNIV_NOTHROW
{
- dberr_t err;
-
- if ((err = periodic_check()) != DB_SUCCESS) {
- return(err);
- }
+ if (is_interrupted()) return DB_INTERRUPTED;
const page_t* page = get_frame(block);
@@ -654,9 +677,9 @@ FetchIndexRootPages::operator() (
<< ", file offset: "
<< (offset / m_page_size.physical());
- err = DB_CORRUPTION;
+ return DB_CORRUPTION;
} else if (page_type == FIL_PAGE_TYPE_XDES) {
- err = set_current_xdes(block->page.id.page_no(), page);
+ return set_current_xdes(block->page.id.page_no(), page);
} else if (fil_page_index_page_check(page)
&& !is_free(block->page.id.page_no())
&& page_is_root(page)) {
@@ -680,7 +703,7 @@ FetchIndexRootPages::operator() (
}
}
- return(err);
+ return DB_SUCCESS;
}
/**
@@ -808,14 +831,6 @@ public:
os_offset_t offset,
buf_block_t* block) UNIV_NOTHROW;
private:
-
- /** Status returned by PageConverter::validate() */
- enum import_page_status_t {
- IMPORT_PAGE_STATUS_OK, /*!< Page is OK */
- IMPORT_PAGE_STATUS_ALL_ZERO, /*!< Page is all zeros */
- IMPORT_PAGE_STATUS_CORRUPTED /*!< Page is corrupted */
- };
-
/** Update the page, set the space id, max trx id and index id.
@param block block read from file
@param page_type type of the page
@@ -824,17 +839,6 @@ private:
buf_block_t* block,
ulint& page_type) UNIV_NOTHROW;
-#ifdef UNIV_DEBUG
- /**
- @return true error condition is enabled. */
- bool trigger_corruption() UNIV_NOTHROW
- {
- return(false);
- }
- #else
-#define trigger_corruption() (false)
-#endif /* UNIV_DEBUG */
-
/** Update the space, index id, trx id.
@param block block to convert
@return DB_SUCCESS or error code */
@@ -846,14 +850,6 @@ private:
@retval DB_SUCCESS or error code */
dberr_t update_records(buf_block_t* block) UNIV_NOTHROW;
- /** Validate the page, check for corruption.
- @param offset physical offset within file.
- @param page page read from file.
- @return 0 on success, 1 if all zero, 2 if corrupted */
- import_page_status_t validate(
- os_offset_t offset,
- buf_block_t* page) UNIV_NOTHROW;
-
/** Validate the space flags and update tablespace header page.
@param block block read from file, not from the buffer pool.
@retval DB_SUCCESS or error code */
@@ -1530,6 +1526,7 @@ IndexPurge::purge() UNIV_NOTHROW
/** Constructor
@param cfg config of table being imported.
@param trx transaction covering the import */
+inline
PageConverter::PageConverter(
row_import* cfg,
trx_t* trx)
@@ -1553,6 +1550,7 @@ PageConverter::PageConverter(
@param offsets column offsets for the record
@param i column ordinal value
@return DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::adjust_cluster_index_blob_column(
rec_t* rec,
@@ -1581,13 +1579,11 @@ PageConverter::adjust_cluster_index_blob_column(
field += BTR_EXTERN_SPACE_ID - BTR_EXTERN_FIELD_REF_SIZE + len;
- if (is_compressed_table()) {
- mach_write_to_4(field, get_space_id());
+ mach_write_to_4(field, get_space_id());
+ if (m_page_zip_ptr) {
page_zip_write_blob_ptr(
m_page_zip_ptr, rec, m_cluster_index, offsets, i, 0);
- } else {
- mlog_write_ulint(field, get_space_id(), MLOG_4BYTES, 0);
}
return(DB_SUCCESS);
@@ -1598,6 +1594,7 @@ stored columns.
@param rec record to update
@param offsets column offsets for the record
@return DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::adjust_cluster_index_blob_columns(
rec_t* rec,
@@ -1630,6 +1627,7 @@ BLOB reference, write the new space id.
@param rec record to update
@param offsets column offsets for the record
@return DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::adjust_cluster_index_blob_ref(
rec_t* rec,
@@ -1652,6 +1650,7 @@ PageConverter::adjust_cluster_index_blob_ref(
re-organising the B+tree.
@param offsets current row offsets.
@return true if purge succeeded */
+inline
bool
PageConverter::purge(const ulint* offsets) UNIV_NOTHROW
{
@@ -1674,6 +1673,7 @@ PageConverter::purge(const ulint* offsets) UNIV_NOTHROW
@param rec record to update
@param offsets column offsets for the record
@return DB_SUCCESS or error code. */
+inline
dberr_t
PageConverter::adjust_cluster_record(
const dict_index_t* index,
@@ -1700,6 +1700,7 @@ PageConverter::adjust_cluster_record(
rows that can't be purged optimistically.
@param block block to update
@retval DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::update_records(
buf_block_t* block) UNIV_NOTHROW
@@ -1763,6 +1764,7 @@ PageConverter::update_records(
/** Update the space, index id, trx id.
@return DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::update_index_page(
buf_block_t* block) UNIV_NOTHROW
@@ -1843,6 +1845,7 @@ PageConverter::update_index_page(
/** Validate the space flags and update tablespace header page.
@param block block read from file, not from the buffer pool.
@retval DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::update_header(
buf_block_t* block) UNIV_NOTHROW
@@ -1879,6 +1882,7 @@ PageConverter::update_header(
/** Update the page, set the space id, max trx id and index id.
@param block block read from file
@retval DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::update_page(
buf_block_t* block,
@@ -1886,6 +1890,14 @@ PageConverter::update_page(
{
dberr_t err = DB_SUCCESS;
+ ut_ad(!block->page.zip.data == !is_compressed_table());
+
+ if (block->page.zip.data) {
+ m_page_zip_ptr = &block->page.zip;
+ } else {
+ ut_ad(!m_page_zip_ptr);
+ }
+
switch (page_type = fil_page_get_type(get_frame(block))) {
case FIL_PAGE_TYPE_FSP_HDR:
ut_a(block->page.id.page_no() == 0);
@@ -1940,117 +1952,41 @@ PageConverter::update_page(
return(DB_CORRUPTION);
}
-/** Validate the page
-@param offset physical offset within file.
-@param page page read from file.
-@return status */
-PageConverter::import_page_status_t
-PageConverter::validate(
- os_offset_t offset,
- buf_block_t* block) UNIV_NOTHROW
-{
- buf_frame_t* page = get_frame(block);
-
- /* Check that the page number corresponds to the offset in
- the file. Flag as corrupt if it doesn't. Disable the check
- for LSN in buf_page_is_corrupted() */
-
- if (buf_page_is_corrupted(
- false, page, get_page_size(), NULL)
- || (page_get_page_no(page) != offset / m_page_size.physical()
- && page_get_page_no(page) != 0)) {
-
- return(IMPORT_PAGE_STATUS_CORRUPTED);
-
- } else if (offset > 0 && page_get_page_no(page) == 0) {
-
- /* The page is all zero: do nothing. We already checked
- for all NULs in buf_page_is_corrupted() */
- return(IMPORT_PAGE_STATUS_ALL_ZERO);
- }
-
- return(IMPORT_PAGE_STATUS_OK);
-}
-
/** Called for every page in the tablespace. If the page was not
updated then its state must be set to BUF_PAGE_NOT_USED.
-@param offset physical offset within the file
@param block block read from file, note it is not from the buffer pool
@retval DB_SUCCESS or error code. */
dberr_t
-PageConverter::operator() (
- os_offset_t offset,
- buf_block_t* block) UNIV_NOTHROW
+PageConverter::operator() (os_offset_t, buf_block_t* block) UNIV_NOTHROW
{
+ /* If we already had an old page with matching number
+ in the buffer pool, evict it now, because
+ we no longer evict the pages on DISCARD TABLESPACE. */
+ buf_page_get_gen(block->page.id, get_page_size(),
+ RW_NO_LATCH, NULL, BUF_EVICT_IF_IN_POOL,
+ __FILE__, __LINE__, NULL, NULL);
+
ulint page_type;
- dberr_t err = DB_SUCCESS;
- if ((err = periodic_check()) != DB_SUCCESS) {
- return(err);
- }
+ dberr_t err = update_page(block, page_type);
+ if (err != DB_SUCCESS) return err;
- if (is_compressed_table()) {
- m_page_zip_ptr = &block->page.zip;
+ if (!block->page.zip.data) {
+ buf_flush_init_for_writing(
+ NULL, block->frame, NULL, m_current_lsn);
+ } else if (fil_page_type_is_index(page_type)) {
+ buf_flush_init_for_writing(
+ NULL, block->page.zip.data, &block->page.zip,
+ m_current_lsn);
} else {
- ut_ad(m_page_zip_ptr == 0);
+ /* Calculate and update the checksum of non-index
+ pages for ROW_FORMAT=COMPRESSED tables. */
+ buf_flush_update_zip_checksum(
+ block->page.zip.data, get_page_size().physical(),
+ m_current_lsn);
}
- switch (validate(offset, block)) {
- case IMPORT_PAGE_STATUS_OK:
-
- /* We have to decompress the compressed pages before
- we can work on them */
-
- if ((err = update_page(block, page_type)) != DB_SUCCESS) {
- break;
- }
-
- /* Note: For compressed pages this function will write to the
- zip descriptor and for uncompressed pages it will write to
- page (ie. the block->frame). Therefore the caller should write
- out the descriptor contents and not block->frame for compressed
- pages. */
-
- if (!is_compressed_table()
- || fil_page_type_is_index(page_type)) {
-
- buf_flush_init_for_writing(
- !is_compressed_table() ? block : NULL,
- !is_compressed_table()
- ? block->frame : block->page.zip.data,
- !is_compressed_table() ? 0 : m_page_zip_ptr,
- m_current_lsn);
- } else {
- /* Calculate and update the checksum of non-btree
- pages for compressed tables explicitly here. */
-
- buf_flush_update_zip_checksum(
- get_frame(block), get_page_size().physical(),
- m_current_lsn);
- }
-
- break;
-
- case IMPORT_PAGE_STATUS_ALL_ZERO:
- /* The page is all zero: leave it as is. */
- break;
-
- case IMPORT_PAGE_STATUS_CORRUPTED:
-
- ib::warn() << "Page " << (offset / m_page_size.physical())
- << " at offset " << offset
- << " looks corrupted in file " << m_filepath;
-
- err = DB_CORRUPTION;
- }
-
- /* If we already had and old page with matching number
- in the buffer pool, evict it now, because
- we no longer evict the pages on DISCARD TABLESPACE. */
- buf_page_get_gen(block->page.id, get_page_size(),
- RW_NO_LATCH, NULL, BUF_EVICT_IF_IN_POOL,
- __FILE__, __LINE__, NULL, NULL);
- return(err);
+ return DB_SUCCESS;
}
/*****************************************************************//**
@@ -3328,6 +3264,441 @@ row_import_update_discarded_flag(
return(err);
}
+struct fil_iterator_t {
+ pfs_os_file_t file; /*!< File handle */
+ const char* filepath; /*!< File path name */
+ os_offset_t start; /*!< From where to start */
+ os_offset_t end; /*!< Where to stop */
+ os_offset_t file_size; /*!< File size in bytes */
+ ulint n_io_buffers; /*!< Number of pages to use
+ for IO */
+ byte* io_buffer; /*!< Buffer to use for IO */
+ fil_space_crypt_t *crypt_data; /*!< Crypt data (if encrypted) */
+ byte* crypt_io_buffer; /*!< IO buffer when encrypted */
+};
+
+/********************************************************************//**
+TODO: This can be made parallel trivially by chunking up the file and creating
+a callback per thread. Main benefit will be to use multiple CPUs for
+checksums and compressed tables. We have to do compressed tables block by
+block right now. Secondly we need to decompress/compress and copy too much
+data. These are CPU intensive.
+
+Iterate over all the pages in the tablespace.
+@param iter - Tablespace iterator
+@param block - block to use for IO
+@param callback - Callback to inspect and update page contents
+@retval DB_SUCCESS or error code */
+static
+dberr_t
+fil_iterate(
+/*========*/
+ const fil_iterator_t& iter,
+ buf_block_t* block,
+ AbstractCallback& callback)
+{
+ os_offset_t offset;
+ const ulint size = callback.get_page_size().physical();
+ ulint n_bytes = iter.n_io_buffers * size;
+
+ ut_ad(!srv_read_only_mode);
+
+ /* TODO: For ROW_FORMAT=COMPRESSED tables we do a lot of useless
+ copying for non-index pages. Unfortunately, it is
+ required by buf_zip_decompress() */
+
+ for (offset = iter.start; offset < iter.end; offset += n_bytes) {
+ if (callback.is_interrupted()) {
+ return DB_INTERRUPTED;
+ }
+
+ byte* io_buffer = iter.io_buffer;
+ block->frame = io_buffer;
+
+ if (block->page.zip.data) {
+ /* Zip IO is done in the compressed page buffer. */
+ io_buffer = block->page.zip.data;
+ }
+
+ /* We have to read the exact number of bytes. Otherwise the
+ InnoDB IO functions croak on failed reads. */
+
+ n_bytes = ulint(ut_min(os_offset_t(n_bytes),
+ iter.end - offset));
+
+ ut_ad(n_bytes > 0);
+ ut_ad(!(n_bytes % size));
+
+ const bool encrypted = iter.crypt_data != NULL
+ && iter.crypt_data->should_encrypt();
+ /* Use additional crypt io buffer if tablespace is encrypted */
+ byte* const readptr = encrypted
+ ? iter.crypt_io_buffer : io_buffer;
+ byte* const writeptr = readptr;
+
+ IORequest read_request(IORequest::READ);
+ read_request.disable_partial_io_warnings();
+
+ dberr_t err = os_file_read_no_error_handling(
+ read_request, iter.file, readptr, offset, n_bytes, 0);
+ if (err != DB_SUCCESS) {
+ ib::error() << iter.filepath
+ << ": os_file_read() failed";
+ }
+
+ bool updated = false;
+ os_offset_t page_off = offset;
+ ulint n_pages_read = n_bytes / size;
+ bool decrypted = false;
+ block->page.id.set_page_no(ulint(page_off / size));
+
+ for (ulint i = 0; i < n_pages_read;
+ block->page.id.set_page_no(block->page.id.page_no() + 1),
+ ++i, page_off += size, block->frame += size) {
+ err = DB_SUCCESS;
+ byte* src = readptr + i * size;
+ byte* dst = io_buffer + i * size;
+ bool frame_changed = false;
+ ulint page_type = mach_read_from_2(src+FIL_PAGE_TYPE);
+ const bool page_compressed
+ = page_type
+ == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
+ || page_type == FIL_PAGE_PAGE_COMPRESSED;
+ const ulint page_no = page_get_page_no(src);
+ if (!page_no && page_off) {
+ const ulint* b = reinterpret_cast<const ulint*>
+ (src);
+ const ulint* const e = b + size / sizeof *b;
+ do {
+ if (*b++) {
+ goto page_corrupted;
+ }
+ } while (b != e);
+
+ /* Proceed to the next page,
+ because this one is all zero. */
+ continue;
+ }
+
+ if (page_no != page_off / size) {
+ goto page_corrupted;
+ }
+
+ if (encrypted) {
+ decrypted = fil_space_decrypt(
+ iter.crypt_data, dst,
+ callback.get_page_size(), src, &err);
+
+ if (err != DB_SUCCESS) {
+ return err;
+ }
+
+ if (decrypted) {
+ updated = true;
+ } else {
+ if (!page_compressed
+ && !block->page.zip.data) {
+ block->frame = src;
+ frame_changed = true;
+ } else {
+ memcpy(dst, src, size);
+ }
+ }
+ }
+
+ /* If the original page is page_compressed, we need
+ to decompress it before adjusting further. */
+ if (page_compressed) {
+ fil_decompress_page(NULL, dst, ulong(size),
+ NULL);
+ updated = true;
+ } else if (buf_page_is_corrupted(
+ false,
+ encrypted && !frame_changed
+ ? dst : src,
+ callback.get_page_size(), NULL)) {
+page_corrupted:
+ ib::warn() << callback.filename()
+ << ": Page " << (offset / size)
+ << " at offset " << offset
+ << " looks corrupted.";
+ return DB_CORRUPTION;
+ }
+
+ if ((err = callback(page_off, block)) != DB_SUCCESS) {
+ return err;
+ } else if (!updated) {
+ updated = buf_block_get_state(block)
+ == BUF_BLOCK_FILE_PAGE;
+ }
+
+ /* If tablespace is encrypted we use additional
+ temporary scratch area where pages are read
+ for decrypting readptr == crypt_io_buffer != io_buffer.
+
+ Destination for decryption is a buffer pool block
+ block->frame == dst == io_buffer that is updated.
+ Pages that did not require decryption even when
+ tablespace is marked as encrypted are not copied
+ instead block->frame is set to src == readptr.
+
+ For encryption we again use temporary scratch area
+ writeptr != io_buffer == dst
+ that is then written to the tablespace
+
+ (1) For normal tables io_buffer == dst == writeptr
+ (2) For only page compressed tables
+ io_buffer == dst == writeptr
+ (3) For encrypted (and page compressed)
+ readptr != io_buffer == dst != writeptr
+ */
+
+ ut_ad(!encrypted && !page_compressed ?
+ src == dst && dst == writeptr + (i * size):1);
+ ut_ad(page_compressed && !encrypted ?
+ src == dst && dst == writeptr + (i * size):1);
+ ut_ad(encrypted ?
+ src != dst && dst != writeptr + (i * size):1);
+
+ if (encrypted) {
+ memcpy(writeptr + (i * size),
+ callback.get_frame(block), size);
+ }
+
+ if (frame_changed) {
+ block->frame = dst;
+ }
+
+ src = io_buffer + (i * size);
+
+ if (page_compressed) {
+ ulint len = 0;
+
+ fil_compress_page(
+ NULL,
+ src,
+ NULL,
+ size,
+ 0,/* FIXME: compression level */
+ 512,/* FIXME: use proper block size */
+ encrypted,
+ &len);
+ ut_ad(len <= size);
+ memset(src + len, 0, size - len);
+ updated = true;
+ }
+
+ /* Encrypt the page if encryption was used. */
+ if (encrypted && decrypted) {
+ byte *dest = writeptr + i * size;
+ byte* tmp = fil_encrypt_buf(
+ iter.crypt_data,
+ block->page.id.space(),
+ block->page.id.page_no(),
+ mach_read_from_8(src + FIL_PAGE_LSN),
+ src, callback.get_page_size(), dest);
+
+ if (tmp == src) {
+ /* TODO: remove unnecessary memcpy's */
+ memcpy(dest, src, size);
+ }
+
+ updated = true;
+ }
+ }
+
+ /* A page was updated in the set, write back to disk. */
+ if (updated) {
+ IORequest write_request(IORequest::WRITE);
+
+ err = os_file_write(write_request,
+ iter.filepath, iter.file,
+ writeptr, offset, n_bytes);
+
+ if (err != DB_SUCCESS) {
+ return err;
+ }
+ }
+ }
+
+ return DB_SUCCESS;
+}
+
+/********************************************************************//**
+Iterate over all the pages in the tablespace.
+@param table - the table definition in the server
+@param n_io_buffers - number of blocks to read and write together
+@param callback - functor that will do the page updates
+@return DB_SUCCESS or error code */
+static
+dberr_t
+fil_tablespace_iterate(
+/*===================*/
+ dict_table_t* table,
+ ulint n_io_buffers,
+ AbstractCallback& callback)
+{
+ dberr_t err;
+ pfs_os_file_t file;
+ char* filepath;
+
+ ut_a(n_io_buffers > 0);
+ ut_ad(!srv_read_only_mode);
+
+ DBUG_EXECUTE_IF("ib_import_trigger_corruption_1",
+ return(DB_CORRUPTION););
+
+ /* Make sure the data_dir_path is set. */
+ dict_get_and_save_data_dir_path(table, false);
+
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ ut_a(table->data_dir_path);
+
+ filepath = fil_make_filepath(
+ table->data_dir_path, table->name.m_name, IBD, true);
+ } else {
+ filepath = fil_make_filepath(
+ NULL, table->name.m_name, IBD, false);
+ }
+
+ if (!filepath) {
+ return(DB_OUT_OF_MEMORY);
+ } else {
+ bool success;
+
+ file = os_file_create_simple_no_error_handling(
+ innodb_data_file_key, filepath,
+ OS_FILE_OPEN, OS_FILE_READ_WRITE, false, &success);
+
+ if (!success) {
+ /* The following call prints an error message */
+ os_file_get_last_error(true);
+ ib::error() << "Trying to import a tablespace,"
+ " but could not open the tablespace file "
+ << filepath;
+ ut_free(filepath);
+ return DB_TABLESPACE_NOT_FOUND;
+ } else {
+ err = DB_SUCCESS;
+ }
+ }
+
+ callback.set_file(filepath, file);
+
+ os_offset_t file_size = os_file_get_size(file);
+ ut_a(file_size != (os_offset_t) -1);
+
+ /* Allocate a page to read in the tablespace header, so that we
+ can determine the page size and zip_size (if it is compressed).
+ We allocate an extra page in case it is a compressed table. One
+ page is to ensure alignment. */
+
+ void* page_ptr = ut_malloc_nokey(3 * UNIV_PAGE_SIZE);
+ byte* page = static_cast<byte*>(ut_align(page_ptr, UNIV_PAGE_SIZE));
+
+ buf_block_t* block = reinterpret_cast<buf_block_t*>
+ (ut_zalloc_nokey(sizeof *block));
+ block->frame = page;
+ block->page.id.copy_from(page_id_t(0, 0));
+ block->page.io_fix = BUF_IO_NONE;
+ block->page.buf_fix_count = 1;
+ block->page.state = BUF_BLOCK_FILE_PAGE;
+
+ /* Read the first page and determine the page and zip size. */
+
+ IORequest request(IORequest::READ);
+ request.disable_partial_io_warnings();
+
+ err = os_file_read_no_error_handling(request, file, page, 0,
+ UNIV_PAGE_SIZE, 0);
+
+ if (err == DB_SUCCESS) {
+ err = callback.init(file_size, block);
+ }
+
+ if (err == DB_SUCCESS) {
+ block->page.id.copy_from(
+ page_id_t(callback.get_space_id(), 0));
+ block->page.size.copy_from(callback.get_page_size());
+ if (block->page.size.is_compressed()) {
+ page_zip_set_size(&block->page.zip,
+ callback.get_page_size().physical());
+ /* ROW_FORMAT=COMPRESSED is not optimised for block IO
+ for now. We do the IMPORT page by page. */
+ n_io_buffers = 1;
+ }
+
+ fil_iterator_t iter;
+
+ /* read (optional) crypt data */
+ iter.crypt_data = fil_space_read_crypt_data(
+ callback.get_page_size(), page);
+
+ /* If tablespace is encrypted, it needs extra buffers */
+ if (iter.crypt_data && n_io_buffers > 1) {
+ /* decrease io buffers so that memory
+ consumption will not double */
+ n_io_buffers /= 2;
+ }
+
+ iter.file = file;
+ iter.start = 0;
+ iter.end = file_size;
+ iter.filepath = filepath;
+ iter.file_size = file_size;
+ iter.n_io_buffers = n_io_buffers;
+
+ /* Add an extra page for compressed page scratch area. */
+ void* io_buffer = ut_malloc_nokey(
+ (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
+
+ iter.io_buffer = static_cast<byte*>(
+ ut_align(io_buffer, UNIV_PAGE_SIZE));
+
+ void* crypt_io_buffer = NULL;
+ if (iter.crypt_data) {
+ crypt_io_buffer = ut_malloc_nokey(
+ (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
+ iter.crypt_io_buffer = static_cast<byte*>(
+ ut_align(crypt_io_buffer, UNIV_PAGE_SIZE));
+ }
+
+ if (block->page.zip.ssize) {
+ ut_ad(iter.n_io_buffers == 1);
+ block->frame = iter.io_buffer;
+ block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
+ }
+
+ err = fil_iterate(iter, block, callback);
+
+ if (iter.crypt_data) {
+ fil_space_destroy_crypt_data(&iter.crypt_data);
+ }
+
+ ut_free(crypt_io_buffer);
+ ut_free(io_buffer);
+ }
+
+ if (err == DB_SUCCESS) {
+ ib::info() << "Sync to disk";
+
+ if (!os_file_flush(file)) {
+ ib::info() << "os_file_flush() failed!";
+ err = DB_IO_ERROR;
+ } else {
+ ib::info() << "Sync to disk - done!";
+ }
+ }
+
+ os_file_close(file);
+
+ ut_free(page_ptr);
+ ut_free(filepath);
+ ut_free(block);
+
+ return(err);
+}
+
/*****************************************************************//**
Imports a tablespace. The space id in the .ibd file must match the space id
of the table in the data dictionary.
@@ -3448,9 +3819,7 @@ row_import_for_mysql(
FetchIndexRootPages fetchIndexRootPages(table, trx);
err = fil_tablespace_iterate(
- table, IO_BUFFER_SIZE(
- cfg.m_page_size.physical(),
- cfg.m_page_size.physical()),
+ table, IO_BUFFER_SIZE(cfg.m_page_size.physical()),
fetchIndexRootPages);
if (err == DB_SUCCESS) {
@@ -3488,9 +3857,7 @@ row_import_for_mysql(
/* Set the IO buffer size in pages. */
err = fil_tablespace_iterate(
- table, IO_BUFFER_SIZE(
- cfg.m_page_size.physical(),
- cfg.m_page_size.physical()), converter);
+ table, IO_BUFFER_SIZE(cfg.m_page_size.physical()), converter);
DBUG_EXECUTE_IF("ib_import_reset_space_and_lsn_failure",
err = DB_TOO_MANY_CONCURRENT_TRXS;);
diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc
index 7985700367d..ef8ace7f864 100644
--- a/storage/xtradb/fil/fil0fil.cc
+++ b/storage/xtradb/fil/fil0fil.cc
@@ -25,8 +25,6 @@ Created 10/25/1995 Heikki Tuuri
*******************************************************/
#include "fil0fil.h"
-#include "fil0pagecompress.h"
-#include "fsp0pagecompress.h"
#include "fil0crypt.h"
#include <debug_sync.h>
@@ -49,12 +47,10 @@ Created 10/25/1995 Heikki Tuuri
#include "page0zip.h"
#include "trx0sys.h"
#include "row0mysql.h"
-#include "os0file.h"
#ifndef UNIV_HOTBACKUP
# include "buf0lru.h"
# include "ibuf0ibuf.h"
# include "sync0sync.h"
-# include "os0sync.h"
#else /* !UNIV_HOTBACKUP */
# include "srv0srv.h"
static ulint srv_data_read, srv_data_written;
@@ -704,7 +700,7 @@ add_size:
space->size += node->size;
}
- ulint atomic_writes = fsp_flags_get_atomic_writes(space->flags);
+ ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(space->flags);
/* printf("Opening file %s\n", node->name); */
@@ -4110,7 +4106,6 @@ fil_open_single_table_tablespace(
fsp_open_info remote;
ulint tablespaces_found = 0;
ulint valid_tablespaces_found = 0;
- ulint atomic_writes = 0;
fil_space_crypt_t* crypt_data = NULL;
#ifdef UNIV_SYNC_DEBUG
@@ -4124,8 +4119,8 @@ fil_open_single_table_tablespace(
return(DB_CORRUPTION);
}
- ut_ad(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK));
- atomic_writes = fsp_flags_get_atomic_writes(flags);
+ ut_ad(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK, id));
+ const ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags);
memset(&def, 0, sizeof(def));
memset(&dict, 0, sizeof(dict));
@@ -6151,7 +6146,8 @@ fil_io(
} else if (type == OS_FILE_WRITE) {
ut_ad(!srv_read_only_mode);
srv_stats.data_written.add(len);
- if (fil_page_is_index_page((byte *)buf)) {
+ if (mach_read_from_2(static_cast<const byte*>(buf)
+ + FIL_PAGE_TYPE) == FIL_PAGE_INDEX) {
srv_stats.index_pages_written.inc();
} else {
srv_stats.non_index_pages_written.inc();
@@ -6684,479 +6680,6 @@ fil_close(void)
}
/********************************************************************//**
-Initializes a buffer control block when the buf_pool is created. */
-static
-void
-fil_buf_block_init(
-/*===============*/
- buf_block_t* block, /*!< in: pointer to control block */
- byte* frame) /*!< in: pointer to buffer frame */
-{
- UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);
-
- block->frame = frame;
-
- block->page.io_fix = BUF_IO_NONE;
- /* There are assertions that check for this. */
- block->page.buf_fix_count = 1;
- block->page.state = BUF_BLOCK_READY_FOR_USE;
-
- page_zip_des_init(&block->page.zip);
-}
-
-struct fil_iterator_t {
- pfs_os_file_t file; /*!< File handle */
- const char* filepath; /*!< File path name */
- os_offset_t start; /*!< From where to start */
- os_offset_t end; /*!< Where to stop */
- os_offset_t file_size; /*!< File size in bytes */
- ulint page_size; /*!< Page size */
- ulint n_io_buffers; /*!< Number of pages to use
- for IO */
- byte* io_buffer; /*!< Buffer to use for IO */
- fil_space_crypt_t *crypt_data; /*!< Crypt data (if encrypted) */
- byte* crypt_io_buffer; /*!< IO buffer when encrypted */
-};
-
-/********************************************************************//**
-TODO: This can be made parallel trivially by chunking up the file and creating
-a callback per thread. . Main benefit will be to use multiple CPUs for
-checksums and compressed tables. We have to do compressed tables block by
-block right now. Secondly we need to decompress/compress and copy too much
-of data. These are CPU intensive.
-
-Iterate over all the pages in the tablespace.
-@param iter - Tablespace iterator
-@param block - block to use for IO
-@param callback - Callback to inspect and update page contents
-@retval DB_SUCCESS or error code */
-static
-dberr_t
-fil_iterate(
-/*========*/
- const fil_iterator_t& iter,
- buf_block_t* block,
- PageCallback& callback)
-{
- os_offset_t offset;
- ulint page_no = 0;
- ulint space_id = callback.get_space_id();
- ulint n_bytes = iter.n_io_buffers * iter.page_size;
-
- ut_ad(!srv_read_only_mode);
-
- /* TODO: For compressed tables we do a lot of useless
- copying for non-index pages. Unfortunately, it is
- required by buf_zip_decompress() */
- const bool row_compressed = callback.get_zip_size() > 0;
-
- for (offset = iter.start; offset < iter.end; offset += n_bytes) {
-
- byte* io_buffer = iter.io_buffer;
-
- block->frame = io_buffer;
-
- if (row_compressed) {
- page_zip_des_init(&block->page.zip);
- page_zip_set_size(&block->page.zip, iter.page_size);
- block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
- ut_d(block->page.zip.m_external = true);
- ut_ad(iter.page_size == callback.get_zip_size());
-
- /* Zip IO is done in the compressed page buffer. */
- io_buffer = block->page.zip.data;
- }
-
- /* We have to read the exact number of bytes. Otherwise the
- InnoDB IO functions croak on failed reads. */
-
- n_bytes = static_cast<ulint>(
- ut_min(static_cast<os_offset_t>(n_bytes),
- iter.end - offset));
-
- ut_ad(n_bytes > 0);
- ut_ad(!(n_bytes % iter.page_size));
-
- const bool encrypted = iter.crypt_data != NULL
- && iter.crypt_data->should_encrypt();
- /* Use additional crypt io buffer if tablespace is encrypted */
- byte* const readptr = encrypted
- ? iter.crypt_io_buffer : io_buffer;
- byte* const writeptr = readptr;
-
- if (!os_file_read(iter.file, readptr, offset, (ulint) n_bytes)) {
-
- ib_logf(IB_LOG_LEVEL_ERROR, "os_file_read() failed");
-
- return(DB_IO_ERROR);
- }
-
- bool updated = false;
- os_offset_t page_off = offset;
- ulint n_pages_read = (ulint) n_bytes / iter.page_size;
- bool decrypted = false;
-
- for (ulint i = 0; i < n_pages_read; ++i) {
- ulint size = iter.page_size;
- dberr_t err = DB_SUCCESS;
- byte* src = readptr + (i * size);
- byte* dst = io_buffer + (i * size);
- bool frame_changed = false;
-
- ulint page_type = mach_read_from_2(src+FIL_PAGE_TYPE);
-
- const bool page_compressed
- = page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
- || page_type == FIL_PAGE_PAGE_COMPRESSED;
-
- /* If tablespace is encrypted, we need to decrypt
- the page. Note that tablespaces are not in
- fil_system during import. */
- if (encrypted) {
- decrypted = fil_space_decrypt(
- iter.crypt_data,
- dst, //dst
- iter.page_size,
- src, // src
- &err); // src
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- if (decrypted) {
- updated = true;
- } else {
- if (!page_compressed && !row_compressed) {
- block->frame = src;
- frame_changed = true;
- } else {
- memcpy(dst, src, size);
- }
- }
- }
-
- /* If the original page is page_compressed, we need
- to decompress page before we can update it. */
- if (page_compressed) {
- fil_decompress_page(NULL, dst, ulong(size),
- NULL);
- updated = true;
- }
-
- buf_block_set_file_page(block, space_id, page_no++);
-
- if ((err = callback(page_off, block)) != DB_SUCCESS) {
-
- return(err);
-
- } else if (!updated) {
- updated = buf_block_get_state(block)
- == BUF_BLOCK_FILE_PAGE;
- }
-
- buf_block_set_state(block, BUF_BLOCK_NOT_USED);
- buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
-
- /* If tablespace is encrypted we use additional
- temporary scratch area where pages are read
- for decrypting readptr == crypt_io_buffer != io_buffer.
-
- Destination for decryption is a buffer pool block
- block->frame == dst == io_buffer that is updated.
- Pages that did not require decryption even when
- tablespace is marked as encrypted are not copied
- instead block->frame is set to src == readptr.
-
- For encryption we again use temporary scratch area
- writeptr != io_buffer == dst
- that is then written to the tablespace
-
- (1) For normal tables io_buffer == dst == writeptr
- (2) For only page compressed tables
- io_buffer == dst == writeptr
- (3) For encrypted (and page compressed)
- readptr != io_buffer == dst != writeptr
- */
-
- ut_ad(!encrypted && !page_compressed ?
- src == dst && dst == writeptr + (i * size):1);
- ut_ad(page_compressed && !encrypted ?
- src == dst && dst == writeptr + (i * size):1);
- ut_ad(encrypted ?
- src != dst && dst != writeptr + (i * size):1);
-
- if (encrypted) {
- memcpy(writeptr + (i * size),
- row_compressed ? block->page.zip.data :
- block->frame, size);
- }
-
- if (frame_changed) {
- block->frame = dst;
- }
-
- src = io_buffer + (i * size);
-
- if (page_compressed) {
- ulint len = 0;
-
- fil_compress_page(
- NULL,
- src,
- NULL,
- size,
- 0,/* FIXME: compression level */
- 512,/* FIXME: use proper block size */
- encrypted,
- &len);
-
- updated = true;
- }
-
- /* If tablespace is encrypted, encrypt page before we
- write it back. Note that we should not encrypt the
- buffer that is in buffer pool. */
- /* NOTE: At this stage of IMPORT the
- buffer pool is not being used at all! */
- if (decrypted && encrypted) {
- byte *dest = writeptr + (i * size);
- ulint space = mach_read_from_4(
- src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- ulint offset = mach_read_from_4(src + FIL_PAGE_OFFSET);
- ib_uint64_t lsn = mach_read_from_8(src + FIL_PAGE_LSN);
-
- byte* tmp = fil_encrypt_buf(
- iter.crypt_data,
- space,
- offset,
- lsn,
- src,
- iter.page_size == UNIV_PAGE_SIZE ? 0 : iter.page_size,
- dest);
-
- if (tmp == src) {
- /* TODO: remove unnecessary memcpy's */
- memcpy(dest, src, size);
- }
-
- updated = true;
- }
-
- page_off += iter.page_size;
- block->frame += iter.page_size;
- }
-
- /* A page was updated in the set, write back to disk. */
- if (updated
- && !os_file_write(
- iter.filepath, iter.file, writeptr,
- offset, (ulint) n_bytes)) {
-
- ib_logf(IB_LOG_LEVEL_ERROR, "os_file_write() failed");
-
- return(DB_IO_ERROR);
- }
- }
-
- return(DB_SUCCESS);
-}
-
-/********************************************************************//**
-Iterate over all the pages in the tablespace.
-@param table - the table definiton in the server
-@param n_io_buffers - number of blocks to read and write together
-@param callback - functor that will do the page updates
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fil_tablespace_iterate(
-/*===================*/
- dict_table_t* table,
- ulint n_io_buffers,
- PageCallback& callback)
-{
- dberr_t err;
- pfs_os_file_t file;
- char* filepath;
-
- ut_a(n_io_buffers > 0);
- ut_ad(!srv_read_only_mode);
-
- DBUG_EXECUTE_IF("ib_import_trigger_corruption_1",
- return(DB_CORRUPTION););
-
- if (DICT_TF_HAS_DATA_DIR(table->flags)) {
- dict_get_and_save_data_dir_path(table, false);
- ut_a(table->data_dir_path);
-
- filepath = os_file_make_remote_pathname(
- table->data_dir_path, table->name, "ibd");
- } else {
- filepath = fil_make_ibd_name(table->name, false);
- }
-
- {
- ibool success;
-
- file = os_file_create_simple_no_error_handling(
- innodb_file_data_key, filepath,
- OS_FILE_OPEN, OS_FILE_READ_WRITE, &success, FALSE);
-
- DBUG_EXECUTE_IF("fil_tablespace_iterate_failure",
- {
- static bool once;
-
- if (!once || ut_rnd_interval(0, 10) == 5) {
- once = true;
- success = FALSE;
- os_file_close(file);
- }
- });
-
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(true);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Trying to import a tablespace, but could not "
- "open the tablespace file %s", filepath);
-
- mem_free(filepath);
-
- return(DB_TABLESPACE_NOT_FOUND);
-
- } else {
- err = DB_SUCCESS;
- }
- }
-
- callback.set_file(filepath, file);
-
- os_offset_t file_size = os_file_get_size(file);
- ut_a(file_size != (os_offset_t) -1);
-
- /* The block we will use for every physical page */
- buf_block_t block;
-
- memset(&block, 0x0, sizeof(block));
-
- /* Allocate a page to read in the tablespace header, so that we
- can determine the page size and zip_size (if it is compressed).
- We allocate an extra page in case it is a compressed table. One
- page is to ensure alignement. */
-
- void* page_ptr = mem_alloc(3 * UNIV_PAGE_SIZE);
- byte* page = static_cast<byte*>(ut_align(page_ptr, UNIV_PAGE_SIZE));
-
- fil_buf_block_init(&block, page);
-
- /* Read the first page and determine the page and zip size. */
-
- if (!os_file_read(file, page, 0, UNIV_PAGE_SIZE)) {
-
- err = DB_IO_ERROR;
-
- } else if ((err = callback.init(file_size, &block)) == DB_SUCCESS) {
- fil_iterator_t iter;
-
- iter.file = file;
- iter.start = 0;
- iter.end = file_size;
- iter.filepath = filepath;
- iter.file_size = file_size;
- iter.n_io_buffers = n_io_buffers;
- iter.page_size = callback.get_page_size();
-
- /* In MariaDB/MySQL 5.6 tablespace does not exist
- during import, therefore we can't use space directly
- here. */
- ulint crypt_data_offset = fsp_header_get_crypt_offset(
- callback.get_zip_size());
-
- /* read (optional) crypt data */
- iter.crypt_data = fil_space_read_crypt_data(
- 0, page, crypt_data_offset);
-
- /* Compressed pages can't be optimised for block IO for now.
- We do the IMPORT page by page. */
-
- if (callback.get_zip_size() > 0) {
- iter.n_io_buffers = 1;
- ut_a(iter.page_size == callback.get_zip_size());
- }
-
- /** If tablespace is encrypted, it needs extra buffers */
- if (iter.crypt_data != NULL) {
- /* decrease io buffers so that memory
- * consumption doesnt double
- * note: the +1 is to avoid n_io_buffers getting down to 0 */
- iter.n_io_buffers = (iter.n_io_buffers + 1) / 2;
- }
-
- /** Add an extra page for compressed page scratch area. */
-
- void* io_buffer = mem_alloc(
- (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
-
- iter.io_buffer = static_cast<byte*>(
- ut_align(io_buffer, UNIV_PAGE_SIZE));
-
- void* crypt_io_buffer = NULL;
- if (iter.crypt_data != NULL) {
- crypt_io_buffer = mem_alloc(
- (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
- iter.crypt_io_buffer = static_cast<byte*>(
- ut_align(crypt_io_buffer, UNIV_PAGE_SIZE));
- }
-
- err = fil_iterate(iter, &block, callback);
-
- mem_free(io_buffer);
-
- if (crypt_io_buffer != NULL) {
- mem_free(crypt_io_buffer);
- iter.crypt_io_buffer = NULL;
- fil_space_destroy_crypt_data(&iter.crypt_data);
- }
- }
-
- if (err == DB_SUCCESS) {
-
- ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk");
-
- if (!os_file_flush(file)) {
- ib_logf(IB_LOG_LEVEL_INFO, "os_file_flush() failed!");
- err = DB_IO_ERROR;
- } else {
- ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk - done!");
- }
- }
-
- os_file_close(file);
-
- mem_free(page_ptr);
- mem_free(filepath);
-
- return(err);
-}
-
-/**
-Set the tablespace compressed table size.
-@return DB_SUCCESS if it is valie or DB_CORRUPTION if not */
-dberr_t
-PageCallback::set_zip_size(const buf_frame_t* page) UNIV_NOTHROW
-{
- m_zip_size = fsp_header_get_zip_size(page);
-
- if (!ut_is_2pow(m_zip_size) || m_zip_size > UNIV_ZIP_SIZE_MAX) {
- return(DB_CORRUPTION);
- }
-
- return(DB_SUCCESS);
-}
-
-/********************************************************************//**
Delete the tablespace file and any related files like .cfg.
This should not be called for temporary tables. */
UNIV_INTERN
diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h
index 8c3bf7d2b06..8a4aa9b7dff 100644
--- a/storage/xtradb/include/fil0fil.h
+++ b/storage/xtradb/include/fil0fil.h
@@ -1309,107 +1309,6 @@ fil_delete_file(
/*============*/
const char* path); /*!< in: filepath of the ibd tablespace */
-/** Callback functor. */
-struct PageCallback {
-
- /**
- Default constructor */
- PageCallback()
- :
- m_zip_size(),
- m_page_size(),
- m_filepath() UNIV_NOTHROW {}
-
- virtual ~PageCallback() UNIV_NOTHROW {}
-
- /**
- Called for page 0 in the tablespace file at the start.
- @param file_size - size of the file in bytes
- @param block - contents of the first page in the tablespace file
- @retval DB_SUCCESS or error code.*/
- virtual dberr_t init(
- os_offset_t file_size,
- const buf_block_t* block) UNIV_NOTHROW = 0;
-
- /**
- Called for every page in the tablespace. If the page was not
- updated then its state must be set to BUF_PAGE_NOT_USED. For
- compressed tables the page descriptor memory will be at offset:
- block->frame + UNIV_PAGE_SIZE;
- @param offset - physical offset within the file
- @param block - block read from file, note it is not from the buffer pool
- @retval DB_SUCCESS or error code. */
- virtual dberr_t operator()(
- os_offset_t offset,
- buf_block_t* block) UNIV_NOTHROW = 0;
-
- /**
- Set the name of the physical file and the file handle that is used
- to open it for the file that is being iterated over.
- @param filename - then physical name of the tablespace file.
- @param file - OS file handle */
- void set_file(const char* filename, pfs_os_file_t file) UNIV_NOTHROW
- {
- m_file = file;
- m_filepath = filename;
- }
-
- /**
- @return the space id of the tablespace */
- virtual ulint get_space_id() const UNIV_NOTHROW = 0;
-
- /** The compressed page size
- @return the compressed page size */
- ulint get_zip_size() const
- {
- return(m_zip_size);
- }
-
- /**
- Set the tablespace compressed table size.
- @return DB_SUCCESS if it is valie or DB_CORRUPTION if not */
- dberr_t set_zip_size(const buf_frame_t* page) UNIV_NOTHROW;
-
- /** The compressed page size
- @return the compressed page size */
- ulint get_page_size() const
- {
- return(m_page_size);
- }
-
- /** Compressed table page size */
- ulint m_zip_size;
-
- /** The tablespace page size. */
- ulint m_page_size;
-
- /** File handle to the tablespace */
- pfs_os_file_t m_file;
-
- /** Physical file path. */
- const char* m_filepath;
-
-protected:
- // Disable copying
- PageCallback(const PageCallback&);
- PageCallback& operator=(const PageCallback&);
-};
-
-/********************************************************************//**
-Iterate over all the pages in the tablespace.
-@param table - the table definiton in the server
-@param n_io_buffers - number of blocks to read and write together
-@param callback - functor that will do the page updates
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fil_tablespace_iterate(
-/*===================*/
- dict_table_t* table,
- ulint n_io_buffers,
- PageCallback& callback)
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
/*******************************************************************//**
Checks if a single-table tablespace for a given table name exists in the
tablespace memory cache.
diff --git a/storage/xtradb/mem/mem0mem.cc b/storage/xtradb/mem/mem0mem.cc
index b9f190509ee..f91126697fc 100644
--- a/storage/xtradb/mem/mem0mem.cc
+++ b/storage/xtradb/mem/mem0mem.cc
@@ -406,6 +406,11 @@ mem_heap_create_block_func(
heap->total_size += len;
}
+ /* Poison all available memory. Individual chunks will be unpoisoned on
+ every mem_heap_alloc() call. */
+ compile_time_assert(MEM_BLOCK_HEADER_SIZE >= sizeof *block);
+ UNIV_MEM_FREE(block + 1, len - sizeof *block);
+
ut_ad((ulint)MEM_BLOCK_HEADER_SIZE < len);
return(block);
diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc
index 634ebb2af49..6a63f31b37a 100644
--- a/storage/xtradb/os/os0file.cc
+++ b/storage/xtradb/os/os0file.cc
@@ -2,7 +2,7 @@
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
+Copyright (c) 2013, 2018, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
@@ -3169,15 +3169,21 @@ try_again:
overlapped.hEvent = win_get_syncio_event();
ret = ReadFile(file, buf, n, NULL, &overlapped);
if (ret) {
- ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE);
- }
- else if(GetLastError() == ERROR_IO_PENDING) {
- ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE);
+ ret = GetOverlappedResult(file, &overlapped, &len, FALSE);
+ } else if (GetLastError() == ERROR_IO_PENDING) {
+ ret = GetOverlappedResult(file, &overlapped, &len, TRUE);
}
MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_READS, monitor);
- if (ret && len == n) {
+ if (!ret) {
+ } else if (len == n) {
return(TRUE);
+ } else {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Tried to read " ULINTPF " bytes at offset "
+ UINT64PF ". Was only able to read %lu.",
+ n, offset, ret);
+ return FALSE;
}
#else /* __WIN__ */
ibool retry;
@@ -3204,6 +3210,7 @@ try_again:
"Tried to read " ULINTPF " bytes at offset "
UINT64PF ". Was only able to read %ld.",
n, offset, (lint) ret);
+ return FALSE;
}
#endif /* __WIN__ */
retry = os_file_handle_error(NULL, "read", __FILE__, __LINE__);
@@ -3272,15 +3279,21 @@ try_again:
overlapped.hEvent = win_get_syncio_event();
ret = ReadFile(file, buf, n, NULL, &overlapped);
if (ret) {
- ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE);
- }
- else if(GetLastError() == ERROR_IO_PENDING) {
- ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE);
+ ret = GetOverlappedResult(file, &overlapped, &len, FALSE);
+ } else if (GetLastError() == ERROR_IO_PENDING) {
+ ret = GetOverlappedResult(file, &overlapped, &len, TRUE);
}
MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_READS, monitor);
- if (ret && len == n) {
+ if (!ret) {
+ } else if (len == n) {
return(TRUE);
+ } else {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Tried to read " ULINTPF " bytes at offset "
+ UINT64PF ". Was only able to read %lu.",
+ n, offset, len);
+ return FALSE;
}
#else /* __WIN__ */
ibool retry;
@@ -3303,6 +3316,7 @@ try_again:
"Tried to read " ULINTPF " bytes at offset "
UINT64PF ". Was only able to read %ld.",
n, offset, (lint) ret);
+ return FALSE;
}
#endif /* __WIN__ */
retry = os_file_handle_error_no_exit(NULL, "read", FALSE, __FILE__, __LINE__);
@@ -3383,10 +3397,9 @@ retry:
overlapped.hEvent = win_get_syncio_event();
ret = WriteFile(file, buf, n, NULL, &overlapped);
if (ret) {
- ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE);
- }
- else if ( GetLastError() == ERROR_IO_PENDING) {
- ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE);
+ ret = GetOverlappedResult(file, &overlapped, &len, FALSE);
+ } else if (GetLastError() == ERROR_IO_PENDING) {
+ ret = GetOverlappedResult(file, &overlapped, &len, TRUE);
}
MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_WRITES, monitor);
@@ -6588,8 +6601,7 @@ os_file_trim(
DWORD tmp;
if (ret) {
ret = GetOverlappedResult(slot->file, &overlapped, &tmp, FALSE);
- }
- else if (GetLastError() == ERROR_IO_PENDING) {
+ } else if (GetLastError() == ERROR_IO_PENDING) {
ret = GetOverlappedResult(slot->file, &overlapped, &tmp, TRUE);
}
if (!ret) {
diff --git a/storage/xtradb/row/row0import.cc b/storage/xtradb/row/row0import.cc
index 18939d4cab9..20fc0045749 100644
--- a/storage/xtradb/row/row0import.cc
+++ b/storage/xtradb/row/row0import.cc
@@ -40,13 +40,11 @@ Created 2012-02-08 by Sunny Bains.
#include "row0mysql.h"
#include "srv0start.h"
#include "row0quiesce.h"
-#include "buf0buf.h"
+#include "fil0pagecompress.h"
#include <vector>
-/** The size of the buffer to use for IO. Note: os_file_read() doesn't expect
-reads to fail. If you set the buffer size to be greater than a multiple of the
-file size then it will assert. TODO: Fix this limitation of the IO functions.
+/** The size of the buffer to use for IO.
@param n - page size of the tablespace.
@retval number of pages */
#define IO_BUFFER_SIZE(n) ((1024 * 1024) / n)
@@ -362,7 +360,8 @@ private:
/** Functor that is called for each physical page that is read from the
tablespace file. */
-class AbstractCallback : public PageCallback {
+class AbstractCallback
+{
public:
/** Constructor
@param trx - covering transaction */
@@ -395,32 +394,62 @@ public:
return(get_zip_size() > 0);
}
-protected:
/**
- Get the data page depending on the table type, compressed or not.
- @param block - block read from disk
- @retval the buffer frame */
- buf_frame_t* get_frame(buf_block_t* block) const UNIV_NOTHROW
+ Set the name of the physical file and the file handle that is used
+ to open it for the file that is being iterated over.
+ @param filename - then physical name of the tablespace file.
+ @param file - OS file handle */
+ void set_file(const char* filename, pfs_os_file_t file) UNIV_NOTHROW
{
- if (is_compressed_table()) {
- return(block->page.zip.data);
- }
+ m_file = file;
+ m_filepath = filename;
+ }
- return(buf_block_get_frame(block));
+ /** The compressed page size
+ @return the compressed page size */
+ ulint get_zip_size() const
+ {
+ return(m_zip_size);
}
- /** Check for session interrupt. If required we could
- even flush to disk here every N pages.
- @retval DB_SUCCESS or error code */
- dberr_t periodic_check() UNIV_NOTHROW
+ /** The compressed page size
+ @return the compressed page size */
+ ulint get_page_size() const
{
- if (trx_is_interrupted(m_trx)) {
- return(DB_INTERRUPTED);
- }
+ return(m_page_size);
+ }
- return(DB_SUCCESS);
+ const char* filename() const { return m_filepath; }
+
+ /**
+ Called for every page in the tablespace. If the page was not
+ updated then its state must be set to BUF_PAGE_NOT_USED. For
+ compressed tables the page descriptor memory will be at offset:
+ block->frame + UNIV_PAGE_SIZE;
+ @param offset - physical offset within the file
+ @param block - block read from file, note it is not from the buffer pool
+ @retval DB_SUCCESS or error code. */
+ virtual dberr_t operator()(
+ os_offset_t offset,
+ buf_block_t* block) UNIV_NOTHROW = 0;
+
+ /**
+ @return the space id of the tablespace */
+ virtual ulint get_space_id() const UNIV_NOTHROW = 0;
+
+ bool is_interrupted() const { return trx_is_interrupted(m_trx); }
+
+ /**
+ Get the data page depending on the table type, compressed or not.
+ @param block - block read from disk
+ @retval the buffer frame */
+ static byte* get_frame(const buf_block_t* block)
+ {
+ return block->page.zip.data
+ ? block->page.zip.data : block->frame;
}
+protected:
/**
Get the physical offset of the extent descriptor within the page.
@param page_no - page number of the extent descriptor
@@ -510,6 +539,18 @@ protected:
}
protected:
+ /** Compressed table page size */
+ ulint m_zip_size;
+
+ /** The tablespace page size. */
+ ulint m_page_size;
+
+ /** File handle to the tablespace */
+ pfs_os_file_t m_file;
+
+ /** Physical file path. */
+ const char* m_filepath;
+
/** Covering transaction. */
trx_t* m_trx;
@@ -566,9 +607,9 @@ AbstractCallback::init(
/* Since we don't know whether it is a compressed table
or not, the data is always read into the block->frame. */
- dberr_t err = set_zip_size(block->frame);
+ m_zip_size = fsp_header_get_zip_size(page);
- if (err != DB_SUCCESS) {
+ if (!ut_is_2pow(m_zip_size) || m_zip_size > UNIV_ZIP_SIZE_MAX) {
return(DB_CORRUPTION);
}
@@ -605,11 +646,7 @@ AbstractCallback::init(
m_free_limit = mach_read_from_4(page + FSP_FREE_LIMIT);
m_space = mach_read_from_4(page + FSP_HEADER_OFFSET + FSP_SPACE_ID);
- if ((err = set_current_xdes(0, page)) != DB_SUCCESS) {
- return(err);
- }
-
- return(DB_SUCCESS);
+ return set_current_xdes(0, page);
}
/**
@@ -682,11 +719,7 @@ FetchIndexRootPages::operator() (
os_offset_t offset,
buf_block_t* block) UNIV_NOTHROW
{
- dberr_t err;
-
- if ((err = periodic_check()) != DB_SUCCESS) {
- return(err);
- }
+ if (is_interrupted()) return DB_INTERRUPTED;
const page_t* page = get_frame(block);
@@ -699,9 +732,9 @@ FetchIndexRootPages::operator() (
block->page.offset,
(ulint) (offset / m_page_size));
- err = DB_CORRUPTION;
+ return DB_CORRUPTION;
} else if (page_type == FIL_PAGE_TYPE_XDES) {
- err = set_current_xdes(block->page.offset, page);
+ return set_current_xdes(block->page.offset, page);
} else if (page_type == FIL_PAGE_INDEX
&& !is_free(block->page.offset)
&& is_root_page(page)) {
@@ -726,7 +759,7 @@ FetchIndexRootPages::operator() (
}
}
- return(err);
+ return DB_SUCCESS;
}
/**
@@ -850,14 +883,6 @@ public:
os_offset_t offset,
buf_block_t* block) UNIV_NOTHROW;
private:
-
- /** Status returned by PageConverter::validate() */
- enum import_page_status_t {
- IMPORT_PAGE_STATUS_OK, /*!< Page is OK */
- IMPORT_PAGE_STATUS_ALL_ZERO, /*!< Page is all zeros */
- IMPORT_PAGE_STATUS_CORRUPTED /*!< Page is corrupted */
- };
-
/**
Update the page, set the space id, max trx id and index id.
@param block - block read from file
@@ -867,17 +892,6 @@ private:
buf_block_t* block,
ulint& page_type) UNIV_NOTHROW;
-#if defined UNIV_DEBUG
- /**
- @return true error condition is enabled. */
- bool trigger_corruption() UNIV_NOTHROW
- {
- return(false);
- }
- #else
-#define trigger_corruption() (false)
-#endif /* UNIV_DEBUG */
-
/**
Update the space, index id, trx id.
@param block - block to convert
@@ -891,15 +905,6 @@ private:
dberr_t update_records(buf_block_t* block) UNIV_NOTHROW;
/**
- Validate the page, check for corruption.
- @param offset - physical offset within file.
- @param page - page read from file.
- @return 0 on success, 1 if all zero, 2 if corrupted */
- import_page_status_t validate(
- os_offset_t offset,
- buf_block_t* page) UNIV_NOTHROW;
-
- /**
Validate the space flags and update tablespace header page.
@param block - block read from file, not from the buffer pool.
@retval DB_SUCCESS or error code */
@@ -1316,8 +1321,8 @@ row_import::match_schema(
return(DB_ERROR);
} else if (m_table->n_cols != m_n_cols) {
ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
- "Number of columns don't match, table has %u "
- "columns but the tablespace meta-data file has "
+ "Number of columns don't match, table has %u"
+ " columns but the tablespace meta-data file has "
ULINTPF " columns",
m_table->n_cols, m_n_cols);
@@ -1597,6 +1602,7 @@ IndexPurge::purge() UNIV_NOTHROW
Constructor
* @param cfg - config of table being imported.
* @param trx - transaction covering the import */
+inline
PageConverter::PageConverter(
row_import* cfg,
trx_t* trx)
@@ -1621,6 +1627,7 @@ Adjust the BLOB reference for a single column that is externally stored
@param offsets - column offsets for the record
@param i - column ordinal value
@return DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::adjust_cluster_index_blob_column(
rec_t* rec,
@@ -1673,6 +1680,7 @@ stored columns.
@param rec - record to update
@param offsets - column offsets for the record
@return DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::adjust_cluster_index_blob_columns(
rec_t* rec,
@@ -1706,6 +1714,7 @@ BLOB reference, write the new space id.
@param rec - record to update
@param offsets - column offsets for the record
@return DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::adjust_cluster_index_blob_ref(
rec_t* rec,
@@ -1729,6 +1738,7 @@ Purge delete-marked records, only if it is possible to do so without
re-organising the B+tree.
@param offsets - current row offsets.
@return true if purge succeeded */
+inline
bool
PageConverter::purge(const ulint* offsets) UNIV_NOTHROW
{
@@ -1753,6 +1763,7 @@ Adjust the BLOB references and sys fields for the current record.
@param offsets - column offsets for the record
@param deleted - true if row is delete marked
@return DB_SUCCESS or error code. */
+inline
dberr_t
PageConverter::adjust_cluster_record(
const dict_index_t* index,
@@ -1781,6 +1792,7 @@ Update the BLOB refrences and write UNDO log entries for
rows that can't be purged optimistically.
@param block - block to update
@retval DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::update_records(
buf_block_t* block) UNIV_NOTHROW
@@ -1846,6 +1858,7 @@ PageConverter::update_records(
/**
Update the space, index id, trx id.
@return DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::update_index_page(
buf_block_t* block) UNIV_NOTHROW
@@ -1915,6 +1928,7 @@ PageConverter::update_index_page(
Validate the space flags and update tablespace header page.
@param block - block read from file, not from the buffer pool.
@retval DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::update_header(
buf_block_t* block) UNIV_NOTHROW
@@ -1954,6 +1968,7 @@ PageConverter::update_header(
Update the page, set the space id, max trx id and index id.
@param block - block read from file
@retval DB_SUCCESS or error code */
+inline
dberr_t
PageConverter::update_page(
buf_block_t* block,
@@ -1961,6 +1976,14 @@ PageConverter::update_page(
{
dberr_t err = DB_SUCCESS;
+ ut_ad(!block->page.zip.data == !is_compressed_table());
+
+ if (block->page.zip.data) {
+ m_page_zip_ptr = &block->page.zip;
+ } else {
+ ut_ad(!m_page_zip_ptr);
+ }
+
switch (page_type = fil_page_get_type(get_frame(block))) {
case FIL_PAGE_TYPE_FSP_HDR:
/* Work directly on the uncompressed page headers. */
@@ -2016,140 +2039,45 @@ PageConverter::update_page(
}
/**
-Validate the page
-@param offset - physical offset within file.
-@param page - page read from file.
-@return status */
-PageConverter::import_page_status_t
-PageConverter::validate(
- os_offset_t offset,
- buf_block_t* block) UNIV_NOTHROW
-{
- buf_frame_t* page = get_frame(block);
-
- /* Check that the page number corresponds to the offset in
- the file. Flag as corrupt if it doesn't. Disable the check
- for LSN in buf_page_is_corrupted() */
-
- if (buf_page_is_corrupted(false, page, get_zip_size(), NULL)
- || (page_get_page_no(page) != offset / m_page_size
- && page_get_page_no(page) != 0)) {
-
- return(IMPORT_PAGE_STATUS_CORRUPTED);
-
- } else if (offset > 0 && page_get_page_no(page) == 0) {
- ulint checksum;
-
- checksum = mach_read_from_4(page + FIL_PAGE_SPACE_OR_CHKSUM);
- if (checksum != 0) {
- /* Checksum check passed in buf_page_is_corrupted(). */
- ib_logf(IB_LOG_LEVEL_WARN,
- "%s: Page %lu checksum " ULINTPF
- " should be zero.",
- m_filepath, (ulong) (offset / m_page_size),
- checksum);
- }
-
- const byte* b = page + FIL_PAGE_OFFSET;
- const byte* e = page + m_page_size
- - FIL_PAGE_END_LSN_OLD_CHKSUM;
-
- /* If the page number is zero and offset > 0 then
- the entire page MUST consist of zeroes. If not then
- we flag it as corrupt. */
-
- while (b != e) {
-
- if (*b++ && !trigger_corruption()) {
- return(IMPORT_PAGE_STATUS_CORRUPTED);
- }
- }
-
- /* The page is all zero: do nothing. */
- return(IMPORT_PAGE_STATUS_ALL_ZERO);
- }
-
- return(IMPORT_PAGE_STATUS_OK);
-}
-
-/**
Called for every page in the tablespace. If the page was not
updated then its state must be set to BUF_PAGE_NOT_USED.
-@param offset - physical offset within the file
@param block - block read from file, note it is not from the buffer pool
@retval DB_SUCCESS or error code. */
dberr_t
-PageConverter::operator() (
- os_offset_t offset,
- buf_block_t* block) UNIV_NOTHROW
+PageConverter::operator() (os_offset_t, buf_block_t* block) UNIV_NOTHROW
{
- ulint page_type;
- dberr_t err = DB_SUCCESS;
-
- if ((err = periodic_check()) != DB_SUCCESS) {
- return(err);
- }
-
- if (is_compressed_table()) {
- m_page_zip_ptr = &block->page.zip;
- } else {
- ut_ad(m_page_zip_ptr == 0);
- }
-
- switch(validate(offset, block)) {
- case IMPORT_PAGE_STATUS_OK:
-
- /* We have to decompress the compressed pages before
- we can work on them */
-
- if ((err = update_page(block, page_type)) != DB_SUCCESS) {
- break;
- }
-
- /* Note: For compressed pages this function will write to the
- zip descriptor and for uncompressed pages it will write to
- page (ie. the block->frame). Therefore the caller should write
- out the descriptor contents and not block->frame for compressed
- pages. */
-
- if (!is_compressed_table() || page_type == FIL_PAGE_INDEX) {
-
- buf_flush_init_for_writing(
- !is_compressed_table()
- ? block->frame : block->page.zip.data,
- !is_compressed_table() ? 0 : m_page_zip_ptr,
- m_current_lsn);
- } else {
- /* Calculate and update the checksum of non-btree
- pages for compressed tables explicitly here. */
-
- buf_flush_update_zip_checksum(
- get_frame(block), get_zip_size(),
- m_current_lsn);
- }
-
- break;
+ /* If we already had an old page with matching number
+ in the buffer pool, evict it now, because
+ we no longer evict the pages on DISCARD TABLESPACE. */
+ buf_page_get_gen(get_space_id(), get_zip_size(), block->page.offset,
+ RW_NO_LATCH, NULL, BUF_EVICT_IF_IN_POOL,
+ __FILE__, __LINE__, NULL);
- case IMPORT_PAGE_STATUS_ALL_ZERO:
- /* The page is all zero: leave it as is. */
- break;
+ ulint page_type;
- case IMPORT_PAGE_STATUS_CORRUPTED:
+ dberr_t err = update_page(block, page_type);
+ if (err != DB_SUCCESS) return err;
- ib_logf(IB_LOG_LEVEL_WARN,
- "%s: Page %lu at offset " UINT64PF " looks corrupted.",
- m_filepath, (ulong) (offset / m_page_size), offset);
+ /* Note: For compressed pages this function will write to the
+ zip descriptor and for uncompressed pages it will write to
+ page (ie. the block->frame). Therefore the caller should write
+ out the descriptor contents and not block->frame for compressed
+ pages. */
- err = DB_CORRUPTION;
+ if (!is_compressed_table() || page_type == FIL_PAGE_INDEX) {
+ buf_flush_init_for_writing(
+ get_frame(block),
+ block->page.zip.data ? &block->page.zip : NULL,
+ m_current_lsn);
+ } else {
+ /* Calculate and update the checksum of non-btree
+ pages for compressed tables explicitly here. */
+ buf_flush_update_zip_checksum(
+ get_frame(block), get_zip_size(),
+ m_current_lsn);
}
- /* If we already had and old page with matching number
- in the buffer pool, evict it now, because
- we no longer evict the pages on DISCARD TABLESPACE. */
- buf_page_get_gen(get_space_id(), get_zip_size(), block->page.offset,
- RW_NO_LATCH, NULL, BUF_EVICT_IF_IN_POOL,
- __FILE__, __LINE__, NULL);
- return(err);
+ return DB_SUCCESS;
}
/*****************************************************************//**
@@ -3424,6 +3352,460 @@ row_import_update_discarded_flag(
return(err);
}
+struct fil_iterator_t {
+ pfs_os_file_t file; /*!< File handle */
+ const char* filepath; /*!< File path name */
+ os_offset_t start; /*!< From where to start */
+ os_offset_t end; /*!< Where to stop */
+ os_offset_t file_size; /*!< File size in bytes */
+ ulint page_size; /*!< Page size */
+ ulint n_io_buffers; /*!< Number of pages to use
+ for IO */
+ byte* io_buffer; /*!< Buffer to use for IO */
+ fil_space_crypt_t *crypt_data; /*!< Crypt data (if encrypted) */
+ byte* crypt_io_buffer; /*!< IO buffer when encrypted */
+};
+
+/********************************************************************//**
+TODO: This can be made parallel trivially by chunking up the file and creating
+a callback per thread. Main benefit will be to use multiple CPUs for
+checksums and compressed tables. We have to do compressed tables block by
+block right now. Secondly we need to decompress/compress and copy too much
+of data. These are CPU intensive.
+
+Iterate over all the pages in the tablespace.
+@param iter - Tablespace iterator
+@param block - block to use for IO
+@param callback - Callback to inspect and update page contents
+@retval DB_SUCCESS or error code */
+static
+dberr_t
+fil_iterate(
+/*========*/
+ const fil_iterator_t& iter,
+ buf_block_t* block,
+ AbstractCallback& callback)
+{
+ os_offset_t offset;
+ ulint n_bytes = iter.n_io_buffers * iter.page_size;
+
+ ut_ad(!srv_read_only_mode);
+
+ /* TODO: For ROW_FORMAT=COMPRESSED tables we do a lot of useless
+ copying for non-index pages. Unfortunately, it is
+ required by buf_zip_decompress() */
+
+ for (offset = iter.start; offset < iter.end; offset += n_bytes) {
+ if (callback.is_interrupted()) {
+ return DB_INTERRUPTED;
+ }
+
+ byte* io_buffer = iter.io_buffer;
+ block->frame = io_buffer;
+
+ if (block->page.zip.data) {
+ /* Zip IO is done in the compressed page buffer. */
+ io_buffer = block->page.zip.data;
+ ut_ad(PAGE_ZIP_MATCH(block->frame, &block->page.zip));
+ }
+
+ /* We have to read the exact number of bytes. Otherwise the
+ InnoDB IO functions croak on failed reads. */
+
+ n_bytes = ulint(ut_min(os_offset_t(n_bytes),
+ iter.end - offset));
+
+ ut_ad(n_bytes > 0);
+ ut_ad(!(n_bytes % iter.page_size));
+
+ const bool encrypted = iter.crypt_data != NULL
+ && iter.crypt_data->should_encrypt();
+ /* Use additional crypt io buffer if tablespace is encrypted */
+ byte* const readptr = encrypted
+ ? iter.crypt_io_buffer : io_buffer;
+ byte* const writeptr = readptr;
+
+ if (!os_file_read_no_error_handling(iter.file, readptr,
+ offset, n_bytes)) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "os_file_read() failed");
+ return DB_IO_ERROR;
+ }
+
+ bool updated = false;
+ os_offset_t page_off = offset;
+ ulint n_pages_read = (ulint) n_bytes / iter.page_size;
+ bool decrypted = false;
+ const ulint size = iter.page_size;
+ block->page.offset = page_off / size;
+
+ for (ulint i = 0; i < n_pages_read;
+ ++i, page_off += size, block->frame += size,
+ block->page.offset++) {
+ dberr_t err = DB_SUCCESS;
+ byte* src = readptr + (i * size);
+ byte* dst = io_buffer + (i * size);
+ bool frame_changed = false;
+ ulint page_type = mach_read_from_2(src+FIL_PAGE_TYPE);
+ const bool page_compressed
+ = page_type
+ == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
+ || page_type == FIL_PAGE_PAGE_COMPRESSED;
+ const ulint page_no = page_get_page_no(src);
+ if (!page_no && page_off) {
+ const ulint* b = reinterpret_cast<const ulint*>
+ (src);
+ const ulint* const e = b + size / sizeof *b;
+ do {
+ if (*b++) {
+ goto page_corrupted;
+ }
+ } while (b != e);
+
+ /* Proceed to the next page,
+ because this one is all zero. */
+ continue;
+ }
+
+ if (page_no != page_off / size) {
+ goto page_corrupted;
+ }
+
+ if (encrypted) {
+ decrypted = fil_space_decrypt(
+ iter.crypt_data, dst,
+ iter.page_size, src, &err);
+
+ if (err != DB_SUCCESS) {
+ return err;
+ }
+
+ if (decrypted) {
+ updated = true;
+ } else {
+ if (!page_compressed
+ && !block->page.zip.data) {
+ block->frame = src;
+ frame_changed = true;
+ } else {
+ memcpy(dst, src, size);
+ }
+ }
+ }
+
+ /* If the original page is page_compressed, we need
+ to decompress it before adjusting further. */
+ if (page_compressed) {
+ fil_decompress_page(NULL, dst, ulong(size),
+ NULL);
+ updated = true;
+ } else if (buf_page_is_corrupted(
+ false,
+ encrypted && !frame_changed
+ ? dst : src,
+ callback.get_zip_size(), NULL)) {
+page_corrupted:
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "%s: Page %lu at offset "
+ UINT64PF " looks corrupted.",
+ callback.filename(),
+ ulong(offset / size), offset);
+ return DB_CORRUPTION;
+ }
+
+ if ((err = callback(page_off, block)) != DB_SUCCESS) {
+ return err;
+ } else if (!updated) {
+ updated = buf_block_get_state(block)
+ == BUF_BLOCK_FILE_PAGE;
+ }
+
+ /* If tablespace is encrypted we use additional
+ temporary scratch area where pages are read
+ for decrypting readptr == crypt_io_buffer != io_buffer.
+
+ Destination for decryption is a buffer pool block
+ block->frame == dst == io_buffer that is updated.
+ Pages that did not require decryption even when
+ tablespace is marked as encrypted are not copied
+ instead block->frame is set to src == readptr.
+
+ For encryption we again use temporary scratch area
+ writeptr != io_buffer == dst
+ that is then written to the tablespace
+
+ (1) For normal tables io_buffer == dst == writeptr
+ (2) For only page compressed tables
+ io_buffer == dst == writeptr
+ (3) For encrypted (and page compressed)
+ readptr != io_buffer == dst != writeptr
+ */
+
+ ut_ad(!encrypted && !page_compressed ?
+ src == dst && dst == writeptr + (i * size):1);
+ ut_ad(page_compressed && !encrypted ?
+ src == dst && dst == writeptr + (i * size):1);
+ ut_ad(encrypted ?
+ src != dst && dst != writeptr + (i * size):1);
+
+ if (encrypted) {
+ memcpy(writeptr + (i * size),
+ callback.get_frame(block), size);
+ }
+
+ if (frame_changed) {
+ block->frame = dst;
+ }
+
+ src = io_buffer + (i * size);
+
+ if (page_compressed) {
+ ulint len = 0;
+
+ fil_compress_page(
+ NULL,
+ src,
+ NULL,
+ size,
+ 0,/* FIXME: compression level */
+ 512,/* FIXME: use proper block size */
+ encrypted,
+ &len);
+
+ updated = true;
+ }
+
+ /* If tablespace is encrypted, encrypt page before we
+ write it back. Note that we should not encrypt the
+ buffer that is in buffer pool. */
+ /* NOTE: At this stage of IMPORT the
+ buffer pool is not being used at all! */
+ if (decrypted && encrypted) {
+ byte *dest = writeptr + (i * size);
+ ulint space = mach_read_from_4(
+ src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+ ulint offset = mach_read_from_4(src + FIL_PAGE_OFFSET);
+ ib_uint64_t lsn = mach_read_from_8(src + FIL_PAGE_LSN);
+
+ byte* tmp = fil_encrypt_buf(
+ iter.crypt_data,
+ space,
+ offset,
+ lsn,
+ src,
+ iter.page_size == UNIV_PAGE_SIZE ? 0 : iter.page_size,
+ dest);
+
+ if (tmp == src) {
+ /* TODO: remove unnecessary memcpy's */
+ memcpy(dest, src, size);
+ }
+
+ updated = true;
+ }
+ }
+
+ /* A page was updated in the set, write back to disk. */
+ if (updated
+ && !os_file_write(
+ iter.filepath, iter.file, writeptr,
+ offset, (ulint) n_bytes)) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR, "os_file_write() failed");
+ return DB_IO_ERROR;
+ }
+ }
+
+ return DB_SUCCESS;
+}
+
+/********************************************************************//**
+Iterate over all the pages in the tablespace.
+@param table - the table definition in the server
+@param n_io_buffers - number of blocks to read and write together
+@param callback - functor that will do the page updates
+@return DB_SUCCESS or error code */
+static
+dberr_t
+fil_tablespace_iterate(
+/*===================*/
+ dict_table_t* table,
+ ulint n_io_buffers,
+ AbstractCallback& callback)
+{
+ dberr_t err;
+ pfs_os_file_t file;
+ char* filepath;
+
+ ut_a(n_io_buffers > 0);
+ ut_ad(!srv_read_only_mode);
+
+ DBUG_EXECUTE_IF("ib_import_trigger_corruption_1",
+ return(DB_CORRUPTION););
+
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ dict_get_and_save_data_dir_path(table, false);
+ ut_a(table->data_dir_path);
+
+ filepath = os_file_make_remote_pathname(
+ table->data_dir_path, table->name, "ibd");
+ } else {
+ filepath = fil_make_ibd_name(table->name, false);
+ }
+
+ {
+ ibool success;
+
+ file = os_file_create_simple_no_error_handling(
+ innodb_file_data_key, filepath,
+ OS_FILE_OPEN, OS_FILE_READ_WRITE, &success, FALSE);
+
+ DBUG_EXECUTE_IF("fil_tablespace_iterate_failure",
+ {
+ static bool once;
+
+ if (!once || ut_rnd_interval(0, 10) == 5) {
+ once = true;
+ success = FALSE;
+ os_file_close(file);
+ }
+ });
+
+ if (!success) {
+ /* The following call prints an error message */
+ os_file_get_last_error(true);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Trying to import a tablespace, but could not "
+ "open the tablespace file %s", filepath);
+
+ mem_free(filepath);
+
+ return(DB_TABLESPACE_NOT_FOUND);
+
+ } else {
+ err = DB_SUCCESS;
+ }
+ }
+
+ callback.set_file(filepath, file);
+
+ os_offset_t file_size = os_file_get_size(file);
+ ut_a(file_size != (os_offset_t) -1);
+
+ /* Allocate a page to read in the tablespace header, so that we
+ can determine the page size and zip_size (if it is compressed).
+ We allocate an extra page in case it is a compressed table. One
+page is to ensure alignment. */
+
+ void* page_ptr = mem_alloc(3 * UNIV_PAGE_SIZE);
+ byte* page = static_cast<byte*>(ut_align(page_ptr, UNIV_PAGE_SIZE));
+
+ /* The block we will use for every physical page */
+ buf_block_t block;
+
+ memset(&block, 0, sizeof block);
+ block.frame = page;
+ block.page.space = callback.get_space_id();
+ block.page.io_fix = BUF_IO_NONE;
+ block.page.buf_fix_count = 1;
+ block.page.state = BUF_BLOCK_FILE_PAGE;
+
+ /* Read the first page and determine the page and zip size. */
+
+ if (!os_file_read_no_error_handling(file, page, 0, UNIV_PAGE_SIZE)) {
+
+ err = DB_IO_ERROR;
+
+ } else if ((err = callback.init(file_size, &block)) == DB_SUCCESS) {
+ if (const ulint zip_size = callback.get_zip_size()) {
+ page_zip_set_size(&block.page.zip, zip_size);
+ /* ROW_FORMAT=COMPRESSED is not optimised for block IO
+ for now. We do the IMPORT page by page. */
+ n_io_buffers = 1;
+ }
+
+ fil_iterator_t iter;
+
+ iter.file = file;
+ iter.start = 0;
+ iter.end = file_size;
+ iter.filepath = filepath;
+ iter.file_size = file_size;
+ iter.n_io_buffers = n_io_buffers;
+ iter.page_size = callback.get_page_size();
+
+ /* In MariaDB/MySQL 5.6 tablespace does not exist
+ during import, therefore we can't use space directly
+ here. */
+ ulint crypt_data_offset = fsp_header_get_crypt_offset(
+ callback.get_zip_size());
+
+ /* read (optional) crypt data */
+ iter.crypt_data = fil_space_read_crypt_data(
+ 0, page, crypt_data_offset);
+
+ /** If tablespace is encrypted, it needs extra buffers */
+ if (iter.crypt_data != NULL) {
+ /* decrease io buffers so that memory
+			 * consumption doesn't double
+ * note: the +1 is to avoid n_io_buffers getting down to 0 */
+ iter.n_io_buffers = (iter.n_io_buffers + 1) / 2;
+ }
+
+ /** Add an extra page for compressed page scratch area. */
+
+ void* io_buffer = mem_alloc(
+ (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
+
+ iter.io_buffer = static_cast<byte*>(
+ ut_align(io_buffer, UNIV_PAGE_SIZE));
+
+ void* crypt_io_buffer = NULL;
+ if (iter.crypt_data != NULL) {
+ crypt_io_buffer = mem_alloc(
+ (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
+ iter.crypt_io_buffer = static_cast<byte*>(
+ ut_align(crypt_io_buffer, UNIV_PAGE_SIZE));
+ }
+
+ if (block.page.zip.ssize) {
+ ut_ad(iter.n_io_buffers == 1);
+ block.frame = iter.io_buffer;
+ block.page.zip.data = block.frame + UNIV_PAGE_SIZE;
+ ut_d(block.page.zip.m_external = true);
+ }
+
+ err = fil_iterate(iter, &block, callback);
+
+ mem_free(io_buffer);
+
+ if (crypt_io_buffer != NULL) {
+ mem_free(crypt_io_buffer);
+ iter.crypt_io_buffer = NULL;
+ fil_space_destroy_crypt_data(&iter.crypt_data);
+ }
+ }
+
+ if (err == DB_SUCCESS) {
+
+ ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk");
+
+ if (!os_file_flush(file)) {
+ ib_logf(IB_LOG_LEVEL_INFO, "os_file_flush() failed!");
+ err = DB_IO_ERROR;
+ } else {
+ ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk - done!");
+ }
+ }
+
+ os_file_close(file);
+
+ mem_free(page_ptr);
+ mem_free(filepath);
+
+ return(err);
+}
+
/*****************************************************************//**
Imports a tablespace. The space id in the .ibd file must match the space id
of the table in the data dictionary.