summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--cmake/bzip2.cmake33
-rw-r--r--cmake/lz4.cmake48
-rw-r--r--cmake/lzma.cmake33
-rw-r--r--cmake/lzo.cmake48
-rw-r--r--cmake/snappy.cmake32
-rw-r--r--config.h.cmake3
-rw-r--r--configure.cmake3
-rw-r--r--extra/CMakeLists.txt33
-rw-r--r--extra/innochecksum.cc.moved396
-rw-r--r--mysql-test/disabled.def1
-rw-r--r--mysql-test/include/have_innodb_bzip2.inc4
-rw-r--r--mysql-test/include/have_innodb_lz4.inc4
-rw-r--r--mysql-test/include/have_innodb_lzma.inc4
-rw-r--r--mysql-test/include/have_innodb_lzo.inc4
-rw-r--r--mysql-test/include/have_innodb_snappy.inc4
-rw-r--r--mysql-test/suite/innodb/r/innodb-page_compression_bzip2.result437
-rw-r--r--mysql-test/suite/innodb/r/innodb-page_compression_lz4.result438
-rw-r--r--mysql-test/suite/innodb/r/innodb-page_compression_lzma.result437
-rw-r--r--mysql-test/suite/innodb/r/innodb-page_compression_lzo.result351
-rw-r--r--mysql-test/suite/innodb/r/innodb-page_compression_snappy.result438
-rw-r--r--mysql-test/suite/innodb/r/innodb-page_compression_tables.result121
-rw-r--r--mysql-test/suite/innodb/r/innodb-page_compression_zip.result351
-rw-r--r--mysql-test/suite/innodb/r/innodb_monitor.result15
-rw-r--r--mysql-test/suite/innodb/t/innodb-page_compression_bzip2.test251
-rw-r--r--mysql-test/suite/innodb/t/innodb-page_compression_lz4.test252
-rw-r--r--mysql-test/suite/innodb/t/innodb-page_compression_lzma.test251
-rw-r--r--mysql-test/suite/innodb/t/innodb-page_compression_lzo.test204
-rw-r--r--mysql-test/suite/innodb/t/innodb-page_compression_snappy.test253
-rw-r--r--mysql-test/suite/innodb/t/innodb-page_compression_tables.test102
-rw-r--r--mysql-test/suite/innodb/t/innodb-page_compression_zip.test202
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_compression_algorithm_basic.result47
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result15
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result15
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result15
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result15
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_mtflush_threads_basic.result21
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_use_lz4_basic.result3
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_use_mtflush_basic.result21
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result33
-rw-r--r--mysql-test/suite/sys_vars/t/innodb_compression_algorithm_basic-master.opt1
-rw-r--r--mysql-test/suite/sys_vars/t/innodb_compression_algorithm_basic.test46
-rw-r--r--mysql-test/suite/sys_vars/t/innodb_mtflush_threads_basic.test21
-rw-r--r--mysql-test/suite/sys_vars/t/innodb_use_lz4_basic.test5
-rw-r--r--mysql-test/suite/sys_vars/t/innodb_use_mtflush_basic.test22
-rw-r--r--mysql-test/suite/sys_vars/t/innodb_use_trim_basic.test36
-rw-r--r--storage/innobase/CMakeLists.txt13
-rw-r--r--storage/innobase/btr/btr0btr.cc2
-rw-r--r--storage/innobase/btr/btr0cur.cc4
-rw-r--r--storage/innobase/buf/buf0buf.cc7
-rw-r--r--storage/innobase/buf/buf0dblwr.cc27
-rw-r--r--storage/innobase/buf/buf0flu.cc70
-rw-r--r--storage/innobase/buf/buf0mtflu.cc732
-rw-r--r--storage/innobase/buf/buf0rea.cc5
-rw-r--r--storage/innobase/dict/dict0dict.cc1
-rw-r--r--storage/innobase/fil/fil0fil.cc299
-rw-r--r--storage/innobase/fil/fil0pagecompress.cc793
-rw-r--r--storage/innobase/handler/ha_innodb.cc451
-rw-r--r--storage/innobase/handler/ha_innodb.h19
-rw-r--r--storage/innobase/handler/handler0alter.cc28
-rw-r--r--storage/innobase/handler/i_s.cc1
-rw-r--r--storage/innobase/include/buf0buf.h6
-rw-r--r--storage/innobase/include/buf0flu.h59
-rw-r--r--storage/innobase/include/buf0mtflu.h95
-rw-r--r--storage/innobase/include/dict0dict.h14
-rw-r--r--storage/innobase/include/dict0dict.ic183
-rw-r--r--storage/innobase/include/dict0mem.h57
-rw-r--r--storage/innobase/include/dict0pagecompress.h94
-rw-r--r--storage/innobase/include/dict0pagecompress.ic191
-rw-r--r--storage/innobase/include/dict0types.h8
-rw-r--r--storage/innobase/include/fil0fil.h77
-rw-r--r--storage/innobase/include/fil0pagecompress.h145
-rw-r--r--storage/innobase/include/fsp0fsp.h66
-rw-r--r--storage/innobase/include/fsp0fsp.ic34
-rw-r--r--storage/innobase/include/fsp0pagecompress.h84
-rw-r--r--storage/innobase/include/fsp0pagecompress.ic197
-rw-r--r--storage/innobase/include/fsp0types.h1
-rw-r--r--storage/innobase/include/os0file.h112
-rw-r--r--storage/innobase/include/os0file.ic29
-rw-r--r--storage/innobase/include/srv0mon.h17
-rw-r--r--storage/innobase/include/srv0srv.h94
-rw-r--r--storage/innobase/include/srv0start.h3
-rw-r--r--storage/innobase/include/univ.i31
-rw-r--r--storage/innobase/include/ut0list.h9
-rw-r--r--storage/innobase/include/ut0list.ic20
-rw-r--r--storage/innobase/include/ut0wqueue.h17
-rw-r--r--storage/innobase/log/log0log.cc17
-rw-r--r--storage/innobase/log/log0recv.cc17
-rw-r--r--storage/innobase/os/os0file.cc863
-rw-r--r--storage/innobase/row/row0log.cc4
-rw-r--r--storage/innobase/row/row0merge.cc3
-rw-r--r--storage/innobase/srv/srv0mon.cc128
-rw-r--r--storage/innobase/srv/srv0srv.cc40
-rw-r--r--storage/innobase/srv/srv0start.cc59
-rw-r--r--storage/innobase/ut/ut0wqueue.cc49
-rw-r--r--storage/xtradb/CMakeLists.txt13
-rw-r--r--storage/xtradb/buf/buf0buf.cc28
-rw-r--r--storage/xtradb/buf/buf0dblwr.cc28
-rw-r--r--storage/xtradb/buf/buf0flu.cc82
-rw-r--r--storage/xtradb/buf/buf0mtflu.cc733
-rw-r--r--storage/xtradb/buf/buf0rea.cc5
-rw-r--r--storage/xtradb/dict/dict0dict.cc1
-rw-r--r--storage/xtradb/fil/fil0fil.cc290
-rw-r--r--storage/xtradb/fil/fil0pagecompress.cc790
-rw-r--r--storage/xtradb/handler/ha_innodb.cc453
-rw-r--r--storage/xtradb/handler/ha_innodb.h18
-rw-r--r--storage/xtradb/handler/handler0alter.cc28
-rw-r--r--storage/xtradb/handler/i_s.cc1
-rw-r--r--storage/xtradb/include/buf0buf.h21
-rw-r--r--storage/xtradb/include/buf0flu.h66
-rw-r--r--storage/xtradb/include/buf0mtflu.h95
-rw-r--r--storage/xtradb/include/dict0dict.h12
-rw-r--r--storage/xtradb/include/dict0dict.ic187
-rw-r--r--storage/xtradb/include/dict0mem.h56
-rw-r--r--storage/xtradb/include/dict0pagecompress.h94
-rw-r--r--storage/xtradb/include/dict0pagecompress.ic191
-rw-r--r--storage/xtradb/include/dict0types.h9
-rw-r--r--storage/xtradb/include/fil0fil.h95
-rw-r--r--storage/xtradb/include/fil0pagecompress.h145
-rw-r--r--storage/xtradb/include/fsp0fsp.h68
-rw-r--r--storage/xtradb/include/fsp0fsp.ic33
-rw-r--r--storage/xtradb/include/fsp0pagecompress.h84
-rw-r--r--storage/xtradb/include/fsp0pagecompress.ic197
-rw-r--r--storage/xtradb/include/os0file.h127
-rw-r--r--storage/xtradb/include/os0file.ic34
-rw-r--r--storage/xtradb/include/srv0mon.h18
-rw-r--r--storage/xtradb/include/srv0srv.h90
-rw-r--r--storage/xtradb/include/univ.i31
-rw-r--r--storage/xtradb/include/ut0list.h9
-rw-r--r--storage/xtradb/include/ut0list.ic20
-rw-r--r--storage/xtradb/include/ut0wqueue.h17
-rw-r--r--storage/xtradb/log/log0log.cc20
-rw-r--r--storage/xtradb/log/log0online.cc10
-rw-r--r--storage/xtradb/log/log0recv.cc19
-rw-r--r--storage/xtradb/os/os0file.cc843
-rw-r--r--storage/xtradb/row/row0log.cc4
-rw-r--r--storage/xtradb/row/row0merge.cc3
-rw-r--r--storage/xtradb/srv/srv0mon.cc128
-rw-r--r--storage/xtradb/srv/srv0srv.cc42
-rw-r--r--storage/xtradb/srv/srv0start.cc58
-rw-r--r--storage/xtradb/ut/ut0wqueue.cc49
140 files changed, 15138 insertions, 551 deletions
diff --git a/cmake/bzip2.cmake b/cmake/bzip2.cmake
new file mode 100644
index 00000000000..0c15853d0b9
--- /dev/null
+++ b/cmake/bzip2.cmake
@@ -0,0 +1,33 @@
+# Copyright (C) 2014, SkySQL Ab. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+SET(WITH_INNODB_BZIP2 AUTO CACHE STRING
+ "Build with bzip2. Possible values are 'ON', 'OFF', 'AUTO' and default is 'AUTO'")
+# MYSQL_CHECK_BZIP2: probe for bzlib.h and libbz2; when every probe succeeds, define HAVE_BZIP2=1 and link bz2.
+MACRO (MYSQL_CHECK_BZIP2)
+ IF (WITH_INNODB_BZIP2 STREQUAL "ON" OR WITH_INNODB_BZIP2 STREQUAL "AUTO") # any other value skips the probe entirely
+ CHECK_INCLUDE_FILES(bzlib.h HAVE_BZLIB2_H)
+ CHECK_LIBRARY_EXISTS(bz2 BZ2_bzBuffToBuffCompress "" HAVE_BZLIB2_COMPRESS)
+ CHECK_LIBRARY_EXISTS(bz2 BZ2_bzBuffToBuffDecompress "" HAVE_BZLIB2_DECOMPRESS)
+ # The header and both library entry points are required before bzip2 is enabled.
+ IF (HAVE_BZLIB2_COMPRESS AND HAVE_BZLIB2_DECOMPRESS AND HAVE_BZLIB2_H)
+ ADD_DEFINITIONS(-DHAVE_BZIP2=1)
+ LINK_LIBRARIES(bz2)
+ ELSE()
+ IF (WITH_INNODB_BZIP2 STREQUAL "ON") # AUTO tolerates a missing library; only an explicit ON is fatal
+ MESSAGE(FATAL_ERROR "Required bzip2 library is not found")
+ ENDIF()
+ ENDIF()
+ ENDIF()
+ENDMACRO()
diff --git a/cmake/lz4.cmake b/cmake/lz4.cmake
new file mode 100644
index 00000000000..1607c68c5fb
--- /dev/null
+++ b/cmake/lz4.cmake
@@ -0,0 +1,48 @@
+# Copyright (C) 2014, SkySQL Ab. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+SET(WITH_INNODB_LZ4 AUTO CACHE STRING
+ "Build with lz4. Possible values are 'ON', 'OFF', 'AUTO' and default is 'AUTO'")
+# MYSQL_CHECK_LZ4: probe for lz4.h and the lz4 library; when both are found, define HAVE_LZ4=1 and link lz4.
+MACRO (MYSQL_CHECK_LZ4)
+ IF (WITH_INNODB_LZ4 STREQUAL "ON" OR WITH_INNODB_LZ4 STREQUAL "AUTO") # any other value skips the probe entirely
+ CHECK_INCLUDE_FILES(lz4.h HAVE_LZ4_H)
+ CHECK_LIBRARY_EXISTS(lz4 LZ4_compress_limitedOutput "" HAVE_LZ4_SHARED_LIB)
+ # Both the header and the library symbol are required before lz4 is enabled.
+ IF (HAVE_LZ4_SHARED_LIB AND HAVE_LZ4_H)
+ ADD_DEFINITIONS(-DHAVE_LZ4=1)
+ LINK_LIBRARIES(lz4)
+ ELSE()
+ IF (WITH_INNODB_LZ4 STREQUAL "ON") # AUTO tolerates a missing library; only an explicit ON is fatal
+ MESSAGE(FATAL_ERROR "Required lz4 library is not found")
+ ENDIF()
+ ENDIF()
+ ENDIF()
+ENDMACRO()
+
+MACRO (MYSQL_CHECK_LZ4_STATIC) # Same probe as MYSQL_CHECK_LZ4, but checks and links liblz4.a instead of lz4.
+ IF (WITH_INNODB_LZ4 STREQUAL "ON" OR WITH_INNODB_LZ4 STREQUAL "AUTO") # any other value skips the probe entirely
+ CHECK_INCLUDE_FILES(lz4.h HAVE_LZ4_H)
+ CHECK_LIBRARY_EXISTS(liblz4.a LZ4_compress_limitedOutput "" HAVE_LZ4_LIB)
+ # Both the header and the library symbol are required before lz4 is enabled.
+ IF(HAVE_LZ4_LIB AND HAVE_LZ4_H)
+ ADD_DEFINITIONS(-DHAVE_LZ4=1)
+ LINK_LIBRARIES(liblz4.a)
+ ELSE()
+ IF (WITH_INNODB_LZ4 STREQUAL "ON") # AUTO tolerates a missing library; only an explicit ON is fatal
+ MESSAGE(FATAL_ERROR "Required lz4 library is not found")
+ ENDIF()
+ ENDIF()
+ ENDIF()
+ENDMACRO() \ No newline at end of file
diff --git a/cmake/lzma.cmake b/cmake/lzma.cmake
new file mode 100644
index 00000000000..12a28a17a43
--- /dev/null
+++ b/cmake/lzma.cmake
@@ -0,0 +1,33 @@
+# Copyright (C) 2014, SkySQL Ab. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+SET(WITH_INNODB_LZMA AUTO CACHE STRING
+ "Build with lzma. Possible values are 'ON', 'OFF', 'AUTO' and default is 'AUTO'")
+# MYSQL_CHECK_LZMA: probe for lzma.h and liblzma; when every probe succeeds, define HAVE_LZMA=1 and link lzma.
+MACRO (MYSQL_CHECK_LZMA)
+ IF (WITH_INNODB_LZMA STREQUAL "ON" OR WITH_INNODB_LZMA STREQUAL "AUTO") # any other value skips the probe entirely
+ CHECK_INCLUDE_FILES(lzma.h HAVE_LZMA_H)
+ CHECK_LIBRARY_EXISTS(lzma lzma_stream_buffer_decode "" HAVE_LZMA_DECODE)
+ CHECK_LIBRARY_EXISTS(lzma lzma_easy_buffer_encode "" HAVE_LZMA_ENCODE)
+ # The header plus both the decode and encode entry points are required.
+ IF (HAVE_LZMA_DECODE AND HAVE_LZMA_ENCODE AND HAVE_LZMA_H)
+ ADD_DEFINITIONS(-DHAVE_LZMA=1)
+ LINK_LIBRARIES(lzma)
+ ELSE()
+ IF (WITH_INNODB_LZMA STREQUAL "ON") # AUTO tolerates a missing library; only an explicit ON is fatal
+ MESSAGE(FATAL_ERROR "Required lzma library is not found")
+ ENDIF()
+ ENDIF()
+ ENDIF()
+ENDMACRO()
diff --git a/cmake/lzo.cmake b/cmake/lzo.cmake
new file mode 100644
index 00000000000..07cba011c06
--- /dev/null
+++ b/cmake/lzo.cmake
@@ -0,0 +1,48 @@
+# Copyright (C) 2014, SkySQL Ab. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+SET(WITH_INNODB_LZO AUTO CACHE STRING
+ "Build with lzo. Possible values are 'ON', 'OFF', 'AUTO' and default is 'AUTO'")
+# MYSQL_CHECK_LZO_STATIC: probe for lzo/lzo1x.h and liblzo2.a; when both are found, define HAVE_LZO=1 and link liblzo2.a.
+MACRO (MYSQL_CHECK_LZO_STATIC)
+ IF (WITH_INNODB_LZO STREQUAL "ON" OR WITH_INNODB_LZO STREQUAL "AUTO") # any other value skips the probe entirely
+ CHECK_INCLUDE_FILES(lzo/lzo1x.h HAVE_LZO_H)
+ CHECK_LIBRARY_EXISTS(liblzo2.a lzo1x_1_compress "" HAVE_LZO_LIB)
+ # Both the header and the library symbol are required before lzo is enabled.
+ IF(HAVE_LZO_LIB AND HAVE_LZO_H)
+ ADD_DEFINITIONS(-DHAVE_LZO=1)
+ LINK_LIBRARIES(liblzo2.a)
+ ELSE()
+ IF (WITH_INNODB_LZO STREQUAL "ON") # AUTO tolerates a missing library; only an explicit ON is fatal
+ MESSAGE(FATAL_ERROR "Required lzo library is not found")
+ ENDIF()
+ ENDIF()
+ ENDIF()
+ENDMACRO()
+
+MACRO (MYSQL_CHECK_LZO) # Same probe as MYSQL_CHECK_LZO_STATIC, but checks and links lzo2 instead of liblzo2.a.
+ IF (WITH_INNODB_LZO STREQUAL "ON" OR WITH_INNODB_LZO STREQUAL "AUTO") # any other value skips the probe entirely
+ CHECK_INCLUDE_FILES(lzo/lzo1x.h HAVE_LZO_H)
+ CHECK_LIBRARY_EXISTS(lzo2 lzo1x_1_compress "" HAVE_LZO_SHARED_LIB)
+ # Both the header and the library symbol are required before lzo is enabled.
+ IF(HAVE_LZO_SHARED_LIB AND HAVE_LZO_H)
+ ADD_DEFINITIONS(-DHAVE_LZO=1)
+ LINK_LIBRARIES(lzo2)
+ ELSE()
+ IF (WITH_INNODB_LZO STREQUAL "ON") # AUTO tolerates a missing library; only an explicit ON is fatal
+ MESSAGE(FATAL_ERROR "Required lzo library is not found")
+ ENDIF()
+ ENDIF()
+ ENDIF()
+ENDMACRO()
diff --git a/cmake/snappy.cmake b/cmake/snappy.cmake
new file mode 100644
index 00000000000..cb0839a3480
--- /dev/null
+++ b/cmake/snappy.cmake
@@ -0,0 +1,32 @@
+# Copyright (C) 2015, MariaDB Corporation. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+SET(WITH_INNODB_SNAPPY AUTO CACHE STRING
+ "Build with snappy. Possible values are 'ON', 'OFF', 'AUTO' and default is 'AUTO'")
+# MYSQL_CHECK_SNAPPY: probe for snappy-c.h and the snappy library; when both are found, define HAVE_SNAPPY=1 and link snappy.
+MACRO (MYSQL_CHECK_SNAPPY)
+ IF (WITH_INNODB_SNAPPY STREQUAL "ON" OR WITH_INNODB_SNAPPY STREQUAL "AUTO") # any other value skips the probe entirely
+ CHECK_INCLUDE_FILES(snappy-c.h HAVE_SNAPPY_H)
+ CHECK_LIBRARY_EXISTS(snappy snappy_uncompress "" HAVE_SNAPPY_SHARED_LIB)
+ # Both the header and the library symbol are required before snappy is enabled.
+ IF(HAVE_SNAPPY_SHARED_LIB AND HAVE_SNAPPY_H)
+ ADD_DEFINITIONS(-DHAVE_SNAPPY=1)
+ LINK_LIBRARIES(snappy)
+ ELSE()
+ IF (WITH_INNODB_SNAPPY STREQUAL "ON") # AUTO tolerates a missing library; only an explicit ON is fatal
+ MESSAGE(FATAL_ERROR "Required snappy library is not found")
+ ENDIF()
+ ENDIF()
+ ENDIF()
+ENDMACRO()
diff --git a/config.h.cmake b/config.h.cmake
index 46eed79dfa5..a90e636aa9c 100644
--- a/config.h.cmake
+++ b/config.h.cmake
@@ -93,6 +93,7 @@
#cmakedefine HAVE_SYS_TYPES_H 1
#cmakedefine HAVE_SYS_UN_H 1
#cmakedefine HAVE_SYS_VADVISE_H 1
+#cmakedefine HAVE_SYS_STATVFS_H 1
#cmakedefine HAVE_TERM_H 1
#cmakedefine HAVE_TERMBITS_H 1
#cmakedefine HAVE_TERMIOS_H 1
@@ -213,6 +214,8 @@
#cmakedefine HAVE_POLL 1
#cmakedefine HAVE_PORT_CREATE 1
#cmakedefine HAVE_POSIX_FALLOCATE 1
+#cmakedefine HAVE_LINUX_FALLOC_H 1
+#cmakedefine HAVE_FALLOCATE 1
#cmakedefine HAVE_PREAD 1
#cmakedefine HAVE_PAUSE_INSTRUCTION 1
#cmakedefine HAVE_FAKE_PAUSE_INSTRUCTION 1
diff --git a/configure.cmake b/configure.cmake
index fb127688bd2..cbcb26001cb 100644
--- a/configure.cmake
+++ b/configure.cmake
@@ -189,6 +189,7 @@ CHECK_INCLUDE_FILES (ieeefp.h HAVE_IEEEFP_H)
CHECK_INCLUDE_FILES (inttypes.h HAVE_INTTYPES_H)
CHECK_INCLUDE_FILES (langinfo.h HAVE_LANGINFO_H)
CHECK_INCLUDE_FILES (linux/unistd.h HAVE_LINUX_UNISTD_H)
+CHECK_INCLUDE_FILES (linux/falloc.h HAVE_LINUX_FALLOC_H)
CHECK_INCLUDE_FILES (limits.h HAVE_LIMITS_H)
CHECK_INCLUDE_FILES (locale.h HAVE_LOCALE_H)
CHECK_INCLUDE_FILES (malloc.h HAVE_MALLOC_H)
@@ -250,6 +251,7 @@ CHECK_INCLUDE_FILES (wchar.h HAVE_WCHAR_H)
CHECK_INCLUDE_FILES (wctype.h HAVE_WCTYPE_H)
CHECK_INCLUDE_FILES (sys/sockio.h HAVE_SYS_SOCKIO_H)
CHECK_INCLUDE_FILES (sys/utsname.h HAVE_SYS_UTSNAME_H)
+CHECK_INCLUDE_FILES (sys/statvfs.h HAVE_SYS_STATVFS_H)
IF(BFD_H_EXISTS)
IF(NOT_FOR_DISTRIBUTION)
@@ -385,6 +387,7 @@ CHECK_FUNCTION_EXISTS (perror HAVE_PERROR)
CHECK_FUNCTION_EXISTS (poll HAVE_POLL)
CHECK_FUNCTION_EXISTS (port_create HAVE_PORT_CREATE)
CHECK_FUNCTION_EXISTS (posix_fallocate HAVE_POSIX_FALLOCATE)
+CHECK_FUNCTION_EXISTS (fallocate HAVE_FALLOCATE)
CHECK_FUNCTION_EXISTS (pread HAVE_PREAD)
CHECK_FUNCTION_EXISTS (pthread_attr_create HAVE_PTHREAD_ATTR_CREATE)
CHECK_FUNCTION_EXISTS (pthread_attr_getstacksize HAVE_PTHREAD_ATTR_GETSTACKSIZE)
diff --git a/extra/CMakeLists.txt b/extra/CMakeLists.txt
index 585b5aef6f6..3f87bb2df62 100644
--- a/extra/CMakeLists.txt
+++ b/extra/CMakeLists.txt
@@ -72,32 +72,27 @@ IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
ENDIF()
ENDIF()
-MYSQL_ADD_EXECUTABLE(replace replace.c COMPONENT Server)
-TARGET_LINK_LIBRARIES(replace mysys)
-IF(UNIX)
- MYSQL_ADD_EXECUTABLE(resolve_stack_dump resolve_stack_dump.c)
- TARGET_LINK_LIBRARIES(resolve_stack_dump mysys)
-
- MYSQL_ADD_EXECUTABLE(mysql_waitpid mysql_waitpid.c COMPONENT Client)
- TARGET_LINK_LIBRARIES(mysql_waitpid mysys)
-ENDIF()
-
-
+IF(WITH_INNOBASE_STORAGE_ENGINE)
# Add path to the InnoDB headers
- INCLUDE_DIRECTORIES(
- ${CMAKE_SOURCE_DIR}/storage/innobase/include
- ${CMAKE_SOURCE_DIR}/sql)
-
+ INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/innobase/include)
# We use the InnoDB code directly in case the code changes.
ADD_DEFINITIONS("-DUNIV_INNOCHECKSUM")
SET(INNOBASE_SOURCES
../storage/innobase/buf/buf0checksum.cc
../storage/innobase/ut/ut0crc32.cc
../storage/innobase/ut/ut0ut.cc
- ../storage/innobase/page/page0zip.cc
- )
-
+ )
MYSQL_ADD_EXECUTABLE(innochecksum innochecksum.cc ${INNOBASE_SOURCES})
TARGET_LINK_LIBRARIES(innochecksum mysys mysys_ssl)
- ADD_DEPENDENCIES(innochecksum GenError)
+ENDIF()
+
+MYSQL_ADD_EXECUTABLE(replace replace.c COMPONENT Server)
+TARGET_LINK_LIBRARIES(replace mysys)
+
+IF(UNIX)
+ MYSQL_ADD_EXECUTABLE(resolve_stack_dump resolve_stack_dump.c)
+ TARGET_LINK_LIBRARIES(resolve_stack_dump mysys)
+ MYSQL_ADD_EXECUTABLE(mysql_waitpid mysql_waitpid.c COMPONENT Client)
+ TARGET_LINK_LIBRARIES(mysql_waitpid mysys)
+ENDIF()
diff --git a/extra/innochecksum.cc.moved b/extra/innochecksum.cc.moved
new file mode 100644
index 00000000000..c89196b1eee
--- /dev/null
+++ b/extra/innochecksum.cc.moved
@@ -0,0 +1,396 @@
+/*
+ Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+/*
+ InnoDB offline file checksum utility. 85% of the code in this utility
+ is included from the InnoDB codebase.
+
+ The final 15% was originally written by Mark Smith of Danga
+ Interactive, Inc. <junior@danga.com>
+
+ Published with a permission.
+*/
+
+#include <my_global.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifndef __WIN__
+# include <unistd.h>
+#endif
+#include <my_getopt.h>
+#include <m_string.h>
+#include <welcome_copyright_notice.h> /* ORACLE_WELCOME_COPYRIGHT_NOTICE */
+
+/* Only parts of these files are included from the InnoDB codebase.
+The parts not included are excluded by #ifndef UNIV_INNOCHECKSUM. */
+
+#include "univ.i" /* include all of this */
+
+#include "buf0checksum.h" /* buf_calc_page_*() */
+#include "fil0fil.h" /* FIL_* */
+#include "fsp0fsp.h" /* fsp_flags_get_page_size() &
+ fsp_flags_get_zip_size() */
+#include "mach0data.h" /* mach_read_from_4() */
+#include "ut0crc32.h" /* ut_crc32_init() */
+
+#ifdef UNIV_NONINL
+# include "fsp0fsp.ic"
+# include "mach0data.ic"
+# include "ut0rnd.ic"
+#endif
+
+/* Global variables */
+static my_bool verbose; /* --verbose; also forced on by --debug */
+static my_bool debug; /* --debug */
+static my_bool just_count; /* --count */
+static ulong start_page; /* --start_page; overwritten with do_page by --page */
+static ulong end_page; /* --end_page; overwritten with do_page by --page */
+static ulong do_page; /* --page */
+static my_bool use_end_page; /* set when --end_page or --page is seen */
+static my_bool do_one_page; /* set when --page is seen */
+ulong srv_page_size; /* replaces declaration in srv0srv.c */
+static ulong physical_page_size; /* Page size in bytes on disk. */
+static ulong logical_page_size; /* Page size when uncompressed. */
+
+/* Get the page size of the filespace from the filespace header. Returns TRUE on success; FALSE, after reporting on stderr, when the header cannot be read in full. */
+static
+my_bool
+get_page_size(
+/*==========*/
+ FILE* f, /*!< in: file pointer, must be open
+ and set to start of file */
+ byte* buf, /*!< in: buffer used to read the page */
+ ulong* logical_page_size, /*!< out: Logical/Uncompressed page size */
+ ulong* physical_page_size) /*!< out: Physical/Compressed page size */
+{
+ ulong flags;
+ /* Note: the two size parameters share their names with the file-scope variables and shadow them here. */
+ int bytes= fread(buf, 1, UNIV_PAGE_SIZE_MIN, f);
+
+ if (ferror(f))
+ {
+ perror("Error reading file header");
+ return FALSE;
+ }
+ /* A short read without a stream error (e.g. a file smaller than the minimum page) is also fatal. */
+ if (bytes != UNIV_PAGE_SIZE_MIN)
+ {
+ fprintf(stderr, "Error; Was not able to read the minimum page size ");
+ fprintf(stderr, "of %d bytes. Bytes read was %d\n", UNIV_PAGE_SIZE_MIN, bytes);
+ return FALSE;
+ }
+
+ rewind(f); /* leave the stream positioned at the start of the file for the caller */
+
+ flags = mach_read_from_4(buf + FIL_PAGE_DATA + FSP_SPACE_FLAGS);
+
+ /* srv_page_size is used by InnoDB code as UNIV_PAGE_SIZE */
+ srv_page_size = *logical_page_size = fsp_flags_get_page_size(flags);
+
+ /* fsp_flags_get_zip_size() will return zero if not compressed. */
+ *physical_page_size = fsp_flags_get_zip_size(flags);
+ if (*physical_page_size == 0)
+ *physical_page_size= *logical_page_size;
+
+ return TRUE;
+}
+
+
+/* command line argument to do page checks (that's it) */
+/* another argument to specify page ranges... seek to right spot and go from there */
+/* Option table passed to handle_options(), my_print_help() and my_print_variables() below. */
+static struct my_option innochecksum_options[] =
+{
+ {"help", '?', "Displays this help and exits.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"info", 'I', "Synonym for --help.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"version", 'V', "Displays version information and exits.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"verbose", 'v', "Verbose (prints progress every 5 seconds).",
+ &verbose, &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"debug", 'd', "Debug mode (prints checksums for each page, implies verbose).",
+ &debug, &debug, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"count", 'c', "Print the count of pages in the file.",
+ &just_count, &just_count, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"start_page", 's', "Start on this page number (0 based).",
+ &start_page, &start_page, 0, GET_ULONG, REQUIRED_ARG,
+ 0, 0, (longlong) 2L*1024L*1024L*1024L, 0, 1, 0},
+ {"end_page", 'e', "End at this page number (0 based).",
+ &end_page, &end_page, 0, GET_ULONG, REQUIRED_ARG,
+ 0, 0, (longlong) 2L*1024L*1024L*1024L, 0, 1, 0},
+ {"page", 'p', "Check only this page (0 based).",
+ &do_page, &do_page, 0, GET_ULONG, REQUIRED_ARG,
+ 0, 0, (longlong) 2L*1024L*1024L*1024L, 0, 1, 0},
+ /* All-zero entry; presumably the end-of-table sentinel expected by my_getopt - confirm. */
+ {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
+};
+
+static void print_version(void) /* Print one line with program name, InnoDB version, system and machine type to stdout. */
+{
+ printf("%s Ver %s, for %s (%s)\n",
+ my_progname, INNODB_VERSION_STR,
+ SYSTEM_TYPE, MACHINE_TYPE);
+}
+
+static void usage(void) /* Print the version line, copyright notice, usage synopsis, option help and current option values. */
+{
+ print_version();
+ puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2000"));
+ printf("InnoDB offline file checksum utility.\n");
+ printf("Usage: %s [-c] [-s <start page>] [-e <end page>] [-p <page>] [-v] [-d] <filename>\n", my_progname);
+ my_print_help(innochecksum_options);
+ my_print_variables(innochecksum_options);
+}
+
+extern "C" my_bool /* Per-option callback handed to handle_options() by get_options(); always returns 0. */
+innochecksum_get_one_option(
+/*========================*/
+ int optid, /* short-option character of the option being processed */
+ const struct my_option *opt __attribute__((unused)),
+ char *argument __attribute__((unused)))
+{
+ switch (optid) {
+ case 'd':
+ verbose=1; /* debug implies verbose... */
+ break;
+ case 'e':
+ use_end_page= 1; /* remember that an explicit end page was given */
+ break;
+ case 'p':
+ end_page= start_page= do_page; /* single-page mode: collapse the range to do_page */
+ use_end_page= 1;
+ do_one_page= 1;
+ break;
+ case 'V':
+ print_version();
+ exit(0); /* --version terminates the program here */
+ break;
+ case 'I':
+ case '?':
+ usage();
+ exit(0); /* --help / --info terminate the program here */
+ break;
+ }
+ return 0;
+}
+
+static int get_options(
+/*===================*/
+ int *argc,
+ char ***argv)
+{
+  /* Run the option parser; a parser failure ends the process with its code. */
+  const int parse_rc= handle_options(argc, argv, innochecksum_options,
+                                     innochecksum_get_one_option);
+  if (parse_rc != 0)
+    exit(parse_rc);
+  /* Whatever is left after the options must be the file name. */
+  if (*argc == 0)
+  {
+    usage();
+    return 1;
+  }
+  return 0;
+} /* get_options */
+
+
+int main(int argc, char **argv)
+{
+ FILE* f; /* our input file */
+ char* filename; /* our input filename. */
+ unsigned char buf[UNIV_PAGE_SIZE_MAX]; /* Buffer to store pages read */
+ ulong bytes; /* bytes read count */
+ ulint ct; /* current page number (0 based) */
+ time_t now; /* current time */
+ time_t lastt; /* last time */
+ ulint oldcsum, oldcsumfield, csum, csumfield, crc32, logseq, logseqfield;
+ /* ulints for checksum storage */
+ struct stat st; /* for stat, if you couldn't guess */
+ unsigned long long int size; /* size of file (has to be 64 bits) */
+ ulint pages; /* number of pages in file */
+ off_t offset= 0;
+ int fd;
+
+ printf("InnoDB offline file checksum utility.\n");
+
+ ut_crc32_init();
+
+ MY_INIT(argv[0]);
+
+ if (get_options(&argc,&argv))
+ exit(1);
+
+ if (verbose)
+ my_print_variables(innochecksum_options);
+
+ /* The file name is not optional */
+ filename = *argv;
+ if (*filename == '\0')
+ {
+ fprintf(stderr, "Error; File name missing\n");
+ return 1;
+ }
+
+ /* stat the file to get size and page count */
+ if (stat(filename, &st))
+ {
+ fprintf(stderr, "Error; %s cannot be found\n", filename);
+ return 1;
+ }
+ size= st.st_size;
+
+ /* Open the file for reading */
+ f= fopen(filename, "rb");
+ if (f == NULL)
+ {
+ fprintf(stderr, "Error; %s cannot be opened", filename);
+ perror(" ");
+ return 1;
+ }
+
+ if (!get_page_size(f, buf, &logical_page_size, &physical_page_size))
+ {
+ return 1;
+ }
+
+ /* This tool currently does not support Compressed tables */
+ if (logical_page_size != physical_page_size)
+ {
+ fprintf(stderr, "Error; This file contains compressed pages\n");
+ return 1;
+ }
+
+ pages= (ulint) (size / physical_page_size);
+
+ if (just_count)
+ {
+ if (verbose)
+ printf("Number of pages: ");
+ printf("%lu\n", pages);
+ return 0;
+ }
+ else if (verbose)
+ {
+ printf("file %s = %llu bytes (%lu pages)...\n", filename, size, pages);
+ if (do_one_page)
+ printf("InnoChecksum; checking page %lu\n", do_page);
+ else
+ printf("InnoChecksum; checking pages in range %lu to %lu\n", start_page, use_end_page ? end_page : (pages - 1));
+ }
+
+ /* seek to the necessary position */
+ if (start_page)
+ {
+ fd= fileno(f);
+ if (!fd)
+ {
+ perror("Error; Unable to obtain file descriptor number");
+ return 1;
+ }
+
+ offset= (off_t)start_page * (off_t)physical_page_size;
+
+ if (lseek(fd, offset, SEEK_SET) != offset)
+ {
+ perror("Error; Unable to seek to necessary offset");
+ return 1;
+ }
+ }
+
+ /* main checksumming loop */
+ ct= start_page;
+ lastt= 0;
+ while (!feof(f))
+ {
+ bytes= fread(buf, 1, physical_page_size, f);
+ if (!bytes && feof(f))
+ return 0;
+
+ if (ferror(f))
+ {
+ fprintf(stderr, "Error reading %lu bytes", physical_page_size);
+ perror(" ");
+ return 1;
+ }
+ if (bytes != physical_page_size)
+ {
+ fprintf(stderr, "Error; bytes read (%lu) doesn't match page size (%lu)\n", bytes, physical_page_size);
+ return 1;
+ }
+
+ /* check the "stored log sequence numbers" */
+ logseq= mach_read_from_4(buf + FIL_PAGE_LSN + 4);
+ logseqfield= mach_read_from_4(buf + logical_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM + 4);
+ if (debug)
+ printf("page %lu: log sequence number: first = %lu; second = %lu\n", ct, logseq, logseqfield);
+ if (logseq != logseqfield)
+ {
+ fprintf(stderr, "Fail; page %lu invalid (fails log sequence number check)\n", ct);
+ return 1;
+ }
+
+ /* check old method of checksumming */
+ oldcsum= buf_calc_page_old_checksum(buf);
+ oldcsumfield= mach_read_from_4(buf + logical_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM);
+ if (debug)
+ printf("page %lu: old style: calculated = %lu; recorded = %lu\n", ct, oldcsum, oldcsumfield);
+ if (oldcsumfield != mach_read_from_4(buf + FIL_PAGE_LSN) && oldcsumfield != oldcsum)
+ {
+ fprintf(stderr, "Fail; page %lu invalid (fails old style checksum)\n", ct);
+ return 1;
+ }
+
+ /* now check the new method */
+ csum= buf_calc_page_new_checksum(buf);
+ crc32= buf_calc_page_crc32(buf);
+ csumfield= mach_read_from_4(buf + FIL_PAGE_SPACE_OR_CHKSUM);
+ if (debug)
+ printf("page %lu: new style: calculated = %lu; crc32 = %lu; recorded = %lu\n",
+ ct, csum, crc32, csumfield);
+ if (csumfield != 0 && crc32 != csumfield && csum != csumfield)
+ {
+ fprintf(stderr, "Fail; page %lu invalid (fails innodb and crc32 checksum)\n", ct);
+ return 1;
+ }
+
+ /* end if this was the last page we were supposed to check */
+ if (use_end_page && (ct >= end_page))
+ return 0;
+
+ /* do counter increase and progress printing */
+ ct++;
+ if (verbose)
+ {
+ if (ct % 64 == 0)
+ {
+ now= time(0);
+ if (!lastt) lastt= now;
+ if (now - lastt >= 1)
+ {
+ printf("page %lu okay: %.3f%% done\n", (ct - 1), (float) ct / pages * 100);
+ lastt= now;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
diff --git a/mysql-test/disabled.def b/mysql-test/disabled.def
index bb0e243326a..e5fa24786e1 100644
--- a/mysql-test/disabled.def
+++ b/mysql-test/disabled.def
@@ -20,4 +20,3 @@ mysql_embedded : Bug#12561297 2011-05-14 Anitha Dependent on PB2 chang
ssl_crl_clients_valid : broken upstream
ssl_crl : broken upstream
ssl_crl_clrpath : broken upstream
-file_contents : MDEV-6526 these files are not installed anymore
diff --git a/mysql-test/include/have_innodb_bzip2.inc b/mysql-test/include/have_innodb_bzip2.inc
new file mode 100644
index 00000000000..afbe78f0cf9
--- /dev/null
+++ b/mysql-test/include/have_innodb_bzip2.inc
@@ -0,0 +1,4 @@
+if (! `SELECT COUNT(*) FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_have_bzip2' AND variable_value = 'ON'`)
+{
+ --skip Test requires InnoDB compiled with libbz2
+}
diff --git a/mysql-test/include/have_innodb_lz4.inc b/mysql-test/include/have_innodb_lz4.inc
new file mode 100644
index 00000000000..bda3ffa8623
--- /dev/null
+++ b/mysql-test/include/have_innodb_lz4.inc
@@ -0,0 +1,4 @@
+if (!`SELECT COUNT(*) FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_have_lz4' AND variable_value = 'ON'`)
+{
+ --skip Test requires InnoDB compiled with liblz4
+}
diff --git a/mysql-test/include/have_innodb_lzma.inc b/mysql-test/include/have_innodb_lzma.inc
new file mode 100644
index 00000000000..86eda33f194
--- /dev/null
+++ b/mysql-test/include/have_innodb_lzma.inc
@@ -0,0 +1,4 @@
+if (!`SELECT COUNT(*) FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_have_lzma' AND variable_value = 'ON' `)
+{
+ --skip Test requires InnoDB compiled with liblzma
+}
diff --git a/mysql-test/include/have_innodb_lzo.inc b/mysql-test/include/have_innodb_lzo.inc
new file mode 100644
index 00000000000..f40418b00fb
--- /dev/null
+++ b/mysql-test/include/have_innodb_lzo.inc
@@ -0,0 +1,4 @@
+if (! `SELECT COUNT(*) FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_have_lzo' AND variable_value = 'ON'`)
+{
+ --skip Test requires InnoDB compiled with liblzo
+}
diff --git a/mysql-test/include/have_innodb_snappy.inc b/mysql-test/include/have_innodb_snappy.inc
new file mode 100644
index 00000000000..c4dca4c19ee
--- /dev/null
+++ b/mysql-test/include/have_innodb_snappy.inc
@@ -0,0 +1,4 @@
+if (! `SELECT COUNT(*) FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_have_snappy' AND variable_value = 'ON'`)
+{
+ --skip Test requires InnoDB compiled with libsnappy
+}
diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_bzip2.result b/mysql-test/suite/innodb/r/innodb-page_compression_bzip2.result
new file mode 100644
index 00000000000..8d3bc063a71
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb-page_compression_bzip2.result
@@ -0,0 +1,437 @@
+set global innodb_file_format = `barracuda`;
+set global innodb_file_per_table = on;
+set global innodb_compression_algorithm = 5;
+create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8;
+show warnings;
+Level Code Message
+create table innodb_normal (c1 int, b char(20)) engine=innodb;
+show warnings;
+Level Code Message
+create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed1;
+Table Create Table
+innodb_page_compressed1 CREATE TABLE `innodb_page_compressed1` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=1
+create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed2;
+Table Create Table
+innodb_page_compressed2 CREATE TABLE `innodb_page_compressed2` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=2
+create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed3;
+Table Create Table
+innodb_page_compressed3 CREATE TABLE `innodb_page_compressed3` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=3
+create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed4;
+Table Create Table
+innodb_page_compressed4 CREATE TABLE `innodb_page_compressed4` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=4
+create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed5;
+Table Create Table
+innodb_page_compressed5 CREATE TABLE `innodb_page_compressed5` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=5
+create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed6;
+Table Create Table
+innodb_page_compressed6 CREATE TABLE `innodb_page_compressed6` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=6
+create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed7;
+Table Create Table
+innodb_page_compressed7 CREATE TABLE `innodb_page_compressed7` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=7
+create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed8;
+Table Create Table
+innodb_page_compressed8 CREATE TABLE `innodb_page_compressed8` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8
+create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed9;
+Table Create Table
+innodb_page_compressed9 CREATE TABLE `innodb_page_compressed9` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=9
+create procedure innodb_insert_proc (repeat_count int)
+begin
+declare current_num int;
+set current_num = 0;
+while current_num < repeat_count do
+insert into innodb_normal values(current_num,'testing..');
+set current_num = current_num + 1;
+end while;
+end//
+commit;
+set autocommit=0;
+call innodb_insert_proc(5000);
+commit;
+set autocommit=1;
+select count(*) from innodb_normal;
+count(*)
+5000
+insert into innodb_compressed select * from innodb_normal;
+insert into innodb_page_compressed1 select * from innodb_normal;
+insert into innodb_page_compressed2 select * from innodb_normal;
+insert into innodb_page_compressed3 select * from innodb_normal;
+insert into innodb_page_compressed4 select * from innodb_normal;
+insert into innodb_page_compressed5 select * from innodb_normal;
+insert into innodb_page_compressed6 select * from innodb_normal;
+insert into innodb_page_compressed7 select * from innodb_normal;
+insert into innodb_page_compressed8 select * from innodb_normal;
+insert into innodb_page_compressed9 select * from innodb_normal;
+commit;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+alter table innodb_normal page_compressed=1 page_compression_level=8;
+show warnings;
+Level Code Message
+show create table innodb_normal;
+Table Create Table
+innodb_normal CREATE TABLE `innodb_normal` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8
+alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0;
+show warnings;
+Level Code Message
+show create table innodb_compressed;
+Table Create Table
+innodb_compressed CREATE TABLE `innodb_compressed` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+set global innodb_compression_algorithm = 1;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+set global innodb_compression_algorithm = 0;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+drop procedure innodb_insert_proc;
+drop table innodb_normal;
+drop table innodb_compressed;
+drop table innodb_page_compressed1;
+drop table innodb_page_compressed2;
+drop table innodb_page_compressed3;
+drop table innodb_page_compressed4;
+drop table innodb_page_compressed5;
+drop table innodb_page_compressed6;
+drop table innodb_page_compressed7;
+drop table innodb_page_compressed8;
+drop table innodb_page_compressed9;
diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_lz4.result b/mysql-test/suite/innodb/r/innodb-page_compression_lz4.result
new file mode 100644
index 00000000000..eeab2622cb6
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb-page_compression_lz4.result
@@ -0,0 +1,438 @@
+set global innodb_file_format = `barracuda`;
+set global innodb_file_per_table = on;
+set global innodb_compression_algorithm = 2;
+create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8;
+show warnings;
+Level Code Message
+create table innodb_normal (c1 int, b char(20)) engine=innodb;
+show warnings;
+Level Code Message
+create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed1;
+Table Create Table
+innodb_page_compressed1 CREATE TABLE `innodb_page_compressed1` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=1
+create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed2;
+Table Create Table
+innodb_page_compressed2 CREATE TABLE `innodb_page_compressed2` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=2
+create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed3;
+Table Create Table
+innodb_page_compressed3 CREATE TABLE `innodb_page_compressed3` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=3
+create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed4;
+Table Create Table
+innodb_page_compressed4 CREATE TABLE `innodb_page_compressed4` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=4
+create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed5;
+Table Create Table
+innodb_page_compressed5 CREATE TABLE `innodb_page_compressed5` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=5
+create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed6;
+Table Create Table
+innodb_page_compressed6 CREATE TABLE `innodb_page_compressed6` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=6
+create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed7;
+Table Create Table
+innodb_page_compressed7 CREATE TABLE `innodb_page_compressed7` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=7
+create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed8;
+Table Create Table
+innodb_page_compressed8 CREATE TABLE `innodb_page_compressed8` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8
+create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed9;
+Table Create Table
+innodb_page_compressed9 CREATE TABLE `innodb_page_compressed9` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=9
+create procedure innodb_insert_proc (repeat_count int)
+begin
+declare current_num int;
+set current_num = 0;
+while current_num < repeat_count do
+insert into innodb_normal values(current_num,'testing..');
+set current_num = current_num + 1;
+end while;
+end//
+commit;
+set autocommit=0;
+call innodb_insert_proc(5000);
+commit;
+set autocommit=1;
+select count(*) from innodb_normal;
+count(*)
+5000
+insert into innodb_compressed select * from innodb_normal;
+insert into innodb_page_compressed1 select * from innodb_normal;
+insert into innodb_page_compressed2 select * from innodb_normal;
+insert into innodb_page_compressed3 select * from innodb_normal;
+insert into innodb_page_compressed4 select * from innodb_normal;
+insert into innodb_page_compressed5 select * from innodb_normal;
+insert into innodb_page_compressed6 select * from innodb_normal;
+insert into innodb_page_compressed7 select * from innodb_normal;
+insert into innodb_page_compressed8 select * from innodb_normal;
+insert into innodb_page_compressed9 select * from innodb_normal;
+commit;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+alter table innodb_normal page_compressed=1 page_compression_level=8;
+show warnings;
+Level Code Message
+show create table innodb_normal;
+Table Create Table
+innodb_normal CREATE TABLE `innodb_normal` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8
+alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0;
+show warnings;
+Level Code Message
+show create table innodb_compressed;
+Table Create Table
+innodb_compressed CREATE TABLE `innodb_compressed` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+set global innodb_compression_algorithm = 1;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+set global innodb_compression_algorithm = 0;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+drop procedure innodb_insert_proc;
+drop table innodb_normal;
+drop table innodb_compressed;
+drop table innodb_page_compressed1;
+drop table innodb_page_compressed2;
+drop table innodb_page_compressed3;
+drop table innodb_page_compressed4;
+drop table innodb_page_compressed5;
+drop table innodb_page_compressed6;
+drop table innodb_page_compressed7;
+drop table innodb_page_compressed8;
+drop table innodb_page_compressed9;
diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_lzma.result b/mysql-test/suite/innodb/r/innodb-page_compression_lzma.result
new file mode 100644
index 00000000000..d340801b656
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb-page_compression_lzma.result
@@ -0,0 +1,437 @@
+set global innodb_file_format = `barracuda`;
+set global innodb_file_per_table = on;
+set global innodb_compression_algorithm = 4;
+create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8;
+show warnings;
+Level Code Message
+create table innodb_normal (c1 int, b char(20)) engine=innodb;
+show warnings;
+Level Code Message
+create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed1;
+Table Create Table
+innodb_page_compressed1 CREATE TABLE `innodb_page_compressed1` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=1
+create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed2;
+Table Create Table
+innodb_page_compressed2 CREATE TABLE `innodb_page_compressed2` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=2
+create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed3;
+Table Create Table
+innodb_page_compressed3 CREATE TABLE `innodb_page_compressed3` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=3
+create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed4;
+Table Create Table
+innodb_page_compressed4 CREATE TABLE `innodb_page_compressed4` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=4
+create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed5;
+Table Create Table
+innodb_page_compressed5 CREATE TABLE `innodb_page_compressed5` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=5
+create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed6;
+Table Create Table
+innodb_page_compressed6 CREATE TABLE `innodb_page_compressed6` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=6
+create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed7;
+Table Create Table
+innodb_page_compressed7 CREATE TABLE `innodb_page_compressed7` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=7
+create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed8;
+Table Create Table
+innodb_page_compressed8 CREATE TABLE `innodb_page_compressed8` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8
+create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed9;
+Table Create Table
+innodb_page_compressed9 CREATE TABLE `innodb_page_compressed9` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=9
+create procedure innodb_insert_proc (repeat_count int)
+begin
+declare current_num int;
+set current_num = 0;
+while current_num < repeat_count do
+insert into innodb_normal values(current_num,'testing..');
+set current_num = current_num + 1;
+end while;
+end//
+commit;
+set autocommit=0;
+call innodb_insert_proc(5000);
+commit;
+set autocommit=1;
+select count(*) from innodb_normal;
+count(*)
+5000
+insert into innodb_compressed select * from innodb_normal;
+insert into innodb_page_compressed1 select * from innodb_normal;
+insert into innodb_page_compressed2 select * from innodb_normal;
+insert into innodb_page_compressed3 select * from innodb_normal;
+insert into innodb_page_compressed4 select * from innodb_normal;
+insert into innodb_page_compressed5 select * from innodb_normal;
+insert into innodb_page_compressed6 select * from innodb_normal;
+insert into innodb_page_compressed7 select * from innodb_normal;
+insert into innodb_page_compressed8 select * from innodb_normal;
+insert into innodb_page_compressed9 select * from innodb_normal;
+commit;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+alter table innodb_normal page_compressed=1 page_compression_level=8;
+show warnings;
+Level Code Message
+show create table innodb_normal;
+Table Create Table
+innodb_normal CREATE TABLE `innodb_normal` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8
+alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0;
+show warnings;
+Level Code Message
+show create table innodb_compressed;
+Table Create Table
+innodb_compressed CREATE TABLE `innodb_compressed` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+set global innodb_compression_algorithm = 1;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+set global innodb_compression_algorithm = 0;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+drop procedure innodb_insert_proc;
+drop table innodb_normal;
+drop table innodb_compressed;
+drop table innodb_page_compressed1;
+drop table innodb_page_compressed2;
+drop table innodb_page_compressed3;
+drop table innodb_page_compressed4;
+drop table innodb_page_compressed5;
+drop table innodb_page_compressed6;
+drop table innodb_page_compressed7;
+drop table innodb_page_compressed8;
+drop table innodb_page_compressed9;
diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_lzo.result b/mysql-test/suite/innodb/r/innodb-page_compression_lzo.result
new file mode 100644
index 00000000000..fdbc99f60d9
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb-page_compression_lzo.result
@@ -0,0 +1,351 @@
+set global innodb_file_format = `barracuda`;
+set global innodb_file_per_table = on;
+set global innodb_compression_algorithm = 3;
+create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8;
+show warnings;
+Level Code Message
+create table innodb_normal (c1 int, b char(20)) engine=innodb;
+show warnings;
+Level Code Message
+create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed1;
+Table Create Table
+innodb_page_compressed1 CREATE TABLE `innodb_page_compressed1` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=1
+create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed2;
+Table Create Table
+innodb_page_compressed2 CREATE TABLE `innodb_page_compressed2` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=2
+create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed3;
+Table Create Table
+innodb_page_compressed3 CREATE TABLE `innodb_page_compressed3` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=3
+create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed4;
+Table Create Table
+innodb_page_compressed4 CREATE TABLE `innodb_page_compressed4` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=4
+create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed5;
+Table Create Table
+innodb_page_compressed5 CREATE TABLE `innodb_page_compressed5` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=5
+create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed6;
+Table Create Table
+innodb_page_compressed6 CREATE TABLE `innodb_page_compressed6` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=6
+create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed7;
+Table Create Table
+innodb_page_compressed7 CREATE TABLE `innodb_page_compressed7` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=7
+create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed8;
+Table Create Table
+innodb_page_compressed8 CREATE TABLE `innodb_page_compressed8` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8
+create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed9;
+Table Create Table
+innodb_page_compressed9 CREATE TABLE `innodb_page_compressed9` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=9
+create procedure innodb_insert_proc (repeat_count int)
+begin
+declare current_num int;
+set current_num = 0;
+while current_num < repeat_count do
+insert into innodb_normal values(current_num,'testing..');
+set current_num = current_num + 1;
+end while;
+end//
+commit;
+set autocommit=0;
+call innodb_insert_proc(5000);
+commit;
+set autocommit=1;
+select count(*) from innodb_normal;
+count(*)
+5000
+insert into innodb_compressed select * from innodb_normal;
+insert into innodb_page_compressed1 select * from innodb_normal;
+insert into innodb_page_compressed2 select * from innodb_normal;
+insert into innodb_page_compressed3 select * from innodb_normal;
+insert into innodb_page_compressed4 select * from innodb_normal;
+insert into innodb_page_compressed5 select * from innodb_normal;
+insert into innodb_page_compressed6 select * from innodb_normal;
+insert into innodb_page_compressed7 select * from innodb_normal;
+insert into innodb_page_compressed8 select * from innodb_normal;
+insert into innodb_page_compressed9 select * from innodb_normal;
+commit;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+alter table innodb_normal page_compressed=1 page_compression_level=8;
+show warnings;
+Level Code Message
+show create table innodb_normal;
+Table Create Table
+innodb_normal CREATE TABLE `innodb_normal` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8
+alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0;
+show warnings;
+Level Code Message
+show create table innodb_compressed;
+Table Create Table
+innodb_compressed CREATE TABLE `innodb_compressed` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+set global innodb_compression_algorithm = 1;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+drop procedure innodb_insert_proc;
+drop table innodb_normal;
+drop table innodb_compressed;
+drop table innodb_page_compressed1;
+drop table innodb_page_compressed2;
+drop table innodb_page_compressed3;
+drop table innodb_page_compressed4;
+drop table innodb_page_compressed5;
+drop table innodb_page_compressed6;
+drop table innodb_page_compressed7;
+drop table innodb_page_compressed8;
+drop table innodb_page_compressed9;
diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_snappy.result b/mysql-test/suite/innodb/r/innodb-page_compression_snappy.result
new file mode 100644
index 00000000000..1709d8e9d2b
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb-page_compression_snappy.result
@@ -0,0 +1,438 @@
+call mtr.add_suppression("InnoDB: Warning: Compression failed for space*");
+set global innodb_file_format = `barracuda`;
+set global innodb_file_per_table = on;
+set global innodb_compression_algorithm = 6;
+create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8;
+show warnings;
+Level Code Message
+create table innodb_normal (c1 int, b char(20)) engine=innodb;
+show warnings;
+Level Code Message
+create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed1;
+Table Create Table
+innodb_page_compressed1 CREATE TABLE `innodb_page_compressed1` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=1
+create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed2;
+Table Create Table
+innodb_page_compressed2 CREATE TABLE `innodb_page_compressed2` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=2
+create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed3;
+Table Create Table
+innodb_page_compressed3 CREATE TABLE `innodb_page_compressed3` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=3
+create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed4;
+Table Create Table
+innodb_page_compressed4 CREATE TABLE `innodb_page_compressed4` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=4
+create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed5;
+Table Create Table
+innodb_page_compressed5 CREATE TABLE `innodb_page_compressed5` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=5
+create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed6;
+Table Create Table
+innodb_page_compressed6 CREATE TABLE `innodb_page_compressed6` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=6
+create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed7;
+Table Create Table
+innodb_page_compressed7 CREATE TABLE `innodb_page_compressed7` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=7
+create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed8;
+Table Create Table
+innodb_page_compressed8 CREATE TABLE `innodb_page_compressed8` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8
+create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed9;
+Table Create Table
+innodb_page_compressed9 CREATE TABLE `innodb_page_compressed9` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=9
+create procedure innodb_insert_proc (repeat_count int)
+begin
+declare current_num int;
+set current_num = 0;
+while current_num < repeat_count do
+insert into innodb_normal values(current_num,'testing..');
+set current_num = current_num + 1;
+end while;
+end//
+commit;
+set autocommit=0;
+call innodb_insert_proc(5000);
+commit;
+set autocommit=1;
+select count(*) from innodb_normal;
+count(*)
+5000
+insert into innodb_compressed select * from innodb_normal;
+insert into innodb_page_compressed1 select * from innodb_normal;
+insert into innodb_page_compressed2 select * from innodb_normal;
+insert into innodb_page_compressed3 select * from innodb_normal;
+insert into innodb_page_compressed4 select * from innodb_normal;
+insert into innodb_page_compressed5 select * from innodb_normal;
+insert into innodb_page_compressed6 select * from innodb_normal;
+insert into innodb_page_compressed7 select * from innodb_normal;
+insert into innodb_page_compressed8 select * from innodb_normal;
+insert into innodb_page_compressed9 select * from innodb_normal;
+commit;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+alter table innodb_normal page_compressed=1 page_compression_level=8;
+show warnings;
+Level Code Message
+show create table innodb_normal;
+Table Create Table
+innodb_normal CREATE TABLE `innodb_normal` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8
+alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0;
+show warnings;
+Level Code Message
+show create table innodb_compressed;
+Table Create Table
+innodb_compressed CREATE TABLE `innodb_compressed` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+set global innodb_compression_algorithm = 1;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+set global innodb_compression_algorithm = 0;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+drop procedure innodb_insert_proc;
+drop table innodb_normal;
+drop table innodb_compressed;
+drop table innodb_page_compressed1;
+drop table innodb_page_compressed2;
+drop table innodb_page_compressed3;
+drop table innodb_page_compressed4;
+drop table innodb_page_compressed5;
+drop table innodb_page_compressed6;
+drop table innodb_page_compressed7;
+drop table innodb_page_compressed8;
+drop table innodb_page_compressed9;
diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_tables.result b/mysql-test/suite/innodb/r/innodb-page_compression_tables.result
new file mode 100644
index 00000000000..98de5db3c12
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb-page_compression_tables.result
@@ -0,0 +1,121 @@
+SET GLOBAL innodb_file_format = `Barracuda`;
+SET GLOBAL innodb_file_per_table = ON;
+set global innodb_compression_algorithm = 1;
+create table innodb_normal(c1 bigint not null, b char(200)) engine=innodb;
+create table innodb_compact(c1 bigint not null, b char(200)) engine=innodb row_format=compact page_compressed=1;
+create table innodb_dynamic(c1 bigint not null, b char(200)) engine=innodb row_format=dynamic page_compressed=1;
+create table innodb_compressed(c1 bigint not null, b char(200)) engine=innodb row_format=compressed page_compressed=1;
+ERROR HY000: Can't create table `test`.`innodb_compressed` (errno: 140 "Wrong create options")
+show warnings;
+Level Code Message
+Warning 140 InnoDB: PAGE_COMPRESSED table can't have ROW_TYPE=COMPRESSED
+Error 1005 Can't create table `test`.`innodb_compressed` (errno: 140 "Wrong create options")
+Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB
+show create table innodb_compact;
+Table Create Table
+innodb_compact CREATE TABLE `innodb_compact` (
+ `c1` bigint(20) NOT NULL,
+ `b` char(200) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT `page_compressed`=1
+show create table innodb_dynamic;
+Table Create Table
+innodb_dynamic CREATE TABLE `innodb_dynamic` (
+ `c1` bigint(20) NOT NULL,
+ `b` char(200) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC `page_compressed`=1
+create table innodb_redundant(c1 bigint not null, b char(200)) engine=innodb row_format=redundant page_compressed=1;
+ERROR HY000: Can't create table `test`.`innodb_redundant` (errno: 140 "Wrong create options")
+show warnings;
+Level Code Message
+Warning 140 InnoDB: PAGE_COMPRESSED table can't have ROW_TYPE=REDUNDANT
+Error 1005 Can't create table `test`.`innodb_redundant` (errno: 140 "Wrong create options")
+Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB
+create table innodb_redundant(c1 bigint not null, b char(200)) engine=innodb row_format=redundant;
+show create table innodb_redundant;
+Table Create Table
+innodb_redundant CREATE TABLE `innodb_redundant` (
+ `c1` bigint(20) NOT NULL,
+ `b` char(200) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT
+alter table innodb_redundant page_compressed=1;
+ERROR HY000: Can't create table `test`.`#sql-temporary` (errno: 140 "Wrong create options")
+show warnings;
+Level Code Message
+Warning 140 InnoDB: PAGE_COMPRESSED table can't have ROW_TYPE=REDUNDANT
+Error 1005 Can't create table `test`.`#sql-temporary` (errno: 140 "Wrong create options")
+Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB
+show create table innodb_redundant;
+Table Create Table
+innodb_redundant CREATE TABLE `innodb_redundant` (
+ `c1` bigint(20) NOT NULL,
+ `b` char(200) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT
+alter table innodb_redundant row_format=compact page_compressed=1;
+show create table innodb_redundant;
+Table Create Table
+innodb_redundant CREATE TABLE `innodb_redundant` (
+ `c1` bigint(20) NOT NULL,
+ `b` char(200) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT `page_compressed`=1
+drop table innodb_redundant;
+create procedure innodb_insert_proc (repeat_count int)
+begin
+declare current_num int;
+set current_num = 0;
+while current_num < repeat_count do
+insert into innodb_normal values(current_num, substring(MD5(RAND()), -64));
+set current_num = current_num + 1;
+end while;
+end//
+commit;
+set autocommit=0;
+call innodb_insert_proc(5000);
+commit;
+set autocommit=1;
+insert into innodb_compact select * from innodb_normal;
+insert into innodb_dynamic select * from innodb_normal;
+update innodb_compact set c1 = c1 + 1;
+update innodb_dynamic set c1 = c1 + 1;
+select count(*) from innodb_compact where c1 < 1500000;
+count(*)
+5000
+select count(*) from innodb_dynamic where c1 < 1500000;
+count(*)
+5000
+update innodb_compact set c1 = c1 + 1;
+update innodb_dynamic set c1 = c1 + 1;
+select count(*) from innodb_compact where c1 < 1500000;
+count(*)
+5000
+select count(*) from innodb_dynamic where c1 < 1500000;
+count(*)
+5000
+SET GLOBAL innodb_file_format = `Barracuda`;
+SET GLOBAL innodb_file_per_table = ON;
+set global innodb_compression_algorithm = 0;
+alter table innodb_compact engine=innodb page_compressed=DEFAULT;
+alter table innodb_dynamic engine=innodb page_compressed=DEFAULT;
+show create table innodb_compact;
+Table Create Table
+innodb_compact CREATE TABLE `innodb_compact` (
+ `c1` bigint(20) NOT NULL,
+ `b` char(200) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT
+show create table innodb_dynamic;
+Table Create Table
+innodb_dynamic CREATE TABLE `innodb_dynamic` (
+ `c1` bigint(20) NOT NULL,
+ `b` char(200) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC
+update innodb_compact set c1 = c1 + 1;
+update innodb_dynamic set c1 = c1 + 1;
+select count(*) from innodb_compact where c1 < 1500000;
+count(*)
+5000
+select count(*) from innodb_dynamic where c1 < 1500000;
+count(*)
+5000
+drop procedure innodb_insert_proc;
+drop table innodb_normal;
+drop table innodb_compact;
+drop table innodb_dynamic;
diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_zip.result b/mysql-test/suite/innodb/r/innodb-page_compression_zip.result
new file mode 100644
index 00000000000..4c3ab273b2e
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb-page_compression_zip.result
@@ -0,0 +1,351 @@
+SET GLOBAL innodb_file_format = `Barracuda`;
+SET GLOBAL innodb_file_per_table = ON;
+set global innodb_compression_algorithm = 1;
+create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8;
+show warnings;
+Level Code Message
+create table innodb_normal (c1 int, b char(20)) engine=innodb;
+show warnings;
+Level Code Message
+create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed1;
+Table Create Table
+innodb_page_compressed1 CREATE TABLE `innodb_page_compressed1` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=1
+create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed2;
+Table Create Table
+innodb_page_compressed2 CREATE TABLE `innodb_page_compressed2` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=2
+create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed3;
+Table Create Table
+innodb_page_compressed3 CREATE TABLE `innodb_page_compressed3` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=3
+create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed4;
+Table Create Table
+innodb_page_compressed4 CREATE TABLE `innodb_page_compressed4` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=4
+create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed5;
+Table Create Table
+innodb_page_compressed5 CREATE TABLE `innodb_page_compressed5` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=5
+create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed6;
+Table Create Table
+innodb_page_compressed6 CREATE TABLE `innodb_page_compressed6` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=6
+create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed7;
+Table Create Table
+innodb_page_compressed7 CREATE TABLE `innodb_page_compressed7` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=7
+create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed8;
+Table Create Table
+innodb_page_compressed8 CREATE TABLE `innodb_page_compressed8` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8
+create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9;
+show warnings;
+Level Code Message
+show create table innodb_page_compressed9;
+Table Create Table
+innodb_page_compressed9 CREATE TABLE `innodb_page_compressed9` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=9
+create procedure innodb_insert_proc (repeat_count int)
+begin
+declare current_num int;
+set current_num = 0;
+while current_num < repeat_count do
+insert into innodb_normal values(current_num,'testing..');
+set current_num = current_num + 1;
+end while;
+end//
+commit;
+set autocommit=0;
+call innodb_insert_proc(5000);
+commit;
+set autocommit=1;
+select count(*) from innodb_normal;
+count(*)
+5000
+insert into innodb_compressed select * from innodb_normal;
+insert into innodb_page_compressed1 select * from innodb_normal;
+insert into innodb_page_compressed2 select * from innodb_normal;
+insert into innodb_page_compressed3 select * from innodb_normal;
+insert into innodb_page_compressed4 select * from innodb_normal;
+insert into innodb_page_compressed5 select * from innodb_normal;
+insert into innodb_page_compressed6 select * from innodb_normal;
+insert into innodb_page_compressed7 select * from innodb_normal;
+insert into innodb_page_compressed8 select * from innodb_normal;
+insert into innodb_page_compressed9 select * from innodb_normal;
+commit;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+alter table innodb_normal page_compressed=1 page_compression_level=8;
+show warnings;
+Level Code Message
+show create table innodb_normal;
+Table Create Table
+innodb_normal CREATE TABLE `innodb_normal` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8
+alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0;
+show warnings;
+Level Code Message
+show create table innodb_compressed;
+Table Create Table
+innodb_compressed CREATE TABLE `innodb_compressed` (
+ `c1` int(11) DEFAULT NULL,
+ `b` char(20) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+set global innodb_compression_algorithm = 0;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+count(*)
+5000
+select count(*) from innodb_page_compressed1;
+count(*)
+5000
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+count(*)
+5000
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+count(*)
+5000
+drop procedure innodb_insert_proc;
+drop table innodb_normal;
+drop table innodb_compressed;
+drop table innodb_page_compressed1;
+drop table innodb_page_compressed2;
+drop table innodb_page_compressed3;
+drop table innodb_page_compressed4;
+drop table innodb_page_compressed5;
+drop table innodb_page_compressed6;
+drop table innodb_page_compressed7;
+drop table innodb_page_compressed8;
+drop table innodb_page_compressed9;
diff --git a/mysql-test/suite/innodb/r/innodb_monitor.result b/mysql-test/suite/innodb/r/innodb_monitor.result
index f8d24f4e6f5..03c78f2e040 100644
--- a/mysql-test/suite/innodb/r/innodb_monitor.result
+++ b/mysql-test/suite/innodb/r/innodb_monitor.result
@@ -37,6 +37,8 @@ buffer_pool_bytes_dirty disabled
buffer_pool_pages_free disabled
buffer_pages_created disabled
buffer_pages_written disabled
+buffer_index_pages_written disabled
+buffer_non_index_pages_written disabled
buffer_pages_read disabled
buffer_data_reads disabled
buffer_data_written disabled
@@ -160,6 +162,19 @@ compress_pages_compressed disabled
compress_pages_decompressed disabled
compression_pad_increments disabled
compression_pad_decrements disabled
+compress_saved disabled
+compress_trim_sect512 disabled
+compress_trim_sect1024 disabled
+compress_trim_sect2048 disabled
+compress_trim_sect4096 disabled
+compress_trim_sect8192 disabled
+compress_trim_sect16384 disabled
+compress_trim_sect32768 disabled
+compress_pages_page_compressed disabled
+compress_page_compressed_trim_op disabled
+compress_page_compressed_trim_op_saved disabled
+compress_pages_page_decompressed disabled
+compress_pages_page_compression_error disabled
index_page_splits disabled
index_page_merge_attempts disabled
index_page_merge_successful disabled
diff --git a/mysql-test/suite/innodb/t/innodb-page_compression_bzip2.test b/mysql-test/suite/innodb/t/innodb-page_compression_bzip2.test
new file mode 100644
index 00000000000..2e8831151c7
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb-page_compression_bzip2.test
@@ -0,0 +1,251 @@
+-- source include/have_innodb.inc
+-- source include/have_innodb_bzip2.inc
+
+--disable_query_log
+let $innodb_compression_algorithm_orig=`select @@innodb_compression_algorithm`;
+let $innodb_file_format_orig = `select @@innodb_file_format`;
+let $innodb_file_per_table_orig = `select @@innodb_file_per_table`;
+--enable_query_log
+
+set global innodb_file_format = `barracuda`;
+set global innodb_file_per_table = on;
+
+# Select bzip2 (algorithm id 5) as the page compression algorithm
+set global innodb_compression_algorithm = 5;
+
+create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8;
+show warnings;
+create table innodb_normal (c1 int, b char(20)) engine=innodb;
+show warnings;
+create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1;
+show warnings;
+show create table innodb_page_compressed1;
+create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2;
+show warnings;
+show create table innodb_page_compressed2;
+create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3;
+show warnings;
+show create table innodb_page_compressed3;
+create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4;
+show warnings;
+show create table innodb_page_compressed4;
+create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5;
+show warnings;
+show create table innodb_page_compressed5;
+create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6;
+show warnings;
+show create table innodb_page_compressed6;
+create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7;
+show warnings;
+show create table innodb_page_compressed7;
+create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8;
+show warnings;
+show create table innodb_page_compressed8;
+create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9;
+show warnings;
+show create table innodb_page_compressed9;
+delimiter //;
+create procedure innodb_insert_proc (repeat_count int)
+begin
+ declare current_num int;
+ set current_num = 0;
+ while current_num < repeat_count do
+ insert into innodb_normal values(current_num,'testing..');
+ set current_num = current_num + 1;
+ end while;
+end//
+delimiter ;//
+commit;
+
+set autocommit=0;
+call innodb_insert_proc(5000);
+commit;
+set autocommit=1;
+select count(*) from innodb_normal;
+insert into innodb_compressed select * from innodb_normal;
+insert into innodb_page_compressed1 select * from innodb_normal;
+insert into innodb_page_compressed2 select * from innodb_normal;
+insert into innodb_page_compressed3 select * from innodb_normal;
+insert into innodb_page_compressed4 select * from innodb_normal;
+insert into innodb_page_compressed5 select * from innodb_normal;
+insert into innodb_page_compressed6 select * from innodb_normal;
+insert into innodb_page_compressed7 select * from innodb_normal;
+insert into innodb_page_compressed8 select * from innodb_normal;
+insert into innodb_page_compressed9 select * from innodb_normal;
+commit;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+alter table innodb_normal page_compressed=1 page_compression_level=8;
+show warnings;
+show create table innodb_normal;
+alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0;
+show warnings;
+show create table innodb_compressed;
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+--source include/restart_mysqld.inc
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+# Switch the algorithm to zlib (id 1) and update the existing tables again
+set global innodb_compression_algorithm = 1;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+--source include/restart_mysqld.inc
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+# Switch the algorithm to none (id 0) and update the existing tables again
+set global innodb_compression_algorithm = 0;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+--source include/restart_mysqld.inc
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+drop procedure innodb_insert_proc;
+drop table innodb_normal;
+drop table innodb_compressed;
+drop table innodb_page_compressed1;
+drop table innodb_page_compressed2;
+drop table innodb_page_compressed3;
+drop table innodb_page_compressed4;
+drop table innodb_page_compressed5;
+drop table innodb_page_compressed6;
+drop table innodb_page_compressed7;
+drop table innodb_page_compressed8;
+drop table innodb_page_compressed9;
+
+# Restore the global settings that were saved at the start of this test
+--disable_query_log
+EVAL SET GLOBAL innodb_compression_algorithm = $innodb_compression_algorithm_orig;
+EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig;
+EVAL SET GLOBAL innodb_file_format = $innodb_file_format_orig;
+--enable_query_log
diff --git a/mysql-test/suite/innodb/t/innodb-page_compression_lz4.test b/mysql-test/suite/innodb/t/innodb-page_compression_lz4.test
new file mode 100644
index 00000000000..731cbdeab66
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb-page_compression_lz4.test
@@ -0,0 +1,252 @@
+-- source include/have_innodb.inc
+-- source include/have_innodb_lz4.inc
+
+--disable_query_log
+let $innodb_compression_algorithm_orig=`SELECT @@innodb_compression_algorithm`;
+let $innodb_file_format_orig = `SELECT @@innodb_file_format`;
+let $innodb_file_per_table_orig = `SELECT @@innodb_file_per_table`;
+--enable_query_log
+
+set global innodb_file_format = `barracuda`;
+set global innodb_file_per_table = on;
+
+# Select lz4 (algorithm id 2) as the page compression algorithm
+set global innodb_compression_algorithm = 2;
+
+create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8;
+show warnings;
+create table innodb_normal (c1 int, b char(20)) engine=innodb;
+show warnings;
+create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1;
+show warnings;
+show create table innodb_page_compressed1;
+create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2;
+show warnings;
+show create table innodb_page_compressed2;
+create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3;
+show warnings;
+show create table innodb_page_compressed3;
+create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4;
+show warnings;
+show create table innodb_page_compressed4;
+create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5;
+show warnings;
+show create table innodb_page_compressed5;
+create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6;
+show warnings;
+show create table innodb_page_compressed6;
+create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7;
+show warnings;
+show create table innodb_page_compressed7;
+create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8;
+show warnings;
+show create table innodb_page_compressed8;
+create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9;
+show warnings;
+show create table innodb_page_compressed9;
+delimiter //;
+create procedure innodb_insert_proc (repeat_count int)
+begin
+ declare current_num int;
+ set current_num = 0;
+ while current_num < repeat_count do
+ insert into innodb_normal values(current_num,'testing..');
+ set current_num = current_num + 1;
+ end while;
+end//
+delimiter ;//
+commit;
+
+set autocommit=0;
+call innodb_insert_proc(5000);
+commit;
+set autocommit=1;
+select count(*) from innodb_normal;
+insert into innodb_compressed select * from innodb_normal;
+insert into innodb_page_compressed1 select * from innodb_normal;
+insert into innodb_page_compressed2 select * from innodb_normal;
+insert into innodb_page_compressed3 select * from innodb_normal;
+insert into innodb_page_compressed4 select * from innodb_normal;
+insert into innodb_page_compressed5 select * from innodb_normal;
+insert into innodb_page_compressed6 select * from innodb_normal;
+insert into innodb_page_compressed7 select * from innodb_normal;
+insert into innodb_page_compressed8 select * from innodb_normal;
+insert into innodb_page_compressed9 select * from innodb_normal;
+commit;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+alter table innodb_normal page_compressed=1 page_compression_level=8;
+show warnings;
+show create table innodb_normal;
+alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0;
+show warnings;
+show create table innodb_compressed;
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+--source include/restart_mysqld.inc
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+# Switch the algorithm to zlib (id 1) and update the existing tables again
+set global innodb_compression_algorithm = 1;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+--source include/restart_mysqld.inc
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+# Switch the algorithm to none (id 0) and update the existing tables again
+set global innodb_compression_algorithm = 0;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+--source include/restart_mysqld.inc
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+drop procedure innodb_insert_proc;
+drop table innodb_normal;
+drop table innodb_compressed;
+drop table innodb_page_compressed1;
+drop table innodb_page_compressed2;
+drop table innodb_page_compressed3;
+drop table innodb_page_compressed4;
+drop table innodb_page_compressed5;
+drop table innodb_page_compressed6;
+drop table innodb_page_compressed7;
+drop table innodb_page_compressed8;
+drop table innodb_page_compressed9;
+
+# Restore the global settings that were saved at the start of this test
+--disable_query_log
+EVAL SET GLOBAL innodb_compression_algorithm = $innodb_compression_algorithm_orig;
+EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig;
+EVAL SET GLOBAL innodb_file_format = $innodb_file_format_orig;
+--enable_query_log
diff --git a/mysql-test/suite/innodb/t/innodb-page_compression_lzma.test b/mysql-test/suite/innodb/t/innodb-page_compression_lzma.test
new file mode 100644
index 00000000000..071e86b0f9b
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb-page_compression_lzma.test
@@ -0,0 +1,251 @@
+-- source include/have_innodb.inc
+-- source include/have_innodb_lzma.inc
+
+--disable_query_log
+let $innodb_compression_algorithm_orig=`SELECT @@innodb_compression_algorithm`;
+let $innodb_file_format_orig = `SELECT @@innodb_file_format`;
+let $innodb_file_per_table_orig = `SELECT @@innodb_file_per_table`;
+--enable_query_log
+
+set global innodb_file_format = `barracuda`;
+set global innodb_file_per_table = on;
+
+# Select lzma (algorithm id 4) as the page compression algorithm
+set global innodb_compression_algorithm = 4;
+
+create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8;
+show warnings;
+create table innodb_normal (c1 int, b char(20)) engine=innodb;
+show warnings;
+create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1;
+show warnings;
+show create table innodb_page_compressed1;
+create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2;
+show warnings;
+show create table innodb_page_compressed2;
+create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3;
+show warnings;
+show create table innodb_page_compressed3;
+create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4;
+show warnings;
+show create table innodb_page_compressed4;
+create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5;
+show warnings;
+show create table innodb_page_compressed5;
+create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6;
+show warnings;
+show create table innodb_page_compressed6;
+create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7;
+show warnings;
+show create table innodb_page_compressed7;
+create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8;
+show warnings;
+show create table innodb_page_compressed8;
+create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9;
+show warnings;
+show create table innodb_page_compressed9;
+delimiter //;
+create procedure innodb_insert_proc (repeat_count int)
+begin
+ declare current_num int;
+ set current_num = 0;
+ while current_num < repeat_count do
+ insert into innodb_normal values(current_num,'testing..');
+ set current_num = current_num + 1;
+ end while;
+end//
+delimiter ;//
+commit;
+
+set autocommit=0;
+call innodb_insert_proc(5000);
+commit;
+set autocommit=1;
+select count(*) from innodb_normal;
+insert into innodb_compressed select * from innodb_normal;
+insert into innodb_page_compressed1 select * from innodb_normal;
+insert into innodb_page_compressed2 select * from innodb_normal;
+insert into innodb_page_compressed3 select * from innodb_normal;
+insert into innodb_page_compressed4 select * from innodb_normal;
+insert into innodb_page_compressed5 select * from innodb_normal;
+insert into innodb_page_compressed6 select * from innodb_normal;
+insert into innodb_page_compressed7 select * from innodb_normal;
+insert into innodb_page_compressed8 select * from innodb_normal;
+insert into innodb_page_compressed9 select * from innodb_normal;
+commit;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+alter table innodb_normal page_compressed=1 page_compression_level=8;
+show warnings;
+show create table innodb_normal;
+alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0;
+show warnings;
+show create table innodb_compressed;
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+--source include/restart_mysqld.inc
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+# zlib: set innodb_compression_algorithm to 1, then update every row of innodb_page_compressed1..9 and re-check the row counts, both before and after sourcing restart_mysqld.inc.
+set global innodb_compression_algorithm = 1;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+--source include/restart_mysqld.inc
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+# none: set innodb_compression_algorithm to 0, then update every row of innodb_page_compressed1..9 and re-check the row counts, both before and after sourcing restart_mysqld.inc.
+set global innodb_compression_algorithm = 0;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+--source include/restart_mysqld.inc
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+drop procedure innodb_insert_proc;
+drop table innodb_normal;
+drop table innodb_compressed;
+drop table innodb_page_compressed1;
+drop table innodb_page_compressed2;
+drop table innodb_page_compressed3;
+drop table innodb_page_compressed4;
+drop table innodb_page_compressed5;
+drop table innodb_page_compressed6;
+drop table innodb_page_compressed7;
+drop table innodb_page_compressed8;
+drop table innodb_page_compressed9;
+
+# Restore the three globals from the $innodb_..._orig variables; query logging is disabled around the eval statements so they are not echoed into the test output.
+--disable_query_log
+EVAL SET GLOBAL innodb_compression_algorithm = $innodb_compression_algorithm_orig;
+EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig;
+EVAL SET GLOBAL innodb_file_format = $innodb_file_format_orig;
+--enable_query_log
diff --git a/mysql-test/suite/innodb/t/innodb-page_compression_lzo.test b/mysql-test/suite/innodb/t/innodb-page_compression_lzo.test
new file mode 100644
index 00000000000..6a73f793f26
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb-page_compression_lzo.test
@@ -0,0 +1,204 @@
+-- source include/have_innodb.inc
+-- source include/have_innodb_lzo.inc
+
+--disable_query_log
+let $innodb_compression_algorithm_orig=`select @@innodb_compression_algorithm`;
+let $innodb_file_format_orig = `select @@innodb_file_format`;
+let $innodb_file_per_table_orig = `select @@innodb_file_per_table`;
+--enable_query_log
+
+set global innodb_file_format = `barracuda`;
+set global innodb_file_per_table = on;
+
+# lzo: innodb_compression_algorithm = 3 is set globally before the test tables are created and loaded.
+set global innodb_compression_algorithm = 3;
+
+create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8;
+show warnings;
+create table innodb_normal (c1 int, b char(20)) engine=innodb;
+show warnings;
+create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1;
+show warnings;
+show create table innodb_page_compressed1;
+create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2;
+show warnings;
+show create table innodb_page_compressed2;
+create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3;
+show warnings;
+show create table innodb_page_compressed3;
+create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4;
+show warnings;
+show create table innodb_page_compressed4;
+create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5;
+show warnings;
+show create table innodb_page_compressed5;
+create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6;
+show warnings;
+show create table innodb_page_compressed6;
+create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7;
+show warnings;
+show create table innodb_page_compressed7;
+create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8;
+show warnings;
+show create table innodb_page_compressed8;
+create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9;
+show warnings;
+show create table innodb_page_compressed9;
+delimiter //;
+create procedure innodb_insert_proc (repeat_count int)
+begin
+ declare current_num int;
+ set current_num = 0;
+ while current_num < repeat_count do
+ insert into innodb_normal values(current_num,'testing..');
+ set current_num = current_num + 1;
+ end while;
+end//
+delimiter ;//
+commit;
+
+set autocommit=0;
+call innodb_insert_proc(5000);
+commit;
+set autocommit=1;
+select count(*) from innodb_normal;
+insert into innodb_compressed select * from innodb_normal;
+insert into innodb_page_compressed1 select * from innodb_normal;
+insert into innodb_page_compressed2 select * from innodb_normal;
+insert into innodb_page_compressed3 select * from innodb_normal;
+insert into innodb_page_compressed4 select * from innodb_normal;
+insert into innodb_page_compressed5 select * from innodb_normal;
+insert into innodb_page_compressed6 select * from innodb_normal;
+insert into innodb_page_compressed7 select * from innodb_normal;
+insert into innodb_page_compressed8 select * from innodb_normal;
+insert into innodb_page_compressed9 select * from innodb_normal;
+commit;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+alter table innodb_normal page_compressed=1 page_compression_level=8;
+show warnings;
+show create table innodb_normal;
+alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0;
+show warnings;
+show create table innodb_compressed;
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+--source include/restart_mysqld.inc
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+# zlib: set innodb_compression_algorithm to 1, then update every row of innodb_page_compressed1..9 and re-check the row counts, both before and after sourcing restart_mysqld.inc.
+set global innodb_compression_algorithm = 1;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+--source include/restart_mysqld.inc
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+drop procedure innodb_insert_proc;
+drop table innodb_normal;
+drop table innodb_compressed;
+drop table innodb_page_compressed1;
+drop table innodb_page_compressed2;
+drop table innodb_page_compressed3;
+drop table innodb_page_compressed4;
+drop table innodb_page_compressed5;
+drop table innodb_page_compressed6;
+drop table innodb_page_compressed7;
+drop table innodb_page_compressed8;
+drop table innodb_page_compressed9;
+
+# Restore the three globals from the $innodb_..._orig variables; query logging is disabled around the eval statements so they are not echoed into the test output.
+--disable_query_log
+eval set global innodb_compression_algorithm = $innodb_compression_algorithm_orig;
+eval set global innodb_file_per_table = $innodb_file_per_table_orig;
+eval set global innodb_file_format = $innodb_file_format_orig;
+--enable_query_log
diff --git a/mysql-test/suite/innodb/t/innodb-page_compression_snappy.test b/mysql-test/suite/innodb/t/innodb-page_compression_snappy.test
new file mode 100644
index 00000000000..8c4980ff479
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb-page_compression_snappy.test
@@ -0,0 +1,253 @@
+-- source include/have_innodb.inc
+-- source include/have_innodb_snappy.inc
+
+call mtr.add_suppression("InnoDB: Warning: Compression failed for space*");
+
+--disable_query_log
+let $innodb_compression_algorithm_orig=`select @@innodb_compression_algorithm`;
+let $innodb_file_format_orig = `select @@innodb_file_format`;
+let $innodb_file_per_table_orig = `select @@innodb_file_per_table`;
+--enable_query_log
+
+set global innodb_file_format = `barracuda`;
+set global innodb_file_per_table = on;
+
+# snappy: innodb_compression_algorithm = 6 is set globally before the test tables are created and loaded.
+set global innodb_compression_algorithm = 6;
+
+create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8;
+show warnings;
+create table innodb_normal (c1 int, b char(20)) engine=innodb;
+show warnings;
+create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1;
+show warnings;
+show create table innodb_page_compressed1;
+create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2;
+show warnings;
+show create table innodb_page_compressed2;
+create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3;
+show warnings;
+show create table innodb_page_compressed3;
+create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4;
+show warnings;
+show create table innodb_page_compressed4;
+create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5;
+show warnings;
+show create table innodb_page_compressed5;
+create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6;
+show warnings;
+show create table innodb_page_compressed6;
+create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7;
+show warnings;
+show create table innodb_page_compressed7;
+create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8;
+show warnings;
+show create table innodb_page_compressed8;
+create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9;
+show warnings;
+show create table innodb_page_compressed9;
+delimiter //;
+create procedure innodb_insert_proc (repeat_count int)
+begin
+ declare current_num int;
+ set current_num = 0;
+ while current_num < repeat_count do
+ insert into innodb_normal values(current_num,'testing..');
+ set current_num = current_num + 1;
+ end while;
+end//
+delimiter ;//
+commit;
+
+set autocommit=0;
+call innodb_insert_proc(5000);
+commit;
+set autocommit=1;
+select count(*) from innodb_normal;
+insert into innodb_compressed select * from innodb_normal;
+insert into innodb_page_compressed1 select * from innodb_normal;
+insert into innodb_page_compressed2 select * from innodb_normal;
+insert into innodb_page_compressed3 select * from innodb_normal;
+insert into innodb_page_compressed4 select * from innodb_normal;
+insert into innodb_page_compressed5 select * from innodb_normal;
+insert into innodb_page_compressed6 select * from innodb_normal;
+insert into innodb_page_compressed7 select * from innodb_normal;
+insert into innodb_page_compressed8 select * from innodb_normal;
+insert into innodb_page_compressed9 select * from innodb_normal;
+commit;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+alter table innodb_normal page_compressed=1 page_compression_level=8;
+show warnings;
+show create table innodb_normal;
+alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0;
+show warnings;
+show create table innodb_compressed;
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+--source include/restart_mysqld.inc
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+# zlib: set innodb_compression_algorithm to 1, then update every row of innodb_page_compressed1..9 and re-check the row counts, both before and after sourcing restart_mysqld.inc.
+set global innodb_compression_algorithm = 1;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+--source include/restart_mysqld.inc
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+# none: set innodb_compression_algorithm to 0, then update every row of innodb_page_compressed1..9 and re-check the row counts, both before and after sourcing restart_mysqld.inc.
+set global innodb_compression_algorithm = 0;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+--source include/restart_mysqld.inc
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+drop procedure innodb_insert_proc;
+drop table innodb_normal;
+drop table innodb_compressed;
+drop table innodb_page_compressed1;
+drop table innodb_page_compressed2;
+drop table innodb_page_compressed3;
+drop table innodb_page_compressed4;
+drop table innodb_page_compressed5;
+drop table innodb_page_compressed6;
+drop table innodb_page_compressed7;
+drop table innodb_page_compressed8;
+drop table innodb_page_compressed9;
+
+# Restore the three globals from the $innodb_..._orig variables; query logging is disabled around the eval statements so they are not echoed into the test output.
+--disable_query_log
+EVAL SET GLOBAL innodb_compression_algorithm = $innodb_compression_algorithm_orig;
+EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig;
+EVAL SET GLOBAL innodb_file_format = $innodb_file_format_orig;
+--enable_query_log
diff --git a/mysql-test/suite/innodb/t/innodb-page_compression_tables.test b/mysql-test/suite/innodb/t/innodb-page_compression_tables.test
new file mode 100644
index 00000000000..f7810a44c48
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb-page_compression_tables.test
@@ -0,0 +1,102 @@
+-- source include/have_innodb.inc
+
+--disable_query_log
+let $innodb_compression_algorithm_orig=`SELECT @@innodb_compression_algorithm`;
+let $innodb_file_format_orig = `SELECT @@innodb_file_format`;
+let $innodb_file_per_table_orig = `SELECT @@innodb_file_per_table`;
+--enable_query_log
+
+SET GLOBAL innodb_file_format = `Barracuda`;
+SET GLOBAL innodb_file_per_table = ON;
+# zlib: innodb_compression_algorithm = 1 is set globally before the row_format and page_compressed combinations are created.
+set global innodb_compression_algorithm = 1;
+
+create table innodb_normal(c1 bigint not null, b char(200)) engine=innodb;
+create table innodb_compact(c1 bigint not null, b char(200)) engine=innodb row_format=compact page_compressed=1;
+create table innodb_dynamic(c1 bigint not null, b char(200)) engine=innodb row_format=dynamic page_compressed=1;
+--replace_regex /#sql-[0-9a-f_]*`/#sql-temporary`/
+--error 1005
+create table innodb_compressed(c1 bigint not null, b char(200)) engine=innodb row_format=compressed page_compressed=1;
+--replace_regex /#sql-[0-9a-f_]*`/#sql-temporary`/
+show warnings;
+show create table innodb_compact;
+show create table innodb_dynamic;
+
+# MDEV-7133: InnoDB: Failing assertion: dict_tf_is_valid(flags) in file dict0mem.cc line 74.
+# row_format=redundant with page_compressed=1 is expected to fail with error 1005 in both create table and alter table.
+--replace_regex /#sql-[0-9a-f_]*`/#sql-temporary`/
+--error 1005
+create table innodb_redundant(c1 bigint not null, b char(200)) engine=innodb row_format=redundant page_compressed=1;
+--replace_regex /#sql-[0-9a-f_]*`/#sql-temporary`/
+show warnings;
+create table innodb_redundant(c1 bigint not null, b char(200)) engine=innodb row_format=redundant;
+show create table innodb_redundant;
+--replace_regex /#sql-[0-9a-f_]*`/#sql-temporary`/
+--error 1005
+alter table innodb_redundant page_compressed=1;
+--replace_regex /#sql-[0-9a-f_]*`/#sql-temporary`/
+show warnings;
+show create table innodb_redundant;
+alter table innodb_redundant row_format=compact page_compressed=1;
+show create table innodb_redundant;
+drop table innodb_redundant;
+
+delimiter //;
+create procedure innodb_insert_proc (repeat_count int)
+begin
+ declare current_num int;
+ set current_num = 0;
+ while current_num < repeat_count do
+ insert into innodb_normal values(current_num, substring(MD5(RAND()), -64));
+ set current_num = current_num + 1;
+ end while;
+end//
+delimiter ;//
+commit;
+
+set autocommit=0;
+call innodb_insert_proc(5000);
+commit;
+set autocommit=1;
+
+insert into innodb_compact select * from innodb_normal;
+insert into innodb_dynamic select * from innodb_normal;
+
+update innodb_compact set c1 = c1 + 1;
+update innodb_dynamic set c1 = c1 + 1;
+select count(*) from innodb_compact where c1 < 1500000;
+select count(*) from innodb_dynamic where c1 < 1500000;
+
+--source include/restart_mysqld.inc
+
+update innodb_compact set c1 = c1 + 1;
+update innodb_dynamic set c1 = c1 + 1;
+select count(*) from innodb_compact where c1 < 1500000;
+select count(*) from innodb_dynamic where c1 < 1500000;
+
+SET GLOBAL innodb_file_format = `Barracuda`;
+SET GLOBAL innodb_file_per_table = ON;
+# none: with innodb_compression_algorithm = 0, page_compressed is set back to DEFAULT on innodb_compact and innodb_dynamic, which are then updated and counted again.
+set global innodb_compression_algorithm = 0;
+
+alter table innodb_compact engine=innodb page_compressed=DEFAULT;
+alter table innodb_dynamic engine=innodb page_compressed=DEFAULT;
+show create table innodb_compact;
+show create table innodb_dynamic;
+
+update innodb_compact set c1 = c1 + 1;
+update innodb_dynamic set c1 = c1 + 1;
+select count(*) from innodb_compact where c1 < 1500000;
+select count(*) from innodb_dynamic where c1 < 1500000;
+
+drop procedure innodb_insert_proc;
+drop table innodb_normal;
+drop table innodb_compact;
+drop table innodb_dynamic;
+
+# Restore the three globals from the $innodb_..._orig variables; query logging is disabled around the eval statements so they are not echoed into the test output.
+--disable_query_log
+EVAL SET GLOBAL innodb_compression_algorithm = $innodb_compression_algorithm_orig;
+EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig;
+EVAL SET GLOBAL innodb_file_format = $innodb_file_format_orig;
+--enable_query_log
diff --git a/mysql-test/suite/innodb/t/innodb-page_compression_zip.test b/mysql-test/suite/innodb/t/innodb-page_compression_zip.test
new file mode 100644
index 00000000000..8d06367e9b2
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb-page_compression_zip.test
@@ -0,0 +1,202 @@
+-- source include/have_innodb.inc
+
+--disable_query_log
+let $innodb_compression_algorithm_orig=`SELECT @@innodb_compression_algorithm`;
+let $innodb_file_format_orig = `SELECT @@innodb_file_format`;
+let $innodb_file_per_table_orig = `SELECT @@innodb_file_per_table`;
+--enable_query_log
+
+SET GLOBAL innodb_file_format = `Barracuda`;
+SET GLOBAL innodb_file_per_table = ON;
+
+# zlib
+set global innodb_compression_algorithm = 1;
+
+create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8;
+show warnings;
+create table innodb_normal (c1 int, b char(20)) engine=innodb;
+show warnings;
+create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1;
+show warnings;
+show create table innodb_page_compressed1;
+create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2;
+show warnings;
+show create table innodb_page_compressed2;
+create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3;
+show warnings;
+show create table innodb_page_compressed3;
+create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4;
+show warnings;
+show create table innodb_page_compressed4;
+create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5;
+show warnings;
+show create table innodb_page_compressed5;
+create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6;
+show warnings;
+show create table innodb_page_compressed6;
+create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7;
+show warnings;
+show create table innodb_page_compressed7;
+create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8;
+show warnings;
+show create table innodb_page_compressed8;
+create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9;
+show warnings;
+show create table innodb_page_compressed9;
+delimiter //;
+create procedure innodb_insert_proc (repeat_count int)
+begin
+ declare current_num int;
+ set current_num = 0;
+ while current_num < repeat_count do
+ insert into innodb_normal values(current_num,'testing..');
+ set current_num = current_num + 1;
+ end while;
+end//
+delimiter ;//
+commit;
+
+set autocommit=0;
+call innodb_insert_proc(5000);
+commit;
+set autocommit=1;
+select count(*) from innodb_normal;
+insert into innodb_compressed select * from innodb_normal;
+insert into innodb_page_compressed1 select * from innodb_normal;
+insert into innodb_page_compressed2 select * from innodb_normal;
+insert into innodb_page_compressed3 select * from innodb_normal;
+insert into innodb_page_compressed4 select * from innodb_normal;
+insert into innodb_page_compressed5 select * from innodb_normal;
+insert into innodb_page_compressed6 select * from innodb_normal;
+insert into innodb_page_compressed7 select * from innodb_normal;
+insert into innodb_page_compressed8 select * from innodb_normal;
+insert into innodb_page_compressed9 select * from innodb_normal;
+commit;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+alter table innodb_normal page_compressed=1 page_compression_level=8;
+show warnings;
+show create table innodb_normal;
+alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0;
+show warnings;
+show create table innodb_compressed;
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+--source include/restart_mysqld.inc
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+# none
+set global innodb_compression_algorithm = 0;
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+commit;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+--source include/restart_mysqld.inc
+
+update innodb_page_compressed1 set c1 = c1 + 1;
+update innodb_page_compressed2 set c1 = c1 + 1;
+update innodb_page_compressed3 set c1 = c1 + 1;
+update innodb_page_compressed4 set c1 = c1 + 1;
+update innodb_page_compressed5 set c1 = c1 + 1;
+update innodb_page_compressed6 set c1 = c1 + 1;
+update innodb_page_compressed7 set c1 = c1 + 1;
+update innodb_page_compressed8 set c1 = c1 + 1;
+update innodb_page_compressed9 set c1 = c1 + 1;
+select count(*) from innodb_compressed;
+select count(*) from innodb_page_compressed1;
+select count(*) from innodb_page_compressed1 where c1 < 500000;
+select count(*) from innodb_page_compressed2 where c1 < 500000;
+select count(*) from innodb_page_compressed3 where c1 < 500000;
+select count(*) from innodb_page_compressed4 where c1 < 500000;
+select count(*) from innodb_page_compressed5 where c1 < 500000;
+select count(*) from innodb_page_compressed6 where c1 < 500000;
+select count(*) from innodb_page_compressed7 where c1 < 500000;
+select count(*) from innodb_page_compressed8 where c1 < 500000;
+select count(*) from innodb_page_compressed9 where c1 < 500000;
+
+drop procedure innodb_insert_proc;
+drop table innodb_normal;
+drop table innodb_compressed;
+drop table innodb_page_compressed1;
+drop table innodb_page_compressed2;
+drop table innodb_page_compressed3;
+drop table innodb_page_compressed4;
+drop table innodb_page_compressed5;
+drop table innodb_page_compressed6;
+drop table innodb_page_compressed7;
+drop table innodb_page_compressed8;
+drop table innodb_page_compressed9;
+
+# reset system
+--disable_query_log
+EVAL SET GLOBAL innodb_compression_algorithm = $innodb_compression_algorithm_orig;
+EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig;
+EVAL SET GLOBAL innodb_file_format = $innodb_file_format_orig;
+--enable_query_log
diff --git a/mysql-test/suite/sys_vars/r/innodb_compression_algorithm_basic.result b/mysql-test/suite/sys_vars/r/innodb_compression_algorithm_basic.result
new file mode 100644
index 00000000000..1213ec8bf10
--- /dev/null
+++ b/mysql-test/suite/sys_vars/r/innodb_compression_algorithm_basic.result
@@ -0,0 +1,47 @@
+SET @start_global_value = @@global.innodb_compression_algorithm;
+SELECT @start_global_value;
+@start_global_value
+zlib
+select @@global.innodb_compression_algorithm;
+@@global.innodb_compression_algorithm
+zlib
+select @@session.innodb_compression_algorithm;
+ERROR HY000: Variable 'innodb_compression_algorithm' is a GLOBAL variable
+show global variables like 'innodb_compression_algorithm';
+Variable_name Value
+innodb_compression_algorithm zlib
+show session variables like 'innodb_compression_algorithm';
+Variable_name Value
+innodb_compression_algorithm zlib
+select * from information_schema.global_variables where variable_name='innodb_compression_algorithm';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_COMPRESSION_ALGORITHM zlib
+select * from information_schema.session_variables where variable_name='innodb_compression_algorithm';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_COMPRESSION_ALGORITHM zlib
+set global innodb_compression_algorithm=1;
+select @@global.innodb_compression_algorithm;
+@@global.innodb_compression_algorithm
+zlib
+select * from information_schema.global_variables where variable_name='innodb_compression_algorithm';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_COMPRESSION_ALGORITHM zlib
+select * from information_schema.session_variables where variable_name='innodb_compression_algorithm';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_COMPRESSION_ALGORITHM zlib
+set session innodb_compression_algorithm=0;
+ERROR HY000: Variable 'innodb_compression_algorithm' is a GLOBAL variable and should be set with SET GLOBAL
+set global innodb_compression_algorithm=1.1;
+ERROR 42000: Incorrect argument type to variable 'innodb_compression_algorithm'
+set global innodb_compression_algorithm=1e1;
+ERROR 42000: Incorrect argument type to variable 'innodb_compression_algorithm'
+set global innodb_compression_algorithm="foo";
+ERROR 42000: Variable 'innodb_compression_algorithm' can't be set to the value of 'foo'
+set global innodb_compression_algorithm=0;
+select @@global.innodb_compression_algorithm;
+@@global.innodb_compression_algorithm
+none
+SET @@global.innodb_compression_algorithm = @start_global_value;
+SELECT @@global.innodb_compression_algorithm;
+@@global.innodb_compression_algorithm
+zlib
diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result
index 8c0af874228..4e9c6839c95 100644
--- a/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result
+++ b/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result
@@ -37,6 +37,8 @@ buffer_pool_bytes_dirty disabled
buffer_pool_pages_free disabled
buffer_pages_created disabled
buffer_pages_written disabled
+buffer_index_pages_written disabled
+buffer_non_index_pages_written disabled
buffer_pages_read disabled
buffer_data_reads disabled
buffer_data_written disabled
@@ -160,6 +162,19 @@ compress_pages_compressed disabled
compress_pages_decompressed disabled
compression_pad_increments disabled
compression_pad_decrements disabled
+compress_saved disabled
+compress_trim_sect512 disabled
+compress_trim_sect1024 disabled
+compress_trim_sect2048 disabled
+compress_trim_sect4096 disabled
+compress_trim_sect8192 disabled
+compress_trim_sect16384 disabled
+compress_trim_sect32768 disabled
+compress_pages_page_compressed disabled
+compress_page_compressed_trim_op disabled
+compress_page_compressed_trim_op_saved disabled
+compress_pages_page_decompressed disabled
+compress_pages_page_compression_error disabled
index_page_splits disabled
index_page_merge_attempts disabled
index_page_merge_successful disabled
diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result
index 8c0af874228..4e9c6839c95 100644
--- a/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result
+++ b/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result
@@ -37,6 +37,8 @@ buffer_pool_bytes_dirty disabled
buffer_pool_pages_free disabled
buffer_pages_created disabled
buffer_pages_written disabled
+buffer_index_pages_written disabled
+buffer_non_index_pages_written disabled
buffer_pages_read disabled
buffer_data_reads disabled
buffer_data_written disabled
@@ -160,6 +162,19 @@ compress_pages_compressed disabled
compress_pages_decompressed disabled
compression_pad_increments disabled
compression_pad_decrements disabled
+compress_saved disabled
+compress_trim_sect512 disabled
+compress_trim_sect1024 disabled
+compress_trim_sect2048 disabled
+compress_trim_sect4096 disabled
+compress_trim_sect8192 disabled
+compress_trim_sect16384 disabled
+compress_trim_sect32768 disabled
+compress_pages_page_compressed disabled
+compress_page_compressed_trim_op disabled
+compress_page_compressed_trim_op_saved disabled
+compress_pages_page_decompressed disabled
+compress_pages_page_compression_error disabled
index_page_splits disabled
index_page_merge_attempts disabled
index_page_merge_successful disabled
diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result
index 8c0af874228..4e9c6839c95 100644
--- a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result
+++ b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result
@@ -37,6 +37,8 @@ buffer_pool_bytes_dirty disabled
buffer_pool_pages_free disabled
buffer_pages_created disabled
buffer_pages_written disabled
+buffer_index_pages_written disabled
+buffer_non_index_pages_written disabled
buffer_pages_read disabled
buffer_data_reads disabled
buffer_data_written disabled
@@ -160,6 +162,19 @@ compress_pages_compressed disabled
compress_pages_decompressed disabled
compression_pad_increments disabled
compression_pad_decrements disabled
+compress_saved disabled
+compress_trim_sect512 disabled
+compress_trim_sect1024 disabled
+compress_trim_sect2048 disabled
+compress_trim_sect4096 disabled
+compress_trim_sect8192 disabled
+compress_trim_sect16384 disabled
+compress_trim_sect32768 disabled
+compress_pages_page_compressed disabled
+compress_page_compressed_trim_op disabled
+compress_page_compressed_trim_op_saved disabled
+compress_pages_page_decompressed disabled
+compress_pages_page_compression_error disabled
index_page_splits disabled
index_page_merge_attempts disabled
index_page_merge_successful disabled
diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result
index 8c0af874228..4e9c6839c95 100644
--- a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result
+++ b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result
@@ -37,6 +37,8 @@ buffer_pool_bytes_dirty disabled
buffer_pool_pages_free disabled
buffer_pages_created disabled
buffer_pages_written disabled
+buffer_index_pages_written disabled
+buffer_non_index_pages_written disabled
buffer_pages_read disabled
buffer_data_reads disabled
buffer_data_written disabled
@@ -160,6 +162,19 @@ compress_pages_compressed disabled
compress_pages_decompressed disabled
compression_pad_increments disabled
compression_pad_decrements disabled
+compress_saved disabled
+compress_trim_sect512 disabled
+compress_trim_sect1024 disabled
+compress_trim_sect2048 disabled
+compress_trim_sect4096 disabled
+compress_trim_sect8192 disabled
+compress_trim_sect16384 disabled
+compress_trim_sect32768 disabled
+compress_pages_page_compressed disabled
+compress_page_compressed_trim_op disabled
+compress_page_compressed_trim_op_saved disabled
+compress_pages_page_decompressed disabled
+compress_pages_page_compression_error disabled
index_page_splits disabled
index_page_merge_attempts disabled
index_page_merge_successful disabled
diff --git a/mysql-test/suite/sys_vars/r/innodb_mtflush_threads_basic.result b/mysql-test/suite/sys_vars/r/innodb_mtflush_threads_basic.result
new file mode 100644
index 00000000000..75a1cc5262e
--- /dev/null
+++ b/mysql-test/suite/sys_vars/r/innodb_mtflush_threads_basic.result
@@ -0,0 +1,21 @@
+select @@global.innodb_mtflush_threads;
+@@global.innodb_mtflush_threads
+8
+select @@session.innodb_mtflush_threads;
+ERROR HY000: Variable 'innodb_mtflush_threads' is a GLOBAL variable
+show global variables like 'innodb_mtflush_threads';
+Variable_name Value
+innodb_mtflush_threads 8
+show session variables like 'innodb_mtflush_threads';
+Variable_name Value
+innodb_mtflush_threads 8
+select * from information_schema.global_variables where variable_name='innodb_mtflush_threads';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_MTFLUSH_THREADS 8
+select * from information_schema.session_variables where variable_name='innodb_mtflush_threads';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_MTFLUSH_THREADS 8
+set global innodb_mtflush_threads=1;
+ERROR HY000: Variable 'innodb_mtflush_threads' is a read only variable
+set session innodb_mtflush_threads=1;
+ERROR HY000: Variable 'innodb_mtflush_threads' is a read only variable
diff --git a/mysql-test/suite/sys_vars/r/innodb_use_lz4_basic.result b/mysql-test/suite/sys_vars/r/innodb_use_lz4_basic.result
new file mode 100644
index 00000000000..4c3cfa524af
--- /dev/null
+++ b/mysql-test/suite/sys_vars/r/innodb_use_lz4_basic.result
@@ -0,0 +1,3 @@
+select @@global.innodb_use_fallocate;
+@@global.innodb_use_fallocate
+0
diff --git a/mysql-test/suite/sys_vars/r/innodb_use_mtflush_basic.result b/mysql-test/suite/sys_vars/r/innodb_use_mtflush_basic.result
new file mode 100644
index 00000000000..f77abba7ac9
--- /dev/null
+++ b/mysql-test/suite/sys_vars/r/innodb_use_mtflush_basic.result
@@ -0,0 +1,21 @@
+select @@global.innodb_use_mtflush;
+@@global.innodb_use_mtflush
+0
+select @@session.innodb_use_mtflush;
+ERROR HY000: Variable 'innodb_use_mtflush' is a GLOBAL variable
+show global variables like 'innodb_use_mtflush';
+Variable_name Value
+innodb_use_mtflush OFF
+show session variables like 'innodb_use_mtflush';
+Variable_name Value
+innodb_use_mtflush OFF
+select * from information_schema.global_variables where variable_name='innodb_use_mtflush';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_USE_MTFLUSH OFF
+select * from information_schema.session_variables where variable_name='innodb_use_mtflush';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_USE_MTFLUSH OFF
+set global innodb_use_mtflush=1;
+ERROR HY000: Variable 'innodb_use_mtflush' is a read only variable
+set session innodb_use_mtflush=1;
+ERROR HY000: Variable 'innodb_use_mtflush' is a read only variable
diff --git a/mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result b/mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result
new file mode 100644
index 00000000000..63292f5d3c8
--- /dev/null
+++ b/mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result
@@ -0,0 +1,33 @@
+SET @start_use_trim = @@global.innodb_use_trim;
+SELECT @start_use_trim;
+@start_use_trim
+0
+SELECT COUNT(@@GLOBAL.innodb_use_trim);
+COUNT(@@GLOBAL.innodb_use_trim)
+1
+1 Expected
+SET @@GLOBAL.innodb_use_trim=1;
+SELECT COUNT(@@GLOBAL.innodb_use_trim);
+COUNT(@@GLOBAL.innodb_use_trim)
+1
+1 Expected
+SELECT IF(@@GLOBAL.innodb_use_trim, 'ON', 'OFF') = VARIABLE_VALUE
+FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
+WHERE VARIABLE_NAME='innodb_use_trim';
+IF(@@GLOBAL.innodb_use_trim, 'ON', 'OFF') = VARIABLE_VALUE
+1
+1 Expected
+SELECT COUNT(@@GLOBAL.innodb_use_trim);
+COUNT(@@GLOBAL.innodb_use_trim)
+1
+1 Expected
+SELECT COUNT(VARIABLE_VALUE)
+FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
+WHERE VARIABLE_NAME='innodb_use_trim';
+COUNT(VARIABLE_VALUE)
+1
+1 Expected
+SET @@global.innodb_use_trim = @start_use_trim;
+SELECT @@global.innodb_use_trim;
+@@global.innodb_use_trim
+0
diff --git a/mysql-test/suite/sys_vars/t/innodb_compression_algorithm_basic-master.opt b/mysql-test/suite/sys_vars/t/innodb_compression_algorithm_basic-master.opt
new file mode 100644
index 00000000000..77db41721ca
--- /dev/null
+++ b/mysql-test/suite/sys_vars/t/innodb_compression_algorithm_basic-master.opt
@@ -0,0 +1 @@
+--innodb-compression-algorithm=1 \ No newline at end of file
diff --git a/mysql-test/suite/sys_vars/t/innodb_compression_algorithm_basic.test b/mysql-test/suite/sys_vars/t/innodb_compression_algorithm_basic.test
new file mode 100644
index 00000000000..6f09ced7dd5
--- /dev/null
+++ b/mysql-test/suite/sys_vars/t/innodb_compression_algorithm_basic.test
@@ -0,0 +1,46 @@
+--source include/have_innodb.inc
+
+SET @start_global_value = @@global.innodb_compression_algorithm;
+SELECT @start_global_value;
+
+#
+# exists as global only
+#
+select @@global.innodb_compression_algorithm;
+--error ER_INCORRECT_GLOBAL_LOCAL_VAR
+select @@session.innodb_compression_algorithm;
+show global variables like 'innodb_compression_algorithm';
+show session variables like 'innodb_compression_algorithm';
+select * from information_schema.global_variables where variable_name='innodb_compression_algorithm';
+select * from information_schema.session_variables where variable_name='innodb_compression_algorithm';
+
+#
+# show that it's writable
+#
+set global innodb_compression_algorithm=1;
+select @@global.innodb_compression_algorithm;
+select * from information_schema.global_variables where variable_name='innodb_compression_algorithm';
+select * from information_schema.session_variables where variable_name='innodb_compression_algorithm';
+--error ER_GLOBAL_VARIABLE
+set session innodb_compression_algorithm=0;
+
+#
+# incorrect types (decimal/float literals) and an unknown algorithm name
+#
+--error ER_WRONG_TYPE_FOR_VAR
+set global innodb_compression_algorithm=1.1;
+--error ER_WRONG_TYPE_FOR_VAR
+set global innodb_compression_algorithm=1e1;
+--error ER_WRONG_VALUE_FOR_VAR
+set global innodb_compression_algorithm="foo";
+#
+# min/max values
+#
+set global innodb_compression_algorithm=0;
+select @@global.innodb_compression_algorithm;
+#
+# cleanup
+#
+
+SET @@global.innodb_compression_algorithm = @start_global_value;
+SELECT @@global.innodb_compression_algorithm;
diff --git a/mysql-test/suite/sys_vars/t/innodb_mtflush_threads_basic.test b/mysql-test/suite/sys_vars/t/innodb_mtflush_threads_basic.test
new file mode 100644
index 00000000000..c8412f969eb
--- /dev/null
+++ b/mysql-test/suite/sys_vars/t/innodb_mtflush_threads_basic.test
@@ -0,0 +1,21 @@
+--source include/have_innodb.inc
+# numeric readonly
+
+#
+# show values;
+#
+select @@global.innodb_mtflush_threads;
+--error ER_INCORRECT_GLOBAL_LOCAL_VAR
+select @@session.innodb_mtflush_threads;
+show global variables like 'innodb_mtflush_threads';
+show session variables like 'innodb_mtflush_threads';
+select * from information_schema.global_variables where variable_name='innodb_mtflush_threads';
+select * from information_schema.session_variables where variable_name='innodb_mtflush_threads';
+
+#
+# show that it's read-only
+#
+--error ER_INCORRECT_GLOBAL_LOCAL_VAR
+set global innodb_mtflush_threads=1;
+--error ER_INCORRECT_GLOBAL_LOCAL_VAR
+set session innodb_mtflush_threads=1;
diff --git a/mysql-test/suite/sys_vars/t/innodb_use_lz4_basic.test b/mysql-test/suite/sys_vars/t/innodb_use_lz4_basic.test
new file mode 100644
index 00000000000..aefa276dcee
--- /dev/null
+++ b/mysql-test/suite/sys_vars/t/innodb_use_lz4_basic.test
@@ -0,0 +1,5 @@
+--source include/have_innodb.inc
+# bool readonly
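+# not on all compilations; NOTE(review): despite the file name, this checks innodb_use_fallocate, not innodb_use_lz4 - confirm intent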
+select @@global.innodb_use_fallocate;
+
diff --git a/mysql-test/suite/sys_vars/t/innodb_use_mtflush_basic.test b/mysql-test/suite/sys_vars/t/innodb_use_mtflush_basic.test
new file mode 100644
index 00000000000..a9c40b9e522
--- /dev/null
+++ b/mysql-test/suite/sys_vars/t/innodb_use_mtflush_basic.test
@@ -0,0 +1,22 @@
+--source include/have_innodb.inc
+# bool readonly
+
+#
+# show values;
+#
+select @@global.innodb_use_mtflush;
+--error ER_INCORRECT_GLOBAL_LOCAL_VAR
+select @@session.innodb_use_mtflush;
+show global variables like 'innodb_use_mtflush';
+show session variables like 'innodb_use_mtflush';
+select * from information_schema.global_variables where variable_name='innodb_use_mtflush';
+select * from information_schema.session_variables where variable_name='innodb_use_mtflush';
+
+#
+# show that it's read-only
+#
+--error ER_INCORRECT_GLOBAL_LOCAL_VAR
+set global innodb_use_mtflush=1;
+--error ER_INCORRECT_GLOBAL_LOCAL_VAR
+set session innodb_use_mtflush=1;
+
diff --git a/mysql-test/suite/sys_vars/t/innodb_use_trim_basic.test b/mysql-test/suite/sys_vars/t/innodb_use_trim_basic.test
new file mode 100644
index 00000000000..c1b0f142179
--- /dev/null
+++ b/mysql-test/suite/sys_vars/t/innodb_use_trim_basic.test
@@ -0,0 +1,36 @@
+--source include/have_innodb.inc
+
+SET @start_use_trim = @@global.innodb_use_trim;
+SELECT @start_use_trim;
+
+SELECT COUNT(@@GLOBAL.innodb_use_trim);
+--echo 1 Expected
+
+####################################################################
+# Check if Value can be set #
+####################################################################
+
+SET @@GLOBAL.innodb_use_trim=1;
+
+SELECT COUNT(@@GLOBAL.innodb_use_trim);
+--echo 1 Expected
+
+#################################################################
+# Check if the value in GLOBAL Table matches value in variable #
+#################################################################
+
+SELECT IF(@@GLOBAL.innodb_use_trim, 'ON', 'OFF') = VARIABLE_VALUE
+FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
+WHERE VARIABLE_NAME='innodb_use_trim';
+--echo 1 Expected
+
+SELECT COUNT(@@GLOBAL.innodb_use_trim);
+--echo 1 Expected
+
+SELECT COUNT(VARIABLE_VALUE)
+FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
+WHERE VARIABLE_NAME='innodb_use_trim';
+--echo 1 Expected
+
+SET @@global.innodb_use_trim = @start_use_trim;
+SELECT @@global.innodb_use_trim; \ No newline at end of file
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt
index c24f1cda59e..eb94f6ba703 100644
--- a/storage/innobase/CMakeLists.txt
+++ b/storage/innobase/CMakeLists.txt
@@ -18,6 +18,17 @@
INCLUDE(CheckFunctionExists)
INCLUDE(CheckCSourceCompiles)
INCLUDE(CheckCSourceRuns)
+INCLUDE(lz4)
+INCLUDE(lzo)
+INCLUDE(lzma)
+INCLUDE(bzip2)
+INCLUDE(snappy)
+
+MYSQL_CHECK_LZ4()
+MYSQL_CHECK_LZO()
+MYSQL_CHECK_LZMA()
+MYSQL_CHECK_BZIP2()
+MYSQL_CHECK_SNAPPY()
# OS tests
IF(UNIX)
@@ -338,6 +349,7 @@ SET(INNOBASE_SOURCES
buf/buf0flu.cc
buf/buf0lru.cc
buf/buf0rea.cc
+ buf/buf0mtflu.cc
data/data0data.cc
data/data0type.cc
dict/dict0boot.cc
@@ -351,6 +363,7 @@ SET(INNOBASE_SOURCES
eval/eval0eval.cc
eval/eval0proc.cc
fil/fil0fil.cc
+ fil/fil0pagecompress.cc
fsp/fsp0fsp.cc
fut/fut0fut.cc
fut/fut0lst.cc
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 79b533481b7..01bcd18db1d 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -2031,7 +2031,7 @@ btr_parse_page_reorganize(
buf_block_t* block, /*!< in: page to be reorganized, or NULL */
mtr_t* mtr) /*!< in: mtr or NULL */
{
- ulint level;
+ ulint level = page_zip_level;
ut_ad(ptr && end_ptr);
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index c8dd4fae0a9..315a1d1b558 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -1873,9 +1873,13 @@ btr_cur_update_alloc_zip_func(
false=update-in-place */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
+
+ /* Have a local copy of the variables as these can change
+ dynamically. */
const page_t* page = page_cur_get_page(cursor);
ut_ad(page_zip == page_cur_get_page_zip(cursor));
+
ut_ad(page_zip);
ut_ad(!dict_index_is_ibuf(index));
ut_ad(rec_offs_validate(page_cur_get_rec(cursor), index, offsets));
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index e5800ef30c0..46f7f5a49df 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -2,6 +2,7 @@
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -878,6 +879,11 @@ buf_page_print(
mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
mach_read_from_4(read_buf
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
+
+ ulint page_type = mach_read_from_2(read_buf + FIL_PAGE_TYPE);
+ /* FIL_PAGE_TYPE is a 2-byte header field; ulint is unsigned, so print it with %lu. */
+ fprintf(stderr, "InnoDB: page type %lu meaning %s\n", (ulong) page_type,
+ fil_get_page_type_name(page_type));
}
#ifndef UNIV_HOTBACKUP
@@ -3415,6 +3421,7 @@ buf_page_init_low(
bpage->access_time = 0;
bpage->newest_modification = 0;
bpage->oldest_modification = 0;
+ bpage->write_size = 0;
HASH_INVALIDATE(bpage, hash);
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
bpage->file_page_was_freed = FALSE;
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc
index 62222993622..ad76765145b 100644
--- a/storage/innobase/buf/buf0dblwr.cc
+++ b/storage/innobase/buf/buf0dblwr.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -382,7 +383,7 @@ buf_dblwr_init_or_load_pages(
/* Read the trx sys header to check if we are using the doublewrite
buffer */
off_t trx_sys_page = TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE;
- os_file_read(file, read_buf, trx_sys_page, UNIV_PAGE_SIZE);
+ os_file_read(file, read_buf, trx_sys_page, UNIV_PAGE_SIZE, FALSE);
doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
@@ -416,12 +417,11 @@ buf_dblwr_init_or_load_pages(
}
/* Read the pages from the doublewrite buffer to memory */
-
block_bytes = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
- os_file_read(file, buf, block1 * UNIV_PAGE_SIZE, block_bytes);
+ os_file_read(file, buf, block1 * UNIV_PAGE_SIZE, block_bytes, FALSE);
os_file_read(file, buf + block_bytes, block2 * UNIV_PAGE_SIZE,
- block_bytes);
+ block_bytes, FALSE);
/* Check if any of these pages is half-written in data files, in the
intended position */
@@ -514,7 +514,7 @@ buf_dblwr_process()
fil_io(OS_FILE_READ, true, space_id, zip_size,
page_no, 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
- read_buf, NULL);
+ read_buf, NULL, 0);
/* Check if the page is corrupt */
@@ -566,7 +566,7 @@ buf_dblwr_process()
fil_io(OS_FILE_WRITE, true, space_id,
zip_size, page_no, 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
- page, NULL);
+ page, NULL, 0);
ib_logf(IB_LOG_LEVEL_INFO,
"Recovered the page from"
@@ -586,7 +586,7 @@ buf_dblwr_process()
zip_size, page_no, 0,
zip_size ? zip_size
: UNIV_PAGE_SIZE,
- page, NULL);
+ page, NULL, NULL);
}
}
}
@@ -798,7 +798,7 @@ buf_dblwr_write_block_to_datafile(
buf_page_get_page_no(bpage), 0,
buf_page_get_zip_size(bpage),
(void*) bpage->zip.data,
- (void*) bpage);
+ (void*) bpage, 0);
return;
}
@@ -810,8 +810,7 @@ buf_dblwr_write_block_to_datafile(
fil_io(flags, sync, buf_block_get_space(block), 0,
buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
- (void*) block->frame, (void*) block);
-
+ (void*) block->frame, (void*) block, (ulint *)&bpage->write_size);
}
/********************************************************************//**
@@ -905,7 +904,7 @@ try_again:
fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
buf_dblwr->block1, 0, len,
- (void*) write_buf, NULL);
+ (void*) write_buf, NULL, 0);
if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
/* No unwritten pages in the second block. */
@@ -921,7 +920,7 @@ try_again:
fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
buf_dblwr->block2, 0, len,
- (void*) write_buf, NULL);
+ (void*) write_buf, NULL, 0);
flush:
/* increment the doublewrite flushed pages counter */
@@ -1150,14 +1149,14 @@ retry:
fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
offset, 0, UNIV_PAGE_SIZE,
(void*) (buf_dblwr->write_buf
- + UNIV_PAGE_SIZE * i), NULL);
+ + UNIV_PAGE_SIZE * i), NULL, 0);
} else {
/* It is a regular page. Write it directly to the
doublewrite buffer */
fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
offset, 0, UNIV_PAGE_SIZE,
(void*) ((buf_block_t*) bpage)->frame,
- NULL);
+ NULL, 0);
}
/* Now flush the doublewrite buffer data to disk */
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index f5145297b3f..79f02b19933 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -1,6 +1,8 @@
/*****************************************************************************
Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
+Copyright (c) 2013, 2014, Fusion-io. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -30,6 +32,7 @@ Created 11/11/1995 Heikki Tuuri
#endif
#include "buf0buf.h"
+#include "buf0mtflu.h"
#include "buf0checksum.h"
#include "srv0start.h"
#include "srv0srv.h"
@@ -44,10 +47,12 @@ Created 11/11/1995 Heikki Tuuri
#include "ibuf0ibuf.h"
#include "log0log.h"
#include "os0file.h"
+#include "os0sync.h"
#include "trx0sys.h"
#include "srv0mon.h"
#include "mysql/plugin.h"
#include "mysql/service_thd_wait.h"
+#include "fil0pagecompress.h"
/** Number of pages flushed through non flush_list flushes. */
static ulint buf_lru_flush_page_count = 0;
@@ -75,15 +80,6 @@ in thrashing. */
/* @} */
-/** Handled page counters for a single flush */
-struct flush_counters_t {
- ulint flushed; /*!< number of dirty pages flushed */
- ulint evicted; /*!< number of clean pages evicted, including
- evicted uncompressed page images */
- ulint unzip_LRU_evicted;/*!< number of uncompressed page images
- evicted */
-};
-
/******************************************************************//**
Increases flush_list size in bytes with zip_size for compressed page,
UNIV_PAGE_SIZE for uncompressed page in inline function */
@@ -732,8 +728,10 @@ buf_flush_write_complete(
flush_type = buf_page_get_flush_type(bpage);
buf_pool->n_flush[flush_type]--;
+#ifdef UNIV_DEBUG
/* fprintf(stderr, "n pending flush %lu\n",
buf_pool->n_flush[flush_type]); */
+#endif
if (buf_pool->n_flush[flush_type] == 0
&& buf_pool->init_flush[flush_type] == FALSE) {
@@ -887,6 +885,8 @@ buf_flush_write_block_low(
{
ulint zip_size = buf_page_get_zip_size(bpage);
page_t* frame = NULL;
+ ulint space_id = buf_page_get_space(bpage);
+ atomic_writes_t awrites = fil_space_get_atomic_writes(space_id);
#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
@@ -963,12 +963,28 @@ buf_flush_write_block_low(
sync, buf_page_get_space(bpage), zip_size,
buf_page_get_page_no(bpage), 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
- frame, bpage);
- } else if (flush_type == BUF_FLUSH_SINGLE_PAGE) {
- buf_dblwr_write_single_page(bpage, sync);
+ frame, bpage, &bpage->write_size);
} else {
- ut_ad(!sync);
- buf_dblwr_add_to_batch(bpage);
+
+ /* InnoDB uses doublewrite buffer and doublewrite buffer
+ is initialized. User can define do we use atomic writes
+ on a file space (table) or not. If atomic writes are
+ not used we should use doublewrite buffer and if
+ atomic writes should be used, no doublewrite buffer
+ is used. */
+
+ if (awrites == ATOMIC_WRITES_ON) {
+ fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
+ FALSE, buf_page_get_space(bpage), zip_size,
+ buf_page_get_page_no(bpage), 0,
+ zip_size ? zip_size : UNIV_PAGE_SIZE,
+ frame, bpage, &bpage->write_size);
+ } else if (flush_type == BUF_FLUSH_SINGLE_PAGE) {
+ buf_dblwr_write_single_page(bpage, sync);
+ } else {
+ ut_ad(!sync);
+ buf_dblwr_add_to_batch(bpage);
+ }
}
/* When doing single page flushing the IO is done synchronously
@@ -1228,7 +1244,9 @@ buf_flush_try_neighbors(
}
}
+#ifdef UNIV_DEBUG
/* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */
+#endif
if (high > fil_space_get_size(space)) {
high = fil_space_get_size(space);
@@ -1681,7 +1699,6 @@ pages: to avoid deadlocks, this function must be written so that it cannot
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
the calling thread is not allowed to own any latches on pages!
@return number of blocks for which the write request was queued */
-static
void
buf_flush_batch(
/*============*/
@@ -1738,7 +1755,6 @@ buf_flush_batch(
/******************************************************************//**
Gather the aggregated stats for both flush list and LRU list flushing */
-static
void
buf_flush_common(
/*=============*/
@@ -1763,7 +1779,6 @@ buf_flush_common(
/******************************************************************//**
Start a buffer flush batch for LRU or flush list */
-static
ibool
buf_flush_start(
/*============*/
@@ -1792,7 +1807,6 @@ buf_flush_start(
/******************************************************************//**
End a buffer flush batch for LRU or flush list */
-static
void
buf_flush_end(
/*==========*/
@@ -1908,6 +1922,10 @@ buf_flush_list(
ulint i;
bool success = true;
+ if (buf_mtflu_init_done()) {
+ return(buf_mtflu_flush_list(min_n, lsn_limit, n_processed));
+ }
+
if (n_processed) {
*n_processed = 0;
}
@@ -2078,6 +2096,11 @@ buf_flush_LRU_tail(void)
{
ulint total_flushed = 0;
+ if(buf_mtflu_init_done())
+ {
+ return(buf_mtflu_flush_LRU_tail());
+ }
+
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool = buf_pool_from_array(i);
@@ -2394,6 +2417,8 @@ page_cleaner_sleep_if_needed(
}
}
+
+
/******************************************************************//**
page_cleaner thread tasked with flushing dirty pages from the buffer
pools. As of now we'll have only one instance of this thread.
@@ -2420,7 +2445,6 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)(
fprintf(stderr, "InnoDB: page_cleaner thread running, id %lu\n",
os_thread_pf(os_thread_get_curr_id()));
#endif /* UNIV_DEBUG_THREAD_CREATION */
-
buf_page_cleaner_is_active = TRUE;
while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
@@ -2435,10 +2459,11 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)(
/* Flush pages from flush_list if required */
page_cleaner_flush_pages_if_needed();
n_flushed = 0;
+
} else {
n_flushed = page_cleaner_do_flush_batch(
- PCT_IO(100),
- LSN_MAX);
+ PCT_IO(100),
+ LSN_MAX);
if (n_flushed) {
MONITOR_INC_VALUE_CUMULATIVE(
@@ -2454,6 +2479,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)(
}
ut_ad(srv_shutdown_state > 0);
+
if (srv_fast_shutdown == 2) {
/* In very fast shutdown we simulate a crash of
buffer pool. We are not required to do any flushing */
@@ -2619,9 +2645,11 @@ buf_flush_validate(
return(ret);
}
+
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#endif /* !UNIV_HOTBACKUP */
+
#ifdef UNIV_DEBUG
/******************************************************************//**
Check if there are any dirty pages that belong to a space id in the flush
diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc
new file mode 100644
index 00000000000..f5b3d81991a
--- /dev/null
+++ b/storage/innobase/buf/buf0mtflu.cc
@@ -0,0 +1,732 @@
+/*****************************************************************************
+
+Copyright (C) 2013, 2014, Fusion-io. All Rights Reserved.
+Copyright (C) 2013, 2015, MariaDB Corporation. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file buf/buf0mtflu.cc
+Multi-threaded flush method implementation
+
+Created 06/11/2013 Dhananjoy Das DDas@fusionio.com
+Modified 12/12/2013 Jan Lindström jan.lindstrom@skysql.com
+Modified 03/02/2014 Dhananjoy Das DDas@fusionio.com
+Modified 06/02/2014 Jan Lindström jan.lindstrom@skysql.com
+***********************************************************************/
+
+#include "buf0buf.h"
+#include "buf0flu.h"
+#include "buf0mtflu.h"
+#include "buf0checksum.h"
+#include "srv0start.h"
+#include "srv0srv.h"
+#include "page0zip.h"
+#include "ut0byte.h"
+#include "ut0lst.h"
+#include "page0page.h"
+#include "fil0fil.h"
+#include "buf0lru.h"
+#include "buf0rea.h"
+#include "ibuf0ibuf.h"
+#include "log0log.h"
+#include "os0file.h"
+#include "os0sync.h"
+#include "trx0sys.h"
+#include "srv0mon.h"
+#include "mysql/plugin.h"
+#include "mysql/service_thd_wait.h"
+#include "fil0pagecompress.h"
+
+#define MT_COMP_WATER_MARK 50
+/** Time to wait for a message. */
+#define MT_WAIT_IN_USECS 5000000
+
+/* Work item status. WRK_ITEM_DONE and WRK_ITEM_SUCCESS both have the
+value 2, so the two names compare equal. */
+typedef enum wrk_status {
+ WRK_ITEM_UNSET=0, /*!< Work item is not set */
+ WRK_ITEM_START=1, /*!< Processing of work item has started */
+ WRK_ITEM_DONE=2, /*!< Processing is done; usually set to
+ SUCCESS/FAILED */
+ WRK_ITEM_SUCCESS=2, /*!< Work item successfully processed */
+ WRK_ITEM_FAILED=3, /*!< Work item processing failed */
+ WRK_ITEM_EXIT=4, /*!< Exit request for a worker thread */
+ WRK_ITEM_SET=5, /*!< Work item is set */
+ WRK_ITEM_STATUS_UNDEFINED /*!< Implicit value 6; not assigned
+ anywhere in this file */
+} wrk_status_t;
+
+/* Work item task type; selects the branch taken in mtflush_service_io() */
+typedef enum mt_wrk_tsk {
+ MT_WRK_NONE=0, /*!< Exit queue-wait: the worker thread that
+ receives it stops */
+ MT_WRK_WRITE=1, /*!< Flush operation */
+ MT_WRK_READ=2, /*!< Read operation; mtflush_service_io()
+ asserts if it receives one */
+ MT_WRK_UNDEFINED
+} mt_wrk_tsk_t;
+
+/* Work thread status */
+typedef enum wthr_status {
+ WTHR_NOT_INIT=0, /*!< Work thread not initialized */
+ WTHR_INITIALIZED=1, /*!< Work thread initialized */
+ WTHR_SIG_WAITING=2, /*!< Work thread waiting for a work item */
+ WTHR_RUNNING=3, /*!< Work thread running */
+ WTHR_NO_WORK=4, /*!< Work thread has no work */
+ WTHR_KILL_IT=5, /*!< Work thread should exit */
+ WTHR_STATUS_UNDEFINED
+} wthr_status_t;
+
+/* Write work task: the arguments of one buffer pool flush batch */
+typedef struct wr_tsk {
+ buf_pool_t *buf_pool; /*!< buffer-pool instance */
+ buf_flush_t flush_type; /*!< flush-type for buffer-pool
+ flush operation */
+ ulint min; /*!< minimum number of pages
+ requested to be flushed; overwritten
+ for BUF_FLUSH_LRU in
+ buf_mtflu_flush_pool_instance() */
+ lsn_t lsn_limit; /*!< lsn limit for the buffer-pool
+ flush operation */
+} wr_tsk_t;
+
+/* Read work task; not used by any code path in this file */
+typedef struct rd_tsk {
+ buf_pool_t *page_pool; /*!< list of pages to decompress */
+} rd_tsk_t;
+
+/* Work item: one request passed through the work queue and returned
+through a completion queue */
+typedef struct wrk_itm
+{
+ mt_wrk_tsk_t tsk; /*!< Task type. Based on task-type
+ one of the entries wr_tsk/rd_tsk
+ will be used */
+ wr_tsk_t wr; /*!< Flush page list */
+ rd_tsk_t rd; /*!< Decompress page list */
+ ulint n_flushed; /*!< Flushed pages count */
+ os_thread_id_t id_usr; /*!< Thread-id currently working */
+ wrk_status_t wi_status; /*!< Work item status */
+ mem_heap_t *wheap; /*!< Heap where to allocate memory
+ for work queue nodes */
+ mem_heap_t *rheap; /*!< Heap passed to ib_wqueue_add() when
+ the item is posted to a completion queue */
+} wrk_t;
+
+typedef struct thread_data
+{
+ os_thread_id_t wthread_id; /*!< Thread id reported by
+ os_thread_create() */
+ os_thread_t wthread; /*!< Thread handle returned by
+ os_thread_create() */
+ wthr_status_t wt_status; /*!< Worker thread status */
+} thread_data_t;
+
+/* Thread synchronization data */
+typedef struct thread_sync
+{
+ /* Global variables used by all threads */
+ os_fast_mutex_t thread_global_mtx; /*!< Mutex protecting the variables
+ below */
+ ulint n_threads; /*!< Number of threads */
+ ib_wqueue_t *wq; /*!< Work Queue */
+ ib_wqueue_t *wr_cq; /*!< Write Completion Queue */
+ ib_wqueue_t *rd_cq; /*!< Read Completion Queue */
+ mem_heap_t* wheap; /*!< Work heap; this structure and
+ thread_data are allocated from it
+ in buf_mtflu_handler_init() */
+ mem_heap_t* rheap; /*!< Second heap created in
+ buf_mtflu_handler_init() */
+ wthr_status_t gwt_status; /*!< Global thread status; set to
+ WTHR_KILL_IT at shutdown */
+
+ /* Variables used by only one thread at a time */
+ thread_data_t* thread_data; /*!< Thread specific data, one
+ entry per worker thread */
+
+} thread_sync_t;
+
+static int mtflush_work_initialized = -1; /* -1 until buf_mtflu_work_init()
+sets it to 1; reset to 0 by buf_mtflu_io_thread_exit() */
+static thread_sync_t* mtflush_ctx=NULL; /* assigned in buf_mtflu_handler_init() */
+static os_fast_mutex_t mtflush_mtx; /* held by flush requests and by shutdown
+while they add items to the work queue */
+
+/******************************************************************//**
+Mark multi-threaded flush as initialized, so that
+buf_mtflu_init_done() returns true from now on. */
+static inline
+void
+buf_mtflu_work_init(void)
+/*=====================*/
+{
+ mtflush_work_initialized = 1;
+}
+
+/******************************************************************//**
+Tell whether the multi-threaded flush machinery is currently set up.
+@return true if the initialized flag is set */
+bool
+buf_mtflu_init_done(void)
+/*=====================*/
+{
+	const bool	is_ready = (mtflush_work_initialized == 1);
+
+	return(is_ready);
+}
+
+/******************************************************************//**
+Flush one buffer pool instance as described by a work item. The number
+of flushed pages is also stored in work_item->n_flushed.
+@return number of flushed pages, or 0 if the flush batch could not be
+started */
+static
+ulint
+buf_mtflu_flush_pool_instance(
+/*==========================*/
+	wrk_t	*work_item)	/*!< inout: work item to be flushed */
+{
+	ut_a(work_item != NULL);
+	ut_a(work_item->wr.buf_pool != NULL);
+
+	buf_pool_t*		pool = work_item->wr.buf_pool;
+	const buf_flush_t	type = work_item->wr.flush_type;
+	flush_counters_t	counters;
+
+	if (!buf_flush_start(pool, type)) {
+		/* A flush of this type is already running on this
+		buffer pool instance. If lsn_limit was specified,
+		skipping the instance means we cannot guarantee that
+		all pages up to lsn_limit have been flushed. We still
+		let the other instances proceed, on the assumption
+		that it helps the retry that follows the failure. */
+#ifdef UNIV_MTFLUSH_DEBUG
+		fprintf(stderr, "InnoDB: Note: buf flush start failed there is already active flush for this buffer pool.\n");
+#endif
+		return(0);
+	}
+
+	memset(&counters, 0, sizeof(counters));
+
+	if (type == BUF_FLUSH_LRU) {
+		/* srv_LRU_scan_depth can be arbitrarily large, so cap
+		the request with the current length of the LRU list. */
+		ulint	lru_len;
+
+		buf_pool_mutex_enter(pool);
+		lru_len = UT_LIST_GET_LEN(pool->LRU);
+		buf_pool_mutex_exit(pool);
+
+		work_item->wr.min = ut_min(srv_LRU_scan_depth, lru_len);
+	}
+
+	buf_flush_batch(pool, type,
+			work_item->wr.min,
+			work_item->wr.lsn_limit,
+			&counters);
+
+	work_item->n_flushed = counters.flushed;
+
+	buf_flush_end(pool, type);
+	buf_flush_common(type, work_item->n_flushed);
+
+	return(work_item->n_flushed);
+}
+
+/******************************************************************//**
+Worker step: fetch one work item from the work queue (calling
+ib_wqueue_wait() if none is immediately available), process it and
+post it to the write completion queue.
+On return thread_data->wt_status is WTHR_NO_WORK if no item was
+obtained, WTHR_KILL_IT if an exit request was processed, and
+WTHR_RUNNING otherwise. */
+static
+void
+mtflush_service_io(
+/*===============*/
+	thread_sync_t*	mtflush_io,	/*!< inout: multi-threaded flush
+					synchronization data */
+	thread_data_t*	thread_data)	/*!< inout: status slot of the
+					calling worker thread */
+{
+	wrk_t		*work_item = NULL;
+
+	ut_a(mtflush_io != NULL);
+	ut_a(thread_data != NULL);
+
+	thread_data->wt_status = WTHR_SIG_WAITING;
+
+	work_item = (wrk_t *)ib_wqueue_nowait(mtflush_io->wq);
+
+	if (work_item == NULL) {
+		work_item = (wrk_t *)ib_wqueue_wait(mtflush_io->wq);
+	}
+
+	if (work_item) {
+		thread_data->wt_status = WTHR_RUNNING;
+	} else {
+		/* Thread did not get any work */
+		thread_data->wt_status = WTHR_NO_WORK;
+		return;
+	}
+
+	/* Exit requests keep their status; everything else is marked
+	as picked up. */
+	if (work_item->wi_status != WRK_ITEM_EXIT) {
+		work_item->wi_status = WRK_ITEM_SET;
+	}
+
+#ifdef UNIV_MTFLUSH_DEBUG
+	ut_a(work_item->id_usr == 0);
+#endif
+	work_item->id_usr = os_thread_get_curr_id();
+
+	/* This works as a producer/consumer model: tasks are inserted
+	into the work queue (wq) and completions are posted to a queue
+	chosen by the type of operation. WRITE/compression/flush
+	completions go to wr_cq; READ/decompress completions are meant
+	to go to rd_cq. In future we may have others. */
+
+	switch(work_item->tsk) {
+	case MT_WRK_NONE:
+		ut_a(work_item->wi_status == WRK_ITEM_EXIT);
+		work_item->wi_status = WRK_ITEM_EXIT;
+		ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->rheap);
+		thread_data->wt_status = WTHR_KILL_IT;
+		break;
+
+	case MT_WRK_WRITE:
+		ut_a(work_item->wi_status == WRK_ITEM_SET);
+		work_item->wi_status = WRK_ITEM_START;
+
+		/* Process work item. The final status must be set in
+		exactly one branch: previously WRK_ITEM_FAILED was
+		always overwritten by WRK_ITEM_SUCCESS. */
+		if (buf_mtflu_flush_pool_instance(work_item) == 0) {
+			work_item->wi_status = WRK_ITEM_FAILED;
+		} else {
+			work_item->wi_status = WRK_ITEM_SUCCESS;
+		}
+
+		ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->rheap);
+		break;
+
+	case MT_WRK_READ:
+		/* Not implemented */
+		ut_a(0);
+		break;
+
+	default:
+		/* None other than Write/Read handling planned */
+		ut_a(0);
+		break;
+	}
+}
+
+/******************************************************************//**
+Thread used to flush dirty pages when multi-threaded flush is
+used. It services work items until it has processed an exit request.
+@return a dummy parameter*/
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(mtflush_io_thread)(
+/*==============================*/
+	void * arg)	/*!< in: thread_sync_t shared by all workers */
+{
+	thread_sync_t		*mtflush_io = ((thread_sync_t *)arg);
+	thread_data_t		*this_thread_data = NULL;
+	const os_thread_id_t	self_id = os_thread_get_curr_id();
+	ulint			i;
+	ulint			n_slots;
+
+	/* Find correct slot for this thread. The creating thread stores
+	our id into thread_data[] only after os_thread_create() has
+	returned, so we may get here before the slot is filled in. In
+	that case retry instead of using the out-of-bounds entry
+	thread_data[n_threads]. */
+	for (;;) {
+		os_fast_mutex_lock(&(mtflush_io->thread_global_mtx));
+
+		n_slots = mtflush_io->n_threads;
+
+		for (i = 0; i < n_slots; i++) {
+			if (mtflush_io->thread_data[i].wthread_id == self_id) {
+				break;
+			}
+		}
+
+		os_fast_mutex_unlock(&(mtflush_io->thread_global_mtx));
+
+		if (i < n_slots) {
+			break;
+		}
+
+		/* Slot not published yet: wait 1 ms and look again */
+		os_thread_sleep(1000);
+	}
+
+	ut_a(i < mtflush_io->n_threads);
+	this_thread_data = &mtflush_io->thread_data[i];
+
+	while (TRUE) {
+
+#ifdef UNIV_MTFLUSH_DEBUG
+		fprintf(stderr, "InnoDB: Note. Thread %lu work queue len %lu return queue len %lu\n",
+			os_thread_get_curr_id(),
+			ib_wqueue_len(mtflush_io->wq),
+			ib_wqueue_len(mtflush_io->wr_cq));
+#endif /* UNIV_MTFLUSH_DEBUG */
+
+		mtflush_service_io(mtflush_io, this_thread_data);
+
+		/* Set by mtflush_service_io() after an exit request */
+		if (this_thread_data->wt_status == WTHR_KILL_IT) {
+			break;
+		}
+	}
+
+	os_thread_exit(NULL);
+	OS_THREAD_DUMMY_RETURN;
+}
+
+/******************************************************************//**
+Add exit work items to the work queue to signal multi-threaded flush
+threads that they should exit, wait for every thread to acknowledge,
+and then release the queues, heaps and mutexes of the flush context.
+Returns immediately if a shutdown is already in progress. */
+void
+buf_mtflu_io_thread_exit(void)
+/*==========================*/
+{
+	ulint		i;
+	thread_sync_t*	mtflush_io = mtflush_ctx;
+	wrk_t*		work_item = NULL;
+	mem_heap_t*	wheap;
+	mem_heap_t*	rheap;
+
+	ut_a(mtflush_io != NULL);
+
+	/* Confirm if the io-thread KILL is in progress, bailout */
+	if (mtflush_io->gwt_status == WTHR_KILL_IT) {
+		return;
+	}
+
+	mtflush_io->gwt_status = WTHR_KILL_IT;
+
+	/* Allocate work items for shutdown message. This is done only
+	after the check above so that a redundant call allocates
+	nothing. */
+	work_item = (wrk_t*)mem_heap_alloc(mtflush_io->wheap, sizeof(wrk_t)*srv_mtflush_threads);
+
+	/* This lock is to safeguard against timing bug: flush request take
+	this mutex before sending work items to be processed by flush
+	threads. Inside flush thread we assume that work queue contains only
+	a constant number of items. Thus, we may not install new work items
+	below before all previous ones are processed. This mutex is released
+	by flush request after all work items sent to flush threads have
+	been processed. Thus, we can get this mutex if and only if work
+	queue is empty. */
+
+	os_fast_mutex_lock(&mtflush_mtx);
+
+	/* Make sure the work queue is empty */
+	ut_a(ib_wqueue_is_empty(mtflush_io->wq));
+
+	/* Send one exit work item/thread */
+	for (i=0; i < (ulint)srv_mtflush_threads; i++) {
+		work_item[i].tsk = MT_WRK_NONE;
+		work_item[i].wi_status = WRK_ITEM_EXIT;
+		work_item[i].wheap = mtflush_io->wheap;
+		work_item[i].rheap = mtflush_io->rheap;
+		work_item[i].id_usr = 0;
+
+		ib_wqueue_add(mtflush_io->wq,
+			(void *)&(work_item[i]),
+			mtflush_io->wheap);
+	}
+
+	/* Requests sent */
+	os_fast_mutex_unlock(&mtflush_mtx);
+
+	/* Wait until all work items on a work queue are processed */
+	while(!ib_wqueue_is_empty(mtflush_io->wq)) {
+		/* Wait */
+		os_thread_sleep(MT_WAIT_IN_USECS);
+	}
+
+	ut_a(ib_wqueue_is_empty(mtflush_io->wq));
+
+	/* Collect all work done items */
+	for (i=0; i < (ulint)srv_mtflush_threads;) {
+		wrk_t*	reply;
+
+		reply = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wr_cq, MT_WAIT_IN_USECS);
+
+		/* If we receive a reply to a work item and its status is
+		exit, a thread has processed this message and exited */
+		if (reply && reply->wi_status == WRK_ITEM_EXIT) {
+			i++;
+		}
+	}
+
+	/* Wait MT_WAIT_IN_USECS (5 seconds) to allow threads really exit */
+	os_thread_sleep(MT_WAIT_IN_USECS);
+
+	/* Make sure that work queue is empty */
+	while(!ib_wqueue_is_empty(mtflush_io->wq))
+	{
+		ib_wqueue_nowait(mtflush_io->wq);
+	}
+
+	os_fast_mutex_lock(&mtflush_mtx);
+
+	ut_a(ib_wqueue_is_empty(mtflush_io->wq));
+	ut_a(ib_wqueue_is_empty(mtflush_io->wr_cq));
+	ut_a(ib_wqueue_is_empty(mtflush_io->rd_cq));
+
+	/* Free all queues */
+	ib_wqueue_free(mtflush_io->wq);
+	ib_wqueue_free(mtflush_io->wr_cq);
+	ib_wqueue_free(mtflush_io->rd_cq);
+
+	mtflush_io->wq = NULL;
+	mtflush_io->wr_cq = NULL;
+	mtflush_io->rd_cq = NULL;
+	mtflush_work_initialized = 0;
+
+	/* The context structure itself was allocated from wheap in
+	buf_mtflu_handler_init(). Everything that still reads
+	mtflush_io must therefore happen before wheap is freed: fetch
+	both heap pointers, release the mutex embedded in the context,
+	and free wheap last. */
+	wheap = mtflush_io->wheap;
+	rheap = mtflush_io->rheap;
+
+	os_fast_mutex_free(&mtflush_io->thread_global_mtx);
+
+	/* Free heaps; mtflush_io must not be dereferenced after this */
+	mem_heap_free(rheap);
+	mem_heap_free(wheap);
+
+	os_fast_mutex_unlock(&mtflush_mtx);
+	os_fast_mutex_free(&mtflush_mtx);
+}
+
+/******************************************************************//**
+Initialize multi-threaded flush thread synchronization data: create the
+heaps, the work and completion queues and the mutexes, start n_threads
+worker threads and mark multi-threaded flush as initialized.
+@return Initialized multi-threaded flush thread synchronization data. */
+void*
+buf_mtflu_handler_init(
+/*===================*/
+	ulint	n_threads,	/*!< in: Number of threads to create */
+	ulint	wrk_cnt)	/*!< in: Number of work items; not
+				used by this function */
+{
+	ulint		i;
+	mem_heap_t*	mtflush_heap;
+	mem_heap_t*	mtflush_heap2;
+
+	/* Create heap, work queue, write completion queue, read
+	completion queue for multi-threaded flush, and init
+	handler. */
+	mtflush_heap = mem_heap_create(0);
+	ut_a(mtflush_heap != NULL);
+	mtflush_heap2 = mem_heap_create(0);
+	ut_a(mtflush_heap2 != NULL);
+
+	mtflush_ctx = (thread_sync_t *)mem_heap_alloc(mtflush_heap,
+				sizeof(thread_sync_t));
+	/* Check the allocation before writing through the pointer */
+	ut_a(mtflush_ctx != NULL);
+	memset(mtflush_ctx, 0, sizeof(thread_sync_t));
+
+	mtflush_ctx->thread_data = (thread_data_t*)mem_heap_alloc(
+		mtflush_heap, sizeof(thread_data_t) * n_threads);
+	ut_a(mtflush_ctx->thread_data);
+	memset(mtflush_ctx->thread_data, 0, sizeof(thread_data_t) * n_threads);
+
+	mtflush_ctx->n_threads = n_threads;
+	mtflush_ctx->wq = ib_wqueue_create();
+	ut_a(mtflush_ctx->wq);
+	mtflush_ctx->wr_cq = ib_wqueue_create();
+	ut_a(mtflush_ctx->wr_cq);
+	mtflush_ctx->rd_cq = ib_wqueue_create();
+	ut_a(mtflush_ctx->rd_cq);
+	mtflush_ctx->wheap = mtflush_heap;
+	mtflush_ctx->rheap = mtflush_heap2;
+
+	os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_ctx->thread_global_mtx);
+	os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx);
+
+	/* Create threads for page-compression-flush. Each new thread
+	starts by taking thread_global_mtx and searching thread_data[]
+	for its own id, so hold the mutex until every id has been
+	stored; otherwise a thread could search before its slot is
+	filled in. */
+	os_fast_mutex_lock(&mtflush_ctx->thread_global_mtx);
+
+	for(i=0; i < n_threads; i++) {
+		os_thread_id_t new_thread_id;
+
+		mtflush_ctx->thread_data[i].wt_status = WTHR_INITIALIZED;
+
+		mtflush_ctx->thread_data[i].wthread = os_thread_create(
+			mtflush_io_thread,
+			((void *) mtflush_ctx),
+			&new_thread_id);
+
+		mtflush_ctx->thread_data[i].wthread_id = new_thread_id;
+	}
+
+	os_fast_mutex_unlock(&mtflush_ctx->thread_global_mtx);
+
+	buf_mtflu_work_init();
+
+	return((void *)mtflush_ctx);
+}
+
+/******************************************************************//**
+Flush buffer pool instances: queue one write work item per buffer pool
+instance and wait until the same number of completions has arrived on
+the write completion queue. Both callers in this file hold mtflush_mtx
+around the call.
+Entry i of per_pool_pages_flushed receives the count of the i-th
+completion to arrive, which need not belong to buffer pool instance i.
+If shutdown is in progress nothing is flushed and all entries are 0.
+@return number of pages flushed. */
+ulint
+buf_mtflu_flush_work_items(
+/*=======================*/
+	ulint buf_pool_inst,		/*!< in: Number of buffer pool instances */
+	ulint *per_pool_pages_flushed,	/*!< out: Number of pages
+					flushed/instance */
+	buf_flush_t flush_type,		/*!< in: Type of flush */
+	ulint min_n,			/*!< in: Wished minimum number of
+					blocks to be flushed */
+	lsn_t lsn_limit)		/*!< in: All blocks whose
+					oldest_modification is smaller than
+					this should be flushed (if their
+					number does not exceed min_n) */
+{
+	ulint n_flushed=0, i;
+	mem_heap_t* work_heap;
+	mem_heap_t* reply_heap;
+	wrk_t work_item[MTFLUSH_MAX_WORKER];
+
+	/* work_item[] is a fixed-size stack array */
+	ut_a(buf_pool_inst <= MTFLUSH_MAX_WORKER);
+
+	if (mtflush_ctx->gwt_status == WTHR_KILL_IT) {
+		/* Callers read the per-instance counts unconditionally,
+		so give them defined values before bailing out. */
+		for (i = 0; i < buf_pool_inst; i++) {
+			per_pool_pages_flushed[i] = 0;
+		}
+
+		return 0;
+	}
+
+	/* Allocate heaps where the queue node items for the work
+	queue and for the completion queue are allocated */
+	work_heap = mem_heap_create(0);
+	reply_heap = mem_heap_create(0);
+
+
+	for(i=0;i<buf_pool_inst; i++) {
+		work_item[i].tsk = MT_WRK_WRITE;
+		work_item[i].wr.buf_pool = buf_pool_from_array(i);
+		work_item[i].wr.flush_type = flush_type;
+		work_item[i].wr.min = min_n;
+		work_item[i].wr.lsn_limit = lsn_limit;
+		work_item[i].wi_status = WRK_ITEM_UNSET;
+		work_item[i].wheap = work_heap;
+		work_item[i].rheap = reply_heap;
+		work_item[i].n_flushed = 0;
+		work_item[i].id_usr = 0;
+
+		ib_wqueue_add(mtflush_ctx->wq,
+			(void *)(work_item + i),
+			work_heap);
+	}
+
+	/* wait on the completion to arrive; work_item[] lives on this
+	stack frame, so we must not return before every item is back */
+	for(i=0; i< buf_pool_inst;) {
+		wrk_t *done_wi = NULL;
+		done_wi = (wrk_t *)ib_wqueue_wait(mtflush_ctx->wr_cq);
+
+		if (done_wi != NULL) {
+			per_pool_pages_flushed[i] = done_wi->n_flushed;
+
+#ifdef UNIV_MTFLUSH_DEBUG
+			if((int)done_wi->id_usr == 0 &&
+				(done_wi->wi_status == WRK_ITEM_SET ||
+				done_wi->wi_status == WRK_ITEM_UNSET)) {
+				fprintf(stderr,
+					"**Set/Unused work_item[%lu] flush_type=%d\n",
+					i,
+					done_wi->wr.flush_type);
+				ut_a(0);
+			}
+#endif
+
+			n_flushed+= done_wi->n_flushed;
+			i++;
+		}
+	}
+
+	/* Release used queue nodes */
+	mem_heap_free(work_heap);
+	mem_heap_free(reply_heap);
+
+	return(n_flushed);
+}
+
+/*******************************************************************//**
+Multi-threaded version of buf_flush_list: hands one flush-list batch
+per buffer pool instance to the flush threads and waits for them.
+@return always true */
+bool
+buf_mtflu_flush_list(
+/*=================*/
+	ulint	min_n,		/*!< in: wished minimum number of blocks
+				flushed (it is not guaranteed that the
+				actual number is that big, though) */
+	lsn_t	lsn_limit,	/*!< in the case BUF_FLUSH_LIST all
+				blocks whose oldest_modification is
+				smaller than this should be flushed
+				(if their number does not exceed
+				min_n), otherwise ignored */
+	ulint*	n_processed)	/*!< out: the number of pages
+				which were processed is passed
+				back to caller. Ignored if NULL */
+
+{
+	ulint		i;
+	bool		success = true;
+	ulint		cnt_flush[MTFLUSH_MAX_WORKER];
+
+	/* The counts are read below even if the flush request returns
+	without filling them in, so start from zero. */
+	memset(cnt_flush, 0, sizeof(cnt_flush));
+
+	if (n_processed) {
+		*n_processed = 0;
+	}
+
+	if (min_n != ULINT_MAX) {
+		/* Ensure that flushing is spread evenly amongst the
+		buffer pool instances. When min_n is ULINT_MAX
+		we need to flush everything up to the lsn limit
+		so no limit here. */
+		min_n = (min_n + srv_buf_pool_instances - 1)
+			/ srv_buf_pool_instances;
+	}
+
+	/* This lock is to safeguard against re-entry if any. */
+	os_fast_mutex_lock(&mtflush_mtx);
+	buf_mtflu_flush_work_items(srv_buf_pool_instances,
+		cnt_flush, BUF_FLUSH_LIST,
+		min_n, lsn_limit);
+	os_fast_mutex_unlock(&mtflush_mtx);
+
+	for (i = 0; i < srv_buf_pool_instances; i++) {
+		if (n_processed) {
+			*n_processed += cnt_flush[i];
+		}
+		if (cnt_flush[i]) {
+			MONITOR_INC_VALUE_CUMULATIVE(
+				MONITOR_FLUSH_BATCH_TOTAL_PAGE,
+				MONITOR_FLUSH_BATCH_COUNT,
+				MONITOR_FLUSH_BATCH_PAGES,
+				cnt_flush[i]);
+		}
+	}
+#ifdef UNIV_MTFLUSH_DEBUG
+	/* n_processed may be NULL, so do not dereference it blindly */
+	fprintf(stderr, "%s: [1] [*n_processed: (min:%lu)%lu ]\n",
+		__FUNCTION__, (min_n * srv_buf_pool_instances),
+		n_processed ? *n_processed : (ulint) 0);
+#endif
+	return(success);
+}
+
+/*********************************************************************//**
+Clears up tail of the LRU lists:
+* Put replaceable pages at the tail of LRU to the free list
+* Flush dirty pages at the tail of LRU to the disk
+The depth to which we scan each buffer pool is controlled by dynamic
+config parameter innodb_LRU_scan_depth.
+Multi-threaded flush must have been initialized before this is called.
+@return total pages flushed */
+UNIV_INTERN
+ulint
+buf_mtflu_flush_LRU_tail(void)
+/*==========================*/
+{
+	ulint	total_flushed=0, i;
+	ulint	cnt_flush[MTFLUSH_MAX_WORKER];
+
+	ut_a(buf_mtflu_init_done());
+
+	/* The counts are read below even if the flush request returns
+	without filling them in, so start from zero. */
+	memset(cnt_flush, 0, sizeof(cnt_flush));
+
+	/* This lock is to safeguard against re-entry if any */
+	os_fast_mutex_lock(&mtflush_mtx);
+	buf_mtflu_flush_work_items(srv_buf_pool_instances,
+		cnt_flush, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0);
+	os_fast_mutex_unlock(&mtflush_mtx);
+
+	for (i = 0; i < srv_buf_pool_instances; i++) {
+		if (cnt_flush[i]) {
+			total_flushed += cnt_flush[i];
+
+			MONITOR_INC_VALUE_CUMULATIVE(
+				MONITOR_LRU_BATCH_TOTAL_PAGE,
+				MONITOR_LRU_BATCH_COUNT,
+				MONITOR_LRU_BATCH_PAGES,
+				cnt_flush[i]);
+		}
+	}
+
+	/* Tested with #ifdef like every other use of this macro in
+	the file */
+#ifdef UNIV_MTFLUSH_DEBUG
+	fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu ]\n", (
+		srv_LRU_scan_depth * srv_buf_pool_instances), total_flushed);
+#endif
+
+	return(total_flushed);
+}
+
+/*********************************************************************//**
+Copy the identifiers of the multi-threaded flush threads recorded in
+the thread context into a caller supplied array. */
+void
+buf_mtflu_set_thread_ids(
+/*=====================*/
+	ulint		n_threads,	/*!<in: Number of threads to fill */
+	void*		ctx,		/*!<in: thread context */
+	os_thread_id_t*	thread_ids)	/*!<out: thread id array with room
+					for n_threads entries */
+{
+	thread_sync_t*	sync_data = (thread_sync_t *)ctx;
+
+	ut_a(sync_data != NULL);
+	ut_a(thread_ids != NULL);
+
+	const thread_data_t*	slots = sync_data->thread_data;
+
+	for (ulint idx = 0; idx != n_threads; ++idx) {
+		thread_ids[idx] = slots[idx].wthread_id;
+	}
+}
diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc
index 7c8369c0c09..ec76c9923fe 100644
--- a/storage/innobase/buf/buf0rea.cc
+++ b/storage/innobase/buf/buf0rea.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -184,14 +185,14 @@ buf_read_page_low(
*err = fil_io(OS_FILE_READ | wake_later
| ignore_nonexistent_pages,
sync, space, zip_size, offset, 0, zip_size,
- bpage->zip.data, bpage);
+ bpage->zip.data, bpage, &bpage->write_size);
} else {
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
*err = fil_io(OS_FILE_READ | wake_later
| ignore_nonexistent_pages,
sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
- ((buf_block_t*) bpage)->frame, bpage);
+ ((buf_block_t*) bpage)->frame, bpage, 0);
}
if (sync) {
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
index b866f44cc54..52a9bb686df 100644
--- a/storage/innobase/dict/dict0dict.cc
+++ b/storage/innobase/dict/dict0dict.cc
@@ -2,6 +2,7 @@
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 506ba320853..cb43477ed59 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2014, MariaDB Corporation. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,6 +25,8 @@ Created 10/25/1995 Heikki Tuuri
*******************************************************/
#include "fil0fil.h"
+#include "fil0pagecompress.h"
+#include "fsp0pagecompress.h"
#include <debug_sync.h>
#include <my_dbug.h>
@@ -45,6 +48,7 @@ Created 10/25/1995 Heikki Tuuri
#include "page0zip.h"
#include "trx0sys.h"
#include "row0mysql.h"
+#include "os0file.h"
#ifndef UNIV_HOTBACKUP
# include "buf0lru.h"
# include "ibuf0ibuf.h"
@@ -54,6 +58,13 @@ Created 10/25/1995 Heikki Tuuri
# include "srv0srv.h"
static ulint srv_data_read, srv_data_written;
#endif /* !UNIV_HOTBACKUP */
+#include "zlib.h"
+#ifdef __linux__
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#endif
+#include "row0mysql.h"
MYSQL_PLUGIN_IMPORT extern my_bool lower_case_file_system;
@@ -262,11 +273,16 @@ fil_read(
block size multiple */
void* buf, /*!< in/out: buffer where to store data read;
in aio this must be appropriately aligned */
- void* message) /*!< in: message for aio handler if non-sync
+ void* message, /*!< in: message for aio handler if non-sync
aio used, else ignored */
+ ulint* write_size) /*!< in/out: Actual write size initialized
+ after first successful trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
{
return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset,
- byte_offset, len, buf, message));
+ byte_offset, len, buf, message, write_size));
}
/********************************************************************//**
@@ -291,18 +307,22 @@ fil_write(
be a block size multiple */
void* buf, /*!< in: buffer from which to write; in aio
this must be appropriately aligned */
- void* message) /*!< in: message for aio handler if non-sync
+ void* message, /*!< in: message for aio handler if non-sync
aio used, else ignored */
+ ulint* write_size) /*!< in/out: Actual write size initialized
+ after first successful trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
{
ut_ad(!srv_read_only_mode);
return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset,
- byte_offset, len, buf, message));
+ byte_offset, len, buf, message, write_size));
}
/*******************************************************************//**
Returns the table space by a given id, NULL if not found. */
-UNIV_INLINE
fil_space_t*
fil_space_get_by_id(
/*================*/
@@ -320,6 +340,19 @@ fil_space_get_by_id(
return(space);
}
+/****************************************************************//**
+Look up the id of the tablespace that a file node belongs to.
+@return id stored in the space the node points to */
+ulint
+fil_node_get_space_id(
+/*==================*/
+	fil_node_t*	node)	/*!< in: file node; both the node and its
+				space pointer are checked with ut_ad */
+{
+	ut_ad(node);
+	ut_ad(node->space);
+
+	ulint	space_id = node->space->id;
+
+	return(space_id);
+}
+
/*******************************************************************//**
Returns the table space by a given name, NULL if not found. */
UNIV_INLINE
@@ -540,8 +573,9 @@ fil_node_open_file(
byte* buf2;
byte* page;
ulint space_id;
- ulint flags;
+ ulint flags=0;
ulint page_size;
+ ulint atomic_writes=0;
ut_ad(mutex_own(&(system->mutex)));
ut_a(node->n_pending == 0);
@@ -558,7 +592,7 @@ fil_node_open_file(
node->handle = os_file_create_simple_no_error_handling(
innodb_file_data_key, node->name, OS_FILE_OPEN,
- OS_FILE_READ_ONLY, &success);
+ OS_FILE_READ_ONLY, &success, 0);
if (!success) {
/* The following call prints an error message */
os_file_get_last_error(true);
@@ -575,6 +609,8 @@ fil_node_open_file(
size_bytes = os_file_get_size(node->handle);
ut_a(size_bytes != (os_offset_t) -1);
+
+ node->file_block_size = os_file_get_block_size(node->handle, node->name);
#ifdef UNIV_HOTBACKUP
if (space->id == 0) {
node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
@@ -606,10 +642,14 @@ fil_node_open_file(
set */
page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
- success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE);
+ success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE,
+ space->flags);
+
space_id = fsp_header_get_space_id(page);
flags = fsp_header_get_flags(page);
page_size = fsp_flags_get_page_size(flags);
+ atomic_writes = fsp_flags_get_atomic_writes(flags);
+
ut_free(buf2);
@@ -660,6 +700,17 @@ fil_node_open_file(
ut_error;
}
+ if (UNIV_UNLIKELY(space->flags != flags)) {
+ if (!dict_tf_verify_flags(space->flags, flags)) {
+ fprintf(stderr,
+ "InnoDB: Error: table flags are 0x%lx"
+ " in the data dictionary\n"
+ "InnoDB: but the flags in file %s are 0x%lx!\n",
+ space->flags, node->name, flags);
+ ut_error;
+ }
+ }
+
if (size_bytes >= 1024 * 1024) {
/* Truncate the size to whole megabytes. */
size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
@@ -679,6 +730,8 @@ add_size:
space->size += node->size;
}
+ atomic_writes = fsp_flags_get_atomic_writes(space->flags);
+
/* printf("Opening file %s\n", node->name); */
/* Open the file for reading and writing, in Windows normally in the
@@ -689,18 +742,22 @@ add_size:
node->handle = os_file_create(innodb_file_log_key,
node->name, OS_FILE_OPEN,
OS_FILE_AIO, OS_LOG_FILE,
- &ret);
+ &ret, atomic_writes);
} else if (node->is_raw_disk) {
node->handle = os_file_create(innodb_file_data_key,
node->name,
OS_FILE_OPEN_RAW,
OS_FILE_AIO, OS_DATA_FILE,
- &ret);
+ &ret, atomic_writes);
} else {
node->handle = os_file_create(innodb_file_data_key,
node->name, OS_FILE_OPEN,
OS_FILE_AIO, OS_DATA_FILE,
- &ret);
+ &ret, atomic_writes);
+ }
+
+ if (node->file_block_size == 0) {
+ node->file_block_size = os_file_get_block_size(node->handle, node->name);
}
ut_a(ret);
@@ -1071,7 +1128,6 @@ fil_space_create(
DBUG_EXECUTE_IF("fil_space_create_failure", return(false););
ut_a(fil_system);
- ut_a(fsp_flags_is_valid(flags));
/* Look for a matching tablespace and if found free it. */
do {
@@ -1148,6 +1204,7 @@ fil_space_create(
space->flags = flags;
space->magic_n = FIL_SPACE_MAGIC_N;
+ space->printed_compression_failure = false;
rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP);
@@ -1725,12 +1782,12 @@ fil_write_lsn_and_arch_no_to_file(
buf = static_cast<byte*>(ut_align(buf1, UNIV_PAGE_SIZE));
err = fil_read(TRUE, space, 0, sum_of_sizes, 0,
- UNIV_PAGE_SIZE, buf, NULL);
+ UNIV_PAGE_SIZE, buf, NULL, 0);
if (err == DB_SUCCESS) {
mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
err = fil_write(TRUE, space, 0, sum_of_sizes, 0,
- UNIV_PAGE_SIZE, buf, NULL);
+ UNIV_PAGE_SIZE, buf, NULL, 0);
}
mem_free(buf1);
@@ -1870,8 +1927,10 @@ fil_read_first_page(
#endif /* UNIV_LOG_ARCHIVE */
lsn_t* min_flushed_lsn, /*!< out: min of flushed
lsn values in data files */
- lsn_t* max_flushed_lsn) /*!< out: max of flushed
+ lsn_t* max_flushed_lsn, /*!< out: max of flushed
lsn values in data files */
+ ulint orig_space_id) /*!< in: original file space
+ id */
{
byte* buf;
byte* page;
@@ -1884,7 +1943,10 @@ fil_read_first_page(
page = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
- os_file_read(data_file, page, 0, UNIV_PAGE_SIZE);
+ os_file_read(data_file, page, 0, UNIV_PAGE_SIZE,
+ orig_space_id != ULINT_UNDEFINED ?
+ fil_space_is_page_compressed(orig_space_id) :
+ FALSE);
/* The FSP_HEADER on page 0 is only valid for the first file
in a tablespace. So if this is not the first datafile, leave
@@ -1893,12 +1955,21 @@ fil_read_first_page(
if (!one_read_already) {
*flags = fsp_header_get_flags(page);
*space_id = fsp_header_get_space_id(page);
+ }
- check_msg = fil_check_first_page(page);
+ /* Page is page compressed page, need to decompress, before
+ continue. */
+ if (fil_page_is_compressed(page)) {
+ ulint write_size=0;
+ fil_decompress_page(NULL, page, UNIV_PAGE_SIZE, &write_size);
}
flushed_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
+ if (!one_read_already) {
+ check_msg = fil_check_first_page(page);
+ }
+
ut_free(buf);
if (check_msg) {
@@ -3027,7 +3098,7 @@ fil_create_link_file(
file = os_file_create_simple_no_error_handling(
innodb_file_data_key, link_filepath,
- OS_FILE_CREATE, OS_FILE_READ_WRITE, &success);
+ OS_FILE_CREATE, OS_FILE_READ_WRITE, &success, 0);
if (!success) {
/* The following call will print an error message */
@@ -3043,10 +3114,10 @@ fil_create_link_file(
ut_print_filename(stderr, filepath);
fputs(" already exists.\n", stderr);
err = DB_TABLESPACE_EXISTS;
-
} else if (error == OS_FILE_DISK_FULL) {
err = DB_OUT_OF_FILE_SPACE;
-
+ } else if (error == OS_FILE_OPERATION_NOT_SUPPORTED) {
+ err = DB_UNSUPPORTED;
} else {
err = DB_ERROR;
}
@@ -3057,7 +3128,7 @@ fil_create_link_file(
}
if (!os_file_write(link_filepath, file, filepath, 0,
- strlen(filepath))) {
+ strlen(filepath))) {
err = DB_ERROR;
}
@@ -3136,8 +3207,9 @@ fil_open_linked_file(
/*===============*/
const char* tablename, /*!< in: database/tablename */
char** remote_filepath,/*!< out: remote filepath */
- os_file_t* remote_file) /*!< out: remote file handle */
-
+ os_file_t* remote_file, /*!< out: remote file handle */
+ ulint atomic_writes) /*!< in: atomic writes table option
+ value */
{
ibool success;
@@ -3151,7 +3223,7 @@ fil_open_linked_file(
*remote_file = os_file_create_simple_no_error_handling(
innodb_file_data_key, *remote_filepath,
OS_FILE_OPEN, OS_FILE_READ_ONLY,
- &success);
+ &success, atomic_writes);
if (!success) {
char* link_filepath = fil_make_isl_name(tablename);
@@ -3206,6 +3278,7 @@ fil_create_new_single_table_tablespace(
/* TRUE if a table is created with CREATE TEMPORARY TABLE */
bool is_temp = !!(flags2 & DICT_TF2_TEMPORARY);
bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags);
+ ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags);
ut_a(space_id > 0);
ut_ad(!srv_read_only_mode);
@@ -3238,7 +3311,8 @@ fil_create_new_single_table_tablespace(
OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT,
OS_FILE_NORMAL,
OS_DATA_FILE,
- &ret);
+ &ret,
+ atomic_writes);
if (ret == FALSE) {
/* The following call will print an error message */
@@ -3265,6 +3339,11 @@ fil_create_new_single_table_tablespace(
goto error_exit_3;
}
+ if (error == OS_FILE_OPERATION_NOT_SUPPORTED) {
+ err = DB_UNSUPPORTED;
+ goto error_exit_3;
+ }
+
if (error == OS_FILE_DISK_FULL) {
err = DB_OUT_OF_FILE_SPACE;
goto error_exit_3;
@@ -3303,6 +3382,7 @@ fil_create_new_single_table_tablespace(
flags = fsp_flags_set_page_size(flags, UNIV_PAGE_SIZE);
fsp_header_init_fields(page, space_id, flags);
mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
+ ut_ad(fsp_flags_is_valid(flags));
if (!(fsp_flags_is_compressed(flags))) {
buf_flush_init_for_writing(page, NULL, 0);
@@ -3479,16 +3559,25 @@ fil_open_single_table_tablespace(
fsp_open_info remote;
ulint tablespaces_found = 0;
ulint valid_tablespaces_found = 0;
+ ulint atomic_writes = 0;
#ifdef UNIV_SYNC_DEBUG
ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(!fix_dict || mutex_own(&(dict_sys->mutex)));
- if (!fsp_flags_is_valid(flags)) {
+ /* Table flags can be ULINT_UNDEFINED if
+ dict_tf_to_fsp_flags_failure is set. */
+ if (flags != ULINT_UNDEFINED) {
+ if (!fsp_flags_is_valid(flags)) {
+ return(DB_CORRUPTION);
+ }
+ } else {
return(DB_CORRUPTION);
}
+ atomic_writes = fsp_flags_get_atomic_writes(flags);
+
/* If the tablespace was relocated, we do not
compare the DATA_DIR flag */
ulint mod_flags = flags & ~FSP_FLAGS_MASK_DATA_DIR;
@@ -3513,7 +3602,7 @@ fil_open_single_table_tablespace(
}
link_file_found = fil_open_linked_file(
- tablename, &remote.filepath, &remote.file);
+ tablename, &remote.filepath, &remote.file, atomic_writes);
remote.success = link_file_found;
if (remote.success) {
/* possibility of multiple files. */
@@ -3541,7 +3630,7 @@ fil_open_single_table_tablespace(
if (dict.filepath) {
dict.file = os_file_create_simple_no_error_handling(
innodb_file_data_key, dict.filepath, OS_FILE_OPEN,
- OS_FILE_READ_ONLY, &dict.success);
+ OS_FILE_READ_ONLY, &dict.success, atomic_writes);
if (dict.success) {
/* possibility of multiple files. */
validate = true;
@@ -3553,7 +3642,7 @@ fil_open_single_table_tablespace(
ut_a(def.filepath);
def.file = os_file_create_simple_no_error_handling(
innodb_file_data_key, def.filepath, OS_FILE_OPEN,
- OS_FILE_READ_ONLY, &def.success);
+ OS_FILE_READ_ONLY, &def.success, atomic_writes);
if (def.success) {
tablespaces_found++;
}
@@ -3572,7 +3661,7 @@ fil_open_single_table_tablespace(
#ifdef UNIV_LOG_ARCHIVE
&space_arch_log_no, &space_arch_log_no,
#endif /* UNIV_LOG_ARCHIVE */
- &def.lsn, &def.lsn);
+ &def.lsn, &def.lsn, id);
def.valid = !def.check_msg;
/* Validate this single-table-tablespace with SYS_TABLES,
@@ -3597,7 +3686,7 @@ fil_open_single_table_tablespace(
#ifdef UNIV_LOG_ARCHIVE
&remote.arch_log_no, &remote.arch_log_no,
#endif /* UNIV_LOG_ARCHIVE */
- &remote.lsn, &remote.lsn);
+ &remote.lsn, &remote.lsn, id);
remote.valid = !remote.check_msg;
/* Validate this single-table-tablespace with SYS_TABLES,
@@ -3623,7 +3712,7 @@ fil_open_single_table_tablespace(
#ifdef UNIV_LOG_ARCHIVE
&dict.arch_log_no, &dict.arch_log_no,
#endif /* UNIV_LOG_ARCHIVE */
- &dict.lsn, &dict.lsn);
+ &dict.lsn, &dict.lsn, id);
dict.valid = !dict.check_msg;
/* Validate this single-table-tablespace with SYS_TABLES,
@@ -3887,7 +3976,8 @@ fil_user_tablespace_find_space_id(
for (ulint j = 0; j < page_count; ++j) {
- st = os_file_read(fsp->file, page, (j* page_size), page_size);
+ st = os_file_read(fsp->file, page, (j* page_size), page_size,
+ fsp_flags_is_page_compressed(fsp->flags));
if (!st) {
ib_logf(IB_LOG_LEVEL_INFO,
@@ -4000,7 +4090,7 @@ fil_user_tablespace_restore_page(
err = os_file_write(fsp->filepath, fsp->file, page,
(zip_size ? zip_size : page_size) * page_no,
- buflen);
+ buflen);
os_file_flush(fsp->file);
out:
@@ -4027,7 +4117,7 @@ check_first_page:
#ifdef UNIV_LOG_ARCHIVE
&fsp->arch_log_no, &fsp->arch_log_no,
#endif /* UNIV_LOG_ARCHIVE */
- &fsp->lsn, &fsp->lsn)) {
+ &fsp->lsn, &fsp->lsn, ULINT_UNDEFINED)) {
ib_logf(IB_LOG_LEVEL_ERROR,
"%s in tablespace %s (table %s)",
check_msg, fsp->filepath, tablename);
@@ -4100,9 +4190,7 @@ fil_load_single_table_tablespace(
fsp_open_info def;
fsp_open_info remote;
os_offset_t size;
-#ifdef UNIV_HOTBACKUP
fil_space_t* space;
-#endif
memset(&def, 0, sizeof(def));
memset(&remote, 0, sizeof(remote));
@@ -4135,7 +4223,8 @@ fil_load_single_table_tablespace(
one of them is sent to this function. So if this table has
already been loaded, there is nothing to do.*/
mutex_enter(&fil_system->mutex);
- if (fil_space_get_by_name(tablename)) {
+ space = fil_space_get_by_name(tablename);
+ if (space) {
mem_free(tablename);
mutex_exit(&fil_system->mutex);
return;
@@ -4160,7 +4249,7 @@ fil_load_single_table_tablespace(
/* Check for a link file which locates a remote tablespace. */
remote.success = fil_open_linked_file(
- tablename, &remote.filepath, &remote.file);
+ tablename, &remote.filepath, &remote.file, FALSE);
/* Read the first page of the remote tablespace */
if (remote.success) {
@@ -4175,7 +4264,7 @@ fil_load_single_table_tablespace(
/* Try to open the tablespace in the datadir. */
def.file = os_file_create_simple_no_error_handling(
innodb_file_data_key, def.filepath, OS_FILE_OPEN,
- OS_FILE_READ_WRITE, &def.success);
+ OS_FILE_READ_ONLY, &def.success, FALSE);
/* Read the first page of the remote tablespace */
if (def.success) {
@@ -4903,6 +4992,7 @@ retry:
}
page_size = fsp_flags_get_zip_size(space->flags);
+
if (!page_size) {
page_size = UNIV_PAGE_SIZE;
}
@@ -4940,6 +5030,11 @@ retry:
start_page_no = space->size;
file_start_page_no = space->size - node->size;
+ /* Determine correct file block size */
+ if (node->file_block_size == 0) {
+ node->file_block_size = os_file_get_block_size(node->handle, node->name);
+ }
+
#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
os_offset_t start_offset = start_page_no * page_size;
@@ -4951,7 +5046,7 @@ retry:
"space for file \'%s\' failed. Current size "
INT64PF ", desired size " INT64PF "\n",
node->name, start_offset, len+start_offset);
- os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE);
+ os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE, __FILE__, __LINE__);
success = FALSE;
} else {
success = TRUE;
@@ -4961,9 +5056,11 @@ retry:
success = FALSE; errno = 28; os_has_said_disk_full = TRUE;);
mutex_enter(&fil_system->mutex);
+
if (success) {
- node->size += n_pages;
- space->size += n_pages;
+ node->size += (size_after_extend - start_page_no);
+ space->size += (size_after_extend - start_page_no);
+
os_has_said_disk_full = FALSE;
}
@@ -4999,7 +5096,7 @@ retry:
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
node->name, node->handle, buf,
offset, page_size * n_pages,
- NULL, NULL);
+ node, NULL, 0, FALSE, 0);
#endif /* UNIV_HOTBACKUP */
@@ -5098,7 +5195,7 @@ fil_extend_tablespaces_to_stored_len(void)
single-threaded operation */
error = fil_read(TRUE, space->id,
fsp_flags_get_zip_size(space->flags),
- 0, 0, UNIV_PAGE_SIZE, buf, NULL);
+ 0, 0, UNIV_PAGE_SIZE, buf, NULL, 0);
ut_a(error == DB_SUCCESS);
size_in_header = fsp_get_size_low(buf);
@@ -5378,8 +5475,13 @@ fil_io(
void* buf, /*!< in/out: buffer where to store read data
or from where to write; in aio this must be
appropriately aligned */
- void* message) /*!< in: message for aio handler if non-sync
+ void* message, /*!< in: message for aio handler if non-sync
aio used, else ignored */
+ ulint* write_size) /*!< in/out: Actual write size initialized
+ after first successful trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
{
ulint mode;
fil_space_t* space;
@@ -5389,6 +5491,8 @@ fil_io(
ulint wake_later;
os_offset_t offset;
ibool ignore_nonexistent_pages;
+ ibool page_compressed = FALSE;
+ ulint page_compression_level = 0;
is_log = type & OS_FILE_LOG;
type = type & ~OS_FILE_LOG;
@@ -5442,6 +5546,11 @@ fil_io(
} else if (type == OS_FILE_WRITE) {
ut_ad(!srv_read_only_mode);
srv_stats.data_written.add(len);
+ if (fil_page_is_index_page((byte *)buf)) {
+ srv_stats.index_pages_written.inc();
+ } else {
+ srv_stats.non_index_pages_written.inc();
+ }
}
/* Reserve the fil_system mutex and make sure that we can open at
@@ -5567,6 +5676,9 @@ fil_io(
ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
+ page_compressed = fsp_flags_is_page_compressed(space->flags);
+ page_compression_level = fsp_flags_get_page_compression_level(space->flags);
+
#ifdef UNIV_HOTBACKUP
/* In mysqlbackup do normal i/o, not aio */
if (type == OS_FILE_READ) {
@@ -5579,7 +5691,8 @@ fil_io(
#else
/* Queue the aio request */
ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
- offset, len, node, message);
+ offset, len, node, message, write_size,
+ page_compressed, page_compression_level);
#endif /* UNIV_HOTBACKUP */
@@ -6119,7 +6232,8 @@ fil_iterate(
ut_ad(!(n_bytes % iter.page_size));
if (!os_file_read(iter.file, io_buffer, offset,
- (ulint) n_bytes)) {
+ (ulint) n_bytes,
+ fil_space_is_page_compressed(space_id))) {
ib_logf(IB_LOG_LEVEL_ERROR, "os_file_read() failed");
@@ -6206,7 +6320,7 @@ fil_tablespace_iterate(
file = os_file_create_simple_no_error_handling(
innodb_file_data_key, filepath,
- OS_FILE_OPEN, OS_FILE_READ_WRITE, &success);
+ OS_FILE_OPEN, OS_FILE_READ_WRITE, &success, FALSE);
DBUG_EXECUTE_IF("fil_tablespace_iterate_failure",
{
@@ -6258,7 +6372,8 @@ fil_tablespace_iterate(
/* Read the first page and determine the page and zip size. */
- if (!os_file_read(file, page, 0, UNIV_PAGE_SIZE)) {
+ if (!os_file_read(file, page, 0, UNIV_PAGE_SIZE,
+ dict_tf_get_page_compression(table->flags))) {
err = DB_IO_ERROR;
@@ -6424,3 +6539,87 @@ fil_mtr_rename_log(
0, 0, new_name, old_name, mtr);
}
}
+
+/****************************************************************//**
+Acquire fil_system mutex.
+Checks via ut_ad that mutex_own() is false for fil_system->mutex and
+then enters it; fil_system_exit() is the matching release call. */
+void
+fil_system_enter(void)
+/*==================*/
+{
+ ut_ad(!mutex_own(&fil_system->mutex));
+ mutex_enter(&fil_system->mutex);
+}
+
+/****************************************************************//**
+Release fil_system mutex.
+Checks via ut_ad that mutex_own() is true for fil_system->mutex and
+then exits it; the counterpart of fil_system_enter(). */
+void
+fil_system_exit(void)
+/*=================*/
+{
+ ut_ad(mutex_own(&fil_system->mutex));
+ mutex_exit(&fil_system->mutex);
+}
+
+/*******************************************************************//**
+Fetch the name stored in a tablespace object.
+@return the name member of the space; the pointer is handed back as is */
+char*
+fil_space_name(
+/*===========*/
+	fil_space_t*	space)	/*!< in: space; dereferenced without a
+				NULL check */
+{
+	char*	name = space->name;
+
+	return(name);
+}
+
+/*******************************************************************//**
+Translate a FIL_PAGE_TYPE value into a printable name.
+@return constant string naming the page type, or "PAGE TYPE CORRUPTED"
+when the value matches none of the listed types */
+const char*
+fil_get_page_type_name(
+/*===================*/
+	ulint	page_type)	/*!< in: FIL_PAGE_TYPE */
+{
+	/* Every page type this function can name, paired with the text
+	returned for it. */
+	static const struct {
+		ulint		type;
+		const char*	name;
+	} known_types[] = {
+		{FIL_PAGE_PAGE_COMPRESSED,	"PAGE_COMPRESSED"},
+		{FIL_PAGE_INDEX,		"INDEX"},
+		{FIL_PAGE_UNDO_LOG,		"UNDO LOG"},
+		{FIL_PAGE_INODE,		"INODE"},
+		{FIL_PAGE_IBUF_FREE_LIST,	"IBUF_FREE_LIST"},
+		{FIL_PAGE_TYPE_ALLOCATED,	"ALLOCATED"},
+		{FIL_PAGE_IBUF_BITMAP,		"IBUF_BITMAP"},
+		{FIL_PAGE_TYPE_SYS,		"SYS"},
+		{FIL_PAGE_TYPE_TRX_SYS,		"TRX_SYS"},
+		{FIL_PAGE_TYPE_FSP_HDR,		"FSP_HDR"},
+		{FIL_PAGE_TYPE_XDES,		"XDES"},
+		{FIL_PAGE_TYPE_BLOB,		"BLOB"},
+		{FIL_PAGE_TYPE_ZBLOB,		"ZBLOB"},
+		{FIL_PAGE_TYPE_ZBLOB2,		"ZBLOB2"},
+		{FIL_PAGE_TYPE_COMPRESSED,	"ORACLE PAGE COMPRESSED"}
+	};
+	const ulint	n_known = sizeof(known_types) / sizeof(known_types[0]);
+
+	for (ulint i = 0; i < n_known; i++) {
+		if (known_types[i].type == page_type) {
+			return(known_types[i].name);
+		}
+	}
+
+	/* Not one of the listed types. */
+	return("PAGE TYPE CORRUPTED");
+}
+/****************************************************************//**
+Read the file block size recorded in a file node.
+@return value of the node's file_block_size member */
+ulint
+fil_node_get_block_size(
+/*====================*/
+	fil_node_t*	node)	/*!< in: file node whose block size is
+				wanted; dereferenced without a NULL
+				check */
+{
+	ulint	block_size = node->file_block_size;
+
+	return(block_size);
+}
diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc
new file mode 100644
index 00000000000..6baf1963c47
--- /dev/null
+++ b/storage/innobase/fil/fil0pagecompress.cc
@@ -0,0 +1,793 @@
+/*****************************************************************************
+
+Copyright (C) 2013, 2015, MariaDB Corporation. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file fil/fil0pagecompress.cc
+Implementation for page compressed file spaces.
+
+Created 11/12/2013 Jan Lindström jan.lindstrom@mariadb.com
+Updated 14/02/2015
+***********************************************************************/
+
+#include "fil0fil.h"
+#include "fil0pagecompress.h"
+
+#include <debug_sync.h>
+#include <my_dbug.h>
+
+#include "mem0mem.h"
+#include "hash0hash.h"
+#include "os0file.h"
+#include "mach0data.h"
+#include "buf0buf.h"
+#include "buf0flu.h"
+#include "log0recv.h"
+#include "fsp0fsp.h"
+#include "srv0srv.h"
+#include "srv0start.h"
+#include "mtr0mtr.h"
+#include "mtr0log.h"
+#include "dict0dict.h"
+#include "page0page.h"
+#include "page0zip.h"
+#include "trx0sys.h"
+#include "row0mysql.h"
+#ifndef UNIV_HOTBACKUP
+# include "buf0lru.h"
+# include "ibuf0ibuf.h"
+# include "sync0sync.h"
+# include "os0sync.h"
+#else /* !UNIV_HOTBACKUP */
+# include "srv0srv.h"
+static ulint srv_data_read, srv_data_written;
+#endif /* !UNIV_HOTBACKUP */
+#include "zlib.h"
+#ifdef __linux__
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <linux/falloc.h>
+#endif
+#include "row0mysql.h"
+#ifdef HAVE_LZ4
+#include "lz4.h"
+#endif
+#ifdef HAVE_LZO
+#include "lzo/lzo1x.h"
+#endif
+#ifdef HAVE_LZMA
+#include "lzma.h"
+#endif
+#ifdef HAVE_BZIP2
+#include "bzlib.h"
+#endif
+#ifdef HAVE_SNAPPY
+#include "snappy-c.h"
+#endif
+
+/* Used for debugging */
+//#define UNIV_PAGECOMPRESS_DEBUG 1
+
+/****************************************************************//**
+Decompress, in place, a page of type FIL_PAGE_TYPE_COMPRESSED (format
+version 1) after the actual read operation.
+
+The payload at buf + FIL_PAGE_DATA is decompressed into page_buf with the
+algorithm recorded in the page header and then copied back over the
+payload area of buf. The page header is left alone except for
+FIL_PAGE_TYPE, which is set back to the type the page had before it was
+compressed. Pages of any other type are returned untouched. A header that
+fails the size checks, a decompression failure or an unknown algorithm is
+reported on stderr and followed by ut_error. */
+static
+void
+fil_decompress_page_2(
+/*==================*/
+	byte*		page_buf,	/*!< out: destination buffer for
+					uncompressed data */
+	byte*		buf,		/*!< in/out: source compressed data;
+					holds the uncompressed page on
+					return */
+	ulong		len,		/*!< in: length of output buffer.*/
+	ulint*		write_size)	/*!< in/out: Actual payload size of
+					the compressed data; not referenced
+					by this function */
+{
+	ulint	page_type = mach_read_from_2(buf + FIL_PAGE_TYPE);
+
+	if (page_type != FIL_PAGE_TYPE_COMPRESSED) {
+		/* It is not a compressed page */
+		return;
+	}
+
+	byte*	ptr = buf + FIL_PAGE_DATA;
+	ulint	version = mach_read_from_1(buf + FIL_PAGE_VERSION);
+	int	err = 0;
+
+	ut_a(version == 1);
+
+	/* Read the original page type, before we compressed the data. */
+	page_type = mach_read_from_2(buf + FIL_PAGE_ORIGINAL_TYPE_V1);
+
+	ulint	original_len = mach_read_from_2(buf + FIL_PAGE_ORIGINAL_SIZE_V1);
+
+	/* Refuse sizes outside the supported page size range or larger
+	than what the output buffer can take. */
+	if (original_len < UNIV_PAGE_SIZE_MIN - (FIL_PAGE_DATA + 8)
+	    || original_len > UNIV_PAGE_SIZE_MAX - FIL_PAGE_DATA
+	    || len < original_len + FIL_PAGE_DATA) {
+		fprintf(stderr,
+			"InnoDB: Corruption: We try to uncompress corrupted page\n"
+			"InnoDB: Original len %lu len %lu.\n",
+			original_len, len);
+
+		fflush(stderr);
+		ut_error;
+
+	}
+
+	ulint	algorithm = mach_read_from_1(buf + FIL_PAGE_ALGORITHM_V1);
+
+	switch(algorithm) {
+	case PAGE_ZLIB_ALGORITHM: {
+
+#ifdef UNIV_PAGECOMPRESS_DEBUG
+		fprintf(stderr, "InnoDB: [Note]: zlib\n");
+#endif /* UNIV_PAGECOMPRESS_DEBUG */
+
+		err = uncompress(page_buf, &len, ptr, original_len);
+		/* If uncompress fails it means that page is corrupted */
+		if (err != Z_OK) {
+
+			fprintf(stderr,
+				"InnoDB: Corruption: Page is marked as compressed\n"
+				"InnoDB: but uncompress failed with error %d.\n"
+				"InnoDB: size %lu len %lu\n",
+				err, original_len, len);
+
+			fflush(stderr);
+
+			ut_error;
+		}
+
+		break;
+	}
+#ifdef HAVE_LZ4
+	case PAGE_LZ4_ALGORITHM: {
+#ifdef UNIV_PAGECOMPRESS_DEBUG
+		fprintf(stderr, "InnoDB: [Note]: lz4\n");
+#endif /* UNIV_PAGECOMPRESS_DEBUG */
+		err = LZ4_decompress_fast(
+			(const char*) ptr, (char*) (page_buf), original_len);
+
+		if (err < 0) {
+			fprintf(stderr,
+				"InnoDB: Corruption: Page is marked as compressed\n"
+				"InnoDB: but decompression read only %d bytes.\n"
+				"InnoDB: size %lu len %lu\n",
+				err, original_len, len);
+			fflush(stderr);
+
+			ut_error;
+		}
+		break;
+	}
+#endif /* HAVE_LZ4 */
+
+#ifdef HAVE_LZMA
+	case PAGE_LZMA_ALGORITHM: {
+
+		lzma_ret	ret;
+		size_t		src_pos = 0;
+		size_t		dst_pos = 0;
+		uint64_t	memlimit = UINT64_MAX;
+
+#ifdef UNIV_PAGECOMPRESS_DEBUG
+		fprintf(stderr, "InnoDB: [Note]: lzma\n");
+#endif /* UNIV_PAGECOMPRESS_DEBUG */
+		ret = lzma_stream_buffer_decode(
+			&memlimit,
+			0,
+			NULL,
+			ptr,
+			&src_pos,
+			original_len,
+			(page_buf),
+			&dst_pos,
+			len);
+
+
+		/* dst_pos is unsigned, so "no output" is exactly zero. */
+		if (ret != LZMA_OK || (dst_pos == 0 || dst_pos > len)) {
+			fprintf(stderr,
+				"InnoDB: Corruption: Page is marked as compressed\n"
+				"InnoDB: but decompression read only %lu bytes.\n"
+				"InnoDB: size %lu len %lu\n",
+				(ulong) dst_pos, original_len, len);
+			fflush(stderr);
+
+			ut_error;
+		}
+
+		break;
+	}
+#endif /* HAVE_LZMA */
+
+#ifdef HAVE_LZO
+	case PAGE_LZO_ALGORITHM: {
+		/* Output length reported by LZO; only needed here. */
+		ulint	olen = 0;
+
+#ifdef UNIV_PAGECOMPRESS_DEBUG
+		fprintf(stderr, "InnoDB: [Note]: lzo \n");
+#endif /* UNIV_PAGECOMPRESS_DEBUG */
+		err = lzo1x_decompress((const unsigned char *)ptr,
+			original_len,(unsigned char *)(page_buf), &olen, NULL);
+
+		if (err != LZO_E_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) {
+			fprintf(stderr,
+				"InnoDB: Corruption: Page is marked as compressed\n"
+				"InnoDB: but decompression read only %lu bytes.\n"
+				"InnoDB: size %lu len %lu\n",
+				olen, original_len, len);
+			fflush(stderr);
+
+			ut_error;
+		}
+		break;
+	}
+#endif /* HAVE_LZO */
+
+	default:
+		fprintf(stderr,
+			"InnoDB: Corruption: Page is marked as compressed\n"
+			"InnoDB: but compression algorithm %s\n"
+			"InnoDB: is not known.\n"
+			,fil_get_compression_alg_name(algorithm));
+
+		fflush(stderr);
+		ut_error;
+		break;
+	}
+
+	/* Leave the header alone */
+	memmove(buf+FIL_PAGE_DATA, page_buf, original_len);
+
+	mach_write_to_2(buf + FIL_PAGE_TYPE, page_type);
+
+	/* Debug check: the LSN bytes in the header must match those in the
+	trailer of the restored page. */
+	ut_ad(memcmp(buf + FIL_PAGE_LSN + 4,
+		     buf + (original_len + FIL_PAGE_DATA)
+		     - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4) == 0);
+}
+
+/****************************************************************//**
+For page compressed pages compress the page before actual write
+operation.
+@return compressed page to be written*/
+byte*
+fil_compress_page(
+/*==============*/
+ ulint space_id, /*!< in: tablespace id of the
+ table. */
+ byte* buf, /*!< in: buffer from which to write; in aio
+ this must be appropriately aligned */
+ byte* out_buf, /*!< out: compressed buffer */
+ ulint len, /*!< in: length of input buffer.*/
+ ulint compression_level, /* in: compression level */
+ ulint block_size, /*!< in: block size */
+ ulint* out_len, /*!< out: actual length of compressed
+ page */
+ byte* lzo_mem) /*!< in: temporal memory used by LZO */
+{
+ int err = Z_OK;
+ int level = 0;
+ ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE;
+ ulint write_size=0;
+ ulint comp_method = innodb_compression_algorithm; /* Cache to avoid
+ change during
+ function execution */
+
+ ut_ad(buf);
+ ut_ad(out_buf);
+ ut_ad(len);
+ ut_ad(out_len);
+
+ level = compression_level;
+ ut_ad(fil_space_is_page_compressed(space_id));
+
+ fil_system_enter();
+ fil_space_t* space = fil_space_get_by_id(space_id);
+ fil_system_exit();
+
+ /* If no compression level was provided to this table, use system
+ default level */
+ if (level == 0) {
+ level = page_zip_level;
+ }
+
+#ifdef UNIV_PAGECOMPRESS_DEBUG
+ fprintf(stderr,
+ "InnoDB: Note: Preparing for compress for space %lu name %s len %lu\n",
+ space_id, fil_space_name(space), len);
+#endif /* UNIV_PAGECOMPRESS_DEBUG */
+
+ write_size = UNIV_PAGE_SIZE - header_len;
+
+ switch(comp_method) {
+#ifdef HAVE_LZ4
+ case PAGE_LZ4_ALGORITHM:
+ err = LZ4_compress_limitedOutput((const char *)buf,
+ (char *)out_buf+header_len, len, write_size);
+ write_size = err;
+
+ if (err == 0) {
+ /* If error we leave the actual page as it was */
+
+ if (space->printed_compression_failure == false) {
+ fprintf(stderr,
+ "InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n",
+ space_id, fil_space_name(space), len, err, write_size);
+ space->printed_compression_failure = true;
+ }
+
+ srv_stats.pages_page_compression_error.inc();
+ *out_len = len;
+ return (buf);
+ }
+ break;
+#endif /* HAVE_LZ4 */
+#ifdef HAVE_LZO
+ case PAGE_LZO_ALGORITHM:
+ err = lzo1x_1_15_compress(
+ buf, len, out_buf+header_len, &write_size, lzo_mem);
+
+ if (err != LZO_E_OK || write_size > UNIV_PAGE_SIZE-header_len) {
+ if (space->printed_compression_failure == false) {
+ fprintf(stderr,
+ "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu\n",
+ space_id, fil_space_name(space), len, err, write_size);
+ space->printed_compression_failure = true;
+ }
+ srv_stats.pages_page_compression_error.inc();
+ *out_len = len;
+ return (buf);
+ }
+
+ break;
+#endif /* HAVE_LZO */
+#ifdef HAVE_LZMA
+ case PAGE_LZMA_ALGORITHM: {
+ size_t out_pos=0;
+
+ err = lzma_easy_buffer_encode(
+ compression_level,
+ LZMA_CHECK_NONE,
+ NULL, /* No custom allocator, use malloc/free */
+ reinterpret_cast<uint8_t*>(buf),
+ len,
+ reinterpret_cast<uint8_t*>(out_buf + header_len),
+ &out_pos,
+ (size_t)write_size);
+
+ if (err != LZMA_OK || out_pos > UNIV_PAGE_SIZE-header_len) {
+ if (space->printed_compression_failure == false) {
+ fprintf(stderr,
+ "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu\n",
+ space_id, fil_space_name(space), len, err, out_pos);
+ space->printed_compression_failure = true;
+ }
+
+ srv_stats.pages_page_compression_error.inc();
+ *out_len = len;
+ return (buf);
+ }
+
+ write_size = out_pos;
+
+ break;
+ }
+#endif /* HAVE_LZMA */
+
+#ifdef HAVE_BZIP2
+ case PAGE_BZIP2_ALGORITHM: {
+
+ err = BZ2_bzBuffToBuffCompress(
+ (char *)(out_buf + header_len),
+ (unsigned int *)&write_size,
+ (char *)buf,
+ len,
+ 1,
+ 0,
+ 0);
+
+ if (err != BZ_OK || write_size > UNIV_PAGE_SIZE-header_len) {
+ if (space->printed_compression_failure == false) {
+ fprintf(stderr,
+ "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu\n",
+ space_id, fil_space_name(space), len, err, write_size);
+ space->printed_compression_failure = true;
+ }
+ srv_stats.pages_page_compression_error.inc();
+ *out_len = len;
+ return (buf);
+ }
+ break;
+ }
+#endif /* HAVE_BZIP2 */
+
+#ifdef HAVE_SNAPPY
+ case PAGE_SNAPPY_ALGORITHM:
+ {
+ snappy_status cstatus;
+
+ cstatus = snappy_compress((const char *)buf, len, (char *)(out_buf+header_len), &write_size);
+
+ if (cstatus != SNAPPY_OK || write_size > UNIV_PAGE_SIZE-header_len) {
+ if (space->printed_compression_failure == false) {
+ fprintf(stderr,
+ "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu\n",
+ space_id, fil_space_name(space), len, (int)cstatus, write_size);
+ space->printed_compression_failure = true;
+ }
+ srv_stats.pages_page_compression_error.inc();
+ *out_len = len;
+ return (buf);
+ }
+ break;
+ }
+#endif /* HAVE_SNAPPY */
+
+ case PAGE_ZLIB_ALGORITHM:
+ err = compress2(out_buf+header_len, (ulong*)&write_size, buf, len, level);
+
+ if (err != Z_OK) {
+ /* If error we leave the actual page as it was */
+
+ if (space->printed_compression_failure == false) {
+ fprintf(stderr,
+ "InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n",
+ space_id, fil_space_name(space), len, err, write_size);
+ space->printed_compression_failure = true;
+ }
+
+ srv_stats.pages_page_compression_error.inc();
+ *out_len = len;
+ return (buf);
+ }
+ break;
+
+ case PAGE_UNCOMPRESSED:
+ *out_len = len;
+ return (buf);
+ break;
+
+ default:
+ ut_error;
+ break;
+ }
+
+ /* Set up the page header */
+ memcpy(out_buf, buf, FIL_PAGE_DATA);
+ /* Set up the checksum */
+ mach_write_to_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC);
+ /* Set up the correct page type */
+ mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED);
+ /* Set up the flush lsn to be compression algorithm */
+ mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, comp_method);
+ /* Set up the actual payload lenght */
+ mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size);
+
+#ifdef UNIV_DEBUG
+ /* Verify */
+ ut_ad(fil_page_is_compressed(out_buf));
+ ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC);
+ ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size);
+ ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == (ulint)comp_method);
+
+ /* Verify that page can be decompressed */
+ {
+ byte *comp_page;
+ byte *uncomp_page;
+
+ comp_page = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*3));
+ uncomp_page = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*3));
+ memcpy(comp_page, out_buf, UNIV_PAGE_SIZE);
+
+ fil_decompress_page(uncomp_page, comp_page, len, NULL);
+ if(buf_page_is_corrupted(false, uncomp_page, 0)) {
+ buf_page_print(uncomp_page, 0, BUF_PAGE_PRINT_NO_CRASH);
+ ut_error;
+ }
+ ut_free(comp_page);
+ ut_free(uncomp_page);
+ }
+#endif /* UNIV_DEBUG */
+
+ write_size+=header_len;
+
+ /* Actual write needs to be alligned on block size */
+ if (write_size % block_size) {
+ size_t tmp = write_size;
+#ifdef UNIV_DEBUG
+ ut_a(block_size > 0);
+#endif
+ write_size = (size_t)ut_uint64_align_up((ib_uint64_t)write_size, block_size);
+#ifdef UNIV_DEBUG
+ ut_a(write_size > 0 && ((write_size % block_size) == 0));
+ ut_a(write_size >= tmp);
+#endif
+ }
+
+#ifdef UNIV_PAGECOMPRESS_DEBUG
+ fprintf(stderr,
+ "InnoDB: Note: Compression succeeded for space %lu name %s len %lu out_len %lu\n",
+ space_id, fil_space_name(space), len, write_size);
+#endif /* UNIV_PAGECOMPRESS_DEBUG */
+
+
+ srv_stats.page_compression_saved.add((len - write_size));
+ srv_stats.pages_page_compressed.inc();
+
+ /* If we do not persistently trim rest of page, we need to write it
+ all */
+ if (!srv_use_trim) {
+ write_size = len;
+ }
+
+ *out_len = write_size;
+
+ return(out_buf);
+
+}
+
+/****************************************************************//**
+Decompress a page compressed page in place after the actual read
+operation. Pages whose type is neither FIL_PAGE_PAGE_COMPRESSED nor
+FIL_PAGE_TYPE_COMPRESSED are left untouched, and FIL_PAGE_TYPE_COMPRESSED
+pages are handed over to fil_decompress_page_2(). Otherwise the payload
+stored after the page header is decompressed into a scratch buffer using
+the algorithm recorded in FIL_PAGE_FILE_FLUSH_LSN and the result is copied
+back over buf. A bad checksum magic, a bad payload size, an unknown
+algorithm or a failed decompression is reported on stderr and followed by
+ut_error. */
+void
+fil_decompress_page(
+/*================*/
+	byte*		page_buf,	/*!< in: preallocated scratch buffer
+					for the decompressed data, or NULL
+					to allocate a temporary one */
+	byte*		buf,		/*!< in/out: compressed page as read;
+					overwritten with the decompressed
+					page; in aio this must be
+					appropriately aligned */
+	ulong		len,		/*!< in: length of output buffer.*/
+	ulint*		write_size)	/*!< out: if not NULL, receives the
+					actual payload size of the
+					compressed data */
+{
+	int		err = 0;
+	ulint		actual_size = 0;
+	ulint		compression_alg = 0;
+	byte		*in_buf;
+	ulint		olen=0;
+	ulint		ptype;
+
+	ut_ad(buf);
+	ut_ad(len);
+
+	ptype = mach_read_from_2(buf+FIL_PAGE_TYPE);
+
+	/* Do not try to uncompressed pages that are not compressed */
+	if (ptype != FIL_PAGE_PAGE_COMPRESSED && ptype != FIL_PAGE_TYPE_COMPRESSED) {
+		return;
+	}
+
+	// If no buffer was given, we need to allocate temporal buffer
+	if (page_buf == NULL) {
+#ifdef UNIV_PAGECOMPRESS_DEBUG
+		fprintf(stderr,
+			"InnoDB: Note: FIL: Compression buffer not given, allocating...\n");
+#endif /* UNIV_PAGECOMPRESS_DEBUG */
+		in_buf = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*3));
+	} else {
+		in_buf = page_buf;
+	}
+
+	if (ptype == FIL_PAGE_TYPE_COMPRESSED) {
+
+		fil_decompress_page_2(in_buf, buf, len, write_size);
+		// Need to free temporal buffer if no buffer was given
+		if (page_buf == NULL) {
+			ut_free(in_buf);
+		}
+		return;
+	}
+
+	/* Before actual decompress, make sure that page type is correct */
+
+	if (mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM) != BUF_NO_CHECKSUM_MAGIC ||
+	    mach_read_from_2(buf+FIL_PAGE_TYPE) != FIL_PAGE_PAGE_COMPRESSED) {
+		fprintf(stderr,
+			"InnoDB: Corruption: We try to uncompress corrupted page\n"
+			"InnoDB: CRC %lu type %lu.\n"
+			"InnoDB: len %lu\n",
+			mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM),
+			mach_read_from_2(buf+FIL_PAGE_TYPE), len);
+
+		fflush(stderr);
+		ut_error;
+	}
+
+	/* Get compression algorithm */
+	compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN);
+
+	/* Get the actual size of compressed page */
+	actual_size = mach_read_from_2(buf+FIL_PAGE_DATA);
+	/* Check if payload size is corrupted */
+	if (actual_size == 0 || actual_size > UNIV_PAGE_SIZE) {
+		fprintf(stderr,
+			"InnoDB: Corruption: We try to uncompress corrupted page\n"
+			"InnoDB: actual size %lu compression %s\n",
+			actual_size, fil_get_compression_alg_name(compression_alg));
+		fflush(stderr);
+		ut_error;
+	}
+
+	/* Store actual payload size of the compressed data. This pointer
+	points to buffer pool. */
+	if (write_size) {
+		*write_size = actual_size;
+	}
+
+#ifdef UNIV_PAGECOMPRESS_DEBUG
+	fprintf(stderr,
+		"InnoDB: Note: Preparing for decompress for len %lu\n",
+		actual_size);
+#endif /* UNIV_PAGECOMPRESS_DEBUG */
+
+
+	switch(compression_alg) {
+	case PAGE_ZLIB_ALGORITHM:
+		/* uncompress() updates len to the number of bytes it
+		produced; that value is what gets copied back below. */
+		err= uncompress(in_buf, &len, buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (unsigned long)actual_size);
+
+		/* If uncompress fails it means that page is corrupted */
+		if (err != Z_OK) {
+
+			fprintf(stderr,
+				"InnoDB: Corruption: Page is marked as compressed\n"
+				"InnoDB: but uncompress failed with error %d.\n"
+				"InnoDB: size %lu len %lu\n",
+				err, actual_size, len);
+
+			fflush(stderr);
+
+			ut_error;
+		}
+		break;
+
+#ifdef HAVE_LZ4
+	case PAGE_LZ4_ALGORITHM:
+		err = LZ4_decompress_fast((const char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (char *)in_buf, len);
+
+		if (err != (int)actual_size) {
+			fprintf(stderr,
+				"InnoDB: Corruption: Page is marked as compressed\n"
+				"InnoDB: but decompression read only %d bytes.\n"
+				"InnoDB: size %lu len %lu\n",
+				err, actual_size, len);
+			fflush(stderr);
+
+			ut_error;
+		}
+		break;
+#endif /* HAVE_LZ4 */
+#ifdef HAVE_LZO
+	case PAGE_LZO_ALGORITHM:
+		err = lzo1x_decompress((const unsigned char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE,
+			actual_size,(unsigned char *)in_buf, &olen, NULL);
+
+		if (err != LZO_E_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) {
+			/* olen is unsigned, print it as such */
+			fprintf(stderr,
+				"InnoDB: Corruption: Page is marked as compressed\n"
+				"InnoDB: but decompression read only %lu bytes.\n"
+				"InnoDB: size %lu len %lu\n",
+				olen, actual_size, len);
+			fflush(stderr);
+
+			ut_error;
+		}
+		break;
+#endif /* HAVE_LZO */
+#ifdef HAVE_LZMA
+	case PAGE_LZMA_ALGORITHM: {
+
+		lzma_ret	ret;
+		size_t		src_pos = 0;
+		size_t		dst_pos = 0;
+		uint64_t	memlimit = UINT64_MAX;
+
+		ret = lzma_stream_buffer_decode(
+			&memlimit,
+			0,
+			NULL,
+			buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE,
+			&src_pos,
+			actual_size,
+			in_buf,
+			&dst_pos,
+			len);
+
+
+		if (ret != LZMA_OK || (dst_pos == 0 || dst_pos > UNIV_PAGE_SIZE)) {
+			/* dst_pos is an unsigned size_t; cast it so that
+			the argument matches the %lu conversion */
+			fprintf(stderr,
+				"InnoDB: Corruption: Page is marked as compressed\n"
+				"InnoDB: but decompression read only %lu bytes.\n"
+				"InnoDB: size %lu len %lu\n",
+				(ulong) dst_pos, actual_size, len);
+			fflush(stderr);
+
+			ut_error;
+		}
+
+		break;
+	}
+#endif /* HAVE_LZMA */
+#ifdef HAVE_BZIP2
+	case PAGE_BZIP2_ALGORITHM: {
+		/* In: capacity of in_buf, out: decompressed size */
+		unsigned int	dst_pos = UNIV_PAGE_SIZE;
+
+		err = BZ2_bzBuffToBuffDecompress(
+			(char *)in_buf,
+			&dst_pos,
+			(char *)(buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE),
+			actual_size,
+			1,
+			0);
+
+		if (err != BZ_OK || (dst_pos == 0 || dst_pos > UNIV_PAGE_SIZE)) {
+			fprintf(stderr,
+				"InnoDB: Corruption: Page is marked as compressed\n"
+				"InnoDB: but decompression read only %u bytes.\n"
+				"InnoDB: size %lu len %lu err %d\n",
+				dst_pos, actual_size, len, err);
+			fflush(stderr);
+
+			ut_error;
+		}
+		break;
+	}
+#endif /* HAVE_BZIP2 */
+#ifdef HAVE_SNAPPY
+	case PAGE_SNAPPY_ALGORITHM:
+	{
+		snappy_status	cstatus;
+
+		/* snappy_uncompress() reads *uncompressed_length as the
+		space available in the output buffer and overwrites it
+		with the decompressed size; with the initial value 0 it
+		could only report that the buffer is too small. Use the
+		same bound as the bzip2 branch. */
+		olen = UNIV_PAGE_SIZE;
+
+		cstatus = snappy_uncompress(
+			(const char *)(buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE),
+			actual_size,
+			(char *)in_buf,
+			&olen);
+
+		if (cstatus != SNAPPY_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) {
+			fprintf(stderr,
+				"InnoDB: Corruption: Page is marked as compressed\n"
+				"InnoDB: but decompression read only %lu bytes.\n"
+				"InnoDB: size %lu len %lu err %d\n",
+				olen, actual_size, len, (int)cstatus);
+			fflush(stderr);
+
+			ut_error;
+		}
+		break;
+	}
+#endif /* HAVE_SNAPPY */
+	default:
+		fprintf(stderr,
+			"InnoDB: Corruption: Page is marked as compressed\n"
+			"InnoDB: but compression algorithm %s\n"
+			"InnoDB: is not known.\n"
+			,fil_get_compression_alg_name(compression_alg));
+
+		fflush(stderr);
+		ut_error;
+		break;
+	}
+
+#ifdef UNIV_PAGECOMPRESS_DEBUG
+	fprintf(stderr,
+		"InnoDB: Note: Decompression succeeded for len %lu \n",
+		len);
+#endif /* UNIV_PAGECOMPRESS_DEBUG */
+
+	srv_stats.pages_page_decompressed.inc();
+
+	/* Copy the uncompressed page to the buffer pool, not
+	really any other options. */
+	memcpy(buf, in_buf, len);
+
+	// Need to free temporal buffer if no buffer was given
+	if (page_buf == NULL) {
+		ut_free(in_buf);
+	}
+}
+
+
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 7b57f072493..10fc11eff5e 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -4,7 +4,7 @@ Copyright (c) 2000, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, 2009 Google Inc.
Copyright (c) 2009, Percona Inc.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2014 SkySQL Ab. All Rights Reserved.
+Copyright (c) 2013, 2015, MariaDB Corporation. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -101,6 +101,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
#endif /* UNIV_DEBUG */
#include "fts0priv.h"
#include "page0zip.h"
+#include "fil0pagecompress.h"
#define thd_get_trx_isolation(X) ((enum_tx_isolation)thd_tx_isolation(X))
@@ -502,6 +503,28 @@ ib_cb_t innodb_api_cb[] = {
(ib_cb_t) ib_trx_read_only
};
+/**
+ List of InnoDB specific CREATE TABLE options (table options).
+ Each entry pairs an option keyword with the like-named member of ha_table_option_struct.
+
+ The option values can be specified in the CREATE TABLE at the end:
+ CREATE TABLE ( ... ) *here*
+*/
+
+ha_create_table_option innodb_table_option_list[]=
+{
+ /* With this option the user can enable the page compression feature
+ for the table */
+ HA_TOPTION_BOOL("PAGE_COMPRESSED", page_compressed, 0),
+ /* With this option the user can set the compression level used for page
+ compression of this table (presumably default ULINT_UNDEFINED, range 0..9 - confirm against HA_TOPTION_NUMBER) */
+ HA_TOPTION_NUMBER("PAGE_COMPRESSION_LEVEL", page_compression_level, ULINT_UNDEFINED, 0, 9, 1),
+ /* With this option the user can select DEFAULT, ON or OFF for the atomic writes feature of this table */
+ HA_TOPTION_ENUM("ATOMIC_WRITES", atomic_writes, "DEFAULT,ON,OFF", 0),
+ HA_TOPTION_END
+};
+
+
/*************************************************************//**
Check whether valid argument given to innodb_ft_*_stopword_table.
This function is registered as a callback with MySQL.
@@ -537,7 +560,28 @@ static inline
ulint
innobase_map_isolation_level(
/*=========================*/
- enum_tx_isolation iso); /*!< in: MySQL isolation level code */
+ enum_tx_isolation iso); /*!< in: MySQL isolation level code
+ */
+
+/*************************************************************//**
+Check for a valid value of innodb_compression_algorithm (forward declaration).
+@return 0 for valid innodb_compression_algorithm. */
+static
+int
+innodb_compression_algorithm_validate(
+/*==================================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to system
+ variable */
+ void* save, /*!< out: immediate result
+ for update function */
+ struct st_mysql_value* value); /*!< in: incoming string or integer */
+
+static ibool innodb_have_lzo=IF_LZO(1, 0);
+static ibool innodb_have_lz4=IF_LZ4(1, 0);
+static ibool innodb_have_lzma=IF_LZMA(1, 0);
+static ibool innodb_have_bzip2=IF_BZIP2(1, 0);
+static ibool innodb_have_snappy=IF_SNAPPY(1, 0);
static const char innobase_hton_name[]= "InnoDB";
@@ -690,6 +734,46 @@ static SHOW_VAR innodb_status_variables[]= {
{"purge_view_trx_id_age",
(char*) &export_vars.innodb_purge_view_trx_id_age, SHOW_LONG},
#endif /* UNIV_DEBUG */
+ /* Status variables for page compression */
+ {"page_compression_saved",
+ (char*) &export_vars.innodb_page_compression_saved, SHOW_LONGLONG},
+ {"page_compression_trim_sect512",
+ (char*) &export_vars.innodb_page_compression_trim_sect512, SHOW_LONGLONG},
+ {"page_compression_trim_sect1024",
+ (char*) &export_vars.innodb_page_compression_trim_sect1024, SHOW_LONGLONG},
+ {"page_compression_trim_sect2048",
+ (char*) &export_vars.innodb_page_compression_trim_sect2048, SHOW_LONGLONG},
+ {"page_compression_trim_sect4096",
+ (char*) &export_vars.innodb_page_compression_trim_sect4096, SHOW_LONGLONG},
+ {"page_compression_trim_sect8192",
+ (char*) &export_vars.innodb_page_compression_trim_sect8192, SHOW_LONGLONG},
+ {"page_compression_trim_sect16384",
+ (char*) &export_vars.innodb_page_compression_trim_sect16384, SHOW_LONGLONG},
+ {"page_compression_trim_sect32768",
+ (char*) &export_vars.innodb_page_compression_trim_sect32768, SHOW_LONGLONG},
+ {"num_index_pages_written",
+ (char*) &export_vars.innodb_index_pages_written, SHOW_LONGLONG},
+ {"num_non_index_pages_written",
+ (char*) &export_vars.innodb_non_index_pages_written, SHOW_LONGLONG},
+ {"num_pages_page_compressed",
+ (char*) &export_vars.innodb_pages_page_compressed, SHOW_LONGLONG},
+ {"num_page_compressed_trim_op",
+ (char*) &export_vars.innodb_page_compressed_trim_op, SHOW_LONGLONG},
+ {"num_page_compressed_trim_op_saved",
+ (char*) &export_vars.innodb_page_compressed_trim_op_saved, SHOW_LONGLONG},
+ {"num_pages_page_decompressed",
+ (char*) &export_vars.innodb_pages_page_decompressed, SHOW_LONGLONG},
+ {"have_lz4",
+ (char*) &innodb_have_lz4, SHOW_BOOL},
+ {"have_lzo",
+ (char*) &innodb_have_lzo, SHOW_BOOL},
+ {"have_lzma",
+ (char*) &innodb_have_lzma, SHOW_BOOL},
+ {"have_bzip2",
+ (char*) &innodb_have_bzip2, SHOW_BOOL},
+ {"have_snappy",
+ (char*) &innodb_have_snappy, SHOW_BOOL},
+
{NullS, NullS, SHOW_LONG}
};
@@ -2910,6 +2994,8 @@ innobase_init(
if (srv_file_per_table)
innobase_hton->tablefile_extensions = ha_innobase_exts;
+ innobase_hton->table_options = innodb_table_option_list;
+
ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
#ifndef DBUG_OFF
@@ -2944,6 +3030,51 @@ innobase_init(
}
}
+#ifndef HAVE_LZ4
+ if (innodb_compression_algorithm == PAGE_LZ4_ALGORITHM) {
+ sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+ "InnoDB: liblz4 is not installed. \n",
+ innodb_compression_algorithm);
+ goto error;
+ }
+#endif
+
+#ifndef HAVE_LZO
+ if (innodb_compression_algorithm == PAGE_LZO_ALGORITHM) {
+ sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+ "InnoDB: liblzo is not installed. \n",
+ innodb_compression_algorithm);
+ goto error;
+ }
+#endif
+
+#ifndef HAVE_LZMA
+ if (innodb_compression_algorithm == PAGE_LZMA_ALGORITHM) {
+ sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+ "InnoDB: liblzma is not installed. \n",
+ innodb_compression_algorithm);
+ goto error;
+ }
+#endif
+
+#ifndef HAVE_BZIP2
+ if (innodb_compression_algorithm == PAGE_BZIP2_ALGORITHM) {
+ sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+ "InnoDB: libbz2 is not installed. \n",
+ innodb_compression_algorithm);
+ goto error;
+ }
+#endif
+
+#ifndef HAVE_SNAPPY
+ if (innodb_compression_algorithm == PAGE_SNAPPY_ALGORITHM) {
+ sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+ "InnoDB: libsnappy is not installed. \n",
+ innodb_compression_algorithm);
+ goto error;
+ }
+#endif
+
os_innodb_umask = (ulint) my_umask;
/* First calculate the default path for innodb_data_home_dir etc.,
@@ -9668,11 +9799,16 @@ innobase_table_flags(
enum row_type row_format;
rec_format_t innodb_row_format = REC_FORMAT_COMPACT;
bool use_data_dir;
+ ha_table_option_struct *options= form->s->option_struct;
/* Cache the value of innodb_file_format, in case it is
modified by another thread while the table is being created. */
const ulint file_format_allowed = srv_file_format;
+ /* Cache the value of innobase_compression_level, in case it is
+ modified by another thread while the table is being created. */
+ const ulint default_compression_level = page_zip_level;
+
*flags = 0;
*flags2 = 0;
@@ -9726,6 +9862,8 @@ index_bad:
}
}
+ row_format = form->s->row_type;
+
if (create_info->key_block_size) {
/* The requested compressed page size (key_block_size)
is given in kilobytes. If it is a valid number, store
@@ -9735,7 +9873,7 @@ index_bad:
ulint kbsize; /* Key Block Size */
for (zssize = kbsize = 1;
zssize <= ut_min(UNIV_PAGE_SSIZE_MAX,
- PAGE_ZIP_SSIZE_MAX);
+ PAGE_ZIP_SSIZE_MAX);
zssize++, kbsize <<= 1) {
if (kbsize == create_info->key_block_size) {
zip_ssize = zssize;
@@ -9763,8 +9901,8 @@ index_bad:
}
if (!zip_allowed
- || zssize > ut_min(UNIV_PAGE_SSIZE_MAX,
- PAGE_ZIP_SSIZE_MAX)) {
+ || zssize > ut_min(UNIV_PAGE_SSIZE_MAX,
+ PAGE_ZIP_SSIZE_MAX)) {
push_warning_printf(
thd, Sql_condition::WARN_LEVEL_WARN,
ER_ILLEGAL_HA_CREATE_OPTION,
@@ -9773,8 +9911,6 @@ index_bad:
}
}
- row_format = form->s->row_type;
-
if (zip_ssize && zip_allowed) {
/* if ROW_FORMAT is set to default,
automatically change it to COMPRESSED.*/
@@ -9811,7 +9947,6 @@ index_bad:
case ROW_TYPE_REDUNDANT:
innodb_row_format = REC_FORMAT_REDUNDANT;
break;
-
case ROW_TYPE_COMPRESSED:
case ROW_TYPE_DYNAMIC:
if (!use_tablespace) {
@@ -9829,10 +9964,18 @@ index_bad:
" innodb_file_format > Antelope.",
get_row_format_name(row_format));
} else {
- innodb_row_format = (row_format == ROW_TYPE_DYNAMIC
- ? REC_FORMAT_DYNAMIC
- : REC_FORMAT_COMPRESSED);
- break;
+ switch(row_format) {
+ case ROW_TYPE_COMPRESSED:
+ innodb_row_format = REC_FORMAT_COMPRESSED;
+ break;
+ case ROW_TYPE_DYNAMIC:
+ innodb_row_format = REC_FORMAT_DYNAMIC;
+ break;
+ default:
+ /* Not possible, avoid compiler warning */
+ break;
+ }
+ break; /* Correct row_format */
}
zip_allowed = FALSE;
/* fall through to set row_format = COMPACT */
@@ -9859,7 +10002,15 @@ index_bad:
&& ((create_info->data_file_name != NULL)
&& !(create_info->options & HA_LEX_CREATE_TMP_TABLE));
- dict_tf_set(flags, innodb_row_format, zip_ssize, use_data_dir);
+ /* Set up table dictionary flags */
+ dict_tf_set(flags,
+ innodb_row_format,
+ zip_ssize,
+ use_data_dir,
+ options->page_compressed,
+ (ulint)options->page_compression_level == ULINT_UNDEFINED ?
+ default_compression_level : options->page_compression_level,
+ options->atomic_writes);
if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
*flags2 |= DICT_TF2_TEMPORARY;
@@ -9877,6 +10028,114 @@ index_bad:
DBUG_RETURN(true);
}
+
+/*****************************************************************//**
+Check engine specific table options not handled by SQL-parser.
+Every violated requirement pushes a warning for the session before the
+name of the offending option is returned.
+@return NULL if valid, name of the offending option if not */
+UNIV_INTERN
+const char*
+ha_innobase::check_table_options(
+	THD		*thd,		/*!< in: thread handle */
+	TABLE*		table,		/*!< in: information on table
+					columns and indexes */
+	HA_CREATE_INFO*	create_info,	/*!< in: more information of the
+					created table, contains also the
+					create statement string */
+	const bool	use_tablespace,	/*!< in: use file per table */
+	const ulint	file_format)	/*!< in: file format to validate
+					against; PAGE_COMPRESSED is refused
+					below UNIV_FORMAT_B */
+{
+	enum row_type	row_format = table->s->row_type;
+	ha_table_option_struct *options= table->s->option_struct;
+	atomic_writes_t	awrites = (atomic_writes_t)options->atomic_writes;
+
+	/* Check page compression requirements */
+	if (options->page_compressed) {
+
+		if (row_format == ROW_TYPE_COMPRESSED) {
+			push_warning(
+				thd, Sql_condition::WARN_LEVEL_WARN,
+				HA_WRONG_CREATE_OPTION,
+				"InnoDB: PAGE_COMPRESSED table can't have"
+				" ROW_TYPE=COMPRESSED");
+			return "PAGE_COMPRESSED";
+		}
+
+		if (row_format == ROW_TYPE_REDUNDANT) {
+			push_warning(
+				thd, Sql_condition::WARN_LEVEL_WARN,
+				HA_WRONG_CREATE_OPTION,
+				"InnoDB: PAGE_COMPRESSED table can't have"
+				" ROW_TYPE=REDUNDANT");
+			return "PAGE_COMPRESSED";
+		}
+
+		if (!use_tablespace) {
+			push_warning(
+				thd, Sql_condition::WARN_LEVEL_WARN,
+				HA_WRONG_CREATE_OPTION,
+				"InnoDB: PAGE_COMPRESSED requires"
+				" innodb_file_per_table.");
+			return "PAGE_COMPRESSED";
+		}
+
+		if (file_format < UNIV_FORMAT_B) {
+			push_warning(
+				thd, Sql_condition::WARN_LEVEL_WARN,
+				HA_WRONG_CREATE_OPTION,
+				"InnoDB: PAGE_COMPRESSED requires"
+				" innodb_file_format > Antelope.");
+			return "PAGE_COMPRESSED";
+		}
+
+		if (create_info->key_block_size) {
+			push_warning(
+				thd, Sql_condition::WARN_LEVEL_WARN,
+				HA_WRONG_CREATE_OPTION,
+				"InnoDB: PAGE_COMPRESSED table can't have"
+				" key_block_size");
+			return "PAGE_COMPRESSED";
+		}
+	}
+
+	/* Check page compression level requirements, some of them are
+	already checked above */
+	if ((ulint)options->page_compression_level != ULINT_UNDEFINED) {
+		if (options->page_compressed == false) {
+			push_warning(
+				thd, Sql_condition::WARN_LEVEL_WARN,
+				HA_WRONG_CREATE_OPTION,
+				"InnoDB: PAGE_COMPRESSION_LEVEL requires"
+				" PAGE_COMPRESSED");
+			return "PAGE_COMPRESSION_LEVEL";
+		}
+
+		if (options->page_compression_level < 0 || options->page_compression_level > 9) {
+			/* page_compression_level is an int, so it has to
+			be formatted with %d */
+			push_warning_printf(
+				thd, Sql_condition::WARN_LEVEL_WARN,
+				HA_WRONG_CREATE_OPTION,
+				"InnoDB: invalid PAGE_COMPRESSION_LEVEL = %d."
+				" Valid values are [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]",
+				options->page_compression_level);
+			return "PAGE_COMPRESSION_LEVEL";
+		}
+	}
+
+	/* Check atomic writes requirements */
+	if (awrites == ATOMIC_WRITES_ON ||
+	    (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) {
+		if (!use_tablespace) {
+			push_warning(
+				thd, Sql_condition::WARN_LEVEL_WARN,
+				HA_WRONG_CREATE_OPTION,
+				"InnoDB: ATOMIC_WRITES requires"
+				" innodb_file_per_table.");
+			return "ATOMIC_WRITES";
+		}
+	}
+
+	return 0;
+}
+
/*****************************************************************//**
Creates a new table to an InnoDB database.
@return error number */
@@ -9908,6 +10167,7 @@ ha_innobase::create(
while creating the table. So we read the current value here
and make all further decisions based on this. */
bool use_tablespace = srv_file_per_table;
+ const ulint file_format = srv_file_format;
/* Zip Shift Size - log2 - 9 of compressed page size,
zero for uncompressed */
@@ -9931,6 +10191,12 @@ ha_innobase::create(
/* Create the table definition in InnoDB */
+ /* Validate table options not handled by the SQL-parser */
+ if(check_table_options(thd, form, create_info, use_tablespace,
+ file_format)) {
+ DBUG_RETURN(HA_WRONG_CREATE_OPTION);
+ }
+
/* Validate create options if innodb_strict_mode is set. */
if (create_options_are_invalid(
thd, form, create_info, use_tablespace)) {
@@ -14227,6 +14493,12 @@ ha_innobase::check_if_incompatible_data(
HA_CREATE_INFO* info,
uint table_changes)
{
+ ha_table_option_struct *param_old, *param_new;
+
+ /* Cache engine specific options */
+ param_new = info->option_struct;
+ param_old = table->s->option_struct;
+
innobase_copy_frm_flags_from_create_info(prebuilt->table, info);
if (table_changes != IS_EQUAL_YES) {
@@ -14253,6 +14525,13 @@ ha_innobase::check_if_incompatible_data(
return(COMPATIBLE_DATA_NO);
}
+ /* Changes on engine specific table options requests a rebuild of the table. */
+ if (param_new->page_compressed != param_old->page_compressed ||
+ param_new->page_compression_level != param_old->page_compression_level ||
+ param_new->atomic_writes != param_old->atomic_writes) {
+ return(COMPATIBLE_DATA_NO);
+ }
+
return(COMPATIBLE_DATA_YES);
}
@@ -16426,7 +16705,7 @@ static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay,
static MYSQL_SYSVAR_UINT(compression_level, page_zip_level,
PLUGIN_VAR_RQCMDARG,
- "Compression level used for compressed row format. 0 is no compression"
+ "Compression level used for zlib compression. 0 is no compression"
", 1 is fastest, 9 is best compression and default is 6.",
NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0);
@@ -16940,6 +17219,41 @@ static MYSQL_SYSVAR_ULONG(saved_page_number_debug,
NULL, innodb_save_page_no, 0, 0, UINT_MAX32, 0);
#endif /* UNIV_DEBUG */
+static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim,
+ PLUGIN_VAR_OPCMDARG,
+ "Use trim. Default FALSE.",
+ NULL, NULL, FALSE);
+
+/* Names accepted for innodb_compression_algorithm. The validation function
+maps a name to its position in this list. */
+static const char *page_compression_algorithms[]= { "none", "zlib", "lz4", "lzo", "lzma", "bzip2", "snappy", 0 };
+static TYPELIB page_compression_algorithms_typelib=
+{
+	array_elements(page_compression_algorithms) - 1, 0,
+	page_compression_algorithms, 0
+};
+static MYSQL_SYSVAR_ENUM(compression_algorithm, innodb_compression_algorithm,
+  PLUGIN_VAR_OPCMDARG,
+  "Compression algorithm used on page compression. One of: none, zlib, lz4, lzo, lzma, bzip2, or snappy",
+  innodb_compression_algorithm_validate, NULL,
+  /* The default is PAGE_UNCOMPRESSED. The typelib lists every method
+  regardless of how the server was built; availability of the selected
+  method is verified in innodb_compression_algorithm_validate function. */
+  PAGE_UNCOMPRESSED,
+  &page_compression_algorithms_typelib);
+
+static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Number of multi-threaded flush threads",
+ NULL, NULL,
+ MTFLUSH_DEFAULT_WORKER, /* Default setting */
+ 1, /* Minimum setting */
+ MTFLUSH_MAX_WORKER, /* Max setting */
+ 0);
+
+static MYSQL_SYSVAR_BOOL(use_mtflush, srv_use_mtflush,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Use multi-threaded flush. Default FALSE.",
+ NULL, NULL, FALSE);
+
static MYSQL_SYSVAR_UINT(simulate_comp_failures, srv_simulate_comp_failures,
PLUGIN_VAR_NOCMDARG,
"Simulate compression failures.",
@@ -17104,6 +17418,10 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(fil_make_page_dirty_debug),
MYSQL_SYSVAR(saved_page_number_debug),
#endif /* UNIV_DEBUG */
+ MYSQL_SYSVAR(use_trim),
+ MYSQL_SYSVAR(compression_algorithm),
+ MYSQL_SYSVAR(mtflush_threads),
+ MYSQL_SYSVAR(use_mtflush),
MYSQL_SYSVAR(simulate_comp_failures),
NULL
};
@@ -17447,6 +17765,9 @@ ib_senderrf(
case IB_LOG_LEVEL_FATAL:
l = 0;
break;
+ default:
+ l = 0;
+ break;
}
my_printv_error(code, format, MYF(l), args);
@@ -17604,6 +17925,108 @@ innobase_convert_to_system_charset(
static_cast<uint>(len), errors));
}
+/*************************************************************//**
+Check for a valid value of innodb_compression_algorithm.
+The value may be given as an algorithm name or as its number. It is
+rejected when it is not part of page_compression_algorithms_typelib or
+when it names an algorithm whose library was not available at build time;
+in the latter case a warning is pushed for the session. The value is
+stored to save only after it has passed every check.
+@return 0 for valid innodb_compression_algorithm. */
+static
+int
+innodb_compression_algorithm_validate(
+/*==================================*/
+	THD*				thd,	/*!< in: thread handle */
+	struct st_mysql_sys_var*	var,	/*!< in: pointer to system
+						variable */
+	void*				save,	/*!< out: immediate result
+						for update function */
+	struct st_mysql_value*		value)	/*!< in: incoming string
+						or integer */
+{
+	long		compression_algorithm;
+	DBUG_ENTER("innodb_compression_algorithm_validate");
+
+	if (value->value_type(value) == MYSQL_VALUE_TYPE_STRING) {
+		char buff[STRING_BUFFER_USUAL_SIZE];
+		const char *str;
+		int length= sizeof(buff);
+
+		if (!(str= value->val_str(value, buff, &length))) {
+			DBUG_RETURN(1);
+		}
+
+		/* The result of find_type() minus one is used as the
+		algorithm number; a negative result means the name was
+		not accepted. */
+		if ((compression_algorithm= (long)find_type(str, &page_compression_algorithms_typelib, 0) - 1) < 0) {
+			DBUG_RETURN(1);
+		}
+	} else {
+		long long tmp;
+
+		if (value->val_int(value, &tmp)) {
+			DBUG_RETURN(1);
+		}
+
+		if (tmp < 0 || tmp >= page_compression_algorithms_typelib.count) {
+			DBUG_RETURN(1);
+		}
+
+		compression_algorithm= (long) tmp;
+	}
+
+#ifndef HAVE_LZ4
+	if (compression_algorithm == PAGE_LZ4_ALGORITHM) {
+		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+				    HA_ERR_UNSUPPORTED,
+				    "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+				    "InnoDB: liblz4 is not installed. \n",
+				    (ulong) compression_algorithm);
+		DBUG_RETURN(1);
+	}
+#endif
+
+#ifndef HAVE_LZO
+	if (compression_algorithm == PAGE_LZO_ALGORITHM) {
+		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+				    HA_ERR_UNSUPPORTED,
+				    "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+				    "InnoDB: liblzo is not installed. \n",
+				    (ulong) compression_algorithm);
+		DBUG_RETURN(1);
+	}
+#endif
+
+#ifndef HAVE_LZMA
+	if (compression_algorithm == PAGE_LZMA_ALGORITHM) {
+		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+				    HA_ERR_UNSUPPORTED,
+				    "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+				    "InnoDB: liblzma is not installed. \n",
+				    (ulong) compression_algorithm);
+		DBUG_RETURN(1);
+	}
+#endif
+
+#ifndef HAVE_BZIP2
+	if (compression_algorithm == PAGE_BZIP2_ALGORITHM) {
+		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+				    HA_ERR_UNSUPPORTED,
+				    "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+				    "InnoDB: libbz2 is not installed. \n",
+				    (ulong) compression_algorithm);
+		DBUG_RETURN(1);
+	}
+#endif
+
+#ifndef HAVE_SNAPPY
+	if (compression_algorithm == PAGE_SNAPPY_ALGORITHM) {
+		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+				    HA_ERR_UNSUPPORTED,
+				    "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+				    "InnoDB: libsnappy is not installed. \n",
+				    (ulong) compression_algorithm);
+		DBUG_RETURN(1);
+	}
+#endif
+
+	/* Hand the value over only now that it is known to be usable. */
+	*reinterpret_cast<ulong*>(save) = compression_algorithm;
+
+	DBUG_RETURN(0);
+}
+
/**********************************************************************
Issue a warning that the row is too big. */
UNIV_INTERN
diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
index 5cebc425769..2fc7f773a8d 100644
--- a/storage/innobase/handler/ha_innodb.h
+++ b/storage/innobase/handler/ha_innodb.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2000, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -56,6 +57,22 @@ typedef struct st_innobase_share {
/** Prebuilt structures in an InnoDB table handle used within MySQL */
struct row_prebuilt_t;
+/** Engine specific table options are defined using this struct */
+struct ha_table_option_struct
+{
+ bool page_compressed; /*!< Table is using page compression
+ if this option is true. */
+ int page_compression_level; /*!< Table page compression level
+ or UNIV_UNSPECIFIED. */
+ uint atomic_writes; /*!< Use atomic writes for this
+ table if this option is ON, or
+ if it is DEFAULT and
+ srv_use_atomic_writes=1.
+ Atomic writes are not used if
+ the value is OFF. */
+};
+
+
/** The class defining a handle to an Innodb table */
class ha_innobase: public handler
{
@@ -175,6 +192,8 @@ class ha_innobase: public handler
char* norm_name,
char* temp_path,
char* remote_path);
+ const char* check_table_options(THD *thd, TABLE* table,
+ HA_CREATE_INFO* create_info, const bool use_tablespace, const ulint file_format);
int create(const char *name, register TABLE *form,
HA_CREATE_INFO *create_info);
int truncate();
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index 3f393d9d431..96a779868fb 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2005, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -263,6 +264,22 @@ ha_innobase::check_if_supported_inplace_alter(
update_thd();
trx_search_latch_release_if_reserved(prebuilt->trx);
+ /* A change of an engine specific table option requires a rebuild of
+ the table, also when it is combined with other ALTER operations. */
+ if (ha_alter_info->handler_flags
+ & Alter_inplace_info::CHANGE_CREATE_OPTION) {
+ ha_table_option_struct *new_options= ha_alter_info->create_info->option_struct;
+ ha_table_option_struct *old_options= table->s->option_struct;
+
+ if (new_options->page_compressed != old_options->page_compressed ||
+ new_options->page_compression_level != old_options->page_compression_level ||
+ new_options->atomic_writes != old_options->atomic_writes) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+ }
+
if (ha_alter_info->handler_flags
& ~(INNOBASE_INPLACE_IGNORE
| INNOBASE_ALTER_NOREBUILD
@@ -3397,6 +3414,17 @@ ha_innobase::prepare_inplace_alter_table(
if (ha_alter_info->handler_flags
& Alter_inplace_info::CHANGE_CREATE_OPTION) {
+ /* Check engine specific table options */
+ if (const char* invalid_tbopt = check_table_options(
+ user_thd, altered_table,
+ ha_alter_info->create_info,
+ prebuilt->table->space != 0,
+ srv_file_format)) {
+ my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0),
+ table_type(), invalid_tbopt);
+ goto err_exit_no_heap;
+ }
+
if (const char* invalid_opt = create_options_are_invalid(
user_thd, altered_table,
ha_alter_info->create_info,
diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
index 6168ce0f0d2..2211243fbf3 100644
--- a/storage/innobase/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
@@ -92,6 +92,7 @@ static buf_page_desc_t i_s_page_type[] = {
{"COMPRESSED_BLOB", FIL_PAGE_TYPE_ZBLOB},
{"COMPRESSED_BLOB2", FIL_PAGE_TYPE_ZBLOB2},
{"IBUF_INDEX", I_S_PAGE_TYPE_IBUF},
+ {"PAGE COMPRESSED", FIL_PAGE_PAGE_COMPRESSED},
{"UNKNOWN", I_S_PAGE_TYPE_UNKNOWN}
};
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 4b2556524fa..ebed2bb62a6 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1499,6 +1500,11 @@ struct buf_page_t{
state == BUF_BLOCK_ZIP_PAGE and
zip.data == NULL means an active
buf_pool->watch */
+
+ ulint write_size; /*!< Write size is set when this
+ page is written for the first time;
+ if it is written again we check
+ whether a TRIM operation is needed. */
#ifndef UNIV_HOTBACKUP
buf_page_t* hash; /*!< node used in chaining to
buf_pool->page_hash or
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index f116720574b..66f9f7f4b7e 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2014, 2014, SkySQL Ab.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -36,6 +37,14 @@ Created 11/5/1995 Heikki Tuuri
/** Flag indicating if the page_cleaner is in active state. */
extern ibool buf_page_cleaner_is_active;
+/** Handled page counters for a single flush */
+struct flush_counters_t {
+ ulint flushed; /*!< number of dirty pages flushed */
+ ulint evicted; /*!< number of clean pages evicted */
+ ulint unzip_LRU_evicted;/*!< number of uncompressed page images
+ evicted */
+};
+
/********************************************************************//**
Remove a block from the flush list of modified blocks. */
UNIV_INTERN
@@ -279,6 +288,56 @@ buf_flush_get_dirty_pages_count(
#endif /* !UNIV_HOTBACKUP */
+/******************************************************************//**
+Start a buffer flush batch for LRU or flush list */
+ibool
+buf_flush_start(
+/*============*/
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
+ buf_flush_t flush_type); /*!< in: BUF_FLUSH_LRU
+ or BUF_FLUSH_LIST */
+/******************************************************************//**
+End a buffer flush batch for LRU or flush list */
+void
+buf_flush_end(
+/*==========*/
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
+ buf_flush_t flush_type); /*!< in: BUF_FLUSH_LRU
+ or BUF_FLUSH_LIST */
+/******************************************************************//**
+Gather the aggregated stats for both flush list and LRU list flushing */
+void
+buf_flush_common(
+/*=============*/
+ buf_flush_t flush_type, /*!< in: type of flush */
+ ulint page_count); /*!< in: number of pages flushed */
+
+/*******************************************************************//**
+This utility flushes dirty blocks from the end of the LRU list or flush_list.
+NOTE 1: in the case of an LRU flush the calling thread may own latches to
+pages: to avoid deadlocks, this function must be written so that it cannot
+end up waiting for these latches! NOTE 2: in the case of a flush list flush,
+the calling thread is not allowed to own any latches on pages! */
+void
+buf_flush_batch(
+/*============*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU or
+ BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
+ then the caller must not own any
+ latches on pages */
+ ulint min_n, /*!< in: desired minimum number of blocks
+ flushed (it is not guaranteed that the
+ actual number is that big, though) */
+ lsn_t lsn_limit, /*!< in: in the case of BUF_FLUSH_LIST
+ all blocks whose oldest_modification is
+ smaller than this should be flushed
+ (if their number does not exceed
+ min_n), otherwise ignored */
+ flush_counters_t* n); /*!< out: flushed/evicted page
+ counts */
+
+
#ifndef UNIV_NONINL
#include "buf0flu.ic"
#endif
diff --git a/storage/innobase/include/buf0mtflu.h b/storage/innobase/include/buf0mtflu.h
new file mode 100644
index 00000000000..0475335bbf5
--- /dev/null
+++ b/storage/innobase/include/buf0mtflu.h
@@ -0,0 +1,95 @@
+/*****************************************************************************
+
+Copyright (C) 2014 SkySQL Ab. All Rights Reserved.
+Copyright (C) 2014 Fusion-io. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/buf0mtflu.h
+Multi-threaded flush method interface function prototypes
+
+Created 06/02/2014 Jan Lindström jan.lindstrom@skysql.com
+ Dhananjoy Das DDas@fusionio.com
+***********************************************************************/
+
+#ifndef buf0mtflu_h
+#define buf0mtflu_h
+
+/******************************************************************//**
+Add an exit work item to the work queue to signal the multi-threaded
+flush threads that they should exit.
+*/
+void
+buf_mtflu_io_thread_exit(void);
+/*===========================*/
+
+/******************************************************************//**
+Initialize multi-threaded flush thread synchronization data.
+@return Initialized multi-threaded flush thread synchronization data. */
+void*
+buf_mtflu_handler_init(
+/*===================*/
+ ulint n_threads, /*!< in: Number of threads to create */
+ ulint wrk_cnt); /*!< in: Number of work items */
+
+/******************************************************************//**
+Return true if multi-threaded flush is initialized
+@return true if initialized, false if not */
+bool
+buf_mtflu_init_done(void);
+/*======================*/
+
+/*********************************************************************//**
+Clears up tail of the LRU lists:
+* Put replaceable pages at the tail of LRU to the free list
+* Flush dirty pages at the tail of LRU to the disk
+The depth to which we scan each buffer pool is controlled by dynamic
+config parameter innodb_LRU_scan_depth.
+@return total pages flushed */
+UNIV_INTERN
+ulint
+buf_mtflu_flush_LRU_tail(void);
+/*===========================*/
+
+/*******************************************************************//**
+Multi-threaded version of buf_flush_list
+*/
+bool
+buf_mtflu_flush_list(
+/*=================*/
+ ulint min_n, /*!< in: desired minimum number of blocks
+ flushed (it is not guaranteed that the
+ actual number is that big, though) */
+ lsn_t lsn_limit, /*!< in: in the case of BUF_FLUSH_LIST all
+ blocks whose oldest_modification is
+ smaller than this should be flushed
+ (if their number does not exceed
+ min_n), otherwise ignored */
+ ulint* n_processed); /*!< out: the number of pages
+ which were processed is passed
+ back to caller. Ignored if NULL */
+
+/*********************************************************************//**
+Set correct thread identifiers to io thread array based on
+information we have. */
+void
+buf_mtflu_set_thread_ids(
+/*=====================*/
+ ulint n_threads, /*!<in: Number of threads to fill */
+ void* ctx, /*!<in: thread context */
+ os_thread_id_t* thread_ids); /*!<in: thread id array */
+
+#endif
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index 9e007809471..db39d502db6 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -2,6 +2,7 @@
Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -42,6 +43,8 @@ Created 1/8/1996 Heikki Tuuri
#include "ut0byte.h"
#include "trx0types.h"
#include "row0types.h"
+#include "fsp0fsp.h"
+#include "dict0pagecompress.h"
extern bool innodb_table_stats_not_found;
extern bool innodb_index_stats_not_found;
@@ -918,7 +921,14 @@ dict_tf_set(
ulint* flags, /*!< in/out: table */
rec_format_t format, /*!< in: file format */
ulint zip_ssize, /*!< in: zip shift size */
- bool remote_path) /*!< in: table uses DATA DIRECTORY */
+ bool remote_path, /*!< in: table uses DATA DIRECTORY
+ */
+ bool page_compressed,/*!< in: table uses page compressed
+ pages */
+ ulint page_compression_level, /*!< in: table page compression
+ level */
+ ulint atomic_writes) /*!< in: table atomic
+ writes option value*/
__attribute__((nonnull));
/********************************************************************//**
Convert a 32 bit integer table flags to the 32 bit integer that is
@@ -946,6 +956,7 @@ dict_tf_get_zip_size(
/*=================*/
ulint flags) /*!< in: flags */
__attribute__((const));
+
/********************************************************************//**
Check whether the table uses the compressed compact page format.
@return compressed page size, or 0 if not compressed */
@@ -1848,6 +1859,7 @@ dict_table_get_index_on_first_col(
#endif /* !UNIV_HOTBACKUP */
+
#ifndef UNIV_NONINL
#include "dict0dict.ic"
#endif
diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic
index 066ffe47e4a..84d5c57f720 100644
--- a/storage/innobase/include/dict0dict.ic
+++ b/storage/innobase/include/dict0dict.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -537,9 +538,25 @@ dict_tf_is_valid(
ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags);
ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(flags);
ulint unused = DICT_TF_GET_UNUSED(flags);
+ ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(flags);
+ ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags);
+ ulint data_dir = DICT_TF_HAS_DATA_DIR(flags);
+ ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(flags);
/* Make sure there are no bits that we do not know about. */
if (unused != 0) {
+ fprintf(stderr,
+ "InnoDB: Error: table unused flags are %ld"
+ " in the data dictionary and are corrupted\n"
+ "InnoDB: Error: data dictionary flags are\n"
+ "InnoDB: compact %ld atomic_blobs %ld\n"
+ "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n"
+ "InnoDB: page_compression %ld page_compression_level %ld\n"
+ "InnoDB: atomic_writes %ld\n",
+ unused,
+ compact, atomic_blobs, unused, data_dir, zip_ssize,
+ page_compression, page_compression_level, atomic_writes
+ );
return(false);
@@ -550,12 +567,34 @@ dict_tf_is_valid(
data stored off-page in the clustered index. */
if (!compact) {
+ fprintf(stderr,
+ "InnoDB: Error: table compact flags are %ld"
+ " in the data dictionary and are corrupted\n"
+ "InnoDB: Error: data dictionary flags are\n"
+ "InnoDB: compact %ld atomic_blobs %ld\n"
+ "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n"
+ "InnoDB: page_compression %ld page_compression_level %ld\n"
+ "InnoDB: atomic_writes %ld\n",
+ compact, compact, atomic_blobs, unused, data_dir, zip_ssize,
+ page_compression, page_compression_level, atomic_writes
+ );
return(false);
}
} else if (zip_ssize) {
/* Antelope does not support COMPRESSED row format. */
+ fprintf(stderr,
+ "InnoDB: Error: table flags are %ld"
+ " in the data dictionary and are corrupted\n"
+ "InnoDB: Error: data dictionary flags are\n"
+ "InnoDB: compact %ld atomic_blobs %ld\n"
+ "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n"
+ "InnoDB: page_compression %ld page_compression_level %ld\n"
+ "InnoDB: atomic_writes %ld\n",
+ flags, compact, atomic_blobs, unused, data_dir, zip_ssize,
+ page_compression, page_compression_level, atomic_writes
+ );
return(false);
}
@@ -568,6 +607,58 @@ dict_tf_is_valid(
|| !atomic_blobs
|| zip_ssize > PAGE_ZIP_SSIZE_MAX) {
+ fprintf(stderr,
+ "InnoDB: Error: table compact flags are %ld in the data dictionary and are corrupted\n"
+ "InnoDB: Error: data dictionary flags are\n"
+ "InnoDB: compact %ld atomic_blobs %ld\n"
+ "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n"
+ "InnoDB: page_compression %ld page_compression_level %ld\n"
+ "InnoDB: atomic_writes %ld\n",
+ flags,
+ compact, atomic_blobs, unused, data_dir, zip_ssize,
+ page_compression, page_compression_level, atomic_writes
+
+ );
+ return(false);
+ }
+ }
+
+ if (page_compression || page_compression_level) {
+ /* Page compression format must have compact and
+ atomic_blobs and page_compression_level requires
+ page_compression */
+ if (!compact
+ || !page_compression
+ || !atomic_blobs) {
+ /* The flag fields are unsigned (ulint): print with %lu */
+ fprintf(stderr,
+ "InnoDB: Error: table flags are %lu in the data dictionary and are corrupted\n"
+ "InnoDB: Error: data dictionary flags are\n"
+ "InnoDB: compact %lu atomic_blobs %lu\n"
+ "InnoDB: unused %lu data_dir %lu zip_ssize %lu\n"
+ "InnoDB: page_compression %lu page_compression_level %lu\n"
+ "InnoDB: atomic_writes %lu\n",
+ flags, compact, atomic_blobs, unused, data_dir, zip_ssize,
+ page_compression, page_compression_level, atomic_writes
+ );
+ return(false);
+ }
+ }
+
+ if (atomic_writes) {
+ /* Values above ATOMIC_WRITES_OFF are not valid settings */
+ if(atomic_writes > ATOMIC_WRITES_OFF) {
+ /* The flag fields are unsigned (ulint): print with %lu */
+ fprintf(stderr,
+ "InnoDB: Error: table flags are %lu in the data dictionary and are corrupted\n"
+ "InnoDB: Error: data dictionary flags are\n"
+ "InnoDB: compact %lu atomic_blobs %lu\n"
+ "InnoDB: unused %lu data_dir %lu zip_ssize %lu\n"
+ "InnoDB: page_compression %lu page_compression_level %lu\n"
+ "InnoDB: atomic_writes %lu\n",
+ flags, compact, atomic_blobs, unused, data_dir, zip_ssize,
+ page_compression, page_compression_level, atomic_writes
+ );
return(false);
}
}
@@ -594,6 +685,11 @@ dict_sys_tables_type_validate(
ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(type);
ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(type);
ulint unused = DICT_TF_GET_UNUSED(type);
+ ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(type);
+ ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type);
+ ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(type);
+ /* No assertion on atomic_writes here: an out-of-range value is
+ reported and rejected with ULINT_UNDEFINED further below. */
/* The low order bit of SYS_TABLES.TYPE is always set to 1.
If the format is UNIV_FORMAT_B or higher, this field is the same
@@ -604,12 +700,16 @@ dict_sys_tables_type_validate(
if (redundant) {
if (zip_ssize || atomic_blobs) {
+ fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=Redundant, zip_ssize %lu atomic_blobs %lu\n",
+ zip_ssize, atomic_blobs);
return(ULINT_UNDEFINED);
}
}
/* Make sure there are no bits that we do not know about. */
if (unused) {
+ fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, unused %lu\n",
+ type, unused);
return(ULINT_UNDEFINED);
}
@@ -624,6 +724,8 @@ dict_sys_tables_type_validate(
} else if (zip_ssize) {
/* Antelope does not support COMPRESSED format. */
+ fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu\n",
+ type, zip_ssize);
return(ULINT_UNDEFINED);
}
@@ -633,11 +735,15 @@ dict_sys_tables_type_validate(
should be in N_COLS, but we already know about the
low_order_bit and DICT_N_COLS_COMPACT flags. */
if (!atomic_blobs) {
+ fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu atomic_blobs %lu\n",
+ type, zip_ssize, atomic_blobs);
return(ULINT_UNDEFINED);
}
/* Validate that the number is within allowed range. */
if (zip_ssize > PAGE_ZIP_SSIZE_MAX) {
+ fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu max %d\n",
+ type, zip_ssize, PAGE_ZIP_SSIZE_MAX);
return(ULINT_UNDEFINED);
}
}
@@ -647,6 +753,27 @@ dict_sys_tables_type_validate(
format, so the DATA_DIR flag is compatible with any other
table flags. However, it is not used with TEMPORARY tables.*/
+ if (page_compression || page_compression_level) {
+ /* page compressed row format must have low_order_bit and
+ atomic_blobs bits set and the DICT_N_COLS_COMPACT flag
+ should be in N_COLS, but we already know about the
+ low_order_bit and DICT_N_COLS_COMPACT flags. */
+
+ if (!atomic_blobs || !page_compression) {
+ fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, page_compression %lu page_compression_level %lu\n"
+ "InnoDB: Error: atomic_blobs %lu\n",
+ type, page_compression, page_compression_level, atomic_blobs);
+ return(ULINT_UNDEFINED);
+ }
+ }
+
+ /* Validate that the atomic writes number is within allowed range. */
+ if (atomic_writes > ATOMIC_WRITES_OFF) {
+ fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, atomic_writes %lu\n",
+ type, atomic_writes);
+ return(ULINT_UNDEFINED);
+ }
+
/* Return the validated SYS_TABLES.TYPE. */
return(type);
}
@@ -719,8 +846,16 @@ dict_tf_set(
ulint* flags, /*!< in/out: table flags */
rec_format_t format, /*!< in: file format */
ulint zip_ssize, /*!< in: zip shift size */
- bool use_data_dir) /*!< in: table uses DATA DIRECTORY */
+ bool use_data_dir, /*!< in: table uses DATA DIRECTORY
+ */
+ bool page_compressed,/*!< in: table uses page compressed
+ pages */
+ ulint page_compression_level, /*!< in: table page compression
+ level */
+ ulint atomic_writes) /*!< in: table atomic writes setup */
{
+ atomic_writes_t awrites = (atomic_writes_t)atomic_writes;
+
switch (format) {
case REC_FORMAT_REDUNDANT:
*flags = 0;
@@ -742,6 +877,19 @@ dict_tf_set(
break;
}
+ if (page_compressed) {
+ *flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS)
+ | (1 << DICT_TF_POS_PAGE_COMPRESSION)
+ | (page_compression_level << DICT_TF_POS_PAGE_COMPRESSION_LEVEL);
+
+ ut_ad(zip_ssize == 0);
+ ut_ad(dict_tf_get_page_compression(*flags) == TRUE);
+ ut_ad(dict_tf_get_page_compression_level(*flags) == page_compression_level);
+ }
+
+ *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES);
+ ut_a(dict_tf_get_atomic_writes(*flags) == awrites);
+
if (use_data_dir) {
*flags |= (1 << DICT_TF_POS_DATA_DIR);
}
@@ -765,6 +913,9 @@ dict_tf_to_fsp_flags(
ulint table_flags) /*!< in: dict_table_t::flags */
{
ulint fsp_flags;
+ ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags);
+ ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags);
+ ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags);
DBUG_EXECUTE_IF("dict_tf_to_fsp_flags_failure",
return(ULINT_UNDEFINED););
@@ -783,7 +934,20 @@ dict_tf_to_fsp_flags(
fsp_flags |= DICT_TF_HAS_DATA_DIR(table_flags)
? FSP_FLAGS_MASK_DATA_DIR : 0;
+ /* In addition, tablespace flags also contain if the page
+ compression is used for this table. */
+ fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION(fsp_flags, page_compression);
+
+ /* In addition, tablespace flags also contain page compression level
+ if page compression is used for this table. */
+ fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(fsp_flags, page_compression_level);
+
+ /* In addition, tablespace flags also contain flag if atomic writes
+ is used for this table */
+ fsp_flags |= FSP_FLAGS_SET_ATOMIC_WRITES(fsp_flags, atomic_writes);
+
ut_a(fsp_flags_is_valid(fsp_flags));
+ ut_a(dict_tf_verify_flags(table_flags, fsp_flags));
return(fsp_flags);
}
@@ -811,10 +975,15 @@ dict_sys_tables_type_to_tf(
/* Adjust bit zero. */
flags = redundant ? 0 : 1;
- /* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are the same. */
+ /* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION,
+ PAGE_COMPRESSION_LEVEL, ATOMIC_WRITES are the same. */
flags |= type & (DICT_TF_MASK_ZIP_SSIZE
| DICT_TF_MASK_ATOMIC_BLOBS
- | DICT_TF_MASK_DATA_DIR);
+ | DICT_TF_MASK_DATA_DIR
+ | DICT_TF_MASK_PAGE_COMPRESSION
+ | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL
+ | DICT_TF_MASK_ATOMIC_WRITES
+ );
return(flags);
}
@@ -842,10 +1011,14 @@ dict_tf_to_sys_tables_type(
/* Adjust bit zero. It is always 1 in SYS_TABLES.TYPE */
type = 1;
- /* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are the same. */
+ /* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION,
+ PAGE_COMPRESSION_LEVEL, ATOMIC_WRITES are the same. */
type |= flags & (DICT_TF_MASK_ZIP_SSIZE
| DICT_TF_MASK_ATOMIC_BLOBS
- | DICT_TF_MASK_DATA_DIR);
+ | DICT_TF_MASK_DATA_DIR
+ | DICT_TF_MASK_PAGE_COMPRESSION
+ | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL
+ | DICT_TF_MASK_ATOMIC_WRITES);
return(type);
}
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index 5f6811f0719..e64b4e18a2e 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -2,6 +2,7 @@
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -125,11 +126,26 @@ This flag prevents older engines from attempting to open the table and
allows InnoDB to update_create_info() accordingly. */
#define DICT_TF_WIDTH_DATA_DIR 1
+/**
+Width of the page compression flag
+*/
+#define DICT_TF_WIDTH_PAGE_COMPRESSION 1
+#define DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL 4
+
+/**
+Width of atomic writes flag
+DEFAULT=0, ON = 1, OFF = 2
+*/
+#define DICT_TF_WIDTH_ATOMIC_WRITES 2
+
/** Width of all the currently known table flags */
#define DICT_TF_BITS (DICT_TF_WIDTH_COMPACT \
+ DICT_TF_WIDTH_ZIP_SSIZE \
+ DICT_TF_WIDTH_ATOMIC_BLOBS \
- + DICT_TF_WIDTH_DATA_DIR)
+ + DICT_TF_WIDTH_DATA_DIR \
+ + DICT_TF_WIDTH_PAGE_COMPRESSION \
+ + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL \
+ + DICT_TF_WIDTH_ATOMIC_WRITES)
/** A mask of all the known/used bits in table flags */
#define DICT_TF_BIT_MASK (~(~0 << DICT_TF_BITS))
@@ -145,9 +161,19 @@ allows InnoDB to update_create_info() accordingly. */
/** Zero relative shift position of the DATA_DIR field */
#define DICT_TF_POS_DATA_DIR (DICT_TF_POS_ATOMIC_BLOBS \
+ DICT_TF_WIDTH_ATOMIC_BLOBS)
+/** Zero relative shift position of the PAGE_COMPRESSION field */
+#define DICT_TF_POS_PAGE_COMPRESSION (DICT_TF_POS_DATA_DIR \
+ + DICT_TF_WIDTH_DATA_DIR)
+/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */
+#define DICT_TF_POS_PAGE_COMPRESSION_LEVEL (DICT_TF_POS_PAGE_COMPRESSION \
+ + DICT_TF_WIDTH_PAGE_COMPRESSION)
+/** Zero relative shift position of the ATOMIC_WRITES field */
+#define DICT_TF_POS_ATOMIC_WRITES (DICT_TF_POS_PAGE_COMPRESSION_LEVEL \
+ + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)
+
/** Zero relative shift position of the start of the UNUSED bits */
-#define DICT_TF_POS_UNUSED (DICT_TF_POS_DATA_DIR \
- + DICT_TF_WIDTH_DATA_DIR)
+#define DICT_TF_POS_UNUSED (DICT_TF_POS_ATOMIC_WRITES \
+ + DICT_TF_WIDTH_ATOMIC_WRITES)
/** Bit mask of the COMPACT field */
#define DICT_TF_MASK_COMPACT \
@@ -165,6 +191,18 @@ allows InnoDB to update_create_info() accordingly. */
#define DICT_TF_MASK_DATA_DIR \
((~(~0 << DICT_TF_WIDTH_DATA_DIR)) \
<< DICT_TF_POS_DATA_DIR)
+/** Bit mask of the PAGE_COMPRESSION field */
+#define DICT_TF_MASK_PAGE_COMPRESSION \
+ ((~(~0 << DICT_TF_WIDTH_PAGE_COMPRESSION)) \
+ << DICT_TF_POS_PAGE_COMPRESSION)
+/** Bit mask of the PAGE_COMPRESSION_LEVEL field */
+#define DICT_TF_MASK_PAGE_COMPRESSION_LEVEL \
+ ((~(~0 << DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)) \
+ << DICT_TF_POS_PAGE_COMPRESSION_LEVEL)
+/** Bit mask of the ATOMIC_WRITES field */
+#define DICT_TF_MASK_ATOMIC_WRITES \
+ ((~(~0 << DICT_TF_WIDTH_ATOMIC_WRITES)) \
+ << DICT_TF_POS_ATOMIC_WRITES)
/** Return the value of the COMPACT field */
#define DICT_TF_GET_COMPACT(flags) \
@@ -182,6 +220,19 @@ allows InnoDB to update_create_info() accordingly. */
#define DICT_TF_HAS_DATA_DIR(flags) \
((flags & DICT_TF_MASK_DATA_DIR) \
>> DICT_TF_POS_DATA_DIR)
+/** Return the value of the PAGE_COMPRESSION field */
+#define DICT_TF_GET_PAGE_COMPRESSION(flags) \
+ ((flags & DICT_TF_MASK_PAGE_COMPRESSION) \
+ >> DICT_TF_POS_PAGE_COMPRESSION)
+/** Return the value of the PAGE_COMPRESSION_LEVEL field */
+#define DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags) \
+ ((flags & DICT_TF_MASK_PAGE_COMPRESSION_LEVEL) \
+ >> DICT_TF_POS_PAGE_COMPRESSION_LEVEL)
+/** Return the value of the ATOMIC_WRITES field */
+#define DICT_TF_GET_ATOMIC_WRITES(flags) \
+ ((flags & DICT_TF_MASK_ATOMIC_WRITES) \
+ >> DICT_TF_POS_ATOMIC_WRITES)
+
/** Return the contents of the UNUSED bits */
#define DICT_TF_GET_UNUSED(flags) \
(flags >> DICT_TF_POS_UNUSED)
diff --git a/storage/innobase/include/dict0pagecompress.h b/storage/innobase/include/dict0pagecompress.h
new file mode 100644
index 00000000000..19a2a6c52f3
--- /dev/null
+++ b/storage/innobase/include/dict0pagecompress.h
@@ -0,0 +1,94 @@
+/*****************************************************************************
+
+Copyright (C) 2013 SkySQL Ab. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/dict0pagecompress.h
+Helper functions for extracting/storing page compression information
+to dictionary.
+
+Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com
+***********************************************************************/
+
+#ifndef dict0pagecompress_h
+#define dict0pagecompress_h
+
+/********************************************************************//**
+Extract the page compression level from table flags.
+@return page compression level, or 0 if not compressed */
+UNIV_INLINE
+ulint
+dict_tf_get_page_compression_level(
+/*===============================*/
+ ulint flags) /*!< in: flags */
+ __attribute__((const));
+/********************************************************************//**
+Extract the page compression flag from table flags
+@return page compression flag, or false if not compressed */
+UNIV_INLINE
+ibool
+dict_tf_get_page_compression(
+/*==========================*/
+ ulint flags) /*!< in: flags */
+ __attribute__((const));
+
+/********************************************************************//**
+Check whether the table uses the page compressed page format.
+@return page compression level, or 0 if not compressed */
+UNIV_INLINE
+ulint
+dict_table_page_compression_level(
+/*==============================*/
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((const));
+
+/********************************************************************//**
+Verify that dictionary flags match tablespace flags
+@return true if flags match, false if not */
+UNIV_INLINE
+ibool
+dict_tf_verify_flags(
+/*=================*/
+ ulint table_flags, /*!< in: dict_table_t::flags */
+ ulint fsp_flags) /*!< in: fil_space_t::flags */
+ __attribute__((const));
+
+/********************************************************************//**
+Extract the atomic writes setting from table flags.
+@return atomic writes setting (an atomic_writes_t value, not a boolean) */
+UNIV_INLINE
+atomic_writes_t
+dict_tf_get_atomic_writes(
+/*======================*/
+ ulint flags) /*!< in: flags */
+ __attribute__((const));
+
+/********************************************************************//**
+Get the atomic writes setting of the table.
+@return atomic writes setting (an atomic_writes_t value, not a boolean) */
+UNIV_INLINE
+atomic_writes_t
+dict_table_get_atomic_writes(
+/*=========================*/
+ const dict_table_t* table); /*!< in: table */
+
+
+#ifndef UNIV_NONINL
+#include "dict0pagecompress.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/dict0pagecompress.ic b/storage/innobase/include/dict0pagecompress.ic
new file mode 100644
index 00000000000..811976434a8
--- /dev/null
+++ b/storage/innobase/include/dict0pagecompress.ic
@@ -0,0 +1,191 @@
+/*****************************************************************************
+
+Copyright (C) 2013 SkySQL Ab. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/dict0pagecompress.ic
+Inline implementation for helper functions for extracting/storing
+page compression and atomic writes information to dictionary.
+
+Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com
+***********************************************************************/
+
+/********************************************************************//**
+Verify that dictionary flags match tablespace flags; the first mismatch
+found is reported on stderr. @return true if flags match, false if not */
+UNIV_INLINE
+ibool
+dict_tf_verify_flags(
+/*=================*/
+ ulint table_flags, /*!< in: dict_table_t::flags */
+ ulint fsp_flags) /*!< in: fil_space_t::flags */
+{
+ ulint table_unused = DICT_TF_GET_UNUSED(table_flags);
+ ulint compact = DICT_TF_GET_COMPACT(table_flags);
+ ulint ssize = DICT_TF_GET_ZIP_SSIZE(table_flags);
+ ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(table_flags);
+ ulint data_dir = DICT_TF_HAS_DATA_DIR(table_flags);
+ ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags);
+ ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags);
+ ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags);
+ ulint post_antelope = FSP_FLAGS_GET_POST_ANTELOPE(fsp_flags);
+ ulint zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(fsp_flags);
+ ulint fsp_atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(fsp_flags);
+ ulint page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(fsp_flags);
+ ulint fsp_unused = FSP_FLAGS_GET_UNUSED(fsp_flags);
+ ulint fsp_page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(fsp_flags);
+ ulint fsp_page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(fsp_flags);
+ ulint fsp_atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(fsp_flags);
+
+ DBUG_EXECUTE_IF("dict_tf_verify_flags_failure",
+ return(FALSE);); /* injected failure must read as a mismatch */
+
+ ut_a(!table_unused);
+ ut_a(!fsp_unused);
+ ut_a(page_ssize == 0 || page_ssize != 0); /* silence compiler */
+ ut_a(compact == 0 || compact == 1); /* silence compiler */
+ ut_a(data_dir == 0 || data_dir == 1); /* silence compiler */
+ ut_a(post_antelope == 0 || post_antelope == 1); /* silence compiler */
+
+ if (ssize != zip_ssize) {
+ fprintf(stderr,
+ "InnoDB: Error: table flags has zip_ssize %lu"
+ " in the data dictionary\n"
+ "InnoDB: but the flags in file has zip_ssize %lu\n",
+ ssize, zip_ssize);
+ return (FALSE);
+ }
+ if (atomic_blobs != fsp_atomic_blobs) {
+ fprintf(stderr,
+ "InnoDB: Error: table flags has atomic_blobs %lu"
+ " in the data dictionary\n"
+ "InnoDB: but the flags in file has atomic_blobs %lu\n",
+ atomic_blobs, fsp_atomic_blobs);
+
+ return (FALSE);
+ }
+ if (page_compression != fsp_page_compression) {
+ fprintf(stderr,
+ "InnoDB: Error: table flags has page_compression %lu"
+ " in the data dictionary\n"
+ "InnoDB: but the flags in file has page_compression %lu\n",
+ page_compression, fsp_page_compression);
+
+ return (FALSE);
+ }
+ if (page_compression_level != fsp_page_compression_level) {
+ fprintf(stderr,
+ "InnoDB: Error: table flags has page_compression_level %lu"
+ " in the data dictionary\n"
+ "InnoDB: but the flags in file has page_compression_level %lu\n",
+ page_compression_level, fsp_page_compression_level);
+
+ return (FALSE);
+ }
+
+ if (atomic_writes != fsp_atomic_writes) {
+ fprintf(stderr,
+ "InnoDB: Error: table flags has atomic writes %lu"
+ " in the data dictionary\n"
+ "InnoDB: but the flags in file has atomic_writes %lu\n",
+ atomic_writes, fsp_atomic_writes);
+
+ return (FALSE);
+ }
+
+ return(TRUE);
+}
+
+/********************************************************************//**
+Read the page compression level stored in dict_table_t::flags.
+The flags are held in memory, so a debug assertion checks the value range.
+@return page compression level, or 0 if not compressed */
+UNIV_INLINE
+ulint
+dict_tf_get_page_compression_level(
+/*===============================*/
+ ulint flags) /*!< in: flags */
+{
+ const ulint level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags);
+
+ ut_ad(level <= 9);
+
+ return(level);
+}
+
+/********************************************************************//**
+Return the page compression level stored in the flags of a table.
+@return page compression level, or 0 if not compressed */
+UNIV_INLINE
+ulint
+dict_table_page_compression_level(
+/*==============================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ ut_ad(table);
+ ut_ad(dict_tf_get_page_compression(table->flags)); /* debug check: page compression flag is set */
+
+ return(dict_tf_get_page_compression_level(table->flags));
+}
+
+/********************************************************************//**
+Extract the page compression flag from table flags.
+@return true if page compressed, false if not */
+UNIV_INLINE
+ibool
+dict_tf_get_page_compression(
+/*=========================*/
+ ulint flags) /*!< in: flags */
+{
+ return(DICT_TF_GET_PAGE_COMPRESSION(flags));
+}
+
+/********************************************************************//**
+Tell whether the page compression flag is set in the flags of a table.
+@return true if page compressed, false if not */
+UNIV_INLINE
+ibool
+dict_table_is_page_compressed(
+/*==========================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ return(DICT_TF_GET_PAGE_COMPRESSION(table->flags));
+}
+
+/********************************************************************//**
+Extract the atomic writes field from table flags.
+@return the field value cast to atomic_writes_t */
+UNIV_INLINE
+atomic_writes_t
+dict_tf_get_atomic_writes(
+/*======================*/
+ ulint flags) /*!< in: flags */
+{
+ return((atomic_writes_t)DICT_TF_GET_ATOMIC_WRITES(flags));
+}
+
+/********************************************************************//**
+Fetch the atomic writes setting recorded in the flags of a table.
+@return enumerated value of atomic writes */
+UNIV_INLINE
+atomic_writes_t
+dict_table_get_atomic_writes(
+/*=========================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ return(dict_tf_get_atomic_writes(table->flags));
+}
diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h
index d34b6f7eab3..35430e8ea62 100644
--- a/storage/innobase/include/dict0types.h
+++ b/storage/innobase/include/dict0types.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -75,6 +76,13 @@ enum ib_quiesce_t {
QUIESCE_COMPLETE /*!< All done */
};
+/** Enum values for atomic_writes table option */
+typedef enum {
+ ATOMIC_WRITES_DEFAULT = 0,
+ ATOMIC_WRITES_ON = 1,
+ ATOMIC_WRITES_OFF = 2
+} atomic_writes_t;
+
/** Prefix for tmp tables, adopted from sql/table.h */
#define tmp_file_prefix "#sql"
#define tmp_file_prefix_length 4
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index da2ee1c5730..e3f9f86d414 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -131,11 +132,33 @@ extern fil_addr_t fil_addr_null;
data file (ibdata*, not *.ibd):
the file has been flushed to disk
at least up to this lsn */
+/** If page type is FIL_PAGE_COMPRESSED then the 8 bytes starting at
+FIL_PAGE_FILE_FLUSH_LSN are broken down as follows: */
+
+/** Control information version format (u8) */
+static const ulint FIL_PAGE_VERSION = FIL_PAGE_FILE_FLUSH_LSN;
+
+/** Compression algorithm (u8) */
+static const ulint FIL_PAGE_ALGORITHM_V1 = FIL_PAGE_VERSION + 1;
+
+/** Original page type (u16) */
+static const ulint FIL_PAGE_ORIGINAL_TYPE_V1 = FIL_PAGE_ALGORITHM_V1 + 1;
+
+/** Original data size in bytes (u16)*/
+static const ulint FIL_PAGE_ORIGINAL_SIZE_V1 = FIL_PAGE_ORIGINAL_TYPE_V1 + 2;
+
+/** Size after compression (u16)*/
+static const ulint FIL_PAGE_COMPRESS_SIZE_V1 = FIL_PAGE_ORIGINAL_SIZE_V1 + 2;
+
#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this
contains the space id of the page */
#define FIL_PAGE_SPACE_ID FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID
#define FIL_PAGE_DATA 38 /*!< start of the data on the page */
+/* Following are used when page compression is used */
+#define FIL_PAGE_COMPRESSED_SIZE 2 /*!< Number of bytes used to store
+ actual payload data size on
+ compressed pages. */
/* @} */
/** File page trailer @{ */
#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /*!< the low 4 bytes of this are used
@@ -146,6 +169,7 @@ extern fil_addr_t fil_addr_null;
/* @} */
/** File page types (values of FIL_PAGE_TYPE) @{ */
+#define FIL_PAGE_PAGE_COMPRESSED 34354 /*!< page compressed page */
#define FIL_PAGE_INDEX 17855 /*!< B-tree node */
#define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */
#define FIL_PAGE_INODE 3 /*!< Index node */
@@ -160,7 +184,8 @@ extern fil_addr_t fil_addr_null;
#define FIL_PAGE_TYPE_BLOB 10 /*!< Uncompressed BLOB page */
#define FIL_PAGE_TYPE_ZBLOB 11 /*!< First compressed BLOB page */
#define FIL_PAGE_TYPE_ZBLOB2 12 /*!< Subsequent compressed BLOB page */
-#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_ZBLOB2
+#define FIL_PAGE_TYPE_COMPRESSED 13 /*!< Compressed page */
+#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_COMPRESSED
/*!< Last page type */
/* @} */
@@ -228,6 +253,7 @@ struct fil_node_t {
ib_int64_t flush_counter;/*!< up to what
modification_counter value we have
flushed the modifications to disk */
+ ulint file_block_size;
UT_LIST_NODE_T(fil_node_t) chain;
/*!< link field for the file chain */
UT_LIST_NODE_T(fil_node_t) LRU;
@@ -305,6 +331,9 @@ struct fil_space_t {
bool is_in_unflushed_spaces;
/*!< true if this space is currently in
unflushed_spaces */
+ bool printed_compression_failure;
+ /*!< true if we have already printed
+ compression failure */
UT_LIST_NODE_T(fil_space_t) space_list;
/*!< list of all spaces */
ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
@@ -401,6 +430,7 @@ ulint
fil_space_get_type(
/*===============*/
ulint id); /*!< in: space id */
+
#endif /* !UNIV_HOTBACKUP */
/*******************************************************************//**
Appends a new file to the chain of files of a space. File must be closed.
@@ -580,8 +610,10 @@ fil_read_first_page(
#endif /* UNIV_LOG_ARCHIVE */
lsn_t* min_flushed_lsn, /*!< out: min of flushed
lsn values in data files */
- lsn_t* max_flushed_lsn) /*!< out: max of flushed
+ lsn_t* max_flushed_lsn, /*!< out: max of flushed
lsn values in data files */
+ ulint orig_space_id) /*!< in: file space id or
+ ULINT_UNDEFINED */
__attribute__((warn_unused_result));
/*******************************************************************//**
Increments the count of pending operation, if space is not being deleted.
@@ -945,8 +977,13 @@ fil_io(
void* buf, /*!< in/out: buffer where to store read data
or from where to write; in aio this must be
appropriately aligned */
- void* message) /*!< in: message for aio handler if non-sync
+ void* message, /*!< in: message for aio handler if non-sync
aio used, else ignored */
+ ulint* write_size) /*!< in/out: Actual write size initialized
+ after first successful trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
__attribute__((nonnull(8)));
/**********************************************************************//**
Waits for an aio operation to complete. This function is used to write the
@@ -1198,4 +1235,38 @@ fil_user_tablespace_restore_page(
write buffer */
#endif /* !UNIV_INNOCHECKSUM */
+
+/****************************************************************//**
+Acquire fil_system mutex */
+void
+fil_system_enter(void);
+/*==================*/
+/****************************************************************//**
+Release fil_system mutex */
+void
+fil_system_exit(void);
+/*==================*/
+
+#ifndef UNIV_INNOCHECKSUM
+/*******************************************************************//**
+Returns the table space by a given id, NULL if not found. */
+fil_space_t*
+fil_space_get_by_id(
+/*================*/
+ ulint id); /*!< in: space id */
+/*******************************************************************//**
+Return space name */
+char*
+fil_space_name(
+/*===========*/
+ fil_space_t* space); /*!< in: space */
+#endif
+
+/*******************************************************************//**
+Return page type name */
+const char*
+fil_get_page_type_name(
+/*===================*/
+ ulint page_type); /*!< in: FIL_PAGE_TYPE */
+
#endif /* fil0fil_h */
diff --git a/storage/innobase/include/fil0pagecompress.h b/storage/innobase/include/fil0pagecompress.h
new file mode 100644
index 00000000000..c797c221efc
--- /dev/null
+++ b/storage/innobase/include/fil0pagecompress.h
@@ -0,0 +1,145 @@
+/*****************************************************************************
+
+Copyright (C) 2013, 2014 SkySQL Ab. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*****************************************************************************/
+
+#ifndef fil0pagecompress_h
+#define fil0pagecompress_h
+
+#include "fsp0fsp.h"
+#include "fsp0pagecompress.h"
+
+/******************************************************************//**
+@file include/fil0pagecompress.h
+Helper functions for extracting/storing page compression and
+atomic writes information to table space.
+
+Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com
+***********************************************************************/
+
+/*******************************************************************//**
+Returns the page compression level flag of the space, or 0 if the space
+is not compressed. The tablespace must be cached in the memory cache.
+@return page compression level if page compressed, ULINT_UNDEFINED if space not found */
+ulint
+fil_space_get_page_compression_level(
+/*=================================*/
+ ulint id); /*!< in: space id */
+/*******************************************************************//**
+Returns the page compression flag of the space, or false if the space
+is not compressed. The tablespace must be cached in the memory cache.
+@return true if page compressed, false if not or space not found */
+ibool
+fil_space_is_page_compressed(
+/*=========================*/
+ ulint id); /*!< in: space id */
+/*******************************************************************//**
+Returns the page compression flag of the space, or false if the space
+is not compressed. The tablespace must be cached in the memory cache.
+@return true if page compressed, false if not or space not found */
+ibool
+fil_space_get_page_compressed(
+/*=========================*/
+ fil_space_t* space); /*!< in: space */
+/*******************************************************************//**
+Returns the atomic writes flag of the space, or false if the space
+is not using atomic writes. The tablespace must be cached in the memory cache.
+@return atomic write table option value */
+atomic_writes_t
+fil_space_get_atomic_writes(
+/*=========================*/
+ ulint id); /*!< in: space id */
+/*******************************************************************//**
+Find out whether the page is an index page or not
+@return true if page type index page, false if not */
+ibool
+fil_page_is_index_page(
+/*===================*/
+ byte *buf); /*!< in: page */
+
+/****************************************************************//**
+Get the name of the compression algorithm used for page
+compression.
+@return compression algorithm name or "UNKNOWN" if not known*/
+const char*
+fil_get_compression_alg_name(
+/*=========================*/
+ ulint comp_alg); /*!<in: compression algorithm number */
+
+/****************************************************************//**
+For page compressed pages compress the page before actual write
+operation.
+@return compressed page to be written*/
+byte*
+fil_compress_page(
+/*==============*/
+ ulint space_id, /*!< in: tablespace id of the
+ table. */
+ byte* buf, /*!< in: buffer from which to write; in aio
+ this must be appropriately aligned */
+ byte* out_buf, /*!< out: compressed buffer */
+ ulint len, /*!< in: length of input buffer.*/
+ ulint compression_level, /*!< in: compression level */
+ ulint block_size, /*!< in: block size */
+ ulint* out_len, /*!< out: actual length of compressed
+ page */
+ byte* lzo_mem); /*!< in: temporary memory used by LZO */
+
+/****************************************************************//**
+For page compressed pages decompress the page after actual read
+operation.
+The function is void; results are handed back via the out parameters. */
+void
+fil_decompress_page(
+/*================*/
+ byte* page_buf, /*!< in: preallocated buffer or NULL */
+ byte* buf, /*!< out: buffer from which to read; in aio
+ this must be appropriately aligned */
+ ulong len, /*!< in: length of output buffer.*/
+ ulint* write_size); /*!< in/out: Actual payload size of
+ the compressed data. */
+
+/****************************************************************//**
+Get space id from fil node
+@return space id*/
+ulint
+fil_node_get_space_id(
+/*==================*/
+ fil_node_t* node); /*!< in: Node where to get space id*/
+
+/****************************************************************//**
+Get block size from fil node
+@return block size*/
+ulint
+fil_node_get_block_size(
+ fil_node_t* node); /*!< in: Node where to get block
+ size */
+/*******************************************************************//**
+Find out whether the page is page compressed
+@return true if page is page compressed*/
+ibool
+fil_page_is_compressed(
+/*===================*/
+ byte *buf); /*!< in: page */
+
+/*******************************************************************//**
+Find out whether the page is page compressed with lzo method
+@return true if page is page compressed with lzo method*/
+ibool
+fil_page_is_lzo_compressed(
+/*=======================*/
+ byte *buf); /*!< in: page */
+#endif
diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h
index a587ccc9f20..87f1f5a636d 100644
--- a/storage/innobase/include/fsp0fsp.h
+++ b/storage/innobase/include/fsp0fsp.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -53,12 +54,21 @@ to the two Barracuda row formats COMPRESSED and DYNAMIC. */
/** Width of the DATA_DIR flag. This flag indicates that the tablespace
is found in a remote location, not the default data directory. */
#define FSP_FLAGS_WIDTH_DATA_DIR 1
+/** Number of flag bits used to indicate the page compression and compression level */
+#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION 1
+#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL 4
+/** Number of flag bits used to indicate atomic writes for this tablespace */
+#define FSP_FLAGS_WIDTH_ATOMIC_WRITES 2
+
/** Width of all the currently known tablespace flags */
#define FSP_FLAGS_WIDTH (FSP_FLAGS_WIDTH_POST_ANTELOPE \
+ FSP_FLAGS_WIDTH_ZIP_SSIZE \
+ FSP_FLAGS_WIDTH_ATOMIC_BLOBS \
+ FSP_FLAGS_WIDTH_PAGE_SSIZE \
- + FSP_FLAGS_WIDTH_DATA_DIR)
+ + FSP_FLAGS_WIDTH_DATA_DIR \
+ + FSP_FLAGS_WIDTH_PAGE_COMPRESSION \
+ + FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL \
+ + FSP_FLAGS_WIDTH_ATOMIC_WRITES)
/** A mask of all the known/used bits in tablespace flags */
#define FSP_FLAGS_MASK (~(~0 << FSP_FLAGS_WIDTH))
@@ -71,9 +81,20 @@ is found in a remote location, not the default data directory. */
/** Zero relative shift position of the ATOMIC_BLOBS field */
#define FSP_FLAGS_POS_ATOMIC_BLOBS (FSP_FLAGS_POS_ZIP_SSIZE \
+ FSP_FLAGS_WIDTH_ZIP_SSIZE)
-/** Zero relative shift position of the PAGE_SSIZE field */
-#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_BLOBS \
+/** Note that these need to be before the page size to be compatible with
+dictionary */
+/** Zero relative shift position of the PAGE_COMPRESSION field */
+#define FSP_FLAGS_POS_PAGE_COMPRESSION (FSP_FLAGS_POS_ATOMIC_BLOBS \
+ FSP_FLAGS_WIDTH_ATOMIC_BLOBS)
+/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */
+#define FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL (FSP_FLAGS_POS_PAGE_COMPRESSION \
+ + FSP_FLAGS_WIDTH_PAGE_COMPRESSION)
+/** Zero relative shift position of the ATOMIC_WRITES field */
+#define FSP_FLAGS_POS_ATOMIC_WRITES (FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL \
+ + FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL)
+ /** Zero relative shift position of the PAGE_SSIZE field */
+#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_WRITES \
+ + FSP_FLAGS_WIDTH_ATOMIC_WRITES)
/** Zero relative shift position of the start of the UNUSED bits */
#define FSP_FLAGS_POS_DATA_DIR (FSP_FLAGS_POS_PAGE_SSIZE \
+ FSP_FLAGS_WIDTH_PAGE_SSIZE)
@@ -101,6 +122,18 @@ is found in a remote location, not the default data directory. */
#define FSP_FLAGS_MASK_DATA_DIR \
((~(~0 << FSP_FLAGS_WIDTH_DATA_DIR)) \
<< FSP_FLAGS_POS_DATA_DIR)
+/** Bit mask of the PAGE_COMPRESSION field (unsigned: no negative shift) */
+#define FSP_FLAGS_MASK_PAGE_COMPRESSION \
+ ((~(~0U << FSP_FLAGS_WIDTH_PAGE_COMPRESSION)) \
+ << FSP_FLAGS_POS_PAGE_COMPRESSION)
+/** Bit mask of the PAGE_COMPRESSION_LEVEL field (unsigned: no negative shift) */
+#define FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL \
+ ((~(~0U << FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL)) \
+ << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL)
+/** Bit mask of the ATOMIC_WRITES field (unsigned: no negative shift) */
+#define FSP_FLAGS_MASK_ATOMIC_WRITES \
+ ((~(~0U << FSP_FLAGS_WIDTH_ATOMIC_WRITES)) \
+ << FSP_FLAGS_POS_ATOMIC_WRITES)
/** Return the value of the POST_ANTELOPE field */
#define FSP_FLAGS_GET_POST_ANTELOPE(flags) \
@@ -126,11 +159,38 @@ is found in a remote location, not the default data directory. */
#define FSP_FLAGS_GET_UNUSED(flags) \
(flags >> FSP_FLAGS_POS_UNUSED)
+/** Return the value of the PAGE_COMPRESSION field */
+#define FSP_FLAGS_GET_PAGE_COMPRESSION(flags) \
+ (((flags) & FSP_FLAGS_MASK_PAGE_COMPRESSION) \
+ >> FSP_FLAGS_POS_PAGE_COMPRESSION)
+/** Return the value of the PAGE_COMPRESSION_LEVEL field */
+#define FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags) \
+ (((flags) & FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL) \
+ >> FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL)
+/** Return the value of the ATOMIC_WRITES field */
+#define FSP_FLAGS_GET_ATOMIC_WRITES(flags) \
+ (((flags) & FSP_FLAGS_MASK_ATOMIC_WRITES) \
+ >> FSP_FLAGS_POS_ATOMIC_WRITES)
+
/** Set a PAGE_SSIZE into the correct bits in a given
tablespace flags. */
#define FSP_FLAGS_SET_PAGE_SSIZE(flags, ssize) \
(flags | (ssize << FSP_FLAGS_POS_PAGE_SSIZE))
+/** Yield the given tablespace flags with PAGE_COMPRESSION OR-ed into
+its bit position; the flags argument itself is not modified. */
+#define FSP_FLAGS_SET_PAGE_COMPRESSION(flags, compression) \
+ ((flags) | ((compression) << FSP_FLAGS_POS_PAGE_COMPRESSION))
+
+/** Yield the given tablespace flags with PAGE_COMPRESSION_LEVEL OR-ed into
+its bit position; the flags argument itself is not modified. */
+#define FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(flags, level) \
+ ((flags) | ((level) << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL))
+/** Yield the given tablespace flags with ATOMIC_WRITES OR-ed into
+its bit position; the flags argument itself is not modified. */
+#define FSP_FLAGS_SET_ATOMIC_WRITES(flags, atomics) \
+ ((flags) | ((atomics) << FSP_FLAGS_POS_ATOMIC_WRITES))
+
/* @} */
/* @defgroup Tablespace Header Constants (moved from fsp0fsp.c) @{ */
diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic
index 0d81e817cc9..3a3eb21a61a 100644
--- a/storage/innobase/include/fsp0fsp.ic
+++ b/storage/innobase/include/fsp0fsp.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -63,12 +64,17 @@ fsp_flags_is_valid(
ulint atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags);
ulint page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags);
ulint unused = FSP_FLAGS_GET_UNUSED(flags);
+ ulint page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(flags);
+ ulint page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags);
+ ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags);
DBUG_EXECUTE_IF("fsp_flags_is_valid_failure", return(false););
/* fsp_flags is zero unless atomic_blobs is set. */
/* Make sure there are no bits that we do not know about. */
if (unused != 0 || flags == 1) {
+ fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted unused %lu\n",
+ flags, unused);
return(false);
} else if (post_antelope) {
/* The Antelope row formats REDUNDANT and COMPACT did
@@ -76,6 +82,8 @@ fsp_flags_is_valid(
4-byte field is zero for Antelope row formats. */
if (!atomic_blobs) {
+ fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted atomic_blobs %lu\n",
+ flags, atomic_blobs);
return(false);
}
}
@@ -87,10 +95,14 @@ fsp_flags_is_valid(
externally stored parts. */
if (post_antelope || zip_ssize != 0) {
+ fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted zip_ssize %lu atomic_blobs %lu\n",
+ flags, zip_ssize, atomic_blobs);
return(false);
}
} else if (!post_antelope || zip_ssize > PAGE_ZIP_SSIZE_MAX) {
+ fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted zip_ssize %lu max %d\n",
+ flags, zip_ssize, PAGE_ZIP_SSIZE_MAX);
return(false);
} else if (page_ssize > UNIV_PAGE_SSIZE_MAX) {
@@ -98,12 +110,33 @@ fsp_flags_is_valid(
be zero for an original 16k page size.
Validate the page shift size is within allowed range. */
+ fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_ssize %lu max %lu\n",
+ flags, page_ssize, UNIV_PAGE_SSIZE_MAX);
return(false);
} else if (UNIV_PAGE_SIZE != UNIV_PAGE_SIZE_ORIG && !page_ssize) {
+ fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_ssize %lu max %lu:%d\n",
+ flags, page_ssize, UNIV_PAGE_SIZE, UNIV_PAGE_SIZE_ORIG);
return(false);
}
+ /* Page compression level requires page compression and atomic blobs
+ to be set */
+ if (page_compression_level || page_compression) {
+ if (!page_compression || !atomic_blobs) {
+ fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_compression %lu\n"
+ "InnoDB: Error: page_compression_level %lu atomic_blobs %lu\n",
+ flags, page_compression, page_compression_level, atomic_blobs);
+ return(false);
+ }
+ }
+
+ if (atomic_writes > ATOMIC_WRITES_OFF) {
+ fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted atomic_writes %lu\n",
+ flags, atomic_writes);
+ return (false);
+ }
+
#if UNIV_FORMAT_MAX != UNIV_FORMAT_B
# error "UNIV_FORMAT_MAX != UNIV_FORMAT_B, Add more validations."
#endif
@@ -312,3 +345,4 @@ xdes_calc_descriptor_page(
}
#endif /* !UNIV_INNOCHECKSUM */
+
diff --git a/storage/innobase/include/fsp0pagecompress.h b/storage/innobase/include/fsp0pagecompress.h
new file mode 100644
index 00000000000..5f943ee2b83
--- /dev/null
+++ b/storage/innobase/include/fsp0pagecompress.h
@@ -0,0 +1,84 @@
+/*****************************************************************************
+
+Copyright (C) 2013, 2015, MariaDB Corporation. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fsp0pagecompress.h
+Helper functions for extracting/storing page compression and
+atomic writes information to file space.
+
+Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com
+***********************************************************************/
+
+#ifndef fsp0pagecompress_h
+#define fsp0pagecompress_h
+
+/* Supported page compression methods */
+
+#define PAGE_UNCOMPRESSED 0
+#define PAGE_ZLIB_ALGORITHM 1
+#define PAGE_LZ4_ALGORITHM 2
+#define PAGE_LZO_ALGORITHM 3
+#define PAGE_LZMA_ALGORITHM 4
+#define PAGE_BZIP2_ALGORITHM 5
+#define PAGE_SNAPPY_ALGORITHM 6
+#define PAGE_ALGORITHM_LAST PAGE_SNAPPY_ALGORITHM
+
+/**********************************************************************//**
+Reads the page compression level from the first page of a tablespace.
+@return page compression level, or 0 if uncompressed */
+UNIV_INTERN
+ulint
+fsp_header_get_compression_level(
+/*=============================*/
+ const page_t* page); /*!< in: first page of a tablespace */
+
+/********************************************************************//**
+Determine if the tablespace is page compressed from tablespace flags.
+@return TRUE if page compressed, FALSE if not compressed */
+UNIV_INLINE
+ibool
+fsp_flags_is_page_compressed(
+/*=========================*/
+ ulint flags); /*!< in: tablespace flags */
+
+/********************************************************************//**
+Extract the page compression level from tablespace flags.
+A tablespace has only one physical page compression level
+whether that page is compressed or not.
+@return page compression level of the file-per-table tablespace,
+or zero if the table is not compressed. */
+UNIV_INLINE
+ulint
+fsp_flags_get_page_compression_level(
+/*=================================*/
+ ulint flags); /*!< in: tablespace flags */
+
+/********************************************************************//**
+Determine whether the tablespace is using atomic writes from tablespace flags.
+@return atomic writes table option value */
+UNIV_INLINE
+atomic_writes_t
+fsp_flags_get_atomic_writes(
+/*========================*/
+ ulint flags); /*!< in: tablespace flags */
+
+#ifndef UNIV_NONINL
+#include "fsp0pagecompress.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/fsp0pagecompress.ic b/storage/innobase/include/fsp0pagecompress.ic
new file mode 100644
index 00000000000..3e59106b05d
--- /dev/null
+++ b/storage/innobase/include/fsp0pagecompress.ic
@@ -0,0 +1,197 @@
+/*****************************************************************************
+
+Copyright (C) 2013,2014 SkySQL Ab. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fsp0pagecompress.ic
+Implementation for helper functions for extracting/storing page
+compression and atomic writes information to file space.
+
+Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com
+***********************************************************************/
+
+/********************************************************************//**
+Check the page compression bit of the given tablespace flags
+(dict_table_t::flags) using FSP_FLAGS_GET_PAGE_COMPRESSION().
+@return TRUE if page compressed, FALSE if not page compressed */
+UNIV_INLINE
+ibool
+fsp_flags_is_page_compressed(
+/*=========================*/
+	ulint	flags)	/*!< in: tablespace flags */
+{
+	ibool	is_compressed = FSP_FLAGS_GET_PAGE_COMPRESSION(flags);
+
+	return(is_compressed);
+}
+
+/********************************************************************//**
+Extract the page compression level from tablespace flags
+(dict_table_t::flags) using FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL().
+@return page compression level or 0 if not compressed */
+UNIV_INLINE
+ulint
+fsp_flags_get_page_compression_level(
+/*=================================*/
+	ulint	flags)	/*!< in: tablespace flags */
+{
+	ulint	level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags);
+
+	return(level);
+}
+
+/********************************************************************//**
+Extract the atomic writes setting from tablespace flags
+(dict_table_t::flags). The value read with
+FSP_FLAGS_GET_ATOMIC_WRITES() is converted to atomic_writes_t.
+@return atomic writes table option value */
+UNIV_INLINE
+atomic_writes_t
+fsp_flags_get_atomic_writes(
+/*========================*/
+	ulint	flags)	/*!< in: tablespace flags */
+{
+	const ulint	raw_value = FSP_FLAGS_GET_ATOMIC_WRITES(flags);
+
+	return((atomic_writes_t) raw_value);
+}
+
+/*******************************************************************//**
+Find out whether the page is an index page, i.e. whether the 2-byte
+value stored at offset FIL_PAGE_TYPE equals FIL_PAGE_INDEX.
+@return true if page type is index page, false if not */
+UNIV_INLINE
+ibool
+fil_page_is_index_page(
+/*===================*/
+	byte	*buf)	/*!< in: page */
+{
+	const ulint	page_type = mach_read_from_2(buf + FIL_PAGE_TYPE);
+
+	return(page_type == FIL_PAGE_INDEX);
+}
+
+/*******************************************************************//**
+Find out whether the page is page compressed, i.e. whether the 2-byte
+value stored at offset FIL_PAGE_TYPE equals FIL_PAGE_PAGE_COMPRESSED.
+@return true if page is page compressed, false if not */
+UNIV_INLINE
+ibool
+fil_page_is_compressed(
+/*===================*/
+	byte	*buf)	/*!< in: page */
+{
+	const ulint	page_type = mach_read_from_2(buf + FIL_PAGE_TYPE);
+
+	return(page_type == FIL_PAGE_PAGE_COMPRESSED);
+}
+
+/*******************************************************************//**
+Look up the flags of the space with fil_space_get_flags() and extract
+the page compression level from them. If the flags are zero or
+ULINT_UNDEFINED, that value is returned unchanged.
+The tablespace must be cached in the memory cache.
+@return page compression level, 0 if the space is not compressed,
+ULINT_UNDEFINED if space not found */
+UNIV_INLINE
+ulint
+fil_space_get_page_compression_level(
+/*=================================*/
+	ulint	id)	/*!< in: space id */
+{
+	const ulint	space_flags = fil_space_get_flags(id);
+
+	if (space_flags == 0 || space_flags == ULINT_UNDEFINED) {
+		/* Nothing to decode: pass 0 or ULINT_UNDEFINED through */
+		return(space_flags);
+	}
+
+	return(fsp_flags_get_page_compression_level(space_flags));
+}
+
+/*******************************************************************//**
+Find out whether the tablespace with the given id is page compressed.
+The space flags are fetched with fil_space_get_flags() and checked
+with fsp_flags_is_page_compressed().
+@return true if space is page compressed, false if space is not found
+(flags are ULINT_UNDEFINED), flags are zero, or space is not page
+compressed. */
+UNIV_INLINE
+ibool
+fil_space_is_page_compressed(
+/*=========================*/
+	ulint	id)	/*!< in: space id */
+{
+	ulint	flags;
+
+	flags = fil_space_get_flags(id);
+
+	if (flags && flags != ULINT_UNDEFINED) {
+
+		return(fsp_flags_is_page_compressed(flags));
+	}
+
+	/* Do not return flags here: ULINT_UNDEFINED is nonzero and would
+	be read as TRUE by callers although the space was not found. */
+	return(0);
+}
+
+/****************************************************************//**
+Get the name of the compression algorithm used for page
+compression. Every PAGE_*_ALGORITHM constant up to
+PAGE_ALGORITHM_LAST has a name; any other number maps to "UNKNOWN".
+@return compression algorithm name or "UNKNOWN" if not known*/
+UNIV_INLINE
+const char*
+fil_get_compression_alg_name(
+/*=========================*/
+	ulint	comp_alg)	/*!<in: compression algorithm number */
+{
+	switch (comp_alg) {
+	case PAGE_UNCOMPRESSED:
+		return("uncompressed");
+	case PAGE_ZLIB_ALGORITHM:
+		return("ZLIB");
+	case PAGE_LZ4_ALGORITHM:
+		return("LZ4");
+	case PAGE_LZO_ALGORITHM:
+		return("LZO");
+	case PAGE_LZMA_ALGORITHM:
+		return("LZMA");
+	case PAGE_BZIP2_ALGORITHM:
+		return("BZIP2");
+	case PAGE_SNAPPY_ALGORITHM:
+		return("SNAPPY");
+	default:
+		/* Unrecognized algorithm number: report it by name
+		instead of asserting, as documented above */
+		return("UNKNOWN");
+	}
+}
+
+/*******************************************************************//**
+Look up the flags of the space with fil_space_get_flags() and extract
+the atomic writes setting from them. If the flags are zero or
+ULINT_UNDEFINED, the value 0 converted to atomic_writes_t is returned.
+The tablespace must be cached in the memory cache.
+@return atomic writes table option value */
+UNIV_INLINE
+atomic_writes_t
+fil_space_get_atomic_writes(
+/*========================*/
+	ulint	id)	/*!< in: space id */
+{
+	const ulint	space_flags = fil_space_get_flags(id);
+
+	if (space_flags == 0 || space_flags == ULINT_UNDEFINED) {
+		/* No usable flags for this space */
+		return((atomic_writes_t) 0);
+	}
+
+	return(fsp_flags_get_atomic_writes(space_flags));
+}
+
+/*******************************************************************//**
+Find out whether the page is page compressed with the lzo method:
+the 2-byte value at FIL_PAGE_TYPE must be FIL_PAGE_PAGE_COMPRESSED and
+the 8-byte value at FIL_PAGE_FILE_FLUSH_LSN must be PAGE_LZO_ALGORITHM.
+@return true if page is page compressed with lzo method, false if not */
+UNIV_INLINE
+ibool
+fil_page_is_lzo_compressed(
+/*=======================*/
+	byte	*buf)	/*!< in: page */
+{
+	if (mach_read_from_2(buf + FIL_PAGE_TYPE)
+	    != FIL_PAGE_PAGE_COMPRESSED) {
+		/* Not a page compressed page: the algorithm field is
+		not examined at all */
+		return(0);
+	}
+
+	return(mach_read_from_8(buf + FIL_PAGE_FILE_FLUSH_LSN)
+	       == PAGE_LZO_ALGORITHM);
+}
diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h
index 94fd908ab0c..e5c1734b842 100644
--- a/storage/innobase/include/fsp0types.h
+++ b/storage/innobase/include/fsp0types.h
@@ -29,6 +29,7 @@ Created May 26, 2009 Vasil Dimov
#include "univ.i"
#include "fil0fil.h" /* for FIL_PAGE_DATA */
+#include "ut0byte.h"
/** @name Flags for inserting records in order
If records are inserted in order, there are the following
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index 5077c9e37eb..4126be51ae9 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -2,6 +2,7 @@
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
@@ -151,10 +152,9 @@ enum os_file_create_t {
#define OS_FILE_INSUFFICIENT_RESOURCE 78
#define OS_FILE_AIO_INTERRUPTED 79
#define OS_FILE_OPERATION_ABORTED 80
-
#define OS_FILE_ACCESS_VIOLATION 81
-
-#define OS_FILE_ERROR_MAX 100
+#define OS_FILE_OPERATION_NOT_SUPPORTED 125
+#define OS_FILE_ERROR_MAX 200
/* @} */
/** Types for aio operations @{ */
@@ -295,33 +295,35 @@ os_file_write
The wrapper functions have the prefix of "innodb_". */
#ifdef UNIV_PFS_IO
-# define os_file_create(key, name, create, purpose, type, success) \
+# define os_file_create(key, name, create, purpose, type, success, atomic_writes) \
pfs_os_file_create_func(key, name, create, purpose, type, \
- success, __FILE__, __LINE__)
+ success, atomic_writes, __FILE__, __LINE__)
# define os_file_create_simple(key, name, create, access, success) \
pfs_os_file_create_simple_func(key, name, create, access, \
success, __FILE__, __LINE__)
# define os_file_create_simple_no_error_handling( \
- key, name, create_mode, access, success) \
+ key, name, create_mode, access, success, atomic_writes) \
pfs_os_file_create_simple_no_error_handling_func( \
- key, name, create_mode, access, success, __FILE__, __LINE__)
+ key, name, create_mode, access, success, atomic_writes, __FILE__, __LINE__)
# define os_file_close(file) \
pfs_os_file_close_func(file, __FILE__, __LINE__)
# define os_aio(type, mode, name, file, buf, offset, \
- n, message1, message2) \
+ n, message1, message2, write_size, \
+ page_compression, page_compression_level) \
pfs_os_aio_func(type, mode, name, file, buf, offset, \
- n, message1, message2, __FILE__, __LINE__)
+ n, message1, message2, write_size, \
+ page_compression, page_compression_level, __FILE__, __LINE__)
-# define os_file_read(file, buf, offset, n) \
- pfs_os_file_read_func(file, buf, offset, n, __FILE__, __LINE__)
+# define os_file_read(file, buf, offset, n, compressed) \
+ pfs_os_file_read_func(file, buf, offset, n, compressed, __FILE__, __LINE__)
-# define os_file_read_no_error_handling(file, buf, offset, n) \
+# define os_file_read_no_error_handling(file, buf, offset, n, compressed) \
pfs_os_file_read_no_error_handling_func(file, buf, offset, n, \
- __FILE__, __LINE__)
+ compressed, __FILE__, __LINE__)
# define os_file_write(name, file, buf, offset, n) \
pfs_os_file_write_func(name, file, buf, offset, \
@@ -342,28 +344,28 @@ The wrapper functions have the prefix of "innodb_". */
/* If UNIV_PFS_IO is not defined, these I/O APIs point
to original un-instrumented file I/O APIs */
-# define os_file_create(key, name, create, purpose, type, success) \
- os_file_create_func(name, create, purpose, type, success)
+# define os_file_create(key, name, create, purpose, type, success, atomic_writes) \
+ os_file_create_func(name, create, purpose, type, success, atomic_writes)
-# define os_file_create_simple(key, name, create_mode, access, success) \
+# define os_file_create_simple(key, name, create_mode, access, success) \
os_file_create_simple_func(name, create_mode, access, success)
# define os_file_create_simple_no_error_handling( \
- key, name, create_mode, access, success) \
- os_file_create_simple_no_error_handling_func( \
- name, create_mode, access, success)
+ key, name, create_mode, access, success, atomic_writes) \
+ os_file_create_simple_no_error_handling_func( \
+ name, create_mode, access, success, atomic_writes)
# define os_file_close(file) os_file_close_func(file)
-# define os_aio(type, mode, name, file, buf, offset, n, message1, message2) \
+# define os_aio(type, mode, name, file, buf, offset, n, message1, message2, write_size, page_compression, page_compression_level) \
os_aio_func(type, mode, name, file, buf, offset, n, \
- message1, message2)
+ message1, message2, write_size, page_compression, page_compression_level)
-# define os_file_read(file, buf, offset, n) \
- os_file_read_func(file, buf, offset, n)
+# define os_file_read(file, buf, offset, n, compressed) \
+ os_file_read_func(file, buf, offset, n, compressed)
-# define os_file_read_no_error_handling(file, buf, offset, n) \
- os_file_read_no_error_handling_func(file, buf, offset, n)
+# define os_file_read_no_error_handling(file, buf, offset, n, compressed) \
+ os_file_read_no_error_handling_func(file, buf, offset, n, compressed)
# define os_file_write(name, file, buf, offset, n) \
os_file_write_func(name, file, buf, offset, n)
@@ -524,7 +526,9 @@ os_file_create_simple_no_error_handling_func(
OS_FILE_READ_WRITE, or
OS_FILE_READ_ALLOW_DELETE; the last option is
used by a backup program reading the file */
- ibool* success)/*!< out: TRUE if succeed, FALSE if error */
+ ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ ulint atomic_writes)/*!< in: atomic writes table option
+ value */
__attribute__((nonnull, warn_unused_result));
/****************************************************************//**
Tries to disable OS caching on an opened file descriptor. */
@@ -558,7 +562,9 @@ os_file_create_func(
async i/o or unbuffered i/o: look in the
function source code for the exact rules */
ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success)/*!< out: TRUE if succeed, FALSE if error */
+ ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ ulint atomic_writes)/*!< in: atomic writes table option
+ value */
__attribute__((nonnull, warn_unused_result));
/***********************************************************************//**
Deletes a file. The file has to be closed before calling this.
@@ -648,6 +654,8 @@ pfs_os_file_create_simple_no_error_handling_func(
OS_FILE_READ_ALLOW_DELETE; the last option is
used by a backup program reading the file */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ ulint atomic_writes,/*!< in: atomic writes table option
+ value */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
__attribute__((nonnull, warn_unused_result));
@@ -676,6 +684,8 @@ pfs_os_file_create_func(
function source code for the exact rules */
ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ ulint atomic_writes,/*!< in: atomic writes table option
+ value*/
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
__attribute__((nonnull, warn_unused_result));
@@ -706,6 +716,8 @@ pfs_os_file_read_func(
void* buf, /*!< in: buffer where to read */
os_offset_t offset, /*!< in: file offset where to read */
ulint n, /*!< in: number of bytes to read */
+ ibool compressed, /*!< in: is this file space
+ compressed ? */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
@@ -724,6 +736,8 @@ pfs_os_file_read_no_error_handling_func(
void* buf, /*!< in: buffer where to read */
os_offset_t offset, /*!< in: file offset where to read */
ulint n, /*!< in: number of bytes to read */
+ ibool compressed, /*!< in: is this file space
+ compressed ? */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
@@ -754,6 +768,15 @@ pfs_os_aio_func(
(can be used to identify a completed
aio operation); ignored if mode is
OS_AIO_SYNC */
+ ulint* write_size,/*!< in/out: Actual write size initialized
+ after first successful trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
+ ibool page_compression, /*!< in: is page compression used
+ on this file space */
+ ulint page_compression_level, /*!< page compression
+ level to be used */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
/*******************************************************************//**
@@ -910,7 +933,9 @@ os_file_read_func(
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
os_offset_t offset, /*!< in: file offset where to read */
- ulint n); /*!< in: number of bytes to read */
+ ulint n, /*!< in: number of bytes to read */
+ ibool compressed); /*!< in: is this file space
+ compressed ? */
/*******************************************************************//**
Rewind file to its start, read at most size - 1 bytes from it to str, and
NUL-terminate str. All errors are silently ignored. This function is
@@ -935,7 +960,9 @@ os_file_read_no_error_handling_func(
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
os_offset_t offset, /*!< in: file offset where to read */
- ulint n); /*!< in: number of bytes to read */
+ ulint n, /*!< in: number of bytes to read */
+ ibool compressed); /*!< in: is this file space
+ compressed ? */
/*******************************************************************//**
NOTE! Use the corresponding macro os_file_write(), not directly this
@@ -952,6 +979,7 @@ os_file_write_func(
const void* buf, /*!< in: buffer from which to write */
os_offset_t offset, /*!< in: file offset where to write */
ulint n); /*!< in: number of bytes to write */
+
/*******************************************************************//**
Check the existence and type of the given file.
@return TRUE if call succeeded */
@@ -1114,10 +1142,20 @@ os_aio_func(
(can be used to identify a completed
aio operation); ignored if mode is
OS_AIO_SYNC */
- void* message2);/*!< in: message for the aio handler
+ void* message2,/*!< in: message for the aio handler
(can be used to identify a completed
aio operation); ignored if mode is
OS_AIO_SYNC */
+ ulint* write_size,/*!< in/out: Actual write size initialized
+ after first successful trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
+ ibool page_compression, /*!< in: is page compression used
+ on this file space */
+ ulint page_compression_level); /*!< page compression
+ level to be used */
+
/************************************************************************//**
Wakes up all async i/o threads so that they know to exit themselves in
shutdown. */
@@ -1291,8 +1329,20 @@ os_file_handle_error_no_exit(
/*=========================*/
const char* name, /*!< in: name of a file or NULL */
const char* operation, /*!< in: operation */
- ibool on_error_silent);/*!< in: if TRUE then don't print
+ ibool on_error_silent,/*!< in: if TRUE then don't print
any message to the log. */
+ const char* file, /*!< in: file name */
+ const ulint line); /*!< in: line */
+
+/***********************************************************************//**
+Try to get number of bytes per sector from file system.
+@return file block size */
+UNIV_INTERN
+ulint
+os_file_get_block_size(
+/*===================*/
+ os_file_t file, /*!< in: handle to a file */
+ const char* name); /*!< in: file name */
#ifndef UNIV_NONINL
#include "os0file.ic"
diff --git a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic
index defd8204ba3..8e1cea585e6 100644
--- a/storage/innobase/include/os0file.ic
+++ b/storage/innobase/include/os0file.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -88,6 +89,8 @@ pfs_os_file_create_simple_no_error_handling_func(
OS_FILE_READ_ALLOW_DELETE; the last option is
used by a backup program reading the file */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ ulint atomic_writes,/*!< in: atomic writes table option
+ value */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
@@ -103,7 +106,7 @@ pfs_os_file_create_simple_no_error_handling_func(
name, src_file, src_line);
file = os_file_create_simple_no_error_handling_func(
- name, create_mode, access_type, success);
+ name, create_mode, access_type, success, atomic_writes);
register_pfs_file_open_end(locker, file);
@@ -134,6 +137,8 @@ pfs_os_file_create_func(
function source code for the exact rules */
ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ ulint atomic_writes, /*!< in: atomic writes table option
+ value */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
@@ -148,7 +153,7 @@ pfs_os_file_create_func(
: PSI_FILE_OPEN),
name, src_file, src_line);
- file = os_file_create_func(name, create_mode, purpose, type, success);
+ file = os_file_create_func(name, create_mode, purpose, type, success, atomic_writes);
register_pfs_file_open_end(locker, file);
@@ -210,6 +215,15 @@ pfs_os_aio_func(
(can be used to identify a completed
aio operation); ignored if mode is
OS_AIO_SYNC */
+ ulint* write_size,/*!< in/out: Actual write size initialized
+ after first successful trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
+ ibool page_compression, /*!< in: is page compression used
+ on this file space */
+ ulint page_compression_level, /*!< page compression
+ level to be used */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
@@ -225,7 +239,8 @@ pfs_os_aio_func(
src_file, src_line);
result = os_aio_func(type, mode, name, file, buf, offset,
- n, message1, message2);
+ n, message1, message2, write_size,
+ page_compression, page_compression_level);
register_pfs_file_io_end(locker, n);
@@ -246,6 +261,8 @@ pfs_os_file_read_func(
void* buf, /*!< in: buffer where to read */
os_offset_t offset, /*!< in: file offset where to read */
ulint n, /*!< in: number of bytes to read */
+ ibool compressed, /*!< in: is this file space
+ compressed ? */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
@@ -256,7 +273,7 @@ pfs_os_file_read_func(
register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
src_file, src_line);
- result = os_file_read_func(file, buf, offset, n);
+ result = os_file_read_func(file, buf, offset, n, compressed);
register_pfs_file_io_end(locker, n);
@@ -279,6 +296,8 @@ pfs_os_file_read_no_error_handling_func(
void* buf, /*!< in: buffer where to read */
os_offset_t offset, /*!< in: file offset where to read */
ulint n, /*!< in: number of bytes to read */
+ ibool compressed, /*!< in: is this file space
+ compressed ? */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
@@ -289,7 +308,7 @@ pfs_os_file_read_no_error_handling_func(
register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
src_file, src_line);
- result = os_file_read_no_error_handling_func(file, buf, offset, n);
+ result = os_file_read_no_error_handling_func(file, buf, offset, n, compressed);
register_pfs_file_io_end(locker, n);
diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h
index 2d90f47eefe..90fafb05047 100644
--- a/storage/innobase/include/srv0mon.h
+++ b/storage/innobase/include/srv0mon.h
@@ -2,6 +2,7 @@
Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@@ -163,6 +164,8 @@ enum monitor_id_t {
MONITOR_OVLD_BUF_POOL_PAGES_FREE,
MONITOR_OVLD_PAGE_CREATED,
MONITOR_OVLD_PAGES_WRITTEN,
+ MONITOR_OVLD_INDEX_PAGES_WRITTEN,
+ MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN,
MONITOR_OVLD_PAGES_READ,
MONITOR_OVLD_BYTE_READ,
MONITOR_OVLD_BYTE_WRITTEN,
@@ -304,6 +307,20 @@ enum monitor_id_t {
MONITOR_PAGE_DECOMPRESS,
MONITOR_PAD_INCREMENTS,
MONITOR_PAD_DECREMENTS,
+ /* New monitor variables for page compression */
+ MONITOR_OVLD_PAGE_COMPRESS_SAVED,
+ MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512,
+ MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024,
+ MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048,
+ MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096,
+ MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192,
+ MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384,
+ MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768,
+ MONITOR_OVLD_PAGES_PAGE_COMPRESSED,
+ MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP,
+ MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED,
+ MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED,
+ MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR,
/* Index related counters */
MONITOR_MODULE_INDEX,
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index d06a14a9153..24a1678c38b 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -103,6 +103,37 @@ struct srv_stats_t {
a disk page */
ulint_ctr_1_t buf_pool_reads;
+ /** Number of bytes saved by page compression */
+ ulint_ctr_64_t page_compression_saved;
+ /** Number of 512Byte TRIM by page compression */
+ ulint_ctr_64_t page_compression_trim_sect512;
+ /** Number of 1K TRIM by page compression */
+ ulint_ctr_64_t page_compression_trim_sect1024;
+ /** Number of 2K TRIM by page compression */
+ ulint_ctr_64_t page_compression_trim_sect2048;
+ /** Number of 4K TRIM by page compression */
+ ulint_ctr_64_t page_compression_trim_sect4096;
+ /** Number of 8K TRIM by page compression */
+ ulint_ctr_64_t page_compression_trim_sect8192;
+ /** Number of 16K TRIM by page compression */
+ ulint_ctr_64_t page_compression_trim_sect16384;
+ /** Number of 32K TRIM by page compression */
+ ulint_ctr_64_t page_compression_trim_sect32768;
+ /* Number of index pages written */
+ ulint_ctr_64_t index_pages_written;
+ /* Number of non index pages written */
+ ulint_ctr_64_t non_index_pages_written;
+ /* Number of pages compressed with page compression */
+ ulint_ctr_64_t pages_page_compressed;
+ /* Number of TRIM operations induced by page compression */
+ ulint_ctr_64_t page_compressed_trim_op;
+ /* Number of TRIM operations saved by using actual write size knowledge */
+ ulint_ctr_64_t page_compressed_trim_op_saved;
+ /* Number of pages decompressed with page compression */
+ ulint_ctr_64_t pages_page_decompressed;
+ /* Number of page compression errors */
+ ulint_ctr_64_t pages_page_compression_error;
+
/** Number of data read in total (in bytes) */
ulint_ctr_1_t data_read;
@@ -230,6 +261,31 @@ OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads.
Currently we support native aio on windows and linux */
extern my_bool srv_use_native_aio;
+
+/* Use trim operation */
+extern my_bool srv_use_trim;
+
+/* Use posix fallocate */
+#ifdef HAVE_POSIX_FALLOCATE
+extern my_bool srv_use_posix_fallocate;
+#endif
+
+/* Use atomic writes i.e disable doublewrite buffer */
+extern my_bool srv_use_atomic_writes;
+
+/* Compression algorithm*/
+extern ulong innodb_compression_algorithm;
+
+/* Number of flush threads */
+#define MTFLUSH_MAX_WORKER 64
+#define MTFLUSH_DEFAULT_WORKER 8
+
+/* Number of threads used for multi-threaded flush */
+extern long srv_mtflush_threads;
+
+/* If this flag is TRUE, then we will use multi threaded flush. */
+extern my_bool srv_use_mtflush;
+
#ifdef __WIN__
extern ibool srv_use_native_conditions;
#endif /* __WIN__ */
@@ -362,12 +418,8 @@ extern my_bool srv_stats_sample_traditional;
extern ibool srv_use_doublewrite_buf;
extern ulong srv_doublewrite_batch_size;
-extern ibool srv_use_atomic_writes;
-#ifdef HAVE_POSIX_FALLOCATE
-extern ibool srv_use_posix_fallocate;
-#endif
-
extern double srv_max_buf_pool_modified_pct;
+
extern ulong srv_max_purge_lag;
extern ulong srv_max_purge_lag_delay;
@@ -871,6 +923,38 @@ struct export_var_t{
ulint innodb_purge_view_trx_id_age; /*!< rw_max_trx_id
- purged view's min trx_id */
#endif /* UNIV_DEBUG */
+
+ ib_int64_t innodb_page_compression_saved;/*!< Number of bytes saved
+ by page compression */
+ ib_int64_t innodb_page_compression_trim_sect512;/*!< Number of 512b TRIM
+ by page compression */
+ ib_int64_t innodb_page_compression_trim_sect1024;/*!< Number of 1K TRIM
+ by page compression */
+ ib_int64_t innodb_page_compression_trim_sect2048;/*!< Number of 2K TRIM
+ by page compression */
+ ib_int64_t innodb_page_compression_trim_sect4096;/*!< Number of 4K byte TRIM
+ by page compression */
+ ib_int64_t innodb_page_compression_trim_sect8192;/*!< Number of 8K TRIM
+ by page compression */
+ ib_int64_t innodb_page_compression_trim_sect16384;/*!< Number of 16K TRIM
+ by page compression */
+ ib_int64_t innodb_page_compression_trim_sect32768;/*!< Number of 32K TRIM
+ by page compression */
+ ib_int64_t innodb_index_pages_written; /*!< Number of index pages
+ written */
+ ib_int64_t innodb_non_index_pages_written; /*!< Number of non index pages
+ written */
+ ib_int64_t innodb_pages_page_compressed;/*!< Number of pages
+ compressed by page compression */
+ ib_int64_t innodb_page_compressed_trim_op;/*!< Number of TRIM operations
+ induced by page compression */
+ ib_int64_t innodb_page_compressed_trim_op_saved;/*!< Number of TRIM operations
+ saved by page compression */
+ ib_int64_t innodb_pages_page_decompressed;/*!< Number of pages
+ decompressed by page
+ compression */
+ ib_int64_t innodb_pages_page_compression_error;/*!< Number of page
+ compression errors */
};
/** Thread slot in the thread table. */
diff --git a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h
index 40d502f4459..e1c19982ba5 100644
--- a/storage/innobase/include/srv0start.h
+++ b/storage/innobase/include/srv0start.h
@@ -37,7 +37,8 @@ Created 10/10/1995 Heikki Tuuri
#endif
/*********************************************************************//**
-Normalizes a directory path for Windows: converts slashes to backslashes. */
+Normalizes a directory path for Windows: converts slashes to backslashes.
+*/
UNIV_INTERN
void
srv_normalize_path_for_win(
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 3e17f65e4bc..76c0d21fab8 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -2,6 +2,7 @@
Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
+Copyright (c) 2013, 2015, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -331,6 +332,36 @@ typedef enum innodb_file_formats_enum innodb_file_formats_t;
/** The 2-logarithm of UNIV_PAGE_SIZE: */
#define UNIV_PAGE_SIZE_SHIFT srv_page_size_shift
+#ifdef HAVE_LZO
+#define IF_LZO(A,B) A
+#else
+#define IF_LZO(A,B) B
+#endif
+
+#ifdef HAVE_LZ4
+#define IF_LZ4(A,B) A
+#else
+#define IF_LZ4(A,B) B
+#endif
+
+#ifdef HAVE_LZMA
+#define IF_LZMA(A,B) A
+#else
+#define IF_LZMA(A,B) B
+#endif
+
+#ifdef HAVE_BZIP2
+#define IF_BZIP2(A,B) A
+#else
+#define IF_BZIP2(A,B) B
+#endif
+
+#ifdef HAVE_SNAPPY
+#define IF_SNAPPY(A,B) A
+#else
+#define IF_SNAPPY(A,B) B
+#endif
+
/** The universal page size of the database */
#define UNIV_PAGE_SIZE ((ulint) srv_page_size)
diff --git a/storage/innobase/include/ut0list.h b/storage/innobase/include/ut0list.h
index 29fc8669ce4..796a272db59 100644
--- a/storage/innobase/include/ut0list.h
+++ b/storage/innobase/include/ut0list.h
@@ -150,6 +150,15 @@ ib_list_is_empty(
/* out: TRUE if empty else */
const ib_list_t* list); /* in: list */
+/********************************************************************
+Get number of items on list.
+@return number of items on list */
+UNIV_INLINE
+ulint
+ib_list_len(
+/*========*/
+ const ib_list_t* list); /*!< in: list */
+
/* List. */
struct ib_list_t {
ib_list_node_t* first; /*!< first node */
diff --git a/storage/innobase/include/ut0list.ic b/storage/innobase/include/ut0list.ic
index d9dcb2eac99..7a7f53adb2f 100644
--- a/storage/innobase/include/ut0list.ic
+++ b/storage/innobase/include/ut0list.ic
@@ -58,3 +58,23 @@ ib_list_is_empty(
{
return(!(list->first || list->last));
}
+
+/********************************************************************
+Count the items on a list by walking it from list->first along the
+next pointers until a NULL node is reached.
+@return number of items on list */
+UNIV_INLINE
+ulint
+ib_list_len(
+/*========*/
+	const ib_list_t*	list)	/*!< in: list */
+{
+	ulint	count = 0;
+
+	for (const ib_list_node_t* cur = list->first;
+	     cur;
+	     cur = cur->next) {
+		count++;
+	}
+
+	return(count);
+}
diff --git a/storage/innobase/include/ut0wqueue.h b/storage/innobase/include/ut0wqueue.h
index 33385ddf2d4..9906e299808 100644
--- a/storage/innobase/include/ut0wqueue.h
+++ b/storage/innobase/include/ut0wqueue.h
@@ -95,6 +95,23 @@ ib_wqueue_timedwait(
ib_wqueue_t* wq, /* in: work queue */
ib_time_t wait_in_usecs); /* in: wait time in micro seconds */
+/********************************************************************
+Return the first item on the work queue, or NULL if the queue is empty.
+@return work item or NULL */
+void*
+ib_wqueue_nowait(
+/*=============*/
+ ib_wqueue_t* wq); /*!< in: work queue */
+
+/********************************************************************
+Get the number of items on the work queue.
+@return number of items on queue */
+ulint
+ib_wqueue_len(
+/*==========*/
+ ib_wqueue_t* wq); /*!< in: work queue */
+
+
/* Work queue. */
struct ib_wqueue_t {
ib_mutex_t mutex; /*!< mutex protecting everything */
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index 1850e798ed3..d65baa316d8 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -2,6 +2,7 @@
Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Google Inc.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -1272,7 +1273,7 @@ log_group_file_header_flush(
(ulint) (dest_offset / UNIV_PAGE_SIZE),
(ulint) (dest_offset % UNIV_PAGE_SIZE),
OS_FILE_LOG_BLOCK_SIZE,
- buf, group);
+ buf, group, 0);
srv_stats.os_log_pending_writes.dec();
}
@@ -1400,7 +1401,7 @@ loop:
fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0,
(ulint) (next_offset / UNIV_PAGE_SIZE),
(ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf,
- group);
+ group, 0);
srv_stats.os_log_pending_writes.dec();
@@ -1966,7 +1967,7 @@ log_group_checkpoint(
write_offset / UNIV_PAGE_SIZE,
write_offset % UNIV_PAGE_SIZE,
OS_FILE_LOG_BLOCK_SIZE,
- buf, ((byte*) group + 1));
+ buf, ((byte*) group + 1), 0);
ut_ad(((ulint) group & 0x1UL) == 0);
}
@@ -2046,7 +2047,7 @@ log_group_read_checkpoint_info(
fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->space_id, 0,
field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE,
- OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
+ OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL, 0);
}
/******************************************************//**
@@ -2340,7 +2341,7 @@ loop:
fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0,
(ulint) (source_offset / UNIV_PAGE_SIZE),
(ulint) (source_offset % UNIV_PAGE_SIZE),
- len, buf, NULL);
+ len, buf, NULL, 0);
start_lsn += len;
buf += len;
@@ -2405,7 +2406,7 @@ log_group_archive_file_header_write(
dest_offset / UNIV_PAGE_SIZE,
dest_offset % UNIV_PAGE_SIZE,
2 * OS_FILE_LOG_BLOCK_SIZE,
- buf, &log_archive_io);
+ buf, &log_archive_io, 0);
}
/******************************************************//**
@@ -2441,7 +2442,7 @@ log_group_archive_completed_header_write(
dest_offset % UNIV_PAGE_SIZE,
OS_FILE_LOG_BLOCK_SIZE,
buf + LOG_FILE_ARCH_COMPLETED,
- &log_archive_io);
+ &log_archive_io, 0);
}
/******************************************************//**
@@ -2569,7 +2570,7 @@ loop:
(ulint) (next_offset / UNIV_PAGE_SIZE),
(ulint) (next_offset % UNIV_PAGE_SIZE),
ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf,
- &log_archive_io);
+ &log_archive_io, 0);
start_lsn += len;
next_offset += len;
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index 9affec63252..3632c45d603 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -2,6 +2,7 @@
Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -2078,7 +2079,7 @@ recv_apply_log_recs_for_backup(void)
error = fil_io(OS_FILE_READ, true,
recv_addr->space, zip_size,
recv_addr->page_no, 0, zip_size,
- block->page.zip.data, NULL);
+ block->page.zip.data, NULL, 0);
if (error == DB_SUCCESS
&& !buf_zip_decompress(block, TRUE)) {
exit(1);
@@ -2088,7 +2089,7 @@ recv_apply_log_recs_for_backup(void)
recv_addr->space, 0,
recv_addr->page_no, 0,
UNIV_PAGE_SIZE,
- block->frame, NULL);
+ block->frame, NULL, 0);
}
if (error != DB_SUCCESS) {
@@ -2117,13 +2118,13 @@ recv_apply_log_recs_for_backup(void)
recv_addr->space, zip_size,
recv_addr->page_no, 0,
zip_size,
- block->page.zip.data, NULL);
+ block->page.zip.data, NULL, 0);
} else {
error = fil_io(OS_FILE_WRITE, true,
recv_addr->space, 0,
recv_addr->page_no, 0,
UNIV_PAGE_SIZE,
- block->frame, NULL);
+ block->frame, NULL, 0);
}
skip_this_recv_addr:
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
@@ -3082,7 +3083,7 @@ recv_recovery_from_checkpoint_start_func(
fil_io(OS_FILE_READ | OS_FILE_LOG, true, max_cp_group->space_id, 0,
0, 0, LOG_FILE_HDR_SIZE,
- log_hdr_buf, max_cp_group);
+ log_hdr_buf, max_cp_group, 0);
if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
(byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
@@ -3113,7 +3114,7 @@ recv_recovery_from_checkpoint_start_func(
fil_io(OS_FILE_WRITE | OS_FILE_LOG, true,
max_cp_group->space_id, 0,
0, 0, OS_FILE_LOG_BLOCK_SIZE,
- log_hdr_buf, max_cp_group);
+ log_hdr_buf, max_cp_group, 0);
}
#ifdef UNIV_LOG_ARCHIVE
@@ -3743,7 +3744,7 @@ ask_again:
/* Read the archive file header */
fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->archive_space_id, 0, 0,
- LOG_FILE_HDR_SIZE, buf, NULL);
+ LOG_FILE_HDR_SIZE, buf, NULL, 0);
/* Check if the archive file header is consistent */
@@ -3816,7 +3817,7 @@ ask_again:
fil_io(OS_FILE_READ | OS_FILE_LOG, true,
group->archive_space_id, read_offset / UNIV_PAGE_SIZE,
- read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
+ read_offset % UNIV_PAGE_SIZE, len, buf, NULL, 0);
ret = recv_scan_log_recs(
(buf_pool_get_n_pages()
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index e1c98f6ace3..525b537ddd7 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -2,6 +2,7 @@
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
+Copyright (c) 2013, 2015, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
@@ -42,8 +43,15 @@ Created 10/21/1995 Heikki Tuuri
#include "srv0srv.h"
#include "srv0start.h"
#include "fil0fil.h"
+#include "fil0pagecompress.h"
#include "buf0buf.h"
#include "srv0mon.h"
+#include "srv0srv.h"
+#ifdef HAVE_POSIX_FALLOCATE
+#include "unistd.h"
+#include "fcntl.h"
+/* posix_fallocate() is also available on platforms that do not ship
+linux/falloc.h, so only include that header where the build detected
+it. The FALLOC_FL_* fallback definitions further down in this file
+(under HAVE_FALLOCATE) cover the case where the header is absent. */
+# ifdef HAVE_LINUX_FALLOC_H
+#  include "linux/falloc.h"
+# endif /* HAVE_LINUX_FALLOC_H */
+#endif /* HAVE_POSIX_FALLOCATE */
#ifndef UNIV_HOTBACKUP
# include "os0sync.h"
# include "os0thread.h"
@@ -60,6 +68,38 @@ Created 10/21/1995 Heikki Tuuri
#include <libaio.h>
#endif
+#if defined(UNIV_LINUX) && defined(HAVE_SYS_IOCTL_H)
+# include <sys/ioctl.h>
+# ifndef DFS_IOCTL_ATOMIC_WRITE_SET
+# define DFS_IOCTL_ATOMIC_WRITE_SET _IOW(0x95, 2, uint)
+# endif
+#endif
+
+#if defined(UNIV_LINUX) && defined(HAVE_SYS_STATVFS_H)
+#include <sys/statvfs.h>
+#endif
+
+#if defined(UNIV_LINUX) && defined(HAVE_LINUX_FALLOC_H)
+#include <linux/falloc.h>
+#endif
+
+#if defined(HAVE_FALLOCATE)
+#ifndef FALLOC_FL_KEEP_SIZE
+#define FALLOC_FL_KEEP_SIZE 0x01
+#endif
+#ifndef FALLOC_FL_PUNCH_HOLE
+#define FALLOC_FL_PUNCH_HOLE 0x02
+#endif
+#endif
+
+#ifdef HAVE_LZO
+#include "lzo/lzo1x.h"
+#endif
+
+#ifdef HAVE_SNAPPY
+#include "snappy-c.h"
+#endif
+
/** Insert buffer segment id */
static const ulint IO_IBUF_SEGMENT = 0;
@@ -175,6 +215,32 @@ struct os_aio_slot_t{
and which can be used to identify
which pending aio operation was
completed */
+ ulint bitmap;
+
+ byte* page_compression_page; /*!< Memory allocated for
+ page compressed page and
+ freed after the write
+ has been completed */
+
+ ibool page_compression;
+ ulint page_compression_level;
+
+ ulint* write_size; /*!< Actual write size initialized
+ after first successful trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
+
+ byte* page_buf; /*!< Actual page buffer for
+ page compressed pages, do not
+ free this */
+
+ ibool page_compress_success;
+ /*!< TRUE if page compression was
+ successful, FALSE if not */
+
+ ulint file_block_size;/*!< file block size */
+
#ifdef WIN_ASYNC_IO
HANDLE handle; /*!< handle object we need in the
OVERLAPPED struct */
@@ -185,6 +251,7 @@ struct os_aio_slot_t{
int n_bytes; /* bytes written/read. */
int ret; /* AIO return code */
#endif /* WIN_ASYNC_IO */
+ byte *lzo_mem; /* Temporal memory used by LZO */
};
/** The asynchronous i/o array structure */
@@ -294,6 +361,88 @@ UNIV_INTERN ulint os_n_pending_writes = 0;
/** Number of pending read operations */
UNIV_INTERN ulint os_n_pending_reads = 0;
+/** After first fallocate failure we will disable os_file_trim */
+UNIV_INTERN ibool os_fallocate_failed = FALSE;
+
+/**********************************************************************//**
+Directly manipulate the allocated disk space by deallocating for the file referred to
+by fd for the byte range starting at offset and continuing for len bytes.
+Within the specified range, partial file system blocks are zeroed, and whole
+file system blocks are removed from the file. After a successful call,
+subsequent reads from this range will return zeroes.
+NOTE(review): fd, offset and len are not parameters of this function;
+presumably they are taken from the slot - confirm against the definition.
+@return TRUE if success, FALSE if error */
+UNIV_INTERN
+ibool
+os_file_trim(
+/*=========*/
+ os_aio_slot_t* slot); /*!< in: slot structure */
+
+/**********************************************************************//**
+Allocate memory for the temporary buffer used for page compression. This
+buffer is freed later. */
+UNIV_INTERN
+void
+os_slot_alloc_page_buf(
+/*===================*/
+ os_aio_slot_t* slot); /*!< in: slot structure */
+
+#ifdef HAVE_LZO
+/**********************************************************************//**
+Allocate the temporary working memory used for page compression when
+the LZO compression method is used. Only declared when the build has
+LZO support (HAVE_LZO). */
+UNIV_INTERN
+void
+os_slot_alloc_lzo_mem(
+/*===================*/
+ os_aio_slot_t* slot); /*!< in: slot structure */
+#endif /* HAVE_LZO */
+
+/****************************************************************//**
+Does error handling when a file operation fails. Forward declaration:
+the function is called by code that precedes its definition in this file.
+@return TRUE if we should retry the operation */
+ibool
+os_file_handle_error_no_exit(
+/*=========================*/
+ const char* name, /*!< in: name of a file or NULL */
+ const char* operation, /*!< in: operation */
+ ibool on_error_silent,/*!< in: if TRUE then don't print
+ any message to the log. */
+ const char* file, /*!< in: source file of the call site
+ (callers pass __FILE__) */
+ const ulint line); /*!< in: source line of the call site
+ (callers pass __LINE__) */
+
+/****************************************************************//**
+Asks the operating system to switch the given file handle to atomic
+writes. The request is issued with the DFS_IOCTL_ATOMIC_WRITE_SET ioctl;
+builds where that request code is not defined always report failure.
+@return TRUE if atomic writes were enabled, FALSE otherwise */
+static __attribute__((warn_unused_result))
+ibool
+os_file_set_atomic_writes(
+/*======================*/
+	const char*	name	/*!< in: name of the file */
+	__attribute__((unused)),
+	os_file_t	file	/*!< in: handle to the file */
+	__attribute__((unused)))
+{
+#ifndef DFS_IOCTL_ATOMIC_WRITE_SET
+	/* No atomic write request code is known for this build. */
+	fprintf(stderr, "InnoDB: Error: trying to enable atomic writes on "
+		"file %s on non-supported platform!\n", name);
+	return(FALSE);
+#else
+	int	enable = 1;
+
+	if (ioctl(file, DFS_IOCTL_ATOMIC_WRITE_SET, &enable) == 0) {
+		return(TRUE);
+	}
+
+	/* The request was rejected: warn, then let the common error
+	handler report the operating system error without exiting. */
+	fprintf(stderr, "InnoDB: Warning:Trying to enable atomic writes on "
+		"file %s on non-supported platform!\n", name);
+	os_file_handle_error_no_exit(name, "ioctl", FALSE, __FILE__, __LINE__);
+	return(FALSE);
+#endif
+}
+
+
#ifdef UNIV_DEBUG
# ifndef UNIV_HOTBACKUP
/**********************************************************************//**
@@ -439,6 +588,19 @@ os_file_get_last_error_low(
"InnoDB: because of either a thread exit"
" or an application request.\n"
"InnoDB: Retry attempt is made.\n");
+ } else if (err == ECANCELED || err == ENOTTY) {
+ if (strerror(err) != NULL) {
+ fprintf(stderr,
+ "InnoDB: Error number %d"
+ " means '%s'.\n",
+ err, strerror(err));
+ }
+
+ if(srv_use_atomic_writes) {
+ fprintf(stderr,
+ "InnoDB: Error trying to enable atomic writes on "
+ "non-supported destination!\n");
+ }
} else {
fprintf(stderr,
"InnoDB: Some operating system error numbers"
@@ -503,6 +665,19 @@ os_file_get_last_error_low(
"InnoDB: The error means mysqld does not have"
" the access rights to\n"
"InnoDB: the directory.\n");
+ } else if (err == ECANCELED || err == ENOTTY) {
+ if (strerror(err) != NULL) {
+ fprintf(stderr,
+ "InnoDB: Error number %d"
+ " means '%s'.\n",
+ err, strerror(err));
+ }
+
+ if(srv_use_atomic_writes) {
+ fprintf(stderr,
+ "InnoDB: Error trying to enable atomic writes on "
+ "non-supported destination!\n");
+ }
} else {
if (strerror(err) != NULL) {
fprintf(stderr,
@@ -536,6 +711,9 @@ os_file_get_last_error_low(
case ENOTDIR:
case EISDIR:
return(OS_FILE_PATH_ERROR);
+ case ECANCELED:
+ case ENOTTY:
+ return(OS_FILE_OPERATION_NOT_SUPPORTED);
case EAGAIN:
if (srv_use_native_aio) {
return(OS_FILE_AIO_RESOURCES_RESERVED);
@@ -582,9 +760,11 @@ os_file_handle_error_cond_exit(
const char* operation, /*!< in: operation */
ibool should_exit, /*!< in: call exit(3) if unknown error
and this parameter is TRUE */
- ibool on_error_silent)/*!< in: if TRUE then don't print
+ ibool on_error_silent,/*!< in: if TRUE then don't print
any message to the log iff it is
an unknown non-fatal error */
+ const char* file, /*!< in: file name */
+ const ulint line) /*!< in: line */
{
ulint err;
@@ -614,6 +794,9 @@ os_file_handle_error_cond_exit(
" InnoDB: Disk is full. Try to clean the disk"
" to free space.\n");
+ /* line is an unsigned ulint: print it with %lu and an explicit
+ cast so that the conversion matches the argument type. */
+ fprintf(stderr,
+ " InnoDB: at file %s and at line %lu\n", file,
+ (unsigned long) line);
+
os_has_said_disk_full = TRUE;
fflush(stderr);
@@ -655,6 +838,9 @@ os_file_handle_error_cond_exit(
? " Cannot continue operation" : "");
}
+ /* line is an unsigned ulint: print it with %lu and an explicit
+ cast so that the conversion matches the argument type. */
+ fprintf(stderr,
+ " InnoDB: at file %s and at line %lu\n", file,
+ (unsigned long) line);
+
if (should_exit) {
exit(1);
}
@@ -671,10 +857,12 @@ ibool
os_file_handle_error(
/*=================*/
const char* name, /*!< in: name of a file or NULL */
- const char* operation) /*!< in: operation */
+ const char* operation, /*!< in: operation */
+ const char* file, /*!< in: file name */
+ const ulint line) /*!< in: line */
{
/* exit in case of unknown error */
- return(os_file_handle_error_cond_exit(name, operation, TRUE, FALSE));
+ return(os_file_handle_error_cond_exit(name, operation, TRUE, FALSE, file, line));
}
/****************************************************************//**
@@ -685,12 +873,14 @@ os_file_handle_error_no_exit(
/*=========================*/
const char* name, /*!< in: name of a file or NULL */
const char* operation, /*!< in: operation */
- ibool on_error_silent)/*!< in: if TRUE then don't print
+ ibool on_error_silent,/*!< in: if TRUE then don't print
any message to the log. */
+ const char* file, /*!< in: file name */
+ const ulint line) /*!< in: line */
{
/* don't exit in case of unknown error */
return(os_file_handle_error_cond_exit(
- name, operation, FALSE, on_error_silent));
+ name, operation, FALSE, on_error_silent, file, line));
}
#undef USE_FILE_LOCK
@@ -830,7 +1020,7 @@ os_file_opendir(
if (dir == INVALID_HANDLE_VALUE) {
if (error_is_fatal) {
- os_file_handle_error(dirname, "opendir");
+ os_file_handle_error(dirname, "opendir", __FILE__, __LINE__);
}
return(NULL);
@@ -841,7 +1031,7 @@ os_file_opendir(
dir = opendir(dirname);
if (dir == NULL && error_is_fatal) {
- os_file_handle_error(dirname, "opendir");
+ os_file_handle_error(dirname, "opendir", __FILE__, __LINE__);
}
return(dir);
@@ -863,7 +1053,7 @@ os_file_closedir(
ret = FindClose(dir);
if (!ret) {
- os_file_handle_error_no_exit(NULL, "closedir", FALSE);
+ os_file_handle_error_no_exit(NULL, "closedir", FALSE, __FILE__, __LINE__);
return(-1);
}
@@ -875,7 +1065,7 @@ os_file_closedir(
ret = closedir(dir);
if (ret) {
- os_file_handle_error_no_exit(NULL, "closedir", FALSE);
+ os_file_handle_error_no_exit(NULL, "closedir", FALSE, __FILE__, __LINE__);
}
return(ret);
@@ -947,7 +1137,7 @@ next_file:
return(1);
} else {
- os_file_handle_error_no_exit(NULL, "readdir_next_file", FALSE);
+ os_file_handle_error_no_exit(NULL, "readdir_next_file", FALSE, __FILE__, __LINE__);
return(-1);
}
#else
@@ -1033,7 +1223,7 @@ next_file:
goto next_file;
}
- os_file_handle_error_no_exit(full_path, "stat", FALSE);
+ os_file_handle_error_no_exit(full_path, "stat", FALSE, __FILE__, __LINE__);
ut_free(full_path);
@@ -1084,7 +1274,7 @@ os_file_create_directory(
&& !fail_if_exists))) {
os_file_handle_error_no_exit(
- pathname, "CreateDirectory", FALSE);
+ pathname, "CreateDirectory", FALSE, __FILE__, __LINE__);
return(FALSE);
}
@@ -1097,7 +1287,7 @@ os_file_create_directory(
if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) {
/* failure */
- os_file_handle_error_no_exit(pathname, "mkdir", FALSE);
+ os_file_handle_error_no_exit(pathname, "mkdir", FALSE, __FILE__, __LINE__);
return(FALSE);
}
@@ -1207,7 +1397,7 @@ os_file_create_simple_func(
retry = os_file_handle_error(
name, create_mode == OS_FILE_OPEN ?
- "open" : "create");
+ "open" : "create", __FILE__, __LINE__);
} else {
*success = TRUE;
@@ -1275,7 +1465,7 @@ os_file_create_simple_func(
retry = os_file_handle_error(
name,
create_mode == OS_FILE_OPEN
- ? "open" : "create");
+ ? "open" : "create", __FILE__, __LINE__);
} else {
*success = TRUE;
retry = false;
@@ -1317,9 +1507,12 @@ os_file_create_simple_no_error_handling_func(
OS_FILE_READ_WRITE, or
OS_FILE_READ_ALLOW_DELETE; the last option is
used by a backup program reading the file */
- ibool* success)/*!< out: TRUE if succeed, FALSE if error */
+ ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ ulint atomic_writes) /*! in: atomic writes table option
+ value */
{
os_file_t file;
+ atomic_writes_t awrites = (atomic_writes_t) atomic_writes;
*success = FALSE;
#ifdef __WIN__
@@ -1380,6 +1573,23 @@ os_file_create_simple_no_error_handling_func(
attributes,
NULL); // No template file
+ /* If we have proper file handle and atomic writes should be used,
+ try to set atomic writes and if that fails when creating a new
+ table, produce a error. If atomic writes are used on existing
+ file, ignore error and use traditional writes for that file */
+ if (file != INVALID_HANDLE_VALUE
+ && (awrites == ATOMIC_WRITES_ON ||
+ (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT))
+ && !os_file_set_atomic_writes(name, file)) {
+ if (create_mode == OS_FILE_CREATE) {
+ fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n");
+ CloseHandle(file);
+ os_file_delete_if_exists_func(name);
+ *success = FALSE;
+ file = INVALID_HANDLE_VALUE;
+ }
+ }
+
*success = (file != INVALID_HANDLE_VALUE);
#else /* __WIN__ */
int create_flag;
@@ -1440,6 +1650,24 @@ os_file_create_simple_no_error_handling_func(
}
#endif /* USE_FILE_LOCK */
+ /* If we have proper file handle and atomic writes should be used,
+ try to set atomic writes and if that fails when creating a new
+ table, produce a error. If atomic writes are used on existing
+ file, ignore error and use traditional writes for that file */
+ if (file != -1
+ && (awrites == ATOMIC_WRITES_ON ||
+ (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT))
+ && !os_file_set_atomic_writes(name, file)) {
+ if (create_mode == OS_FILE_CREATE) {
+ fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n");
+ close(file);
+ os_file_delete_if_exists_func(name);
+ *success = FALSE;
+ file = -1;
+ }
+ }
+
+
#endif /* __WIN__ */
return(file);
@@ -1524,12 +1752,15 @@ os_file_create_func(
async i/o or unbuffered i/o: look in the
function source code for the exact rules */
ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success)/*!< out: TRUE if succeed, FALSE if error */
+ ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ ulint atomic_writes) /*! in: atomic writes table option
+ value */
{
os_file_t file;
ibool retry;
ibool on_error_no_exit;
ibool on_error_silent;
+ atomic_writes_t awrites = (atomic_writes_t) atomic_writes;
#ifdef __WIN__
DBUG_EXECUTE_IF(
@@ -1662,9 +1893,9 @@ os_file_create_func(
if (on_error_no_exit) {
retry = os_file_handle_error_no_exit(
- name, operation, on_error_silent);
+ name, operation, on_error_silent, __FILE__, __LINE__);
} else {
- retry = os_file_handle_error(name, operation);
+ retry = os_file_handle_error(name, operation, __FILE__, __LINE__);
}
} else {
*success = TRUE;
@@ -1673,6 +1904,22 @@ os_file_create_func(
} while (retry);
+ /* If we have proper file handle and atomic writes should be used,
+ try to set atomic writes and if that fails when creating a new
+ table, produce a error. If atomic writes are used on existing
+ file, ignore error and use traditional writes for that file */
+ if (file != INVALID_HANDLE_VALUE && type == OS_DATA_FILE
+ && (awrites == ATOMIC_WRITES_ON ||
+ (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT))
+ && !os_file_set_atomic_writes(name, file)) {
+ if (create_mode == OS_FILE_CREATE) {
+ fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n");
+ CloseHandle(file);
+ os_file_delete_if_exists_func(name);
+ *success = FALSE;
+ file = INVALID_HANDLE_VALUE;
+ }
+ }
#else /* __WIN__ */
int create_flag;
const char* mode_str = NULL;
@@ -1747,9 +1994,9 @@ os_file_create_func(
if (on_error_no_exit) {
retry = os_file_handle_error_no_exit(
- name, operation, on_error_silent);
+ name, operation, on_error_silent, __FILE__, __LINE__);
} else {
- retry = os_file_handle_error(name, operation);
+ retry = os_file_handle_error(name, operation, __FILE__, __LINE__);
}
} else {
*success = TRUE;
@@ -1801,6 +2048,22 @@ os_file_create_func(
}
#endif /* USE_FILE_LOCK */
+ /* If we have proper file handle and atomic writes should be used,
+ try to set atomic writes and if that fails when creating a new
+ table, produce a error. If atomic writes are used on existing
+ file, ignore error and use traditional writes for that file */
+ if (file != -1 && type == OS_DATA_FILE
+ && (awrites == ATOMIC_WRITES_ON ||
+ (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT))
+ && !os_file_set_atomic_writes(name, file)) {
+ if (create_mode == OS_FILE_CREATE) {
+ fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n");
+ close(file);
+ os_file_delete_if_exists_func(name);
+ *success = FALSE;
+ file = -1;
+ }
+ }
#endif /* __WIN__ */
return(file);
@@ -1859,7 +2122,7 @@ loop:
ret = unlink(name);
if (ret != 0 && errno != ENOENT) {
- os_file_handle_error_no_exit(name, "delete", FALSE);
+ os_file_handle_error_no_exit(name, "delete", FALSE, __FILE__, __LINE__);
return(false);
}
@@ -1923,7 +2186,7 @@ loop:
ret = unlink(name);
if (ret != 0) {
- os_file_handle_error_no_exit(name, "delete", FALSE);
+ os_file_handle_error_no_exit(name, "delete", FALSE, __FILE__, __LINE__);
return(false);
}
@@ -1967,7 +2230,7 @@ os_file_rename_func(
return(TRUE);
}
- os_file_handle_error_no_exit(oldpath, "rename", FALSE);
+ os_file_handle_error_no_exit(oldpath, "rename", FALSE, __FILE__, __LINE__);
return(FALSE);
#else
@@ -1976,7 +2239,7 @@ os_file_rename_func(
ret = rename(oldpath, newpath);
if (ret != 0) {
- os_file_handle_error_no_exit(oldpath, "rename", FALSE);
+ os_file_handle_error_no_exit(oldpath, "rename", FALSE, __FILE__, __LINE__);
return(FALSE);
}
@@ -2005,7 +2268,7 @@ os_file_close_func(
return(TRUE);
}
- os_file_handle_error(NULL, "close");
+ os_file_handle_error(NULL, "close", __FILE__, __LINE__);
return(FALSE);
#else
@@ -2014,7 +2277,7 @@ os_file_close_func(
ret = close(file);
if (ret == -1) {
- os_file_handle_error(NULL, "close");
+ os_file_handle_error(NULL, "close", __FILE__, __LINE__);
return(FALSE);
}
@@ -2106,6 +2369,11 @@ os_file_set_size(
current_size = 0;
+#ifdef UNIV_DEBUG
+ /* Debug trace of the resize request. The sizes are file offsets,
+ so cast them to match the %lu conversions, as the posix_fallocate
+ error message in this function originally did. */
+ fprintf(stderr, "InnoDB: Note: File %s current_size %lu extended_size %lu\n",
+ name, (long unsigned) os_file_get_size(file), (long unsigned) size);
+#endif
+
#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
@@ -2114,15 +2382,15 @@ os_file_set_size(
fprintf(stderr, "InnoDB: Error: preallocating file "
"space for file \'%s\' failed. Current size "
"%lu, desired size %lu\n",
- name, (long unsigned) current_size, (long unsigned) size);
- os_file_handle_error_no_exit(name, "posix_fallocate", FALSE);
+ /* Keep the casts of the replaced line so that the
+ arguments match the %lu conversions. */
+ name, (long unsigned) current_size, (long unsigned) size);
+ os_file_handle_error_no_exit(name, "posix_fallocate", FALSE, __FILE__, __LINE__);
+
return(FALSE);
}
return(TRUE);
}
#endif
-
/* Write up to 1 megabyte at a time. */
buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE))
* UNIV_PAGE_SIZE;
@@ -2149,6 +2417,7 @@ os_file_set_size(
}
ret = os_file_write(name, file, buf, current_size, n_bytes);
+
if (!ret) {
ut_free(buf2);
goto error_handling;
@@ -2279,7 +2548,7 @@ os_file_flush_func(
return(TRUE);
}
- os_file_handle_error(NULL, "flush");
+ os_file_handle_error(NULL, "flush", __FILE__, __LINE__);
/* It is a fatal error if a file flush does not succeed, because then
the database can get corrupt on disk */
@@ -2333,7 +2602,7 @@ os_file_flush_func(
ib_logf(IB_LOG_LEVEL_ERROR, "The OS said file flush did not succeed");
- os_file_handle_error(NULL, "flush");
+ os_file_handle_error(NULL, "flush", __FILE__, __LINE__);
/* It is a fatal error if a file flush does not succeed, because then
the database can get corrupt on disk */
@@ -2571,7 +2840,9 @@ os_file_read_func(
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
os_offset_t offset, /*!< in: file offset where to read */
- ulint n) /*!< in: number of bytes to read */
+ ulint n, /*!< in: number of bytes to read */
+ ibool compressed) /*!< in: is this file space
+ compressed ? */
{
#ifdef __WIN__
BOOL ret;
@@ -2639,6 +2910,14 @@ try_again:
os_mutex_exit(os_file_count_mutex);
if (ret && len == n) {
+ /* Note that InnoDB also writes files that are not formatted
+ as file spaces and that have no FIL_PAGE_TYPE field, so we
+ should use the information on whether the file space is
+ compressed. NOTE(review): 'compressed' is not checked here. */
+ if (fil_page_is_compressed((byte *)buf)) {
+ fil_decompress_page(NULL, (byte *)buf, len, NULL);
+ }
+
return(TRUE);
}
#else /* __WIN__ */
@@ -2651,6 +2930,13 @@ try_again:
ret = os_file_pread(file, buf, n, offset);
if ((ulint) ret == n) {
+ /* Note that InnoDB also writes files that are not formatted
+ as file spaces and that have no FIL_PAGE_TYPE field, so we
+ should use the information on whether the file space is
+ compressed. NOTE(review): 'compressed' is not checked here. */
+ if (fil_page_is_compressed((byte *)buf)) {
+ fil_decompress_page(NULL, (byte *)buf, n, NULL);
+ }
return(TRUE);
} else if (ret == -1) {
ib_logf(IB_LOG_LEVEL_ERROR,
@@ -2667,7 +2953,7 @@ try_again:
#ifdef __WIN__
error_handling:
#endif
- retry = os_file_handle_error(NULL, "read");
+ retry = os_file_handle_error(NULL, "read", __FILE__, __LINE__);
if (retry) {
goto try_again;
@@ -2702,7 +2988,9 @@ os_file_read_no_error_handling_func(
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
os_offset_t offset, /*!< in: file offset where to read */
- ulint n) /*!< in: number of bytes to read */
+ ulint n, /*!< in: number of bytes to read */
+ ibool compressed) /*!< in: is this file space
+ compressed ? */
{
#ifdef __WIN__
BOOL ret;
@@ -2770,6 +3058,15 @@ try_again:
os_mutex_exit(os_file_count_mutex);
if (ret && len == n) {
+
+ /* Note that InnoDB also writes files that are not formatted
+ as file spaces and that have no FIL_PAGE_TYPE field, so we
+ should use the information on whether the file space is
+ compressed. NOTE(review): 'compressed' is not checked here. */
+ if (fil_page_is_compressed((byte *)buf)) {
+ fil_decompress_page(NULL, (byte *)buf, n, NULL);
+ }
+
return(TRUE);
}
#else /* __WIN__ */
@@ -2782,6 +3079,13 @@ try_again:
ret = os_file_pread(file, buf, n, offset);
if ((ulint) ret == n) {
+ /* Note that InnoDB also writes files that are not formatted
+ as file spaces and that have no FIL_PAGE_TYPE field, so we
+ should use the information on whether the file space is
+ compressed. NOTE(review): 'compressed' is not checked here. */
+ if (fil_page_is_compressed((byte *)buf)) {
+ fil_decompress_page(NULL, (byte *)buf, n, NULL);
+ }
return(TRUE);
} else if (ret == -1) {
ib_logf(IB_LOG_LEVEL_ERROR,
@@ -2798,7 +3102,7 @@ try_again:
#ifdef __WIN__
error_handling:
#endif
- retry = os_file_handle_error_no_exit(NULL, "read", FALSE);
+ retry = os_file_handle_error_no_exit(NULL, "read", FALSE, __FILE__, __LINE__);
if (retry) {
goto try_again;
@@ -2869,6 +3173,7 @@ os_file_write_func(
ut_ad(buf);
ut_ad(n > 0);
+
retry:
low = (DWORD) offset & 0xFFFFFFFF;
high = (DWORD) (offset >> 32);
@@ -3073,7 +3378,7 @@ os_file_status(
} else if (ret) {
/* file exists, but stat call failed */
- os_file_handle_error_no_exit(path, "stat", FALSE);
+ os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__);
return(FALSE);
}
@@ -3101,7 +3406,7 @@ os_file_status(
} else if (ret) {
/* file exists, but stat call failed */
- os_file_handle_error_no_exit(path, "stat", FALSE);
+ os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__);
return(FALSE);
}
@@ -3150,7 +3455,7 @@ os_file_get_status(
} else if (ret) {
/* file exists, but stat call failed */
- os_file_handle_error_no_exit(path, "stat", FALSE);
+ os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__);
return(DB_FAIL);
@@ -3203,7 +3508,7 @@ os_file_get_status(
} else if (ret) {
/* file exists, but stat call failed */
- os_file_handle_error_no_exit(path, "stat", FALSE);
+ os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__);
return(DB_FAIL);
@@ -3770,7 +4075,8 @@ os_aio_array_create(
array->slots = static_cast<os_aio_slot_t*>(
ut_malloc(n * sizeof(*array->slots)));
- memset(array->slots, 0x0, sizeof(n * sizeof(*array->slots)));
+ memset(array->slots, 0x0, n * sizeof(*array->slots));
+
#ifdef __WIN__
array->handles = static_cast<HANDLE*>(ut_malloc(n * sizeof(HANDLE)));
#endif /* __WIN__ */
@@ -3858,8 +4164,8 @@ os_aio_array_free(
/*==============*/
os_aio_array_t*& array) /*!< in, own: array to free */
{
-#ifdef WIN_ASYNC_IO
ulint i;
+#ifdef WIN_ASYNC_IO
for (i = 0; i < array->n_slots; i++) {
os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i);
@@ -3881,6 +4187,19 @@ os_aio_array_free(
}
#endif /* LINUX_NATIVE_AIO */
+ for (i = 0; i < array->n_slots; i++) {
+ os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i);
+ if (slot->page_compression_page) {
+ ut_free(slot->page_compression_page);
+ slot->page_compression_page = NULL;
+ }
+
+ if (slot->lzo_mem) {
+ ut_free(slot->lzo_mem);
+ slot->lzo_mem = NULL;
+ }
+ }
+
ut_free(array->slots);
ut_free(array);
@@ -4214,7 +4533,16 @@ os_aio_array_reserve_slot(
void* buf, /*!< in: buffer where to read or from which
to write */
os_offset_t offset, /*!< in: file offset */
- ulint len) /*!< in: length of the block to read or write */
+ ulint len, /*!< in: length of the block to read or write */
+ ulint* write_size,/*!< in/out: Actual write size initialized
+ after first successful trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
+ ibool page_compression, /*!< in: is page compression used
+ on this file space */
+ ulint page_compression_level) /*!< page compression
+ level to be used */
{
os_aio_slot_t* slot = NULL;
#ifdef WIN_ASYNC_IO
@@ -4304,6 +4632,61 @@ found:
slot->buf = static_cast<byte*>(buf);
slot->offset = offset;
slot->io_already_done = FALSE;
+ slot->page_compress_success = FALSE;
+ slot->write_size = write_size;
+ slot->page_compression_level = page_compression_level;
+ slot->page_compression = page_compression;
+
+ if (message1) {
+ slot->file_block_size = fil_node_get_block_size(message1);
+ }
+
+ /* If the space is page compressed and this is write operation
+ then we compress the page */
+ if (message1 && type == OS_FILE_WRITE && page_compression ) {
+ ulint real_len = len;
+ byte* tmp = NULL;
+
+ /* Release the array mutex while compressing */
+ os_mutex_exit(array->mutex);
+
+ // We allocate memory for page compressed buffer if and only
+ // if it is not yet allocated.
+ os_slot_alloc_page_buf(slot);
+
+#ifdef HAVE_LZO
+ if (innodb_compression_algorithm == 3) {
+ os_slot_alloc_lzo_mem(slot);
+ }
+#endif
+
+ /* Call page compression */
+ tmp = fil_compress_page(fil_node_get_space_id(slot->message1),
+ (byte *)buf,
+ slot->page_buf,
+ len,
+ page_compression_level,
+ fil_node_get_block_size(slot->message1),
+ &real_len,
+ slot->lzo_mem
+ );
+
+ /* If compression succeeded, set up the length and buffer */
+ if (tmp != buf) {
+ len = real_len;
+ buf = slot->page_buf;
+ slot->len = real_len;
+ slot->page_compress_success = TRUE;
+ } else {
+ slot->page_compress_success = FALSE;
+ }
+
+ /* Take array mutex back, not sure if this is really needed
+ below */
+ os_mutex_enter(array->mutex);
+
+ }
+
#ifdef WIN_ASYNC_IO
control = &slot->control;
@@ -4578,10 +4961,19 @@ os_aio_func(
(can be used to identify a completed
aio operation); ignored if mode is
OS_AIO_SYNC */
- void* message2)/*!< in: message for the aio handler
+ void* message2,/*!< in: message for the aio handler
(can be used to identify a completed
aio operation); ignored if mode is
OS_AIO_SYNC */
+ ulint* write_size,/*!< in/out: Actual write size initialized
+ after first successful trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
+ ibool page_compression, /*!< in: is page compression used
+ on this file space */
+ ulint page_compression_level) /*!< page compression
+ level to be used */
{
os_aio_array_t* array;
os_aio_slot_t* slot;
@@ -4631,7 +5023,8 @@ os_aio_func(
and os_file_write_func() */
if (type == OS_FILE_READ) {
- ret = os_file_read_func(file, buf, offset, n);
+ ret = os_file_read_func(file, buf, offset, n,
+ page_compression);
} else {
ut_ad(!srv_read_only_mode);
@@ -4643,10 +5036,9 @@ os_aio_func(
os_has_said_disk_full = FALSE; ret = 0; errno = 28;);
if (!ret) {
- os_file_handle_error_cond_exit(name, "os_file_write_func", TRUE, FALSE);
+ os_file_handle_error_cond_exit(name, "os_file_write_func", TRUE, FALSE, __FILE__, __LINE__);
}
}
-
return ret;
}
@@ -4693,7 +5085,8 @@ try_again:
}
slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
- name, buf, offset, n);
+ name, buf, offset, n, write_size, page_compression, page_compression_level);
+
if (type == OS_FILE_READ) {
if (srv_use_native_aio) {
os_n_file_reads++;
@@ -4773,7 +5166,7 @@ err_exit:
os_aio_array_free_slot(array, slot);
if (os_file_handle_error(
- name,type == OS_FILE_READ ? "aio read" : "aio write")) {
+ name,type == OS_FILE_READ ? "aio read" : "aio write", __FILE__, __LINE__)) {
goto try_again;
}
@@ -4886,9 +5279,17 @@ os_aio_windows_handle(
if (ret && len == slot->len) {
ret_val = TRUE;
- } else if (os_file_handle_error(slot->name, "Windows aio")) {
+ } else if (!ret || (len != slot->len)) {
- retry = TRUE;
+ if (!ret) {
+ if (os_file_handle_error(slot->name, "Windows aio", __FILE__, __LINE__)) {
+ retry = TRUE;
+ } else {
+ ret_val = FALSE;
+ }
+ } else {
+ retry = TRUE;
+ }
} else {
ret_val = FALSE;
@@ -4916,9 +5317,18 @@ os_aio_windows_handle(
switch (slot->type) {
case OS_FILE_WRITE:
- ret = WriteFile(slot->file, slot->buf,
+ if (slot->message1 &&
+ slot->page_compression &&
+ slot->page_compress_success &&
+ slot->page_buf) {
+ ret = WriteFile(slot->file, slot->page_buf,
+ (DWORD) slot->len, &len,
+ &(slot->control));
+ } else {
+ ret = WriteFile(slot->file, slot->buf,
(DWORD) slot->len, &len,
&(slot->control));
+ }
break;
case OS_FILE_READ:
@@ -4950,6 +5360,28 @@ os_aio_windows_handle(
ret_val = ret && len == slot->len;
}
+ if (slot->type == OS_FILE_READ) {
+ if(fil_page_is_compressed(slot->buf)) {
+ os_slot_alloc_page_buf(slot);
+
+#ifdef HAVE_LZO
+ if (fil_page_is_lzo_compressed(slot->buf)) {
+ os_slot_alloc_lzo_mem(slot);
+ }
+#endif
+
+ fil_decompress_page(slot->page_buf, slot->buf, slot->len, slot->write_size);
+ }
+ } else {
+ /* OS_FILE_WRITE */
+ if (slot->page_compress_success && fil_page_is_compressed(slot->page_buf)) {
+ if (srv_use_trim && os_fallocate_failed == FALSE) {
+ // Deallocate unused blocks from file system
+ os_file_trim(slot);
+ }
+ }
+ }
+
os_aio_array_free_slot(array, slot);
return(ret_val);
@@ -5039,6 +5471,34 @@ retry:
/* We have not overstepped to next segment. */
ut_a(slot->pos < end_pos);
+ if (slot->type == OS_FILE_READ) {
+ /* If the table is page compressed and this is read,
+ we decompress before we announce the read is
+ complete. For writes, we free the compressed page. */
+ if (fil_page_is_compressed(slot->buf)) {
+ // We allocate memory for page compressed buffer if and only
+ // if it is not yet allocated.
+ os_slot_alloc_page_buf(slot);
+#ifdef HAVE_LZO
+ if (fil_page_is_lzo_compressed(slot->buf)) {
+ os_slot_alloc_lzo_mem(slot);
+ }
+#endif
+
+ fil_decompress_page(slot->page_buf, slot->buf, slot->len, slot->write_size);
+ }
+ } else {
+ /* OS_FILE_WRITE */
+ if (slot->page_compress_success &&
+ fil_page_is_compressed(slot->page_buf)) {
+ ut_ad(slot->page_compression_page);
+ if (srv_use_trim && os_fallocate_failed == FALSE) {
+ // Deallocate unused blocks from file system
+ os_file_trim(slot);
+ }
+ }
+ }
+
/* Mark this request as completed. The error handling
will be done in the calling function. */
os_mutex_enter(array->mutex);
@@ -5182,6 +5642,13 @@ found:
} else {
errno = -slot->ret;
+ if (slot->ret == 0) {
+ fprintf(stderr,
+ "InnoDB: Number of bytes after aio %d requested %lu\n"
+ "InnoDB: from file %s\n",
+ slot->n_bytes, slot->len, slot->name);
+ }
+
/* os_file_handle_error does tell us if we should retry
this IO. As it stands now, we don't do this retry when
reaping requests from a different context than
@@ -5189,7 +5656,7 @@ found:
windows and linux native AIO.
We should probably look into this to transparently
re-submit the IO. */
- os_file_handle_error(slot->name, "Linux aio");
+ os_file_handle_error(slot->name, "Linux aio", __FILE__, __LINE__);
ret = FALSE;
}
@@ -5473,13 +5940,13 @@ consecutive_loop:
errno = 28;);
if (!ret) {
- os_file_handle_error_cond_exit(aio_slot->name, "os_file_write_func", TRUE, FALSE);
+ os_file_handle_error_cond_exit(aio_slot->name, "os_file_write_func", TRUE, FALSE, __FILE__, __LINE__);
}
} else {
ret = os_file_read(
aio_slot->file, combined_buf,
- aio_slot->offset, total_len);
+ aio_slot->offset, total_len, aio_slot->page_compression);
}
srv_set_io_thread_op_info(global_segment, "file i/o done");
@@ -5869,4 +6336,290 @@ os_aio_all_slots_free(void)
}
#endif /* UNIV_DEBUG */
+#ifdef _WIN32
+#include <winioctl.h>
+#ifndef FSCTL_FILE_LEVEL_TRIM
+#define FSCTL_FILE_LEVEL_TRIM CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 130, METHOD_BUFFERED, FILE_WRITE_DATA)
+typedef struct _FILE_LEVEL_TRIM_RANGE {
+ DWORDLONG Offset;
+ DWORDLONG Length;
+} FILE_LEVEL_TRIM_RANGE, *PFILE_LEVEL_TRIM_RANGE;
+
+typedef struct _FILE_LEVEL_TRIM {
+ DWORD Key;
+ DWORD NumRanges;
+ FILE_LEVEL_TRIM_RANGE Ranges[1];
+} FILE_LEVEL_TRIM, *PFILE_LEVEL_TRIM;
+#endif
+#endif
+
+/**********************************************************************//**
+Deallocate the unused tail of a page-compressed page. The trimmed range
+starts right after the compressed payload (slot->offset + slot->len) and
+runs to the end of the UNIV_PAGE_SIZE page. Within the specified range,
+partial file system blocks are zeroed, and whole file system blocks are
+removed from the file. After a successful call, subsequent reads from this
+range will return zeroes.
+
+If slot->write_size is set, it is updated to the payload length when the
+trim succeeds or is skipped, and reset to 0 when the trim fails. After the
+first failure trimming is switched off (srv_use_trim, os_fallocate_failed).
+@return TRUE if success or nothing to trim, FALSE if error */
+UNIV_INTERN
+ibool
+os_file_trim(
+/*=========*/
+	os_aio_slot_t*	slot) /*!< in: slot structure */
+{
+
+	size_t		len = slot->len;
+	size_t		trim_len = UNIV_PAGE_SIZE - len;
+	os_offset_t	off = slot->offset + len;
+	size_t		bsize = slot->file_block_size;
+
+	/* slot->write_size is optional; read it once so that no code
+	path below dereferences a NULL pointer. 0 means "not initialized". */
+	const ulint	prev_write_size =
+		slot->write_size ? *slot->write_size : 0;
+
+	/* The block size is used as a divisor below, so check it first. */
+	ut_a(bsize != 0);
+	// len here should be aligned to sector size
+	ut_a((trim_len % bsize) == 0);
+	ut_a((len % bsize) == 0);
+	ut_a((off % bsize) == 0);
+
+#ifdef UNIV_DEBUG
+	fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu off %lu block_size %lu\n",
+		prev_write_size, trim_len, len, off, bsize);
+#endif
+
+	// Nothing to do if trim length is zero or if actual write
+	// size is initialized and it is smaller than current write size.
+	// In first write if we trim we set write_size to actual bytes
+	// written and rest of the page is trimmed. In following writes
+	// there is no need to trim again if write_size only increases
+	// because rest of the page is already trimmed. If actual write
+	// size decreases we need to trim again.
+	if (trim_len == 0 ||
+	    (prev_write_size > 0 &&
+	     len >= prev_write_size)) {
+
+#ifdef UNIV_PAGECOMPRESS_DEBUG
+		fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu\n",
+			prev_write_size, trim_len, len);
+#endif
+
+		if (prev_write_size > 0 && len >= prev_write_size) {
+			srv_stats.page_compressed_trim_op_saved.inc();
+		}
+
+		if (slot->write_size) {
+			*slot->write_size = len;
+		}
+
+		return (TRUE);
+	}
+
+#ifdef __linux__
+#if defined(HAVE_FALLOCATE)
+	int ret = fallocate(slot->file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len);
+
+	if (ret) {
+		/* After first failure do not try to trim again */
+		os_fallocate_failed = TRUE;
+		srv_use_trim = FALSE;
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+			" InnoDB: [Warning] fallocate call failed with error code %d.\n"
+			" InnoDB: start: %lu len: %lu payload: %lu\n"
+			" InnoDB: Disabling fallocate for now.\n", ret, off, trim_len, len);
+
+		os_file_handle_error_no_exit(slot->name,
+			" fallocate(FALLOC_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE) ",
+			FALSE, __FILE__, __LINE__);
+
+		if (slot->write_size) {
+			*slot->write_size = 0;
+		}
+
+		return (FALSE);
+	} else {
+		if (slot->write_size) {
+			*slot->write_size = len;
+		}
+	}
+#else
+	ut_print_timestamp(stderr);
+	fprintf(stderr,
+		" InnoDB: [Warning] fallocate not supported on this installation.\n"
+		" InnoDB: Disabling fallocate for now.\n");
+	os_fallocate_failed = TRUE;
+	srv_use_trim = FALSE;
+	if (slot->write_size) {
+		*slot->write_size = 0;
+	}
+
+	/* Nothing was trimmed, so do not count it in the statistics
+	below and report the failure like the other error paths do. */
+	return (FALSE);
+
+#endif /* HAVE_FALLOCATE ... */
+
+#elif defined(_WIN32)
+	FILE_LEVEL_TRIM flt;
+	flt.Key = 0;
+	flt.NumRanges = 1;
+	flt.Ranges[0].Offset = off;
+	flt.Ranges[0].Length = trim_len;
+
+	/* NOTE(review): lpBytesReturned and lpOverlapped are both NULL
+	here; confirm this matches how slot->file was opened. */
+	BOOL ret = DeviceIoControl(slot->file, FSCTL_FILE_LEVEL_TRIM,
+		&flt, sizeof(flt), NULL, NULL, NULL, NULL);
+
+	if (!ret) {
+		/* After first failure do not try to trim again */
+		os_fallocate_failed = TRUE;
+		srv_use_trim=FALSE;
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+			" InnoDB: [Warning] fallocate call failed with error.\n"
+			" InnoDB: start: %llx len: %lu payload: %lu\n"
+			" InnoDB: Disabling fallocate for now.\n",
+			(unsigned long long) off,
+			(unsigned long) trim_len,
+			(unsigned long) len);
+
+		os_file_handle_error_no_exit(slot->name,
+			" DeviceIOControl(FSCTL_FILE_LEVEL_TRIM) ",
+			FALSE, __FILE__, __LINE__);
+
+		if (slot->write_size) {
+			*slot->write_size = 0;
+		}
+		return (FALSE);
+	} else {
+		if (slot->write_size) {
+			*slot->write_size = len;
+		}
+	}
+#endif
+
+	/* Account the number of trimmed blocks per file block size. */
+	switch(bsize) {
+	case 512:
+		srv_stats.page_compression_trim_sect512.add((trim_len / bsize));
+		break;
+	case 1024:
+		srv_stats.page_compression_trim_sect1024.add((trim_len / bsize));
+		break;
+	case 2048:
+		srv_stats.page_compression_trim_sect2048.add((trim_len / bsize));
+		break;
+	case 4096:
+		srv_stats.page_compression_trim_sect4096.add((trim_len / bsize));
+		break;
+	case 8192:
+		srv_stats.page_compression_trim_sect8192.add((trim_len / bsize));
+		break;
+	case 16384:
+		srv_stats.page_compression_trim_sect16384.add((trim_len / bsize));
+		break;
+	case 32768:
+		srv_stats.page_compression_trim_sect32768.add((trim_len / bsize));
+		break;
+	default:
+		break;
+	}
+
+	srv_stats.page_compressed_trim_op.inc();
+
+	return (TRUE);
+
+}
#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************************//**
+Allocate the temporary buffer a slot uses for page compression, unless
+the slot already has one. slot->page_compression_page keeps the raw
+allocation and slot->page_buf points to the UNIV_PAGE_SIZE-aligned
+address inside it. The whole allocation is zero-filled. */
+UNIV_INTERN
+void
+os_slot_alloc_page_buf(
+/*===================*/
+	os_aio_slot_t*	slot) /*!< in/out: slot structure */
+{
+	ut_a(slot != NULL);
+
+	if (slot->page_compression_page != NULL) {
+		/* Buffer was allocated by an earlier call. */
+		return;
+	}
+
+	ulint	asize = UNIV_PAGE_SIZE;
+
+	/* We allocate extra to avoid memory overwrite on compression */
+#ifdef HAVE_SNAPPY
+	asize += snappy_max_compressed_length(asize) - UNIV_PAGE_SIZE;
+#endif
+
+	/* Twice the size leaves room to align page_buf inside the block. */
+	byte*	raw = static_cast<byte *>(ut_malloc(asize*2));
+
+	/* Check the allocation before memset() writes through it. */
+	ut_a(raw != NULL);
+	memset(raw, 0, asize*2);
+
+	slot->page_compression_page = raw;
+	slot->page_buf = static_cast<byte *>(ut_align(raw, UNIV_PAGE_SIZE));
+	ut_a(slot->page_buf != NULL);
+}
+
+#ifdef HAVE_LZO
+/**********************************************************************//**
+Allocate the zero-filled work memory (LZO1X_1_15_MEM_COMPRESS bytes) a
+slot needs when the LZO compression method is used, unless the slot
+already has it. */
+UNIV_INTERN
+void
+os_slot_alloc_lzo_mem(
+/*===================*/
+	os_aio_slot_t*	slot) /*!< in/out: slot structure */
+{
+	ut_a(slot != NULL);
+
+	if (slot->lzo_mem != NULL) {
+		/* Work memory was allocated by an earlier call. */
+		return;
+	}
+
+	slot->lzo_mem = static_cast<byte *>(ut_malloc(LZO1X_1_15_MEM_COMPRESS));
+
+	/* Check the allocation before memset() writes through it. */
+	ut_a(slot->lzo_mem != NULL);
+	memset(slot->lzo_mem, 0, LZO1X_1_15_MEM_COMPRESS);
+}
+#endif
+
+/***********************************************************************//**
+Try to get number of bytes per sector from file system.
+Starts from a default of 512. On Linux (with <sys/statvfs.h>) the value
+comes from fstatvfs() f_bsize, on Windows from GetDiskFreeSpace() for the
+volume holding the file. The result is clamped to the range
+[512, UNIV_PAGE_SIZE/2]; a note is printed when clamping happens.
+@return file block size */
+UNIV_INTERN
+ulint
+os_file_get_block_size(
+/*===================*/
+	os_file_t	file,	/*!< in: handle to a file */
+	const char*	name)	/*!< in: file name */
+{
+	ulint		fblock_size = 512;
+
+#if defined(UNIV_LINUX) && defined(HAVE_SYS_STATVFS_H)
+	struct statvfs	vfs_stat;
+	int		err;
+
+	err = fstatvfs(file, &vfs_stat);
+
+	if (err != 0) {
+		fprintf(stderr, "InnoDB: Warning: fstatvfs() failed on file %s\n", name);
+		os_file_handle_error_no_exit(name, "fstatvfs()", FALSE, __FILE__, __LINE__);
+	} else {
+		fblock_size = vfs_stat.f_bsize;
+	}
+#endif /* UNIV_LINUX */
+#ifdef __WIN__
+	{
+		DWORD	SectorsPerCluster = 0;
+		DWORD	BytesPerSector = 0;
+		DWORD	NumberOfFreeClusters = 0;
+		DWORD	TotalNumberOfClusters = 0;
+		char	volume_root[MAX_PATH];
+
+		/* GetDiskFreeSpace() takes the root directory of a volume,
+		not a file path, so resolve the volume of the file first. */
+		if (GetVolumePathNameA(name, volume_root, sizeof(volume_root))
+		    && GetDiskFreeSpaceA(volume_root, &SectorsPerCluster,
+					 &BytesPerSector,
+					 &NumberOfFreeClusters,
+					 &TotalNumberOfClusters)) {
+			fblock_size = BytesPerSector;
+		} else {
+			fprintf(stderr, "InnoDB: Warning: GetDiskFreeSpace() failed on file %s\n", name);
+			os_file_handle_error_no_exit(name, "GetDiskFreeSpace()", FALSE, __FILE__, __LINE__);
+		}
+	}
+#endif /* __WIN__*/
+
+	if (fblock_size > UNIV_PAGE_SIZE/2 || fblock_size < 512) {
+		fprintf(stderr, "InnoDB: Note: File system for file %s has "
+			"file block size %lu not supported for page_size %lu\n",
+			name, fblock_size, UNIV_PAGE_SIZE);
+
+		if (fblock_size < 512) {
+			fblock_size = 512;
+		} else {
+			fblock_size = UNIV_PAGE_SIZE/2;
+		}
+
+		/* fblock_size is unsigned, hence %lu */
+		fprintf(stderr, "InnoDB: Note: Using file block size %lu for file %s\n",
+			fblock_size, name);
+	}
+
+	return fblock_size;
+}
diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc
index 74ebe159677..018bf44fb8d 100644
--- a/storage/innobase/row/row0log.cc
+++ b/storage/innobase/row/row0log.cc
@@ -2567,7 +2567,7 @@ all_done:
success = os_file_read_no_error_handling(
OS_FILE_FROM_FD(index->online_log->fd),
index->online_log->head.block, ofs,
- srv_sort_buf_size);
+ srv_sort_buf_size, FALSE);
if (!success) {
fprintf(stderr, "InnoDB: unable to read temporary file"
@@ -3398,7 +3398,7 @@ all_done:
success = os_file_read_no_error_handling(
OS_FILE_FROM_FD(index->online_log->fd),
index->online_log->head.block, ofs,
- srv_sort_buf_size);
+ srv_sort_buf_size, FALSE);
if (!success) {
fprintf(stderr, "InnoDB: unable to read temporary file"
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index 284081d4b0c..7ebcdefdc3a 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -870,7 +870,8 @@ row_merge_read(
#endif /* UNIV_DEBUG */
success = os_file_read_no_error_handling(OS_FILE_FROM_FD(fd), buf,
- ofs, srv_sort_buf_size);
+ ofs, srv_sort_buf_size, FALSE);
+
#ifdef POSIX_FADV_DONTNEED
/* Each block is read exactly once. Free up the file cache. */
posix_fadvise(fd, ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED);
diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc
index a0dd32c203f..5e15dd15db2 100644
--- a/storage/innobase/srv/srv0mon.cc
+++ b/storage/innobase/srv/srv0mon.cc
@@ -2,6 +2,7 @@
Copyright (c) 2010, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
+Copyright (c) 2013, 2014, MariaDB Corporation
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -290,6 +291,18 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_EXISTING | MONITOR_DEFAULT_ON),
MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_WRITTEN},
+ {"buffer_index_pages_written", "buffer",
+ "Number of index pages written (innodb_index_pages_written)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_INDEX_PAGES_WRITTEN},
+
+ {"buffer_non_index_pages_written", "buffer",
+ "Number of non index pages written (innodb_non_index_pages_written)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN},
+
{"buffer_pages_read", "buffer",
"Number of pages read (innodb_pages_read)",
static_cast<monitor_type_t>(
@@ -879,6 +892,71 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_PAD_DECREMENTS},
+ {"compress_saved", "compression",
+ "Number of bytes saved by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_SAVED},
+
+ {"compress_trim_sect512", "compression",
+ "Number of sect-512 TRIMed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512},
+
+ {"compress_trim_sect1024", "compression",
+ "Number of sect-1024 TRIMed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024},
+
+ {"compress_trim_sect2048", "compression",
+ "Number of sect-2048 TRIMed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048},
+
+ {"compress_trim_sect4096", "compression",
+ "Number of sect-4K TRIMed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096},
+
+ {"compress_trim_sect8192", "compression",
+ "Number of sect-8K TRIMed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192},
+
+ {"compress_trim_sect16384", "compression",
+ "Number of sect-16K TRIMed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384},
+
+ {"compress_trim_sect32768", "compression",
+ "Number of sect-32K TRIMed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768},
+
+ {"compress_pages_page_compressed", "compression",
+ "Number of pages compressed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSED},
+
+ {"compress_page_compressed_trim_op", "compression",
+ "Number of TRIM operation performed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP},
+
+ {"compress_page_compressed_trim_op_saved", "compression",
+ "Number of TRIM operation saved by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED},
+
+ {"compress_pages_page_decompressed", "compression",
+ "Number of pages decompressed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED},
+
+ {"compress_pages_page_compression_error", "compression",
+ "Number of page compression errors",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR},
+
/* ========== Counters for Index ========== */
{"module_index", "index", "Index Manager",
MONITOR_MODULE,
@@ -1573,6 +1651,16 @@ srv_mon_process_existing_counter(
value = stat.n_pages_written;
break;
+ /* innodb_index_pages_written, the number of index pages written */
+ case MONITOR_OVLD_INDEX_PAGES_WRITTEN:
+ value = srv_stats.index_pages_written;
+ break;
+
+ /* innodb_non_index_pages_written, the number of non index pages written */
+ case MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN:
+ value = srv_stats.non_index_pages_written;
+ break;
+
/* innodb_pages_read */
case MONITOR_OVLD_PAGES_READ:
buf_get_total_stat(&stat);
@@ -1834,6 +1922,46 @@ srv_mon_process_existing_counter(
value = btr_cur_n_non_sea;
break;
+ case MONITOR_OVLD_PAGE_COMPRESS_SAVED:
+ value = srv_stats.page_compression_saved;
+ break;
+ case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512:
+ value = srv_stats.page_compression_trim_sect512;
+ break;
+ case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024:
+ value = srv_stats.page_compression_trim_sect1024;
+ break;
+ case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048:
+ value = srv_stats.page_compression_trim_sect2048;
+ break;
+ case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096:
+ value = srv_stats.page_compression_trim_sect4096;
+ break;
+ case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192:
+ value = srv_stats.page_compression_trim_sect8192;
+ break;
+ case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384:
+ value = srv_stats.page_compression_trim_sect16384;
+ break;
+ case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768:
+ value = srv_stats.page_compression_trim_sect32768;
+ break;
+ case MONITOR_OVLD_PAGES_PAGE_COMPRESSED:
+ value = srv_stats.pages_page_compressed;
+ break;
+ case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP:
+ value = srv_stats.page_compressed_trim_op;
+ break;
+ case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED:
+ value = srv_stats.page_compressed_trim_op_saved;
+ break;
+ case MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED:
+ value = srv_stats.pages_page_decompressed;
+ break;
+ case MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR:
+ value = srv_stats.pages_page_compression_error;
+ break;
+
default:
ut_error;
}
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index cd3bed9e2fe..129b33f0da9 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -71,6 +71,7 @@ Created 10/8/1995 Heikki Tuuri
#include "mysql/plugin.h"
#include "mysql/service_thd_wait.h"
+#include "fil0pagecompress.h"
/* The following is the maximum allowed duration of a lock wait. */
UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600;
@@ -146,6 +147,20 @@ use simulated aio we build below with threads.
Currently we support native aio on windows and linux */
UNIV_INTERN my_bool srv_use_native_aio = TRUE;
+/* If this flag is TRUE, then we will use fallocate(PUNCH_HOLE)
+to the pages */
+UNIV_INTERN my_bool srv_use_trim = FALSE;
+/* If this flag is TRUE, then we will use posix fallocate for file extension */
+UNIV_INTERN my_bool srv_use_posix_fallocate = FALSE;
+/* If this flag is TRUE, then we disable doublewrite buffer */
+UNIV_INTERN my_bool srv_use_atomic_writes = FALSE;
+/* Selects the algorithm used for page compressing the pages;
+defaults to PAGE_ZLIB_ALGORITHM */
+UNIV_INTERN ulong innodb_compression_algorithm = PAGE_ZLIB_ALGORITHM;
+/* Number of threads used for multi-threaded flush */
+UNIV_INTERN long srv_mtflush_threads = MTFLUSH_DEFAULT_WORKER;
+/* If this flag is TRUE, then we will use multi threaded flush. */
+UNIV_INTERN my_bool srv_use_mtflush = FALSE;
+
#ifdef __WIN__
/* Windows native condition variables. We use runtime loading / function
pointers, because they are not available on Windows Server 2003 and
@@ -356,11 +371,6 @@ batch flushing i.e.: LRU flushing and flush_list flushing. The rest
of the pages are used for single page flushing. */
UNIV_INTERN ulong srv_doublewrite_batch_size = 120;
-UNIV_INTERN ibool srv_use_atomic_writes = FALSE;
-#ifdef HAVE_POSIX_FALLOCATE
-UNIV_INTERN ibool srv_use_posix_fallocate = TRUE;
-#endif
-
UNIV_INTERN ulong srv_replication_delay = 0;
/*-------------------------------------------*/
@@ -393,6 +403,17 @@ static ulint srv_n_system_rows_read_old = 0;
UNIV_INTERN ulint srv_truncated_status_writes = 0;
UNIV_INTERN ulint srv_available_undo_logs = 0;
+UNIV_INTERN ib_uint64_t srv_page_compression_saved = 0;
+UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect512 = 0;
+UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect4096 = 0;
+UNIV_INTERN ib_uint64_t srv_index_pages_written = 0;
+UNIV_INTERN ib_uint64_t srv_non_index_pages_written = 0;
+UNIV_INTERN ib_uint64_t srv_pages_page_compressed = 0;
+UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op = 0;
+UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op_saved = 0;
+UNIV_INTERN ib_uint64_t srv_index_page_decompressed = 0;
+
+
/* Set the following to 0 if you want InnoDB to write messages on
stderr on startup/shutdown. */
UNIV_INTERN ibool srv_print_verbose_log = TRUE;
@@ -1518,6 +1539,15 @@ srv_export_innodb_status(void)
srv_truncated_status_writes;
export_vars.innodb_available_undo_logs = srv_available_undo_logs;
+ export_vars.innodb_page_compression_saved = srv_stats.page_compression_saved;
+ export_vars.innodb_page_compression_trim_sect512 = srv_stats.page_compression_trim_sect512;
+ export_vars.innodb_page_compression_trim_sect4096 = srv_stats.page_compression_trim_sect4096;
+ export_vars.innodb_index_pages_written = srv_stats.index_pages_written;
+ export_vars.innodb_non_index_pages_written = srv_stats.non_index_pages_written;
+ export_vars.innodb_pages_page_compressed = srv_stats.pages_page_compressed;
+ export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op;
+ export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved;
+ export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed;
#ifdef UNIV_DEBUG
rw_lock_s_lock(&purge_sys->latch);
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 7048a44ae97..f2de5e954ad 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -3,6 +3,7 @@
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2009, Percona Inc.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -72,6 +73,7 @@ Created 2/16/1996 Heikki Tuuri
# include "sync0sync.h"
# include "buf0flu.h"
# include "buf0rea.h"
+# include "buf0mtflu.h"
# include "dict0boot.h"
# include "dict0load.h"
# include "dict0stats_bg.h"
@@ -129,10 +131,14 @@ static os_file_t files[1000];
/** io_handler_thread parameters for thread identification */
static ulint n[SRV_MAX_N_IO_THREADS + 6];
/** io_handler_thread identifiers, 32 is the maximum number of purge threads */
-static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 + 32];
+/** 6 is the ? */
+#define START_OLD_THREAD_CNT (SRV_MAX_N_IO_THREADS + 6 + 32)
+static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 + 32 + MTFLUSH_MAX_WORKER];
+/* Thread context data for multi-threaded flush */
+void *mtflush_ctx=NULL;
/** Thead handles */
-static os_thread_t thread_handles[SRV_MAX_N_IO_THREADS + 6 + 32];
+static os_thread_t thread_handles[SRV_MAX_N_IO_THREADS + 6 + 32 + MTFLUSH_MAX_WORKER];
static os_thread_t buf_flush_page_cleaner_thread_handle;
static os_thread_t buf_dump_thread_handle;
static os_thread_t dict_stats_thread_handle;
@@ -544,7 +550,7 @@ create_log_file(
*file = os_file_create(
innodb_file_log_key, name,
OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL,
- OS_LOG_FILE, &ret);
+ OS_LOG_FILE, &ret, FALSE);
if (!ret) {
ib_logf(IB_LOG_LEVEL_ERROR, "Cannot create %s", name);
@@ -751,7 +757,7 @@ open_log_file(
*file = os_file_create(innodb_file_log_key, name,
OS_FILE_OPEN, OS_FILE_AIO,
- OS_LOG_FILE, &ret);
+ OS_LOG_FILE, &ret, FALSE);
if (!ret) {
ib_logf(IB_LOG_LEVEL_ERROR, "Unable to open '%s'", name);
return(DB_ERROR);
@@ -842,7 +848,7 @@ open_or_create_data_files(
files[i] = os_file_create(
innodb_file_data_key, name, OS_FILE_CREATE,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret);
+ OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
if (srv_read_only_mode) {
@@ -885,7 +891,7 @@ open_or_create_data_files(
files[i] = os_file_create(
innodb_file_data_key, name, OS_FILE_OPEN_RAW,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret);
+ OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
if (!ret) {
ib_logf(IB_LOG_LEVEL_ERROR,
@@ -900,7 +906,7 @@ open_or_create_data_files(
#ifdef UNIV_LOG_ARCHIVE
min_arch_log_no, max_arch_log_no,
#endif /* UNIV_LOG_ARCHIVE */
- min_flushed_lsn, max_flushed_lsn);
+ min_flushed_lsn, max_flushed_lsn, ULINT_UNDEFINED);
/* If first page is valid, don't overwrite DB.
It prevents overwriting DB when mysql_install_db
@@ -936,17 +942,17 @@ open_or_create_data_files(
files[i] = os_file_create(
innodb_file_data_key,
name, OS_FILE_OPEN_RAW,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret);
+ OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
} else if (i == 0) {
files[i] = os_file_create(
innodb_file_data_key,
name, OS_FILE_OPEN_RETRY,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret);
+ OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
} else {
files[i] = os_file_create(
innodb_file_data_key,
name, OS_FILE_OPEN, OS_FILE_NORMAL,
- OS_DATA_FILE, &ret);
+ OS_DATA_FILE, &ret, FALSE);
}
if (!ret) {
@@ -1031,7 +1037,8 @@ check_first_page:
#ifdef UNIV_LOG_ARCHIVE
min_arch_log_no, max_arch_log_no,
#endif /* UNIV_LOG_ARCHIVE */
- min_flushed_lsn, max_flushed_lsn);
+ min_flushed_lsn, max_flushed_lsn,
+ ULINT_UNDEFINED);
if (check_msg) {
@@ -1166,7 +1173,7 @@ srv_undo_tablespace_create(
innodb_file_data_key,
name,
srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret);
+ OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
if (srv_read_only_mode && ret) {
ib_logf(IB_LOG_LEVEL_INFO,
@@ -1253,7 +1260,8 @@ srv_undo_tablespace_open(
| OS_FILE_ON_ERROR_SILENT,
OS_FILE_NORMAL,
OS_DATA_FILE,
- &ret);
+ &ret,
+ FALSE);
/* If the file open was successful then load the tablespace. */
@@ -2754,6 +2762,24 @@ files_checked:
}
if (!srv_read_only_mode) {
+ if (srv_use_mtflush) {
+ /* Start multi-threaded flush threads */
+ mtflush_ctx = buf_mtflu_handler_init(
+ srv_mtflush_threads,
+ srv_buf_pool_instances);
+
+ /* Set up the thread ids */
+ buf_mtflu_set_thread_ids(
+ srv_mtflush_threads,
+ mtflush_ctx,
+ (thread_ids + 6 + 32));
+
+#if UNIV_DEBUG
+ fprintf(stderr, "InnoDB: Note: %s:%d buf-pool-instances:%lu mtflush_threads %lu\n",
+ __FILE__, __LINE__, srv_buf_pool_instances, srv_mtflush_threads);
+#endif
+ }
+
buf_flush_page_cleaner_thread_handle = os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL);
buf_flush_page_cleaner_thread_started = true;
}
@@ -3021,6 +3047,13 @@ innobase_shutdown_for_mysql(void)
logs_empty_and_mark_files_at_shutdown() and should have
already quit or is quitting right now. */
+
+ if (srv_use_mtflush) {
+ /* g. Exit the multi threaded flush threads */
+
+ buf_mtflu_io_thread_exit();
+ }
+
os_mutex_enter(os_sync_mutex);
if (os_thread_count == 0) {
diff --git a/storage/innobase/ut/ut0wqueue.cc b/storage/innobase/ut/ut0wqueue.cc
index d1ba36b3b00..1607e535a94 100644
--- a/storage/innobase/ut/ut0wqueue.cc
+++ b/storage/innobase/ut/ut0wqueue.cc
@@ -162,6 +162,38 @@ ib_wqueue_timedwait(
}
/********************************************************************
+Return first item on work queue or NULL if queue is empty
+@return work item or NULL */
+void*
+ib_wqueue_nowait(
+/*=============*/
+	ib_wqueue_t*	wq)	/*!< in: work queue */
+{
+	mutex_enter(&wq->mutex);
+
+	/* Detach the head of the list, if there is one. */
+	ib_list_node_t*	first = ib_list_is_empty(wq->items)
+		? NULL
+		: ib_list_get_first(wq->items);
+
+	if (first != NULL) {
+		ib_list_remove(wq->items, first);
+	}
+
+	/* The event must be reset once the list
+	has no items left. */
+	if (ib_list_is_empty(wq->items)) {
+		os_event_reset(wq->event);
+	}
+
+	mutex_exit(&wq->mutex);
+
+	if (first == NULL) {
+		return (NULL);
+	}
+
+	return (first->data);
+}
+
+/********************************************************************
Check if queue is empty. */
ibool
@@ -173,3 +205,20 @@ ib_wqueue_is_empty(
{
return(ib_list_is_empty(wq->items));
}
+
+/********************************************************************
+Count the items currently on the work queue. The count is taken while
+holding the queue mutex.
+@return number of items on queue */
+ulint
+ib_wqueue_len(
+/*==========*/
+	ib_wqueue_t*	wq)	/*!< in: work queue */
+{
+	mutex_enter(&wq->mutex);
+	const ulint	n_items = ib_list_len(wq->items);
+	mutex_exit(&wq->mutex);
+
+	return(n_items);
+}
diff --git a/storage/xtradb/CMakeLists.txt b/storage/xtradb/CMakeLists.txt
index 2f74456e7a7..75e994aa34b 100644
--- a/storage/xtradb/CMakeLists.txt
+++ b/storage/xtradb/CMakeLists.txt
@@ -18,6 +18,17 @@
INCLUDE(CheckFunctionExists)
INCLUDE(CheckCSourceCompiles)
INCLUDE(CheckCSourceRuns)
+INCLUDE(lz4)
+INCLUDE(lzo)
+INCLUDE(lzma)
+INCLUDE(bzip2)
+INCLUDE(snappy)
+
+MYSQL_CHECK_LZ4()
+MYSQL_CHECK_LZO()
+MYSQL_CHECK_LZMA()
+MYSQL_CHECK_BZIP2()
+MYSQL_CHECK_SNAPPY()
# OS tests
IF(UNIX)
@@ -337,6 +348,7 @@ SET(INNOBASE_SOURCES
buf/buf0flu.cc
buf/buf0lru.cc
buf/buf0rea.cc
+ buf/buf0mtflu.cc
data/data0data.cc
data/data0type.cc
dict/dict0boot.cc
@@ -350,6 +362,7 @@ SET(INNOBASE_SOURCES
eval/eval0eval.cc
eval/eval0proc.cc
fil/fil0fil.cc
+ fil/fil0pagecompress.cc
fsp/fsp0fsp.cc
fut/fut0fut.cc
fut/fut0lst.cc
diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc
index d38e080051c..97781a12d20 100644
--- a/storage/xtradb/buf/buf0buf.cc
+++ b/storage/xtradb/buf/buf0buf.cc
@@ -2,6 +2,7 @@
Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -910,6 +911,11 @@ buf_page_print(
mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
mach_read_from_4(read_buf
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
+
+ ulint page_type = mach_read_from_4(read_buf + FIL_PAGE_TYPE);
+
+ fprintf(stderr, "InnoDB: page type %ld meaning %s\n", page_type,
+ fil_get_page_type_name(page_type));
}
#ifndef UNIV_HOTBACKUP
@@ -3544,6 +3550,7 @@ buf_page_init_low(
bpage->access_time = 0;
bpage->newest_modification = 0;
bpage->oldest_modification = 0;
+ bpage->write_size = 0;
HASH_INVALIDATE(bpage, hash);
bpage->is_corrupt = FALSE;
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
@@ -5706,3 +5713,24 @@ buf_page_init_for_backup_restore(
}
}
#endif /* !UNIV_HOTBACKUP */
+
+/*********************************************************************//**
+Acquire the LRU list mutex of a buffer pool instance. The ut_ad() check
+asserts that the calling thread does not already own the mutex. */
+void
+buf_pool_mutex_enter(
+/*=================*/
+ buf_pool_t* buf_pool) /*!< in: buffer pool */
+{
+ ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
+ mutex_enter(&buf_pool->LRU_list_mutex);
+}
+/*********************************************************************//**
+Release the LRU list mutex of a buffer pool instance. The ut_ad() check
+asserts that the calling thread currently owns the mutex. */
+void
+buf_pool_mutex_exit(
+/*================*/
+ buf_pool_t* buf_pool) /*!< in: buffer pool */
+{
+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+ mutex_exit(&buf_pool->LRU_list_mutex);
+}
diff --git a/storage/xtradb/buf/buf0dblwr.cc b/storage/xtradb/buf/buf0dblwr.cc
index f4d1c637e3e..dc84bd194ef 100644
--- a/storage/xtradb/buf/buf0dblwr.cc
+++ b/storage/xtradb/buf/buf0dblwr.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -382,7 +383,7 @@ buf_dblwr_init_or_load_pages(
/* Read the trx sys header to check if we are using the doublewrite
buffer */
off_t trx_sys_page = TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE;
- os_file_read(file, read_buf, trx_sys_page, UNIV_PAGE_SIZE);
+ os_file_read(file, read_buf, trx_sys_page, UNIV_PAGE_SIZE, FALSE);
doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
@@ -416,12 +417,11 @@ buf_dblwr_init_or_load_pages(
}
/* Read the pages from the doublewrite buffer to memory */
-
block_bytes = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
- os_file_read(file, buf, block1 * UNIV_PAGE_SIZE, block_bytes);
+ os_file_read(file, buf, block1 * UNIV_PAGE_SIZE, block_bytes, FALSE);
os_file_read(file, buf + block_bytes, block2 * UNIV_PAGE_SIZE,
- block_bytes);
+ block_bytes, FALSE);
/* Check if any of these pages is half-written in data files, in the
intended position */
@@ -514,7 +514,7 @@ buf_dblwr_process()
fil_io(OS_FILE_READ, true, space_id, zip_size,
page_no, 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
- read_buf, NULL);
+ read_buf, NULL, 0);
/* Check if the page is corrupt */
@@ -566,7 +566,7 @@ buf_dblwr_process()
fil_io(OS_FILE_WRITE, true, space_id,
zip_size, page_no, 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
- page, NULL);
+ page, NULL, 0);
ib_logf(IB_LOG_LEVEL_INFO,
"Recovered the page from"
@@ -586,7 +586,7 @@ buf_dblwr_process()
zip_size, page_no, 0,
zip_size ? zip_size
: UNIV_PAGE_SIZE,
- page, NULL);
+ page, NULL, NULL);
}
}
}
@@ -798,7 +798,7 @@ buf_dblwr_write_block_to_datafile(
buf_page_get_page_no(bpage), 0,
buf_page_get_zip_size(bpage),
(void*) bpage->zip.data,
- (void*) bpage);
+ (void*) bpage, 0);
return;
}
@@ -810,8 +810,8 @@ buf_dblwr_write_block_to_datafile(
fil_io(flags, sync, buf_block_get_space(block), 0,
buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
- (void*) block->frame, (void*) block);
-
+ (void*) block->frame, (void*) block,
+ (ulint *)&bpage->write_size);
}
/********************************************************************//**
@@ -905,7 +905,7 @@ try_again:
fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
buf_dblwr->block1, 0, len,
- (void*) write_buf, NULL);
+ (void*) write_buf, NULL, 0);
if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
/* No unwritten pages in the second block. */
@@ -921,7 +921,7 @@ try_again:
fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
buf_dblwr->block2, 0, len,
- (void*) write_buf, NULL);
+ (void*) write_buf, NULL, 0);
flush:
/* increment the doublewrite flushed pages counter */
@@ -1151,14 +1151,14 @@ retry:
fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
offset, 0, UNIV_PAGE_SIZE,
(void*) (buf_dblwr->write_buf
- + UNIV_PAGE_SIZE * i), NULL);
+ + UNIV_PAGE_SIZE * i), NULL, 0);
} else {
/* It is a regular page. Write it directly to the
doublewrite buffer */
fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
offset, 0, UNIV_PAGE_SIZE,
(void*) ((buf_block_t*) bpage)->frame,
- NULL);
+ NULL, 0);
}
/* Now flush the doublewrite buffer data to disk */
diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc
index 0f39c5de2ca..7c9e5e091c5 100644
--- a/storage/xtradb/buf/buf0flu.cc
+++ b/storage/xtradb/buf/buf0flu.cc
@@ -1,6 +1,8 @@
/*****************************************************************************
Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
+Copyright (c) 2013, 2014, Fusion-io. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -30,6 +32,7 @@ Created 11/11/1995 Heikki Tuuri
#endif
#include "buf0buf.h"
+#include "buf0mtflu.h"
#include "buf0checksum.h"
#include "srv0start.h"
#include "srv0srv.h"
@@ -44,10 +47,12 @@ Created 11/11/1995 Heikki Tuuri
#include "ibuf0ibuf.h"
#include "log0log.h"
#include "os0file.h"
+#include "os0sync.h"
#include "trx0sys.h"
#include "srv0mon.h"
#include "mysql/plugin.h"
#include "mysql/service_thd_wait.h"
+#include "fil0pagecompress.h"
/** Number of pages flushed through non flush_list flushes. */
// static ulint buf_lru_flush_page_count = 0;
@@ -75,15 +80,6 @@ in thrashing. */
/* @} */
-/** Handled page counters for a single flush */
-struct flush_counters_t {
- ulint flushed; /*!< number of dirty pages flushed */
- ulint evicted; /*!< number of clean pages evicted, including
- evicted uncompressed page images */
- ulint unzip_LRU_evicted;/*!< number of uncompressed page images
- evicted */
-};
-
/******************************************************************//**
Increases flush_list size in bytes with zip_size for compressed page,
UNIV_PAGE_SIZE for uncompressed page in inline function */
@@ -724,8 +720,10 @@ buf_flush_write_complete(
buf_pool->n_flush[flush_type]--;
- /* fprintf(stderr, "n pending flush %lu\n",
- buf_pool->n_flush[flush_type]); */
+#ifdef UNIV_MTFLUSH_DEBUG
+ fprintf(stderr, "n pending flush %lu\n",
+ buf_pool->n_flush[flush_type]);
+#endif
if (buf_pool->n_flush[flush_type] == 0
&& buf_pool->init_flush[flush_type] == FALSE) {
@@ -881,6 +879,8 @@ buf_flush_write_block_low(
{
ulint zip_size = buf_page_get_zip_size(bpage);
page_t* frame = NULL;
+ ulint space_id = buf_page_get_space(bpage);
+ atomic_writes_t awrites = fil_space_get_atomic_writes(space_id);
#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
@@ -956,12 +956,26 @@ buf_flush_write_block_low(
sync, buf_page_get_space(bpage), zip_size,
buf_page_get_page_no(bpage), 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
- frame, bpage);
- } else if (flush_type == BUF_FLUSH_SINGLE_PAGE) {
- buf_dblwr_write_single_page(bpage, sync);
+ frame, bpage, &bpage->write_size);
} else {
- ut_ad(!sync);
- buf_dblwr_add_to_batch(bpage);
+ /* InnoDB uses the doublewrite buffer and the doublewrite
+ buffer is initialized. The user can define per file space
+ (table) whether atomic writes are used. If atomic writes
+ are not used for the space, the doublewrite buffer is
+ used; if atomic writes are used, the page is written
+ without the doublewrite buffer. */
+
+ if (awrites == ATOMIC_WRITES_ON) {
+ fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
+ FALSE, buf_page_get_space(bpage), zip_size,
+ buf_page_get_page_no(bpage), 0,
+ zip_size ? zip_size : UNIV_PAGE_SIZE,
+ frame, bpage, &bpage->write_size);
+ } else if (flush_type == BUF_FLUSH_SINGLE_PAGE) {
+ buf_dblwr_write_single_page(bpage, sync);
+ } else {
+ buf_dblwr_add_to_batch(bpage);
+ }
}
/* When doing single page flushing the IO is done synchronously
@@ -1753,7 +1767,6 @@ end up waiting for these latches! NOTE 2: in the case of a flush list flush,
the calling thread is not allowed to own any latches on pages!
@return number of blocks for which the write request was queued */
__attribute__((nonnull))
-static
void
buf_flush_batch(
/*============*/
@@ -1812,7 +1825,6 @@ buf_flush_batch(
/******************************************************************//**
Gather the aggregated stats for both flush list and LRU list flushing */
-static
void
buf_flush_common(
/*=============*/
@@ -1839,7 +1851,6 @@ buf_flush_common(
/******************************************************************//**
Start a buffer flush batch for LRU or flush list */
-static
ibool
buf_flush_start(
/*============*/
@@ -1854,6 +1865,11 @@ buf_flush_start(
/* There is already a flush batch of the same type running */
+#ifdef UNIV_PAGECOMPRESS_DEBUG
+ fprintf(stderr, "Error: flush_type %d n_flush %lu init_flush %lu\n",
+ flush_type, buf_pool->n_flush[flush_type], buf_pool->init_flush[flush_type]);
+#endif
+
mutex_exit(&buf_pool->flush_state_mutex);
return(FALSE);
@@ -1868,7 +1884,6 @@ buf_flush_start(
/******************************************************************//**
End a buffer flush batch for LRU or flush list */
-static
void
buf_flush_end(
/*==========*/
@@ -1923,6 +1938,24 @@ buf_flush_wait_batch_end(
}
}
+/* JAN: TODO: */
+
+/* Acquire buf_pool->LRU_list_mutex. The ut_ad() check asserts that the
+calling thread does not already own the mutex. */
+void buf_pool_enter_LRU_mutex(
+ buf_pool_t* buf_pool) /*!< in: buffer pool */
+{
+ ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
+ mutex_enter(&buf_pool->LRU_list_mutex);
+}
+
+/* Release buf_pool->LRU_list_mutex. The ut_ad() check asserts that the
+calling thread currently owns the mutex. */
+void buf_pool_exit_LRU_mutex(
+ buf_pool_t* buf_pool) /*!< in: buffer pool */
+{
+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+ mutex_exit(&buf_pool->LRU_list_mutex);
+}
+
+/* JAN: TODO: END: */
+
/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list and also
puts replaceable clean pages from the end of the LRU list to the free
@@ -1993,6 +2026,10 @@ buf_flush_list(
bool timeout = false;
ulint flush_start_time = 0;
+ if (buf_mtflu_init_done()) {
+ return(buf_mtflu_flush_list(min_n, lsn_limit, n_processed));
+ }
+
for (i = 0; i < srv_buf_pool_instances; i++) {
requested_pages[i] = 0;
active_instance[i] = true;
@@ -2220,6 +2257,11 @@ buf_flush_LRU_tail(void)
ulint free_list_lwm = srv_LRU_scan_depth / 100
* srv_cleaner_free_list_lwm;
+ if(buf_mtflu_init_done())
+ {
+ return(buf_mtflu_flush_LRU_tail());
+ }
+
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
const buf_pool_t* buf_pool = buf_pool_from_array(i);
diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc
new file mode 100644
index 00000000000..223edab2e9c
--- /dev/null
+++ b/storage/xtradb/buf/buf0mtflu.cc
@@ -0,0 +1,733 @@
+/*****************************************************************************
+
+Copyright (C) 2013, 2014, Fusion-io. All Rights Reserved.
+Copyright (C) 2013, 2015, MariaDB Corporation. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file buf/buf0mtflu.cc
+Multi-threaded flush method implementation
+
+Created 06/11/2013 Dhananjoy Das DDas@fusionio.com
+Modified 12/12/2013 Jan Lindström jan.lindstrom@skysql.com
+Modified 03/02/2014 Dhananjoy Das DDas@fusionio.com
+Modified 06/02/2014 Jan Lindström jan.lindstrom@skysql.com
+***********************************************************************/
+
+#include "buf0buf.h"
+#include "buf0flu.h"
+#include "buf0mtflu.h"
+#include "buf0checksum.h"
+#include "srv0start.h"
+#include "srv0srv.h"
+#include "page0zip.h"
+#include "ut0byte.h"
+#include "ut0lst.h"
+#include "page0page.h"
+#include "fil0fil.h"
+#include "buf0lru.h"
+#include "buf0rea.h"
+#include "ibuf0ibuf.h"
+#include "log0log.h"
+#include "os0file.h"
+#include "os0sync.h"
+#include "trx0sys.h"
+#include "srv0mon.h"
+#include "mysql/plugin.h"
+#include "mysql/service_thd_wait.h"
+#include "fil0pagecompress.h"
+
+/* NOTE(review): MT_COMP_WATER_MARK is not referenced in the visible part
+of this file; its intended meaning is not documented here. */
+#define MT_COMP_WATER_MARK 50
+/** Time to wait for a message, in microseconds (5000000 us = 5 seconds).
+Also used as the sleep interval in buf_mtflu_io_thread_exit(). */
+#define MT_WAIT_IN_USECS 5000000
+
+/* Work item status. Note that WRK_ITEM_DONE and WRK_ITEM_SUCCESS share
+the same numeric value (2). */
+typedef enum wrk_status {
+ WRK_ITEM_UNSET=0, /*!< Work item is not set */
+ WRK_ITEM_START=1, /*!< Processing of work item has started */
+ WRK_ITEM_DONE=2, /*!< Processing is done; usually the status
+ is set to SUCCESS/FAILED instead */
+ WRK_ITEM_SUCCESS=2, /*!< Work item successfully processed */
+ WRK_ITEM_FAILED=3, /*!< Work item process failed */
+ WRK_ITEM_EXIT=4, /*!< Exiting */
+ WRK_ITEM_SET=5, /*!< Work item is set */
+ WRK_ITEM_STATUS_UNDEFINED /*!< Last enumerator, no explicit value */
+} wrk_status_t;
+
+/* Work item task type; selects the branch taken in mtflush_service_io() */
+typedef enum mt_wrk_tsk {
+ MT_WRK_NONE=0, /*!< Exit queue-wait */
+ MT_WRK_WRITE=1, /*!< Flush operation */
+ MT_WRK_READ=2, /*!< Read operation */
+ MT_WRK_UNDEFINED /*!< Last enumerator, no explicit value */
+} mt_wrk_tsk_t;
+
+/* Work thread status */
+typedef enum wthr_status {
+ WTHR_NOT_INIT=0, /*!< Work thread not initialized */
+ WTHR_INITIALIZED=1, /*!< Work thread initialized */
+ WTHR_SIG_WAITING=2, /*!< Work thread waiting for a signal */
+ WTHR_RUNNING=3, /*!< Work thread running */
+ WTHR_NO_WORK=4, /*!< Work thread has no work */
+ WTHR_KILL_IT=5, /*!< Work thread should exit */
+ WTHR_STATUS_UNDEFINED /*!< Last enumerator, no explicit value */
+} wthr_status_t;
+
+/* Write work task: parameters of one flush batch on one buffer pool
+instance */
+typedef struct wr_tsk {
+ buf_pool_t *buf_pool; /*!< buffer-pool instance */
+ buf_flush_t flush_type; /*!< flush-type for buffer-pool
+ flush operation */
+ ulint min; /*!< minimum number of pages
+ requested to be flushed */
+ lsn_t lsn_limit; /*!< lsn limit for the buffer-pool
+ flush operation */
+} wr_tsk_t;
+
+/* Read work task. NOTE(review): the MT_WRK_READ branch in
+mtflush_service_io() is ut_a(0), so this task type appears to be unused
+so far. */
+typedef struct rd_tsk {
+ buf_pool_t *page_pool; /*!< list of pages to decompress */
+} rd_tsk_t;
+
+/* Work item passed through the work queue to a flush thread and returned
+through a completion queue */
+typedef struct wrk_itm
+{
+ mt_wrk_tsk_t tsk; /*!< Task type. Based on task-type
+ one of the entries wr_tsk/rd_tsk
+ will be used */
+ wr_tsk_t wr; /*!< Flush page list */
+ rd_tsk_t rd; /*!< Decompress page list */
+ ulint n_flushed; /*!< Flushed pages count */
+ os_thread_id_t id_usr; /*!< Thread-id currently working */
+ wrk_status_t wi_status; /*!< Work item status */
+ mem_heap_t *wheap; /*!< Heap where to allocate memory
+ for queue nodes */
+ mem_heap_t *rheap; /*!< Heap passed to ib_wqueue_add()
+ when the item is posted to the
+ write completion queue */
+} wrk_t;
+
+/* Per-thread data of one multi-threaded flush worker */
+typedef struct thread_data
+{
+ os_thread_id_t wthread_id; /*!< Thread identifier; compared with
+ os_thread_get_curr_id() by the
+ worker to find its own slot */
+ os_thread_t wthread; /*!< Value returned by
+ os_thread_create() */
+ wthr_status_t wt_status; /*!< Worker thread status */
+} thread_data_t;
+
+/* Thread synchronization data */
+typedef struct thread_sync
+{
+ /* Global variables used by all threads */
+ os_fast_mutex_t thread_global_mtx; /*!< Mutex used protecting below
+ variables */
+ ulint n_threads; /*!< Number of threads */
+ ib_wqueue_t *wq; /*!< Work Queue */
+ ib_wqueue_t *wr_cq; /*!< Write Completion Queue */
+ ib_wqueue_t *rd_cq; /*!< Read Completion Queue */
+ mem_heap_t* wheap; /*!< Work heap where memory
+ is allocated; this structure and
+ thread_data are themselves
+ allocated from it in
+ buf_mtflu_handler_init() */
+ mem_heap_t* rheap; /*!< Second heap, handed to work
+ items as their reply heap */
+ wthr_status_t gwt_status; /*!< Global thread status */
+
+ /* Variables used by only one thread at a time */
+ thread_data_t* thread_data; /*!< Thread specific data, array of
+ n_threads entries */
+
+} thread_sync_t;
+
+/* 1 once buf_mtflu_handler_init() has finished, 0 after
+buf_mtflu_io_thread_exit(), -1 before either has run */
+static int mtflush_work_initialized = -1;
+/* Shared context of the flush threads, set by buf_mtflu_handler_init() */
+static thread_sync_t* mtflush_ctx=NULL;
+/* Held by a flush request while its work items are in flight, and by
+buf_mtflu_io_thread_exit() while it queues the exit items */
+static os_fast_mutex_t mtflush_mtx;
+
+/******************************************************************//**
+Set multi-threaded flush work initialized, i.e. make
+buf_mtflu_init_done() return true from now on. */
+static inline
+void
+buf_mtflu_work_init(void)
+/*=====================*/
+{
+ mtflush_work_initialized = 1;
+}
+
+/******************************************************************//**
+Return true if multi-threaded flush is initialized. The flag is read
+without taking any mutex.
+@return true if initialized */
+bool
+buf_mtflu_init_done(void)
+/*=====================*/
+{
+ return(mtflush_work_initialized == 1);
+}
+
+/******************************************************************//**
+Flush the buffer pool instance described by a work item.
+@return number of flushed pages, or 0 if a flush batch of the same type
+could not be started for the instance
+*/
+static
+ulint
+buf_mtflu_flush_pool_instance(
+/*==========================*/
+	wrk_t	*work_item)	/*!< inout: work item to be flushed */
+{
+	flush_counters_t	counters;
+
+	ut_a(work_item != NULL);
+	ut_a(work_item->wr.buf_pool != NULL);
+
+	buf_pool_t*		pool = work_item->wr.buf_pool;
+	const buf_flush_t	type = work_item->wr.flush_type;
+
+	if (!buf_flush_start(pool, type)) {
+		/* There are two options when the batch cannot be
+		started. With an lsn_limit, skipping one buffer pool
+		instance means we cannot promise that everything up
+		to lsn_limit has been flushed. We could fail the whole
+		request right away, or keep flushing the other
+		instances. We do the latter, assuming it helps the
+		retry that will follow the failure. */
+#ifdef UNIV_MTFLUSH_DEBUG
+		fprintf(stderr, "InnoDB: Note: buf flush start failed there is already active flush for this buffer pool.\n");
+#endif
+		return 0;
+	}
+
+	memset(&counters, 0, sizeof(counters));
+
+	if (type == BUF_FLUSH_LRU) {
+		/* srv_LRU_scan_depth may be arbitrarily large, so it
+		is capped with the current length of the LRU list. */
+		ulint	lru_len;
+
+		buf_pool_mutex_enter(pool);
+		lru_len = UT_LIST_GET_LEN(pool->LRU);
+		buf_pool_mutex_exit(pool);
+
+		work_item->wr.min = ut_min(srv_LRU_scan_depth, lru_len);
+	}
+
+	buf_flush_batch(pool, type, work_item->wr.min,
+			work_item->wr.lsn_limit, false, &counters);
+
+	/* Publish the result in the work item before ending the batch. */
+	work_item->n_flushed = counters.flushed;
+	buf_flush_end(pool, type);
+	buf_flush_common(type, work_item->n_flushed);
+
+	return work_item->n_flushed;
+}
+
+/******************************************************************//**
+Worker function: take one work item from the work queue (blocking if
+the queue is empty), process it and post the item to the write
+completion queue. On return thread_data->wt_status tells the caller
+what happened: WTHR_NO_WORK if no item was obtained, WTHR_KILL_IT if an
+exit item was processed, WTHR_RUNNING otherwise.
+*/
+static
+void
+mtflush_service_io(
+/*===============*/
+	thread_sync_t*	mtflush_io,	/*!< inout: multi-threaded flush
+					synchronization data */
+	thread_data_t*	thread_data)	/*!< inout: thread status data */
+{
+	wrk_t		*work_item = NULL;
+	ulint		n_flushed=0;
+
+	ut_a(mtflush_io != NULL);
+	ut_a(thread_data != NULL);
+
+	thread_data->wt_status = WTHR_SIG_WAITING;
+
+	/* Try a non-blocking fetch first, then block until work arrives */
+	work_item = (wrk_t *)ib_wqueue_nowait(mtflush_io->wq);
+
+	if (work_item == NULL) {
+		work_item = (wrk_t *)ib_wqueue_wait(mtflush_io->wq);
+	}
+
+	if (work_item) {
+		thread_data->wt_status = WTHR_RUNNING;
+	} else {
+		/* Thread did not get any work */
+		thread_data->wt_status = WTHR_NO_WORK;
+		return;
+	}
+
+	if (work_item->wi_status != WRK_ITEM_EXIT) {
+		work_item->wi_status = WRK_ITEM_SET;
+	}
+
+#ifdef UNIV_MTFLUSH_DEBUG
+	ut_a(work_item->id_usr == 0);
+#endif
+	work_item->id_usr = os_thread_get_curr_id();
+
+	/* This works as a producer/consumer model, where in tasks are
+	 * inserted into the work-queue (wq) and completions are based
+	 * on the type of operations performed and as a result the WRITE/
+	 * compression/flush operation completions get posted to wr_cq.
+	 * And READ/decompress operations completions get posted to rd_cq.
+	 * in future we may have others.
+	 */
+
+	switch(work_item->tsk) {
+	case MT_WRK_NONE:
+		ut_a(work_item->wi_status == WRK_ITEM_EXIT);
+		work_item->wi_status = WRK_ITEM_EXIT;
+		ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->rheap);
+		thread_data->wt_status = WTHR_KILL_IT;
+		break;
+
+	case MT_WRK_WRITE:
+		ut_a(work_item->wi_status == WRK_ITEM_SET);
+		work_item->wi_status = WRK_ITEM_START;
+		/* Process work item. The status has to be chosen in an
+		if/else: an unconditional assignment of WRK_ITEM_SUCCESS
+		after the check would always overwrite WRK_ITEM_FAILED. */
+		n_flushed = buf_mtflu_flush_pool_instance(work_item);
+
+		if (n_flushed == 0) {
+			work_item->wi_status = WRK_ITEM_FAILED;
+		} else {
+			work_item->wi_status = WRK_ITEM_SUCCESS;
+		}
+
+		ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->rheap);
+		break;
+
+	case MT_WRK_READ:
+		ut_a(0);
+		break;
+
+	default:
+		/* None other than Write/Read handling planned */
+		ut_a(0);
+		break;
+	}
+}
+
+/******************************************************************//**
+Thread used to flush dirty pages when multi-threaded flush is
+used. The thread first looks up its own slot in mtflush_io->thread_data
+by thread id and then services work items until its status becomes
+WTHR_KILL_IT.
+@return a dummy parameter*/
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(mtflush_io_thread)(
+/*==============================*/
+ void * arg) /*!< in: pointer to the shared thread_sync_t */
+{
+ thread_sync_t *mtflush_io = ((thread_sync_t *)arg);
+ thread_data_t *this_thread_data = NULL;
+ ulint i;
+
+ /* Find correct slot for this thread */
+ os_fast_mutex_lock(&(mtflush_io->thread_global_mtx));
+ for(i=0; i < mtflush_io->n_threads; i ++) {
+ if (mtflush_io->thread_data[i].wthread_id == os_thread_get_curr_id()) {
+ break;
+ }
+ }
+
+ /* NOTE(review): when no slot matches, the loop ends with
+ i == n_threads, which this assertion accepts; the next line
+ would then index one element past thread_data. Confirm that
+ the creating thread always stores wthread_id before this
+ lookup runs. */
+ ut_a(i <= mtflush_io->n_threads);
+ this_thread_data = &mtflush_io->thread_data[i];
+ os_fast_mutex_unlock(&(mtflush_io->thread_global_mtx));
+
+ /* Service work items until an exit item has been processed */
+ while (TRUE) {
+
+#ifdef UNIV_MTFLUSH_DEBUG
+ fprintf(stderr, "InnoDB: Note. Thread %lu work queue len %lu return queue len %lu\n",
+ os_thread_get_curr_id(),
+ ib_wqueue_len(mtflush_io->wq),
+ ib_wqueue_len(mtflush_io->wr_cq));
+#endif /* UNIV_MTFLUSH_DEBUG */
+
+ mtflush_service_io(mtflush_io, this_thread_data);
+
+
+ if (this_thread_data->wt_status == WTHR_KILL_IT) {
+ break;
+ }
+ }
+
+ os_thread_exit(NULL);
+ OS_THREAD_DUMMY_RETURN;
+}
+
+/******************************************************************//**
+Add exit work item to work queue to signal multi-threaded flush
+threads that they should exit, wait until every thread has replied,
+and then release the queues, heaps and mutexes of the flush context.
+Returns immediately if the shutdown has already been started.
+*/
+void
+buf_mtflu_io_thread_exit(void)
+/*==========================*/
+{
+	ulint		i;
+	thread_sync_t*	mtflush_io = mtflush_ctx;
+	wrk_t*		work_item = NULL;
+	mem_heap_t*	wheap;
+	mem_heap_t*	rheap;
+
+	ut_a(mtflush_io != NULL);
+
+	/* Confirm if the io-thread KILL is in progress, bailout. This
+	is checked before anything is allocated so that a repeated call
+	does not touch the work heap. */
+	if (mtflush_io->gwt_status == WTHR_KILL_IT) {
+		return;
+	}
+
+	mtflush_io->gwt_status = WTHR_KILL_IT;
+
+	/* Allocate work items for shutdown message */
+	work_item = (wrk_t*)mem_heap_alloc(mtflush_io->wheap, sizeof(wrk_t)*srv_mtflush_threads);
+
+	/* This lock is to safeguard against timing bug: flush request take
+	this mutex before sending work items to be processed by flush
+	threads. Inside flush thread we assume that work queue contains only
+	a constant number of items. Thus, we may not install new work items
+	below before all previous ones are processed. This mutex is released
+	by flush request after all work items sent to flush threads have
+	been processed. Thus, we can get this mutex if and only if work
+	queue is empty. */
+
+	os_fast_mutex_lock(&mtflush_mtx);
+
+	/* Make sure the work queue is empty */
+	ut_a(ib_wqueue_is_empty(mtflush_io->wq));
+
+	/* Send one exit work item/thread */
+	for (i=0; i < (ulint)srv_mtflush_threads; i++) {
+		work_item[i].tsk = MT_WRK_NONE;
+		work_item[i].wi_status = WRK_ITEM_EXIT;
+		work_item[i].wheap = mtflush_io->wheap;
+		work_item[i].rheap = mtflush_io->rheap;
+		work_item[i].id_usr = 0;
+
+		ib_wqueue_add(mtflush_io->wq,
+			(void *)&(work_item[i]),
+			mtflush_io->wheap);
+	}
+
+	/* Requests sent */
+	os_fast_mutex_unlock(&mtflush_mtx);
+
+	/* Wait until all work items on a work queue are processed */
+	while(!ib_wqueue_is_empty(mtflush_io->wq)) {
+		/* Wait */
+		os_thread_sleep(MT_WAIT_IN_USECS);
+	}
+
+	ut_a(ib_wqueue_is_empty(mtflush_io->wq));
+
+	/* Collect all work done items */
+	for (i=0; i < (ulint)srv_mtflush_threads;) {
+		wrk_t*	reply;
+
+		reply = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wr_cq, MT_WAIT_IN_USECS);
+
+		/* If we receive a reply to a work item and its status is
+		exit, a thread has processed this message and exited */
+		if (reply && reply->wi_status == WRK_ITEM_EXIT) {
+			i++;
+		}
+	}
+
+	/* Wait MT_WAIT_IN_USECS to allow threads really exit */
+	os_thread_sleep(MT_WAIT_IN_USECS);
+
+	/* Make sure that work queue is empty */
+	while(!ib_wqueue_is_empty(mtflush_io->wq))
+	{
+		ib_wqueue_nowait(mtflush_io->wq);
+	}
+
+	os_fast_mutex_lock(&mtflush_mtx);
+
+	ut_a(ib_wqueue_is_empty(mtflush_io->wq));
+	ut_a(ib_wqueue_is_empty(mtflush_io->wr_cq));
+	ut_a(ib_wqueue_is_empty(mtflush_io->rd_cq));
+
+	/* Free all queues */
+	ib_wqueue_free(mtflush_io->wq);
+	ib_wqueue_free(mtflush_io->wr_cq);
+	ib_wqueue_free(mtflush_io->rd_cq);
+
+	mtflush_io->wq = NULL;
+	mtflush_io->wr_cq = NULL;
+	mtflush_io->rd_cq = NULL;
+	mtflush_work_initialized = 0;
+
+	/* The context structure itself is allocated from wheap in
+	buf_mtflu_handler_init(). Everything that is still read from
+	the context must therefore be taken out of it before wheap is
+	freed: remember both heap pointers and destroy the mutex that
+	is embedded in the context first. */
+	wheap = mtflush_io->wheap;
+	rheap = mtflush_io->rheap;
+
+	os_fast_mutex_free(&mtflush_io->thread_global_mtx);
+
+	/* Free heap; mtflush_io must not be dereferenced after this */
+	mem_heap_free(rheap);
+	mem_heap_free(wheap);
+
+	os_fast_mutex_unlock(&mtflush_mtx);
+	os_fast_mutex_free(&mtflush_mtx);
+}
+
+/******************************************************************//**
+Initialize multi-threaded flush thread synchronization data: create the
+heaps, the work queue and both completion queues, the mutexes, and
+start n_threads flush threads.
+@return Initialized multi-threaded flush thread synchronization data. */
+void*
+buf_mtflu_handler_init(
+/*===================*/
+	ulint	n_threads,	/*!< in: Number of threads to create */
+	ulint	wrk_cnt)	/*!< in: Number of work items (not
+				used by this function) */
+{
+	ulint		i;
+	mem_heap_t*	mtflush_heap;
+	mem_heap_t*	mtflush_heap2;
+
+	/* Create heap, work queue, write completion queue, read
+	completion queue for multi-threaded flush, and init
+	handler. */
+	mtflush_heap = mem_heap_create(0);
+	ut_a(mtflush_heap != NULL);
+	mtflush_heap2 = mem_heap_create(0);
+	ut_a(mtflush_heap2 != NULL);
+
+	mtflush_ctx = (thread_sync_t *)mem_heap_alloc(mtflush_heap,
+				sizeof(thread_sync_t));
+	/* Check the allocation before it is written to */
+	ut_a(mtflush_ctx != NULL);
+	memset(mtflush_ctx, 0, sizeof(thread_sync_t));
+	mtflush_ctx->thread_data = (thread_data_t*)mem_heap_alloc(
+		mtflush_heap, sizeof(thread_data_t) * n_threads);
+	ut_a(mtflush_ctx->thread_data);
+	memset(mtflush_ctx->thread_data, 0, sizeof(thread_data_t) * n_threads);
+
+	mtflush_ctx->n_threads = n_threads;
+	mtflush_ctx->wq = ib_wqueue_create();
+	ut_a(mtflush_ctx->wq);
+	mtflush_ctx->wr_cq = ib_wqueue_create();
+	ut_a(mtflush_ctx->wr_cq);
+	mtflush_ctx->rd_cq = ib_wqueue_create();
+	ut_a(mtflush_ctx->rd_cq);
+	mtflush_ctx->wheap = mtflush_heap;
+	mtflush_ctx->rheap = mtflush_heap2;
+
+	os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_ctx->thread_global_mtx);
+	os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx);
+
+	/* Create threads for page-compression-flush. Each new thread
+	starts by taking thread_global_mtx and searching thread_data
+	for its own thread id. The mutex is held here until every id
+	has been stored, so that no thread can run that search before
+	its slot is filled in. */
+	os_fast_mutex_lock(&mtflush_ctx->thread_global_mtx);
+
+	for(i=0; i < n_threads; i++) {
+		os_thread_id_t new_thread_id;
+
+		mtflush_ctx->thread_data[i].wt_status = WTHR_INITIALIZED;
+
+		mtflush_ctx->thread_data[i].wthread = os_thread_create(
+			mtflush_io_thread,
+			((void *) mtflush_ctx),
+			&new_thread_id);
+
+		mtflush_ctx->thread_data[i].wthread_id = new_thread_id;
+	}
+
+	os_fast_mutex_unlock(&mtflush_ctx->thread_global_mtx);
+
+	buf_mtflu_work_init();
+
+	return((void *)mtflush_ctx);
+}
+
+/******************************************************************//**
+Flush buffer pool instances: queue one write work item per instance and
+wait until a completion has been received for each of them.
+@return number of pages flushed, 0 if the flush threads are being
+shut down. */
+ulint
+buf_mtflu_flush_work_items(
+/*=======================*/
+	ulint		buf_pool_inst,	/*!< in: Number of buffer pool instances */
+	ulint		*per_pool_pages_flushed, /*!< out: Number of pages
+					flushed per completion, stored in
+					the order completions arrive */
+	buf_flush_t	flush_type,	/*!< in: Type of flush */
+	ulint		min_n,		/*!< in: Wished minimum number of
+					blocks to be flushed */
+	lsn_t		lsn_limit)	/*!< in: All blocks whose
+					oldest_modification is smaller than
+					this should be flushed (if their
+					number does not exceed min_n) */
+{
+	ulint		total_flushed = 0;
+	ulint		n_done = 0;
+	ulint		slot;
+	mem_heap_t*	work_heap;
+	mem_heap_t*	reply_heap;
+	wrk_t		work_item[MTFLUSH_MAX_WORKER];
+
+	if (mtflush_ctx->gwt_status == WTHR_KILL_IT) {
+		return 0;
+	}
+
+	/* Heaps from which the queue nodes for the requests and the
+	replies are allocated */
+	work_heap = mem_heap_create(0);
+	reply_heap = mem_heap_create(0);
+
+	/* Fill in and queue one request per buffer pool instance */
+	for (slot = 0; slot < buf_pool_inst; slot++) {
+		wrk_t*	request = &work_item[slot];
+
+		request->tsk = MT_WRK_WRITE;
+		request->wi_status = WRK_ITEM_UNSET;
+		request->n_flushed = 0;
+		request->id_usr = 0;
+		request->wheap = work_heap;
+		request->rheap = reply_heap;
+		request->wr.buf_pool = buf_pool_from_array(slot);
+		request->wr.flush_type = flush_type;
+		request->wr.min = min_n;
+		request->wr.lsn_limit = lsn_limit;
+
+		ib_wqueue_add(mtflush_ctx->wq, (void *) request, work_heap);
+	}
+
+	/* Wait until a completion has arrived for every request */
+	while (n_done < buf_pool_inst) {
+		wrk_t*	done_wi;
+
+		done_wi = (wrk_t *)ib_wqueue_wait(mtflush_ctx->wr_cq);
+
+		if (done_wi == NULL) {
+			continue;
+		}
+
+		per_pool_pages_flushed[n_done] = done_wi->n_flushed;
+
+#ifdef UNIV_MTFLUSH_DEBUG
+		if((int)done_wi->id_usr == 0 &&
+			(done_wi->wi_status == WRK_ITEM_SET ||
+				done_wi->wi_status == WRK_ITEM_UNSET)) {
+			fprintf(stderr,
+				"**Set/Unused work_item[%lu] flush_type=%d\n",
+				n_done,
+				done_wi->wr.flush_type);
+			ut_a(0);
+		}
+#endif
+
+		total_flushed += done_wi->n_flushed;
+		n_done++;
+	}
+
+	/* Release the queue nodes used for requests and replies */
+	mem_heap_free(work_heap);
+	mem_heap_free(reply_heap);
+
+	return(total_flushed);
+}
+
+/*******************************************************************//**
+Multi-threaded version of buf_flush_list: distributes a
+BUF_FLUSH_LIST request over all buffer pool instances and waits for the
+flush threads to complete it.
+@return always true in the current implementation
+*/
+bool
+buf_mtflu_flush_list(
+/*=================*/
+	ulint	min_n,		/*!< in: wished minimum number of blocks
+				flushed (it is not guaranteed that the
+				actual number is that big, though) */
+	lsn_t	lsn_limit,	/*!< in the case BUF_FLUSH_LIST all
+				blocks whose oldest_modification is
+				smaller than this should be flushed
+				(if their number does not exceed
+				min_n), otherwise ignored */
+	ulint*	n_processed)	/*!< out: the number of pages
+				which were processed is passed
+				back to caller. Ignored if NULL */
+
+{
+	ulint	i;
+	bool	success = true;
+	ulint	cnt_flush[MTFLUSH_MAX_WORKER];
+
+	if (n_processed) {
+		*n_processed = 0;
+	}
+
+	if (min_n != ULINT_MAX) {
+		/* Ensure that flushing is spread evenly amongst the
+		buffer pool instances. When min_n is ULINT_MAX
+		we need to flush everything up to the lsn limit
+		so no limit here. */
+		min_n = (min_n + srv_buf_pool_instances - 1)
+			/ srv_buf_pool_instances;
+	}
+
+	/* This lock is to safeguard against re-entry if any. */
+	os_fast_mutex_lock(&mtflush_mtx);
+	buf_mtflu_flush_work_items(srv_buf_pool_instances,
+		cnt_flush, BUF_FLUSH_LIST,
+		min_n, lsn_limit);
+	os_fast_mutex_unlock(&mtflush_mtx);
+
+	for (i = 0; i < srv_buf_pool_instances; i++) {
+		if (n_processed) {
+			*n_processed += cnt_flush[i];
+		}
+		if (cnt_flush[i]) {
+			MONITOR_INC_VALUE_CUMULATIVE(
+				MONITOR_FLUSH_BATCH_TOTAL_PAGE,
+				MONITOR_FLUSH_BATCH_COUNT,
+				MONITOR_FLUSH_BATCH_PAGES,
+				cnt_flush[i]);
+		}
+	}
+#ifdef UNIV_MTFLUSH_DEBUG
+	/* n_processed may legitimately be NULL, so it must not be
+	dereferenced unconditionally here. */
+	fprintf(stderr, "%s: [1] [*n_processed: (min:%lu)%lu ]\n",
+		__FUNCTION__, (min_n * srv_buf_pool_instances),
+		n_processed ? *n_processed : 0);
+#endif
+	return(success);
+}
+
+/*********************************************************************//**
+Clears up tail of the LRU lists:
+* Put replaceable pages at the tail of LRU to the free list
+* Flush dirty pages at the tail of LRU to the disk
+The depth to which we scan each buffer pool is controlled by dynamic
+config parameter innodb_LRU_scan_depth.
+The request is handed to the multi-threaded flush threads, one work
+item per buffer pool instance.
+@return total pages flushed */
+UNIV_INTERN
+ulint
+buf_mtflu_flush_LRU_tail(void)
+/*==========================*/
+{
+	ulint	total_flushed=0, i;
+	ulint	cnt_flush[MTFLUSH_MAX_WORKER];
+
+	ut_a(buf_mtflu_init_done());
+
+	/* This lock is to safeguard against re-entry if any */
+	os_fast_mutex_lock(&mtflush_mtx);
+	buf_mtflu_flush_work_items(srv_buf_pool_instances,
+		cnt_flush, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0);
+	os_fast_mutex_unlock(&mtflush_mtx);
+
+	for (i = 0; i < srv_buf_pool_instances; i++) {
+		if (cnt_flush[i]) {
+			total_flushed += cnt_flush[i];
+
+			MONITOR_INC_VALUE_CUMULATIVE(
+				MONITOR_LRU_BATCH_TOTAL_PAGE,
+				MONITOR_LRU_BATCH_COUNT,
+				MONITOR_LRU_BATCH_PAGES,
+				cnt_flush[i]);
+		}
+	}
+
+	/* Tested with #ifdef like every other UNIV_MTFLUSH_DEBUG block
+	in this file; #if would not preprocess when the macro is
+	defined without a value. */
+#ifdef UNIV_MTFLUSH_DEBUG
+	fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu ]\n", (
+		srv_LRU_scan_depth * srv_buf_pool_instances), total_flushed);
+#endif
+
+	return(total_flushed);
+}
+
+/*********************************************************************//**
+Copy the identifiers of the flush threads recorded in the thread
+context into the caller's io thread id array. */
+void
+buf_mtflu_set_thread_ids(
+/*=====================*/
+	ulint		n_threads,	/*!<in: Number of threads to fill */
+	void*		ctx,		/*!<in: thread context */
+	os_thread_id_t*	thread_ids)	/*!<out: thread id array, first
+					n_threads entries are written */
+{
+	thread_sync_t*	sync_data = (thread_sync_t *) ctx;
+	ulint		slot = 0;
+
+	ut_a(sync_data != NULL);
+	ut_a(thread_ids != NULL);
+
+	while (slot < n_threads) {
+		thread_ids[slot] = sync_data->thread_data[slot].wthread_id;
+		slot++;
+	}
+}
diff --git a/storage/xtradb/buf/buf0rea.cc b/storage/xtradb/buf/buf0rea.cc
index c28df72df92..63d2fdf7726 100644
--- a/storage/xtradb/buf/buf0rea.cc
+++ b/storage/xtradb/buf/buf0rea.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -229,14 +230,14 @@ not_to_recover:
*err = _fil_io(OS_FILE_READ | wake_later
| ignore_nonexistent_pages,
sync, space, zip_size, offset, 0, zip_size,
- bpage->zip.data, bpage, trx);
+ bpage->zip.data, bpage, 0, trx);
} else {
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
*err = _fil_io(OS_FILE_READ | wake_later
| ignore_nonexistent_pages,
sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
- ((buf_block_t*) bpage)->frame, bpage, trx);
+ ((buf_block_t*) bpage)->frame, bpage, &bpage->write_size, trx);
}
if (sync) {
diff --git a/storage/xtradb/dict/dict0dict.cc b/storage/xtradb/dict/dict0dict.cc
index 8d8347ca726..a76121544b0 100644
--- a/storage/xtradb/dict/dict0dict.cc
+++ b/storage/xtradb/dict/dict0dict.cc
@@ -2,6 +2,7 @@
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc
index da61d29f6f8..d1f35480ecf 100644
--- a/storage/xtradb/fil/fil0fil.cc
+++ b/storage/xtradb/fil/fil0fil.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2014, MariaDB Corporation. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -54,6 +55,14 @@ Created 10/25/1995 Heikki Tuuri
# include "srv0srv.h"
static ulint srv_data_read, srv_data_written;
#endif /* !UNIV_HOTBACKUP */
+#include "fil0pagecompress.h"
+#include "zlib.h"
+#ifdef __linux__
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#endif
+#include "row0mysql.h"
MYSQL_PLUGIN_IMPORT extern my_bool lower_case_file_system;
@@ -162,7 +171,7 @@ fil_system_t* fil_system = NULL;
&& srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)\
|| ((s)->purpose == FIL_LOG \
&& srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT))
-
+
#else /* __WIN__ */
# define fil_buffering_disabled(s) (0)
#endif /* __WIN__ */
@@ -270,11 +279,16 @@ fil_read(
block size multiple */
void* buf, /*!< in/out: buffer where to store data read;
in aio this must be appropriately aligned */
- void* message) /*!< in: message for aio handler if non-sync
- aio used, else ignored */
+ void* message, /*!< in: message for aio handler if non-sync
+ aio used, else ignored */
+ ulint* write_size) /*!< in/out: Actual write size initialized
+ after first successful trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
{
return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset,
- byte_offset, len, buf, message));
+ byte_offset, len, buf, message, write_size));
}
/********************************************************************//**
@@ -299,18 +313,22 @@ fil_write(
be a block size multiple */
void* buf, /*!< in: buffer from which to write; in aio
this must be appropriately aligned */
- void* message) /*!< in: message for aio handler if non-sync
- aio used, else ignored */
+ void* message, /*!< in: message for aio handler if non-sync
+ aio used, else ignored */
+ ulint* write_size) /*!< in/out: Actual write size initialized
+ after first successful trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
{
ut_ad(!srv_read_only_mode);
return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset,
- byte_offset, len, buf, message));
+ byte_offset, len, buf, message, write_size));
}
/*******************************************************************//**
Returns the table space by a given id, NULL if not found. */
-UNIV_INLINE
fil_space_t*
fil_space_get_by_id(
/*================*/
@@ -328,6 +346,19 @@ fil_space_get_by_id(
return(space);
}
+/****************************************************************//**
+Get the id of the tablespace that a fil node belongs to.
+@return id of node->space */
+ulint
+fil_node_get_space_id(
+/*==================*/
+	fil_node_t*	node)	/*!< in: file node */
+{
+	ut_ad(node);
+	ut_ad(node->space);
+
+	const ulint	space_id = node->space->id;
+
+	return(space_id);
+}
+
/*******************************************************************//**
Returns the table space by a given name, NULL if not found. */
UNIV_INLINE
@@ -548,8 +579,9 @@ fil_node_open_file(
byte* buf2;
byte* page;
ulint space_id;
- ulint flags;
+ ulint flags=0;
ulint page_size;
+ ulint atomic_writes=0;
ut_ad(mutex_own(&(system->mutex)));
ut_a(node->n_pending == 0);
@@ -566,7 +598,7 @@ fil_node_open_file(
node->handle = os_file_create_simple_no_error_handling(
innodb_file_data_key, node->name, OS_FILE_OPEN,
- OS_FILE_READ_ONLY, &success);
+ OS_FILE_READ_ONLY, &success, 0);
if (!success) {
/* The following call prints an error message */
os_file_get_last_error(true);
@@ -583,6 +615,8 @@ fil_node_open_file(
size_bytes = os_file_get_size(node->handle);
ut_a(size_bytes != (os_offset_t) -1);
+
+ node->file_block_size = os_file_get_block_size(node->handle, node->name);
#ifdef UNIV_HOTBACKUP
if (space->id == 0) {
node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
@@ -614,10 +648,13 @@ fil_node_open_file(
set */
page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
- success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE);
+ success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE,
+ space->flags);
+
space_id = fsp_header_get_space_id(page);
flags = fsp_header_get_flags(page);
page_size = fsp_flags_get_page_size(flags);
+ atomic_writes = fsp_flags_get_atomic_writes(flags);
ut_free(buf2);
@@ -668,6 +705,17 @@ fil_node_open_file(
ut_error;
}
+ if (UNIV_UNLIKELY(space->flags != flags)) {
+ if (!dict_tf_verify_flags(space->flags, flags)) {
+ fprintf(stderr,
+ "InnoDB: Error: table flags are 0x%lx"
+ " in the data dictionary\n"
+ "InnoDB: but the flags in file %s are 0x%lx!\n",
+ space->flags, node->name, flags);
+ ut_error;
+ }
+ }
+
if (size_bytes >= 1024 * 1024) {
/* Truncate the size to whole megabytes. */
size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
@@ -687,6 +735,8 @@ add_size:
space->size += node->size;
}
+ atomic_writes = fsp_flags_get_atomic_writes(space->flags);
+
/* printf("Opening file %s\n", node->name); */
/* Open the file for reading and writing, in Windows normally in the
@@ -697,18 +747,22 @@ add_size:
node->handle = os_file_create(innodb_file_log_key,
node->name, OS_FILE_OPEN,
OS_FILE_AIO, OS_LOG_FILE,
- &ret);
+ &ret, atomic_writes);
} else if (node->is_raw_disk) {
node->handle = os_file_create(innodb_file_data_key,
node->name,
OS_FILE_OPEN_RAW,
OS_FILE_AIO, OS_DATA_FILE,
- &ret);
+ &ret, atomic_writes);
} else {
node->handle = os_file_create(innodb_file_data_key,
node->name, OS_FILE_OPEN,
OS_FILE_AIO, OS_DATA_FILE,
- &ret);
+ &ret, atomic_writes);
+ }
+
+ if (node->file_block_size == 0) {
+ node->file_block_size = os_file_get_block_size(node->handle, node->name);
}
ut_a(ret);
@@ -1112,7 +1166,6 @@ fil_space_create(
DBUG_EXECUTE_IF("fil_space_create_failure", return(false););
ut_a(fil_system);
- ut_a(fsp_flags_is_valid(flags));
/* Look for a matching tablespace and if found free it. */
do {
@@ -1189,6 +1242,7 @@ fil_space_create(
space->flags = flags;
space->magic_n = FIL_SPACE_MAGIC_N;
+ space->printed_compression_failure = false;
rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP);
@@ -1774,12 +1828,12 @@ fil_write_lsn_and_arch_no_to_file(
buf = static_cast<byte*>(ut_align(buf1, UNIV_PAGE_SIZE));
err = fil_read(TRUE, space, 0, sum_of_sizes, 0,
- UNIV_PAGE_SIZE, buf, NULL);
+ UNIV_PAGE_SIZE, buf, NULL, 0);
if (err == DB_SUCCESS) {
mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
err = fil_write(TRUE, space, 0, sum_of_sizes, 0,
- UNIV_PAGE_SIZE, buf, NULL);
+ UNIV_PAGE_SIZE, buf, NULL, 0);
}
mem_free(buf1);
@@ -1913,8 +1967,10 @@ fil_read_first_page(
ulint* space_id, /*!< out: tablespace ID */
lsn_t* min_flushed_lsn, /*!< out: min of flushed
lsn values in data files */
- lsn_t* max_flushed_lsn) /*!< out: max of flushed
+ lsn_t* max_flushed_lsn, /*!< out: max of flushed
lsn values in data files */
+ ulint orig_space_id) /*!< in: original file space
+ id */
{
byte* buf;
byte* page;
@@ -1927,7 +1983,10 @@ fil_read_first_page(
page = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
- os_file_read(data_file, page, 0, UNIV_PAGE_SIZE);
+ os_file_read(data_file, page, 0, UNIV_PAGE_SIZE,
+ orig_space_id != ULINT_UNDEFINED ?
+ fil_space_is_page_compressed(orig_space_id) :
+ FALSE);
/* The FSP_HEADER on page 0 is only valid for the first file
in a tablespace. So if this is not the first datafile, leave
@@ -1936,12 +1995,21 @@ fil_read_first_page(
if (!one_read_already) {
*flags = fsp_header_get_flags(page);
*space_id = fsp_header_get_space_id(page);
+ }
- check_msg = fil_check_first_page(page);
+ /* If this is a page-compressed page, it must be decompressed
+ before we continue. */
+ if (fil_page_is_compressed(page)) {
+ ulint write_size=0;
+ fil_decompress_page(NULL, page, UNIV_PAGE_SIZE, &write_size);
}
flushed_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
+ if (!one_read_already) {
+ check_msg = fil_check_first_page(page);
+ }
+
ut_free(buf);
if (check_msg) {
@@ -3064,7 +3132,7 @@ fil_create_link_file(
file = os_file_create_simple_no_error_handling(
innodb_file_data_key, link_filepath,
- OS_FILE_CREATE, OS_FILE_READ_WRITE, &success);
+ OS_FILE_CREATE, OS_FILE_READ_WRITE, &success, 0);
if (!success) {
/* The following call will print an error message */
@@ -3084,6 +3152,8 @@ fil_create_link_file(
} else if (error == OS_FILE_DISK_FULL) {
err = DB_OUT_OF_FILE_SPACE;
+ } else if (error == OS_FILE_OPERATION_NOT_SUPPORTED) {
+ err = DB_UNSUPPORTED;
} else {
err = DB_ERROR;
}
@@ -3094,7 +3164,7 @@ fil_create_link_file(
}
if (!os_file_write(link_filepath, file, filepath, 0,
- strlen(filepath))) {
+ strlen(filepath))) {
err = DB_ERROR;
}
@@ -3173,8 +3243,9 @@ fil_open_linked_file(
/*===============*/
const char* tablename, /*!< in: database/tablename */
char** remote_filepath,/*!< out: remote filepath */
- os_file_t* remote_file) /*!< out: remote file handle */
-
+ os_file_t* remote_file, /*!< out: remote file handle */
+ ulint atomic_writes) /*!< in: atomic writes table option
+ value */
{
ibool success;
@@ -3188,7 +3259,7 @@ fil_open_linked_file(
*remote_file = os_file_create_simple_no_error_handling(
innodb_file_data_key, *remote_filepath,
OS_FILE_OPEN, OS_FILE_READ_ONLY,
- &success);
+ &success, atomic_writes);
if (!success) {
char* link_filepath = fil_make_isl_name(tablename);
@@ -3243,6 +3314,7 @@ fil_create_new_single_table_tablespace(
/* TRUE if a table is created with CREATE TEMPORARY TABLE */
bool is_temp = !!(flags2 & DICT_TF2_TEMPORARY);
bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags);
+ ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags);
ut_a(space_id > 0);
ut_ad(!srv_read_only_mode);
@@ -3275,7 +3347,8 @@ fil_create_new_single_table_tablespace(
OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT,
OS_FILE_NORMAL,
OS_DATA_FILE,
- &ret);
+ &ret,
+ atomic_writes);
if (ret == FALSE) {
/* The following call will print an error message */
@@ -3302,6 +3375,11 @@ fil_create_new_single_table_tablespace(
goto error_exit_3;
}
+ if (error == OS_FILE_OPERATION_NOT_SUPPORTED) {
+ err = DB_UNSUPPORTED;
+ goto error_exit_3;
+ }
+
if (error == OS_FILE_DISK_FULL) {
err = DB_OUT_OF_FILE_SPACE;
goto error_exit_3;
@@ -3340,6 +3418,7 @@ fil_create_new_single_table_tablespace(
flags = fsp_flags_set_page_size(flags, UNIV_PAGE_SIZE);
fsp_header_init_fields(page, space_id, flags);
mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
+ ut_ad(fsp_flags_is_valid(flags));
if (!(fsp_flags_is_compressed(flags))) {
buf_flush_init_for_writing(page, NULL, 0);
@@ -3516,16 +3595,25 @@ fil_open_single_table_tablespace(
fsp_open_info remote;
ulint tablespaces_found = 0;
ulint valid_tablespaces_found = 0;
+ ulint atomic_writes = 0;
#ifdef UNIV_SYNC_DEBUG
ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(!fix_dict || mutex_own(&(dict_sys->mutex)));
- if (!fsp_flags_is_valid(flags)) {
+ /* Table flags can be ULINT_UNDEFINED if
+ dict_tf_to_fsp_flags_failure is set. */
+ if (flags != ULINT_UNDEFINED) {
+ if (!fsp_flags_is_valid(flags)) {
+ return(DB_CORRUPTION);
+ }
+ } else {
return(DB_CORRUPTION);
}
+ atomic_writes = fsp_flags_get_atomic_writes(flags);
+
/* If the tablespace was relocated, we do not
compare the DATA_DIR flag */
ulint mod_flags = flags & ~FSP_FLAGS_MASK_DATA_DIR;
@@ -3550,7 +3638,7 @@ fil_open_single_table_tablespace(
}
link_file_found = fil_open_linked_file(
- tablename, &remote.filepath, &remote.file);
+ tablename, &remote.filepath, &remote.file, atomic_writes);
remote.success = link_file_found;
if (remote.success) {
/* possibility of multiple files. */
@@ -3578,7 +3666,7 @@ fil_open_single_table_tablespace(
if (dict.filepath) {
dict.file = os_file_create_simple_no_error_handling(
innodb_file_data_key, dict.filepath, OS_FILE_OPEN,
- OS_FILE_READ_ONLY, &dict.success);
+ OS_FILE_READ_ONLY, &dict.success, atomic_writes);
if (dict.success) {
/* possibility of multiple files. */
validate = true;
@@ -3590,7 +3678,7 @@ fil_open_single_table_tablespace(
ut_a(def.filepath);
def.file = os_file_create_simple_no_error_handling(
innodb_file_data_key, def.filepath, OS_FILE_OPEN,
- OS_FILE_READ_ONLY, &def.success);
+ OS_FILE_READ_ONLY, &def.success, atomic_writes);
if (def.success) {
tablespaces_found++;
}
@@ -3606,7 +3694,7 @@ fil_open_single_table_tablespace(
if (def.success) {
def.check_msg = fil_read_first_page(
def.file, FALSE, &def.flags, &def.id,
- &def.lsn, &def.lsn);
+ &def.lsn, &def.lsn, id);
def.valid = !def.check_msg;
/* Validate this single-table-tablespace with SYS_TABLES,
@@ -3628,7 +3716,7 @@ fil_open_single_table_tablespace(
if (remote.success) {
remote.check_msg = fil_read_first_page(
remote.file, FALSE, &remote.flags, &remote.id,
- &remote.lsn, &remote.lsn);
+ &remote.lsn, &remote.lsn, id);
remote.valid = !remote.check_msg;
/* Validate this single-table-tablespace with SYS_TABLES,
@@ -3651,7 +3739,7 @@ fil_open_single_table_tablespace(
if (dict.success) {
dict.check_msg = fil_read_first_page(
dict.file, FALSE, &dict.flags, &dict.id,
- &dict.lsn, &dict.lsn);
+ &dict.lsn, &dict.lsn, id);
dict.valid = !dict.check_msg;
/* Validate this single-table-tablespace with SYS_TABLES,
@@ -3915,7 +4003,8 @@ fil_user_tablespace_find_space_id(
for (ulint j = 0; j < page_count; ++j) {
- st = os_file_read(fsp->file, page, (j* page_size), page_size);
+ st = os_file_read(fsp->file, page, (j* page_size), page_size,
+ fsp_flags_is_page_compressed(fsp->flags));
if (!st) {
ib_logf(IB_LOG_LEVEL_INFO,
@@ -4028,7 +4117,7 @@ fil_user_tablespace_restore_page(
err = os_file_write(fsp->filepath, fsp->file, page,
(zip_size ? zip_size : page_size) * page_no,
- buflen);
+ buflen);
os_file_flush(fsp->file);
out:
@@ -4052,7 +4141,7 @@ check_first_page:
fsp->success = TRUE;
if (const char* check_msg = fil_read_first_page(
fsp->file, FALSE, &fsp->flags, &fsp->id,
- &fsp->lsn, &fsp->lsn)) {
+ &fsp->lsn, &fsp->lsn, ULINT_UNDEFINED)) {
ib_logf(IB_LOG_LEVEL_ERROR,
"%s in tablespace %s (table %s)",
check_msg, fsp->filepath, tablename);
@@ -4125,9 +4214,7 @@ fil_load_single_table_tablespace(
fsp_open_info def;
fsp_open_info remote;
os_offset_t size;
-#ifdef UNIV_HOTBACKUP
fil_space_t* space;
-#endif
memset(&def, 0, sizeof(def));
memset(&remote, 0, sizeof(remote));
@@ -4160,7 +4247,8 @@ fil_load_single_table_tablespace(
one of them is sent to this function. So if this table has
already been loaded, there is nothing to do.*/
mutex_enter(&fil_system->mutex);
- if (fil_space_get_by_name(tablename)) {
+ space = fil_space_get_by_name(tablename);
+ if (space) {
mem_free(tablename);
mutex_exit(&fil_system->mutex);
return;
@@ -4185,7 +4273,7 @@ fil_load_single_table_tablespace(
/* Check for a link file which locates a remote tablespace. */
remote.success = fil_open_linked_file(
- tablename, &remote.filepath, &remote.file);
+ tablename, &remote.filepath, &remote.file, FALSE);
/* Read the first page of the remote tablespace */
if (remote.success) {
@@ -4200,7 +4288,7 @@ fil_load_single_table_tablespace(
/* Try to open the tablespace in the datadir. */
def.file = os_file_create_simple_no_error_handling(
innodb_file_data_key, def.filepath, OS_FILE_OPEN,
- OS_FILE_READ_WRITE, &def.success);
+ OS_FILE_READ_ONLY, &def.success, FALSE);
/* Read the first page of the remote tablespace */
if (def.success) {
@@ -4965,6 +5053,11 @@ retry:
start_page_no = space->size;
file_start_page_no = space->size - node->size;
+ /* Determine correct file block size */
+ if (node->file_block_size == 0) {
+ node->file_block_size = os_file_get_block_size(node->handle, node->name);
+ }
+
#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
os_offset_t start_offset = start_page_no * page_size;
@@ -4976,7 +5069,7 @@ retry:
"space for file \'%s\' failed. Current size "
INT64PF ", desired size " INT64PF "\n",
node->name, start_offset, len+start_offset);
- os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE);
+ os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE, __FILE__, __LINE__);
success = FALSE;
} else {
success = TRUE;
@@ -5025,7 +5118,7 @@ retry:
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
node->name, node->handle, buf,
offset, page_size * n_pages,
- NULL, NULL, space_id, NULL);
+ node, NULL, space_id, NULL, 0, 0, 0);
#endif /* UNIV_HOTBACKUP */
DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
@@ -5404,7 +5497,12 @@ _fil_io(
or from where to write; in aio this must be
appropriately aligned */
void* message, /*!< in: message for aio handler if non-sync
- aio used, else ignored */
+ aio used, else ignored */
+ ulint* write_size, /*!< in/out: Actual write size initialized
+ after first successful trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
trx_t* trx)
{
ulint mode;
@@ -5415,6 +5513,8 @@ _fil_io(
ulint wake_later;
os_offset_t offset;
ibool ignore_nonexistent_pages;
+ ibool page_compressed = FALSE;
+ ulint page_compression_level = 0;
is_log = type & OS_FILE_LOG;
type = type & ~OS_FILE_LOG;
@@ -5468,6 +5568,11 @@ _fil_io(
} else if (type == OS_FILE_WRITE) {
ut_ad(!srv_read_only_mode);
srv_stats.data_written.add(len);
+ if (fil_page_is_index_page((byte *)buf)) {
+ srv_stats.index_pages_written.inc();
+ } else {
+ srv_stats.non_index_pages_written.inc();
+ }
}
/* Reserve the fil_system mutex and make sure that we can open at
@@ -5477,6 +5582,8 @@ _fil_io(
space = fil_space_get_by_id(space_id);
+ page_compressed = fsp_flags_is_page_compressed(space->flags);
+ page_compression_level = fsp_flags_get_page_compression_level(space->flags);
/* If we are deleting a tablespace we don't allow any read
operations on that. However, we do allow write operations. */
if (space == 0 || (type == OS_FILE_READ && space->stop_new_ops)) {
@@ -5622,7 +5729,8 @@ _fil_io(
/* Queue the aio request */
ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
- offset, len, node, message, space_id, trx);
+ offset, len, node, message, space_id, trx,
+ page_compressed, page_compression_level, write_size);
#else
/* In mysqlbackup do normal i/o, not aio */
@@ -6173,7 +6281,8 @@ fil_iterate(
ut_ad(!(n_bytes % iter.page_size));
if (!os_file_read(iter.file, io_buffer, offset,
- (ulint) n_bytes)) {
+ (ulint) n_bytes,
+ fil_space_is_page_compressed(space_id))) {
ib_logf(IB_LOG_LEVEL_ERROR, "os_file_read() failed");
@@ -6260,7 +6369,7 @@ fil_tablespace_iterate(
file = os_file_create_simple_no_error_handling(
innodb_file_data_key, filepath,
- OS_FILE_OPEN, OS_FILE_READ_WRITE, &success);
+ OS_FILE_OPEN, OS_FILE_READ_WRITE, &success, FALSE);
DBUG_EXECUTE_IF("fil_tablespace_iterate_failure",
{
@@ -6312,7 +6421,8 @@ fil_tablespace_iterate(
/* Read the first page and determine the page and zip size. */
- if (!os_file_read(file, page, 0, UNIV_PAGE_SIZE)) {
+ if (!os_file_read(file, page, 0, UNIV_PAGE_SIZE,
+ dict_tf_get_page_compression(table->flags))) {
err = DB_IO_ERROR;
@@ -6547,3 +6657,87 @@ fil_space_set_corrupt(
mutex_exit(&fil_system->mutex);
}
+
+/****************************************************************//**
+Acquire the fil_system mutex. In debug builds it is asserted that the
+calling thread does not already own it. */
+void
+fil_system_enter(void)
+/*==================*/
+{
+	ut_ad(!mutex_own(&(fil_system->mutex)));
+
+	mutex_enter(&(fil_system->mutex));
+}
+
+/****************************************************************//**
+Release the fil_system mutex. In debug builds it is asserted that the
+calling thread currently owns it. */
+void
+fil_system_exit(void)
+/*=================*/
+{
+	ut_ad(mutex_own(&(fil_system->mutex)));
+
+	mutex_exit(&(fil_system->mutex));
+}
+
+/*******************************************************************//**
+Get the name stored in a tablespace object.
+@return space->name (the pointer itself, not a copy) */
+char*
+fil_space_name(
+/*===========*/
+	fil_space_t*	space)	/*!< in: space */
+{
+	char*	name = space->name;
+
+	return(name);
+}
+
+/*******************************************************************//**
+Translate a FIL_PAGE_TYPE value into a printable name.
+@return string literal naming the page type, or "PAGE TYPE CORRUPTED"
+for a value that is not one of the types listed below */
+const char*
+fil_get_page_type_name(
+/*===================*/
+	ulint	page_type)	/*!< in: FIL_PAGE_TYPE */
+{
+	const char*	type_name;
+
+	switch (page_type) {
+	case FIL_PAGE_PAGE_COMPRESSED:
+		type_name = "PAGE_COMPRESSED";
+		break;
+	case FIL_PAGE_INDEX:
+		type_name = "INDEX";
+		break;
+	case FIL_PAGE_UNDO_LOG:
+		type_name = "UNDO LOG";
+		break;
+	case FIL_PAGE_INODE:
+		type_name = "INODE";
+		break;
+	case FIL_PAGE_IBUF_FREE_LIST:
+		type_name = "IBUF_FREE_LIST";
+		break;
+	case FIL_PAGE_TYPE_ALLOCATED:
+		type_name = "ALLOCATED";
+		break;
+	case FIL_PAGE_IBUF_BITMAP:
+		type_name = "IBUF_BITMAP";
+		break;
+	case FIL_PAGE_TYPE_SYS:
+		type_name = "SYS";
+		break;
+	case FIL_PAGE_TYPE_TRX_SYS:
+		type_name = "TRX_SYS";
+		break;
+	case FIL_PAGE_TYPE_FSP_HDR:
+		type_name = "FSP_HDR";
+		break;
+	case FIL_PAGE_TYPE_XDES:
+		type_name = "XDES";
+		break;
+	case FIL_PAGE_TYPE_BLOB:
+		type_name = "BLOB";
+		break;
+	case FIL_PAGE_TYPE_ZBLOB:
+		type_name = "ZBLOB";
+		break;
+	case FIL_PAGE_TYPE_ZBLOB2:
+		type_name = "ZBLOB2";
+		break;
+	case FIL_PAGE_TYPE_COMPRESSED:
+		type_name = "ORACLE PAGE COMPRESSED";
+		break;
+	default:
+		type_name = "PAGE TYPE CORRUPTED";
+		break;
+	}
+
+	return(type_name);
+}
+/****************************************************************//**
+Read the file block size recorded in a fil node.
+@return node->file_block_size */
+ulint
+fil_node_get_block_size(
+/*====================*/
+	fil_node_t*	node)	/*!< in: Node where to get block
+				size */
+{
+	const ulint	block_size = node->file_block_size;
+
+	return(block_size);
+}
diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc
new file mode 100644
index 00000000000..686f98c83c5
--- /dev/null
+++ b/storage/xtradb/fil/fil0pagecompress.cc
@@ -0,0 +1,790 @@
+/*****************************************************************************
+
+Copyright (C) 2013, 2015, MariaDB Corporation. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file fil/fil0pagecompress.cc
+Implementation for page compressed file spaces.
+
+Created 11/12/2013 Jan Lindström jan.lindstrom@mariadb.com
+Updated 14/02/2015
+***********************************************************************/
+
+#include "fil0fil.h"
+#include "fil0pagecompress.h"
+
+#include <debug_sync.h>
+#include <my_dbug.h>
+
+#include "mem0mem.h"
+#include "hash0hash.h"
+#include "os0file.h"
+#include "mach0data.h"
+#include "buf0buf.h"
+#include "buf0flu.h"
+#include "log0recv.h"
+#include "fsp0fsp.h"
+#include "srv0srv.h"
+#include "srv0start.h"
+#include "mtr0mtr.h"
+#include "mtr0log.h"
+#include "dict0dict.h"
+#include "page0page.h"
+#include "page0zip.h"
+#include "trx0sys.h"
+#include "row0mysql.h"
+#ifndef UNIV_HOTBACKUP
+# include "buf0lru.h"
+# include "ibuf0ibuf.h"
+# include "sync0sync.h"
+# include "os0sync.h"
+#else /* !UNIV_HOTBACKUP */
+# include "srv0srv.h"
+static ulint srv_data_read, srv_data_written;
+#endif /* !UNIV_HOTBACKUP */
+#include "zlib.h"
+#ifdef __linux__
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <linux/falloc.h>
+#endif
+#include "row0mysql.h"
+#ifdef HAVE_LZ4
+#include "lz4.h"
+#endif
+#ifdef HAVE_LZO
+#include "lzo/lzo1x.h"
+#endif
+#ifdef HAVE_LZMA
+#include "lzma.h"
+#endif
+#ifdef HAVE_BZIP2
+#include "bzlib.h"
+#endif
+#ifdef HAVE_SNAPPY
+#include "snappy-c.h"
+#endif
+
+/* Used for debugging */
+//#define UNIV_PAGECOMPRESS_DEBUG 1
+
+/****************************************************************//**
+For page compressed pages decompress the page after actual read
+operation.
+
+Only a page whose FIL_PAGE_TYPE field equals FIL_PAGE_TYPE_COMPRESSED is
+processed; any other page is returned untouched. For such a page the
+header version must be 1 (ut_a). The original page type, the length
+field FIL_PAGE_ORIGINAL_SIZE_V1 and the algorithm id are read from the
+page header, the payload that starts at buf + FIL_PAGE_DATA is
+decompressed into page_buf, and the result is then copied back into buf
+behind the header and the original page type is restored.
+
+Every failure path (length field out of range, decompressor error,
+unknown or not compiled-in algorithm) prints a message to stderr and
+ends in ut_error. Each algorithm branch also prints a one-line
+"[Note]" to stderr before decompressing.
+
+NOTE(review): original_len is handed to zlib, LZMA and LZO as the length
+of the compressed input, but to LZ4_decompress_fast as the size of the
+decompressed output, and it is also the number of bytes copied back
+into buf. Confirm which meaning the header field really has. */
+static
+void
+fil_decompress_page_2(
+/*==================*/
+ byte* page_buf, /*!< out: destination buffer for
+ uncompressed data */
+ byte* buf, /*!< in/out: source compressed data; on
+ success the decompressed payload and the
+ original page type are written back into
+ this buffer */
+ ulong len, /*!< in: length of output buffer.*/
+ ulint* write_size) /*!< in/out: Actual payload size of
+ the compressed data. Not referenced
+ anywhere in this function body. */
+{
+ ulint page_type = mach_read_from_2(buf + FIL_PAGE_TYPE);
+
+ if (page_type != FIL_PAGE_TYPE_COMPRESSED) {
+ /* It is not a compressed page: leave both buffers as they
+ are. */
+ return;
+ }
+
+ ulint olen = 0;
+ byte* ptr = buf + FIL_PAGE_DATA; /* start of the compressed payload */
+ ulint version = mach_read_from_1(buf + FIL_PAGE_VERSION);
+ int err = 0;
+
+ ut_a(version == 1); /* only header version 1 is handled here */
+
+ /* Read the original page type, before we compressed the data.
+ page_type is reused to carry it until it is written back at the
+ end of the function. */
+ page_type = mach_read_from_2(buf + FIL_PAGE_ORIGINAL_TYPE_V1);
+
+ ulint original_len = mach_read_from_2(buf + FIL_PAGE_ORIGINAL_SIZE_V1);
+
+ if (original_len < UNIV_PAGE_SIZE_MIN - (FIL_PAGE_DATA + 8)
+ || original_len > UNIV_PAGE_SIZE_MAX - FIL_PAGE_DATA
+ || len < original_len + FIL_PAGE_DATA) {
+ fprintf(stderr,
+ "InnoDB: Corruption: We try to uncompress corrupted page\n"
+ "InnoDB: Original len %lu len %lu.\n",
+ original_len, len);
+
+ fflush(stderr);
+ ut_error;
+
+ }
+
+ ulint algorithm = mach_read_from_1(buf + FIL_PAGE_ALGORITHM_V1);
+
+ switch(algorithm) {
+ case PAGE_ZLIB_ALGORITHM: {
+
+ fprintf(stderr, "InnoDB: [Note]: zlib\n");
+
+ err = uncompress(page_buf, &len, ptr, original_len); /* len is passed by address, so zlib may overwrite it */
+ /* If uncompress fails it means that page is corrupted */
+ if (err != Z_OK) {
+
+ fprintf(stderr,
+ "InnoDB: Corruption: Page is marked as compressed\n"
+ "InnoDB: but uncompress failed with error %d.\n"
+ "InnoDB: size %lu len %lu\n",
+ err, original_len, len);
+
+ fflush(stderr);
+
+ ut_error;
+ }
+
+ break;
+ }
+#ifdef HAVE_LZ4
+ case PAGE_LZ4_ALGORITHM: {
+ fprintf(stderr, "InnoDB: [Note]: lz4\n");
+ err = LZ4_decompress_fast(
+ (const char*) ptr, (char*) (page_buf), original_len);
+
+ if (err < 0) {
+ fprintf(stderr,
+ "InnoDB: Corruption: Page is marked as compressed\n"
+ "InnoDB: but decompression read only %d bytes.\n"
+ "InnoDB: size %lu len %lu\n",
+ err, original_len, len);
+ fflush(stderr);
+
+ ut_error;
+ }
+ break;
+ }
+#endif /* HAVE_LZ4 */
+
+#ifdef HAVE_LZMA
+ case PAGE_LZMA_ALGORITHM: {
+
+ lzma_ret ret;
+ size_t src_pos = 0;
+ size_t dst_pos = 0;
+ uint64_t memlimit = UINT64_MAX;
+
+ fprintf(stderr, "InnoDB: [Note]: lzma\n");
+ ret = lzma_stream_buffer_decode(
+ &memlimit,
+ 0,
+ NULL,
+ ptr,
+ &src_pos,
+ original_len,
+ (page_buf),
+ &dst_pos,
+ len);
+
+
+ if (ret != LZMA_OK || (dst_pos <= 0 || dst_pos > len)) {
+ fprintf(stderr,
+ "InnoDB: Corruption: Page is marked as compressed\n"
+ "InnoDB: but decompression read only %ld bytes.\n"
+ "InnoDB: size %lu len %lu\n",
+ dst_pos, original_len, len);
+ fflush(stderr);
+
+ ut_error;
+ }
+
+ break;
+ }
+#endif /* HAVE_LZMA */
+
+#ifdef HAVE_LZO
+ case PAGE_LZO_ALGORITHM: {
+ fprintf(stderr, "InnoDB: [Note]: lzo \n");
+ err = lzo1x_decompress((const unsigned char *)ptr,
+ original_len,(unsigned char *)(page_buf), &olen, NULL);
+
+ if (err != LZO_E_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) {
+ fprintf(stderr,
+ "InnoDB: Corruption: Page is marked as compressed\n"
+ "InnoDB: but decompression read only %ld bytes.\n"
+ "InnoDB: size %lu len %lu\n",
+ olen, original_len, len);
+ fflush(stderr);
+
+ ut_error;
+ }
+ break;
+ }
+#endif /* HAVE_LZO */
+
+ default: /* also reached for an algorithm whose HAVE_* case above is not compiled in */
+ fprintf(stderr,
+ "InnoDB: Corruption: Page is marked as compressed\n"
+ "InnoDB: but compression algorithm %s\n"
+ "InnoDB: is not known.\n"
+ ,fil_get_compression_alg_name(algorithm));
+
+ fflush(stderr);
+ ut_error;
+ break;
+ }
+
+ /* Leave the header alone: copy original_len bytes of decompressed
+ data from page_buf into buf, starting right after the first
+ FIL_PAGE_DATA bytes. */
+ memmove(buf+FIL_PAGE_DATA, page_buf, original_len);
+
+ mach_write_to_2(buf + FIL_PAGE_TYPE, page_type); /* put back the original page type saved above */
+
+ ut_ad(memcmp(buf + FIL_PAGE_LSN + 4,
+ buf + (original_len + FIL_PAGE_DATA)
+ - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4) == 0); /* 4 bytes at FIL_PAGE_LSN + 4 must equal 4 bytes near the end of the restored data */
+}
+
+/****************************************************************//**
+For page compressed pages compress the page before actual write
+operation.
+@return compressed page to be written*/
+byte*
+fil_compress_page(
+/*==============*/
+ ulint space_id, /*!< in: tablespace id of the
+ table. */
+ byte* buf, /*!< in: buffer from which to write; in aio
+ this must be appropriately aligned */
+ byte* out_buf, /*!< out: compressed buffer */
+ ulint len, /*!< in: length of input buffer.*/
+ ulint compression_level, /* in: compression level */
+ ulint block_size, /*!< in: block size */
+ ulint* out_len, /*!< out: actual length of compressed
+ page */
+ byte* lzo_mem) /*!< in: temporal memory used by LZO */
+{
+ int err = Z_OK;
+ int level = 0;
+ ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE;
+ ulint write_size=0;
+ ulint comp_method = innodb_compression_algorithm; /* Cache to avoid
+ change during
+ function execution */
+ ut_ad(buf);
+ ut_ad(out_buf);
+ ut_ad(len);
+ ut_ad(out_len);
+
+ level = compression_level;
+ ut_ad(fil_space_is_page_compressed(space_id));
+
+ fil_system_enter();
+ fil_space_t* space = fil_space_get_by_id(space_id);
+ fil_system_exit();
+
+ /* If no compression level was provided to this table, use system
+ default level */
+ if (level == 0) {
+ level = page_zip_level;
+ }
+
+#ifdef UNIV_PAGECOMPRESS_DEBUG
+ fprintf(stderr,
+ "InnoDB: Note: Preparing for compress for space %lu name %s len %lu\n",
+ space_id, fil_space_name(space), len);
+#endif /* UNIV_PAGECOMPRESS_DEBUG */
+
+ write_size = UNIV_PAGE_SIZE - header_len;
+
+ switch(comp_method) {
+#ifdef HAVE_LZ4
+ case PAGE_LZ4_ALGORITHM:
+ err = LZ4_compress_limitedOutput((const char *)buf,
+ (char *)out_buf+header_len, len, write_size);
+ write_size = err;
+
+ if (err == 0) {
+ /* If error we leave the actual page as it was */
+
+ if (space->printed_compression_failure == false) {
+ fprintf(stderr,
+ "InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n",
+ space_id, fil_space_name(space), len, err, write_size);
+ space->printed_compression_failure = true;
+ }
+
+ srv_stats.pages_page_compression_error.inc();
+ *out_len = len;
+ return (buf);
+ }
+ break;
+#endif /* HAVE_LZ4 */
+#ifdef HAVE_LZO
+ case PAGE_LZO_ALGORITHM:
+ err = lzo1x_1_15_compress(
+ buf, len, out_buf+header_len, &write_size, lzo_mem);
+
+ if (err != LZO_E_OK || write_size > UNIV_PAGE_SIZE-header_len) {
+ if (space->printed_compression_failure == false) {
+ fprintf(stderr,
+ "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu\n",
+ space_id, fil_space_name(space), len, err, write_size);
+ space->printed_compression_failure = true;
+ }
+ srv_stats.pages_page_compression_error.inc();
+ *out_len = len;
+ return (buf);
+ }
+
+ break;
+#endif /* HAVE_LZO */
+#ifdef HAVE_LZMA
+ case PAGE_LZMA_ALGORITHM: {
+ size_t out_pos=0;
+
+ err = lzma_easy_buffer_encode(
+ compression_level,
+ LZMA_CHECK_NONE,
+ NULL, /* No custom allocator, use malloc/free */
+ reinterpret_cast<uint8_t*>(buf),
+ len,
+ reinterpret_cast<uint8_t*>(out_buf + header_len),
+ &out_pos,
+ (size_t)write_size);
+
+ if (err != LZMA_OK || out_pos > UNIV_PAGE_SIZE-header_len) {
+ if (space->printed_compression_failure == false) {
+ fprintf(stderr,
+ "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu\n",
+ space_id, fil_space_name(space), len, err, out_pos);
+ space->printed_compression_failure = true;
+ }
+ srv_stats.pages_page_compression_error.inc();
+ *out_len = len;
+ return (buf);
+ }
+
+ write_size = out_pos;
+
+ break;
+ }
+#endif /* HAVE_LZMA */
+
+#ifdef HAVE_BZIP2
+ case PAGE_BZIP2_ALGORITHM: {
+
+ err = BZ2_bzBuffToBuffCompress(
+ (char *)(out_buf + header_len),
+ (unsigned int *)&write_size,
+ (char *)buf,
+ len,
+ 1,
+ 0,
+ 0);
+
+ if (err != BZ_OK || write_size > UNIV_PAGE_SIZE-header_len) {
+ if (space->printed_compression_failure == false) {
+ fprintf(stderr,
+ "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu\n",
+ space_id, fil_space_name(space), len, err, write_size);
+ space->printed_compression_failure = true;
+ }
+ srv_stats.pages_page_compression_error.inc();
+ *out_len = len;
+ return (buf);
+ }
+ break;
+ }
+#endif /* HAVE_BZIP2 */
+
+#ifdef HAVE_SNAPPY
+ case PAGE_SNAPPY_ALGORITHM:
+ {
+ snappy_status cstatus;
+
+ cstatus = snappy_compress((const char *)buf, len, (char *)(out_buf+header_len), &write_size);
+
+ if (cstatus != SNAPPY_OK || write_size > UNIV_PAGE_SIZE-header_len) {
+ if (space->printed_compression_failure == false) {
+ fprintf(stderr,
+ "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu\n",
+ space_id, fil_space_name(space), len, (int)cstatus, write_size);
+ space->printed_compression_failure = true;
+ }
+ srv_stats.pages_page_compression_error.inc();
+ *out_len = len;
+ return (buf);
+ }
+ break;
+ }
+#endif /* HAVE_SNAPPY */
+
+ case PAGE_ZLIB_ALGORITHM:
+ err = compress2(out_buf+header_len, (ulong*)&write_size, buf, len, level);
+
+ if (err != Z_OK) {
+ /* If error we leave the actual page as it was */
+
+ if (space->printed_compression_failure == false) {
+ fprintf(stderr,
+ "InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n",
+ space_id, fil_space_name(space), len, err, write_size);
+ space->printed_compression_failure = true;
+ }
+
+ srv_stats.pages_page_compression_error.inc();
+ *out_len = len;
+ return (buf);
+ }
+ break;
+
+ case PAGE_UNCOMPRESSED:
+ *out_len = len;
+ return (buf);
+ break;
+ default:
+ ut_error;
+ break;
+ }
+
+ /* Set up the page header */
+ memcpy(out_buf, buf, FIL_PAGE_DATA);
+ /* Set up the checksum */
+ mach_write_to_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC);
+ /* Set up the correct page type */
+ mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED);
+ /* Set up the flush lsn to be compression algorithm */
+ mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, comp_method);
+ /* Set up the actual payload lenght */
+ mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size);
+
+#ifdef UNIV_DEBUG
+ /* Verify */
+ ut_ad(fil_page_is_compressed(out_buf));
+ ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC);
+ ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size);
+ ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == (ulint)comp_method);
+
+ /* Verify that page can be decompressed */
+ {
+ byte *comp_page;
+ byte *uncomp_page;
+
+ comp_page = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*3));
+ uncomp_page = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*3));
+ memcpy(comp_page, out_buf, UNIV_PAGE_SIZE);
+
+ fil_decompress_page(uncomp_page, comp_page, len, NULL);
+ if(buf_page_is_corrupted(false, uncomp_page, 0)) {
+ buf_page_print(uncomp_page, 0, BUF_PAGE_PRINT_NO_CRASH);
+ ut_error;
+ }
+ ut_free(comp_page);
+ ut_free(uncomp_page);
+ }
+#endif /* UNIV_DEBUG */
+
+ write_size+=header_len;
+
+ /* Actual write needs to be alligned on block size */
+ if (write_size % block_size) {
+ size_t tmp = write_size;
+#ifdef UNIV_DEBUG
+ ut_a(block_size > 0);
+#endif
+ write_size = (size_t)ut_uint64_align_up((ib_uint64_t)write_size, block_size);
+#ifdef UNIV_DEBUG
+ ut_a(write_size > 0 && ((write_size % block_size) == 0));
+ ut_a(write_size >= tmp);
+#endif
+ }
+
+#ifdef UNIV_PAGECOMPRESS_DEBUG
+ fprintf(stderr,
+ "InnoDB: Note: Compression succeeded for space %lu name %s len %lu out_len %lu\n",
+ space_id, fil_space_name(space), len, write_size);
+#endif /* UNIV_PAGECOMPRESS_DEBUG */
+
+
+ srv_stats.page_compression_saved.add((len - write_size));
+ srv_stats.pages_page_compressed.inc();
+
+ /* If we do not persistently trim rest of page, we need to write it
+ all */
+ if (!srv_use_trim) {
+ write_size = len;
+ }
+
+ *out_len = write_size;
+
+ return(out_buf);
+
+}
+
+/****************************************************************//**
+Decompress a page compressed page in place after the actual read
+operation. Pages of any other page type are left untouched. */
+void
+fil_decompress_page(
+/*================*/
+ byte* page_buf, /*!< in: preallocated scratch buffer, or NULL to allocate one here */
+ byte* buf, /*!< in/out: compressed page on entry, overwritten with the
+ decompressed page; in aio this must be appropriately aligned */
+ ulong len, /*!< in: length of output buffer.*/
+ ulint* write_size) /*!< out: actual payload size of
+ the compressed data; may be NULL. */
+{
+ int err = 0;
+ ulint actual_size = 0;
+ ulint compression_alg = 0;
+ byte *in_buf;
+ ulint olen=0;
+ ulint ptype;
+
+ ut_ad(buf);
+ ut_ad(len);
+
+ ptype = mach_read_from_2(buf+FIL_PAGE_TYPE);
+
+ /* Do not try to uncompress pages that are not compressed */
+ if (ptype != FIL_PAGE_PAGE_COMPRESSED && ptype != FIL_PAGE_TYPE_COMPRESSED) {
+ return;
+ }
+
+ // If no buffer was given, we need to allocate a temporary buffer
+ if (page_buf == NULL) {
+#ifdef UNIV_PAGECOMPRESS_DEBUG
+ fprintf(stderr,
+ "InnoDB: Note: FIL: Compression buffer not given, allocating...\n");
+#endif /* UNIV_PAGECOMPRESS_DEBUG */
+ in_buf = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*3));
+ } else {
+ in_buf = page_buf;
+ }
+
+ if (ptype == FIL_PAGE_TYPE_COMPRESSED) {
+
+ fil_decompress_page_2(in_buf, buf, len, write_size);
+ // Need to free the temporary buffer if no buffer was given
+ if (page_buf == NULL) {
+ ut_free(in_buf);
+ }
+ return;
+ }
+
+ /* Before actual decompress, make sure that page type is correct */
+
+ if (mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM) != BUF_NO_CHECKSUM_MAGIC ||
+ mach_read_from_2(buf+FIL_PAGE_TYPE) != FIL_PAGE_PAGE_COMPRESSED) {
+ fprintf(stderr,
+ "InnoDB: Corruption: We try to uncompress corrupted page\n"
+ "InnoDB: CRC %lu type %lu.\n"
+ "InnoDB: len %lu\n",
+ mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM),
+ mach_read_from_2(buf+FIL_PAGE_TYPE), len);
+
+ fflush(stderr);
+ ut_error;
+ }
+
+ /* Get compression algorithm */
+ compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN);
+
+ /* Get the actual size of compressed page */
+ actual_size = mach_read_from_2(buf+FIL_PAGE_DATA);
+ /* Check if payload size is corrupted */
+ if (actual_size == 0 || actual_size > UNIV_PAGE_SIZE) {
+ fprintf(stderr,
+ "InnoDB: Corruption: We try to uncompress corrupted page\n"
+ "InnoDB: actual size %lu compression %s\n",
+ actual_size, fil_get_compression_alg_name(compression_alg));
+ fflush(stderr);
+ ut_error;
+ }
+
+ /* Store actual payload size of the compressed data. This pointer
+ points to buffer pool. */
+ if (write_size) {
+ *write_size = actual_size;
+ }
+
+#ifdef UNIV_PAGECOMPRESS_DEBUG
+ fprintf(stderr,
+ "InnoDB: Note: Preparing for decompress for len %lu\n",
+ actual_size);
+#endif /* UNIV_PAGECOMPRESS_DEBUG */
+
+
+ switch(compression_alg) {
+ case PAGE_ZLIB_ALGORITHM:
+ err= uncompress(in_buf, &len, buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (unsigned long)actual_size);
+
+ /* If uncompress fails it means that page is corrupted */
+ if (err != Z_OK) {
+
+ fprintf(stderr,
+ "InnoDB: Corruption: Page is marked as compressed\n"
+ "InnoDB: but uncompress failed with error %d.\n"
+ "InnoDB: size %lu len %lu\n",
+ err, actual_size, len);
+
+ fflush(stderr);
+
+ ut_error;
+ }
+ break;
+
+#ifdef HAVE_LZ4
+ case PAGE_LZ4_ALGORITHM:
+ err = LZ4_decompress_fast((const char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (char *)in_buf, len);
+
+ if (err != (int)actual_size) {
+ fprintf(stderr,
+ "InnoDB: Corruption: Page is marked as compressed\n"
+ "InnoDB: but decompression read only %d bytes.\n"
+ "InnoDB: size %lu len %lu\n",
+ err, actual_size, len);
+ fflush(stderr);
+
+ ut_error;
+ }
+ break;
+#endif /* HAVE_LZ4 */
+#ifdef HAVE_LZO
+ case PAGE_LZO_ALGORITHM:
+ err = lzo1x_decompress((const unsigned char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE,
+ actual_size,(unsigned char *)in_buf, &olen, NULL);
+
+ if (err != LZO_E_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) {
+ fprintf(stderr,
+ "InnoDB: Corruption: Page is marked as compressed\n"
+ "InnoDB: but decompression read only %lu bytes.\n"
+ "InnoDB: size %lu len %lu\n",
+ olen, actual_size, len);
+ fflush(stderr);
+
+ ut_error;
+ }
+ break;
+#endif /* HAVE_LZO */
+#ifdef HAVE_LZMA
+ case PAGE_LZMA_ALGORITHM: {
+
+ lzma_ret ret;
+ size_t src_pos = 0;
+ size_t dst_pos = 0;
+ uint64_t memlimit = UINT64_MAX;
+
+ ret = lzma_stream_buffer_decode(
+ &memlimit,
+ 0,
+ NULL,
+ buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE,
+ &src_pos,
+ actual_size,
+ in_buf,
+ &dst_pos,
+ len);
+
+
+ if (ret != LZMA_OK || (dst_pos == 0 || dst_pos > UNIV_PAGE_SIZE)) {
+ fprintf(stderr,
+ "InnoDB: Corruption: Page is marked as compressed\n"
+ "InnoDB: but decompression read only %lu bytes.\n"
+ "InnoDB: size %lu len %lu\n",
+ dst_pos, actual_size, len);
+ fflush(stderr);
+
+ ut_error;
+ }
+
+ break;
+ }
+#endif /* HAVE_LZMA */
+#ifdef HAVE_BZIP2
+ case PAGE_BZIP2_ALGORITHM: {
+ unsigned int dst_pos = UNIV_PAGE_SIZE;
+
+ err = BZ2_bzBuffToBuffDecompress(
+ (char *)in_buf,
+ &dst_pos,
+ (char *)(buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE),
+ actual_size,
+ 1,
+ 0);
+
+ if (err != BZ_OK || (dst_pos == 0 || dst_pos > UNIV_PAGE_SIZE)) {
+ fprintf(stderr,
+ "InnoDB: Corruption: Page is marked as compressed\n"
+ "InnoDB: but decompression read only %u bytes.\n"
+ "InnoDB: size %lu len %lu err %d\n",
+ dst_pos, actual_size, len, err);
+ fflush(stderr);
+
+ ut_error;
+ }
+ break;
+ }
+#endif /* HAVE_BZIP2 */
+#ifdef HAVE_SNAPPY
+ case PAGE_SNAPPY_ALGORITHM:
+ {
+ snappy_status cstatus;
+
+ cstatus = snappy_uncompress(
+ (const char *)(buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE),
+ actual_size,
+ (char *)in_buf,
+ &olen);
+
+ if (cstatus != SNAPPY_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) {
+ fprintf(stderr,
+ "InnoDB: Corruption: Page is marked as compressed\n"
+ "InnoDB: but decompression read only %lu bytes.\n"
+ "InnoDB: size %lu len %lu err %d\n",
+ olen, actual_size, len, (int)cstatus);
+ fflush(stderr);
+
+ ut_error;
+ }
+ break;
+ }
+#endif /* HAVE_SNAPPY */
+ default:
+ fprintf(stderr,
+ "InnoDB: Corruption: Page is marked as compressed\n"
+ "InnoDB: but compression algorithm %s\n"
+ "InnoDB: is not known.\n"
+ ,fil_get_compression_alg_name(compression_alg));
+
+ fflush(stderr);
+ ut_error;
+ break;
+ }
+
+#ifdef UNIV_PAGECOMPRESS_DEBUG
+ fprintf(stderr,
+ "InnoDB: Note: Decompression succeeded for len %lu \n",
+ len);
+#endif /* UNIV_PAGECOMPRESS_DEBUG */
+
+ srv_stats.pages_page_decompressed.inc();
+
+ /* Copy the uncompressed page back over the caller's buffer (per the
+ note above, this points into the buffer pool). */
+ memcpy(buf, in_buf, len);
+
+ // Need to free the temporary buffer if no buffer was given
+ if (page_buf == NULL) {
+ ut_free(in_buf);
+ }
+}
+
+
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index 0bb72ada5af..ef6aae7889e 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -4,7 +4,7 @@ Copyright (c) 2000, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, 2009 Google Inc.
Copyright (c) 2009, Percona Inc.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2014 SkySQL Ab. All Rights Reserved.
+Copyright (c) 2013, 2015, MariaDB Corporation. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -104,6 +104,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
#endif /* UNIV_DEBUG */
#include "fts0priv.h"
#include "page0zip.h"
+#include "fil0pagecompress.h"
#define thd_get_trx_isolation(X) ((enum_tx_isolation)thd_tx_isolation(X))
@@ -562,6 +563,27 @@ ib_cb_t innodb_api_cb[] = {
(ib_cb_t) ib_cursor_stmt_begin
};
+/**
+ List of the InnoDB specific CREATE TABLE options (table options).
+ The parsed values are read through ha_table_option_struct.
+
+ The option values can be specified in the CREATE TABLE at the end:
+ CREATE TABLE ( ... ) *here*
+*/
+
+ha_create_table_option innodb_table_option_list[]=
+{
+ /* With this option user can enable page compression feature for the
+ table */
+ HA_TOPTION_BOOL("PAGE_COMPRESSED", page_compressed, 0),
+ /* With this option user can set zip compression level for page
+ compression for this table */
+ HA_TOPTION_NUMBER("PAGE_COMPRESSION_LEVEL", page_compression_level, ULINT_UNDEFINED, 0, 9, 1),
+ /* With this option user can enable atomic writes feature for this table */
+ HA_TOPTION_ENUM("ATOMIC_WRITES", atomic_writes, "DEFAULT,ON,OFF", 0),
+ HA_TOPTION_END
+};
+
/*************************************************************//**
Check whether valid argument given to innodb_ft_*_stopword_table.
This function is registered as a callback with MySQL.
@@ -632,6 +654,20 @@ static int innobase_checkpoint_state(handlerton *hton, bool disable)
return 0;
}
+/*************************************************************//**
+Check for a valid value of innobase_compression_algorithm.
+@return 0 for valid innodb_compression_algorithm. */
+static
+int
+innodb_compression_algorithm_validate(
+/*==================================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to system
+ variable */
+ void* save, /*!< out: immediate result
+ for update function */
+ struct st_mysql_value* value); /*!< in: incoming string */
+
static const char innobase_hton_name[]= "InnoDB";
static MYSQL_THDVAR_BOOL(support_xa, PLUGIN_VAR_OPCMDARG,
@@ -681,6 +717,11 @@ static MYSQL_THDVAR_BOOL(fake_changes, PLUGIN_VAR_OPCMDARG,
"This is to cause replication prefetch IO. ATTENTION: the transaction started after enabled is affected.",
NULL, NULL, FALSE);
+static ibool innodb_have_lzo=IF_LZO(1, 0);
+static ibool innodb_have_lz4=IF_LZ4(1, 0);
+static ibool innodb_have_lzma=IF_LZMA(1, 0);
+static ibool innodb_have_bzip2=IF_BZIP2(1, 0);
+static ibool innodb_have_snappy=IF_SNAPPY(1, 0);
static SHOW_VAR innodb_status_variables[]= {
{"available_undo_logs",
@@ -885,6 +926,47 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_x_lock_spin_rounds, SHOW_LONGLONG},
{"x_lock_spin_waits",
(char*) &export_vars.innodb_x_lock_spin_waits, SHOW_LONGLONG},
+
+ /* Status variables for page compression */
+ {"page_compression_saved",
+ (char*) &export_vars.innodb_page_compression_saved, SHOW_LONGLONG},
+ {"page_compression_trim_sect512",
+ (char*) &export_vars.innodb_page_compression_trim_sect512, SHOW_LONGLONG},
+ {"page_compression_trim_sect1024",
+ (char*) &export_vars.innodb_page_compression_trim_sect1024, SHOW_LONGLONG},
+ {"page_compression_trim_sect2048",
+ (char*) &export_vars.innodb_page_compression_trim_sect2048, SHOW_LONGLONG},
+ {"page_compression_trim_sect4096",
+ (char*) &export_vars.innodb_page_compression_trim_sect4096, SHOW_LONGLONG},
+ {"page_compression_trim_sect8192",
+ (char*) &export_vars.innodb_page_compression_trim_sect8192, SHOW_LONGLONG},
+ {"page_compression_trim_sect16384",
+ (char*) &export_vars.innodb_page_compression_trim_sect16384, SHOW_LONGLONG},
+ {"page_compression_trim_sect32768",
+ (char*) &export_vars.innodb_page_compression_trim_sect32768, SHOW_LONGLONG},
+ {"num_index_pages_written",
+ (char*) &export_vars.innodb_index_pages_written, SHOW_LONGLONG},
+ {"num_non_index_pages_written",
+ (char*) &export_vars.innodb_non_index_pages_written, SHOW_LONGLONG},
+ {"num_pages_page_compressed",
+ (char*) &export_vars.innodb_pages_page_compressed, SHOW_LONGLONG},
+ {"num_page_compressed_trim_op",
+ (char*) &export_vars.innodb_page_compressed_trim_op, SHOW_LONGLONG},
+ {"num_page_compressed_trim_op_saved",
+ (char*) &export_vars.innodb_page_compressed_trim_op_saved, SHOW_LONGLONG},
+ {"num_pages_page_decompressed",
+ (char*) &export_vars.innodb_pages_page_decompressed, SHOW_LONGLONG},
+ {"have_lz4",
+ (char*) &innodb_have_lz4, SHOW_BOOL},
+ {"have_lzo",
+ (char*) &innodb_have_lzo, SHOW_BOOL},
+ {"have_lzma",
+ (char*) &innodb_have_lzma, SHOW_BOOL},
+ {"have_bzip2",
+ (char*) &innodb_have_bzip2, SHOW_BOOL},
+ {"have_snappy",
+ (char*) &innodb_have_snappy, SHOW_BOOL},
+
{NullS, NullS, SHOW_LONG}
};
@@ -3279,6 +3361,8 @@ innobase_init(
if (srv_file_per_table)
innobase_hton->tablefile_extensions = ha_innobase_exts;
+ innobase_hton->table_options = innodb_table_option_list;
+
ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
#ifndef DBUG_OFF
@@ -3347,6 +3431,51 @@ innobase_init(
}
}
+#ifndef HAVE_LZ4
+ if (innodb_compression_algorithm == PAGE_LZ4_ALGORITHM) {
+ sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+ "InnoDB: liblz4 is not installed. \n",
+ innodb_compression_algorithm);
+ goto error;
+ }
+#endif
+
+#ifndef HAVE_LZO
+ if (innodb_compression_algorithm == PAGE_LZO_ALGORITHM) {
+ sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+ "InnoDB: liblzo is not installed. \n",
+ innodb_compression_algorithm);
+ goto error;
+ }
+#endif
+
+#ifndef HAVE_LZMA
+ if (innodb_compression_algorithm == PAGE_LZMA_ALGORITHM) {
+ sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+ "InnoDB: liblzma is not installed. \n",
+ innodb_compression_algorithm);
+ goto error;
+ }
+#endif
+
+#ifndef HAVE_BZIP2
+ if (innodb_compression_algorithm == PAGE_BZIP2_ALGORITHM) {
+ sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+ "InnoDB: libbz2 is not installed. \n",
+ innodb_compression_algorithm);
+ goto error;
+ }
+#endif
+
+#ifndef HAVE_SNAPPY
+ if (innodb_compression_algorithm == PAGE_SNAPPY_ALGORITHM) {
+ sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+ "InnoDB: libsnappy is not installed. \n",
+ innodb_compression_algorithm);
+ goto error;
+ }
+#endif
+
os_innodb_umask = (ulint) my_umask;
/* First calculate the default path for innodb_data_home_dir etc.,
@@ -10299,11 +10428,16 @@ innobase_table_flags(
enum row_type row_format;
rec_format_t innodb_row_format = REC_FORMAT_COMPACT;
bool use_data_dir;
+ ha_table_option_struct *options= form->s->option_struct;
/* Cache the value of innodb_file_format, in case it is
modified by another thread while the table is being created. */
const ulint file_format_allowed = srv_file_format;
+ /* Cache the value of innobase_compression_level, in case it is
+ modified by another thread while the table is being created. */
+ const ulint default_compression_level = page_zip_level;
+
*flags = 0;
*flags2 = 0;
@@ -10352,6 +10486,8 @@ index_bad:
}
}
+ row_format = form->s->row_type;
+
if (create_info->key_block_size) {
/* The requested compressed page size (key_block_size)
is given in kilobytes. If it is a valid number, store
@@ -10399,8 +10535,6 @@ index_bad:
}
}
- row_format = form->s->row_type;
-
if (zip_ssize && zip_allowed) {
/* if ROW_FORMAT is set to default,
automatically change it to COMPRESSED.*/
@@ -10455,10 +10589,18 @@ index_bad:
" innodb_file_format > Antelope.",
get_row_format_name(row_format));
} else {
- innodb_row_format = (row_format == ROW_TYPE_DYNAMIC
- ? REC_FORMAT_DYNAMIC
- : REC_FORMAT_COMPRESSED);
- break;
+ switch(row_format) {
+ case ROW_TYPE_COMPRESSED:
+ innodb_row_format = REC_FORMAT_COMPRESSED;
+ break;
+ case ROW_TYPE_DYNAMIC:
+ innodb_row_format = REC_FORMAT_DYNAMIC;
+ break;
+ default:
+ /* Not possible, avoid compiler warning */
+ break;
+ }
+ break; /* Correct row_format */
}
zip_allowed = FALSE;
/* fall through to set row_format = COMPACT */
@@ -10486,7 +10628,15 @@ index_bad:
&& ((create_info->data_file_name != NULL)
&& !(create_info->options & HA_LEX_CREATE_TMP_TABLE));
- dict_tf_set(flags, innodb_row_format, zip_ssize, use_data_dir);
+ /* Set up table dictionary flags */
+ dict_tf_set(flags,
+ innodb_row_format,
+ zip_ssize,
+ use_data_dir,
+ options->page_compressed,
+ (ulint)options->page_compression_level == ULINT_UNDEFINED ?
+ default_compression_level : options->page_compression_level,
+ options->atomic_writes);
if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
*flags2 |= DICT_TF2_TEMPORARY;
@@ -10505,6 +10655,113 @@ index_bad:
}
/*****************************************************************//**
+Check engine specific table options not handled by SQL-parser.
+@return NULL if valid, string if not */
+UNIV_INTERN
+const char*
+ha_innobase::check_table_options(
+ THD *thd, /*!< in: thread handle */
+ TABLE* table, /*!< in: information on table
+ columns and indexes */
+ HA_CREATE_INFO* create_info, /*!< in: more information of the
+ created table, contains also the
+ create statement string */
+ const bool use_tablespace, /*!< in: use file par table */
+ const ulint file_format)
+{
+ enum row_type row_format = table->s->row_type;;
+ ha_table_option_struct *options= table->s->option_struct;
+ atomic_writes_t awrites = (atomic_writes_t)options->atomic_writes;
+
+ /* Check page compression requirements */
+ if (options->page_compressed) {
+
+ if (row_format == ROW_TYPE_COMPRESSED) {
+ push_warning(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: PAGE_COMPRESSED table can't have"
+ " ROW_TYPE=COMPRESSED");
+ return "PAGE_COMPRESSED";
+ }
+
+ if (row_format == ROW_TYPE_REDUNDANT) {
+ push_warning(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: PAGE_COMPRESSED table can't have"
+ " ROW_TYPE=REDUNDANT");
+ return "PAGE_COMPRESSED";
+ }
+
+ if (!use_tablespace) {
+ push_warning(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: PAGE_COMPRESSED requires"
+ " innodb_file_per_table.");
+ return "PAGE_COMPRESSED";
+ }
+
+ if (file_format < UNIV_FORMAT_B) {
+ push_warning(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: PAGE_COMPRESSED requires"
+ " innodb_file_format > Antelope.");
+ return "PAGE_COMPRESSED";
+ }
+
+ if (create_info->key_block_size) {
+ push_warning(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: PAGE_COMPRESSED table can't have"
+ " key_block_size");
+ return "PAGE_COMPRESSED";
+ }
+ }
+
+ /* Check page compression level requirements, some of them are
+ already checked above */
+ if ((ulint)options->page_compression_level != ULINT_UNDEFINED) {
+ if (options->page_compressed == false) {
+ push_warning(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: PAGE_COMPRESSION_LEVEL requires"
+ " PAGE_COMPRESSED");
+ return "PAGE_COMPRESSION_LEVEL";
+ }
+
+ if (options->page_compression_level < 0 || options->page_compression_level > 9) {
+ push_warning_printf(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: invalid PAGE_COMPRESSION_LEVEL = %lu."
+ " Valid values are [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]",
+ options->page_compression_level);
+ return "PAGE_COMPRESSION_LEVEL";
+ }
+ }
+
+ /* Check atomic writes requirements */
+ if (awrites == ATOMIC_WRITES_ON ||
+ (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) {
+ if (!use_tablespace) {
+ push_warning(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: ATOMIC_WRITES requires"
+ " innodb_file_per_table.");
+ return "ATOMIC_WRITES";
+ }
+ }
+
+ return 0;
+}
+
+/*****************************************************************//**
Creates a new table to an InnoDB database.
@return error number */
UNIV_INTERN
@@ -10535,6 +10792,7 @@ ha_innobase::create(
while creating the table. So we read the current value here
and make all further decisions based on this. */
bool use_tablespace = srv_file_per_table;
+ const ulint file_format = srv_file_format;
/* Zip Shift Size - log2 - 9 of compressed page size,
zero for uncompressed */
@@ -10558,6 +10816,12 @@ ha_innobase::create(
/* Create the table definition in InnoDB */
+ /* Validate table options not handled by the SQL-parser */
+ if(check_table_options(thd, form, create_info, use_tablespace,
+ file_format)) {
+ DBUG_RETURN(HA_WRONG_CREATE_OPTION);
+ }
+
/* Validate create options if innodb_strict_mode is set. */
if (create_options_are_invalid(
thd, form, create_info, use_tablespace)) {
@@ -14949,6 +15213,12 @@ ha_innobase::check_if_incompatible_data(
HA_CREATE_INFO* info,
uint table_changes)
{
+ ha_table_option_struct *param_old, *param_new;
+
+ /* Cache engine specific options */
+ param_new = info->option_struct;
+ param_old = table->s->option_struct;
+
innobase_copy_frm_flags_from_create_info(prebuilt->table, info);
if (table_changes != IS_EQUAL_YES) {
@@ -14975,6 +15245,13 @@ ha_innobase::check_if_incompatible_data(
return(COMPATIBLE_DATA_NO);
}
+ /* Changes on engine specific table options requests a rebuild of the table. */
+ if (param_new->page_compressed != param_old->page_compressed ||
+ param_new->page_compression_level != param_old->page_compression_level ||
+ param_new->atomic_writes != param_old->atomic_writes) {
+ return(COMPATIBLE_DATA_NO);
+ }
+
return(COMPATIBLE_DATA_YES);
}
@@ -17521,12 +17798,6 @@ static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay,
"innodb_thread_concurrency is reached (0 by default)",
NULL, NULL, 0, 0, ~0UL, 0);
-static MYSQL_SYSVAR_UINT(compression_level, page_zip_level,
- PLUGIN_VAR_RQCMDARG,
- "Compression level used for compressed row format. 0 is no compression"
- ", 1 is fastest, 9 is best compression and default is 6.",
- NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0);
-
static MYSQL_SYSVAR_BOOL(log_compressed_pages, page_zip_log_pages,
PLUGIN_VAR_OPCMDARG,
"Enables/disables the logging of entire compressed page images."
@@ -18225,6 +18496,47 @@ static MYSQL_SYSVAR_BOOL(use_stacktrace, srv_use_stacktrace,
"Print stacktrace on long semaphore wait (off by default supported only on linux)",
NULL, NULL, FALSE);
+static MYSQL_SYSVAR_UINT(compression_level, page_zip_level,
+ PLUGIN_VAR_RQCMDARG,
+ "Compression level used for zlib compression. 0 is no compression"
+ ", 1 is fastest, 9 is best compression and default is 6.",
+ NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0);
+
+static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim,
+ PLUGIN_VAR_OPCMDARG,
+ "Use trim. Default FALSE.",
+ NULL, NULL, FALSE);
+
+static const char *page_compression_algorithms[]= { "none", "zlib", "lz4", "lzo", "lzma", "bzip2", "snappy", 0 };
+static TYPELIB page_compression_algorithms_typelib=
+{
+ array_elements(page_compression_algorithms) - 1, 0,
+ page_compression_algorithms, 0
+};
+static MYSQL_SYSVAR_ENUM(compression_algorithm, innodb_compression_algorithm,
+ PLUGIN_VAR_OPCMDARG,
+ "Compression algorithm used on page compression. One of: none, zlib, lz4, lzo, lzma, bzip2, or snappy",
+ innodb_compression_algorithm_validate, NULL,
+ /* The default is PAGE_UNCOMPRESSED. Every name in the typelib is
+ accepted here; whether the chosen method is available in this build
+ is verified by the innodb_compression_algorithm_validate function. */
+ PAGE_UNCOMPRESSED,
+ &page_compression_algorithms_typelib);
+
+static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Number of multi-threaded flush threads",
+ NULL, NULL,
+ MTFLUSH_DEFAULT_WORKER, /* Default setting */
+ 1, /* Minimum setting */
+ MTFLUSH_MAX_WORKER, /* Max setting */
+ 0);
+
+static MYSQL_SYSVAR_BOOL(use_mtflush, srv_use_mtflush,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Use multi-threaded flush. Default FALSE.",
+ NULL, NULL, FALSE);
+
static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(log_block_size),
MYSQL_SYSVAR(additional_mem_pool_size),
@@ -18422,6 +18734,10 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(fake_changes),
MYSQL_SYSVAR(locking_fake_changes),
MYSQL_SYSVAR(use_stacktrace),
+ MYSQL_SYSVAR(use_trim),
+ MYSQL_SYSVAR(compression_algorithm),
+ MYSQL_SYSVAR(mtflush_threads),
+ MYSQL_SYSVAR(use_mtflush),
MYSQL_SYSVAR(simulate_comp_failures),
NULL
};
@@ -18714,6 +19030,9 @@ ib_senderrf(
case IB_LOG_LEVEL_FATAL:
l = 0;
break;
+ default:
+ l = 0;
+ break;
}
my_printv_error(code, format, MYF(l), args);
@@ -18920,15 +19239,117 @@ int ha_innobase::multi_range_read_explain_info(uint mrr_mode, char *str, size_t
return ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
}
-/*
+/*
A helper function used only in index_cond_func_innodb
*/
bool ha_innobase::is_thd_killed()
-{
+{
return thd_kill_level(user_thd);
}
+/*************************************************************//**
+Check for a valid value of innodb_compression_algorithm.
+@return 0 for valid innodb_compression_algorithm. */
+static
+int
+innodb_compression_algorithm_validate(
+/*==================================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to system
+ variable */
+ void* save, /*!< out: immediate result
+ for update function */
+ struct st_mysql_value* value) /*!< in: incoming string */
+{
+ long compression_algorithm;
+ DBUG_ENTER("innodb_compression_algorithm_validate");
+
+ if (value->value_type(value) == MYSQL_VALUE_TYPE_STRING) {
+ char buff[STRING_BUFFER_USUAL_SIZE];
+ const char *str;
+ int length= sizeof(buff);
+
+ if (!(str= value->val_str(value, buff, &length))) {
+ DBUG_RETURN(1);
+ }
+
+ if ((compression_algorithm= (long)find_type(str, &page_compression_algorithms_typelib, 0) - 1) < 0) {
+ DBUG_RETURN(1);
+ }
+ } else {
+ long long tmp;
+
+ if (value->val_int(value, &tmp)) {
+ DBUG_RETURN(1);
+ }
+
+ if (tmp < 0 || tmp >= page_compression_algorithms_typelib.count) {
+ DBUG_RETURN(1);
+ }
+
+ compression_algorithm= (long) tmp;
+ }
+
+#ifndef HAVE_LZ4
+ if (compression_algorithm == PAGE_LZ4_ALGORITHM) {
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_ERR_UNSUPPORTED,
+ "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+ "InnoDB: liblz4 is not installed. \n",
+ compression_algorithm);
+ DBUG_RETURN(1);
+ }
+#endif
+
+#ifndef HAVE_LZO
+ if (compression_algorithm == PAGE_LZO_ALGORITHM) {
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_ERR_UNSUPPORTED,
+ "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+ "InnoDB: liblzo is not installed. \n",
+ compression_algorithm);
+ DBUG_RETURN(1);
+ }
+#endif
+
+#ifndef HAVE_LZMA
+ if (compression_algorithm == PAGE_LZMA_ALGORITHM) {
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_ERR_UNSUPPORTED,
+ "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+ "InnoDB: liblzma is not installed. \n",
+ compression_algorithm);
+ DBUG_RETURN(1);
+ }
+#endif
+
+#ifndef HAVE_BZIP2
+ if (compression_algorithm == PAGE_BZIP2_ALGORITHM) {
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_ERR_UNSUPPORTED,
+ "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+ "InnoDB: libbz2 is not installed. \n",
+ compression_algorithm);
+ DBUG_RETURN(1);
+ }
+#endif
+
+#ifndef HAVE_SNAPPY
+ if (compression_algorithm == PAGE_SNAPPY_ALGORITHM) {
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_ERR_UNSUPPORTED,
+ "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
+ "InnoDB: libsnappy is not installed. \n",
+ compression_algorithm);
+ DBUG_RETURN(1);
+ }
+#endif
+ /* Store the result only after every check above has accepted the value */
+ *reinterpret_cast<ulong*>(save) = compression_algorithm;
+ DBUG_RETURN(0);
+}
+
/**********************************************************************
Issue a warning that the row is too big. */
UNIV_INTERN
diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h
index 823d136d54b..6ed4174f042 100644
--- a/storage/xtradb/handler/ha_innodb.h
+++ b/storage/xtradb/handler/ha_innodb.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2000, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -57,6 +58,21 @@ typedef struct st_innobase_share {
/** Prebuilt structures in an InnoDB table handle used within MySQL */
struct row_prebuilt_t;
+/** Engine specific table options are defined using this struct */
+struct ha_table_option_struct
+{
+ bool page_compressed; /*!< Table is using page compression
+ if this option is true. */
+ int page_compression_level; /*!< Table page compression level
+ or UNIV_UNSPECIFIED. */
+ uint atomic_writes; /*!< Use atomic writes for this
+ table if this option is ON, or
+ if it is DEFAULT and
+ srv_use_atomic_writes=1.
+ Atomic writes are not used if
+ the value is OFF. */
+};
+
/** The class defining a handle to an Innodb table */
class ha_innobase: public handler
{
@@ -176,6 +192,8 @@ class ha_innobase: public handler
char* norm_name,
char* temp_path,
char* remote_path);
+ const char* check_table_options(THD *thd, TABLE* table,
+ HA_CREATE_INFO* create_info, const bool use_tablespace, const ulint file_format);
int create(const char *name, register TABLE *form,
HA_CREATE_INFO *create_info);
int truncate();
diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc
index cc18f709043..439c92b0638 100644
--- a/storage/xtradb/handler/handler0alter.cc
+++ b/storage/xtradb/handler/handler0alter.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2005, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -263,6 +264,22 @@ ha_innobase::check_if_supported_inplace_alter(
update_thd();
trx_search_latch_release_if_reserved(prebuilt->trx);
+ /* A change of engine specific table options requires a rebuild of the
+ table. handler_flags is a bit mask, so test the bit, not equality. */
+ if (ha_alter_info->handler_flags
+ & Alter_inplace_info::CHANGE_CREATE_OPTION) {
+ ha_table_option_struct *new_options= ha_alter_info->create_info->option_struct;
+ ha_table_option_struct *old_options= table->s->option_struct;
+
+ if (new_options->page_compressed != old_options->page_compressed ||
+ new_options->page_compression_level != old_options->page_compression_level ||
+ new_options->atomic_writes != old_options->atomic_writes) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+ }
+
if (ha_alter_info->handler_flags
& ~(INNOBASE_INPLACE_IGNORE
| INNOBASE_ALTER_NOREBUILD
@@ -3414,6 +3431,17 @@ ha_innobase::prepare_inplace_alter_table(
if (ha_alter_info->handler_flags
& Alter_inplace_info::CHANGE_CREATE_OPTION) {
+ /* Check engine specific table options */
+ if (const char* invalid_tbopt = check_table_options(
+ user_thd, altered_table,
+ ha_alter_info->create_info,
+ prebuilt->table->space != 0,
+ srv_file_format)) {
+ my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0),
+ table_type(), invalid_tbopt);
+ goto err_exit_no_heap;
+ }
+
if (const char* invalid_opt = create_options_are_invalid(
user_thd, altered_table,
ha_alter_info->create_info,
diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc
index 02d2a2100a4..af5a7467c8e 100644
--- a/storage/xtradb/handler/i_s.cc
+++ b/storage/xtradb/handler/i_s.cc
@@ -104,6 +104,7 @@ static buf_page_desc_t i_s_page_type[] = {
{"COMPRESSED_BLOB", FIL_PAGE_TYPE_ZBLOB},
{"COMPRESSED_BLOB2", FIL_PAGE_TYPE_ZBLOB2},
{"IBUF_INDEX", I_S_PAGE_TYPE_IBUF},
+ {"PAGE COMPRESSED", FIL_PAGE_PAGE_COMPRESSED},
{"UNKNOWN", I_S_PAGE_TYPE_UNKNOWN}
};
diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h
index 797287ad222..d8a3e77d820 100644
--- a/storage/xtradb/include/buf0buf.h
+++ b/storage/xtradb/include/buf0buf.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1533,6 +1534,12 @@ struct buf_page_t{
state == BUF_BLOCK_ZIP_PAGE and
zip.data == NULL means an active
buf_pool->watch */
+
+ ulint write_size; /* Write size is set when this
+ page is written for the first time;
+ when it is written again, we check
+ whether a TRIM operation is needed. */
+
#ifndef UNIV_HOTBACKUP
buf_page_t* hash; /*!< node used in chaining to
buf_pool->page_hash or
@@ -2174,6 +2181,20 @@ struct CheckUnzipLRUAndLRUList {
};
#endif /* UNIV_DEBUG || defined UNIV_BUF_DEBUG */
+/*********************************************************************//**
+Acquire LRU list mutex */
+void
+buf_pool_mutex_enter(
+/*=================*/
+ buf_pool_t* buf_pool); /*!< in: buffer pool */
+/*********************************************************************//**
+Exit LRU list mutex */
+void
+buf_pool_mutex_exit(
+/*================*/
+ buf_pool_t* buf_pool); /*!< in: buffer pool */
+
+
#ifndef UNIV_NONINL
#include "buf0buf.ic"
#endif
diff --git a/storage/xtradb/include/buf0flu.h b/storage/xtradb/include/buf0flu.h
index 56b0c314b5c..5cc0eb9d4cf 100644
--- a/storage/xtradb/include/buf0flu.h
+++ b/storage/xtradb/include/buf0flu.h
@@ -36,7 +36,14 @@ Created 11/5/1995 Heikki Tuuri
/** Flag indicating if the page_cleaner is in active state. */
extern ibool buf_page_cleaner_is_active;
-/** Flag indicating if the lru_manager is in active state. */
+/** Handled page counters for a single flush */
+struct flush_counters_t {
+ ulint flushed; /*!< number of dirty pages flushed */
+ ulint evicted; /*!< number of clean pages evicted */
+ ulint unzip_LRU_evicted;/*!< number of uncompressed page images
+ evicted */
+};
+/** Flag indicating if the lru_manager is in active state. */
extern bool buf_lru_manager_is_active;
/********************************************************************//**
@@ -304,6 +311,63 @@ buf_flush_flush_list_in_progress(void)
/*==================================*/
__attribute__((warn_unused_result));
+/******************************************************************//**
+Start a buffer flush batch for LRU or flush list */
+ibool
+buf_flush_start(
+/*============*/
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
+ buf_flush_t flush_type); /*!< in: BUF_FLUSH_LRU
+ or BUF_FLUSH_LIST */
+
+/******************************************************************//**
+End a buffer flush batch for LRU or flush list */
+void
+buf_flush_end(
+/*==========*/
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
+ buf_flush_t flush_type); /*!< in: BUF_FLUSH_LRU
+ or BUF_FLUSH_LIST */
+
+/*******************************************************************//**
+This utility flushes dirty blocks from the end of the LRU list or flush_list.
+NOTE 1: in the case of an LRU flush the calling thread may own latches to
+pages: to avoid deadlocks, this function must be written so that it cannot
+end up waiting for these latches! NOTE 2: in the case of a flush list flush,
+the calling thread is not allowed to own any latches on pages!
+No value is returned; the flushed/evicted page counts are stored in n. */
+__attribute__((nonnull))
+void
+buf_flush_batch(
+/*============*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU or
+ BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
+ then the caller must not own any
+ latches on pages */
+ ulint min_n, /*!< in: wished minimum number of blocks
+ flushed (it is not guaranteed that the
+ actual number is that big, though) */
+ lsn_t lsn_limit, /*!< in: in the case of BUF_FLUSH_LIST
+ all blocks whose oldest_modification is
+ smaller than this should be flushed
+ (if their number does not exceed
+ min_n), otherwise ignored */
+ bool limited_lru_scan,/*!< in: for LRU flushes, if true,
+ allow to scan only up to
+ srv_LRU_scan_depth pages in total */
+ flush_counters_t* n); /*!< out: flushed/evicted page
+ counts */
+
+
+/******************************************************************//**
+Gather the aggregated stats for both flush list and LRU list flushing */
+void
+buf_flush_common(
+/*=============*/
+ buf_flush_t flush_type, /*!< in: type of flush */
+ ulint page_count); /*!< in: number of pages flushed */
+
#ifndef UNIV_NONINL
#include "buf0flu.ic"
#endif
diff --git a/storage/xtradb/include/buf0mtflu.h b/storage/xtradb/include/buf0mtflu.h
new file mode 100644
index 00000000000..0475335bbf5
--- /dev/null
+++ b/storage/xtradb/include/buf0mtflu.h
@@ -0,0 +1,95 @@
+/*****************************************************************************
+
+Copyright (C) 2014 SkySQL Ab. All Rights Reserved.
+Copyright (C) 2014 Fusion-io. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/buf0mtflu.h
+Multi-threaded flush method interface function prototypes
+
+Created 06/02/2014 Jan Lindström jan.lindstrom@skysql.com
+ Dhananjoy Das DDas@fusionio.com
+***********************************************************************/
+
+#ifndef buf0mtflu_h
+#define buf0mtflu_h
+
+/******************************************************************//**
+Add exit work item to work queue to signal multi-threaded flush
+threads that they should exit.
+*/
+void
+buf_mtflu_io_thread_exit(void);
+/*===========================*/
+
+/******************************************************************//**
+Initialize multi-threaded flush thread synchronization data.
+@return Initialized multi-threaded flush thread synchronization data. */
+void*
+buf_mtflu_handler_init(
+/*===================*/
+ ulint n_threads, /*!< in: Number of threads to create */
+ ulint wrk_cnt); /*!< in: Number of work items */
+
+/******************************************************************//**
+Return true if multi-threaded flush is initialized
+@return true if initialized, false if not */
+bool
+buf_mtflu_init_done(void);
+/*======================*/
+
+/*********************************************************************//**
+Clears up tail of the LRU lists:
+* Put replaceable pages at the tail of LRU to the free list
+* Flush dirty pages at the tail of LRU to the disk
+The depth to which we scan each buffer pool is controlled by dynamic
+config parameter innodb_LRU_scan_depth.
+@return total pages flushed */
+UNIV_INTERN
+ulint
+buf_mtflu_flush_LRU_tail(void);
+/*===========================*/
+
+/*******************************************************************//**
+Multi-threaded version of buf_flush_list
+*/
+bool
+buf_mtflu_flush_list(
+/*=================*/
+ ulint min_n, /*!< in: wished minimum number of blocks
+ flushed (it is not guaranteed that the
+ actual number is that big, though) */
+ lsn_t lsn_limit, /*!< in: in the case of BUF_FLUSH_LIST all
+ blocks whose oldest_modification is
+ smaller than this should be flushed
+ (if their number does not exceed
+ min_n), otherwise ignored */
+ ulint* n_processed); /*!< out: the number of pages
+ which were processed is passed
+ back to caller. Ignored if NULL */
+
+/*********************************************************************//**
+Set correct thread identifiers to io thread array based on
+information we have. */
+void
+buf_mtflu_set_thread_ids(
+/*=====================*/
+ ulint n_threads, /*!<in: Number of threads to fill */
+ void* ctx, /*!<in: thread context */
+ os_thread_id_t* thread_ids); /*!<in: thread id array */
+
+#endif
diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h
index def7b246ead..43fa613e756 100644
--- a/storage/xtradb/include/dict0dict.h
+++ b/storage/xtradb/include/dict0dict.h
@@ -2,6 +2,7 @@
Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -42,6 +43,8 @@ Created 1/8/1996 Heikki Tuuri
#include "ut0byte.h"
#include "trx0types.h"
#include "row0types.h"
+#include "fsp0fsp.h"
+#include "dict0pagecompress.h"
extern bool innodb_table_stats_not_found;
extern bool innodb_index_stats_not_found;
@@ -918,7 +921,14 @@ dict_tf_set(
ulint* flags, /*!< in/out: table */
rec_format_t format, /*!< in: file format */
ulint zip_ssize, /*!< in: zip shift size */
- bool remote_path) /*!< in: table uses DATA DIRECTORY */
+ bool remote_path, /*!< in: table uses DATA DIRECTORY
+ */
+ bool page_compressed,/*!< in: table uses page compressed
+ pages */
+ ulint page_compression_level, /*!< in: table page compression
+ level */
+ ulint atomic_writes) /*!< in: table atomic
+ writes option value*/
__attribute__((nonnull));
/********************************************************************//**
Convert a 32 bit integer table flags to the 32 bit integer that is
diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic
index 6bfd7f6cdae..2b698dd7218 100644
--- a/storage/xtradb/include/dict0dict.ic
+++ b/storage/xtradb/include/dict0dict.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -537,10 +538,27 @@ dict_tf_is_valid(
ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags);
ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(flags);
ulint unused = DICT_TF_GET_UNUSED(flags);
+ ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(flags);
+ ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags);
+ ulint data_dir = DICT_TF_HAS_DATA_DIR(flags);
+ ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(flags);
/* Make sure there are no bits that we do not know about. */
if (unused != 0) {
+ fprintf(stderr,
+ "InnoDB: Error: table unused flags are %ld"
+ " in the data dictionary and are corrupted\n"
+ "InnoDB: Error: data dictionary flags are\n"
+ "InnoDB: compact %ld atomic_blobs %ld\n"
+ "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n"
+ "InnoDB: page_compression %ld page_compression_level %ld\n"
+ "InnoDB: atomic_writes %ld\n",
+ unused,
+ compact, atomic_blobs, unused, data_dir, zip_ssize,
+ page_compression, page_compression_level, atomic_writes
+ );
+
return(false);
} else if (atomic_blobs) {
@@ -550,12 +568,36 @@ dict_tf_is_valid(
data stored off-page in the clustered index. */
if (!compact) {
+ fprintf(stderr,
+ "InnoDB: Error: table compact flags are %ld"
+ " in the data dictionary and are corrupted\n"
+ "InnoDB: Error: data dictionary flags are\n"
+ "InnoDB: compact %ld atomic_blobs %ld\n"
+ "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n"
+ "InnoDB: page_compression %ld page_compression_level %ld\n"
+ "InnoDB: atomic_writes %ld\n",
+ compact, compact, atomic_blobs, unused, data_dir, zip_ssize,
+ page_compression, page_compression_level, atomic_writes
+ );
+
return(false);
}
} else if (zip_ssize) {
/* Antelope does not support COMPRESSED row format. */
+ fprintf(stderr,
+ "InnoDB: Error: table flags are %ld"
+ " in the data dictionary and are corrupted\n"
+ "InnoDB: Error: data dictionary flags are\n"
+ "InnoDB: compact %ld atomic_blobs %ld\n"
+ "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n"
+ "InnoDB: page_compression %ld page_compression_level %ld\n"
+ "InnoDB: atomic_writes %ld\n",
+ flags, compact, atomic_blobs, unused, data_dir, zip_ssize,
+ page_compression, page_compression_level, atomic_writes
+ );
+
return(false);
}
@@ -568,6 +610,59 @@ dict_tf_is_valid(
|| !atomic_blobs
|| zip_ssize > PAGE_ZIP_SSIZE_MAX) {
+ fprintf(stderr,
+ "InnoDB: Error: table compact flags are %ld in the data dictionary and are corrupted\n"
+ "InnoDB: Error: data dictionary flags are\n"
+ "InnoDB: compact %ld atomic_blobs %ld\n"
+ "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n"
+ "InnoDB: page_compression %ld page_compression_level %ld\n"
+ "InnoDB: atomic_writes %ld\n",
+ flags,
+ compact, atomic_blobs, unused, data_dir, zip_ssize,
+ page_compression, page_compression_level, atomic_writes
+
+ );
+ return(false);
+ }
+ }
+
+ if (page_compression || page_compression_level) {
+ /* Page compression format must have compact and
+ atomic_blobs and page_compression_level requires
+ page_compression */
+ if (!compact
+ || !page_compression
+ || !atomic_blobs) {
+
+ fprintf(stderr,
+ "InnoDB: Error: table flags are %ld in the data dictionary and are corrupted\n"
+ "InnoDB: Error: data dictionary flags are\n"
+ "InnoDB: compact %ld atomic_blobs %ld\n"
+ "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n"
+ "InnoDB: page_compression %ld page_compression_level %ld\n"
+ "InnoDB: atomic_writes %ld\n",
+ flags, compact, atomic_blobs, unused, data_dir, zip_ssize,
+ page_compression, page_compression_level, atomic_writes
+ );
+
+ return(false);
+ }
+ }
+
+ if (atomic_writes) {
+
+ if(atomic_writes > ATOMIC_WRITES_OFF) {
+
+ fprintf(stderr,
+ "InnoDB: Error: table flags are %ld in the data dictionary and are corrupted\n"
+ "InnoDB: Error: data dictionary flags are\n"
+ "InnoDB: compact %ld atomic_blobs %ld\n"
+ "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n"
+ "InnoDB: page_compression %ld page_compression_level %ld\n"
+ "InnoDB: atomic_writes %ld\n",
+ flags, compact, atomic_blobs, unused, data_dir, zip_ssize,
+ page_compression, page_compression_level, atomic_writes
+ );
return(false);
}
}
@@ -594,6 +689,11 @@ dict_sys_tables_type_validate(
ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(type);
ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(type);
ulint unused = DICT_TF_GET_UNUSED(type);
+ ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(type);
+ ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type);
+ ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(type);
+
+ ut_a(atomic_writes <= ATOMIC_WRITES_OFF);
/* The low order bit of SYS_TABLES.TYPE is always set to 1.
If the format is UNIV_FORMAT_B or higher, this field is the same
@@ -604,12 +704,16 @@ dict_sys_tables_type_validate(
if (redundant) {
if (zip_ssize || atomic_blobs) {
+ fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=Redundant, zip_ssize %lu atomic_blobs %lu\n",
+ zip_ssize, atomic_blobs);
return(ULINT_UNDEFINED);
}
}
/* Make sure there are no bits that we do not know about. */
if (unused) {
+ fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, unused %lu\n",
+ type, unused);
return(ULINT_UNDEFINED);
}
@@ -624,6 +728,8 @@ dict_sys_tables_type_validate(
} else if (zip_ssize) {
/* Antelope does not support COMPRESSED format. */
+ fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu\n",
+ type, zip_ssize);
return(ULINT_UNDEFINED);
}
@@ -633,11 +739,15 @@ dict_sys_tables_type_validate(
should be in N_COLS, but we already know about the
low_order_bit and DICT_N_COLS_COMPACT flags. */
if (!atomic_blobs) {
+ fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu atomic_blobs %lu\n",
+ type, zip_ssize, atomic_blobs);
return(ULINT_UNDEFINED);
}
/* Validate that the number is within allowed range. */
if (zip_ssize > PAGE_ZIP_SSIZE_MAX) {
+ fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu max %d\n",
+ type, zip_ssize, PAGE_ZIP_SSIZE_MAX);
return(ULINT_UNDEFINED);
}
}
@@ -647,6 +757,27 @@ dict_sys_tables_type_validate(
format, so the DATA_DIR flag is compatible with any other
table flags. However, it is not used with TEMPORARY tables.*/
+ if (page_compression || page_compression_level) {
+ /* page compressed row format must have low_order_bit and
+ atomic_blobs bits set and the DICT_N_COLS_COMPACT flag
+ should be in N_COLS, but we already know about the
+ low_order_bit and DICT_N_COLS_COMPACT flags. */
+
+ if (!atomic_blobs || !page_compression) {
+ fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, page_compression %lu page_compression_level %lu\n"
+ "InnoDB: Error: atomic_blobs %lu\n",
+ type, page_compression, page_compression_level, atomic_blobs);
+ return(ULINT_UNDEFINED);
+ }
+ }
+
+ /* Validate that the atomic writes number is within allowed range. */
+ if (atomic_writes > ATOMIC_WRITES_OFF) {
+ fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, atomic_writes %lu\n",
+ type, atomic_writes);
+ return(ULINT_UNDEFINED);
+ }
+
/* Return the validated SYS_TABLES.TYPE. */
return(type);
}
@@ -719,8 +850,16 @@ dict_tf_set(
ulint* flags, /*!< in/out: table flags */
rec_format_t format, /*!< in: file format */
ulint zip_ssize, /*!< in: zip shift size */
- bool use_data_dir) /*!< in: table uses DATA DIRECTORY */
+ bool use_data_dir, /*!< in: table uses DATA DIRECTORY
+ */
+ bool page_compressed,/*!< in: table uses page compressed
+ pages */
+ ulint page_compression_level, /*!< in: table page compression
+ level */
+ ulint atomic_writes) /*!< in: table atomic writes setup */
{
+ atomic_writes_t awrites = (atomic_writes_t)atomic_writes;
+
switch (format) {
case REC_FORMAT_REDUNDANT:
*flags = 0;
@@ -745,6 +884,19 @@ dict_tf_set(
if (use_data_dir) {
*flags |= (1 << DICT_TF_POS_DATA_DIR);
}
+
+ if (page_compressed) {
+ *flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS)
+ | (1 << DICT_TF_POS_PAGE_COMPRESSION)
+ | (page_compression_level << DICT_TF_POS_PAGE_COMPRESSION_LEVEL);
+
+ ut_ad(zip_ssize == 0);
+ ut_ad(dict_tf_get_page_compression(*flags) == TRUE);
+ ut_ad(dict_tf_get_page_compression_level(*flags) == page_compression_level);
+ }
+
+ *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES);
+ ut_a(dict_tf_get_atomic_writes(*flags) == awrites);
}
/********************************************************************//**
@@ -765,6 +917,9 @@ dict_tf_to_fsp_flags(
ulint table_flags) /*!< in: dict_table_t::flags */
{
ulint fsp_flags;
+ ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags);
+ ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags);
+ ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags);
DBUG_EXECUTE_IF("dict_tf_to_fsp_flags_failure",
return(ULINT_UNDEFINED););
@@ -783,7 +938,20 @@ dict_tf_to_fsp_flags(
fsp_flags |= DICT_TF_HAS_DATA_DIR(table_flags)
? FSP_FLAGS_MASK_DATA_DIR : 0;
+ /* In addition, tablespace flags also contain if the page
+ compression is used for this table. */
+ fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION(fsp_flags, page_compression);
+
+ /* In addition, tablespace flags also contain page compression level
+ if page compression is used for this table. */
+ fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(fsp_flags, page_compression_level);
+
+ /* In addition, tablespace flags also contain flag if atomic writes
+ is used for this table */
+ fsp_flags |= FSP_FLAGS_SET_ATOMIC_WRITES(fsp_flags, atomic_writes);
+
ut_a(fsp_flags_is_valid(fsp_flags));
+ ut_a(dict_tf_verify_flags(table_flags, fsp_flags));
return(fsp_flags);
}
@@ -811,10 +979,15 @@ dict_sys_tables_type_to_tf(
/* Adjust bit zero. */
flags = redundant ? 0 : 1;
- /* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are the same. */
+ /* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION,
+ PAGE_COMPRESSION_LEVEL, ATOMIC_WRITES are the same. */
flags |= type & (DICT_TF_MASK_ZIP_SSIZE
| DICT_TF_MASK_ATOMIC_BLOBS
- | DICT_TF_MASK_DATA_DIR);
+ | DICT_TF_MASK_DATA_DIR
+ | DICT_TF_MASK_PAGE_COMPRESSION
+ | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL
+ | DICT_TF_MASK_ATOMIC_WRITES
+ );
return(flags);
}
@@ -842,10 +1015,14 @@ dict_tf_to_sys_tables_type(
/* Adjust bit zero. It is always 1 in SYS_TABLES.TYPE */
type = 1;
- /* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are the same. */
+ /* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION,
+ PAGE_COMPRESSION_LEVEL, ATOMIC_WRITES are the same. */
type |= flags & (DICT_TF_MASK_ZIP_SSIZE
| DICT_TF_MASK_ATOMIC_BLOBS
- | DICT_TF_MASK_DATA_DIR);
+ | DICT_TF_MASK_DATA_DIR
+ | DICT_TF_MASK_PAGE_COMPRESSION
+ | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL
+ | DICT_TF_MASK_ATOMIC_WRITES);
return(type);
}
diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h
index b8df96acc30..473aefec418 100644
--- a/storage/xtradb/include/dict0mem.h
+++ b/storage/xtradb/include/dict0mem.h
@@ -2,6 +2,7 @@
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -130,11 +131,26 @@ This flag prevents older engines from attempting to open the table and
allows InnoDB to update_create_info() accordingly. */
#define DICT_TF_WIDTH_DATA_DIR 1
+/**
+Width of the page compression flag
+*/
+#define DICT_TF_WIDTH_PAGE_COMPRESSION 1
+#define DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL 4
+
+/**
+Width of atomic writes flag
+DEFAULT=0, ON = 1, OFF = 2
+*/
+#define DICT_TF_WIDTH_ATOMIC_WRITES 2
+
/** Width of all the currently known table flags */
#define DICT_TF_BITS (DICT_TF_WIDTH_COMPACT \
+ DICT_TF_WIDTH_ZIP_SSIZE \
+ DICT_TF_WIDTH_ATOMIC_BLOBS \
- + DICT_TF_WIDTH_DATA_DIR)
+ + DICT_TF_WIDTH_DATA_DIR \
+ + DICT_TF_WIDTH_PAGE_COMPRESSION \
+ + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL \
+ + DICT_TF_WIDTH_ATOMIC_WRITES)
/** A mask of all the known/used bits in table flags */
#define DICT_TF_BIT_MASK (~(~0 << DICT_TF_BITS))
@@ -150,9 +166,18 @@ allows InnoDB to update_create_info() accordingly. */
/** Zero relative shift position of the DATA_DIR field */
#define DICT_TF_POS_DATA_DIR (DICT_TF_POS_ATOMIC_BLOBS \
+ DICT_TF_WIDTH_ATOMIC_BLOBS)
+/** Zero relative shift position of the PAGE_COMPRESSION field */
+#define DICT_TF_POS_PAGE_COMPRESSION (DICT_TF_POS_DATA_DIR \
+ + DICT_TF_WIDTH_DATA_DIR)
+/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */
+#define DICT_TF_POS_PAGE_COMPRESSION_LEVEL (DICT_TF_POS_PAGE_COMPRESSION \
+ + DICT_TF_WIDTH_PAGE_COMPRESSION)
+/** Zero relative shift position of the ATOMIC_WRITES field */
+#define DICT_TF_POS_ATOMIC_WRITES (DICT_TF_POS_PAGE_COMPRESSION_LEVEL \
+ + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)
/** Zero relative shift position of the start of the UNUSED bits */
-#define DICT_TF_POS_UNUSED (DICT_TF_POS_DATA_DIR \
- + DICT_TF_WIDTH_DATA_DIR)
+#define DICT_TF_POS_UNUSED (DICT_TF_POS_ATOMIC_WRITES \
+ + DICT_TF_WIDTH_ATOMIC_WRITES)
/** Bit mask of the COMPACT field */
#define DICT_TF_MASK_COMPACT \
@@ -170,6 +195,18 @@ allows InnoDB to update_create_info() accordingly. */
#define DICT_TF_MASK_DATA_DIR \
((~(~0 << DICT_TF_WIDTH_DATA_DIR)) \
<< DICT_TF_POS_DATA_DIR)
+/** Bit mask of the PAGE_COMPRESSION field */
+#define DICT_TF_MASK_PAGE_COMPRESSION \
+ ((~(~0 << DICT_TF_WIDTH_PAGE_COMPRESSION)) \
+ << DICT_TF_POS_PAGE_COMPRESSION)
+/** Bit mask of the PAGE_COMPRESSION_LEVEL field */
+#define DICT_TF_MASK_PAGE_COMPRESSION_LEVEL \
+ ((~(~0 << DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)) \
+ << DICT_TF_POS_PAGE_COMPRESSION_LEVEL)
+/** Bit mask of the ATOMIC_WRITES field */
+#define DICT_TF_MASK_ATOMIC_WRITES \
+ ((~(~0 << DICT_TF_WIDTH_ATOMIC_WRITES)) \
+ << DICT_TF_POS_ATOMIC_WRITES)
/** Return the value of the COMPACT field */
#define DICT_TF_GET_COMPACT(flags) \
@@ -190,6 +227,19 @@ allows InnoDB to update_create_info() accordingly. */
/** Return the contents of the UNUSED bits */
#define DICT_TF_GET_UNUSED(flags) \
(flags >> DICT_TF_POS_UNUSED)
+
+/** Return the value of the PAGE_COMPRESSION field */
+#define DICT_TF_GET_PAGE_COMPRESSION(flags) \
+ (((flags) & DICT_TF_MASK_PAGE_COMPRESSION) \
+ >> DICT_TF_POS_PAGE_COMPRESSION)
+/** Return the value of the PAGE_COMPRESSION_LEVEL field */
+#define DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags) \
+ (((flags) & DICT_TF_MASK_PAGE_COMPRESSION_LEVEL) \
+ >> DICT_TF_POS_PAGE_COMPRESSION_LEVEL)
+/** Return the value of the ATOMIC_WRITES field */
+#define DICT_TF_GET_ATOMIC_WRITES(flags) \
+ (((flags) & DICT_TF_MASK_ATOMIC_WRITES) \
+ >> DICT_TF_POS_ATOMIC_WRITES)
/* @} */
#ifndef UNIV_INNOCHECKSUM
diff --git a/storage/xtradb/include/dict0pagecompress.h b/storage/xtradb/include/dict0pagecompress.h
new file mode 100644
index 00000000000..19a2a6c52f3
--- /dev/null
+++ b/storage/xtradb/include/dict0pagecompress.h
@@ -0,0 +1,94 @@
+/*****************************************************************************
+
+Copyright (C) 2013 SkySQL Ab. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/dict0pagecompress.h
+Helper functions for extracting/storing page compression information
+to dictionary.
+
+Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com
+***********************************************************************/
+
+#ifndef dict0pagecompress_h
+#define dict0pagecompress_h
+
+/********************************************************************//**
+Extract the page compression level from table flags.
+@return page compression level, or 0 if not compressed */
+UNIV_INLINE
+ulint
+dict_tf_get_page_compression_level(
+/*===============================*/
+ ulint flags) /*!< in: flags */
+ __attribute__((const));
+/********************************************************************//**
+Extract the page compression flag from table flags
+@return page compression flag, or false if not compressed */
+UNIV_INLINE
+ibool
+dict_tf_get_page_compression(
+/*==========================*/
+ ulint flags) /*!< in: flags */
+ __attribute__((const));
+
+/********************************************************************//**
+Get the page compression level of a table (pure, not const: takes a pointer).
+@return page compression level, or 0 if not compressed */
+UNIV_INLINE
+ulint
+dict_table_page_compression_level(
+/*==============================*/
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((pure));
+
+/********************************************************************//**
+Verify that dictionary flags match tablespace flags
+@return true if flags match, false if not */
+UNIV_INLINE
+ibool
+dict_tf_verify_flags(
+/*=================*/
+ ulint table_flags, /*!< in: dict_table_t::flags */
+ ulint fsp_flags) /*!< in: fil_space_t::flags */
+ __attribute__((const));
+
+/********************************************************************//**
+Extract the atomic writes option value from table flags.
+@return atomic writes option value (DEFAULT, ON or OFF), not a boolean */
+UNIV_INLINE
+atomic_writes_t
+dict_tf_get_atomic_writes(
+/*======================*/
+ ulint flags) /*!< in: flags */
+ __attribute__((const));
+
+/********************************************************************//**
+Get the atomic writes option value of the table.
+@return atomic writes option value (DEFAULT, ON or OFF), not a boolean */
+UNIV_INLINE
+atomic_writes_t
+dict_table_get_atomic_writes(
+/*=========================*/
+ const dict_table_t* table); /*!< in: table */
+
+
+#ifndef UNIV_NONINL
+#include "dict0pagecompress.ic"
+#endif
+
+#endif
diff --git a/storage/xtradb/include/dict0pagecompress.ic b/storage/xtradb/include/dict0pagecompress.ic
new file mode 100644
index 00000000000..811976434a8
--- /dev/null
+++ b/storage/xtradb/include/dict0pagecompress.ic
@@ -0,0 +1,191 @@
+/*****************************************************************************
+
+Copyright (C) 2013 SkySQL Ab. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/dict0pagecompress.ic
+Inline implementation for helper functions for extracting/storing
+page compression and atomic writes information to dictionary.
+
+Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com
+***********************************************************************/
+
+/********************************************************************//**
+Verify that dictionary flags match tablespace flags; log any mismatch.
+@return TRUE if flags match, FALSE if not */
+UNIV_INLINE
+ibool
+dict_tf_verify_flags(
+/*=================*/
+	ulint	table_flags,	/*!< in: dict_table_t::flags */
+	ulint	fsp_flags)	/*!< in: fil_space_t::flags */
+{
+	ulint	table_unused = DICT_TF_GET_UNUSED(table_flags);
+	ulint	compact = DICT_TF_GET_COMPACT(table_flags);
+	ulint	ssize = DICT_TF_GET_ZIP_SSIZE(table_flags);
+	ulint	atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(table_flags);
+	ulint	data_dir = DICT_TF_HAS_DATA_DIR(table_flags);
+	ulint	page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags);
+	ulint	page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags);
+	ulint	atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags);
+	ulint	post_antelope = FSP_FLAGS_GET_POST_ANTELOPE(fsp_flags);
+	ulint	zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(fsp_flags);
+	ulint	fsp_atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(fsp_flags);
+	ulint	page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(fsp_flags);
+	ulint	fsp_unused = FSP_FLAGS_GET_UNUSED(fsp_flags);
+	ulint	fsp_page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(fsp_flags);
+	ulint	fsp_page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(fsp_flags);
+	ulint	fsp_atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(fsp_flags);
+
+	/* Debug-only injection point. NOTE(review): ULINT_UNDEFINED is nonzero,
+	so callers testing the result as a boolean see a match - confirm intent. */
+	DBUG_EXECUTE_IF("dict_tf_verify_flags_failure",
+			return(ULINT_UNDEFINED););
+
+	/* These fields are only sanity checked here, not compared. */
+	ut_a(!table_unused);
+	ut_a(!fsp_unused);
+	ut_a(page_ssize == 0 || page_ssize != 0); /* silence compiler */
+	ut_a(compact == 0 || compact == 1); /* silence compiler */
+	ut_a(data_dir == 0 || data_dir == 1); /* silence compiler */
+	ut_a(post_antelope == 0 || post_antelope == 1); /* silence compiler */
+
+	/* Mismatches are printed with %lu, as fsp_flags_is_valid() does for ulint. */
+	if (ssize != zip_ssize) {
+		fprintf(stderr,
+			"InnoDB: Error: table flags has zip_ssize %lu"
+			" in the data dictionary\n"
+			"InnoDB: but the flags in file has zip_ssize %lu\n",
+			ssize, zip_ssize);
+		return (FALSE);
+	}
+	if (atomic_blobs != fsp_atomic_blobs) {
+		fprintf(stderr,
+			"InnoDB: Error: table flags has atomic_blobs %lu"
+			" in the data dictionary\n"
+			"InnoDB: but the flags in file has atomic_blobs %lu\n",
+			atomic_blobs, fsp_atomic_blobs);
+		return (FALSE);
+	}
+	if (page_compression != fsp_page_compression) {
+		fprintf(stderr,
+			"InnoDB: Error: table flags has page_compression %lu"
+			" in the data dictionary\n"
+			"InnoDB: but the flags in file has page_compression %lu\n",
+			page_compression, fsp_page_compression);
+		return (FALSE);
+	}
+	if (page_compression_level != fsp_page_compression_level) {
+		fprintf(stderr,
+			"InnoDB: Error: table flags has page_compression_level %lu"
+			" in the data dictionary\n"
+			"InnoDB: but the flags in file has page_compression_level %lu\n",
+			page_compression_level, fsp_page_compression_level);
+		return (FALSE);
+	}
+
+	if (atomic_writes != fsp_atomic_writes) {
+		fprintf(stderr,
+			"InnoDB: Error: table flags has atomic writes %lu"
+			" in the data dictionary\n"
+			"InnoDB: but the flags in file has atomic_writes %lu\n",
+			atomic_writes, fsp_atomic_writes);
+		return (FALSE);
+	}
+
+	return(TRUE);
+}
+
+/********************************************************************//**
+Read the page compression level field out of dict_table_t::flags.
+The flags are in-memory values, so debug builds assert the level is <= 9.
+@return page compression level; 0 means not compressed */
+UNIV_INLINE
+ulint
+dict_tf_get_page_compression_level(
+/*===============================*/
+	ulint	flags)	/*!< in: flags */
+{
+	const ulint	level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags);
+
+	ut_ad(level <= 9);
+
+	return(level);
+}
+
+/********************************************************************//**
+Look up the page compression level recorded in the flags of a table.
+@return page compression level, or 0 if not compressed */
+UNIV_INLINE
+ulint
+dict_table_page_compression_level(
+/*==============================*/
+	const dict_table_t*	table)	/*!< in: table */
+{
+	ut_ad(table != NULL);
+	ut_ad(dict_tf_get_page_compression(table->flags));
+
+	return dict_tf_get_page_compression_level(table->flags);
+}
+
+/********************************************************************//**
+Read the page compression flag out of a table flags word.
+@return true if page compressed, false if not */
+UNIV_INLINE
+ibool
+dict_tf_get_page_compression(
+/*=========================*/
+	ulint	flags)	/*!< in: flags */
+{
+	return DICT_TF_GET_PAGE_COMPRESSION(flags);
+}
+
+/********************************************************************//**
+Tell whether a table is page compressed, judging by table->flags.
+@return true if page compressed, false if not */
+UNIV_INLINE
+ibool
+dict_table_is_page_compressed(
+/*==========================*/
+	const dict_table_t*	table)	/*!< in: table */
+{
+	return dict_tf_get_page_compression(table->flags);
+}
+
+/********************************************************************//**
+Read the atomic writes field out of a table flags word.
+@return enumerated value of atomic writes */
+UNIV_INLINE
+atomic_writes_t
+dict_tf_get_atomic_writes(
+/*======================*/
+	ulint	flags)	/*!< in: flags */
+{
+	return (atomic_writes_t)DICT_TF_GET_ATOMIC_WRITES(flags);
+}
+
+/********************************************************************//**
+Get the atomic writes setting of a table, taken from table->flags.
+@return enumerated value of atomic writes */
+UNIV_INLINE
+atomic_writes_t
+dict_table_get_atomic_writes(
+/*=========================*/
+	const dict_table_t*	table)	/*!< in: table */
+{
+	return dict_tf_get_atomic_writes(table->flags);
+}
diff --git a/storage/xtradb/include/dict0types.h b/storage/xtradb/include/dict0types.h
index d34b6f7eab3..909fdf9cf3d 100644
--- a/storage/xtradb/include/dict0types.h
+++ b/storage/xtradb/include/dict0types.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -83,6 +84,14 @@ enum ib_quiesce_t {
#define TEMP_TABLE_PREFIX "#sql"
#define TEMP_TABLE_PATH_PREFIX "/" TEMP_TABLE_PREFIX
+
+/** Enum values for atomic_writes table option; read back from flag bits */
+typedef enum {
+ ATOMIC_WRITES_DEFAULT = 0,
+ ATOMIC_WRITES_ON = 1,
+ ATOMIC_WRITES_OFF = 2 /* highest value fsp_flags_is_valid() accepts */
+} atomic_writes_t;
+
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/** Flag to control insert buffer debugging. */
extern uint ibuf_debug;
diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h
index 7edf79043d3..4fd84ad9fbe 100644
--- a/storage/xtradb/include/fil0fil.h
+++ b/storage/xtradb/include/fil0fil.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -127,11 +128,36 @@ extern fil_addr_t fil_addr_null;
data file (ibdata*, not *.ibd):
the file has been flushed to disk
at least up to this lsn */
+/** If page type is FIL_PAGE_COMPRESSED (TODO confirm which constant) then the
+8 bytes starting at FIL_PAGE_FILE_FLUSH_LSN are broken down as follows: */
+
+/** Control information version format (u8), first byte of the area */
+static const ulint FIL_PAGE_VERSION = FIL_PAGE_FILE_FLUSH_LSN;
+
+/** Compression algorithm (u8), at byte 1 of the area */
+static const ulint FIL_PAGE_ALGORITHM_V1 = FIL_PAGE_VERSION + 1;
+
+/** Original page type (u16), at bytes 2-3 of the area */
+static const ulint FIL_PAGE_ORIGINAL_TYPE_V1 = FIL_PAGE_ALGORITHM_V1 + 1;
+
+/** Original data size in bytes (u16), at bytes 4-5 of the area */
+static const ulint FIL_PAGE_ORIGINAL_SIZE_V1 = FIL_PAGE_ORIGINAL_TYPE_V1 + 2;
+
+/** Size after compression (u16), at bytes 6-7, the end of the area */
+static const ulint FIL_PAGE_COMPRESS_SIZE_V1 = FIL_PAGE_ORIGINAL_SIZE_V1 + 2;
+
#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this
contains the space id of the page */
#define FIL_PAGE_SPACE_ID FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID
#define FIL_PAGE_DATA 38 /*!< start of the data on the page */
+/* The following are used when page compression is used */
+#define FIL_PAGE_COMPRESSED_SIZE 2 /*!< Number of bytes used to store
+ actual payload data size on
+ compressed pages. */
+#define FIL_PAGE_COMPRESSION_ZLIB 1 /*!< Compression algorithm ZLIB. */
+#define FIL_PAGE_COMPRESSION_LZ4 2 /*!< Compression algorithm LZ4. */
+
/* @} */
/** File page trailer @{ */
#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /*!< the low 4 bytes of this are used
@@ -142,6 +168,7 @@ extern fil_addr_t fil_addr_null;
/* @} */
/** File page types (values of FIL_PAGE_TYPE) @{ */
+#define FIL_PAGE_PAGE_COMPRESSED 34354 /*!< Page compressed page */
#define FIL_PAGE_INDEX 17855 /*!< B-tree node */
#define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */
#define FIL_PAGE_INODE 3 /*!< Index node */
@@ -156,7 +183,8 @@ extern fil_addr_t fil_addr_null;
#define FIL_PAGE_TYPE_BLOB 10 /*!< Uncompressed BLOB page */
#define FIL_PAGE_TYPE_ZBLOB 11 /*!< First compressed BLOB page */
#define FIL_PAGE_TYPE_ZBLOB2 12 /*!< Subsequent compressed BLOB page */
-#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_ZBLOB2
+#define FIL_PAGE_TYPE_COMPRESSED 13 /*!< Compressed page */
+#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_COMPRESSED
/*!< Last page type */
/* @} */
@@ -221,6 +249,7 @@ struct fil_node_t {
ib_int64_t flush_counter;/*!< up to what
modification_counter value we have
flushed the modifications to disk */
+ ulint file_block_size;
UT_LIST_NODE_T(fil_node_t) chain;
/*!< link field for the file chain */
UT_LIST_NODE_T(fil_node_t) LRU;
@@ -299,6 +328,9 @@ struct fil_space_t {
/*!< true if this space is currently in
unflushed_spaces */
ibool is_corrupt;
+ bool printed_compression_failure;
+ /*!< true if we have already printed
+ compression failure */
UT_LIST_NODE_T(fil_space_t) space_list;
/*!< list of all spaces */
ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
@@ -576,8 +608,10 @@ fil_read_first_page(
ulint* space_id, /*!< out: tablespace ID */
lsn_t* min_flushed_lsn, /*!< out: min of flushed
lsn values in data files */
- lsn_t* max_flushed_lsn) /*!< out: max of flushed
+ lsn_t* max_flushed_lsn, /*!< out: max of flushed
lsn values in data files */
+ ulint orig_space_id) /*!< in: file space id or
+ ULINT_UNDEFINED */
__attribute__((warn_unused_result));
/*******************************************************************//**
Increments the count of pending operation, if space is not being deleted.
@@ -914,8 +948,8 @@ fil_space_get_n_reserved_extents(
Reads or writes data. This operation is asynchronous (aio).
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
i/o on a tablespace which does not exist */
-#define fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message) \
- _fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, NULL)
+#define fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, write_size) \
+ _fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, write_size, NULL)
UNIV_INTERN
dberr_t
@@ -945,7 +979,12 @@ _fil_io(
or from where to write; in aio this must be
appropriately aligned */
void* message, /*!< in: message for aio handler if non-sync
- aio used, else ignored */
+ aio used, else ignored */
+ ulint* write_size, /*!< in/out: Actual write size initialized
+ after first successful trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
trx_t* trx)
__attribute__((nonnull(8)));
/**********************************************************************//**
@@ -1223,4 +1262,50 @@ fil_space_set_corrupt(
/*==================*/
ulint space_id);
+/****************************************************************//**
+Acquire the fil_system mutex; fil_system_exit() is the release call. */
+void
+fil_system_enter(void);
+/*==================*/
+/****************************************************************//**
+Release the fil_system mutex; fil_system_enter() is the acquire call. */
+void
+fil_system_exit(void);
+/*==================*/
+
+#ifndef UNIV_INNOCHECKSUM
+/*******************************************************************//**
+Look up a tablespace by id. @return the tablespace, or NULL if not found */
+fil_space_t*
+fil_space_get_by_id(
+/*================*/
+ ulint id); /*!< in: space id */
+/*******************************************************************//**
+Return the name of the given tablespace. @return space name */
+char*
+fil_space_name(
+/*===========*/
+ fil_space_t* space); /*!< in: space */
+#endif
+
+/****************************************************************//**
+Does error handling when a file operation fails.
+@return TRUE if we should retry the operation */
+ibool
+os_file_handle_error_no_exit(
+/*=========================*/
+ const char* name, /*!< in: name of a file or NULL */
+ const char* operation, /*!< in: operation */
+ ibool on_error_silent,/*!< in: if TRUE then don't print
+ any message to the log. */
+ const char* file, /*!< in: file name (presumably of the caller - confirm) */
+ const ulint line); /*!< in: line (presumably in that file - confirm) */
+
+/*******************************************************************//**
+Return the name of a page type. @return page type name */
+const char*
+fil_get_page_type_name(
+/*===================*/
+ ulint page_type); /*!< in: FIL_PAGE_TYPE */
+
#endif /* fil0fil_h */
diff --git a/storage/xtradb/include/fil0pagecompress.h b/storage/xtradb/include/fil0pagecompress.h
new file mode 100644
index 00000000000..c797c221efc
--- /dev/null
+++ b/storage/xtradb/include/fil0pagecompress.h
@@ -0,0 +1,145 @@
+/*****************************************************************************
+
+Copyright (C) 2013, 2014 SkySQL Ab. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*****************************************************************************/
+
+#ifndef fil0pagecompress_h
+#define fil0pagecompress_h
+
+#include "fsp0fsp.h"
+#include "fsp0pagecompress.h"
+
+/******************************************************************//**
+@file include/fil0pagecompress.h
+Helper functions for extracting/storing page compression and
+atomic writes information to table space.
+
+Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com
+***********************************************************************/
+
+/*******************************************************************//**
+Returns the page compression level of the space. The tablespace must be
+cached in the memory cache.
+@return compression level (0 if not compressed), ULINT_UNDEFINED if not found */
+ulint
+fil_space_get_page_compression_level(
+/*=================================*/
+ ulint id); /*!< in: space id */
+/*******************************************************************//**
+Returns the page compression flag of the space with the given id.
+The tablespace must be cached in the memory cache.
+@return true if page compressed, false if not or space not found */
+ibool
+fil_space_is_page_compressed(
+/*=========================*/
+ ulint id); /*!< in: space id */
+/*******************************************************************//**
+Returns the page compression flag of the given space object.
+The tablespace must be cached in the memory cache.
+@return true if page compressed, false if not or space not found */
+ibool
+fil_space_get_page_compressed(
+/*==========================*/
+ fil_space_t* space); /*!< in: space (object, not an id) */
+/*******************************************************************//**
+Returns the atomic writes setting of the space with the given id.
+The tablespace must be cached in the memory cache.
+@return atomic write table option value */
+atomic_writes_t
+fil_space_get_atomic_writes(
+/*========================*/
+ ulint id); /*!< in: space id */
+/*******************************************************************//**
+Find out whether the page is an index page or not
+@return true if page type index page, false if not */
+ibool
+fil_page_is_index_page(
+/*===================*/
+ byte *buf); /*!< in: page */
+
+/****************************************************************//**
+Get the name of the compression algorithm used for page
+compression.
+@return compression algorithm name or "UNKNOWN" if not known */
+const char*
+fil_get_compression_alg_name(
+/*=========================*/
+ ulint comp_alg); /*!< in: compression algorithm number */
+
+/****************************************************************//**
+For page compressed pages compress the page before actual write
+operation.
+@return compressed page to be written */
+byte*
+fil_compress_page(
+/*==============*/
+ ulint space_id, /*!< in: tablespace id of the
+ table. */
+ byte* buf, /*!< in: buffer from which to write; in aio
+ this must be appropriately aligned */
+ byte* out_buf, /*!< out: compressed buffer */
+ ulint len, /*!< in: length of input buffer. */
+ ulint compression_level, /*!< in: compression level */
+ ulint block_size, /*!< in: block size */
+ ulint* out_len, /*!< out: actual length of compressed
+ page */
+ byte* lzo_mem); /*!< in: temporary work memory used by LZO */
+
+/****************************************************************//**
+For page compressed pages decompress the page after actual read
+operation. The function returns void.
+NOTE(review): the result is presumably left in buf - confirm in definition. */
+void
+fil_decompress_page(
+/*================*/
+ byte* page_buf, /*!< in: preallocated buffer or NULL */
+ byte* buf, /*!< out: buffer from which to read; in aio
+ this must be appropriately aligned */
+ ulong len, /*!< in: length of output buffer. */
+ ulint* write_size); /*!< in/out: Actual payload size of
+ the compressed data. */
+
+/****************************************************************//**
+Get the space id from a fil node
+@return space id */
+ulint
+fil_node_get_space_id(
+/*==================*/
+ fil_node_t* node); /*!< in: node to get the space id from */
+
+/****************************************************************//**
+Get the block size from a fil node
+@return block size */
+ulint
+fil_node_get_block_size(
+ fil_node_t* node); /*!< in: node to get the block
+ size from */
+/*******************************************************************//**
+Find out whether the page is page compressed
+@return true if page is page compressed */
+ibool
+fil_page_is_compressed(
+/*===================*/
+ byte *buf); /*!< in: page */
+
+/*******************************************************************//**
+Find out whether the page is page compressed with the lzo method
+@return true if page is page compressed with lzo method */
+ibool
+fil_page_is_lzo_compressed(
+/*=======================*/
+ byte *buf); /*!< in: page */
+#endif
diff --git a/storage/xtradb/include/fsp0fsp.h b/storage/xtradb/include/fsp0fsp.h
index a587ccc9f20..6fe44a0ef16 100644
--- a/storage/xtradb/include/fsp0fsp.h
+++ b/storage/xtradb/include/fsp0fsp.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -53,12 +54,21 @@ to the two Barracuda row formats COMPRESSED and DYNAMIC. */
/** Width of the DATA_DIR flag. This flag indicates that the tablespace
is found in a remote location, not the default data directory. */
#define FSP_FLAGS_WIDTH_DATA_DIR 1
+/** Number of flag bits used for the page compression flag and for its level */
+#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION 1
+#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL 4
+/** Number of flag bits used to indicate atomic writes for this tablespace */
+#define FSP_FLAGS_WIDTH_ATOMIC_WRITES 2
+
/** Width of all the currently known tablespace flags */
#define FSP_FLAGS_WIDTH (FSP_FLAGS_WIDTH_POST_ANTELOPE \
+ FSP_FLAGS_WIDTH_ZIP_SSIZE \
+ FSP_FLAGS_WIDTH_ATOMIC_BLOBS \
+ FSP_FLAGS_WIDTH_PAGE_SSIZE \
- + FSP_FLAGS_WIDTH_DATA_DIR)
+ + FSP_FLAGS_WIDTH_DATA_DIR \
+ + FSP_FLAGS_WIDTH_PAGE_COMPRESSION \
+ + FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL \
+ + FSP_FLAGS_WIDTH_ATOMIC_WRITES)
/** A mask of all the known/used bits in tablespace flags */
#define FSP_FLAGS_MASK (~(~0 << FSP_FLAGS_WIDTH))
@@ -71,10 +81,21 @@ is found in a remote location, not the default data directory. */
/** Zero relative shift position of the ATOMIC_BLOBS field */
#define FSP_FLAGS_POS_ATOMIC_BLOBS (FSP_FLAGS_POS_ZIP_SSIZE \
+ FSP_FLAGS_WIDTH_ZIP_SSIZE)
-/** Zero relative shift position of the PAGE_SSIZE field */
-#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_BLOBS \
+/** Note that the next three fields are placed before the page size field to
+be compatible with the dictionary */
+/** Zero relative shift position of the PAGE_COMPRESSION field (after ATOMIC_BLOBS) */
+#define FSP_FLAGS_POS_PAGE_COMPRESSION (FSP_FLAGS_POS_ATOMIC_BLOBS \
+ + FSP_FLAGS_WIDTH_ATOMIC_BLOBS)
-/** Zero relative shift position of the start of the UNUSED bits */
+/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */
+#define FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL (FSP_FLAGS_POS_PAGE_COMPRESSION \
+ + FSP_FLAGS_WIDTH_PAGE_COMPRESSION)
+/** Zero relative shift position of the ATOMIC_WRITES field */
+#define FSP_FLAGS_POS_ATOMIC_WRITES (FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL \
+ + FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL)
+ /** Zero relative shift position of the PAGE_SSIZE field (after ATOMIC_WRITES) */
+#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_WRITES \
+ + FSP_FLAGS_WIDTH_ATOMIC_WRITES)
+/** Zero relative shift position of the start of the DATA DIR bits */
#define FSP_FLAGS_POS_DATA_DIR (FSP_FLAGS_POS_PAGE_SSIZE \
+ FSP_FLAGS_WIDTH_PAGE_SSIZE)
/** Zero relative shift position of the start of the UNUSED bits */
@@ -101,6 +122,19 @@ is found in a remote location, not the default data directory. */
#define FSP_FLAGS_MASK_DATA_DIR \
((~(~0 << FSP_FLAGS_WIDTH_DATA_DIR)) \
<< FSP_FLAGS_POS_DATA_DIR)
+/** Bit mask of the PAGE_COMPRESSION field, already shifted into position */
+#define FSP_FLAGS_MASK_PAGE_COMPRESSION \
+ ((~(~0 << FSP_FLAGS_WIDTH_PAGE_COMPRESSION)) \
+ << FSP_FLAGS_POS_PAGE_COMPRESSION)
+/** Bit mask of the PAGE_COMPRESSION_LEVEL field, already shifted into position */
+#define FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL \
+ ((~(~0 << FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL)) \
+ << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL)
+/** Bit mask of the ATOMIC_WRITES field, already shifted into position */
+#define FSP_FLAGS_MASK_ATOMIC_WRITES \
+ ((~(~0 << FSP_FLAGS_WIDTH_ATOMIC_WRITES)) \
+ << FSP_FLAGS_POS_ATOMIC_WRITES)
+
/** Return the value of the POST_ANTELOPE field */
#define FSP_FLAGS_GET_POST_ANTELOPE(flags) \
@@ -125,12 +159,38 @@ is found in a remote location, not the default data directory. */
/** Return the contents of the UNUSED bits */
#define FSP_FLAGS_GET_UNUSED(flags) \
(flags >> FSP_FLAGS_POS_UNUSED)
+/** Return the value of the PAGE_COMPRESSION field; flags may be any expression */
+#define FSP_FLAGS_GET_PAGE_COMPRESSION(flags)				\
+	(((flags) & FSP_FLAGS_MASK_PAGE_COMPRESSION)			\
+	 >> FSP_FLAGS_POS_PAGE_COMPRESSION)
+/** Return the value of the PAGE_COMPRESSION_LEVEL field; flags may be any expression */
+#define FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags)			\
+	(((flags) & FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL)		\
+	 >> FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL)
+/** Return the value of the ATOMIC_WRITES field; flags may be any expression */
+#define FSP_FLAGS_GET_ATOMIC_WRITES(flags)				\
+	(((flags) & FSP_FLAGS_MASK_ATOMIC_WRITES)			\
+	 >> FSP_FLAGS_POS_ATOMIC_WRITES)
/** Set a PAGE_SSIZE into the correct bits in a given
tablespace flags. */
#define FSP_FLAGS_SET_PAGE_SSIZE(flags, ssize) \
(flags | (ssize << FSP_FLAGS_POS_PAGE_SSIZE))
+/** Return flags with PAGE_COMPRESSION OR-ed into its bit position. The
+arguments are not modified and bits already set are not cleared. */
+#define FSP_FLAGS_SET_PAGE_COMPRESSION(flags, compression)		\
+	((flags) | ((compression) << FSP_FLAGS_POS_PAGE_COMPRESSION))
+
+/** Return flags with PAGE_COMPRESSION_LEVEL OR-ed into its bit position. The
+arguments are not modified and bits already set are not cleared. */
+#define FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(flags, level)		\
+	((flags) | ((level) << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL))
+/** Return flags with ATOMIC_WRITES OR-ed into its bit position. The
+arguments are not modified and bits already set are not cleared. */
+#define FSP_FLAGS_SET_ATOMIC_WRITES(flags, atomics)			\
+	((flags) | ((atomics) << FSP_FLAGS_POS_ATOMIC_WRITES))
+
/* @} */
/* @defgroup Tablespace Header Constants (moved from fsp0fsp.c) @{ */
diff --git a/storage/xtradb/include/fsp0fsp.ic b/storage/xtradb/include/fsp0fsp.ic
index 0d81e817cc9..ddcb87b0e57 100644
--- a/storage/xtradb/include/fsp0fsp.ic
+++ b/storage/xtradb/include/fsp0fsp.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -63,12 +64,17 @@ fsp_flags_is_valid(
ulint atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags);
ulint page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags);
ulint unused = FSP_FLAGS_GET_UNUSED(flags);
+ ulint page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(flags);
+ ulint page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags);
+ ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags);
DBUG_EXECUTE_IF("fsp_flags_is_valid_failure", return(false););
/* fsp_flags is zero unless atomic_blobs is set. */
/* Make sure there are no bits that we do not know about. */
if (unused != 0 || flags == 1) {
+ fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted unused %lu\n",
+ flags, unused);
return(false);
} else if (post_antelope) {
/* The Antelope row formats REDUNDANT and COMPACT did
@@ -76,6 +82,8 @@ fsp_flags_is_valid(
4-byte field is zero for Antelope row formats. */
if (!atomic_blobs) {
+ fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted atomic_blobs %lu\n",
+ flags, atomic_blobs);
return(false);
}
}
@@ -87,10 +95,14 @@ fsp_flags_is_valid(
externally stored parts. */
if (post_antelope || zip_ssize != 0) {
+ fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted zip_ssize %lu atomic_blobs %lu\n",
+ flags, zip_ssize, atomic_blobs);
return(false);
}
} else if (!post_antelope || zip_ssize > PAGE_ZIP_SSIZE_MAX) {
+ fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted zip_ssize %lu max %d\n",
+ flags, zip_ssize, PAGE_ZIP_SSIZE_MAX);
return(false);
} else if (page_ssize > UNIV_PAGE_SSIZE_MAX) {
@@ -98,9 +110,13 @@ fsp_flags_is_valid(
be zero for an original 16k page size.
Validate the page shift size is within allowed range. */
+ fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_ssize %lu max %lu\n",
+ flags, page_ssize, UNIV_PAGE_SSIZE_MAX);
return(false);
} else if (UNIV_PAGE_SIZE != UNIV_PAGE_SIZE_ORIG && !page_ssize) {
+ fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_ssize %lu max %lu:%d\n",
+ flags, page_ssize, UNIV_PAGE_SIZE, UNIV_PAGE_SIZE_ORIG);
return(false);
}
@@ -108,6 +124,23 @@ fsp_flags_is_valid(
# error "UNIV_FORMAT_MAX != UNIV_FORMAT_B, Add more validations."
#endif
+ /* Page compression level requires page compression and atomic blobs
+ to be set */
+ if (page_compression_level || page_compression) {
+ if (!page_compression || !atomic_blobs) {
+ fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_compression %lu\n"
+ "InnoDB: Error: page_compression_level %lu atomic_blobs %lu\n",
+ flags, page_compression, page_compression_level, atomic_blobs);
+ return(false);
+ }
+ }
+
+ if (atomic_writes > ATOMIC_WRITES_OFF) {
+ fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted atomic_writes %lu\n",
+ flags, atomic_writes);
+ return (false);
+ }
+
/* The DATA_DIR field can be used for any row type so there is
nothing here to validate. */
diff --git a/storage/xtradb/include/fsp0pagecompress.h b/storage/xtradb/include/fsp0pagecompress.h
new file mode 100644
index 00000000000..5f943ee2b83
--- /dev/null
+++ b/storage/xtradb/include/fsp0pagecompress.h
@@ -0,0 +1,84 @@
+/*****************************************************************************
+
+Copyright (C) 2013, 2015, MariaDB Corporation. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fsp0pagecompress.h
+Helper functions for extracting/storing page compression and
+atomic writes information to file space.
+
+Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com
+***********************************************************************/
+
+#ifndef fsp0pagecompress_h
+#define fsp0pagecompress_h
+
+/* Supported page compression methods; PAGE_ALGORITHM_LAST is the highest id */
+
+#define PAGE_UNCOMPRESSED 0
+#define PAGE_ZLIB_ALGORITHM 1
+#define PAGE_LZ4_ALGORITHM 2
+#define PAGE_LZO_ALGORITHM 3
+#define PAGE_LZMA_ALGORITHM 4
+#define PAGE_BZIP2_ALGORITHM 5
+#define PAGE_SNAPPY_ALGORITHM 6
+#define PAGE_ALGORITHM_LAST PAGE_SNAPPY_ALGORITHM
+
+/**********************************************************************//**
+Reads the page compression level from the first page of a tablespace.
+@return page compression level, or 0 if uncompressed */
+UNIV_INTERN
+ulint
+fsp_header_get_compression_level(
+/*=============================*/
+ const page_t* page); /*!< in: first page of a tablespace */
+
+/********************************************************************//**
+Determine if the tablespace is page compressed from the tablespace flags.
+@return TRUE if page compressed, FALSE if not compressed */
+UNIV_INLINE
+ibool
+fsp_flags_is_page_compressed(
+/*=========================*/
+ ulint flags); /*!< in: tablespace flags */
+
+/********************************************************************//**
+Extract the page compression level from tablespace flags.
+A tablespace has only one physical page compression level
+whether that page is compressed or not.
+@return page compression level of the file-per-table tablespace,
+or zero if the table is not compressed. */
+UNIV_INLINE
+ulint
+fsp_flags_get_page_compression_level(
+/*=================================*/
+ ulint flags); /*!< in: tablespace flags (fil_space_t::flags) */
+
+/********************************************************************//**
+Extract the atomic writes setting from the tablespace flags.
+@return atomic writes setting as atomic_writes_t */
+UNIV_INLINE
+atomic_writes_t
+fsp_flags_get_atomic_writes(
+/*========================*/
+ ulint flags); /*!< in: tablespace flags */
+
+#ifndef UNIV_NONINL
+#include "fsp0pagecompress.ic"
+#endif
+
+#endif
diff --git a/storage/xtradb/include/fsp0pagecompress.ic b/storage/xtradb/include/fsp0pagecompress.ic
new file mode 100644
index 00000000000..4dde042e19e
--- /dev/null
+++ b/storage/xtradb/include/fsp0pagecompress.ic
@@ -0,0 +1,197 @@
+/*****************************************************************************
+
+Copyright (C) 2013, 2014, SkySQL Ab. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fsp0pagecompress.ic
+Implementation for helper functions for extracting/storing page
+compression and atomic writes information to file space.
+
+Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com
+***********************************************************************/
+
+/********************************************************************//**
+Tell whether tablespace flags (taken from dict_table_t::flags) have the
+page compression attribute set.
+@return result of FSP_FLAGS_GET_PAGE_COMPRESSION(flags): TRUE if page
+compressed, FALSE if not page compressed */
+UNIV_INLINE
+ibool
+fsp_flags_is_page_compressed(
+/*=========================*/
+	ulint	flags)	/*!< in: tablespace flags */
+{
+	ibool	page_compressed = FSP_FLAGS_GET_PAGE_COMPRESSION(flags);
+
+	return(page_compressed);
+}
+
+/********************************************************************//**
+Extract the page compression level of a tablespace from
+dict_table_t::flags.
+@return result of FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags): page
+compression level or 0 if not compressed */
+UNIV_INLINE
+ulint
+fsp_flags_get_page_compression_level(
+/*=================================*/
+	ulint	flags)	/*!< in: tablespace flags */
+{
+	ulint	level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags);
+
+	return(level);
+}
+
+/********************************************************************//**
+Extract the atomic writes setting of a tablespace from dict_table_t::flags.
+@return FSP_FLAGS_GET_ATOMIC_WRITES(flags) converted to atomic_writes_t,
+i.e. the atomic writes table option value */
+UNIV_INLINE
+atomic_writes_t
+fsp_flags_get_atomic_writes(
+/*========================*/
+	ulint	flags)	/*!< in: tablespace flags */
+{
+	atomic_writes_t	awrites;
+
+	awrites = (atomic_writes_t)FSP_FLAGS_GET_ATOMIC_WRITES(flags);
+
+	return(awrites);
+}
+
+/*******************************************************************//**
+Find out whether the page is an index page, by comparing the 2-byte page
+type field at offset FIL_PAGE_TYPE with FIL_PAGE_INDEX.
+@return true if page type is index page, false if not */
+UNIV_INLINE
+ibool
+fil_page_is_index_page(
+/*===================*/
+	byte	*buf)	/*!< in: page */
+{
+	const ulint	page_type = mach_read_from_2(buf + FIL_PAGE_TYPE);
+
+	return(page_type == FIL_PAGE_INDEX);
+}
+
+/*******************************************************************//**
+Find out whether the page is page compressed, by comparing the 2-byte page
+type field at offset FIL_PAGE_TYPE with FIL_PAGE_PAGE_COMPRESSED.
+@return true if page is page compressed, false if not */
+UNIV_INLINE
+ibool
+fil_page_is_compressed(
+/*===================*/
+	byte	*buf)	/*!< in: page */
+{
+	const ulint	page_type = mach_read_from_2(buf + FIL_PAGE_TYPE);
+
+	return(page_type == FIL_PAGE_PAGE_COMPRESSED);
+}
+
+/*******************************************************************//**
+Look up the flags of a tablespace with fil_space_get_flags() and extract
+the page compression level from them. The tablespace must be cached in
+the memory cache.
+@return page compression level, 0 if the flags are 0 (space is not
+compressed), ULINT_UNDEFINED if space not found */
+UNIV_INLINE
+ulint
+fil_space_get_page_compression_level(
+/*=================================*/
+	ulint	id)	/*!< in: space id */
+{
+	const ulint	space_flags = fil_space_get_flags(id);
+
+	if (space_flags == 0 || space_flags == ULINT_UNDEFINED) {
+		/* Nothing to decode: hand back 0 or ULINT_UNDEFINED
+		exactly as the lookup returned it. */
+		return(space_flags);
+	}
+
+	return(fsp_flags_get_page_compression_level(space_flags));
+}
+
+/*******************************************************************//**
+Extract the page compression from space.
+@return true if space is page compressed, false if space is not found
+or space is not page compressed. */
+UNIV_INLINE
+ibool
+fil_space_is_page_compressed(
+/*=========================*/
+	ulint	id)	/*!< in: space id */
+{
+	ulint	flags;
+
+	flags = fil_space_get_flags(id);
+
+	if (flags && flags != ULINT_UNDEFINED) {
+
+		return(fsp_flags_is_page_compressed(flags));
+	}
+
+	/* flags is 0 or ULINT_UNDEFINED here. Returning flags itself would
+	report a missing space (ULINT_UNDEFINED, non-zero) as compressed. */
+	return(FALSE);
+}
+
+/****************************************************************//**
+Get the name of the compression algorithm used for page
+compression.
+@return compression algorithm name or "UNKNOWN" if not known*/
+UNIV_INLINE
+const char*
+fil_get_compression_alg_name(
+/*=========================*/
+	ulint	comp_alg)	/*!<in: compression algorithm number */
+{
+	/* Every case returns directly, so no break statements are needed.
+	All PAGE_*_ALGORITHM numbers up to PAGE_ALGORITHM_LAST are named. */
+	switch (comp_alg) {
+	case PAGE_UNCOMPRESSED:
+		return("uncompressed");
+	case PAGE_ZLIB_ALGORITHM:
+		return("ZLIB");
+	case PAGE_LZ4_ALGORITHM:
+		return("LZ4");
+	case PAGE_LZO_ALGORITHM:
+		return("LZO");
+	case PAGE_LZMA_ALGORITHM:
+		return("LZMA");
+	case PAGE_BZIP2_ALGORITHM:
+		return("BZIP2");
+	case PAGE_SNAPPY_ALGORITHM:
+		return("SNAPPY");
+	default:
+		/* Unrecognized algorithm number. */
+		return("UNKNOWN");
+	}
+}
+
+/*******************************************************************//**
+Look up the flags of a tablespace with fil_space_get_flags() and extract
+the atomic writes setting from them. The tablespace must be cached in the
+memory cache.
+@return atomic writes table option value; (atomic_writes_t)0 when the
+flags are 0 or ULINT_UNDEFINED */
+UNIV_INLINE
+atomic_writes_t
+fil_space_get_atomic_writes(
+/*========================*/
+	ulint	id)	/*!< in: space id */
+{
+	const ulint	space_flags = fil_space_get_flags(id);
+
+	if (space_flags == 0 || space_flags == ULINT_UNDEFINED) {
+		/* No usable flags: fall back to the zero option value. */
+		return((atomic_writes_t)0);
+	}
+
+	return(fsp_flags_get_atomic_writes(space_flags));
+}
+
+/*******************************************************************//**
+Find out whether the page is page compressed with the lzo method: the
+page type must be FIL_PAGE_PAGE_COMPRESSED and the 8-byte field at
+FIL_PAGE_FILE_FLUSH_LSN must equal PAGE_LZO_ALGORITHM.
+@return true if page is page compressed with lzo method, false if not */
+UNIV_INLINE
+ibool
+fil_page_is_lzo_compressed(
+/*=======================*/
+	byte	*buf)	/*!< in: page */
+{
+	const ulint	page_type = mach_read_from_2(buf + FIL_PAGE_TYPE);
+
+	/* The 8-byte field is only read when the page type matches. */
+	return(page_type == FIL_PAGE_PAGE_COMPRESSED
+	       && mach_read_from_8(buf + FIL_PAGE_FILE_FLUSH_LSN)
+	       == PAGE_LZO_ALGORITHM);
+}
diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h
index 7b880b891bd..78af907c006 100644
--- a/storage/xtradb/include/os0file.h
+++ b/storage/xtradb/include/os0file.h
@@ -2,6 +2,7 @@
Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
@@ -156,10 +157,9 @@ enum os_file_create_t {
#define OS_FILE_INSUFFICIENT_RESOURCE 78
#define OS_FILE_AIO_INTERRUPTED 79
#define OS_FILE_OPERATION_ABORTED 80
-
#define OS_FILE_ACCESS_VIOLATION 81
-
-#define OS_FILE_ERROR_MAX 100
+#define OS_FILE_OPERATION_NOT_SUPPORTED 125
+#define OS_FILE_ERROR_MAX 200
/* @} */
/** Types for aio operations @{ */
@@ -305,43 +305,45 @@ os_file_write
The wrapper functions have the prefix of "innodb_". */
#ifdef UNIV_PFS_IO
-# define os_file_create(key, name, create, purpose, type, success) \
+# define os_file_create(key, name, create, purpose, type, success, atomic_writes) \
pfs_os_file_create_func(key, name, create, purpose, type, \
- success, __FILE__, __LINE__)
+ success, atomic_writes, __FILE__, __LINE__)
# define os_file_create_simple(key, name, create, access, success) \
pfs_os_file_create_simple_func(key, name, create, access, \
success, __FILE__, __LINE__)
# define os_file_create_simple_no_error_handling( \
- key, name, create_mode, access, success) \
+ key, name, create_mode, access, success, atomic_writes) \
pfs_os_file_create_simple_no_error_handling_func( \
- key, name, create_mode, access, success, __FILE__, __LINE__)
+ key, name, create_mode, access, success, atomic_writes, __FILE__, __LINE__)
# define os_file_close(file) \
pfs_os_file_close_func(file, __FILE__, __LINE__)
# define os_aio(type, mode, name, file, buf, offset, \
- n, message1, message2, space_id, trx) \
+ n, message1, message2, space_id, \
+ trx, page_compressed, page_compression_level, write_size) \
pfs_os_aio_func(type, mode, name, file, buf, offset, \
n, message1, message2, space_id, trx, \
+ page_compressed, page_compression_level, write_size, \
__FILE__, __LINE__)
-# define os_file_read(file, buf, offset, n) \
- pfs_os_file_read_func(file, buf, offset, n, NULL, \
+# define os_file_read(file, buf, offset, n, compressed) \
+ pfs_os_file_read_func(file, buf, offset, n, NULL, compressed, \
__FILE__, __LINE__)
-# define os_file_read_trx(file, buf, offset, n, trx) \
- pfs_os_file_read_func(file, buf, offset, n, trx, \
+# define os_file_read_trx(file, buf, offset, n, trx, compressed) \
+ pfs_os_file_read_func(file, buf, offset, n, trx, compressed, \
__FILE__, __LINE__)
-# define os_file_read_no_error_handling(file, buf, offset, n) \
- pfs_os_file_read_no_error_handling_func(file, buf, offset, n, \
+# define os_file_read_no_error_handling(file, buf, offset, n, compressed) \
+ pfs_os_file_read_no_error_handling_func(file, buf, offset, n, compressed, \
__FILE__, __LINE__)
-# define os_file_write(name, file, buf, offset, n) \
- pfs_os_file_write_func(name, file, buf, offset, \
- n, __FILE__, __LINE__)
+# define os_file_write(name, file, buf, offset, n) \
+ pfs_os_file_write_func(name, file, buf, offset, n, \
+ __FILE__, __LINE__)
# define os_file_flush(file) \
pfs_os_file_flush_func(file, __FILE__, __LINE__)
@@ -358,32 +360,34 @@ The wrapper functions have the prefix of "innodb_". */
/* If UNIV_PFS_IO is not defined, these I/O APIs point
to original un-instrumented file I/O APIs */
-# define os_file_create(key, name, create, purpose, type, success) \
- os_file_create_func(name, create, purpose, type, success)
+# define os_file_create(key, name, create, purpose, type, success, atomic_writes) \
+ os_file_create_func(name, create, purpose, type, success, atomic_writes)
# define os_file_create_simple(key, name, create_mode, access, success) \
os_file_create_simple_func(name, create_mode, access, success)
# define os_file_create_simple_no_error_handling( \
- key, name, create_mode, access, success) \
+ key, name, create_mode, access, success, atomic_writes) \
os_file_create_simple_no_error_handling_func( \
- name, create_mode, access, success)
+ name, create_mode, access, success, atomic_writes)
# define os_file_close(file) os_file_close_func(file)
# define os_aio(type, mode, name, file, buf, offset, n, message1, \
- message2, space_id, trx) \
+ message2, space_id, trx, \
+ page_compressed, page_compression_level, write_size) \
os_aio_func(type, mode, name, file, buf, offset, n, \
- message1, message2, space_id, trx)
+ message1, message2, space_id, trx, \
+ page_compressed, page_compression_level, write_size)
-# define os_file_read(file, buf, offset, n) \
- os_file_read_func(file, buf, offset, n, NULL)
+# define os_file_read(file, buf, offset, n, compressed) \
+ os_file_read_func(file, buf, offset, n, NULL, compressed)
-# define os_file_read_trx(file, buf, offset, n, trx) \
- os_file_read_func(file, buf, offset, n, trx)
+# define os_file_read_trx(file, buf, offset, n, trx, compressed) \
+ os_file_read_func(file, buf, offset, n, trx, compressed)
-# define os_file_read_no_error_handling(file, buf, offset, n) \
- os_file_read_no_error_handling_func(file, buf, offset, n)
+# define os_file_read_no_error_handling(file, buf, offset, n, compressed) \
+ os_file_read_no_error_handling_func(file, buf, offset, n, compressed)
# define os_file_write(name, file, buf, offset, n) \
os_file_write_func(name, file, buf, offset, n)
@@ -526,7 +530,9 @@ os_file_create_simple_func(
ulint create_mode,/*!< in: create mode */
ulint access_type,/*!< in: OS_FILE_READ_ONLY or
OS_FILE_READ_WRITE */
- ibool* success);/*!< out: TRUE if succeed, FALSE if error */
+ ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ ulint atomic_writes);/*!< in: atomic writes table option
+ value */
/****************************************************************//**
NOTE! Use the corresponding macro
os_file_create_simple_no_error_handling(), not directly this function!
@@ -544,7 +550,9 @@ os_file_create_simple_no_error_handling_func(
OS_FILE_READ_WRITE, or
OS_FILE_READ_ALLOW_DELETE; the last option is
used by a backup program reading the file */
- ibool* success)/*!< out: TRUE if succeed, FALSE if error */
+ ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ ulint atomic_writes)/*!< in: atomic writes table option
+ value */
__attribute__((nonnull, warn_unused_result));
/****************************************************************//**
Tries to disable OS caching on an opened file descriptor. */
@@ -578,7 +586,9 @@ os_file_create_func(
async i/o or unbuffered i/o: look in the
function source code for the exact rules */
ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success)/*!< out: TRUE if succeed, FALSE if error */
+ ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ ulint atomic_writes) /*!< in: atomic writes table option
+ value */
__attribute__((nonnull, warn_unused_result));
/***********************************************************************//**
Deletes a file. The file has to be closed before calling this.
@@ -643,6 +653,8 @@ pfs_os_file_create_simple_func(
ulint access_type,/*!< in: OS_FILE_READ_ONLY or
OS_FILE_READ_WRITE */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ ulint atomic_writes,/*!< in: atomic writes table option
+ value */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
__attribute__((nonnull, warn_unused_result));
@@ -668,6 +680,8 @@ pfs_os_file_create_simple_no_error_handling_func(
OS_FILE_READ_ALLOW_DELETE; the last option is
used by a backup program reading the file */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ ulint atomic_writes,/*!< in: atomic writes table option
+ value*/
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
__attribute__((nonnull, warn_unused_result));
@@ -696,6 +710,8 @@ pfs_os_file_create_func(
function source code for the exact rules */
ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ ulint atomic_writes,/*!< in: atomic writes table option
+ value */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
__attribute__((nonnull, warn_unused_result));
@@ -726,7 +742,9 @@ pfs_os_file_read_func(
void* buf, /*!< in: buffer where to read */
os_offset_t offset, /*!< in: file offset where to read */
ulint n, /*!< in: number of bytes to read */
- trx_t* trx,
+ trx_t* trx, /*!< in: trx */
+ ibool compressed, /*!< in: is this file space
+ compressed ? */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
@@ -745,6 +763,8 @@ pfs_os_file_read_no_error_handling_func(
void* buf, /*!< in: buffer where to read */
os_offset_t offset, /*!< in: file offset where to read */
ulint n, /*!< in: number of bytes to read */
+ ibool compressed, /*!< in: is this file space
+ compressed ? */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
@@ -777,6 +797,15 @@ pfs_os_aio_func(
OS_AIO_SYNC */
ulint space_id,
trx_t* trx,
+ ibool page_compression, /*!< in: is page compression used
+ on this file space */
+ ulint page_compression_level, /*!< page compression
+ level to be used */
+ ulint* write_size,/*!< in/out: Actual write size initialized
+ after first successful trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
/*******************************************************************//**
@@ -940,7 +969,9 @@ os_file_read_func(
void* buf, /*!< in: buffer where to read */
os_offset_t offset, /*!< in: file offset where to read */
ulint n, /*!< in: number of bytes to read */
- trx_t* trx);
+ trx_t* trx, /*!< in: trx */
+ ibool compressed); /*!< in: is this file space
+ compressed ? */
/*******************************************************************//**
Rewind file to its start, read at most size - 1 bytes from it to str, and
NUL-terminate str. All errors are silently ignored. This function is
@@ -965,7 +996,9 @@ os_file_read_no_error_handling_func(
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
os_offset_t offset, /*!< in: file offset where to read */
- ulint n); /*!< in: number of bytes to read */
+ ulint n, /*!< in: number of bytes to read */
+ ibool compressed); /*!< in: is this file space
+ compressed ? */
/*******************************************************************//**
NOTE! Use the corresponding macro os_file_write(), not directly this
@@ -982,6 +1015,7 @@ os_file_write_func(
const void* buf, /*!< in: buffer from which to write */
os_offset_t offset, /*!< in: file offset where to write */
ulint n); /*!< in: number of bytes to write */
+
/*******************************************************************//**
Check the existence and type of the given file.
@return TRUE if call succeeded */
@@ -1149,7 +1183,17 @@ os_aio_func(
aio operation); ignored if mode is
OS_AIO_SYNC */
ulint space_id,
- trx_t* trx);
+ trx_t* trx,
+ ibool page_compression, /*!< in: is page compression used
+ on this file space */
+ ulint page_compression_level, /*!< page compression
+ level to be used */
+ ulint* write_size);/*!< in/out: Actual write size initialized
+ after first successful trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
+
/************************************************************************//**
Wakes up all async i/o threads so that they know to exit themselves in
shutdown. */
@@ -1211,6 +1255,7 @@ os_aio_windows_handle(
void** message2,
ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */
ulint* space_id);
+
#endif
/**********************************************************************//**
@@ -1330,6 +1375,16 @@ os_file_handle_error_no_exit(
any message to the log. */
+/***********************************************************************//**
+Try to get number of bytes per sector from file system.
+@return file block size */
+UNIV_INTERN
+ulint
+os_file_get_block_size(
+/*===================*/
+ os_file_t file, /*!< in: handle to a file */
+ const char* name); /*!< in: file name */
+
#ifndef UNIV_NONINL
#include "os0file.ic"
#endif
diff --git a/storage/xtradb/include/os0file.ic b/storage/xtradb/include/os0file.ic
index 25a1397147e..61300387e1b 100644
--- a/storage/xtradb/include/os0file.ic
+++ b/storage/xtradb/include/os0file.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -44,6 +45,8 @@ pfs_os_file_create_simple_func(
ulint access_type,/*!< in: OS_FILE_READ_ONLY or
OS_FILE_READ_WRITE */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ ulint atomic_writes,/*!< in: atomic writes table option
+ value */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
@@ -59,7 +62,7 @@ pfs_os_file_create_simple_func(
name, src_file, src_line);
file = os_file_create_simple_func(name, create_mode,
- access_type, success);
+ access_type, success, atomic_writes);
/* Regsiter the returning "file" value with the system */
register_pfs_file_open_end(locker, file);
@@ -88,6 +91,8 @@ pfs_os_file_create_simple_no_error_handling_func(
OS_FILE_READ_ALLOW_DELETE; the last option is
used by a backup program reading the file */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ ulint atomic_writes,/*!< in: atomic writes table option
+ value */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
@@ -103,7 +108,7 @@ pfs_os_file_create_simple_no_error_handling_func(
name, src_file, src_line);
file = os_file_create_simple_no_error_handling_func(
- name, create_mode, access_type, success);
+ name, create_mode, access_type, success, atomic_writes);
register_pfs_file_open_end(locker, file);
@@ -134,6 +139,8 @@ pfs_os_file_create_func(
function source code for the exact rules */
ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ ulint atomic_writes, /*!< in: atomic writes table option
+ value */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
@@ -148,7 +155,8 @@ pfs_os_file_create_func(
: PSI_FILE_OPEN),
name, src_file, src_line);
- file = os_file_create_func(name, create_mode, purpose, type, success);
+ file = os_file_create_func(name, create_mode, purpose, type,
+ success, atomic_writes);
register_pfs_file_open_end(locker, file);
@@ -212,6 +220,15 @@ pfs_os_aio_func(
OS_AIO_SYNC */
ulint space_id,
trx_t* trx,
+ ibool page_compression, /*!< in: is page compression used
+ on this file space */
+ ulint page_compression_level, /*!< page compression
+ level to be used */
+ ulint* write_size,/*!< in/out: Actual write size initialized
+ after first successful trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
@@ -227,7 +244,8 @@ pfs_os_aio_func(
src_file, src_line);
result = os_aio_func(type, mode, name, file, buf, offset,
- n, message1, message2, space_id, trx);
+ n, message1, message2, space_id, trx,
+ page_compression, page_compression_level, write_size);
register_pfs_file_io_end(locker, n);
@@ -249,6 +267,8 @@ pfs_os_file_read_func(
os_offset_t offset, /*!< in: file offset where to read */
ulint n, /*!< in: number of bytes to read */
trx_t* trx,
+ ibool compressed, /*!< in: is this file space
+ compressed ? */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
@@ -259,7 +279,7 @@ pfs_os_file_read_func(
register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
src_file, src_line);
- result = os_file_read_func(file, buf, offset, n, trx);
+ result = os_file_read_func(file, buf, offset, n, trx, compressed);
register_pfs_file_io_end(locker, n);
@@ -282,6 +302,8 @@ pfs_os_file_read_no_error_handling_func(
void* buf, /*!< in: buffer where to read */
os_offset_t offset, /*!< in: file offset where to read */
ulint n, /*!< in: number of bytes to read */
+ ibool compressed, /*!< in: is this file space
+ compressed ? */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
@@ -292,7 +314,7 @@ pfs_os_file_read_no_error_handling_func(
register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
src_file, src_line);
- result = os_file_read_no_error_handling_func(file, buf, offset, n);
+ result = os_file_read_no_error_handling_func(file, buf, offset, n, compressed);
register_pfs_file_io_end(locker, n);
diff --git a/storage/xtradb/include/srv0mon.h b/storage/xtradb/include/srv0mon.h
index 2d90f47eefe..3d9e16b19f9 100644
--- a/storage/xtradb/include/srv0mon.h
+++ b/storage/xtradb/include/srv0mon.h
@@ -2,6 +2,7 @@
Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@@ -163,6 +164,8 @@ enum monitor_id_t {
MONITOR_OVLD_BUF_POOL_PAGES_FREE,
MONITOR_OVLD_PAGE_CREATED,
MONITOR_OVLD_PAGES_WRITTEN,
+ MONITOR_OVLD_INDEX_PAGES_WRITTEN,
+ MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN,
MONITOR_OVLD_PAGES_READ,
MONITOR_OVLD_BYTE_READ,
MONITOR_OVLD_BYTE_WRITTEN,
@@ -305,6 +308,21 @@ enum monitor_id_t {
MONITOR_PAD_INCREMENTS,
MONITOR_PAD_DECREMENTS,
+ /* New monitor variables for page compression */
+ MONITOR_OVLD_PAGE_COMPRESS_SAVED,
+ MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512,
+ MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024,
+ MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048,
+ MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096,
+ MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192,
+ MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384,
+ MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768,
+ MONITOR_OVLD_PAGES_PAGE_COMPRESSED,
+ MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP,
+ MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED,
+ MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED,
+ MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR,
+
/* Index related counters */
MONITOR_MODULE_INDEX,
MONITOR_INDEX_SPLIT,
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
index cabb047112c..ea7508f0c38 100644
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@@ -103,6 +103,37 @@ struct srv_stats_t {
a disk page */
ulint_ctr_1_t buf_pool_reads;
+ /** Number of bytes saved by page compression */
+ ulint_ctr_64_t page_compression_saved;
+ /** Number of 512Byte TRIM by page compression */
+ ulint_ctr_64_t page_compression_trim_sect512;
+ /** Number of 1K TRIM by page compression */
+ ulint_ctr_64_t page_compression_trim_sect1024;
+ /** Number of 2K TRIM by page compression */
+ ulint_ctr_64_t page_compression_trim_sect2048;
+ /** Number of 4K TRIM by page compression */
+ ulint_ctr_64_t page_compression_trim_sect4096;
+ /** Number of 8K TRIM by page compression */
+ ulint_ctr_64_t page_compression_trim_sect8192;
+ /** Number of 16K TRIM by page compression */
+ ulint_ctr_64_t page_compression_trim_sect16384;
+ /** Number of 32K TRIM by page compression */
+ ulint_ctr_64_t page_compression_trim_sect32768;
+ /* Number of index pages written */
+ ulint_ctr_64_t index_pages_written;
+ /* Number of non index pages written */
+ ulint_ctr_64_t non_index_pages_written;
+ /* Number of pages compressed with page compression */
+ ulint_ctr_64_t pages_page_compressed;
+ /* Number of TRIM operations induced by page compression */
+ ulint_ctr_64_t page_compressed_trim_op;
+ /* Number of TRIM operations saved by using actual write size knowledge */
+ ulint_ctr_64_t page_compressed_trim_op_saved;
+ /* Number of pages decompressed with page compression */
+ ulint_ctr_64_t pages_page_decompressed;
+ /* Number of page compression errors */
+ ulint_ctr_64_t pages_page_compression_error;
+
/** Number of data read in total (in bytes) */
ulint_ctr_1_t data_read;
@@ -257,6 +288,28 @@ extern ibool srv_use_native_conditions;
#endif /* __WIN__ */
#endif /* !UNIV_HOTBACKUP */
+/* Use trim operation */
+extern my_bool srv_use_trim;
+
+/* Use posix fallocate */
+extern my_bool srv_use_posix_fallocate;
+
+/* Use atomic writes i.e disable doublewrite buffer */
+extern my_bool srv_use_atomic_writes;
+
+/* Compression algorithm*/
+extern ulong innodb_compression_algorithm;
+
+/* Number of flush threads */
+#define MTFLUSH_MAX_WORKER 64
+#define MTFLUSH_DEFAULT_WORKER 8
+
+/* Number of threads used for multi-threaded flush */
+extern long srv_mtflush_threads;
+
+/* If this flag is TRUE, then we will use multi threaded flush. */
+extern my_bool srv_use_mtflush;
+
/** Server undo tablespaces directory, can be absolute path. */
extern char* srv_undo_dir;
@@ -432,10 +485,6 @@ extern my_bool srv_stats_sample_traditional;
extern ibool srv_use_doublewrite_buf;
extern ulong srv_doublewrite_batch_size;
-extern ibool srv_use_atomic_writes;
-#ifdef HAVE_POSIX_FALLOCATE
-extern ibool srv_use_posix_fallocate;
-#endif
extern ulong srv_checksum_algorithm;
extern ulong srv_log_arch_expire_sec;
@@ -1089,6 +1138,39 @@ struct export_var_t{
ulint innodb_purge_view_trx_id_age; /*!< rw_max_trx_id
- purged view's min trx_id */
#endif /* UNIV_DEBUG */
+
+
+ ib_int64_t innodb_page_compression_saved;/*!< Number of bytes saved
+ by page compression */
+ ib_int64_t innodb_page_compression_trim_sect512;/*!< Number of 512b TRIM
+ by page compression */
+ ib_int64_t innodb_page_compression_trim_sect1024;/*!< Number of 1K TRIM
+ by page compression */
+ ib_int64_t innodb_page_compression_trim_sect2048;/*!< Number of 2K TRIM
+ by page compression */
+ ib_int64_t innodb_page_compression_trim_sect4096;/*!< Number of 4K byte TRIM
+ by page compression */
+ ib_int64_t innodb_page_compression_trim_sect8192;/*!< Number of 8K TRIM
+ by page compression */
+ ib_int64_t innodb_page_compression_trim_sect16384;/*!< Number of 16K TRIM
+ by page compression */
+ ib_int64_t innodb_page_compression_trim_sect32768;/*!< Number of 32K TRIM
+ by page compression */
+ ib_int64_t innodb_index_pages_written; /*!< Number of index pages
+ written */
+ ib_int64_t innodb_non_index_pages_written; /*!< Number of non index pages
+ written */
+ ib_int64_t innodb_pages_page_compressed;/*!< Number of pages
+ compressed by page compression */
+ ib_int64_t innodb_page_compressed_trim_op;/*!< Number of TRIM operations
+ induced by page compression */
+ ib_int64_t innodb_page_compressed_trim_op_saved;/*!< Number of TRIM operations
+ saved by page compression */
+ ib_int64_t innodb_pages_page_decompressed;/*!< Number of pages
+ decompressed by page
+ compression */
+ ib_int64_t innodb_pages_page_compression_error;/*!< Number of page
+ compression errors */
};
/** Thread slot in the thread table. */
diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i
index 7d97deb71ef..6b0c33df44c 100644
--- a/storage/xtradb/include/univ.i
+++ b/storage/xtradb/include/univ.i
@@ -2,6 +2,7 @@
Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
+Copyright (c) 2013, 2015, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -350,6 +351,36 @@ typedef enum innodb_file_formats_enum innodb_file_formats_t;
/** The 2-logarithm of UNIV_PAGE_SIZE: */
#define UNIV_PAGE_SIZE_SHIFT srv_page_size_shift
+#ifdef HAVE_LZO
+#define IF_LZO(A,B) A
+#else
+#define IF_LZO(A,B) B
+#endif
+
+#ifdef HAVE_LZ4
+#define IF_LZ4(A,B) A
+#else
+#define IF_LZ4(A,B) B
+#endif
+
+#ifdef HAVE_LZMA
+#define IF_LZMA(A,B) A
+#else
+#define IF_LZMA(A,B) B
+#endif
+
+#ifdef HAVE_BZIP2
+#define IF_BZIP2(A,B) A
+#else
+#define IF_BZIP2(A,B) B
+#endif
+
+#ifdef HAVE_SNAPPY
+#define IF_SNAPPY(A,B) A
+#else
+#define IF_SNAPPY(A,B) B
+#endif
+
/** The universal page size of the database */
#define UNIV_PAGE_SIZE ((ulint) srv_page_size)
diff --git a/storage/xtradb/include/ut0list.h b/storage/xtradb/include/ut0list.h
index 29fc8669ce4..796a272db59 100644
--- a/storage/xtradb/include/ut0list.h
+++ b/storage/xtradb/include/ut0list.h
@@ -150,6 +150,15 @@ ib_list_is_empty(
/* out: TRUE if empty else */
const ib_list_t* list); /* in: list */
+/********************************************************************
+Get number of items on list.
+@return number of items on list */
+UNIV_INLINE
+ulint
+ib_list_len(
+/*========*/
+ const ib_list_t* list); /*!< in: list */
+
/* List. */
struct ib_list_t {
ib_list_node_t* first; /*!< first node */
diff --git a/storage/xtradb/include/ut0list.ic b/storage/xtradb/include/ut0list.ic
index d9dcb2eac99..7a7f53adb2f 100644
--- a/storage/xtradb/include/ut0list.ic
+++ b/storage/xtradb/include/ut0list.ic
@@ -58,3 +58,23 @@ ib_list_is_empty(
{
return(!(list->first || list->last));
}
+
+/********************************************************************
+Count the nodes of a list by following the next pointers from list->first.
+@return number of nodes on the list, 0 when list->first is NULL */
+UNIV_INLINE
+ulint
+ib_list_len(
+/*========*/
+ const ib_list_t* list) /*!< in: list */
+{
+ ib_list_node_t* cur;
+ ulint n_items = 0;
+
+ /* One increment per node reached through the next pointers. */
+ for (cur = list->first; cur; cur = cur->next) {
+ n_items++;
+ }
+
+ return (n_items);
+}
diff --git a/storage/xtradb/include/ut0wqueue.h b/storage/xtradb/include/ut0wqueue.h
index 33385ddf2d4..e6b9891aed1 100644
--- a/storage/xtradb/include/ut0wqueue.h
+++ b/storage/xtradb/include/ut0wqueue.h
@@ -95,6 +95,23 @@ ib_wqueue_timedwait(
ib_wqueue_t* wq, /* in: work queue */
ib_time_t wait_in_usecs); /* in: wait time in micro seconds */
+/********************************************************************
+Return first item on work queue or NULL if queue is empty
+@return work item or NULL */
+void*
+ib_wqueue_nowait(
+/*=============*/
+ ib_wqueue_t* wq); /*!< in: work queue */
+
+
+/********************************************************************
+Get number of items on queue.
+@return number of items on queue */
+ulint
+ib_wqueue_len(
+/*==========*/
+ ib_wqueue_t* wq); /*!< in: work queue */
+
/* Work queue. */
struct ib_wqueue_t {
ib_mutex_t mutex; /*!< mutex protecting everything */
diff --git a/storage/xtradb/log/log0log.cc b/storage/xtradb/log/log0log.cc
index 1ff00dbcb8c..903bdab02ce 100644
--- a/storage/xtradb/log/log0log.cc
+++ b/storage/xtradb/log/log0log.cc
@@ -1382,7 +1382,7 @@ log_group_file_header_flush(
(ulint) (dest_offset / UNIV_PAGE_SIZE),
(ulint) (dest_offset % UNIV_PAGE_SIZE),
OS_FILE_LOG_BLOCK_SIZE,
- buf, group);
+ buf, group, 0);
srv_stats.os_log_pending_writes.dec();
}
@@ -1510,7 +1510,7 @@ loop:
fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0,
(ulint) (next_offset / UNIV_PAGE_SIZE),
(ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf,
- group);
+ group, 0);
srv_stats.os_log_pending_writes.dec();
@@ -2101,7 +2101,7 @@ log_group_checkpoint(
write_offset / UNIV_PAGE_SIZE,
write_offset % UNIV_PAGE_SIZE,
OS_FILE_LOG_BLOCK_SIZE,
- buf, ((byte*) group + 1));
+ buf, ((byte*) group + 1), 0);
ut_ad(((ulint) group & 0x1UL) == 0);
}
@@ -2181,7 +2181,7 @@ log_group_read_checkpoint_info(
fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->space_id, 0,
field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE,
- OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
+ OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL, 0);
}
/******************************************************//**
@@ -2564,7 +2564,7 @@ loop:
fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0,
(ulint) (source_offset / UNIV_PAGE_SIZE),
(ulint) (source_offset % UNIV_PAGE_SIZE),
- len, buf, (type == LOG_ARCHIVE) ? &log_archive_io : NULL);
+ len, buf, (type == LOG_ARCHIVE) ? &log_archive_io : NULL, 0);
start_lsn += len;
buf += len;
@@ -2689,7 +2689,7 @@ log_group_archive_file_header_write(
dest_offset / UNIV_PAGE_SIZE,
dest_offset % UNIV_PAGE_SIZE,
2 * OS_FILE_LOG_BLOCK_SIZE,
- buf, &log_archive_io);
+ buf, &log_archive_io, 0);
}
/******************************************************//**
@@ -2726,7 +2726,7 @@ log_group_archive_completed_header_write(
dest_offset % UNIV_PAGE_SIZE,
OS_FILE_LOG_BLOCK_SIZE,
buf + LOG_FILE_ARCH_COMPLETED,
- &log_archive_io);
+ &log_archive_io, 0);
}
/******************************************************//**
@@ -2789,12 +2789,12 @@ loop:
file_handle = os_file_create(innodb_file_log_key,
name, open_mode,
OS_FILE_AIO,
- OS_DATA_FILE, &ret);
+ OS_DATA_FILE, &ret, FALSE);
if (!ret && (open_mode == OS_FILE_CREATE)) {
file_handle = os_file_create(
innodb_file_log_key, name, OS_FILE_OPEN,
- OS_FILE_AIO, OS_DATA_FILE, &ret);
+ OS_FILE_AIO, OS_DATA_FILE, &ret, FALSE);
}
if (!ret) {
@@ -2863,7 +2863,7 @@ loop:
(ulint) (next_offset / UNIV_PAGE_SIZE),
(ulint) (next_offset % UNIV_PAGE_SIZE),
ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf,
- &log_archive_io);
+ &log_archive_io, 0);
start_lsn += len;
next_offset += len;
diff --git a/storage/xtradb/log/log0online.cc b/storage/xtradb/log/log0online.cc
index 0b9b9aa3205..60ae4a50626 100644
--- a/storage/xtradb/log/log0online.cc
+++ b/storage/xtradb/log/log0online.cc
@@ -283,7 +283,7 @@ log_online_read_bitmap_page(
ut_a(bitmap_file->offset % MODIFIED_PAGE_BLOCK_SIZE == 0);
success = os_file_read(bitmap_file->file, page, bitmap_file->offset,
- MODIFIED_PAGE_BLOCK_SIZE);
+ MODIFIED_PAGE_BLOCK_SIZE, FALSE);
if (UNIV_UNLIKELY(!success)) {
@@ -539,7 +539,7 @@ log_online_start_bitmap_file(void)
log_bmp_sys->out.name,
OS_FILE_CREATE,
OS_FILE_READ_WRITE,
- &success);
+ &success, FALSE);
}
if (UNIV_UNLIKELY(!success)) {
@@ -699,7 +699,7 @@ log_online_read_init(void)
log_bmp_sys->out.file
= os_file_create_simple_no_error_handling
(innodb_file_bmp_key, log_bmp_sys->out.name, OS_FILE_OPEN,
- OS_FILE_READ_WRITE, &success);
+ OS_FILE_READ_WRITE, &success, FALSE);
if (!success) {
@@ -1104,7 +1104,7 @@ log_online_write_bitmap_page(
success = os_file_write(log_bmp_sys->out.name, log_bmp_sys->out.file,
block, log_bmp_sys->out.offset,
- MODIFIED_PAGE_BLOCK_SIZE);
+ MODIFIED_PAGE_BLOCK_SIZE);
if (UNIV_UNLIKELY(!success)) {
/* The following call prints an error message */
@@ -1491,7 +1491,7 @@ log_online_open_bitmap_file_read_only(
bitmap_file->name,
OS_FILE_OPEN,
OS_FILE_READ_ONLY,
- &success);
+ &success, FALSE);
if (UNIV_UNLIKELY(!success)) {
/* Here and below assume that bitmap file names do not
diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc
index c7482e93c25..42c238810e8 100644
--- a/storage/xtradb/log/log0recv.cc
+++ b/storage/xtradb/log/log0recv.cc
@@ -2,6 +2,7 @@
Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
+Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -2149,7 +2150,7 @@ recv_apply_log_recs_for_backup(void)
error = fil_io(OS_FILE_READ, true,
recv_addr->space, zip_size,
recv_addr->page_no, 0, zip_size,
- block->page.zip.data, NULL);
+ block->page.zip.data, NULL, 0);
if (error == DB_SUCCESS
&& !buf_zip_decompress(block, TRUE)) {
exit(1);
@@ -2159,7 +2160,7 @@ recv_apply_log_recs_for_backup(void)
recv_addr->space, 0,
recv_addr->page_no, 0,
UNIV_PAGE_SIZE,
- block->frame, NULL);
+ block->frame, NULL, 0);
}
if (error != DB_SUCCESS) {
@@ -2188,13 +2189,13 @@ recv_apply_log_recs_for_backup(void)
recv_addr->space, zip_size,
recv_addr->page_no, 0,
zip_size,
- block->page.zip.data, NULL);
+ block->page.zip.data, NULL, 0);
} else {
error = fil_io(OS_FILE_WRITE, true,
recv_addr->space, 0,
recv_addr->page_no, 0,
UNIV_PAGE_SIZE,
- block->frame, NULL);
+ block->frame, NULL, 0);
}
skip_this_recv_addr:
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
@@ -3159,7 +3160,7 @@ recv_recovery_from_checkpoint_start_func(
fil_io(OS_FILE_READ | OS_FILE_LOG, true, max_cp_group->space_id, 0,
0, 0, LOG_FILE_HDR_SIZE,
- log_hdr_buf, max_cp_group);
+ log_hdr_buf, max_cp_group, 0);
if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
(byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
@@ -3190,7 +3191,7 @@ recv_recovery_from_checkpoint_start_func(
fil_io(OS_FILE_WRITE | OS_FILE_LOG, true,
max_cp_group->space_id, 0,
0, 0, OS_FILE_LOG_BLOCK_SIZE,
- log_hdr_buf, max_cp_group);
+ log_hdr_buf, max_cp_group, 0);
}
log_hdr_log_block_size
@@ -3786,7 +3787,7 @@ try_open_again:
file_handle = os_file_create(innodb_file_log_key,
name, OS_FILE_OPEN,
- OS_FILE_LOG, OS_FILE_AIO, &ret);
+ OS_FILE_LOG, OS_FILE_AIO, &ret, FALSE);
if (ret == FALSE) {
ask_again:
@@ -3838,7 +3839,7 @@ ask_again:
/* Read the archive file header */
fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->archive_space_id, 0,
0, 0,
- LOG_FILE_HDR_SIZE, buf, NULL);
+ LOG_FILE_HDR_SIZE, buf, NULL, 0);
/* Check if the archive file header is consistent */
@@ -3912,7 +3913,7 @@ ask_again:
fil_io(OS_FILE_READ | OS_FILE_LOG, true,
group->archive_space_id, 0,
read_offset / UNIV_PAGE_SIZE,
- read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
+ read_offset % UNIV_PAGE_SIZE, len, buf, NULL, 0);
ret = recv_scan_log_recs(
(buf_pool_get_n_pages()
diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc
index 978f3dc7cc4..28f1b156224 100644
--- a/storage/xtradb/os/os0file.cc
+++ b/storage/xtradb/os/os0file.cc
@@ -2,6 +2,7 @@
Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
+Copyright (c) 2013, 2015, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
@@ -42,10 +43,17 @@ Created 10/21/1995 Heikki Tuuri
#include "srv0srv.h"
#include "srv0start.h"
#include "fil0fil.h"
+#include "fil0pagecompress.h"
#include "buf0buf.h"
#include "btr0types.h"
#include "trx0trx.h"
#include "srv0mon.h"
+#include "srv0srv.h"
+#ifdef HAVE_POSIX_FALLOCATE
+#include "unistd.h"
+#include "fcntl.h"
+#include "linux/falloc.h"
+#endif
#ifndef UNIV_HOTBACKUP
# include "os0sync.h"
# include "os0thread.h"
@@ -73,6 +81,31 @@ Created 10/21/1995 Heikki Tuuri
# endif
#endif
+#if defined(UNIV_LINUX) && defined(HAVE_SYS_STATVFS_H)
+#include <sys/statvfs.h>
+#endif
+
+#if defined(UNIV_LINUX) && defined(HAVE_LINUX_FALLOC_H)
+#include <linux/falloc.h>
+#endif
+
+#if defined(HAVE_FALLOCATE)
+#ifndef FALLOC_FL_KEEP_SIZE
+#define FALLOC_FL_KEEP_SIZE 0x01
+#endif
+#ifndef FALLOC_FL_PUNCH_HOLE
+#define FALLOC_FL_PUNCH_HOLE 0x02
+#endif
+#endif
+
+#ifdef HAVE_LZO
+#include "lzo/lzo1x.h"
+#endif
+
+#ifdef HAVE_SNAPPY
+#include "snappy-c.h"
+#endif
+
/** Insert buffer segment id */
static const ulint IO_IBUF_SEGMENT = 0;
@@ -196,11 +229,38 @@ struct os_aio_slot_t{
and which can be used to identify
which pending aio operation was
completed */
+ ulint bitmap;
+
+ byte* page_compression_page; /*!< Memory allocated for
+ page compressed page and
+ freed after the write
+ has been completed */
+
+ ibool page_compression;
+ ulint page_compression_level;
+
+ ulint* write_size; /*!< Actual write size initialized
+ after first successful trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
+
+ byte* page_buf; /*!< Actual page buffer for
+ page compressed pages, do not
+ free this */
+
+ ibool page_compress_success;
+ /*!< TRUE if page compression was
+ successful, FALSE if not */
+
+ ulint file_block_size;/*!< file block size */
+
#ifdef LINUX_NATIVE_AIO
struct iocb control; /* Linux control block for aio */
int n_bytes; /* bytes written/read. */
int ret; /* AIO return code */
#endif /* WIN_ASYNC_IO */
+ byte *lzo_mem; /* Temporal memory used by LZO */
};
/** The asynchronous i/o array structure */
@@ -301,6 +361,66 @@ UNIV_INTERN ulint os_n_pending_writes = 0;
/** Number of pending read operations */
UNIV_INTERN ulint os_n_pending_reads = 0;
+/** Starts out FALSE; after first fallocate failure we will disable os_file_trim */
+UNIV_INTERN ibool os_fallocate_failed = FALSE;
+
+/**********************************************************************//**
+Directly manipulate the allocated disk space by deallocating for the file referred to
+by fd for the byte range starting at offset and continuing for len bytes.
+Within the specified range, partial file system blocks are zeroed, and whole
+file system blocks are removed from the file. After a successful call,
+subsequent reads from this range will return zeroes.
+@return TRUE if success, FALSE if error */
+UNIV_INTERN
+ibool
+os_file_trim(
+/*=========*/
+ os_aio_slot_t* slot); /*!< in: slot structure */
+
+/**********************************************************************//**
+Allocate memory for the temporary buffer used for page compression. This
+buffer is freed later. */
+UNIV_INTERN
+void
+os_slot_alloc_page_buf(
+/*===================*/
+ os_aio_slot_t* slot); /*!< in: slot structure */
+
+#ifdef HAVE_LZO
+/**********************************************************************//**
+Allocate the temporary memory used for page compression when
+LZO compression method is used */
+UNIV_INTERN
+void
+os_slot_alloc_lzo_mem(
+/*===================*/
+ os_aio_slot_t* slot); /*!< in: slot structure */
+#endif /* HAVE_LZO */
+
+/****************************************************************//**
+Does error handling when a file operation fails.
+@return TRUE if we should retry the operation */
+ibool
+os_file_handle_error_no_exit(
+/*=========================*/
+ const char* name, /*!< in: name of a file or NULL */
+ const char* operation, /*!< in: operation */
+ ibool on_error_silent,/*!< in: if TRUE then don't print
+ any message to the log. */
+ const char* file, /*!< in: source file of the caller (__FILE__) */
+ const ulint line); /*!< in: source line of the caller (__LINE__) */
+
+/****************************************************************//**
+Tries to enable the atomic write feature, if available, for the specified file
+handle. The result must be checked by the caller (warn_unused_result).
+@return TRUE if success */
+static __attribute__((warn_unused_result))
+ibool
+os_file_set_atomic_writes(
+/*======================*/
+ const char* name, /*!< in: name of the file */
+ os_file_t file); /*!< in: handle to the file */
+
#ifdef UNIV_DEBUG
# ifndef UNIV_HOTBACKUP
/**********************************************************************//**
@@ -537,6 +657,19 @@ os_file_get_last_error_low(
"InnoDB: because of either a thread exit"
" or an application request.\n"
"InnoDB: Retry attempt is made.\n");
+ } else if (err == ECANCELED || err == ENOTTY) {
+ if (strerror(err) != NULL) {
+ fprintf(stderr,
+ "InnoDB: Error number %d"
+ " means '%s'.\n",
+ err, strerror(err));
+ }
+
+ if(srv_use_atomic_writes) {
+ fprintf(stderr,
+ "InnoDB: Error trying to enable atomic writes on "
+ "non-supported destination!\n");
+ }
} else {
fprintf(stderr,
"InnoDB: Some operating system error numbers"
@@ -601,6 +734,20 @@ os_file_get_last_error_low(
"InnoDB: The error means mysqld does not have"
" the access rights to\n"
"InnoDB: the directory.\n");
+ } else if (err == ECANCELED || err == ENOTTY) {
+ if (strerror(err) != NULL) {
+ fprintf(stderr,
+ "InnoDB: Error number %d"
+ " means '%s'.\n",
+ err, strerror(err));
+ }
+
+
+ if(srv_use_atomic_writes) {
+ fprintf(stderr,
+ "InnoDB: Error trying to enable atomic writes on "
+ "non-supported destination!\n");
+ }
} else {
if (strerror(err) != NULL) {
fprintf(stderr,
@@ -639,6 +786,9 @@ os_file_get_last_error_low(
return(OS_FILE_AIO_RESOURCES_RESERVED);
}
break;
+ case ECANCELED:
+ case ENOTTY:
+ return(OS_FILE_OPERATION_NOT_SUPPORTED);
case EINTR:
if (srv_use_native_aio) {
return(OS_FILE_AIO_INTERRUPTED);
@@ -672,7 +822,6 @@ Does error handling when a file operation fails.
Conditionally exits (calling exit(3)) based on should_exit value and the
error type, if should_exit is TRUE then on_error_silent is ignored.
@return TRUE if we should retry the operation */
-static
ibool
os_file_handle_error_cond_exit(
/*===========================*/
@@ -680,9 +829,11 @@ os_file_handle_error_cond_exit(
const char* operation, /*!< in: operation */
ibool should_exit, /*!< in: call exit(3) if unknown error
and this parameter is TRUE */
- ibool on_error_silent)/*!< in: if TRUE then don't print
+ ibool on_error_silent,/*!< in: if TRUE then don't print
any message to the log iff it is
an unknown non-fatal error */
+ const char* file, /*!< in: file name */
+ const ulint line) /*!< in: line */
{
ulint err;
@@ -714,6 +865,9 @@ os_file_handle_error_cond_exit(
os_has_said_disk_full = TRUE;
+ fprintf(stderr, /* line is ulint: print it with ULINTPF, not %ld */
+ " InnoDB: at file %s and at line " ULINTPF "\n", file, line);
+
fflush(stderr);
ut_error;
@@ -747,6 +901,9 @@ os_file_handle_error_cond_exit(
is better to ignore on_error_silent and print an error message
to the log. */
+ fprintf(stderr, /* line is ulint: print it with ULINTPF, not %ld */
+ " InnoDB: at file %s and at line " ULINTPF "\n", file, line);
+
if (should_exit || !on_error_silent) {
ib_logf(IB_LOG_LEVEL_ERROR, "File %s: '%s' returned OS "
"error " ULINTPF ".%s", name ? name : "(unknown)",
@@ -770,10 +927,12 @@ ibool
os_file_handle_error(
/*=================*/
const char* name, /*!< in: name of a file or NULL */
- const char* operation) /*!< in: operation */
+ const char* operation, /*!< in: operation */
+ const char* file, /*!< in: file name */
+ const ulint line) /*!< in: line */
{
/* exit in case of unknown error */
- return(os_file_handle_error_cond_exit(name, operation, TRUE, FALSE));
+ return(os_file_handle_error_cond_exit(name, operation, TRUE, FALSE, file, line));
}
/****************************************************************//**
@@ -784,12 +943,14 @@ os_file_handle_error_no_exit(
/*=========================*/
const char* name, /*!< in: name of a file or NULL */
const char* operation, /*!< in: operation */
- ibool on_error_silent)/*!< in: if TRUE then don't print
+ ibool on_error_silent,/*!< in: if TRUE then don't print
any message to the log. */
+ const char* file, /*!< in: file name */
+ const ulint line) /*!< in: line */
{
/* don't exit in case of unknown error */
return(os_file_handle_error_cond_exit(
- name, operation, FALSE, on_error_silent));
+ name, operation, FALSE, on_error_silent, file, line));
}
#undef USE_FILE_LOCK
@@ -932,7 +1093,7 @@ os_file_opendir(
if (dir == INVALID_HANDLE_VALUE) {
if (error_is_fatal) {
- os_file_handle_error(dirname, "opendir");
+ os_file_handle_error(dirname, "opendir", __FILE__, __LINE__);
}
return(NULL);
@@ -943,7 +1104,7 @@ os_file_opendir(
dir = opendir(dirname);
if (dir == NULL && error_is_fatal) {
- os_file_handle_error(dirname, "opendir");
+ os_file_handle_error(dirname, "opendir", __FILE__, __LINE__);
}
return(dir);
@@ -965,7 +1126,7 @@ os_file_closedir(
ret = FindClose(dir);
if (!ret) {
- os_file_handle_error_no_exit(NULL, "closedir", FALSE);
+ os_file_handle_error_no_exit(NULL, "closedir", FALSE, __FILE__, __LINE__);
return(-1);
}
@@ -977,7 +1138,7 @@ os_file_closedir(
ret = closedir(dir);
if (ret) {
- os_file_handle_error_no_exit(NULL, "closedir", FALSE);
+ os_file_handle_error_no_exit(NULL, "closedir", FALSE, __FILE__, __LINE__);
}
return(ret);
@@ -1049,7 +1210,7 @@ next_file:
return(1);
} else {
- os_file_handle_error_no_exit(NULL, "readdir_next_file", FALSE);
+ os_file_handle_error_no_exit(NULL, "readdir_next_file", FALSE, __FILE__, __LINE__);
return(-1);
}
#else
@@ -1135,7 +1296,7 @@ next_file:
goto next_file;
}
- os_file_handle_error_no_exit(full_path, "stat", FALSE);
+ os_file_handle_error_no_exit(full_path, "stat", FALSE, __FILE__, __LINE__);
ut_free(full_path);
@@ -1186,7 +1347,7 @@ os_file_create_directory(
&& !fail_if_exists))) {
os_file_handle_error_no_exit(
- pathname, "CreateDirectory", FALSE);
+ pathname, "CreateDirectory", FALSE, __FILE__, __LINE__);
return(FALSE);
}
@@ -1199,7 +1360,7 @@ os_file_create_directory(
if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) {
/* failure */
- os_file_handle_error_no_exit(pathname, "mkdir", FALSE);
+ os_file_handle_error_no_exit(pathname, "mkdir", FALSE, __FILE__, __LINE__);
return(FALSE);
}
@@ -1309,7 +1470,7 @@ os_file_create_simple_func(
retry = os_file_handle_error(
name, create_mode == OS_FILE_OPEN ?
- "open" : "create");
+ "open" : "create", __FILE__, __LINE__);
} else {
*success = TRUE;
@@ -1377,7 +1538,7 @@ os_file_create_simple_func(
retry = os_file_handle_error(
name,
create_mode == OS_FILE_OPEN
- ? "open" : "create");
+ ? "open" : "create", __FILE__, __LINE__);
} else {
*success = TRUE;
retry = false;
@@ -1419,9 +1580,12 @@ os_file_create_simple_no_error_handling_func(
OS_FILE_READ_WRITE, or
OS_FILE_READ_ALLOW_DELETE; the last option is
used by a backup program reading the file */
- ibool* success)/*!< out: TRUE if succeed, FALSE if error */
+ ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ ulint atomic_writes) /*! in: atomic writes table option
+ value */
{
os_file_t file;
+ atomic_writes_t awrites = (atomic_writes_t) atomic_writes;
*success = FALSE;
#ifdef __WIN__
@@ -1482,6 +1646,23 @@ os_file_create_simple_no_error_handling_func(
attributes,
NULL); // No template file
+ /* If we have proper file handle and atomic writes should be used,
+ try to set atomic writes and if that fails when creating a new
+ table, produce a error. If atomic writes are used on existing
+ file, ignore error and use traditional writes for that file */
+ if (file != INVALID_HANDLE_VALUE
+ && (awrites == ATOMIC_WRITES_ON ||
+ (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT))
+ && !os_file_set_atomic_writes(name, file)) {
+ if (create_mode == OS_FILE_CREATE) {
+ fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n");
+ CloseHandle(file);
+ os_file_delete_if_exists_func(name);
+ *success = FALSE;
+ file = INVALID_HANDLE_VALUE;
+ }
+ }
+
*success = (file != INVALID_HANDLE_VALUE);
#else /* __WIN__ */
int create_flag;
@@ -1542,6 +1723,23 @@ os_file_create_simple_no_error_handling_func(
}
#endif /* USE_FILE_LOCK */
+ /* If we have proper file handle and atomic writes should be used,
+ try to set atomic writes and if that fails when creating a new
+ table, produce a error. If atomic writes are used on existing
+ file, ignore error and use traditional writes for that file */
+ if (file != -1
+ && (awrites == ATOMIC_WRITES_ON ||
+ (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT))
+ && !os_file_set_atomic_writes(name, file)) {
+ if (create_mode == OS_FILE_CREATE) {
+ fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n");
+ close(file);
+ os_file_delete_if_exists_func(name);
+ *success = FALSE;
+ file = -1;
+ }
+ }
+
#endif /* __WIN__ */
return(file);
@@ -1625,15 +1823,16 @@ os_file_set_atomic_writes(
if (ioctl(file, DFS_IOCTL_ATOMIC_WRITE_SET, &atomic_option)) {
- os_file_handle_error_no_exit(name, "ioctl", FALSE);
+ fprintf(stderr, "InnoDB: Warning:Trying to enable atomic writes on "
+ "file %s on non-supported platform!\n", name);
+ os_file_handle_error_no_exit(name, "ioctl(DFS_IOCTL_ATOMIC_WRITE_SET)", FALSE, __FILE__, __LINE__);
return(FALSE);
}
return(TRUE);
#else
- ib_logf(IB_LOG_LEVEL_ERROR,
- "trying to enable atomic writes on non-supported platform! "
- "Please restart with innodb_use_atomic_writes disabled.\n");
+ fprintf(stderr, "InnoDB: Error: trying to enable atomic writes on "
+ "file %s on non-supported platform!\n", name);
return(FALSE);
#endif
}
@@ -1659,12 +1858,15 @@ os_file_create_func(
async i/o or unbuffered i/o: look in the
function source code for the exact rules */
ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success)/*!< out: TRUE if succeed, FALSE if error */
+ ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ ulint atomic_writes) /*! in: atomic writes table option
+ value */
{
os_file_t file;
ibool retry;
ibool on_error_no_exit;
ibool on_error_silent;
+ atomic_writes_t awrites = (atomic_writes_t) atomic_writes;
#ifdef __WIN__
DBUG_EXECUTE_IF(
@@ -1807,9 +2009,9 @@ os_file_create_func(
if (on_error_no_exit) {
retry = os_file_handle_error_no_exit(
- name, operation, on_error_silent);
+ name, operation, on_error_silent, __FILE__, __LINE__);
} else {
- retry = os_file_handle_error(name, operation);
+ retry = os_file_handle_error(name, operation, __FILE__, __LINE__);
}
} else {
*success = TRUE;
@@ -1821,11 +2023,21 @@ os_file_create_func(
} while (retry);
- if (srv_use_atomic_writes && type == OS_DATA_FILE &&
- !os_file_set_atomic_writes(name, file)) {
- CloseHandle(file);
+ /* If we have proper file handle and atomic writes should be used,
+ try to set atomic writes and if that fails when creating a new
+ table, produce a error. If atomic writes are used on existing
+ file, ignore error and use traditional writes for that file */
+ if (file != INVALID_HANDLE_VALUE && type == OS_DATA_FILE
+ && (awrites == ATOMIC_WRITES_ON ||
+ (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT))
+ && !os_file_set_atomic_writes(name, file)) {
+ if (create_mode == OS_FILE_CREATE) {
+ fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n");
+ CloseHandle(file);
+ os_file_delete_if_exists_func(name);
*success = FALSE;
file = INVALID_HANDLE_VALUE;
+ }
}
#else /* __WIN__ */
@@ -1902,9 +2114,9 @@ os_file_create_func(
if (on_error_no_exit) {
retry = os_file_handle_error_no_exit(
- name, operation, on_error_silent);
+ name, operation, on_error_silent, __FILE__, __LINE__);
} else {
- retry = os_file_handle_error(name, operation);
+ retry = os_file_handle_error(name, operation, __FILE__, __LINE__);
}
} else {
*success = TRUE;
@@ -1958,14 +2170,24 @@ os_file_create_func(
}
#endif /* USE_FILE_LOCK */
- if (srv_use_atomic_writes && type == OS_DATA_FILE
- && file != -1 && !os_file_set_atomic_writes(name, file)) {
-
- *success = FALSE;
- close(file);
- file = -1;
+ /* If we have proper file handle and atomic writes should be used,
+ try to set atomic writes and if that fails when creating a new
+ table, produce a error. If atomic writes are used on existing
+ file, ignore error and use traditional writes for that file */
+ if (file != -1 && type == OS_DATA_FILE
+ && (awrites == ATOMIC_WRITES_ON ||
+ (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT))
+ && !os_file_set_atomic_writes(name, file)) {
+ if (create_mode == OS_FILE_CREATE) {
+ fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n");
+ close(file);
+ os_file_delete_if_exists_func(name);
+ *success = FALSE;
+ file = -1;
+ }
}
+
#endif /* __WIN__ */
return(file);
@@ -2024,7 +2246,7 @@ loop:
ret = unlink(name);
if (ret != 0 && errno != ENOENT) {
- os_file_handle_error_no_exit(name, "delete", FALSE);
+ os_file_handle_error_no_exit(name, "delete", FALSE, __FILE__, __LINE__);
return(false);
}
@@ -2088,7 +2310,7 @@ loop:
ret = unlink(name);
if (ret != 0) {
- os_file_handle_error_no_exit(name, "delete", FALSE);
+ os_file_handle_error_no_exit(name, "delete", FALSE, __FILE__, __LINE__);
return(false);
}
@@ -2132,7 +2354,7 @@ os_file_rename_func(
return(TRUE);
}
- os_file_handle_error_no_exit(oldpath, "rename", FALSE);
+ os_file_handle_error_no_exit(oldpath, "rename", FALSE, __FILE__, __LINE__);
return(FALSE);
#else
@@ -2141,7 +2363,7 @@ os_file_rename_func(
ret = rename(oldpath, newpath);
if (ret != 0) {
- os_file_handle_error_no_exit(oldpath, "rename", FALSE);
+ os_file_handle_error_no_exit(oldpath, "rename", FALSE, __FILE__, __LINE__);
return(FALSE);
}
@@ -2170,7 +2392,7 @@ os_file_close_func(
return(TRUE);
}
- os_file_handle_error(NULL, "close");
+ os_file_handle_error(NULL, "close", __FILE__, __LINE__);
return(FALSE);
#else
@@ -2179,7 +2401,7 @@ os_file_close_func(
ret = close(file);
if (ret == -1) {
- os_file_handle_error(NULL, "close");
+ os_file_handle_error(NULL, "close", __FILE__, __LINE__);
return(FALSE);
}
@@ -2269,6 +2491,12 @@ os_file_set_size(
current_size = 0;
+#ifdef UNIV_DEBUG
+ fprintf(stderr, "InnoDB: Note: File %s current_size " INT64PF " extended_size " INT64PF "\n",
+ name, os_file_get_size(file), size); /* file offsets are 64-bit; %lu is too narrow where long is 32-bit */
+#endif
+
+
#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
@@ -2279,7 +2507,7 @@ os_file_set_size(
INT64PF ", desired size " INT64PF "\n",
name, current_size, size);
os_file_handle_error_no_exit (name, "posix_fallocate",
- FALSE);
+ FALSE, __FILE__, __LINE__);
return(FALSE);
}
return(TRUE);
@@ -2312,6 +2540,7 @@ os_file_set_size(
}
ret = os_file_write(name, file, buf, current_size, n_bytes);
+
if (!ret) {
ut_free(buf2);
goto error_handling;
@@ -2466,7 +2695,7 @@ os_file_flush_func(
return(TRUE);
}
- os_file_handle_error(NULL, "flush");
+ os_file_handle_error(NULL, "flush", __FILE__, __LINE__);
/* It is a fatal error if a file flush does not succeed, because then
the database can get corrupt on disk */
@@ -2520,7 +2749,7 @@ os_file_flush_func(
ib_logf(IB_LOG_LEVEL_ERROR, "The OS said file flush did not succeed");
- os_file_handle_error(NULL, "flush");
+ os_file_handle_error(NULL, "flush", __FILE__, __LINE__);
/* It is a fatal error if a file flush does not succeed, because then
the database can get corrupt on disk */
@@ -2834,7 +3063,9 @@ os_file_read_func(
void* buf, /*!< in: buffer where to read */
os_offset_t offset, /*!< in: file offset where to read */
ulint n, /*!< in: number of bytes to read */
- trx_t* trx)
+ trx_t* trx,
+ ibool compressed) /*!< in: is this file space
+ compressed ? */
{
#ifdef __WIN__
BOOL ret;
@@ -2876,6 +3107,14 @@ try_again:
os_mutex_exit(os_file_count_mutex);
if (ret && len == n) {
+ /* Note that InnoDB writes files that are not formated
+ as file spaces and they do not have FIL_PAGE_TYPE
+ field, thus we must use here information is the actual
+ file space compressed. */
+ if (fil_page_is_compressed((byte *)buf)) {
+ fil_decompress_page(NULL, (byte *)buf, len, NULL);
+ }
+
return(TRUE);
}
#else /* __WIN__ */
@@ -2888,6 +3127,14 @@ try_again:
ret = os_file_pread(file, buf, n, offset, trx);
if ((ulint) ret == n) {
+ /* Note that InnoDB writes files that are not formated
+ as file spaces and they do not have FIL_PAGE_TYPE
+ field, thus we must use here information is the actual
+ file space compressed. */
+ if (fil_page_is_compressed((byte *)buf)) {
+ fil_decompress_page(NULL, (byte *)buf, n, NULL);
+ }
+
return(TRUE);
} else if (ret == -1) {
ib_logf(IB_LOG_LEVEL_ERROR,
@@ -2901,7 +3148,7 @@ try_again:
n, offset, (lint) ret);
}
#endif /* __WIN__ */
- retry = os_file_handle_error(NULL, "read");
+ retry = os_file_handle_error(NULL, "read", __FILE__, __LINE__);
if (retry) {
goto try_again;
@@ -2936,7 +3183,9 @@ os_file_read_no_error_handling_func(
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
os_offset_t offset, /*!< in: file offset where to read */
- ulint n) /*!< in: number of bytes to read */
+ ulint n, /*!< in: number of bytes to read */
+ ibool compressed) /*!< in: is this file space
+ compressed ? */
{
#ifdef __WIN__
BOOL ret;
@@ -2980,6 +3229,15 @@ try_again:
os_mutex_exit(os_file_count_mutex);
if (ret && len == n) {
+
+ /* Note that InnoDB writes files that are not formated
+ as file spaces and they do not have FIL_PAGE_TYPE
+ field, thus we must use here information is the actual
+ file space compressed. */
+ if (fil_page_is_compressed((byte *)buf)) {
+ fil_decompress_page(NULL, (byte *)buf, n, NULL);
+ }
+
return(TRUE);
}
#else /* __WIN__ */
@@ -2992,6 +3250,14 @@ try_again:
ret = os_file_pread(file, buf, n, offset, NULL);
if ((ulint) ret == n) {
+ /* Note that InnoDB writes files that are not formated
+ as file spaces and they do not have FIL_PAGE_TYPE
+ field, thus we must use here information is the actual
+ file space compressed. */
+ if (fil_page_is_compressed((byte *)buf)) {
+ fil_decompress_page(NULL, (byte *)buf, n, NULL);
+ }
+
return(TRUE);
} else if (ret == -1) {
ib_logf(IB_LOG_LEVEL_ERROR,
@@ -3005,7 +3271,7 @@ try_again:
n, offset, (lint) ret);
}
#endif /* __WIN__ */
- retry = os_file_handle_error_no_exit(NULL, "read", FALSE);
+ retry = os_file_handle_error_no_exit(NULL, "read", FALSE, __FILE__, __LINE__);
if (retry) {
goto try_again;
@@ -3071,6 +3337,7 @@ os_file_write_func(
ut_ad(buf);
ut_ad(n > 0);
+
retry:
os_mutex_enter(os_file_count_mutex);
@@ -3244,7 +3511,7 @@ os_file_status(
} else if (ret) {
/* file exists, but stat call failed */
- os_file_handle_error_no_exit(path, "stat", FALSE);
+ os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__);
return(FALSE);
}
@@ -3272,7 +3539,7 @@ os_file_status(
} else if (ret) {
/* file exists, but stat call failed */
- os_file_handle_error_no_exit(path, "stat", FALSE);
+ os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__);
return(FALSE);
}
@@ -3321,7 +3588,7 @@ os_file_get_status(
} else if (ret) {
/* file exists, but stat call failed */
- os_file_handle_error_no_exit(path, "stat", FALSE);
+ os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__);
return(DB_FAIL);
@@ -3374,7 +3641,7 @@ os_file_get_status(
} else if (ret) {
/* file exists, but stat call failed */
- os_file_handle_error_no_exit(path, "stat", FALSE);
+ os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__);
return(DB_FAIL);
@@ -3938,7 +4205,7 @@ os_aio_array_create(
array->slots = static_cast<os_aio_slot_t*>(
ut_malloc(n * sizeof(*array->slots)));
- memset(array->slots, 0x0, sizeof(n * sizeof(*array->slots)));
+ memset(array->slots, 0x0, n * sizeof(*array->slots));
#if defined(LINUX_NATIVE_AIO)
array->aio_ctx = NULL;
@@ -4013,6 +4280,8 @@ os_aio_array_free(
/*==============*/
os_aio_array_t*& array) /*!< in, own: array to free */
{
+ ulint i;
+
os_mutex_free(array->mutex);
os_event_free(array->not_full);
os_event_free(array->is_empty);
@@ -4024,6 +4293,19 @@ os_aio_array_free(
}
#endif /* LINUX_NATIVE_AIO */
+ for (i = 0; i < array->n_slots; i++) {
+ os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i);
+ if (slot->page_compression_page) {
+ ut_free(slot->page_compression_page);
+ slot->page_compression_page = NULL;
+ }
+
+ if (slot->lzo_mem) {
+ ut_free(slot->lzo_mem);
+ slot->lzo_mem = NULL;
+ }
+ }
+
ut_free(array->slots);
ut_free(array);
@@ -4368,7 +4650,16 @@ os_aio_array_reserve_slot(
to write */
os_offset_t offset, /*!< in: file offset */
ulint len, /*!< in: length of the block to read or write */
- ulint space_id)
+ ulint space_id,
+ ibool page_compression, /*!< in: is page compression used
+ on this file space */
+ ulint page_compression_level, /*!< page compression
+ level to be used */
+ ulint* write_size)/*!< in/out: Actual write size initialized
+ after fist successfull trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
{
os_aio_slot_t* slot = NULL;
#ifdef WIN_ASYNC_IO
@@ -4460,6 +4751,59 @@ found:
slot->io_already_done = FALSE;
slot->space_id = space_id;
+ slot->page_compress_success = FALSE;
+ slot->write_size = write_size;
+ slot->page_compression_level = page_compression_level;
+ slot->page_compression = page_compression;
+
+ if (message1) {
+ slot->file_block_size = fil_node_get_block_size(message1);
+ }
+
+ /* If the space is page compressed and this is write operation
+ then we compress the page */
+ if (message1 && type == OS_FILE_WRITE && page_compression ) {
+ ulint real_len = len;
+ byte* tmp = NULL;
+
+ /* Release the array mutex while compressing */
+ os_mutex_exit(array->mutex);
+
+ // We allocate memory for page compressed buffer if and only
+ // if it is not yet allocated.
+ os_slot_alloc_page_buf(slot);
+
+#ifdef HAVE_LZO
+ if (innodb_compression_algorithm == 3) {
+ os_slot_alloc_lzo_mem(slot);
+ }
+#endif
+
+ /* Call page compression */
+ tmp = fil_compress_page(fil_node_get_space_id(slot->message1),
+ (byte *)buf,
+ slot->page_buf,
+ len,
+ page_compression_level,
+ fil_node_get_block_size(slot->message1),
+ &real_len,
+ slot->lzo_mem
+ );
+
+ /* If compression succeeded, set up the length and buffer */
+ if (tmp != buf) {
+ len = real_len;
+ buf = slot->page_buf;
+ slot->len = real_len;
+ slot->page_compress_success = TRUE;
+ } else {
+ slot->page_compress_success = FALSE;
+ }
+
+ /* Take array mutex back */
+ os_mutex_enter(array->mutex);
+ }
+
#ifdef WIN_ASYNC_IO
control = &slot->control;
control->Offset = (DWORD) offset & 0xFFFFFFFF;
@@ -4735,7 +5079,16 @@ os_aio_func(
aio operation); ignored if mode is
OS_AIO_SYNC */
ulint space_id,
- trx_t* trx)
+ trx_t* trx,
+ ibool page_compression, /*!< in: is page compression used
+ on this file space */
+ ulint page_compression_level, /*!< page compression
+ level to be used */
+ ulint* write_size)/*!< in/out: Actual write size initialized
+ after fist successfull trim
+ operation for this page and if
+ initialized we do not trim again if
+ actual page size does not decrease. */
{
os_aio_array_t* array;
os_aio_slot_t* slot;
@@ -4766,7 +5119,8 @@ os_aio_func(
no need to use an i/o-handler thread */
if (type == OS_FILE_READ) {
- ret = os_file_read_func(file, buf, offset, n, trx);
+ ret = os_file_read_func(file, buf, offset, n, trx, page_compression);
+
} else {
ut_ad(!srv_read_only_mode);
ut_a(type == OS_FILE_WRITE);
@@ -4777,14 +5131,10 @@ os_aio_func(
os_has_said_disk_full = FALSE; ret = 0; errno = 28;);
if (!ret) {
- os_file_handle_error_cond_exit(name, "os_file_write_func", TRUE, FALSE);
+ os_file_handle_error_cond_exit(name, "os_file_write_func", TRUE, FALSE, __FILE__, __LINE__);
}
}
- if (!ret) {
- fprintf(stderr, "FAIL");
- }
-
return ret;
}
@@ -4835,8 +5185,11 @@ try_again:
trx->io_reads++;
trx->io_read += n;
}
+
slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
- name, buf, offset, n, space_id);
+ name, buf, offset, n, space_id,
+ page_compression, page_compression_level, write_size);
+
if (type == OS_FILE_READ) {
if (srv_use_native_aio) {
os_n_file_reads++;
@@ -4894,7 +5247,7 @@ err_exit:
os_aio_array_free_slot(array, slot);
if (os_file_handle_error(
- name,type == OS_FILE_READ ? "aio read" : "aio write")) {
+ name,type == OS_FILE_READ ? "aio read" : "aio write", __FILE__, __LINE__)) {
goto try_again;
}
@@ -4994,7 +5347,7 @@ os_aio_windows_handle(
if (ret && len == slot->len) {
ret_val = TRUE;
- } else if (os_file_handle_error(slot->name, "Windows aio")) {
+ } else if (os_file_handle_error(slot->name, "Windows aio", __FILE__, __LINE__)) {
retry = TRUE;
} else {
@@ -5022,12 +5375,18 @@ os_aio_windows_handle(
switch (slot->type) {
case OS_FILE_WRITE:
- ret_val = os_file_write(slot->name, slot->file, slot->buf,
- slot->control.Offset, slot->control.OffsetHigh, slot->len);
+ if (slot->message1 && slot->page_compression && slot->page_compress_success && slot->page_buf) {
+ ret_val = os_file_write(slot->name, slot->file, slot->page_buf,
+ slot->offset, slot->len);
+ } else {
+
+ ret_val = os_file_write(slot->name, slot->file, slot->buf,
+ slot->offset, slot->len);
+ }
break;
case OS_FILE_READ:
- ret_val = os_file_read(slot->file, slot->buf,
- slot->control.Offset, slot->control.OffsetHigh, slot->len);
+ ret_val = os_file_read(slot->file, slot->buf,
+ slot->offset, slot->len, slot->page_compression);
break;
default:
ut_error;
@@ -5052,6 +5411,27 @@ os_aio_windows_handle(
ret_val = ret && len == slot->len;
}
+ if (slot->type == OS_FILE_READ) {
+ if (fil_page_is_compressed(slot->buf)) {
+ os_slot_alloc_page_buf(slot);
+
+#ifdef HAVE_LZO
+ if (fil_page_is_lzo_compressed(slot->buf)) {
+ os_slot_alloc_lzo_mem(slot);
+ }
+#endif
+ fil_decompress_page(slot->page_buf, slot->buf, slot->len, slot->write_size);
+ }
+ } else {
+ /* OS_FILE_WRITE */
+ if (slot->page_compress_success && fil_page_is_compressed(slot->page_buf)) {
+ if (srv_use_trim && os_fallocate_failed == FALSE) {
+ // Deallocate unused blocks from file system
+ os_file_trim(slot);
+ }
+ }
+ }
+
os_aio_array_free_slot((os_aio_array_t *)slot->arr, slot);
return(ret_val);
@@ -5141,6 +5521,33 @@ retry:
/* We have not overstepped to next segment. */
ut_a(slot->pos < end_pos);
+ if (slot->type == OS_FILE_READ) {
+ /* If the table is page compressed and this is a read,
+ we decompress before we announce that the read is
+ complete. For writes, we trim the unused part of the page. */
+ if (fil_page_is_compressed(slot->buf)) {
+ // We allocate memory for page compressed buffer if and only
+ // if it is not yet allocated.
+ os_slot_alloc_page_buf(slot);
+#ifdef HAVE_LZO
+ if (fil_page_is_lzo_compressed(slot->buf)) {
+ os_slot_alloc_lzo_mem(slot);
+ }
+#endif
+ fil_decompress_page(slot->page_buf, slot->buf, slot->len, slot->write_size);
+ }
+ } else {
+ /* OS_FILE_WRITE */
+ if (slot->page_compress_success &&
+ fil_page_is_compressed(slot->page_buf)) {
+ ut_ad(slot->page_compression_page);
+ if (srv_use_trim && os_fallocate_failed == FALSE) {
+ // Deallocate unused blocks from file system
+ os_file_trim(slot);
+ }
+ }
+ }
+
/* Mark this request as completed. The error handling
will be done in the calling function. */
os_mutex_enter(array->mutex);
@@ -5322,6 +5729,13 @@ found:
} else {
errno = -slot->ret;
+ if (slot->ret == 0) {
+ fprintf(stderr,
+ "InnoDB: Number of bytes after aio %d requested %lu\n"
+ "InnoDB: from file %s\n",
+ slot->n_bytes, slot->len, slot->name);
+ }
+
/* os_file_handle_error does tell us if we should retry
this IO. As it stands now, we don't do this retry when
reaping requests from a different context than
@@ -5329,7 +5743,7 @@ found:
windows and linux native AIO.
We should probably look into this to transparently
re-submit the IO. */
- os_file_handle_error(slot->name, "Linux aio");
+ os_file_handle_error(slot->name, "Linux aio", __FILE__, __LINE__);
ret = FALSE;
}
@@ -5612,13 +6026,14 @@ consecutive_loop:
os_has_said_disk_full = FALSE; ret = 0; errno = 28;);
if (!ret) {
- os_file_handle_error_cond_exit(aio_slot->name, "os_file_write_func", TRUE, FALSE);
+ os_file_handle_error_cond_exit(aio_slot->name, "os_file_write_func", TRUE, FALSE, __FILE__, __LINE__);
}
} else {
ret = os_file_read(
aio_slot->file, combined_buf,
- aio_slot->offset, total_len);
+ aio_slot->offset, total_len,
+ aio_slot->page_compression);
}
srv_set_io_thread_op_info(global_segment, "file i/o done");
@@ -6010,3 +6425,289 @@ os_aio_all_slots_free(void)
#endif /* UNIV_DEBUG */
#endif /* !UNIV_HOTBACKUP */
+
+#ifdef _WIN32
+#include <winioctl.h>
+#ifndef FSCTL_FILE_LEVEL_TRIM /* fallback declarations, compiled only when <winioctl.h> did not define the TRIM control code */
+#define FSCTL_FILE_LEVEL_TRIM CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 130, METHOD_BUFFERED, FILE_WRITE_DATA)
+typedef struct _FILE_LEVEL_TRIM_RANGE {
+ DWORDLONG Offset; /* os_file_trim() stores the start offset of the range to trim here */
+ DWORDLONG Length; /* os_file_trim() stores the number of bytes to trim here */
+} FILE_LEVEL_TRIM_RANGE, *PFILE_LEVEL_TRIM_RANGE;
+
+typedef struct _FILE_LEVEL_TRIM {
+ DWORD Key; /* os_file_trim() always sets this to 0 */
+ DWORD NumRanges; /* os_file_trim() sets this to 1 and fills Ranges[0] only */
+ FILE_LEVEL_TRIM_RANGE Ranges[1];
+} FILE_LEVEL_TRIM, *PFILE_LEVEL_TRIM;
+#endif /* FSCTL_FILE_LEVEL_TRIM */
+#endif /* _WIN32 */
+
+/**********************************************************************//**
+Deallocate the unused tail of a page compressed page from the file system.
+The range starts at slot->offset + slot->len and covers the remaining
+UNIV_PAGE_SIZE - slot->len bytes of the page. On Linux this is done with
+fallocate(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE), on Windows with
+DeviceIoControl(FSCTL_FILE_LEVEL_TRIM). After the first failure trimming
+is disabled (os_fallocate_failed = TRUE, srv_use_trim = FALSE).
+If slot->write_size is not NULL it is updated to the number of bytes
+actually written, or reset to 0 if the trim failed; a later write that
+is not smaller than the recorded size skips the trim.
+@return TRUE if the range was trimmed or there was nothing to trim,
+FALSE if the trim call failed */
+UNIV_INTERN
+ibool
+os_file_trim(
+/*=========*/
+	os_aio_slot_t*	slot)	/*!< in: slot structure */
+{
+	size_t		len = slot->len;
+	size_t		trim_len = UNIV_PAGE_SIZE - len;
+	os_offset_t	off = slot->offset + len;
+	size_t		bsize = slot->file_block_size;
+
+	// The block size is used as a divisor below, so validate it first
+	ut_ad(bsize != 0);
+	// len here should be aligned to sector size
+	ut_ad((trim_len % bsize) == 0);
+	ut_ad((len % bsize) == 0);
+	ut_ad((off % bsize) == 0);
+
+#ifdef UNIV_DEBUG
+	// write_size is optional, do not dereference it blindly
+	fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu off %lu block_size %lu\n",
+		(unsigned long) (slot->write_size ? *slot->write_size : 0),
+		(unsigned long) trim_len, (unsigned long) len,
+		(unsigned long) off, (unsigned long) bsize);
+#endif
+
+	// Nothing to do if trim length is zero or if actual write
+	// size is initialized and it is smaller than current write size.
+	// In first write if we trim we set write_size to actual bytes
+	// written and rest of the page is trimmed. In following writes
+	// there is no need to trim again if write_size only increases
+	// because rest of the page is already trimmed. If actual write
+	// size decreases we need to trim again.
+	if (trim_len == 0 ||
+	    (slot->write_size &&
+	     *slot->write_size > 0 &&
+	     len >= *slot->write_size)) {
+
+#ifdef UNIV_PAGECOMPRESS_DEBUG
+		fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu\n",
+			(unsigned long) (slot->write_size ? *slot->write_size : 0),
+			(unsigned long) trim_len, (unsigned long) len);
+#endif
+
+		// This branch is also taken when trim_len == 0 and no
+		// write_size was supplied, so guard every dereference.
+		if (slot->write_size) {
+			if (*slot->write_size > 0 && len >= *slot->write_size) {
+				srv_stats.page_compressed_trim_op_saved.inc();
+			}
+
+			*slot->write_size = len;
+		}
+
+		return (TRUE);
+	}
+
+#ifdef __linux__
+#if defined(HAVE_FALLOCATE)
+	int ret = fallocate(slot->file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len);
+
+	if (ret) {
+		/* fallocate() reports the reason in errno; save it before
+		the logging calls below get a chance to overwrite it */
+		int	saved_errno = errno;
+
+		/* After first failure do not try to trim again */
+		os_fallocate_failed = TRUE;
+		srv_use_trim = FALSE;
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+			" InnoDB: [Warning] fallocate call failed with error code %d.\n"
+			" InnoDB: start: %lu len: %lu payload: %lu\n"
+			" InnoDB: Disabling fallocate for now.\n",
+			saved_errno, (unsigned long) off,
+			(unsigned long) trim_len, (unsigned long) len);
+
+		/* NOTE(review): assumes the handler inspects errno; restore
+		it so it is not handed a value left by the logging calls */
+		errno = saved_errno;
+
+		os_file_handle_error_no_exit(slot->name,
+			" fallocate(FALLOC_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE) ",
+			FALSE, __FILE__, __LINE__);
+
+		if (slot->write_size) {
+			*slot->write_size = 0;
+		}
+
+		return (FALSE);
+	} else {
+		if (slot->write_size) {
+			*slot->write_size = len;
+		}
+	}
+#else
+	ut_print_timestamp(stderr);
+	fprintf(stderr,
+		" InnoDB: [Warning] fallocate not supported on this installation."
+		" InnoDB: Disabling fallocate for now.");
+	os_fallocate_failed = TRUE;
+	srv_use_trim = FALSE;
+	if (slot->write_size) {
+		*slot->write_size = 0;
+	}
+
+#endif /* HAVE_FALLOCATE ... */
+
+#elif defined(_WIN32)
+	FILE_LEVEL_TRIM	flt;
+	/* DeviceIoControl() requires a valid lpBytesReturned whenever
+	lpOverlapped is NULL */
+	DWORD		bytes_returned = 0;
+
+	flt.Key = 0;
+	flt.NumRanges = 1;
+	flt.Ranges[0].Offset = off;
+	flt.Ranges[0].Length = trim_len;
+
+	BOOL ret = DeviceIoControl(slot->file, FSCTL_FILE_LEVEL_TRIM,
+		&flt, sizeof(flt), NULL, 0, &bytes_returned, NULL);
+
+	if (!ret) {
+		/* After first failure do not try to trim again */
+		os_fallocate_failed = TRUE;
+		srv_use_trim = FALSE;
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+			" InnoDB: [Warning] fallocate call failed with error.\n"
+			" InnoDB: start: %llu len: %llu payload: %llu\n"
+			" InnoDB: Disabling fallocate for now.\n",
+			(unsigned long long) off,
+			(unsigned long long) trim_len,
+			(unsigned long long) len);
+
+		os_file_handle_error_no_exit(slot->name,
+			" DeviceIOControl(FSCTL_FILE_LEVEL_TRIM) ",
+			FALSE, __FILE__, __LINE__);
+
+		if (slot->write_size) {
+			*slot->write_size = 0;
+		}
+		return (FALSE);
+	} else {
+		if (slot->write_size) {
+			*slot->write_size = len;
+		}
+	}
+#endif
+
+	/* Account the trimmed sectors in the counter matching the
+	file block size */
+	switch(bsize) {
+	case 512:
+		srv_stats.page_compression_trim_sect512.add((trim_len / bsize));
+		break;
+	case 1024:
+		srv_stats.page_compression_trim_sect1024.add((trim_len / bsize));
+		break;
+	case 2048:
+		srv_stats.page_compression_trim_sect2048.add((trim_len / bsize));
+		break;
+	case 4096:
+		srv_stats.page_compression_trim_sect4096.add((trim_len / bsize));
+		break;
+	case 8192:
+		srv_stats.page_compression_trim_sect8192.add((trim_len / bsize));
+		break;
+	case 16384:
+		srv_stats.page_compression_trim_sect16384.add((trim_len / bsize));
+		break;
+	case 32768:
+		srv_stats.page_compression_trim_sect32768.add((trim_len / bsize));
+		break;
+	default:
+		break;
+	}
+
+	srv_stats.page_compressed_trim_op.inc();
+
+	return (TRUE);
+
+}
+
+/**********************************************************************//**
+Allocate the temporary buffer used for page compression, unless the slot
+already has one. slot->page_compression_page receives the raw allocation
+and slot->page_buf the address inside it aligned to UNIV_PAGE_SIZE.
+The buffer is zero filled and is freed later, in os_aio_array_free(). */
+UNIV_INTERN
+void
+os_slot_alloc_page_buf(
+/*===================*/
+	os_aio_slot_t*	slot)	/*!< in: slot structure */
+{
+	ut_a(slot != NULL);
+
+	if (slot->page_compression_page != NULL) {
+		/* Already allocated for this slot, reuse it */
+		return;
+	}
+
+	ulint	asize = UNIV_PAGE_SIZE;
+
+	/* We allocate extra to avoid memory overwrite on compression */
+#ifdef HAVE_SNAPPY
+	asize += snappy_max_compressed_length(asize) - UNIV_PAGE_SIZE;
+#endif
+	/* Twice the size, so that a full asize bytes remain after the
+	start has been aligned up to UNIV_PAGE_SIZE */
+	byte*	raw = static_cast<byte *>(ut_malloc(asize*2));
+
+	/* Validate the allocation before it is written to */
+	ut_a(raw != NULL);
+	memset(raw, 0, asize*2);
+
+	slot->page_compression_page = raw;
+	slot->page_buf = static_cast<byte *>(ut_align(raw, UNIV_PAGE_SIZE));
+	ut_a(slot->page_buf != NULL);
+}
+
+#ifdef HAVE_LZO
+/**********************************************************************//**
+Allocate the zero filled work memory (LZO1X_1_15_MEM_COMPRESS bytes)
+used for page compression when the LZO compression method is used.
+Does nothing if the slot already owns such memory. */
+UNIV_INTERN
+void
+os_slot_alloc_lzo_mem(
+/*===================*/
+	os_aio_slot_t*	slot)	/*!< in: slot structure */
+{
+	ut_a(slot != NULL);
+
+	if (slot->lzo_mem != NULL) {
+		/* Already allocated for this slot, reuse it */
+		return;
+	}
+
+	slot->lzo_mem = static_cast<byte *>(ut_malloc(LZO1X_1_15_MEM_COMPRESS));
+
+	/* Validate the allocation before it is written to */
+	ut_a(slot->lzo_mem != NULL);
+	memset(slot->lzo_mem, 0, LZO1X_1_15_MEM_COMPRESS);
+}
+#endif
+
+/***********************************************************************//**
+Try to get number of bytes per sector from file system. Starts from a
+default of 512; on Linux the value comes from fstatvfs(), on Windows from
+the volume that contains the file. The result is clamped to the range
+[512, UNIV_PAGE_SIZE/2] and a note is printed when clamping was needed.
+@return file block size */
+UNIV_INTERN
+ulint
+os_file_get_block_size(
+/*===================*/
+	os_file_t	file,	/*!< in: handle to a file */
+	const char*	name)	/*!< in: file name */
+{
+	ulint		fblock_size = 512;
+
+#if defined(UNIV_LINUX) && defined(HAVE_SYS_STATVFS_H)
+	struct statvfs	fstat;
+	int		err;
+
+	err = fstatvfs(file, &fstat);
+
+	if (err != 0) {
+		fprintf(stderr, "InnoDB: Warning: fstatvfs() failed on file %s\n", name);
+		os_file_handle_error_no_exit(name, "fstatvfs()", FALSE, __FILE__, __LINE__);
+	} else {
+		fblock_size = fstat.f_bsize;
+	}
+#endif /* UNIV_LINUX */
+#ifdef __WIN__
+	{
+		DWORD	SectorsPerCluster = 0;
+		DWORD	BytesPerSector = 0;
+		DWORD	NumberOfFreeClusters = 0;
+		DWORD	TotalNumberOfClusters = 0;
+		char	volume[MAX_PATH];
+
+		/* GetDiskFreeSpace() expects the root directory of a
+		volume, not a file name, so resolve the volume first.
+		The ANSI entry points are used because name is a
+		const char*. */
+		if (GetVolumePathNameA(name, volume, sizeof(volume))
+		    && GetDiskFreeSpaceA(volume, &SectorsPerCluster,
+					 &BytesPerSector,
+					 &NumberOfFreeClusters,
+					 &TotalNumberOfClusters)) {
+			fblock_size = BytesPerSector;
+		} else {
+			fprintf(stderr, "InnoDB: Warning: GetDiskFreeSpace() failed on file %s\n", name);
+			os_file_handle_error_no_exit(name, "GetDiskFreeSpace()", FALSE, __FILE__, __LINE__);
+		}
+	}
+#endif /* __WIN__*/
+
+	if (fblock_size > UNIV_PAGE_SIZE/2 || fblock_size < 512) {
+		fprintf(stderr, "InnoDB: Note: File system for file %s has "
+			"file block size %lu not supported for page_size %lu\n",
+			name, fblock_size, UNIV_PAGE_SIZE);
+
+		if (fblock_size < 512) {
+			fblock_size = 512;
+		} else {
+			fblock_size = UNIV_PAGE_SIZE/2;
+		}
+
+		/* fblock_size is unsigned, print it like the note above */
+		fprintf(stderr, "InnoDB: Note: Using file block size %lu for file %s\n",
+			fblock_size, name);
+	}
+
+	return fblock_size;
+}
diff --git a/storage/xtradb/row/row0log.cc b/storage/xtradb/row/row0log.cc
index 4a50be3d098..bba8c32b752 100644
--- a/storage/xtradb/row/row0log.cc
+++ b/storage/xtradb/row/row0log.cc
@@ -2565,7 +2565,7 @@ all_done:
success = os_file_read_no_error_handling(
OS_FILE_FROM_FD(index->online_log->fd),
index->online_log->head.block, ofs,
- srv_sort_buf_size);
+ srv_sort_buf_size, FALSE);
if (!success) {
fprintf(stderr, "InnoDB: unable to read temporary file"
@@ -3393,7 +3393,7 @@ all_done:
success = os_file_read_no_error_handling(
OS_FILE_FROM_FD(index->online_log->fd),
index->online_log->head.block, ofs,
- srv_sort_buf_size);
+ srv_sort_buf_size, FALSE);
if (!success) {
fprintf(stderr, "InnoDB: unable to read temporary file"
diff --git a/storage/xtradb/row/row0merge.cc b/storage/xtradb/row/row0merge.cc
index 0a5eb4374f1..58c700f08e5 100644
--- a/storage/xtradb/row/row0merge.cc
+++ b/storage/xtradb/row/row0merge.cc
@@ -872,7 +872,8 @@ row_merge_read(
#endif /* UNIV_DEBUG */
success = os_file_read_no_error_handling(OS_FILE_FROM_FD(fd), buf,
- ofs, srv_sort_buf_size);
+ ofs, srv_sort_buf_size, FALSE);
+
#ifdef POSIX_FADV_DONTNEED
/* Each block is read exactly once. Free up the file cache. */
posix_fadvise(fd, ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED);
diff --git a/storage/xtradb/srv/srv0mon.cc b/storage/xtradb/srv/srv0mon.cc
index a0dd32c203f..5e15dd15db2 100644
--- a/storage/xtradb/srv/srv0mon.cc
+++ b/storage/xtradb/srv/srv0mon.cc
@@ -2,6 +2,7 @@
Copyright (c) 2010, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
+Copyright (c) 2013, 2014, MariaDB Corporation
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -290,6 +291,18 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_EXISTING | MONITOR_DEFAULT_ON),
MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_WRITTEN},
+ {"buffer_index_pages_written", "buffer",
+ "Number of index pages written (innodb_index_pages_written)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_INDEX_PAGES_WRITTEN},
+
+ {"buffer_non_index_pages_written", "buffer",
+ "Number of non index pages written (innodb_non_index_pages_written)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN},
+
{"buffer_pages_read", "buffer",
"Number of pages read (innodb_pages_read)",
static_cast<monitor_type_t>(
@@ -879,6 +892,71 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_PAD_DECREMENTS},
+ {"compress_saved", "compression",
+ "Number of bytes saved by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_SAVED},
+
+ {"compress_trim_sect512", "compression",
+ "Number of sect-512 TRIMed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512},
+
+ {"compress_trim_sect1024", "compression",
+ "Number of sect-1024 TRIMed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024},
+
+ {"compress_trim_sect2048", "compression",
+ "Number of sect-2048 TRIMed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048},
+
+ {"compress_trim_sect4096", "compression",
+ "Number of sect-4K TRIMed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096},
+
+ {"compress_trim_sect8192", "compression",
+ "Number of sect-8K TRIMed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192},
+
+ {"compress_trim_sect16384", "compression",
+ "Number of sect-16K TRIMed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384},
+
+ {"compress_trim_sect32768", "compression",
+ "Number of sect-32K TRIMed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768},
+
+ {"compress_pages_page_compressed", "compression",
+ "Number of pages compressed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSED},
+
+ {"compress_page_compressed_trim_op", "compression",
+ "Number of TRIM operation performed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP},
+
+ {"compress_page_compressed_trim_op_saved", "compression",
+ "Number of TRIM operation saved by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED},
+
+ {"compress_pages_page_decompressed", "compression",
+ "Number of pages decompressed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED},
+
+ {"compress_pages_page_compression_error", "compression",
+ "Number of page compression errors",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR},
+
/* ========== Counters for Index ========== */
{"module_index", "index", "Index Manager",
MONITOR_MODULE,
@@ -1573,6 +1651,16 @@ srv_mon_process_existing_counter(
value = stat.n_pages_written;
break;
+ /* innodb_index_pages_written, the number of index pages written */
+ case MONITOR_OVLD_INDEX_PAGES_WRITTEN:
+ value = srv_stats.index_pages_written;
+ break;
+
+ /* innodb_non_index_pages_written, the number of non index pages written */
+ case MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN:
+ value = srv_stats.non_index_pages_written;
+ break;
+
/* innodb_pages_read */
case MONITOR_OVLD_PAGES_READ:
buf_get_total_stat(&stat);
@@ -1834,6 +1922,46 @@ srv_mon_process_existing_counter(
value = btr_cur_n_non_sea;
break;
+ case MONITOR_OVLD_PAGE_COMPRESS_SAVED:
+ value = srv_stats.page_compression_saved;
+ break;
+ case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512:
+ value = srv_stats.page_compression_trim_sect512;
+ break;
+ case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024:
+ value = srv_stats.page_compression_trim_sect1024;
+ break;
+ case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048:
+ value = srv_stats.page_compression_trim_sect2048;
+ break;
+ case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096:
+ value = srv_stats.page_compression_trim_sect4096;
+ break;
+ case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192:
+ value = srv_stats.page_compression_trim_sect8192;
+ break;
+ case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384:
+ value = srv_stats.page_compression_trim_sect16384;
+ break;
+ case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768:
+ value = srv_stats.page_compression_trim_sect32768;
+ break;
+ case MONITOR_OVLD_PAGES_PAGE_COMPRESSED:
+ value = srv_stats.pages_page_compressed;
+ break;
+ case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP:
+ value = srv_stats.page_compressed_trim_op;
+ break;
+ case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED:
+ value = srv_stats.page_compressed_trim_op_saved;
+ break;
+ case MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED:
+ value = srv_stats.pages_page_decompressed;
+ break;
+ case MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR:
+ value = srv_stats.pages_page_compression_error;
+ break;
+
default:
ut_error;
}
diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc
index 303985cdae2..92238cee405 100644
--- a/storage/xtradb/srv/srv0srv.cc
+++ b/storage/xtradb/srv/srv0srv.cc
@@ -73,6 +73,7 @@ Created 10/8/1995 Heikki Tuuri
#include "mysql/plugin.h"
#include "mysql/service_thd_wait.h"
+#include "fil0pagecompress.h"
/* prototypes of new functions added to ha_innodb.cc for kill_idle_transaction */
ibool innobase_thd_is_idle(const void* thd);
@@ -161,6 +162,23 @@ use simulated aio we build below with threads.
Currently we support native aio on windows and linux */
UNIV_INTERN my_bool srv_use_native_aio = TRUE;
+/* Default compression level if page compression is used and no compression
+level is set for the table */
+UNIV_INTERN long srv_compress_zlib_level = 6;
+/* If this flag is TRUE, then we will use fallocate(PUNCH_HOLE)
+on the pages */
+UNIV_INTERN my_bool srv_use_trim = FALSE;
+/* If this flag is TRUE, then we will use posix fallocate for file extension */
+UNIV_INTERN my_bool srv_use_posix_fallocate = FALSE;
+/* If this flag is TRUE, then we disable doublewrite buffer */
+UNIV_INTERN my_bool srv_use_atomic_writes = FALSE;
+/* Algorithm used for page compressing the pages; zlib by default */
+UNIV_INTERN ulong innodb_compression_algorithm = PAGE_ZLIB_ALGORITHM;
+/* Number of threads used for multi-threaded flush */
+UNIV_INTERN long srv_mtflush_threads = MTFLUSH_DEFAULT_WORKER;
+/* If this flag is TRUE, then we will use multi threaded flush. */
+UNIV_INTERN my_bool srv_use_mtflush = FALSE;
+
#ifdef __WIN__
/* Windows native condition variables. We use runtime loading / function
pointers, because they are not available on Windows Server 2003 and
@@ -466,10 +484,6 @@ pages default true. */
UNIV_INTERN my_bool srv_stats_sample_traditional = TRUE;
UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
-UNIV_INTERN ibool srv_use_atomic_writes = FALSE;
-#ifdef HAVE_POSIX_FALLOCATE
-UNIV_INTERN ibool srv_use_posix_fallocate = FALSE;
-#endif
/** doublewrite buffer is 1MB is size i.e.: it can hold 128 16K pages.
The following parameter is the size of the buffer that is used for
@@ -514,6 +528,16 @@ static ulint srv_n_system_rows_read_old = 0;
UNIV_INTERN ulint srv_truncated_status_writes = 0;
UNIV_INTERN ulint srv_available_undo_logs = 0;
+UNIV_INTERN ib_uint64_t srv_page_compression_saved = 0;
+UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect512 = 0;
+UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect4096 = 0;
+UNIV_INTERN ib_uint64_t srv_index_pages_written = 0;
+UNIV_INTERN ib_uint64_t srv_non_index_pages_written = 0;
+UNIV_INTERN ib_uint64_t srv_pages_page_compressed = 0;
+UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op = 0;
+UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op_saved = 0;
+UNIV_INTERN ib_uint64_t srv_index_page_decompressed = 0;
+
/* Ensure status variables are on separate cache lines */
#ifdef __powerpc__
@@ -1899,6 +1923,16 @@ srv_export_innodb_status(void)
export_vars.innodb_descriptors_memory
= os_atomic_increment_ulint(&srv_descriptors_memory, 0);
+ export_vars.innodb_page_compression_saved = srv_stats.page_compression_saved;
+ export_vars.innodb_page_compression_trim_sect512 = srv_stats.page_compression_trim_sect512;
+ export_vars.innodb_page_compression_trim_sect4096 = srv_stats.page_compression_trim_sect4096;
+ export_vars.innodb_index_pages_written = srv_stats.index_pages_written;
+ export_vars.innodb_non_index_pages_written = srv_stats.non_index_pages_written;
+ export_vars.innodb_pages_page_compressed = srv_stats.pages_page_compressed;
+ export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op;
+ export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved;
+ export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed;
+
#ifdef UNIV_DEBUG
rw_lock_s_lock(&purge_sys->latch);
trx_id_t done_trx_no = purge_sys->done.trx_no;
diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc
index 73866520ef0..248f1e4db89 100644
--- a/storage/xtradb/srv/srv0start.cc
+++ b/storage/xtradb/srv/srv0start.cc
@@ -3,6 +3,7 @@
Copyright (c) 1996, 2014, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2009, Percona Inc.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -67,11 +68,14 @@ Created 2/16/1996 Heikki Tuuri
#include "ibuf0ibuf.h"
#include "srv0start.h"
#include "srv0srv.h"
+#include "buf0flu.h"
+
#ifndef UNIV_HOTBACKUP
# include "trx0rseg.h"
# include "os0proc.h"
# include "sync0sync.h"
# include "buf0flu.h"
+# include "buf0mtflu.h"
# include "buf0rea.h"
# include "dict0boot.h"
# include "dict0load.h"
@@ -141,11 +145,13 @@ SRV_MAX_N_IO_THREADS + 6: srv_purge_coordinator_thread
SRV_MAX_N_IO_THREADS + 7: srv_worker_thread
...
SRV_MAX_N_IO_THREADS + 7 + srv_n_purge_threads - 1: srv_worker_thread */
-static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 7
- + SRV_MAX_N_PURGE_THREADS];
+static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 7 + SRV_MAX_N_PURGE_THREADS + MTFLUSH_MAX_WORKER];
/** Thead handles */
-static os_thread_t thread_handles[SRV_MAX_N_IO_THREADS + 7 + SRV_MAX_N_PURGE_THREADS];
+static os_thread_t thread_handles[SRV_MAX_N_IO_THREADS + 7 + SRV_MAX_N_PURGE_THREADS+ MTFLUSH_MAX_WORKER];
+/* Thread context data for multi-threaded flush */
+void *mtflush_ctx=NULL;
+
static os_thread_t buf_flush_page_cleaner_thread_handle;
static os_thread_t buf_dump_thread_handle;
static os_thread_t dict_stats_thread_handle;
@@ -570,7 +576,7 @@ create_log_file(
*file = os_file_create(
innodb_file_log_key, name,
OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL,
- OS_LOG_FILE, &ret);
+ OS_LOG_FILE, &ret, FALSE);
if (!ret) {
ib_logf(IB_LOG_LEVEL_ERROR, "Cannot create %s", name);
@@ -787,7 +793,7 @@ open_log_file(
*file = os_file_create(innodb_file_log_key, name,
OS_FILE_OPEN, OS_FILE_AIO,
- OS_LOG_FILE, &ret);
+ OS_LOG_FILE, &ret, FALSE);
if (!ret) {
ib_logf(IB_LOG_LEVEL_ERROR, "Unable to open '%s'", name);
return(DB_ERROR);
@@ -878,7 +884,7 @@ open_or_create_data_files(
files[i] = os_file_create(
innodb_file_data_key, name, OS_FILE_CREATE,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret);
+ OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
if (srv_read_only_mode) {
@@ -921,7 +927,7 @@ open_or_create_data_files(
files[i] = os_file_create(
innodb_file_data_key, name, OS_FILE_OPEN_RAW,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret);
+ OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
if (!ret) {
ib_logf(IB_LOG_LEVEL_ERROR,
@@ -954,17 +960,17 @@ open_or_create_data_files(
files[i] = os_file_create(
innodb_file_data_key,
name, OS_FILE_OPEN_RAW,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret);
+ OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
} else if (i == 0) {
files[i] = os_file_create(
innodb_file_data_key,
name, OS_FILE_OPEN_RETRY,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret);
+ OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
} else {
files[i] = os_file_create(
innodb_file_data_key,
name, OS_FILE_OPEN, OS_FILE_NORMAL,
- OS_DATA_FILE, &ret);
+ OS_DATA_FILE, &ret, FALSE);
}
if (!ret) {
@@ -1046,7 +1052,7 @@ skip_size_check:
check_first_page:
check_msg = fil_read_first_page(
files[i], one_opened, &flags, &space,
- min_flushed_lsn, max_flushed_lsn);
+ min_flushed_lsn, max_flushed_lsn, ULINT_UNDEFINED);
if (check_msg) {
@@ -1181,7 +1187,7 @@ srv_undo_tablespace_create(
innodb_file_data_key,
name,
srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret);
+ OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
if (srv_read_only_mode && ret) {
ib_logf(IB_LOG_LEVEL_INFO,
@@ -1268,7 +1274,8 @@ srv_undo_tablespace_open(
| OS_FILE_ON_ERROR_SILENT,
OS_FILE_NORMAL,
OS_DATA_FILE,
- &ret);
+ &ret,
+ FALSE);
/* If the file open was successful then load the tablespace. */
@@ -2825,6 +2832,24 @@ files_checked:
}
if (!srv_read_only_mode) {
+ if (srv_use_mtflush) {
+ /* Start multi-threaded flush threads */
+ mtflush_ctx = buf_mtflu_handler_init(
+ srv_mtflush_threads,
+ srv_buf_pool_instances);
+
+ /* Set up the thread ids */
+ buf_mtflu_set_thread_ids(
+ srv_mtflush_threads,
+ mtflush_ctx,
+ (thread_ids + 6 + SRV_MAX_N_PURGE_THREADS));
+#if UNIV_DEBUG
+ fprintf(stderr, "InnoDB: Note: %s:%d buf-pool-instances:%lu mtflush_threads %lu\n",
+ __FILE__, __LINE__, srv_buf_pool_instances, srv_mtflush_threads);
+#endif
+ }
+
+
buf_flush_page_cleaner_thread_handle = os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL);
buf_flush_page_cleaner_thread_started = true;
}
@@ -3102,6 +3127,13 @@ innobase_shutdown_for_mysql(void)
logs_empty_and_mark_files_at_shutdown() and should have
already quit or is quitting right now. */
+
+ if (srv_use_mtflush) {
+ /* g. Exit the multi threaded flush threads */
+
+ buf_mtflu_io_thread_exit();
+ }
+
os_mutex_enter(os_sync_mutex);
if (os_thread_count == 0) {
diff --git a/storage/xtradb/ut/ut0wqueue.cc b/storage/xtradb/ut/ut0wqueue.cc
index d1ba36b3b00..1607e535a94 100644
--- a/storage/xtradb/ut/ut0wqueue.cc
+++ b/storage/xtradb/ut/ut0wqueue.cc
@@ -162,6 +162,38 @@ ib_wqueue_timedwait(
}
/********************************************************************
+Remove and return the first item on the work queue without waiting.
+@return data pointer of the removed item, or NULL if the queue was empty */
+void*
+ib_wqueue_nowait(
+/*=============*/
+ ib_wqueue_t* wq) /*!< in: work queue */
+{
+ ib_list_node_t* node = NULL;
+
+ mutex_enter(&wq->mutex);
+
+ if(!ib_list_is_empty(wq->items)) {
+ node = ib_list_get_first(wq->items);
+
+ if (node) {
+ ib_list_remove(wq->items, node);
+
+ }
+ }
+
+ /* Reset the event whenever the list is empty at this point; this
+ also covers the case where it was already empty on entry. */
+ if(ib_list_is_empty(wq->items)) {
+ os_event_reset(wq->event);
+ }
+
+ mutex_exit(&wq->mutex);
+
+ return (node ? node->data : NULL);
+}
+
+/********************************************************************
Check if queue is empty. */
ibool
@@ -173,3 +205,20 @@ ib_wqueue_is_empty(
{
return(ib_list_is_empty(wq->items));
}
+
+/********************************************************************
+Get the number of items on the queue; the count is read under wq->mutex.
+@return number of items on queue, as reported by ib_list_len() */
+ulint
+ib_wqueue_len(
+/*==========*/
+ ib_wqueue_t* wq) /*!< in: work queue */
+{
+ ulint len = 0;
+
+ mutex_enter(&wq->mutex);
+ len = ib_list_len(wq->items);
+ mutex_exit(&wq->mutex);
+
+ return(len);
+}