diff options
140 files changed, 15138 insertions, 551 deletions
diff --git a/cmake/bzip2.cmake b/cmake/bzip2.cmake new file mode 100644 index 00000000000..0c15853d0b9 --- /dev/null +++ b/cmake/bzip2.cmake @@ -0,0 +1,33 @@ +# Copyright (C) 2014, SkySQL Ab. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free Software +# Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +SET(WITH_INNODB_BZIP2 AUTO CACHE STRING + "Build with bzip2. Possible values are 'ON', 'OFF', 'AUTO' and default is 'AUTO'") + +MACRO (MYSQL_CHECK_BZIP2) + IF (WITH_INNODB_BZIP2 STREQUAL "ON" OR WITH_INNODB_BZIP2 STREQUAL "AUTO") + CHECK_INCLUDE_FILES(bzlib.h HAVE_BZLIB2_H) + CHECK_LIBRARY_EXISTS(bz2 BZ2_bzBuffToBuffCompress "" HAVE_BZLIB2_COMPRESS) + CHECK_LIBRARY_EXISTS(bz2 BZ2_bzBuffToBuffDecompress "" HAVE_BZLIB2_DECOMPRESS) + + IF (HAVE_BZLIB2_COMPRESS AND HAVE_BZLIB2_DECOMPRESS AND HAVE_BZLIB2_H) + ADD_DEFINITIONS(-DHAVE_BZIP2=1) + LINK_LIBRARIES(bz2) + ELSE() + IF (WITH_INNODB_BZIP2 STREQUAL "ON") + MESSAGE(FATAL_ERROR "Required bzip2 library is not found") + ENDIF() + ENDIF() + ENDIF() +ENDMACRO() diff --git a/cmake/lz4.cmake b/cmake/lz4.cmake new file mode 100644 index 00000000000..1607c68c5fb --- /dev/null +++ b/cmake/lz4.cmake @@ -0,0 +1,48 @@ +# Copyright (C) 2014, SkySQL Ab. All Rights Reserved. 
+# +# This program is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free Software +# Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +SET(WITH_INNODB_LZ4 AUTO CACHE STRING + "Build with lz4. Possible values are 'ON', 'OFF', 'AUTO' and default is 'AUTO'") + +MACRO (MYSQL_CHECK_LZ4) + IF (WITH_INNODB_LZ4 STREQUAL "ON" OR WITH_INNODB_LZ4 STREQUAL "AUTO") + CHECK_INCLUDE_FILES(lz4.h HAVE_LZ4_H) + CHECK_LIBRARY_EXISTS(lz4 LZ4_compress_limitedOutput "" HAVE_LZ4_SHARED_LIB) + + IF (HAVE_LZ4_SHARED_LIB AND HAVE_LZ4_H) + ADD_DEFINITIONS(-DHAVE_LZ4=1) + LINK_LIBRARIES(lz4) + ELSE() + IF (WITH_INNODB_LZ4 STREQUAL "ON") + MESSAGE(FATAL_ERROR "Required lz4 library is not found") + ENDIF() + ENDIF() + ENDIF() +ENDMACRO() + +MACRO (MYSQL_CHECK_LZ4_STATIC) + IF (WITH_INNODB_LZ4 STREQUAL "ON" OR WITH_INNODB_LZ4 STREQUAL "AUTO") + CHECK_INCLUDE_FILES(lz4.h HAVE_LZ4_H) + CHECK_LIBRARY_EXISTS(liblz4.a LZ4_compress_limitedOutput "" HAVE_LZ4_LIB) + + IF(HAVE_LZ4_LIB AND HAVE_LZ4_H) + ADD_DEFINITIONS(-DHAVE_LZ4=1) + LINK_LIBRARIES(liblz4.a) + ELSE() + IF (WITH_INNODB_LZ4 STREQUAL "ON") + MESSAGE(FATAL_ERROR "Required lz4 library is not found") + ENDIF() + ENDIF() + ENDIF() +ENDMACRO()
\ No newline at end of file diff --git a/cmake/lzma.cmake b/cmake/lzma.cmake new file mode 100644 index 00000000000..12a28a17a43 --- /dev/null +++ b/cmake/lzma.cmake @@ -0,0 +1,33 @@ +# Copyright (C) 2014, SkySQL Ab. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free Software +# Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +SET(WITH_INNODB_LZMA AUTO CACHE STRING + "Build with lzma. Possible values are 'ON', 'OFF', 'AUTO' and default is 'AUTO'") + +MACRO (MYSQL_CHECK_LZMA) + IF (WITH_INNODB_LZMA STREQUAL "ON" OR WITH_INNODB_LZMA STREQUAL "AUTO") + CHECK_INCLUDE_FILES(lzma.h HAVE_LZMA_H) + CHECK_LIBRARY_EXISTS(lzma lzma_stream_buffer_decode "" HAVE_LZMA_DECODE) + CHECK_LIBRARY_EXISTS(lzma lzma_easy_buffer_encode "" HAVE_LZMA_ENCODE) + + IF (HAVE_LZMA_DECODE AND HAVE_LZMA_ENCODE AND HAVE_LZMA_H) + ADD_DEFINITIONS(-DHAVE_LZMA=1) + LINK_LIBRARIES(lzma) + ELSE() + IF (WITH_INNODB_LZMA STREQUAL "ON") + MESSAGE(FATAL_ERROR "Required lzma library is not found") + ENDIF() + ENDIF() + ENDIF() +ENDMACRO() diff --git a/cmake/lzo.cmake b/cmake/lzo.cmake new file mode 100644 index 00000000000..07cba011c06 --- /dev/null +++ b/cmake/lzo.cmake @@ -0,0 +1,48 @@ +# Copyright (C) 2014, SkySQL Ab. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free Software +# Foundation; version 2 of the License. 
+# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +SET(WITH_INNODB_LZO AUTO CACHE STRING + "Build with lzo. Possible values are 'ON', 'OFF', 'AUTO' and default is 'AUTO'") + +MACRO (MYSQL_CHECK_LZO_STATIC) + IF (WITH_INNODB_LZO STREQUAL "ON" OR WITH_INNODB_LZO STREQUAL "AUTO") + CHECK_INCLUDE_FILES(lzo/lzo1x.h HAVE_LZO_H) + CHECK_LIBRARY_EXISTS(liblzo2.a lzo1x_1_compress "" HAVE_LZO_LIB) + + IF(HAVE_LZO_LIB AND HAVE_LZO_H) + ADD_DEFINITIONS(-DHAVE_LZO=1) + LINK_LIBRARIES(liblzo2.a) + ELSE() + IF (WITH_INNODB_LZO STREQUAL "ON") + MESSAGE(FATAL_ERROR "Required lzo library is not found") + ENDIF() + ENDIF() + ENDIF() +ENDMACRO() + +MACRO (MYSQL_CHECK_LZO) + IF (WITH_INNODB_LZO STREQUAL "ON" OR WITH_INNODB_LZO STREQUAL "AUTO") + CHECK_INCLUDE_FILES(lzo/lzo1x.h HAVE_LZO_H) + CHECK_LIBRARY_EXISTS(lzo2 lzo1x_1_compress "" HAVE_LZO_SHARED_LIB) + + IF(HAVE_LZO_SHARED_LIB AND HAVE_LZO_H) + ADD_DEFINITIONS(-DHAVE_LZO=1) + LINK_LIBRARIES(lzo2) + ELSE() + IF (WITH_INNODB_LZO STREQUAL "ON") + MESSAGE(FATAL_ERROR "Required lzo library is not found") + ENDIF() + ENDIF() + ENDIF() +ENDMACRO() diff --git a/cmake/snappy.cmake b/cmake/snappy.cmake new file mode 100644 index 00000000000..cb0839a3480 --- /dev/null +++ b/cmake/snappy.cmake @@ -0,0 +1,32 @@ +# Copyright (C) 2015, MariaDB Corporation. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free Software +# Foundation; version 2 of the License. 
+# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +SET(WITH_INNODB_SNAPPY AUTO CACHE STRING + "Build with snappy. Possible values are 'ON', 'OFF', 'AUTO' and default is 'AUTO'") + +MACRO (MYSQL_CHECK_SNAPPY) + IF (WITH_INNODB_SNAPPY STREQUAL "ON" OR WITH_INNODB_SNAPPY STREQUAL "AUTO") + CHECK_INCLUDE_FILES(snappy-c.h HAVE_SNAPPY_H) + CHECK_LIBRARY_EXISTS(snappy snappy_uncompress "" HAVE_SNAPPY_SHARED_LIB) + + IF(HAVE_SNAPPY_SHARED_LIB AND HAVE_SNAPPY_H) + ADD_DEFINITIONS(-DHAVE_SNAPPY=1) + LINK_LIBRARIES(snappy) + ELSE() + IF (WITH_INNODB_SNAPPY STREQUAL "ON") + MESSAGE(FATAL_ERROR "Required snappy library is not found") + ENDIF() + ENDIF() + ENDIF() +ENDMACRO() diff --git a/config.h.cmake b/config.h.cmake index 46eed79dfa5..a90e636aa9c 100644 --- a/config.h.cmake +++ b/config.h.cmake @@ -93,6 +93,7 @@ #cmakedefine HAVE_SYS_TYPES_H 1 #cmakedefine HAVE_SYS_UN_H 1 #cmakedefine HAVE_SYS_VADVISE_H 1 +#cmakedefine HAVE_SYS_STATVFS_H 1 #cmakedefine HAVE_TERM_H 1 #cmakedefine HAVE_TERMBITS_H 1 #cmakedefine HAVE_TERMIOS_H 1 @@ -213,6 +214,8 @@ #cmakedefine HAVE_POLL 1 #cmakedefine HAVE_PORT_CREATE 1 #cmakedefine HAVE_POSIX_FALLOCATE 1 +#cmakedefine HAVE_LINUX_FALLOC_H 1 +#cmakedefine HAVE_FALLOCATE 1 #cmakedefine HAVE_PREAD 1 #cmakedefine HAVE_PAUSE_INSTRUCTION 1 #cmakedefine HAVE_FAKE_PAUSE_INSTRUCTION 1 diff --git a/configure.cmake b/configure.cmake index fb127688bd2..cbcb26001cb 100644 --- a/configure.cmake +++ b/configure.cmake @@ -189,6 +189,7 @@ CHECK_INCLUDE_FILES (ieeefp.h HAVE_IEEEFP_H) CHECK_INCLUDE_FILES (inttypes.h HAVE_INTTYPES_H) CHECK_INCLUDE_FILES 
(langinfo.h HAVE_LANGINFO_H) CHECK_INCLUDE_FILES (linux/unistd.h HAVE_LINUX_UNISTD_H) +CHECK_INCLUDE_FILES (linux/falloc.h HAVE_LINUX_FALLOC_H) CHECK_INCLUDE_FILES (limits.h HAVE_LIMITS_H) CHECK_INCLUDE_FILES (locale.h HAVE_LOCALE_H) CHECK_INCLUDE_FILES (malloc.h HAVE_MALLOC_H) @@ -250,6 +251,7 @@ CHECK_INCLUDE_FILES (wchar.h HAVE_WCHAR_H) CHECK_INCLUDE_FILES (wctype.h HAVE_WCTYPE_H) CHECK_INCLUDE_FILES (sys/sockio.h HAVE_SYS_SOCKIO_H) CHECK_INCLUDE_FILES (sys/utsname.h HAVE_SYS_UTSNAME_H) +CHECK_INCLUDE_FILES (sys/statvfs.h HAVE_SYS_STATVFS_H) IF(BFD_H_EXISTS) IF(NOT_FOR_DISTRIBUTION) @@ -385,6 +387,7 @@ CHECK_FUNCTION_EXISTS (perror HAVE_PERROR) CHECK_FUNCTION_EXISTS (poll HAVE_POLL) CHECK_FUNCTION_EXISTS (port_create HAVE_PORT_CREATE) CHECK_FUNCTION_EXISTS (posix_fallocate HAVE_POSIX_FALLOCATE) +CHECK_FUNCTION_EXISTS (fallocate HAVE_FALLOCATE) CHECK_FUNCTION_EXISTS (pread HAVE_PREAD) CHECK_FUNCTION_EXISTS (pthread_attr_create HAVE_PTHREAD_ATTR_CREATE) CHECK_FUNCTION_EXISTS (pthread_attr_getstacksize HAVE_PTHREAD_ATTR_GETSTACKSIZE) diff --git a/extra/CMakeLists.txt b/extra/CMakeLists.txt index 585b5aef6f6..3f87bb2df62 100644 --- a/extra/CMakeLists.txt +++ b/extra/CMakeLists.txt @@ -72,32 +72,27 @@ IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS") ENDIF() ENDIF() -MYSQL_ADD_EXECUTABLE(replace replace.c COMPONENT Server) -TARGET_LINK_LIBRARIES(replace mysys) -IF(UNIX) - MYSQL_ADD_EXECUTABLE(resolve_stack_dump resolve_stack_dump.c) - TARGET_LINK_LIBRARIES(resolve_stack_dump mysys) - - MYSQL_ADD_EXECUTABLE(mysql_waitpid mysql_waitpid.c COMPONENT Client) - TARGET_LINK_LIBRARIES(mysql_waitpid mysys) -ENDIF() - - +IF(WITH_INNOBASE_STORAGE_ENGINE) # Add path to the InnoDB headers - INCLUDE_DIRECTORIES( - ${CMAKE_SOURCE_DIR}/storage/innobase/include - ${CMAKE_SOURCE_DIR}/sql) - + INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/innobase/include) # We use the InnoDB code directly in case the code changes. 
ADD_DEFINITIONS("-DUNIV_INNOCHECKSUM") SET(INNOBASE_SOURCES ../storage/innobase/buf/buf0checksum.cc ../storage/innobase/ut/ut0crc32.cc ../storage/innobase/ut/ut0ut.cc - ../storage/innobase/page/page0zip.cc - ) - + ) MYSQL_ADD_EXECUTABLE(innochecksum innochecksum.cc ${INNOBASE_SOURCES}) TARGET_LINK_LIBRARIES(innochecksum mysys mysys_ssl) - ADD_DEPENDENCIES(innochecksum GenError) +ENDIF() + +MYSQL_ADD_EXECUTABLE(replace replace.c COMPONENT Server) +TARGET_LINK_LIBRARIES(replace mysys) + +IF(UNIX) + MYSQL_ADD_EXECUTABLE(resolve_stack_dump resolve_stack_dump.c) + TARGET_LINK_LIBRARIES(resolve_stack_dump mysys) + MYSQL_ADD_EXECUTABLE(mysql_waitpid mysql_waitpid.c COMPONENT Client) + TARGET_LINK_LIBRARIES(mysql_waitpid mysys) +ENDIF() diff --git a/extra/innochecksum.cc.moved b/extra/innochecksum.cc.moved new file mode 100644 index 00000000000..c89196b1eee --- /dev/null +++ b/extra/innochecksum.cc.moved @@ -0,0 +1,396 @@ +/* + Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +/* + InnoDB offline file checksum utility. 85% of the code in this utility + is included from the InnoDB codebase. + + The final 15% was originally written by Mark Smith of Danga + Interactive, Inc. <junior@danga.com> + + Published with a permission. 
+*/ + +#include <my_global.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/types.h> +#include <sys/stat.h> +#ifndef __WIN__ +# include <unistd.h> +#endif +#include <my_getopt.h> +#include <m_string.h> +#include <welcome_copyright_notice.h> /* ORACLE_WELCOME_COPYRIGHT_NOTICE */ + +/* Only parts of these files are included from the InnoDB codebase. +The parts not included are excluded by #ifndef UNIV_INNOCHECKSUM. */ + +#include "univ.i" /* include all of this */ + +#include "buf0checksum.h" /* buf_calc_page_*() */ +#include "fil0fil.h" /* FIL_* */ +#include "fsp0fsp.h" /* fsp_flags_get_page_size() & + fsp_flags_get_zip_size() */ +#include "mach0data.h" /* mach_read_from_4() */ +#include "ut0crc32.h" /* ut_crc32_init() */ + +#ifdef UNIV_NONINL +# include "fsp0fsp.ic" +# include "mach0data.ic" +# include "ut0rnd.ic" +#endif + +/* Global variables */ +static my_bool verbose; +static my_bool debug; +static my_bool just_count; +static ulong start_page; +static ulong end_page; +static ulong do_page; +static my_bool use_end_page; +static my_bool do_one_page; +ulong srv_page_size; /* replaces declaration in srv0srv.c */ +static ulong physical_page_size; /* Page size in bytes on disk. */ +static ulong logical_page_size; /* Page size when uncompressed. */ + +/* Get the page size of the filespace from the filespace header: reads UNIV_PAGE_SIZE_MIN bytes from f, rewinds f, stores the logical size in srv_page_size as well, and reports the logical size as the physical size when the flags give a zip size of 0. Returns TRUE on success, FALSE if the header could not be read. */ +static +my_bool +get_page_size( +/*==========*/ + FILE* f, /*!< in: file pointer, must be open + and set to start of file */ + byte* buf, /*!< in: buffer used to read the page */ + ulong* logical_page_size, /*!< out: Logical/Uncompressed page size */ + ulong* physical_page_size) /*!< out: Physical/Compressed page size */ +{ + ulong flags; + + int bytes= fread(buf, 1, UNIV_PAGE_SIZE_MIN, f); + + if (ferror(f)) + { + perror("Error reading file header"); + return FALSE; + } + + if (bytes != UNIV_PAGE_SIZE_MIN) + { + fprintf(stderr, "Error; Was not able to read the minimum page size "); + fprintf(stderr, "of %d bytes. 
Bytes read was %d\n", UNIV_PAGE_SIZE_MIN, bytes); + return FALSE; + } + + rewind(f); + + flags = mach_read_from_4(buf + FIL_PAGE_DATA + FSP_SPACE_FLAGS); + + /* srv_page_size is used by InnoDB code as UNIV_PAGE_SIZE */ + srv_page_size = *logical_page_size = fsp_flags_get_page_size(flags); + + /* fsp_flags_get_zip_size() will return zero if not compressed. */ + *physical_page_size = fsp_flags_get_zip_size(flags); + if (*physical_page_size == 0) + *physical_page_size= *logical_page_size; + + return TRUE; +} + + +/* command line argument to do page checks (that's it) */ +/* another argument to specify page ranges... seek to right spot and go from there */ + +static struct my_option innochecksum_options[] = +{ + {"help", '?', "Displays this help and exits.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"info", 'I', "Synonym for --help.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"version", 'V', "Displays version information and exits.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"verbose", 'v', "Verbose (prints progress every 5 seconds).", + &verbose, &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"debug", 'd', "Debug mode (prints checksums for each page, implies verbose).", + &debug, &debug, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"count", 'c', "Print the count of pages in the file.", + &just_count, &just_count, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"start_page", 's', "Start on this page number (0 based).", + &start_page, &start_page, 0, GET_ULONG, REQUIRED_ARG, + 0, 0, (longlong) 2L*1024L*1024L*1024L, 0, 1, 0}, + {"end_page", 'e', "End at this page number (0 based).", + &end_page, &end_page, 0, GET_ULONG, REQUIRED_ARG, + 0, 0, (longlong) 2L*1024L*1024L*1024L, 0, 1, 0}, + {"page", 'p', "Check only this page (0 based).", + &do_page, &do_page, 0, GET_ULONG, REQUIRED_ARG, + 0, 0, (longlong) 2L*1024L*1024L*1024L, 0, 1, 0}, + + {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + +static void print_version(void) +{ + 
printf("%s Ver %s, for %s (%s)\n", + my_progname, INNODB_VERSION_STR, + SYSTEM_TYPE, MACHINE_TYPE); +} + +static void usage(void) /* Prints the version, copyright notice, usage line, option help and current option values. */ +{ + print_version(); + puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2000")); + printf("InnoDB offline file checksum utility.\n"); + printf("Usage: %s [-c] [-s <start page>] [-e <end page>] [-p <page>] [-v] [-d] <filename>\n", my_progname); + my_print_help(innochecksum_options); + my_print_variables(innochecksum_options); +} + +extern "C" my_bool /* Option callback handed to handle_options(): applies the side effects of -d, -e and -p; -V, -I and -? print and exit(0). Always returns 0. */ +innochecksum_get_one_option( +/*========================*/ + int optid, + const struct my_option *opt __attribute__((unused)), + char *argument __attribute__((unused))) +{ + switch (optid) { + case 'd': + verbose=1; /* debug implies verbose... */ + break; + case 'e': + use_end_page= 1; + break; + case 'p': + end_page= start_page= do_page; + use_end_page= 1; + do_one_page= 1; + break; + case 'V': + print_version(); + exit(0); + break; + case 'I': + case '?': + usage(); + exit(0); + break; + } + return 0; +} + +static int get_options( /* Parses the command line with handle_options(); exits with its error code on a parse error, returns 1 after printing usage when no argument is left for the file name, otherwise 0. */ +/*===================*/ + int *argc, + char ***argv) +{ + int ho_error; + + if ((ho_error=handle_options(argc, argv, innochecksum_options, innochecksum_get_one_option))) + exit(ho_error); + + /* The next arg must be the filename */ + if (!*argc) + { + usage(); + return 1; + } + return 0; +} /* get_options */ + + +int main(int argc, char **argv) /* Reads the page size from the file header, then checks the LSN fields and the old-style, InnoDB and crc32 checksums of each page starting at start_page. Returns 0 when every checked page passes (or after -c printed the page count) and 1 on an I/O error, a file with compressed pages, or the first page that fails a check. */ +{ + FILE* f; /* our input file */ + char* filename; /* our input filename. 
*/ + unsigned char buf[UNIV_PAGE_SIZE_MAX]; /* Buffer to store pages read */ + ulong bytes; /* bytes read count */ + ulint ct; /* current page number (0 based) */ + time_t now; /* current time */ + time_t lastt; /* last time */ + ulint oldcsum, oldcsumfield, csum, csumfield, crc32, logseq, logseqfield; + /* ulints for checksum storage */ + struct stat st; /* for stat, if you couldn't guess */ + unsigned long long int size; /* size of file (has to be 64 bits) */ + ulint pages; /* number of pages in file */ + off_t offset= 0; + int fd; + + printf("InnoDB offline file checksum utility.\n"); + + ut_crc32_init(); + + MY_INIT(argv[0]); + + if (get_options(&argc,&argv)) + exit(1); + + if (verbose) + my_print_variables(innochecksum_options); + + /* The file name is not optional */ + filename = *argv; + if (*filename == '\0') + { + fprintf(stderr, "Error; File name missing\n"); + return 1; + } + + /* stat the file to get size and page count */ + if (stat(filename, &st)) + { + fprintf(stderr, "Error; %s cannot be found\n", filename); + return 1; + } + size= st.st_size; + + /* Open the file for reading */ + f= fopen(filename, "rb"); + if (f == NULL) + { + fprintf(stderr, "Error; %s cannot be opened", filename); + perror(" "); + return 1; + } + + if (!get_page_size(f, buf, &logical_page_size, &physical_page_size)) + { + return 1; + } + + /* This tool currently does not support Compressed tables */ + if (logical_page_size != physical_page_size) + { + fprintf(stderr, "Error; This file contains compressed pages\n"); + return 1; + } + + pages= (ulint) (size / physical_page_size); + + if (just_count) + { + if (verbose) + printf("Number of pages: "); + printf("%lu\n", pages); + return 0; + } + else if (verbose) + { + printf("file %s = %llu bytes (%lu pages)...\n", filename, size, pages); + if (do_one_page) + printf("InnoChecksum; checking page %lu\n", do_page); + else + printf("InnoChecksum; checking pages in range %lu to %lu\n", start_page, use_end_page ? 
end_page : (pages - 1)); + } + + /* seek to the necessary position */ + if (start_page) + { + fd= fileno(f); + if (fd < 0) /* fileno() reports failure as -1; 0 is a valid descriptor */ + { + perror("Error; Unable to obtain file descriptor number"); + return 1; + } + + offset= (off_t)start_page * (off_t)physical_page_size; + + if (lseek(fd, offset, SEEK_SET) != offset) /* NOTE(review): this moves the descriptor underneath the buffered stream f, which get_page_size() has already read from; seeking through the stream (e.g. fseeko) would keep stdio in sync -- confirm portability before switching */ + { + perror("Error; Unable to seek to necessary offset"); + return 1; + } + } + + /* main checksumming loop */ + ct= start_page; + lastt= 0; + while (!feof(f)) + { + bytes= fread(buf, 1, physical_page_size, f); + if (!bytes && feof(f)) + return 0; + + if (ferror(f)) + { + fprintf(stderr, "Error reading %lu bytes", physical_page_size); + perror(" "); + return 1; + } + if (bytes != physical_page_size) + { + fprintf(stderr, "Error; bytes read (%lu) doesn't match page size (%lu)\n", bytes, physical_page_size); + return 1; + } + + /* check the "stored log sequence numbers" */ + logseq= mach_read_from_4(buf + FIL_PAGE_LSN + 4); + logseqfield= mach_read_from_4(buf + logical_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM + 4); + if (debug) + printf("page %lu: log sequence number: first = %lu; second = %lu\n", ct, logseq, logseqfield); + if (logseq != logseqfield) + { + fprintf(stderr, "Fail; page %lu invalid (fails log sequence number check)\n", ct); + return 1; + } + + /* check old method of checksumming */ + oldcsum= buf_calc_page_old_checksum(buf); + oldcsumfield= mach_read_from_4(buf + logical_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM); + if (debug) + printf("page %lu: old style: calculated = %lu; recorded = %lu\n", ct, oldcsum, oldcsumfield); + if (oldcsumfield != mach_read_from_4(buf + FIL_PAGE_LSN) && oldcsumfield != oldcsum) + { + fprintf(stderr, "Fail; page %lu invalid (fails old style checksum)\n", ct); + return 1; + } + + /* now check the new method */ + csum= buf_calc_page_new_checksum(buf); + crc32= buf_calc_page_crc32(buf); + csumfield= mach_read_from_4(buf + FIL_PAGE_SPACE_OR_CHKSUM); + if (debug) + printf("page %lu: new style: calculated = %lu; crc32 = %lu; 
recorded = %lu\n", + ct, csum, crc32, csumfield); + if (csumfield != 0 && crc32 != csumfield && csum != csumfield) + { + fprintf(stderr, "Fail; page %lu invalid (fails innodb and crc32 checksum)\n", ct); + return 1; + } + + /* end if this was the last page we were supposed to check */ + if (use_end_page && (ct >= end_page)) + return 0; + + /* do counter increase and progress printing */ + ct++; + if (verbose) + { + if (ct % 64 == 0) + { + now= time(0); + if (!lastt) lastt= now; + if (now - lastt >= 1) /* NOTE(review): the --verbose help text says progress is printed every 5 seconds, but this threshold is 1 second -- confirm which is intended */ + { + printf("page %lu okay: %.3f%% done\n", (ct - 1), (float) ct / pages * 100); + lastt= now; + } + } + } + } + return 0; +} + diff --git a/mysql-test/disabled.def b/mysql-test/disabled.def index bb0e243326a..e5fa24786e1 100644 --- a/mysql-test/disabled.def +++ b/mysql-test/disabled.def @@ -20,4 +20,3 @@ mysql_embedded : Bug#12561297 2011-05-14 Anitha Dependent on PB2 chang ssl_crl_clients_valid : broken upstream ssl_crl : broken upstream ssl_crl_clrpath : broken upstream -file_contents : MDEV-6526 these files are not installed anymore diff --git a/mysql-test/include/have_innodb_bzip2.inc b/mysql-test/include/have_innodb_bzip2.inc new file mode 100644 index 00000000000..afbe78f0cf9 --- /dev/null +++ b/mysql-test/include/have_innodb_bzip2.inc @@ -0,0 +1,4 @@ +if (! 
`SELECT COUNT(*) FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_have_bzip2' AND variable_value = 'ON'`) +{ + --skip Test requires InnoDB compiled with libbz2 +} diff --git a/mysql-test/include/have_innodb_lz4.inc b/mysql-test/include/have_innodb_lz4.inc new file mode 100644 index 00000000000..bda3ffa8623 --- /dev/null +++ b/mysql-test/include/have_innodb_lz4.inc @@ -0,0 +1,4 @@ +if (!`SELECT COUNT(*) FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_have_lz4' AND variable_value = 'ON'`) +{ + --skip Test requires InnoDB compiled with liblz4 +} diff --git a/mysql-test/include/have_innodb_lzma.inc b/mysql-test/include/have_innodb_lzma.inc new file mode 100644 index 00000000000..86eda33f194 --- /dev/null +++ b/mysql-test/include/have_innodb_lzma.inc @@ -0,0 +1,4 @@ +if (!`SELECT COUNT(*) FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_have_lzma' AND variable_value = 'ON' `) +{ + --skip Test requires InnoDB compiled with liblzma +} diff --git a/mysql-test/include/have_innodb_lzo.inc b/mysql-test/include/have_innodb_lzo.inc new file mode 100644 index 00000000000..f40418b00fb --- /dev/null +++ b/mysql-test/include/have_innodb_lzo.inc @@ -0,0 +1,4 @@ +if (! `SELECT COUNT(*) FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_have_lzo' AND variable_value = 'ON'`) +{ + --skip Test requires InnoDB compiled with liblzo +} diff --git a/mysql-test/include/have_innodb_snappy.inc b/mysql-test/include/have_innodb_snappy.inc new file mode 100644 index 00000000000..c4dca4c19ee --- /dev/null +++ b/mysql-test/include/have_innodb_snappy.inc @@ -0,0 +1,4 @@ +if (! 
`SELECT COUNT(*) FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_have_snappy' AND variable_value = 'ON'`) +{ + --skip Test requires InnoDB compiled with libsnappy +} diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_bzip2.result b/mysql-test/suite/innodb/r/innodb-page_compression_bzip2.result new file mode 100644 index 00000000000..8d3bc063a71 --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb-page_compression_bzip2.result @@ -0,0 +1,437 @@ +set global innodb_file_format = `barracuda`; +set global innodb_file_per_table = on; +set global innodb_compression_algorithm = 5; +create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8; +show warnings; +Level Code Message +create table innodb_normal (c1 int, b char(20)) engine=innodb; +show warnings; +Level Code Message +create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1; +show warnings; +Level Code Message +show create table innodb_page_compressed1; +Table Create Table +innodb_page_compressed1 CREATE TABLE `innodb_page_compressed1` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=1 +create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2; +show warnings; +Level Code Message +show create table innodb_page_compressed2; +Table Create Table +innodb_page_compressed2 CREATE TABLE `innodb_page_compressed2` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=2 +create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3; +show warnings; +Level Code Message +show create table innodb_page_compressed3; +Table Create Table +innodb_page_compressed3 CREATE TABLE `innodb_page_compressed3` ( + 
`c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=3 +create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4; +show warnings; +Level Code Message +show create table innodb_page_compressed4; +Table Create Table +innodb_page_compressed4 CREATE TABLE `innodb_page_compressed4` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=4 +create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5; +show warnings; +Level Code Message +show create table innodb_page_compressed5; +Table Create Table +innodb_page_compressed5 CREATE TABLE `innodb_page_compressed5` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=5 +create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6; +show warnings; +Level Code Message +show create table innodb_page_compressed6; +Table Create Table +innodb_page_compressed6 CREATE TABLE `innodb_page_compressed6` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=6 +create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7; +show warnings; +Level Code Message +show create table innodb_page_compressed7; +Table Create Table +innodb_page_compressed7 CREATE TABLE `innodb_page_compressed7` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=7 +create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8; +show warnings; +Level Code 
Message +show create table innodb_page_compressed8; +Table Create Table +innodb_page_compressed8 CREATE TABLE `innodb_page_compressed8` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8 +create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9; +show warnings; +Level Code Message +show create table innodb_page_compressed9; +Table Create Table +innodb_page_compressed9 CREATE TABLE `innodb_page_compressed9` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=9 +create procedure innodb_insert_proc (repeat_count int) +begin +declare current_num int; +set current_num = 0; +while current_num < repeat_count do +insert into innodb_normal values(current_num,'testing..'); +set current_num = current_num + 1; +end while; +end// +commit; +set autocommit=0; +call innodb_insert_proc(5000); +commit; +set autocommit=1; +select count(*) from innodb_normal; +count(*) +5000 +insert into innodb_compressed select * from innodb_normal; +insert into innodb_page_compressed1 select * from innodb_normal; +insert into innodb_page_compressed2 select * from innodb_normal; +insert into innodb_page_compressed3 select * from innodb_normal; +insert into innodb_page_compressed4 select * from innodb_normal; +insert into innodb_page_compressed5 select * from innodb_normal; +insert into innodb_page_compressed6 select * from innodb_normal; +insert into innodb_page_compressed7 select * from innodb_normal; +insert into innodb_page_compressed8 select * from innodb_normal; +insert into innodb_page_compressed9 select * from innodb_normal; +commit; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from 
innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +alter table innodb_normal page_compressed=1 page_compression_level=8; +show warnings; +Level Code Message +show create table innodb_normal; +Table Create Table +innodb_normal CREATE TABLE `innodb_normal` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8 +alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0; +show warnings; +Level Code Message +show create table innodb_compressed; +Table Create Table +innodb_compressed CREATE TABLE `innodb_compressed` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 
500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +set global 
innodb_compression_algorithm = 1; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from 
innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +set global innodb_compression_algorithm = 0; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +update 
innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +drop procedure innodb_insert_proc; +drop table innodb_normal; +drop table innodb_compressed; +drop table innodb_page_compressed1; +drop table innodb_page_compressed2; +drop table innodb_page_compressed3; +drop table innodb_page_compressed4; +drop table innodb_page_compressed5; +drop table innodb_page_compressed6; +drop table innodb_page_compressed7; +drop table innodb_page_compressed8; +drop table innodb_page_compressed9; diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_lz4.result b/mysql-test/suite/innodb/r/innodb-page_compression_lz4.result new file mode 100644 index 00000000000..eeab2622cb6 --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb-page_compression_lz4.result @@ -0,0 +1,438 @@ 
+set global innodb_file_format = `barracuda`; +set global innodb_file_per_table = on; +set global innodb_compression_algorithm = 2; +create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8; +show warnings; +Level Code Message +create table innodb_normal (c1 int, b char(20)) engine=innodb; +show warnings; +Level Code Message +create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1; +show warnings; +Level Code Message +show create table innodb_page_compressed1; +Table Create Table +innodb_page_compressed1 CREATE TABLE `innodb_page_compressed1` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=1 +create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2; +show warnings; +Level Code Message +show create table innodb_page_compressed2; +Table Create Table +innodb_page_compressed2 CREATE TABLE `innodb_page_compressed2` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=2 +create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3; +show warnings; +Level Code Message +show create table innodb_page_compressed3; +Table Create Table +innodb_page_compressed3 CREATE TABLE `innodb_page_compressed3` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=3 +create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4; +show warnings; +Level Code Message +show create table innodb_page_compressed4; +Table Create Table +innodb_page_compressed4 CREATE TABLE `innodb_page_compressed4` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) 
ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=4 +create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5; +show warnings; +Level Code Message +show create table innodb_page_compressed5; +Table Create Table +innodb_page_compressed5 CREATE TABLE `innodb_page_compressed5` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=5 +create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6; +show warnings; +Level Code Message +show create table innodb_page_compressed6; +Table Create Table +innodb_page_compressed6 CREATE TABLE `innodb_page_compressed6` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=6 +create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7; +show warnings; +Level Code Message +show create table innodb_page_compressed7; +Table Create Table +innodb_page_compressed7 CREATE TABLE `innodb_page_compressed7` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=7 +create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8; +show warnings; +Level Code Message +show create table innodb_page_compressed8; +Table Create Table +innodb_page_compressed8 CREATE TABLE `innodb_page_compressed8` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8 +create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9; +show warnings; +Level Code Message +show create table innodb_page_compressed9; +Table 
Create Table +innodb_page_compressed9 CREATE TABLE `innodb_page_compressed9` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=9 +create procedure innodb_insert_proc (repeat_count int) +begin +declare current_num int; +set current_num = 0; +while current_num < repeat_count do +insert into innodb_normal values(current_num,'testing..'); +set current_num = current_num + 1; +end while; +end// +commit; +set autocommit=0; +call innodb_insert_proc(5000); +commit; +set autocommit=1; +select count(*) from innodb_normal; +count(*) +5000 +insert into innodb_compressed select * from innodb_normal; +insert into innodb_page_compressed1 select * from innodb_normal; +insert into innodb_page_compressed2 select * from innodb_normal; +insert into innodb_page_compressed3 select * from innodb_normal; +insert into innodb_page_compressed4 select * from innodb_normal; +insert into innodb_page_compressed5 select * from innodb_normal; +insert into innodb_page_compressed6 select * from innodb_normal; +insert into innodb_page_compressed7 select * from innodb_normal; +insert into innodb_page_compressed8 select * from innodb_normal; +insert into innodb_page_compressed9 select * from innodb_normal; +commit; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from 
innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +alter table innodb_normal page_compressed=1 page_compression_level=8; +show warnings; +Level Code Message +show create table innodb_normal; +Table Create Table +innodb_normal CREATE TABLE `innodb_normal` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8 +alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0; +show warnings; +Level Code Message +show create table innodb_compressed; +Table Create Table +innodb_compressed CREATE TABLE `innodb_compressed` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 
where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +set global innodb_compression_algorithm = 1; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) 
from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from 
innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +set global innodb_compression_algorithm = 0; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) 
from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +drop procedure innodb_insert_proc; +drop table innodb_normal; +drop table innodb_compressed; +drop table innodb_page_compressed1; +drop table innodb_page_compressed2; +drop table innodb_page_compressed3; +drop table innodb_page_compressed4; +drop table innodb_page_compressed5; +drop table innodb_page_compressed6; +drop table innodb_page_compressed7; +drop table innodb_page_compressed8; +drop table innodb_page_compressed9; diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_lzma.result b/mysql-test/suite/innodb/r/innodb-page_compression_lzma.result new file mode 100644 index 00000000000..d340801b656 --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb-page_compression_lzma.result @@ -0,0 +1,437 @@ +set global innodb_file_format = `barracuda`; +set global innodb_file_per_table = on; +set global innodb_compression_algorithm = 4; +create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8; +show warnings; +Level Code Message +create table innodb_normal (c1 int, b char(20)) engine=innodb; +show warnings; +Level Code Message +create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1; +show warnings; 
+Level Code Message +show create table innodb_page_compressed1; +Table Create Table +innodb_page_compressed1 CREATE TABLE `innodb_page_compressed1` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=1 +create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2; +show warnings; +Level Code Message +show create table innodb_page_compressed2; +Table Create Table +innodb_page_compressed2 CREATE TABLE `innodb_page_compressed2` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=2 +create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3; +show warnings; +Level Code Message +show create table innodb_page_compressed3; +Table Create Table +innodb_page_compressed3 CREATE TABLE `innodb_page_compressed3` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=3 +create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4; +show warnings; +Level Code Message +show create table innodb_page_compressed4; +Table Create Table +innodb_page_compressed4 CREATE TABLE `innodb_page_compressed4` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=4 +create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5; +show warnings; +Level Code Message +show create table innodb_page_compressed5; +Table Create Table +innodb_page_compressed5 CREATE TABLE `innodb_page_compressed5` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 
`page_compression_level`=5 +create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6; +show warnings; +Level Code Message +show create table innodb_page_compressed6; +Table Create Table +innodb_page_compressed6 CREATE TABLE `innodb_page_compressed6` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=6 +create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7; +show warnings; +Level Code Message +show create table innodb_page_compressed7; +Table Create Table +innodb_page_compressed7 CREATE TABLE `innodb_page_compressed7` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=7 +create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8; +show warnings; +Level Code Message +show create table innodb_page_compressed8; +Table Create Table +innodb_page_compressed8 CREATE TABLE `innodb_page_compressed8` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8 +create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9; +show warnings; +Level Code Message +show create table innodb_page_compressed9; +Table Create Table +innodb_page_compressed9 CREATE TABLE `innodb_page_compressed9` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=9 +create procedure innodb_insert_proc (repeat_count int) +begin +declare current_num int; +set current_num = 0; +while current_num < repeat_count do +insert into innodb_normal values(current_num,'testing..'); +set current_num = current_num + 1; +end while; +end// 
+commit; +set autocommit=0; +call innodb_insert_proc(5000); +commit; +set autocommit=1; +select count(*) from innodb_normal; +count(*) +5000 +insert into innodb_compressed select * from innodb_normal; +insert into innodb_page_compressed1 select * from innodb_normal; +insert into innodb_page_compressed2 select * from innodb_normal; +insert into innodb_page_compressed3 select * from innodb_normal; +insert into innodb_page_compressed4 select * from innodb_normal; +insert into innodb_page_compressed5 select * from innodb_normal; +insert into innodb_page_compressed6 select * from innodb_normal; +insert into innodb_page_compressed7 select * from innodb_normal; +insert into innodb_page_compressed8 select * from innodb_normal; +insert into innodb_page_compressed9 select * from innodb_normal; +commit; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +alter table innodb_normal page_compressed=1 page_compression_level=8; +show warnings; +Level Code Message +show create table innodb_normal; +Table Create Table +innodb_normal CREATE TABLE `innodb_normal` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 
`page_compression_level`=8 +alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0; +show warnings; +Level Code Message +show create table innodb_compressed; +Table Create Table +innodb_compressed CREATE TABLE `innodb_compressed` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 
set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +set global innodb_compression_algorithm = 1; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 
where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +set global innodb_compression_algorithm = 0; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update 
innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from 
innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +drop procedure innodb_insert_proc; +drop table innodb_normal; +drop table innodb_compressed; +drop table innodb_page_compressed1; +drop table innodb_page_compressed2; +drop table innodb_page_compressed3; +drop table innodb_page_compressed4; +drop table innodb_page_compressed5; +drop table innodb_page_compressed6; +drop table innodb_page_compressed7; +drop table innodb_page_compressed8; +drop table innodb_page_compressed9; diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_lzo.result b/mysql-test/suite/innodb/r/innodb-page_compression_lzo.result new file mode 100644 index 00000000000..fdbc99f60d9 --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb-page_compression_lzo.result @@ -0,0 +1,351 @@ +set global innodb_file_format = `barracuda`; +set global innodb_file_per_table = on; +set global innodb_compression_algorithm = 3; +create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8; +show warnings; +Level Code Message +create table innodb_normal (c1 int, b char(20)) engine=innodb; +show warnings; +Level Code Message +create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1; +show warnings; +Level Code Message +show create table innodb_page_compressed1; +Table Create Table +innodb_page_compressed1 CREATE TABLE `innodb_page_compressed1` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=1 +create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2; +show warnings; +Level Code Message +show create table 
innodb_page_compressed2; +Table Create Table +innodb_page_compressed2 CREATE TABLE `innodb_page_compressed2` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=2 +create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3; +show warnings; +Level Code Message +show create table innodb_page_compressed3; +Table Create Table +innodb_page_compressed3 CREATE TABLE `innodb_page_compressed3` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=3 +create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4; +show warnings; +Level Code Message +show create table innodb_page_compressed4; +Table Create Table +innodb_page_compressed4 CREATE TABLE `innodb_page_compressed4` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=4 +create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5; +show warnings; +Level Code Message +show create table innodb_page_compressed5; +Table Create Table +innodb_page_compressed5 CREATE TABLE `innodb_page_compressed5` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=5 +create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6; +show warnings; +Level Code Message +show create table innodb_page_compressed6; +Table Create Table +innodb_page_compressed6 CREATE TABLE `innodb_page_compressed6` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=6 +create table 
innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7; +show warnings; +Level Code Message +show create table innodb_page_compressed7; +Table Create Table +innodb_page_compressed7 CREATE TABLE `innodb_page_compressed7` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=7 +create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8; +show warnings; +Level Code Message +show create table innodb_page_compressed8; +Table Create Table +innodb_page_compressed8 CREATE TABLE `innodb_page_compressed8` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8 +create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9; +show warnings; +Level Code Message +show create table innodb_page_compressed9; +Table Create Table +innodb_page_compressed9 CREATE TABLE `innodb_page_compressed9` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=9 +create procedure innodb_insert_proc (repeat_count int) +begin +declare current_num int; +set current_num = 0; +while current_num < repeat_count do +insert into innodb_normal values(current_num,'testing..'); +set current_num = current_num + 1; +end while; +end// +commit; +set autocommit=0; +call innodb_insert_proc(5000); +commit; +set autocommit=1; +select count(*) from innodb_normal; +count(*) +5000 +insert into innodb_compressed select * from innodb_normal; +insert into innodb_page_compressed1 select * from innodb_normal; +insert into innodb_page_compressed2 select * from innodb_normal; +insert into innodb_page_compressed3 select * from innodb_normal; +insert into innodb_page_compressed4 select * from innodb_normal; 
+insert into innodb_page_compressed5 select * from innodb_normal; +insert into innodb_page_compressed6 select * from innodb_normal; +insert into innodb_page_compressed7 select * from innodb_normal; +insert into innodb_page_compressed8 select * from innodb_normal; +insert into innodb_page_compressed9 select * from innodb_normal; +commit; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +alter table innodb_normal page_compressed=1 page_compression_level=8; +show warnings; +Level Code Message +show create table innodb_normal; +Table Create Table +innodb_normal CREATE TABLE `innodb_normal` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8 +alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0; +show warnings; +Level Code Message +show create table innodb_compressed; +Table Create Table +innodb_compressed CREATE TABLE `innodb_compressed` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8 +update innodb_page_compressed1 set c1 = c1 + 1; +update 
innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from 
innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +set global innodb_compression_algorithm = 1; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update 
innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +drop procedure innodb_insert_proc; +drop table innodb_normal; +drop table innodb_compressed; +drop table innodb_page_compressed1; +drop table innodb_page_compressed2; +drop table innodb_page_compressed3; +drop table innodb_page_compressed4; +drop table innodb_page_compressed5; +drop table innodb_page_compressed6; +drop table innodb_page_compressed7; +drop table innodb_page_compressed8; +drop table innodb_page_compressed9; diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_snappy.result b/mysql-test/suite/innodb/r/innodb-page_compression_snappy.result new file mode 100644 index 00000000000..1709d8e9d2b --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb-page_compression_snappy.result @@ -0,0 +1,438 @@ +call mtr.add_suppression("InnoDB: Warning: Compression failed for space*"); +set global 
innodb_file_format = `barracuda`; +set global innodb_file_per_table = on; +set global innodb_compression_algorithm = 6; +create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8; +show warnings; +Level Code Message +create table innodb_normal (c1 int, b char(20)) engine=innodb; +show warnings; +Level Code Message +create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1; +show warnings; +Level Code Message +show create table innodb_page_compressed1; +Table Create Table +innodb_page_compressed1 CREATE TABLE `innodb_page_compressed1` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=1 +create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2; +show warnings; +Level Code Message +show create table innodb_page_compressed2; +Table Create Table +innodb_page_compressed2 CREATE TABLE `innodb_page_compressed2` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=2 +create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3; +show warnings; +Level Code Message +show create table innodb_page_compressed3; +Table Create Table +innodb_page_compressed3 CREATE TABLE `innodb_page_compressed3` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=3 +create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4; +show warnings; +Level Code Message +show create table innodb_page_compressed4; +Table Create Table +innodb_page_compressed4 CREATE TABLE `innodb_page_compressed4` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB 
DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=4 +create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5; +show warnings; +Level Code Message +show create table innodb_page_compressed5; +Table Create Table +innodb_page_compressed5 CREATE TABLE `innodb_page_compressed5` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=5 +create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6; +show warnings; +Level Code Message +show create table innodb_page_compressed6; +Table Create Table +innodb_page_compressed6 CREATE TABLE `innodb_page_compressed6` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=6 +create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7; +show warnings; +Level Code Message +show create table innodb_page_compressed7; +Table Create Table +innodb_page_compressed7 CREATE TABLE `innodb_page_compressed7` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=7 +create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8; +show warnings; +Level Code Message +show create table innodb_page_compressed8; +Table Create Table +innodb_page_compressed8 CREATE TABLE `innodb_page_compressed8` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8 +create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9; +show warnings; +Level Code Message +show create table innodb_page_compressed9; +Table Create Table 
+innodb_page_compressed9 CREATE TABLE `innodb_page_compressed9` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=9 +create procedure innodb_insert_proc (repeat_count int) +begin +declare current_num int; +set current_num = 0; +while current_num < repeat_count do +insert into innodb_normal values(current_num,'testing..'); +set current_num = current_num + 1; +end while; +end// +commit; +set autocommit=0; +call innodb_insert_proc(5000); +commit; +set autocommit=1; +select count(*) from innodb_normal; +count(*) +5000 +insert into innodb_compressed select * from innodb_normal; +insert into innodb_page_compressed1 select * from innodb_normal; +insert into innodb_page_compressed2 select * from innodb_normal; +insert into innodb_page_compressed3 select * from innodb_normal; +insert into innodb_page_compressed4 select * from innodb_normal; +insert into innodb_page_compressed5 select * from innodb_normal; +insert into innodb_page_compressed6 select * from innodb_normal; +insert into innodb_page_compressed7 select * from innodb_normal; +insert into innodb_page_compressed8 select * from innodb_normal; +insert into innodb_page_compressed9 select * from innodb_normal; +commit; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from 
innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +alter table innodb_normal page_compressed=1 page_compression_level=8; +show warnings; +Level Code Message +show create table innodb_normal; +Table Create Table +innodb_normal CREATE TABLE `innodb_normal` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8 +alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0; +show warnings; +Level Code Message +show create table innodb_compressed; +Table Create Table +innodb_compressed CREATE TABLE `innodb_compressed` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 
500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +set global innodb_compression_algorithm = 1; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from 
innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from 
innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +set global innodb_compression_algorithm = 0; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) 
from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +drop procedure innodb_insert_proc; +drop table innodb_normal; +drop table innodb_compressed; +drop table innodb_page_compressed1; +drop table innodb_page_compressed2; +drop table innodb_page_compressed3; +drop table innodb_page_compressed4; +drop table innodb_page_compressed5; +drop table innodb_page_compressed6; +drop table innodb_page_compressed7; +drop table innodb_page_compressed8; +drop table innodb_page_compressed9; diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_tables.result b/mysql-test/suite/innodb/r/innodb-page_compression_tables.result new file mode 100644 index 00000000000..98de5db3c12 --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb-page_compression_tables.result @@ -0,0 +1,121 @@ +SET GLOBAL innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; +set global innodb_compression_algorithm = 1; +create table innodb_normal(c1 bigint not null, b char(200)) engine=innodb; +create table innodb_compact(c1 bigint not null, b char(200)) engine=innodb row_format=compact page_compressed=1; +create table innodb_dynamic(c1 bigint not null, b char(200)) engine=innodb row_format=dynamic page_compressed=1; +create table innodb_compressed(c1 bigint not null, b char(200)) 
engine=innodb row_format=compressed page_compressed=1; +ERROR HY000: Can't create table `test`.`innodb_compressed` (errno: 140 "Wrong create options") +show warnings; +Level Code Message +Warning 140 InnoDB: PAGE_COMPRESSED table can't have ROW_TYPE=COMPRESSED +Error 1005 Can't create table `test`.`innodb_compressed` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +show create table innodb_compact; +Table Create Table +innodb_compact CREATE TABLE `innodb_compact` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT `page_compressed`=1 +show create table innodb_dynamic; +Table Create Table +innodb_dynamic CREATE TABLE `innodb_dynamic` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC `page_compressed`=1 +create table innodb_redundant(c1 bigint not null, b char(200)) engine=innodb row_format=redundant page_compressed=1; +ERROR HY000: Can't create table `test`.`innodb_redundant` (errno: 140 "Wrong create options") +show warnings; +Level Code Message +Warning 140 InnoDB: PAGE_COMPRESSED table can't have ROW_TYPE=REDUNDANT +Error 1005 Can't create table `test`.`innodb_redundant` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +create table innodb_redundant(c1 bigint not null, b char(200)) engine=innodb row_format=redundant; +show create table innodb_redundant; +Table Create Table +innodb_redundant CREATE TABLE `innodb_redundant` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT +alter table innodb_redundant page_compressed=1; +ERROR HY000: Can't create table `test`.`#sql-temporary` (errno: 140 "Wrong create options") +show warnings; +Level Code Message +Warning 140 InnoDB: PAGE_COMPRESSED table can't have ROW_TYPE=REDUNDANT +Error 1005 Can't 
create table `test`.`#sql-temporary` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +show create table innodb_redundant; +Table Create Table +innodb_redundant CREATE TABLE `innodb_redundant` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT +alter table innodb_redundant row_format=compact page_compressed=1; +show create table innodb_redundant; +Table Create Table +innodb_redundant CREATE TABLE `innodb_redundant` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT `page_compressed`=1 +drop table innodb_redundant; +create procedure innodb_insert_proc (repeat_count int) +begin +declare current_num int; +set current_num = 0; +while current_num < repeat_count do +insert into innodb_normal values(current_num, substring(MD5(RAND()), -64)); +set current_num = current_num + 1; +end while; +end// +commit; +set autocommit=0; +call innodb_insert_proc(5000); +commit; +set autocommit=1; +insert into innodb_compact select * from innodb_normal; +insert into innodb_dynamic select * from innodb_normal; +update innodb_compact set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +select count(*) from innodb_compact where c1 < 1500000; +count(*) +5000 +select count(*) from innodb_dynamic where c1 < 1500000; +count(*) +5000 +update innodb_compact set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +select count(*) from innodb_compact where c1 < 1500000; +count(*) +5000 +select count(*) from innodb_dynamic where c1 < 1500000; +count(*) +5000 +SET GLOBAL innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; +set global innodb_compression_algorithm = 0; +alter table innodb_compact engine=innodb page_compressed=DEFAULT; +alter table innodb_dynamic engine=innodb page_compressed=DEFAULT; +show create table innodb_compact; +Table Create Table +innodb_compact CREATE 
TABLE `innodb_compact` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT +show create table innodb_dynamic; +Table Create Table +innodb_dynamic CREATE TABLE `innodb_dynamic` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +update innodb_compact set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +select count(*) from innodb_compact where c1 < 1500000; +count(*) +5000 +select count(*) from innodb_dynamic where c1 < 1500000; +count(*) +5000 +drop procedure innodb_insert_proc; +drop table innodb_normal; +drop table innodb_compact; +drop table innodb_dynamic; diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_zip.result b/mysql-test/suite/innodb/r/innodb-page_compression_zip.result new file mode 100644 index 00000000000..4c3ab273b2e --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb-page_compression_zip.result @@ -0,0 +1,351 @@ +SET GLOBAL innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; +set global innodb_compression_algorithm = 1; +create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8; +show warnings; +Level Code Message +create table innodb_normal (c1 int, b char(20)) engine=innodb; +show warnings; +Level Code Message +create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1; +show warnings; +Level Code Message +show create table innodb_page_compressed1; +Table Create Table +innodb_page_compressed1 CREATE TABLE `innodb_page_compressed1` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=1 +create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2; +show warnings; +Level Code Message +show create table innodb_page_compressed2; +Table 
Create Table +innodb_page_compressed2 CREATE TABLE `innodb_page_compressed2` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=2 +create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3; +show warnings; +Level Code Message +show create table innodb_page_compressed3; +Table Create Table +innodb_page_compressed3 CREATE TABLE `innodb_page_compressed3` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=3 +create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4; +show warnings; +Level Code Message +show create table innodb_page_compressed4; +Table Create Table +innodb_page_compressed4 CREATE TABLE `innodb_page_compressed4` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=4 +create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5; +show warnings; +Level Code Message +show create table innodb_page_compressed5; +Table Create Table +innodb_page_compressed5 CREATE TABLE `innodb_page_compressed5` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=5 +create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6; +show warnings; +Level Code Message +show create table innodb_page_compressed6; +Table Create Table +innodb_page_compressed6 CREATE TABLE `innodb_page_compressed6` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=6 +create table innodb_page_compressed7 (c1 int, b char(20)) 
engine=innodb page_compressed=1 page_compression_level=7; +show warnings; +Level Code Message +show create table innodb_page_compressed7; +Table Create Table +innodb_page_compressed7 CREATE TABLE `innodb_page_compressed7` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=7 +create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8; +show warnings; +Level Code Message +show create table innodb_page_compressed8; +Table Create Table +innodb_page_compressed8 CREATE TABLE `innodb_page_compressed8` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8 +create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9; +show warnings; +Level Code Message +show create table innodb_page_compressed9; +Table Create Table +innodb_page_compressed9 CREATE TABLE `innodb_page_compressed9` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=9 +create procedure innodb_insert_proc (repeat_count int) +begin +declare current_num int; +set current_num = 0; +while current_num < repeat_count do +insert into innodb_normal values(current_num,'testing..'); +set current_num = current_num + 1; +end while; +end// +commit; +set autocommit=0; +call innodb_insert_proc(5000); +commit; +set autocommit=1; +select count(*) from innodb_normal; +count(*) +5000 +insert into innodb_compressed select * from innodb_normal; +insert into innodb_page_compressed1 select * from innodb_normal; +insert into innodb_page_compressed2 select * from innodb_normal; +insert into innodb_page_compressed3 select * from innodb_normal; +insert into innodb_page_compressed4 select * from innodb_normal; +insert into innodb_page_compressed5 select * 
from innodb_normal; +insert into innodb_page_compressed6 select * from innodb_normal; +insert into innodb_page_compressed7 select * from innodb_normal; +insert into innodb_page_compressed8 select * from innodb_normal; +insert into innodb_page_compressed9 select * from innodb_normal; +commit; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +alter table innodb_normal page_compressed=1 page_compression_level=8; +show warnings; +Level Code Message +show create table innodb_normal; +Table Create Table +innodb_normal CREATE TABLE `innodb_normal` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8 +alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0; +show warnings; +Level Code Message +show create table innodb_compressed; +Table Create Table +innodb_compressed CREATE TABLE `innodb_compressed` ( + `c1` int(11) DEFAULT NULL, + `b` char(20) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 `page_compression_level`=8 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; 
+update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 
+select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +set global innodb_compression_algorithm = 0; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update 
innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +count(*) +5000 +select count(*) from innodb_page_compressed1; +count(*) +5000 +select count(*) from innodb_page_compressed1 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed2 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed3 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed4 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed5 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed6 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed7 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed8 where c1 < 500000; +count(*) +5000 +select count(*) from innodb_page_compressed9 where c1 < 500000; +count(*) +5000 +drop procedure innodb_insert_proc; +drop table innodb_normal; +drop table innodb_compressed; +drop table innodb_page_compressed1; +drop table innodb_page_compressed2; +drop table innodb_page_compressed3; +drop table innodb_page_compressed4; +drop table innodb_page_compressed5; +drop table innodb_page_compressed6; +drop table innodb_page_compressed7; +drop table innodb_page_compressed8; +drop table innodb_page_compressed9; diff --git a/mysql-test/suite/innodb/r/innodb_monitor.result b/mysql-test/suite/innodb/r/innodb_monitor.result index f8d24f4e6f5..03c78f2e040 100644 --- a/mysql-test/suite/innodb/r/innodb_monitor.result +++ b/mysql-test/suite/innodb/r/innodb_monitor.result @@ -37,6 +37,8 @@ buffer_pool_bytes_dirty disabled buffer_pool_pages_free disabled buffer_pages_created disabled buffer_pages_written disabled +buffer_index_pages_written disabled 
+buffer_non_index_pages_written disabled buffer_pages_read disabled buffer_data_reads disabled buffer_data_written disabled @@ -160,6 +162,19 @@ compress_pages_compressed disabled compress_pages_decompressed disabled compression_pad_increments disabled compression_pad_decrements disabled +compress_saved disabled +compress_trim_sect512 disabled +compress_trim_sect1024 disabled +compress_trim_sect2048 disabled +compress_trim_sect4096 disabled +compress_trim_sect8192 disabled +compress_trim_sect16384 disabled +compress_trim_sect32768 disabled +compress_pages_page_compressed disabled +compress_page_compressed_trim_op disabled +compress_page_compressed_trim_op_saved disabled +compress_pages_page_decompressed disabled +compress_pages_page_compression_error disabled index_page_splits disabled index_page_merge_attempts disabled index_page_merge_successful disabled diff --git a/mysql-test/suite/innodb/t/innodb-page_compression_bzip2.test b/mysql-test/suite/innodb/t/innodb-page_compression_bzip2.test new file mode 100644 index 00000000000..2e8831151c7 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb-page_compression_bzip2.test @@ -0,0 +1,251 @@ +-- source include/have_innodb.inc +-- source include/have_innodb_bzip2.inc + +--disable_query_log +let $innodb_compression_algorithm_orig=`select @@innodb_compression_algorithm`; +let $innodb_file_format_orig = `select @@innodb_file_format`; +let $innodb_file_per_table_orig = `select @@innodb_file_per_table`; +--enable_query_log + +set global innodb_file_format = `barracuda`; +set global innodb_file_per_table = on; + +# bzip2 +set global innodb_compression_algorithm = 5; + +create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8; +show warnings; +create table innodb_normal (c1 int, b char(20)) engine=innodb; +show warnings; +create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1; +show warnings; +show create table 
innodb_page_compressed1; +create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2; +show warnings; +show create table innodb_page_compressed2; +create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3; +show warnings; +show create table innodb_page_compressed3; +create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4; +show warnings; +show create table innodb_page_compressed4; +create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5; +show warnings; +show create table innodb_page_compressed5; +create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6; +show warnings; +show create table innodb_page_compressed6; +create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7; +show warnings; +show create table innodb_page_compressed7; +create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8; +show warnings; +show create table innodb_page_compressed8; +create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9; +show warnings; +show create table innodb_page_compressed9; +delimiter //; +create procedure innodb_insert_proc (repeat_count int) +begin + declare current_num int; + set current_num = 0; + while current_num < repeat_count do + insert into innodb_normal values(current_num,'testing..'); + set current_num = current_num + 1; + end while; +end// +delimiter ;// +commit; + +set autocommit=0; +call innodb_insert_proc(5000); +commit; +set autocommit=1; +select count(*) from innodb_normal; +insert into innodb_compressed select * from innodb_normal; +insert into innodb_page_compressed1 select * from innodb_normal; 
+insert into innodb_page_compressed2 select * from innodb_normal; +insert into innodb_page_compressed3 select * from innodb_normal; +insert into innodb_page_compressed4 select * from innodb_normal; +insert into innodb_page_compressed5 select * from innodb_normal; +insert into innodb_page_compressed6 select * from innodb_normal; +insert into innodb_page_compressed7 select * from innodb_normal; +insert into innodb_page_compressed8 select * from innodb_normal; +insert into innodb_page_compressed9 select * from innodb_normal; +commit; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +alter table innodb_normal page_compressed=1 page_compression_level=8; +show warnings; +show create table innodb_normal; +alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0; +show warnings; +show create table innodb_compressed; + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) 
from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +--source include/restart_mysqld.inc + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +# zlib +set global innodb_compression_algorithm = 1; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update 
innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +--source include/restart_mysqld.inc + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; 
+select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +# none +set global innodb_compression_algorithm = 0; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +--source include/restart_mysqld.inc + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 
< 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +drop procedure innodb_insert_proc; +drop table innodb_normal; +drop table innodb_compressed; +drop table innodb_page_compressed1; +drop table innodb_page_compressed2; +drop table innodb_page_compressed3; +drop table innodb_page_compressed4; +drop table innodb_page_compressed5; +drop table innodb_page_compressed6; +drop table innodb_page_compressed7; +drop table innodb_page_compressed8; +drop table innodb_page_compressed9; + +# reset system +--disable_query_log +EVAL SET GLOBAL innodb_compression_algorithm = $innodb_compression_algorithm_orig; +EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig; +EVAL SET GLOBAL innodb_file_format = $innodb_file_format_orig; +--enable_query_log diff --git a/mysql-test/suite/innodb/t/innodb-page_compression_lz4.test b/mysql-test/suite/innodb/t/innodb-page_compression_lz4.test new file mode 100644 index 00000000000..731cbdeab66 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb-page_compression_lz4.test @@ -0,0 +1,252 @@ +-- source include/have_innodb.inc +-- source include/have_innodb_lz4.inc + +--disable_query_log +let $innodb_compression_algorithm_orig=`SELECT @@innodb_compression_algorithm`; +let $innodb_file_format_orig = `SELECT @@innodb_file_format`; +let $innodb_file_per_table_orig = `SELECT @@innodb_file_per_table`; +--enable_query_log + +set global innodb_file_format = `barracuda`; +set global innodb_file_per_table = on; + +# lz4 +set global innodb_compression_algorithm = 2; + +create table innodb_compressed(c1 int, b char(20)) engine=innodb 
row_format=compressed key_block_size=8; +show warnings; +create table innodb_normal (c1 int, b char(20)) engine=innodb; +show warnings; +create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1; +show warnings; +show create table innodb_page_compressed1; +create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2; +show warnings; +show create table innodb_page_compressed2; +create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3; +show warnings; +show create table innodb_page_compressed3; +create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4; +show warnings; +show create table innodb_page_compressed4; +create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5; +show warnings; +show create table innodb_page_compressed5; +create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6; +show warnings; +show create table innodb_page_compressed6; +create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7; +show warnings; +show create table innodb_page_compressed7; +create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8; +show warnings; +show create table innodb_page_compressed8; +create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9; +show warnings; +show create table innodb_page_compressed9; +delimiter //; +create procedure innodb_insert_proc (repeat_count int) +begin + declare current_num int; + set current_num = 0; + while current_num < repeat_count do + insert into innodb_normal values(current_num,'testing..'); + set current_num = current_num + 1; + end 
while; +end// +delimiter ;// +commit; + +set autocommit=0; +call innodb_insert_proc(5000); +commit; +set autocommit=1; +select count(*) from innodb_normal; +insert into innodb_compressed select * from innodb_normal; +insert into innodb_page_compressed1 select * from innodb_normal; +insert into innodb_page_compressed2 select * from innodb_normal; +insert into innodb_page_compressed3 select * from innodb_normal; +insert into innodb_page_compressed4 select * from innodb_normal; +insert into innodb_page_compressed5 select * from innodb_normal; +insert into innodb_page_compressed6 select * from innodb_normal; +insert into innodb_page_compressed7 select * from innodb_normal; +insert into innodb_page_compressed8 select * from innodb_normal; +insert into innodb_page_compressed9 select * from innodb_normal; +commit; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +alter table innodb_normal page_compressed=1 page_compression_level=8; +show warnings; +show create table innodb_normal; +alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0; +show warnings; +show create table innodb_compressed; + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update 
innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +--source include/restart_mysqld.inc + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 
< 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +# zlib +set global innodb_compression_algorithm = 1; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +--source include/restart_mysqld.inc + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 
where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +# none +set global innodb_compression_algorithm = 0; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +--source include/restart_mysqld.inc + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update 
innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +drop procedure innodb_insert_proc; +drop table innodb_normal; +drop table innodb_compressed; +drop table innodb_page_compressed1; +drop table innodb_page_compressed2; +drop table innodb_page_compressed3; +drop table innodb_page_compressed4; +drop table innodb_page_compressed5; +drop table innodb_page_compressed6; +drop table innodb_page_compressed7; +drop table innodb_page_compressed8; +drop table innodb_page_compressed9; + +# reset system +--disable_query_log +EVAL SET GLOBAL innodb_compression_algorithm = $innodb_compression_algorithm_orig; +EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig; +EVAL SET GLOBAL innodb_file_format = $innodb_file_format_orig; +--enable_query_log diff --git a/mysql-test/suite/innodb/t/innodb-page_compression_lzma.test b/mysql-test/suite/innodb/t/innodb-page_compression_lzma.test new file mode 100644 index 00000000000..071e86b0f9b --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb-page_compression_lzma.test @@ -0,0 +1,251 @@ +-- source include/have_innodb.inc +-- source include/have_innodb_lzma.inc + +--disable_query_log +let $innodb_compression_algorithm_orig=`SELECT @@innodb_compression_algorithm`; +let $innodb_file_format_orig = `SELECT @@innodb_file_format`; 
+let $innodb_file_per_table_orig = `SELECT @@innodb_file_per_table`; +--enable_query_log + +set global innodb_file_format = `barracuda`; +set global innodb_file_per_table = on; + +# lzma +set global innodb_compression_algorithm = 4; + +create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8; +show warnings; +create table innodb_normal (c1 int, b char(20)) engine=innodb; +show warnings; +create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1; +show warnings; +show create table innodb_page_compressed1; +create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2; +show warnings; +show create table innodb_page_compressed2; +create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3; +show warnings; +show create table innodb_page_compressed3; +create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4; +show warnings; +show create table innodb_page_compressed4; +create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5; +show warnings; +show create table innodb_page_compressed5; +create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6; +show warnings; +show create table innodb_page_compressed6; +create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7; +show warnings; +show create table innodb_page_compressed7; +create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8; +show warnings; +show create table innodb_page_compressed8; +create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9; +show warnings; +show create 
table innodb_page_compressed9; +delimiter //; +create procedure innodb_insert_proc (repeat_count int) +begin + declare current_num int; + set current_num = 0; + while current_num < repeat_count do + insert into innodb_normal values(current_num,'testing..'); + set current_num = current_num + 1; + end while; +end// +delimiter ;// +commit; + +set autocommit=0; +call innodb_insert_proc(5000); +commit; +set autocommit=1; +select count(*) from innodb_normal; +insert into innodb_compressed select * from innodb_normal; +insert into innodb_page_compressed1 select * from innodb_normal; +insert into innodb_page_compressed2 select * from innodb_normal; +insert into innodb_page_compressed3 select * from innodb_normal; +insert into innodb_page_compressed4 select * from innodb_normal; +insert into innodb_page_compressed5 select * from innodb_normal; +insert into innodb_page_compressed6 select * from innodb_normal; +insert into innodb_page_compressed7 select * from innodb_normal; +insert into innodb_page_compressed8 select * from innodb_normal; +insert into innodb_page_compressed9 select * from innodb_normal; +commit; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +alter table innodb_normal page_compressed=1 page_compression_level=8; +show warnings; +show create table innodb_normal; +alter table innodb_compressed row_format=default page_compressed=1 
page_compression_level=8 key_block_size=0; +show warnings; +show create table innodb_compressed; + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +--source include/restart_mysqld.inc + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from 
innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +# zlib +set global innodb_compression_algorithm = 1; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +--source include/restart_mysqld.inc + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update 
innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +# none +set global innodb_compression_algorithm = 0; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +--source include/restart_mysqld.inc + +update innodb_page_compressed1 set c1 = c1 + 1; +update 
innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +drop procedure innodb_insert_proc; +drop table innodb_normal; +drop table innodb_compressed; +drop table innodb_page_compressed1; +drop table innodb_page_compressed2; +drop table innodb_page_compressed3; +drop table innodb_page_compressed4; +drop table innodb_page_compressed5; +drop table innodb_page_compressed6; +drop table innodb_page_compressed7; +drop table innodb_page_compressed8; +drop table innodb_page_compressed9; + +# reset system +--disable_query_log +EVAL SET GLOBAL innodb_compression_algorithm = $innodb_compression_algorithm_orig; +EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig; +EVAL SET GLOBAL innodb_file_format = $innodb_file_format_orig; +--enable_query_log diff --git a/mysql-test/suite/innodb/t/innodb-page_compression_lzo.test b/mysql-test/suite/innodb/t/innodb-page_compression_lzo.test new file mode 100644 index 00000000000..6a73f793f26 --- /dev/null +++ 
b/mysql-test/suite/innodb/t/innodb-page_compression_lzo.test @@ -0,0 +1,204 @@ +-- source include/have_innodb.inc +-- source include/have_innodb_lzo.inc + +--disable_query_log +let $innodb_compression_algorithm_orig=`select @@innodb_compression_algorithm`; +let $innodb_file_format_orig = `select @@innodb_file_format`; +let $innodb_file_per_table_orig = `select @@innodb_file_per_table`; +--enable_query_log + +set global innodb_file_format = `barracuda`; +set global innodb_file_per_table = on; + +# lzo +set global innodb_compression_algorithm = 3; + +create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8; +show warnings; +create table innodb_normal (c1 int, b char(20)) engine=innodb; +show warnings; +create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1; +show warnings; +show create table innodb_page_compressed1; +create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2; +show warnings; +show create table innodb_page_compressed2; +create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3; +show warnings; +show create table innodb_page_compressed3; +create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4; +show warnings; +show create table innodb_page_compressed4; +create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5; +show warnings; +show create table innodb_page_compressed5; +create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6; +show warnings; +show create table innodb_page_compressed6; +create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7; +show warnings; +show create table innodb_page_compressed7; 
+create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8; +show warnings; +show create table innodb_page_compressed8; +create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9; +show warnings; +show create table innodb_page_compressed9; +delimiter //; +create procedure innodb_insert_proc (repeat_count int) +begin + declare current_num int; + set current_num = 0; + while current_num < repeat_count do + insert into innodb_normal values(current_num,'testing..'); + set current_num = current_num + 1; + end while; +end// +delimiter ;// +commit; + +set autocommit=0; +call innodb_insert_proc(5000); +commit; +set autocommit=1; +select count(*) from innodb_normal; +insert into innodb_compressed select * from innodb_normal; +insert into innodb_page_compressed1 select * from innodb_normal; +insert into innodb_page_compressed2 select * from innodb_normal; +insert into innodb_page_compressed3 select * from innodb_normal; +insert into innodb_page_compressed4 select * from innodb_normal; +insert into innodb_page_compressed5 select * from innodb_normal; +insert into innodb_page_compressed6 select * from innodb_normal; +insert into innodb_page_compressed7 select * from innodb_normal; +insert into innodb_page_compressed8 select * from innodb_normal; +insert into innodb_page_compressed9 select * from innodb_normal; +commit; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select 
count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +alter table innodb_normal page_compressed=1 page_compression_level=8; +show warnings; +show create table innodb_normal; +alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0; +show warnings; +show create table innodb_compressed; + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +--source include/restart_mysqld.inc + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = 
c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +# zlib +set global innodb_compression_algorithm = 1; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +--source include/restart_mysqld.inc + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; 
+update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +drop procedure innodb_insert_proc; +drop table innodb_normal; +drop table innodb_compressed; +drop table innodb_page_compressed1; +drop table innodb_page_compressed2; +drop table innodb_page_compressed3; +drop table innodb_page_compressed4; +drop table innodb_page_compressed5; +drop table innodb_page_compressed6; +drop table innodb_page_compressed7; +drop table innodb_page_compressed8; +drop table innodb_page_compressed9; + +# reset system +--disable_query_log +eval set global innodb_compression_algorithm = $innodb_compression_algorithm_orig; +eval set global innodb_file_per_table = $innodb_file_per_table_orig; +eval set global innodb_file_format = $innodb_file_format_orig; +--enable_query_log diff --git a/mysql-test/suite/innodb/t/innodb-page_compression_snappy.test b/mysql-test/suite/innodb/t/innodb-page_compression_snappy.test new file mode 100644 index 00000000000..8c4980ff479 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb-page_compression_snappy.test @@ 
-0,0 +1,253 @@ +-- source include/have_innodb.inc +-- source include/have_innodb_snappy.inc + +call mtr.add_suppression("InnoDB: Warning: Compression failed for space*"); + +--disable_query_log +let $innodb_compression_algorithm_orig=`select @@innodb_compression_algorithm`; +let $innodb_file_format_orig = `select @@innodb_file_format`; +let $innodb_file_per_table_orig = `select @@innodb_file_per_table`; +--enable_query_log + +set global innodb_file_format = `barracuda`; +set global innodb_file_per_table = on; + +# snappy +set global innodb_compression_algorithm = 6; + +create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8; +show warnings; +create table innodb_normal (c1 int, b char(20)) engine=innodb; +show warnings; +create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1; +show warnings; +show create table innodb_page_compressed1; +create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2; +show warnings; +show create table innodb_page_compressed2; +create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3; +show warnings; +show create table innodb_page_compressed3; +create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4; +show warnings; +show create table innodb_page_compressed4; +create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=5; +show warnings; +show create table innodb_page_compressed5; +create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6; +show warnings; +show create table innodb_page_compressed6; +create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7; +show warnings; +show create table 
innodb_page_compressed7; +create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8; +show warnings; +show create table innodb_page_compressed8; +create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9; +show warnings; +show create table innodb_page_compressed9; +delimiter //; +create procedure innodb_insert_proc (repeat_count int) +begin + declare current_num int; + set current_num = 0; + while current_num < repeat_count do + insert into innodb_normal values(current_num,'testing..'); + set current_num = current_num + 1; + end while; +end// +delimiter ;// +commit; + +set autocommit=0; +call innodb_insert_proc(5000); +commit; +set autocommit=1; +select count(*) from innodb_normal; +insert into innodb_compressed select * from innodb_normal; +insert into innodb_page_compressed1 select * from innodb_normal; +insert into innodb_page_compressed2 select * from innodb_normal; +insert into innodb_page_compressed3 select * from innodb_normal; +insert into innodb_page_compressed4 select * from innodb_normal; +insert into innodb_page_compressed5 select * from innodb_normal; +insert into innodb_page_compressed6 select * from innodb_normal; +insert into innodb_page_compressed7 select * from innodb_normal; +insert into innodb_page_compressed8 select * from innodb_normal; +insert into innodb_page_compressed9 select * from innodb_normal; +commit; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where 
c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +alter table innodb_normal page_compressed=1 page_compression_level=8; +show warnings; +show create table innodb_normal; +alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0; +show warnings; +show create table innodb_compressed; + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +--source include/restart_mysqld.inc + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update 
innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +# zlib +set global innodb_compression_algorithm = 1; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +--source include/restart_mysqld.inc + +update innodb_page_compressed1 set c1 = c1 + 1; +update 
innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +# none +set global innodb_compression_algorithm = 0; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from 
innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +--source include/restart_mysqld.inc + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +drop procedure innodb_insert_proc; +drop table innodb_normal; +drop table innodb_compressed; +drop table innodb_page_compressed1; +drop table innodb_page_compressed2; +drop table innodb_page_compressed3; +drop table innodb_page_compressed4; +drop table innodb_page_compressed5; +drop table innodb_page_compressed6; +drop table innodb_page_compressed7; +drop table innodb_page_compressed8; +drop table innodb_page_compressed9; + +# reset system +--disable_query_log +EVAL SET GLOBAL innodb_compression_algorithm = $innodb_compression_algorithm_orig; +EVAL SET GLOBAL innodb_file_per_table = 
$innodb_file_per_table_orig; +EVAL SET GLOBAL innodb_file_format = $innodb_file_format_orig; +--enable_query_log diff --git a/mysql-test/suite/innodb/t/innodb-page_compression_tables.test b/mysql-test/suite/innodb/t/innodb-page_compression_tables.test new file mode 100644 index 00000000000..f7810a44c48 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb-page_compression_tables.test @@ -0,0 +1,102 @@ +-- source include/have_innodb.inc + +--disable_query_log +let $innodb_compression_algorithm_orig=`SELECT @@innodb_compression_algorithm`; +let $innodb_file_format_orig = `SELECT @@innodb_file_format`; +let $innodb_file_per_table_orig = `SELECT @@innodb_file_per_table`; +--enable_query_log + +SET GLOBAL innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; +# zlib +set global innodb_compression_algorithm = 1; + +create table innodb_normal(c1 bigint not null, b char(200)) engine=innodb; +create table innodb_compact(c1 bigint not null, b char(200)) engine=innodb row_format=compact page_compressed=1; +create table innodb_dynamic(c1 bigint not null, b char(200)) engine=innodb row_format=dynamic page_compressed=1; +--replace_regex /#sql-[0-9a-f_]*`/#sql-temporary`/ +--error 1005 +create table innodb_compressed(c1 bigint not null, b char(200)) engine=innodb row_format=compressed page_compressed=1; +--replace_regex /#sql-[0-9a-f_]*`/#sql-temporary`/ +show warnings; +show create table innodb_compact; +show create table innodb_dynamic; + +# MDEV-7133: InnoDB: Assertion failure in thread 140737091569408 in file dict0mem.cc line 74 +# InnoDB: Failing assertion: dict_tf_is_valid(flags) +--replace_regex /#sql-[0-9a-f_]*`/#sql-temporary`/ +--error 1005 +create table innodb_redundant(c1 bigint not null, b char(200)) engine=innodb row_format=redundant page_compressed=1; +--replace_regex /#sql-[0-9a-f_]*`/#sql-temporary`/ +show warnings; +create table innodb_redundant(c1 bigint not null, b char(200)) engine=innodb row_format=redundant; +show create table 
innodb_redundant; +--replace_regex /#sql-[0-9a-f_]*`/#sql-temporary`/ +--error 1005 +alter table innodb_redundant page_compressed=1; +--replace_regex /#sql-[0-9a-f_]*`/#sql-temporary`/ +show warnings; +show create table innodb_redundant; +alter table innodb_redundant row_format=compact page_compressed=1; +show create table innodb_redundant; +drop table innodb_redundant; + +delimiter //; +create procedure innodb_insert_proc (repeat_count int) +begin + declare current_num int; + set current_num = 0; + while current_num < repeat_count do + insert into innodb_normal values(current_num, substring(MD5(RAND()), -64)); + set current_num = current_num + 1; + end while; +end// +delimiter ;// +commit; + +set autocommit=0; +call innodb_insert_proc(5000); +commit; +set autocommit=1; + +insert into innodb_compact select * from innodb_normal; +insert into innodb_dynamic select * from innodb_normal; + +update innodb_compact set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +select count(*) from innodb_compact where c1 < 1500000; +select count(*) from innodb_dynamic where c1 < 1500000; + +--source include/restart_mysqld.inc + +update innodb_compact set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +select count(*) from innodb_compact where c1 < 1500000; +select count(*) from innodb_dynamic where c1 < 1500000; + +SET GLOBAL innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; +# none +set global innodb_compression_algorithm = 0; + +alter table innodb_compact engine=innodb page_compressed=DEFAULT; +alter table innodb_dynamic engine=innodb page_compressed=DEFAULT; +show create table innodb_compact; +show create table innodb_dynamic; + +update innodb_compact set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +select count(*) from innodb_compact where c1 < 1500000; +select count(*) from innodb_dynamic where c1 < 1500000; + +drop procedure innodb_insert_proc; +drop table innodb_normal; +drop table innodb_compact; +drop table innodb_dynamic; + +# 
reset system +--disable_query_log +EVAL SET GLOBAL innodb_compression_algorithm = $innodb_compression_algorithm_orig; +EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig; +EVAL SET GLOBAL innodb_file_format = $innodb_file_format_orig; +--enable_query_log diff --git a/mysql-test/suite/innodb/t/innodb-page_compression_zip.test b/mysql-test/suite/innodb/t/innodb-page_compression_zip.test new file mode 100644 index 00000000000..8d06367e9b2 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb-page_compression_zip.test @@ -0,0 +1,202 @@ +-- source include/have_innodb.inc + +--disable_query_log +let $innodb_compression_algorithm_orig=`SELECT @@innodb_compression_algorithm`; +let $innodb_file_format_orig = `SELECT @@innodb_file_format`; +let $innodb_file_per_table_orig = `SELECT @@innodb_file_per_table`; +--enable_query_log + +SET GLOBAL innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; + +# zlib +set global innodb_compression_algorithm = 1; + +create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8; +show warnings; +create table innodb_normal (c1 int, b char(20)) engine=innodb; +show warnings; +create table innodb_page_compressed1 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=1; +show warnings; +show create table innodb_page_compressed1; +create table innodb_page_compressed2 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=2; +show warnings; +show create table innodb_page_compressed2; +create table innodb_page_compressed3 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=3; +show warnings; +show create table innodb_page_compressed3; +create table innodb_page_compressed4 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=4; +show warnings; +show create table innodb_page_compressed4; +create table innodb_page_compressed5 (c1 int, b char(20)) engine=innodb page_compressed=1 
page_compression_level=5; +show warnings; +show create table innodb_page_compressed5; +create table innodb_page_compressed6 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=6; +show warnings; +show create table innodb_page_compressed6; +create table innodb_page_compressed7 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=7; +show warnings; +show create table innodb_page_compressed7; +create table innodb_page_compressed8 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=8; +show warnings; +show create table innodb_page_compressed8; +create table innodb_page_compressed9 (c1 int, b char(20)) engine=innodb page_compressed=1 page_compression_level=9; +show warnings; +show create table innodb_page_compressed9; +delimiter //; +create procedure innodb_insert_proc (repeat_count int) +begin + declare current_num int; + set current_num = 0; + while current_num < repeat_count do + insert into innodb_normal values(current_num,'testing..'); + set current_num = current_num + 1; + end while; +end// +delimiter ;// +commit; + +set autocommit=0; +call innodb_insert_proc(5000); +commit; +set autocommit=1; +select count(*) from innodb_normal; +insert into innodb_compressed select * from innodb_normal; +insert into innodb_page_compressed1 select * from innodb_normal; +insert into innodb_page_compressed2 select * from innodb_normal; +insert into innodb_page_compressed3 select * from innodb_normal; +insert into innodb_page_compressed4 select * from innodb_normal; +insert into innodb_page_compressed5 select * from innodb_normal; +insert into innodb_page_compressed6 select * from innodb_normal; +insert into innodb_page_compressed7 select * from innodb_normal; +insert into innodb_page_compressed8 select * from innodb_normal; +insert into innodb_page_compressed9 select * from innodb_normal; +commit; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from 
innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +alter table innodb_normal page_compressed=1 page_compression_level=8; +show warnings; +show create table innodb_normal; +alter table innodb_compressed row_format=default page_compressed=1 page_compression_level=8 key_block_size=0; +show warnings; +show create table innodb_compressed; + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +--source 
include/restart_mysqld.inc + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +# none +set global innodb_compression_algorithm = 0; +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +commit; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; 
+select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; +--source include/restart_mysqld.inc + +update innodb_page_compressed1 set c1 = c1 + 1; +update innodb_page_compressed2 set c1 = c1 + 1; +update innodb_page_compressed3 set c1 = c1 + 1; +update innodb_page_compressed4 set c1 = c1 + 1; +update innodb_page_compressed5 set c1 = c1 + 1; +update innodb_page_compressed6 set c1 = c1 + 1; +update innodb_page_compressed7 set c1 = c1 + 1; +update innodb_page_compressed8 set c1 = c1 + 1; +update innodb_page_compressed9 set c1 = c1 + 1; +select count(*) from innodb_compressed; +select count(*) from innodb_page_compressed1; +select count(*) from innodb_page_compressed1 where c1 < 500000; +select count(*) from innodb_page_compressed2 where c1 < 500000; +select count(*) from innodb_page_compressed3 where c1 < 500000; +select count(*) from innodb_page_compressed4 where c1 < 500000; +select count(*) from innodb_page_compressed5 where c1 < 500000; +select count(*) from innodb_page_compressed6 where c1 < 500000; +select count(*) from innodb_page_compressed7 where c1 < 500000; +select count(*) from innodb_page_compressed8 where c1 < 500000; +select count(*) from innodb_page_compressed9 where c1 < 500000; + +drop procedure innodb_insert_proc; +drop table innodb_normal; +drop table innodb_compressed; +drop table innodb_page_compressed1; +drop table innodb_page_compressed2; +drop table innodb_page_compressed3; +drop table innodb_page_compressed4; +drop table innodb_page_compressed5; +drop table innodb_page_compressed6; +drop table innodb_page_compressed7; +drop table innodb_page_compressed8; +drop table innodb_page_compressed9; + +# reset system +--disable_query_log +EVAL SET GLOBAL innodb_compression_algorithm = 
$innodb_compression_algorithm_orig; +EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig; +EVAL SET GLOBAL innodb_file_format = $innodb_file_format_orig; +--enable_query_log diff --git a/mysql-test/suite/sys_vars/r/innodb_compression_algorithm_basic.result b/mysql-test/suite/sys_vars/r/innodb_compression_algorithm_basic.result new file mode 100644 index 00000000000..1213ec8bf10 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_compression_algorithm_basic.result @@ -0,0 +1,47 @@ +SET @start_global_value = @@global.innodb_compression_algorithm; +SELECT @start_global_value; +@start_global_value +zlib +select @@global.innodb_compression_algorithm; +@@global.innodb_compression_algorithm +zlib +select @@session.innodb_compression_algorithm; +ERROR HY000: Variable 'innodb_compression_algorithm' is a GLOBAL variable +show global variables like 'innodb_compression_algorithm'; +Variable_name Value +innodb_compression_algorithm zlib +show session variables like 'innodb_compression_algorithm'; +Variable_name Value +innodb_compression_algorithm zlib +select * from information_schema.global_variables where variable_name='innodb_compression_algorithm'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_COMPRESSION_ALGORITHM zlib +select * from information_schema.session_variables where variable_name='innodb_compression_algorithm'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_COMPRESSION_ALGORITHM zlib +set global innodb_compression_algorithm=1; +select @@global.innodb_compression_algorithm; +@@global.innodb_compression_algorithm +zlib +select * from information_schema.global_variables where variable_name='innodb_compression_algorithm'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_COMPRESSION_ALGORITHM zlib +select * from information_schema.session_variables where variable_name='innodb_compression_algorithm'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_COMPRESSION_ALGORITHM zlib +set session innodb_compression_algorithm=0; +ERROR HY000: Variable 'innodb_compression_algorithm' is a GLOBAL 
variable and should be set with SET GLOBAL +set global innodb_compression_algorithm=1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_compression_algorithm' +set global innodb_compression_algorithm=1e1; +ERROR 42000: Incorrect argument type to variable 'innodb_compression_algorithm' +set global innodb_compression_algorithm="foo"; +ERROR 42000: Variable 'innodb_compression_algorithm' can't be set to the value of 'foo' +set global innodb_compression_algorithm=0; +select @@global.innodb_compression_algorithm; +@@global.innodb_compression_algorithm +none +SET @@global.innodb_compression_algorithm = @start_global_value; +SELECT @@global.innodb_compression_algorithm; +@@global.innodb_compression_algorithm +zlib diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result index 8c0af874228..4e9c6839c95 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result @@ -37,6 +37,8 @@ buffer_pool_bytes_dirty disabled buffer_pool_pages_free disabled buffer_pages_created disabled buffer_pages_written disabled +buffer_index_pages_written disabled +buffer_non_index_pages_written disabled buffer_pages_read disabled buffer_data_reads disabled buffer_data_written disabled @@ -160,6 +162,19 @@ compress_pages_compressed disabled compress_pages_decompressed disabled compression_pad_increments disabled compression_pad_decrements disabled +compress_saved disabled +compress_trim_sect512 disabled +compress_trim_sect1024 disabled +compress_trim_sect2048 disabled +compress_trim_sect4096 disabled +compress_trim_sect8192 disabled +compress_trim_sect16384 disabled +compress_trim_sect32768 disabled +compress_pages_page_compressed disabled +compress_page_compressed_trim_op disabled +compress_page_compressed_trim_op_saved disabled +compress_pages_page_decompressed disabled +compress_pages_page_compression_error disabled 
index_page_splits disabled index_page_merge_attempts disabled index_page_merge_successful disabled diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result index 8c0af874228..4e9c6839c95 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result @@ -37,6 +37,8 @@ buffer_pool_bytes_dirty disabled buffer_pool_pages_free disabled buffer_pages_created disabled buffer_pages_written disabled +buffer_index_pages_written disabled +buffer_non_index_pages_written disabled buffer_pages_read disabled buffer_data_reads disabled buffer_data_written disabled @@ -160,6 +162,19 @@ compress_pages_compressed disabled compress_pages_decompressed disabled compression_pad_increments disabled compression_pad_decrements disabled +compress_saved disabled +compress_trim_sect512 disabled +compress_trim_sect1024 disabled +compress_trim_sect2048 disabled +compress_trim_sect4096 disabled +compress_trim_sect8192 disabled +compress_trim_sect16384 disabled +compress_trim_sect32768 disabled +compress_pages_page_compressed disabled +compress_page_compressed_trim_op disabled +compress_page_compressed_trim_op_saved disabled +compress_pages_page_decompressed disabled +compress_pages_page_compression_error disabled index_page_splits disabled index_page_merge_attempts disabled index_page_merge_successful disabled diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result index 8c0af874228..4e9c6839c95 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result @@ -37,6 +37,8 @@ buffer_pool_bytes_dirty disabled buffer_pool_pages_free disabled buffer_pages_created disabled buffer_pages_written disabled +buffer_index_pages_written disabled 
+buffer_non_index_pages_written disabled buffer_pages_read disabled buffer_data_reads disabled buffer_data_written disabled @@ -160,6 +162,19 @@ compress_pages_compressed disabled compress_pages_decompressed disabled compression_pad_increments disabled compression_pad_decrements disabled +compress_saved disabled +compress_trim_sect512 disabled +compress_trim_sect1024 disabled +compress_trim_sect2048 disabled +compress_trim_sect4096 disabled +compress_trim_sect8192 disabled +compress_trim_sect16384 disabled +compress_trim_sect32768 disabled +compress_pages_page_compressed disabled +compress_page_compressed_trim_op disabled +compress_page_compressed_trim_op_saved disabled +compress_pages_page_decompressed disabled +compress_pages_page_compression_error disabled index_page_splits disabled index_page_merge_attempts disabled index_page_merge_successful disabled diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result index 8c0af874228..4e9c6839c95 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result @@ -37,6 +37,8 @@ buffer_pool_bytes_dirty disabled buffer_pool_pages_free disabled buffer_pages_created disabled buffer_pages_written disabled +buffer_index_pages_written disabled +buffer_non_index_pages_written disabled buffer_pages_read disabled buffer_data_reads disabled buffer_data_written disabled @@ -160,6 +162,19 @@ compress_pages_compressed disabled compress_pages_decompressed disabled compression_pad_increments disabled compression_pad_decrements disabled +compress_saved disabled +compress_trim_sect512 disabled +compress_trim_sect1024 disabled +compress_trim_sect2048 disabled +compress_trim_sect4096 disabled +compress_trim_sect8192 disabled +compress_trim_sect16384 disabled +compress_trim_sect32768 disabled +compress_pages_page_compressed disabled +compress_page_compressed_trim_op disabled 
+compress_page_compressed_trim_op_saved disabled +compress_pages_page_decompressed disabled +compress_pages_page_compression_error disabled index_page_splits disabled index_page_merge_attempts disabled index_page_merge_successful disabled diff --git a/mysql-test/suite/sys_vars/r/innodb_mtflush_threads_basic.result b/mysql-test/suite/sys_vars/r/innodb_mtflush_threads_basic.result new file mode 100644 index 00000000000..75a1cc5262e --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_mtflush_threads_basic.result @@ -0,0 +1,21 @@ +select @@global.innodb_mtflush_threads; +@@global.innodb_mtflush_threads +8 +select @@session.innodb_mtflush_threads; +ERROR HY000: Variable 'innodb_mtflush_threads' is a GLOBAL variable +show global variables like 'innodb_mtflush_threads'; +Variable_name Value +innodb_mtflush_threads 8 +show session variables like 'innodb_mtflush_threads'; +Variable_name Value +innodb_mtflush_threads 8 +select * from information_schema.global_variables where variable_name='innodb_mtflush_threads'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_MTFLUSH_THREADS 8 +select * from information_schema.session_variables where variable_name='innodb_mtflush_threads'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_MTFLUSH_THREADS 8 +set global innodb_mtflush_threads=1; +ERROR HY000: Variable 'innodb_mtflush_threads' is a read only variable +set session innodb_mtflush_threads=1; +ERROR HY000: Variable 'innodb_mtflush_threads' is a read only variable diff --git a/mysql-test/suite/sys_vars/r/innodb_use_lz4_basic.result b/mysql-test/suite/sys_vars/r/innodb_use_lz4_basic.result new file mode 100644 index 00000000000..4c3cfa524af --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_use_lz4_basic.result @@ -0,0 +1,3 @@ +select @@global.innodb_use_fallocate; +@@global.innodb_use_fallocate +0 diff --git a/mysql-test/suite/sys_vars/r/innodb_use_mtflush_basic.result b/mysql-test/suite/sys_vars/r/innodb_use_mtflush_basic.result new file mode 100644 index 00000000000..f77abba7ac9 --- /dev/null 
+++ b/mysql-test/suite/sys_vars/r/innodb_use_mtflush_basic.result @@ -0,0 +1,21 @@ +select @@global.innodb_use_mtflush; +@@global.innodb_use_mtflush +0 +select @@session.innodb_use_mtflush; +ERROR HY000: Variable 'innodb_use_mtflush' is a GLOBAL variable +show global variables like 'innodb_use_mtflush'; +Variable_name Value +innodb_use_mtflush OFF +show session variables like 'innodb_use_mtflush'; +Variable_name Value +innodb_use_mtflush OFF +select * from information_schema.global_variables where variable_name='innodb_use_mtflush'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_USE_MTFLUSH OFF +select * from information_schema.session_variables where variable_name='innodb_use_mtflush'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_USE_MTFLUSH OFF +set global innodb_use_mtflush=1; +ERROR HY000: Variable 'innodb_use_mtflush' is a read only variable +set session innodb_use_mtflush=1; +ERROR HY000: Variable 'innodb_use_mtflush' is a read only variable diff --git a/mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result b/mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result new file mode 100644 index 00000000000..63292f5d3c8 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result @@ -0,0 +1,33 @@ +SET @start_use_trim = @@global.innodb_use_trim; +SELECT @start_use_trim; +@start_use_trim +0 +SELECT COUNT(@@GLOBAL.innodb_use_trim); +COUNT(@@GLOBAL.innodb_use_trim) +1 +1 Expected +SET @@GLOBAL.innodb_use_trim=1; +SELECT COUNT(@@GLOBAL.innodb_use_trim); +COUNT(@@GLOBAL.innodb_use_trim) +1 +1 Expected +SELECT IF(@@GLOBAL.innodb_use_trim, 'ON', 'OFF') = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_use_trim'; +IF(@@GLOBAL.innodb_use_trim, 'ON', 'OFF') = VARIABLE_VALUE +1 +1 Expected +SELECT COUNT(@@GLOBAL.innodb_use_trim); +COUNT(@@GLOBAL.innodb_use_trim) +1 +1 Expected +SELECT COUNT(VARIABLE_VALUE) +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_use_trim'; +COUNT(VARIABLE_VALUE) +1 +1 Expected +SET 
@@global.innodb_use_trim = @start_use_trim; +SELECT @@global.innodb_use_trim; +@@global.innodb_use_trim +0 diff --git a/mysql-test/suite/sys_vars/t/innodb_compression_algorithm_basic-master.opt b/mysql-test/suite/sys_vars/t/innodb_compression_algorithm_basic-master.opt new file mode 100644 index 00000000000..77db41721ca --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_compression_algorithm_basic-master.opt @@ -0,0 +1 @@ +--innodb-compression-algorithm=1
\ No newline at end of file diff --git a/mysql-test/suite/sys_vars/t/innodb_compression_algorithm_basic.test b/mysql-test/suite/sys_vars/t/innodb_compression_algorithm_basic.test new file mode 100644 index 00000000000..6f09ced7dd5 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_compression_algorithm_basic.test @@ -0,0 +1,46 @@ +--source include/have_innodb.inc + +SET @start_global_value = @@global.innodb_compression_algorithm; +SELECT @start_global_value; + +# +# exists as global only +# +select @@global.innodb_compression_algorithm; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_compression_algorithm; +show global variables like 'innodb_compression_algorithm'; +show session variables like 'innodb_compression_algorithm'; +select * from information_schema.global_variables where variable_name='innodb_compression_algorithm'; +select * from information_schema.session_variables where variable_name='innodb_compression_algorithm'; + +# +# show that it's writable +# +set global innodb_compression_algorithm=1; +select @@global.innodb_compression_algorithm; +select * from information_schema.global_variables where variable_name='innodb_compression_algorithm'; +select * from information_schema.session_variables where variable_name='innodb_compression_algorithm'; +--error ER_GLOBAL_VARIABLE +set session innodb_compression_algorithm=0; + +# +# incorrect types +# +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_compression_algorithm=1.1; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_compression_algorithm=1e1; +--error 1231 +set global innodb_compression_algorithm="foo"; +# +# min/max values +# +set global innodb_compression_algorithm=0; +select @@global.innodb_compression_algorithm; +# +# cleanup +# + +SET @@global.innodb_compression_algorithm = @start_global_value; +SELECT @@global.innodb_compression_algorithm; diff --git a/mysql-test/suite/sys_vars/t/innodb_mtflush_threads_basic.test b/mysql-test/suite/sys_vars/t/innodb_mtflush_threads_basic.test 
new file mode 100644 index 00000000000..c8412f969eb --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_mtflush_threads_basic.test @@ -0,0 +1,21 @@ +--source include/have_innodb.inc +# bool readonly + +# +# show values; +# +select @@global.innodb_mtflush_threads; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_mtflush_threads; +show global variables like 'innodb_mtflush_threads'; +show session variables like 'innodb_mtflush_threads'; +select * from information_schema.global_variables where variable_name='innodb_mtflush_threads'; +select * from information_schema.session_variables where variable_name='innodb_mtflush_threads'; + +# +# show that it's read-only +# +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +set global innodb_mtflush_threads=1; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +set session innodb_mtflush_threads=1; diff --git a/mysql-test/suite/sys_vars/t/innodb_use_lz4_basic.test b/mysql-test/suite/sys_vars/t/innodb_use_lz4_basic.test new file mode 100644 index 00000000000..aefa276dcee --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_use_lz4_basic.test @@ -0,0 +1,5 @@ +--source include/have_innodb.inc +# bool readonly +# not on all compilations +select @@global.innodb_use_fallocate; + diff --git a/mysql-test/suite/sys_vars/t/innodb_use_mtflush_basic.test b/mysql-test/suite/sys_vars/t/innodb_use_mtflush_basic.test new file mode 100644 index 00000000000..a9c40b9e522 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_use_mtflush_basic.test @@ -0,0 +1,22 @@ +--source include/have_innodb.inc +# bool readonly + +# +# show values; +# +select @@global.innodb_use_mtflush; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_use_mtflush; +show global variables like 'innodb_use_mtflush'; +show session variables like 'innodb_use_mtflush'; +select * from information_schema.global_variables where variable_name='innodb_use_mtflush'; +select * from information_schema.session_variables where variable_name='innodb_use_mtflush'; + +# +# show that 
it's read-only +# +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +set global innodb_use_mtflush=1; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +set session innodb_use_mtflush=1; + diff --git a/mysql-test/suite/sys_vars/t/innodb_use_trim_basic.test b/mysql-test/suite/sys_vars/t/innodb_use_trim_basic.test new file mode 100644 index 00000000000..c1b0f142179 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_use_trim_basic.test @@ -0,0 +1,36 @@ +--source include/have_innodb.inc + +SET @start_use_trim = @@global.innodb_use_trim; +SELECT @start_use_trim; + +SELECT COUNT(@@GLOBAL.innodb_use_trim); +--echo 1 Expected + +#################################################################### +# Check if Value can set # +#################################################################### + +SET @@GLOBAL.innodb_use_trim=1; + +SELECT COUNT(@@GLOBAL.innodb_use_trim); +--echo 1 Expected + +################################################################# +# Check if the value in GLOBAL Table matches value in variable # +################################################################# + +SELECT IF(@@GLOBAL.innodb_use_trim, 'ON', 'OFF') = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_use_trim'; +--echo 1 Expected + +SELECT COUNT(@@GLOBAL.innodb_use_trim); +--echo 1 Expected + +SELECT COUNT(VARIABLE_VALUE) +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_use_trim'; +--echo 1 Expected + +SET @@global.innodb_use_trim = @start_use_trim; +SELECT @@global.innodb_use_trim;
\ No newline at end of file diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index c24f1cda59e..eb94f6ba703 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -18,6 +18,17 @@ INCLUDE(CheckFunctionExists) INCLUDE(CheckCSourceCompiles) INCLUDE(CheckCSourceRuns) +INCLUDE(lz4) +INCLUDE(lzo) +INCLUDE(lzma) +INCLUDE(bzip2) +INCLUDE(snappy) + +MYSQL_CHECK_LZ4() +MYSQL_CHECK_LZO() +MYSQL_CHECK_LZMA() +MYSQL_CHECK_BZIP2() +MYSQL_CHECK_SNAPPY() # OS tests IF(UNIX) @@ -338,6 +349,7 @@ SET(INNOBASE_SOURCES buf/buf0flu.cc buf/buf0lru.cc buf/buf0rea.cc + buf/buf0mtflu.cc data/data0data.cc data/data0type.cc dict/dict0boot.cc @@ -351,6 +363,7 @@ SET(INNOBASE_SOURCES eval/eval0eval.cc eval/eval0proc.cc fil/fil0fil.cc + fil/fil0pagecompress.cc fsp/fsp0fsp.cc fut/fut0fut.cc fut/fut0lst.cc diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 79b533481b7..01bcd18db1d 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -2031,7 +2031,7 @@ btr_parse_page_reorganize( buf_block_t* block, /*!< in: page to be reorganized, or NULL */ mtr_t* mtr) /*!< in: mtr or NULL */ { - ulint level; + ulint level = page_zip_level; ut_ad(ptr && end_ptr); diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index c8dd4fae0a9..315a1d1b558 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -1873,9 +1873,13 @@ btr_cur_update_alloc_zip_func( false=update-in-place */ mtr_t* mtr) /*!< in/out: mini-transaction */ { + + /* Have a local copy of the variables as these can change + dynamically. 
*/ const page_t* page = page_cur_get_page(cursor); ut_ad(page_zip == page_cur_get_page_zip(cursor)); + ut_ad(page_zip); ut_ad(!dict_index_is_ibuf(index)); ut_ad(rec_offs_validate(page_cur_get_rec(cursor), index, offsets)); diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index e5800ef30c0..46f7f5a49df 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -2,6 +2,7 @@ Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -878,6 +879,11 @@ buf_page_print( mach_read_from_4(read_buf + FIL_PAGE_OFFSET), mach_read_from_4(read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID)); + + ulint page_type = mach_read_from_4(read_buf + FIL_PAGE_TYPE); + + fprintf(stderr, "InnoDB: page type %ld meaning %s\n", page_type, + fil_get_page_type_name(page_type)); } #ifndef UNIV_HOTBACKUP @@ -3415,6 +3421,7 @@ buf_page_init_low( bpage->access_time = 0; bpage->newest_modification = 0; bpage->oldest_modification = 0; + bpage->write_size = 0; HASH_INVALIDATE(bpage, hash); #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG bpage->file_page_was_freed = FALSE; diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index 62222993622..ad76765145b 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -382,7 +383,7 @@ buf_dblwr_init_or_load_pages( /* Read the trx sys header to check if we are using the doublewrite buffer */ off_t trx_sys_page = TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE; - os_file_read(file, read_buf, trx_sys_page, UNIV_PAGE_SIZE); + os_file_read(file, read_buf, trx_sys_page, UNIV_PAGE_SIZE, FALSE); doublewrite = read_buf + TRX_SYS_DOUBLEWRITE; @@ -416,12 +417,11 @@ buf_dblwr_init_or_load_pages( } /* Read the pages from the doublewrite buffer to memory */ - block_bytes = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE; - os_file_read(file, buf, block1 * UNIV_PAGE_SIZE, block_bytes); + os_file_read(file, buf, block1 * UNIV_PAGE_SIZE, block_bytes, FALSE); os_file_read(file, buf + block_bytes, block2 * UNIV_PAGE_SIZE, - block_bytes); + block_bytes, FALSE); /* Check if any of these pages is half-written in data files, in the intended position */ @@ -514,7 +514,7 @@ buf_dblwr_process() fil_io(OS_FILE_READ, true, space_id, zip_size, page_no, 0, zip_size ? zip_size : UNIV_PAGE_SIZE, - read_buf, NULL); + read_buf, NULL, 0); /* Check if the page is corrupt */ @@ -566,7 +566,7 @@ buf_dblwr_process() fil_io(OS_FILE_WRITE, true, space_id, zip_size, page_no, 0, zip_size ? zip_size : UNIV_PAGE_SIZE, - page, NULL); + page, NULL, 0); ib_logf(IB_LOG_LEVEL_INFO, "Recovered the page from" @@ -586,7 +586,7 @@ buf_dblwr_process() zip_size, page_no, 0, zip_size ? 
zip_size : UNIV_PAGE_SIZE, - page, NULL); + page, NULL, NULL); } } } @@ -798,7 +798,7 @@ buf_dblwr_write_block_to_datafile( buf_page_get_page_no(bpage), 0, buf_page_get_zip_size(bpage), (void*) bpage->zip.data, - (void*) bpage); + (void*) bpage, 0); return; } @@ -810,8 +810,7 @@ buf_dblwr_write_block_to_datafile( fil_io(flags, sync, buf_block_get_space(block), 0, buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE, - (void*) block->frame, (void*) block); - + (void*) block->frame, (void*) block, (ulint *)&bpage->write_size); } /********************************************************************//** @@ -905,7 +904,7 @@ try_again: fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, buf_dblwr->block1, 0, len, - (void*) write_buf, NULL); + (void*) write_buf, NULL, 0); if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { /* No unwritten pages in the second block. */ @@ -921,7 +920,7 @@ try_again: fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, buf_dblwr->block2, 0, len, - (void*) write_buf, NULL); + (void*) write_buf, NULL, 0); flush: /* increment the doublewrite flushed pages counter */ @@ -1150,14 +1149,14 @@ retry: fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, offset, 0, UNIV_PAGE_SIZE, (void*) (buf_dblwr->write_buf - + UNIV_PAGE_SIZE * i), NULL); + + UNIV_PAGE_SIZE * i), NULL, 0); } else { /* It is a regular page. Write it directly to the doublewrite buffer */ fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, offset, 0, UNIV_PAGE_SIZE, (void*) ((buf_block_t*) bpage)->frame, - NULL); + NULL, 0); } /* Now flush the doublewrite buffer data to disk */ diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index f5145297b3f..79f02b19933 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1,6 +1,8 @@ /***************************************************************************** Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. 
+Copyright (c) 2013, 2014, Fusion-io. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -30,6 +32,7 @@ Created 11/11/1995 Heikki Tuuri #endif #include "buf0buf.h" +#include "buf0mtflu.h" #include "buf0checksum.h" #include "srv0start.h" #include "srv0srv.h" @@ -44,10 +47,12 @@ Created 11/11/1995 Heikki Tuuri #include "ibuf0ibuf.h" #include "log0log.h" #include "os0file.h" +#include "os0sync.h" #include "trx0sys.h" #include "srv0mon.h" #include "mysql/plugin.h" #include "mysql/service_thd_wait.h" +#include "fil0pagecompress.h" /** Number of pages flushed through non flush_list flushes. */ static ulint buf_lru_flush_page_count = 0; @@ -75,15 +80,6 @@ in thrashing. */ /* @} */ -/** Handled page counters for a single flush */ -struct flush_counters_t { - ulint flushed; /*!< number of dirty pages flushed */ - ulint evicted; /*!< number of clean pages evicted, including - evicted uncompressed page images */ - ulint unzip_LRU_evicted;/*!< number of uncompressed page images - evicted */ -}; - /******************************************************************//** Increases flush_list size in bytes with zip_size for compressed page, UNIV_PAGE_SIZE for uncompressed page in inline function */ @@ -732,8 +728,10 @@ buf_flush_write_complete( flush_type = buf_page_get_flush_type(bpage); buf_pool->n_flush[flush_type]--; +#ifdef UNIV_DEBUG /* fprintf(stderr, "n pending flush %lu\n", buf_pool->n_flush[flush_type]); */ +#endif if (buf_pool->n_flush[flush_type] == 0 && buf_pool->init_flush[flush_type] == FALSE) { @@ -887,6 +885,8 @@ buf_flush_write_block_low( { ulint zip_size = buf_page_get_zip_size(bpage); page_t* frame = NULL; + ulint space_id = buf_page_get_space(bpage); + atomic_writes_t awrites = fil_space_get_atomic_writes(space_id); #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); @@ -963,12 +963,28 @@ buf_flush_write_block_low( 
sync, buf_page_get_space(bpage), zip_size, buf_page_get_page_no(bpage), 0, zip_size ? zip_size : UNIV_PAGE_SIZE, - frame, bpage); - } else if (flush_type == BUF_FLUSH_SINGLE_PAGE) { - buf_dblwr_write_single_page(bpage, sync); + frame, bpage, &bpage->write_size); } else { - ut_ad(!sync); - buf_dblwr_add_to_batch(bpage); + + /* InnoDB uses doublewrite buffer and doublewrite buffer + is initialized. User can define do we use atomic writes + on a file space (table) or not. If atomic writes are + not used we should use doublewrite buffer and if + atomic writes should be used, no doublewrite buffer + is used. */ + + if (awrites == ATOMIC_WRITES_ON) { + fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, + FALSE, buf_page_get_space(bpage), zip_size, + buf_page_get_page_no(bpage), 0, + zip_size ? zip_size : UNIV_PAGE_SIZE, + frame, bpage, &bpage->write_size); + } else if (flush_type == BUF_FLUSH_SINGLE_PAGE) { + buf_dblwr_write_single_page(bpage, sync); + } else { + ut_ad(!sync); + buf_dblwr_add_to_batch(bpage); + } } /* When doing single page flushing the IO is done synchronously @@ -1228,7 +1244,9 @@ buf_flush_try_neighbors( } } +#ifdef UNIV_DEBUG /* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */ +#endif if (high > fil_space_get_size(space)) { high = fil_space_get_size(space); @@ -1681,7 +1699,6 @@ pages: to avoid deadlocks, this function must be written so that it cannot end up waiting for these latches! NOTE 2: in the case of a flush list flush, the calling thread is not allowed to own any latches on pages! 
@return number of blocks for which the write request was queued */ -static void buf_flush_batch( /*============*/ @@ -1738,7 +1755,6 @@ buf_flush_batch( /******************************************************************//** Gather the aggregated stats for both flush list and LRU list flushing */ -static void buf_flush_common( /*=============*/ @@ -1763,7 +1779,6 @@ buf_flush_common( /******************************************************************//** Start a buffer flush batch for LRU or flush list */ -static ibool buf_flush_start( /*============*/ @@ -1792,7 +1807,6 @@ buf_flush_start( /******************************************************************//** End a buffer flush batch for LRU or flush list */ -static void buf_flush_end( /*==========*/ @@ -1908,6 +1922,10 @@ buf_flush_list( ulint i; bool success = true; + if (buf_mtflu_init_done()) { + return(buf_mtflu_flush_list(min_n, lsn_limit, n_processed)); + } + if (n_processed) { *n_processed = 0; } @@ -2078,6 +2096,11 @@ buf_flush_LRU_tail(void) { ulint total_flushed = 0; + if(buf_mtflu_init_done()) + { + return(buf_mtflu_flush_LRU_tail()); + } + for (ulint i = 0; i < srv_buf_pool_instances; i++) { buf_pool_t* buf_pool = buf_pool_from_array(i); @@ -2394,6 +2417,8 @@ page_cleaner_sleep_if_needed( } } + + /******************************************************************//** page_cleaner thread tasked with flushing dirty pages from the buffer pools. As of now we'll have only one instance of this thread. 
@@ -2420,7 +2445,6 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( fprintf(stderr, "InnoDB: page_cleaner thread running, id %lu\n", os_thread_pf(os_thread_get_curr_id())); #endif /* UNIV_DEBUG_THREAD_CREATION */ - buf_page_cleaner_is_active = TRUE; while (srv_shutdown_state == SRV_SHUTDOWN_NONE) { @@ -2435,10 +2459,11 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( /* Flush pages from flush_list if required */ page_cleaner_flush_pages_if_needed(); n_flushed = 0; + } else { n_flushed = page_cleaner_do_flush_batch( - PCT_IO(100), - LSN_MAX); + PCT_IO(100), + LSN_MAX); if (n_flushed) { MONITOR_INC_VALUE_CUMULATIVE( @@ -2454,6 +2479,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( } ut_ad(srv_shutdown_state > 0); + if (srv_fast_shutdown == 2) { /* In very fast shutdown we simulate a crash of buffer pool. We are not required to do any flushing */ @@ -2619,9 +2645,11 @@ buf_flush_validate( return(ret); } + #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ #endif /* !UNIV_HOTBACKUP */ + #ifdef UNIV_DEBUG /******************************************************************//** Check if there are any dirty pages that belong to a space id in the flush diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc new file mode 100644 index 00000000000..f5b3d81991a --- /dev/null +++ b/storage/innobase/buf/buf0mtflu.cc @@ -0,0 +1,732 @@ +/***************************************************************************** + +Copyright (C) 2013, 2014, Fusion-io. All Rights Reserved. +Copyright (C) 2013, 2015, MariaDB Corporation. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file buf/buf0mtflu.cc +Multi-threaded flush method implementation + +Created 06/11/2013 Dhananjoy Das DDas@fusionio.com +Modified 12/12/2013 Jan Lindström jan.lindstrom@skysql.com +Modified 03/02/2014 Dhananjoy Das DDas@fusionio.com +Modified 06/02/2014 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +#include "buf0buf.h" +#include "buf0flu.h" +#include "buf0mtflu.h" +#include "buf0checksum.h" +#include "srv0start.h" +#include "srv0srv.h" +#include "page0zip.h" +#include "ut0byte.h" +#include "ut0lst.h" +#include "page0page.h" +#include "fil0fil.h" +#include "buf0lru.h" +#include "buf0rea.h" +#include "ibuf0ibuf.h" +#include "log0log.h" +#include "os0file.h" +#include "os0sync.h" +#include "trx0sys.h" +#include "srv0mon.h" +#include "mysql/plugin.h" +#include "mysql/service_thd_wait.h" +#include "fil0pagecompress.h" + +#define MT_COMP_WATER_MARK 50 +/** Time to wait for a message. 
*/ +#define MT_WAIT_IN_USECS 5000000 + +/* Work item status */ +typedef enum wrk_status { + WRK_ITEM_UNSET=0, /*!< Work item is not set */ + WRK_ITEM_START=1, /*!< Processing of work item has started */ + WRK_ITEM_DONE=2, /*!< Processing is done; usually set to + SUCCESS/FAILED (same value as WRK_ITEM_SUCCESS) */ + WRK_ITEM_SUCCESS=2, /*!< Work item successfully processed */ + WRK_ITEM_FAILED=3, /*!< Work item processing failed */ + WRK_ITEM_EXIT=4, /*!< Exiting */ + WRK_ITEM_SET=5, /*!< Work item is set */ + WRK_ITEM_STATUS_UNDEFINED +} wrk_status_t; + +/* Work item task type */ +typedef enum mt_wrk_tsk { + MT_WRK_NONE=0, /*!< Exit queue-wait */ + MT_WRK_WRITE=1, /*!< Flush operation */ + MT_WRK_READ=2, /*!< Read operation */ + MT_WRK_UNDEFINED +} mt_wrk_tsk_t; + +/* Work thread status */ +typedef enum wthr_status { + WTHR_NOT_INIT=0, /*!< Work thread not initialized */ + WTHR_INITIALIZED=1, /*!< Work thread initialized */ + WTHR_SIG_WAITING=2, /*!< Work thread waiting for a signal */ + WTHR_RUNNING=3, /*!< Work thread running */ + WTHR_NO_WORK=4, /*!< Work thread has no work */ + WTHR_KILL_IT=5, /*!< Work thread should exit */ + WTHR_STATUS_UNDEFINED +} wthr_status_t; + +/* Write work task */ +typedef struct wr_tsk { + buf_pool_t *buf_pool; /*!< buffer-pool instance */ + buf_flush_t flush_type; /*!< flush-type for buffer-pool + flush operation */ + ulint min; /*!< minimum number of pages + requested to be flushed */ + lsn_t lsn_limit; /*!< lsn limit for the buffer-pool + flush operation */ +} wr_tsk_t; + +/* Read work task */ +typedef struct rd_tsk { + buf_pool_t *page_pool; /*!< list of pages to decompress */ +} rd_tsk_t; + +/* Work item */ +typedef struct wrk_itm +{ + mt_wrk_tsk_t tsk; /*!< Task type. 
Based on task-type + one of the entries wr_tsk/rd_tsk + will be used */ + wr_tsk_t wr; /*!< Flush page list */ + rd_tsk_t rd; /*!< Decompress page list */ + ulint n_flushed; /*!< Flushed pages count */ + os_thread_id_t id_usr; /*!< Thread-id currently working */ + wrk_status_t wi_status; /*!< Work item status */ + mem_heap_t *wheap; /*!< Heap where to allocate memory + for queue nodes */ + mem_heap_t *rheap; /*!< Heap handed to ib_wqueue_add() when the item is posted to a completion queue */ +} wrk_t; + +typedef struct thread_data +{ + os_thread_id_t wthread_id; /*!< Thread identifier; compared with os_thread_get_curr_id() to find the thread's slot */ + os_thread_t wthread; /*!< Thread id */ + wthr_status_t wt_status; /*!< Worker thread status */ +} thread_data_t; + +/* Thread synchronization data */ +typedef struct thread_sync +{ + /* Global variables used by all threads */ + os_fast_mutex_t thread_global_mtx; /*!< Mutex protecting the variables + below */ + ulint n_threads; /*!< Number of threads */ + ib_wqueue_t *wq; /*!< Work Queue */ + ib_wqueue_t *wr_cq; /*!< Write Completion Queue */ + ib_wqueue_t *rd_cq; /*!< Read Completion Queue */ + mem_heap_t* wheap; /*!< Work heap where memory + is allocated */ + mem_heap_t* rheap; /*!< Work heap where memory + is allocated */ + wthr_status_t gwt_status; /*!< Global thread status */ + + /* Variables used by only one thread at a time */ + thread_data_t* thread_data; /*!< Thread specific data */ + +} thread_sync_t; + +static int mtflush_work_initialized = -1; +static thread_sync_t* mtflush_ctx=NULL; +static os_fast_mutex_t mtflush_mtx; + +/******************************************************************//** +Set multi-threaded flush work initialized. 
*/ +static inline +void +buf_mtflu_work_init(void) +/*=====================*/ +{ + mtflush_work_initialized = 1; +} + +/******************************************************************//** +Return true if multi-threaded flush is initialized, i.e. the flag set by buf_mtflu_work_init() equals 1. +@return true if initialized */ +bool +buf_mtflu_init_done(void) +/*=====================*/ +{ + return(mtflush_work_initialized == 1); +} + +/******************************************************************//** +Flush one buffer pool instance as described by the work item. +@return number of flushed pages; 0 if nothing was flushed or the flush batch could not be started +*/ +static +ulint +buf_mtflu_flush_pool_instance( +/*==========================*/ + wrk_t *work_item) /*!< inout: work item to be flushed; wr.min and n_flushed are updated */ +{ + flush_counters_t n; + ut_a(work_item != NULL); + ut_a(work_item->wr.buf_pool != NULL); + + if (!buf_flush_start(work_item->wr.buf_pool, work_item->wr.flush_type)) { + /* We have two choices here. If lsn_limit was + specified then skipping an instance of buffer + pool means we cannot guarantee that all pages + up to lsn_limit have been flushed. We can + return right now with failure or we can try + to flush remaining buffer pools up to the + lsn_limit. We attempt to flush other buffer + pools based on the assumption that it will + help in the retry which will follow the + failure. */ +#ifdef UNIV_MTFLUSH_DEBUG + fprintf(stderr, "InnoDB: Note: buf flush start failed there is already active flush for this buffer pool.\n"); +#endif + return 0; + } + + memset(&n, 0, sizeof(flush_counters_t)); + + if (work_item->wr.flush_type == BUF_FLUSH_LRU) { + /* srv_LRU_scan_depth can be arbitrarily large value. + * We cap it with current LRU size. 
+ */ + buf_pool_mutex_enter(work_item->wr.buf_pool); + work_item->wr.min = UT_LIST_GET_LEN(work_item->wr.buf_pool->LRU); + buf_pool_mutex_exit(work_item->wr.buf_pool); + work_item->wr.min = ut_min(srv_LRU_scan_depth,work_item->wr.min); + } + + buf_flush_batch(work_item->wr.buf_pool, + work_item->wr.flush_type, + work_item->wr.min, + work_item->wr.lsn_limit, + &n); + + work_item->n_flushed = n.flushed; + buf_flush_end(work_item->wr.buf_pool, work_item->wr.flush_type); + buf_flush_common(work_item->wr.flush_type, work_item->n_flushed); + + return work_item->n_flushed; +} + +/******************************************************************//** +Worker function that takes one work item from the work queue, processes it and +posts it to the completion queue; the caller's status is recorded in thread_data. +*/ +static +void +mtflush_service_io( +/*===============*/ + thread_sync_t* mtflush_io, /*!< inout: multi-threaded flush + synchronization data */ + thread_data_t* thread_data) /*!< inout: thread status data */ +{ + wrk_t *work_item = NULL; + ulint n_flushed=0; + + ut_a(mtflush_io != NULL); + ut_a(thread_data != NULL); + + thread_data->wt_status = WTHR_SIG_WAITING; + + work_item = (wrk_t *)ib_wqueue_nowait(mtflush_io->wq); + + if (work_item == NULL) { + work_item = (wrk_t *)ib_wqueue_wait(mtflush_io->wq); + } + + if (work_item) { + thread_data->wt_status = WTHR_RUNNING; + } else { + /* Thread did not get any work */ + thread_data->wt_status = WTHR_NO_WORK; + return; + } + + if (work_item->wi_status != WRK_ITEM_EXIT) { + work_item->wi_status = WRK_ITEM_SET; + } + +#ifdef UNIV_MTFLUSH_DEBUG + ut_a(work_item->id_usr == 0); +#endif + work_item->id_usr = os_thread_get_curr_id(); + + /* This works as a producer/consumer model, where in tasks are + * inserted into the work-queue (wq) and completions are based + * on the type of operations performed and as a result the WRITE/ + * compression/flush operation completions get posted to wr_cq. + * And READ/decompress operations completions get posted to rd_cq. + * in future we may have others. 
+ */ + + switch(work_item->tsk) { + case MT_WRK_NONE: + ut_a(work_item->wi_status == WRK_ITEM_EXIT); + work_item->wi_status = WRK_ITEM_EXIT; + ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->rheap); + thread_data->wt_status = WTHR_KILL_IT; + break; + + case MT_WRK_WRITE: + ut_a(work_item->wi_status == WRK_ITEM_SET); + work_item->wi_status = WRK_ITEM_START; + /* Process work item */ + if (0 == (n_flushed = buf_mtflu_flush_pool_instance(work_item))) { + work_item->wi_status = WRK_ITEM_FAILED; + } + work_item->wi_status = WRK_ITEM_SUCCESS; + ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->rheap); + break; + + case MT_WRK_READ: + ut_a(0); + break; + + default: + /* None other than Write/Read handling planned */ + ut_a(0); + break; + } +} + +/******************************************************************//** +Thead used to flush dirty pages when multi-threaded flush is +used. +@return a dummy parameter*/ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(mtflush_io_thread)( +/*==============================*/ + void * arg) +{ + thread_sync_t *mtflush_io = ((thread_sync_t *)arg); + thread_data_t *this_thread_data = NULL; + ulint i; + + /* Find correct slot for this thread */ + os_fast_mutex_lock(&(mtflush_io->thread_global_mtx)); + for(i=0; i < mtflush_io->n_threads; i ++) { + if (mtflush_io->thread_data[i].wthread_id == os_thread_get_curr_id()) { + break; + } + } + + ut_a(i <= mtflush_io->n_threads); + this_thread_data = &mtflush_io->thread_data[i]; + os_fast_mutex_unlock(&(mtflush_io->thread_global_mtx)); + + while (TRUE) { + +#ifdef UNIV_MTFLUSH_DEBUG + fprintf(stderr, "InnoDB: Note. 
Thread %lu work queue len %lu return queue len %lu\n", + os_thread_get_curr_id(), + ib_wqueue_len(mtflush_io->wq), + ib_wqueue_len(mtflush_io->wr_cq)); +#endif /* UNIV_MTFLUSH_DEBUG */ + + mtflush_service_io(mtflush_io, this_thread_data); + + + if (this_thread_data->wt_status == WTHR_KILL_IT) { + break; + } + } + + os_thread_exit(NULL); + OS_THREAD_DUMMY_RETURN; +} + +/******************************************************************//** +Add exit work item to work queue to signal multi-threded flush +threads that they should exit. +*/ +void +buf_mtflu_io_thread_exit(void) +/*==========================*/ +{ + ulint i; + thread_sync_t* mtflush_io = mtflush_ctx; + wrk_t* work_item = NULL; + + ut_a(mtflush_io != NULL); + + /* Allocate work items for shutdown message */ + work_item = (wrk_t*)mem_heap_alloc(mtflush_io->wheap, sizeof(wrk_t)*srv_mtflush_threads); + + /* Confirm if the io-thread KILL is in progress, bailout */ + if (mtflush_io->gwt_status == WTHR_KILL_IT) { + return; + } + + mtflush_io->gwt_status = WTHR_KILL_IT; + + /* This lock is to safequard against timing bug: flush request take + this mutex before sending work items to be processed by flush + threads. Inside flush thread we assume that work queue contains only + a constant number of items. Thus, we may not install new work items + below before all previous ones are processed. This mutex is released + by flush request after all work items sent to flush threads have + been processed. Thus, we can get this mutex if and only if work + queue is empty. 
*/ + + os_fast_mutex_lock(&mtflush_mtx); + + /* Make sure the work queue is empty */ + ut_a(ib_wqueue_is_empty(mtflush_io->wq)); + + /* Send one exit work item/thread */ + for (i=0; i < (ulint)srv_mtflush_threads; i++) { + work_item[i].tsk = MT_WRK_NONE; + work_item[i].wi_status = WRK_ITEM_EXIT; + work_item[i].wheap = mtflush_io->wheap; + work_item[i].rheap = mtflush_io->rheap; + work_item[i].id_usr = 0; + + ib_wqueue_add(mtflush_io->wq, + (void *)&(work_item[i]), + mtflush_io->wheap); + } + + /* Requests sent */ + os_fast_mutex_unlock(&mtflush_mtx); + + /* Wait until all work items on a work queue are processed */ + while(!ib_wqueue_is_empty(mtflush_io->wq)) { + /* Wait */ + os_thread_sleep(MT_WAIT_IN_USECS); + } + + ut_a(ib_wqueue_is_empty(mtflush_io->wq)); + + /* Collect all work done items */ + for (i=0; i < (ulint)srv_mtflush_threads;) { + wrk_t* work_item = NULL; + + work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wr_cq, MT_WAIT_IN_USECS); + + /* If we receive reply to work item and it's status is exit, + thead has processed this message and existed */ + if (work_item && work_item->wi_status == WRK_ITEM_EXIT) { + i++; + } + } + + /* Wait about 1/2 sec to allow threads really exit */ + os_thread_sleep(MT_WAIT_IN_USECS); + + /* Make sure that work queue is empty */ + while(!ib_wqueue_is_empty(mtflush_io->wq)) + { + ib_wqueue_nowait(mtflush_io->wq); + } + + os_fast_mutex_lock(&mtflush_mtx); + + ut_a(ib_wqueue_is_empty(mtflush_io->wq)); + ut_a(ib_wqueue_is_empty(mtflush_io->wr_cq)); + ut_a(ib_wqueue_is_empty(mtflush_io->rd_cq)); + + /* Free all queues */ + ib_wqueue_free(mtflush_io->wq); + ib_wqueue_free(mtflush_io->wr_cq); + ib_wqueue_free(mtflush_io->rd_cq); + + mtflush_io->wq = NULL; + mtflush_io->wr_cq = NULL; + mtflush_io->rd_cq = NULL; + mtflush_work_initialized = 0; + + /* Free heap */ + mem_heap_free(mtflush_io->wheap); + mem_heap_free(mtflush_io->rheap); + + os_fast_mutex_unlock(&mtflush_mtx); + os_fast_mutex_free(&mtflush_mtx); + 
os_fast_mutex_free(&mtflush_io->thread_global_mtx); +} + +/******************************************************************//** +Initialize multi-threaded flush thread syncronization data. +@return Initialized multi-threaded flush thread syncroniztion data. */ +void* +buf_mtflu_handler_init( +/*===================*/ + ulint n_threads, /*!< in: Number of threads to create */ + ulint wrk_cnt) /*!< in: Number of work items */ +{ + ulint i; + mem_heap_t* mtflush_heap; + mem_heap_t* mtflush_heap2; + + /* Create heap, work queue, write completion queue, read + completion queue for multi-threaded flush, and init + handler. */ + mtflush_heap = mem_heap_create(0); + ut_a(mtflush_heap != NULL); + mtflush_heap2 = mem_heap_create(0); + ut_a(mtflush_heap2 != NULL); + + mtflush_ctx = (thread_sync_t *)mem_heap_alloc(mtflush_heap, + sizeof(thread_sync_t)); + memset(mtflush_ctx, 0, sizeof(thread_sync_t)); + ut_a(mtflush_ctx != NULL); + mtflush_ctx->thread_data = (thread_data_t*)mem_heap_alloc( + mtflush_heap, sizeof(thread_data_t) * n_threads); + ut_a(mtflush_ctx->thread_data); + memset(mtflush_ctx->thread_data, 0, sizeof(thread_data_t) * n_threads); + + mtflush_ctx->n_threads = n_threads; + mtflush_ctx->wq = ib_wqueue_create(); + ut_a(mtflush_ctx->wq); + mtflush_ctx->wr_cq = ib_wqueue_create(); + ut_a(mtflush_ctx->wr_cq); + mtflush_ctx->rd_cq = ib_wqueue_create(); + ut_a(mtflush_ctx->rd_cq); + mtflush_ctx->wheap = mtflush_heap; + mtflush_ctx->rheap = mtflush_heap2; + + os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_ctx->thread_global_mtx); + os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx); + + /* Create threads for page-compression-flush */ + for(i=0; i < n_threads; i++) { + os_thread_id_t new_thread_id; + + mtflush_ctx->thread_data[i].wt_status = WTHR_INITIALIZED; + + mtflush_ctx->thread_data[i].wthread = os_thread_create( + mtflush_io_thread, + ((void *) mtflush_ctx), + &new_thread_id); + + mtflush_ctx->thread_data[i].wthread_id = new_thread_id; + } + + 
buf_mtflu_work_init(); + + return((void *)mtflush_ctx); +} + +/******************************************************************//** +Flush buffer pool instances. +@return number of pages flushed. */ +ulint +buf_mtflu_flush_work_items( +/*=======================*/ + ulint buf_pool_inst, /*!< in: Number of buffer pool instances */ + ulint *per_pool_pages_flushed, /*!< out: Number of pages + flushed/instance */ + buf_flush_t flush_type, /*!< in: Type of flush */ + ulint min_n, /*!< in: Wished minimum number of + blocks to be flushed */ + lsn_t lsn_limit) /*!< in: All blocks whose + oldest_modification is smaller than + this should be flushed (if their + number does not exceed min_n) */ +{ + ulint n_flushed=0, i; + mem_heap_t* work_heap; + mem_heap_t* reply_heap; + wrk_t work_item[MTFLUSH_MAX_WORKER]; + + if (mtflush_ctx->gwt_status == WTHR_KILL_IT) { + return 0; + } + + /* Allocate heap where all work items used and queue + node items areallocated */ + work_heap = mem_heap_create(0); + reply_heap = mem_heap_create(0); + + + for(i=0;i<buf_pool_inst; i++) { + work_item[i].tsk = MT_WRK_WRITE; + work_item[i].wr.buf_pool = buf_pool_from_array(i); + work_item[i].wr.flush_type = flush_type; + work_item[i].wr.min = min_n; + work_item[i].wr.lsn_limit = lsn_limit; + work_item[i].wi_status = WRK_ITEM_UNSET; + work_item[i].wheap = work_heap; + work_item[i].rheap = reply_heap; + work_item[i].n_flushed = 0; + work_item[i].id_usr = 0; + + ib_wqueue_add(mtflush_ctx->wq, + (void *)(work_item + i), + work_heap); + } + + /* wait on the completion to arrive */ + for(i=0; i< buf_pool_inst;) { + wrk_t *done_wi = NULL; + done_wi = (wrk_t *)ib_wqueue_wait(mtflush_ctx->wr_cq); + + if (done_wi != NULL) { + per_pool_pages_flushed[i] = done_wi->n_flushed; + +#ifdef UNIV_MTFLUSH_DEBUG + if((int)done_wi->id_usr == 0 && + (done_wi->wi_status == WRK_ITEM_SET || + done_wi->wi_status == WRK_ITEM_UNSET)) { + fprintf(stderr, + "**Set/Unused work_item[%lu] flush_type=%d\n", + i, + 
done_wi->wr.flush_type); + ut_a(0); + } +#endif + + n_flushed+= done_wi->n_flushed; + i++; + } + } + + /* Release used work_items and queue nodes */ + mem_heap_free(work_heap); + mem_heap_free(reply_heap); + + return(n_flushed); +} + +/*******************************************************************//** +Multi-threaded version of buf_flush_list +*/ +bool +buf_mtflu_flush_list( +/*=================*/ + ulint min_n, /*!< in: wished minimum mumber of blocks + flushed (it is not guaranteed that the + actual number is that big, though) */ + lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all + blocks whose oldest_modification is + smaller than this should be flushed + (if their number does not exceed + min_n), otherwise ignored */ + ulint* n_processed) /*!< out: the number of pages + which were processed is passed + back to caller. Ignored if NULL */ + +{ + ulint i; + bool success = true; + ulint cnt_flush[MTFLUSH_MAX_WORKER]; + + if (n_processed) { + *n_processed = 0; + } + + if (min_n != ULINT_MAX) { + /* Ensure that flushing is spread evenly amongst the + buffer pool instances. When min_n is ULINT_MAX + we need to flush everything up to the lsn limit + so no limit here. */ + min_n = (min_n + srv_buf_pool_instances - 1) + / srv_buf_pool_instances; + } + + /* This lock is to safequard against re-entry if any. 
*/ + os_fast_mutex_lock(&mtflush_mtx); + buf_mtflu_flush_work_items(srv_buf_pool_instances, + cnt_flush, BUF_FLUSH_LIST, + min_n, lsn_limit); + os_fast_mutex_unlock(&mtflush_mtx); + + for (i = 0; i < srv_buf_pool_instances; i++) { + if (n_processed) { + *n_processed += cnt_flush[i]; + } + if (cnt_flush[i]) { + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_FLUSH_BATCH_TOTAL_PAGE, + MONITOR_FLUSH_BATCH_COUNT, + MONITOR_FLUSH_BATCH_PAGES, + cnt_flush[i]); + } + } +#ifdef UNIV_MTFLUSH_DEBUG + fprintf(stderr, "%s: [1] [*n_processed: (min:%lu)%lu ]\n", + __FUNCTION__, (min_n * srv_buf_pool_instances), *n_processed); +#endif + return(success); +} + +/*********************************************************************//** +Clears up tail of the LRU lists: +* Put replaceable pages at the tail of LRU to the free list +* Flush dirty pages at the tail of LRU to the disk +The depth to which we scan each buffer pool is controlled by dynamic +config parameter innodb_LRU_scan_depth. +@return total pages flushed */ +UNIV_INTERN +ulint +buf_mtflu_flush_LRU_tail(void) +/*==========================*/ +{ + ulint total_flushed=0, i; + ulint cnt_flush[MTFLUSH_MAX_WORKER]; + + ut_a(buf_mtflu_init_done()); + + /* This lock is to safeguard against re-entry if any */ + os_fast_mutex_lock(&mtflush_mtx); + buf_mtflu_flush_work_items(srv_buf_pool_instances, + cnt_flush, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0); + os_fast_mutex_unlock(&mtflush_mtx); + + for (i = 0; i < srv_buf_pool_instances; i++) { + if (cnt_flush[i]) { + total_flushed += cnt_flush[i]; + + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_LRU_BATCH_TOTAL_PAGE, + MONITOR_LRU_BATCH_COUNT, + MONITOR_LRU_BATCH_PAGES, + cnt_flush[i]); + } + } + +#if UNIV_MTFLUSH_DEBUG + fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu ]\n", ( + srv_LRU_scan_depth * srv_buf_pool_instances), total_flushed); +#endif + + return(total_flushed); +} + +/*********************************************************************//** +Set correct thread identifiers to io thread 
array based on +information we have. */ +void +buf_mtflu_set_thread_ids( +/*=====================*/ + ulint n_threads, /*!<in: Number of threads to fill */ + void* ctx, /*!<in: thread context */ + os_thread_id_t* thread_ids) /*!<in: thread id array */ +{ + thread_sync_t *mtflush_io = ((thread_sync_t *)ctx); + ulint i; + ut_a(mtflush_io != NULL); + ut_a(thread_ids != NULL); + + for(i = 0; i < n_threads; i++) { + thread_ids[i] = mtflush_io->thread_data[i].wthread_id; + } +} diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc index 7c8369c0c09..ec76c9923fe 100644 --- a/storage/innobase/buf/buf0rea.cc +++ b/storage/innobase/buf/buf0rea.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -184,14 +185,14 @@ buf_read_page_low( *err = fil_io(OS_FILE_READ | wake_later | ignore_nonexistent_pages, sync, space, zip_size, offset, 0, zip_size, - bpage->zip.data, bpage); + bpage->zip.data, bpage, &bpage->write_size); } else { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); *err = fil_io(OS_FILE_READ | wake_later | ignore_nonexistent_pages, sync, space, 0, offset, 0, UNIV_PAGE_SIZE, - ((buf_block_t*) bpage)->frame, bpage); + ((buf_block_t*) bpage)->frame, bpage, 0); } if (sync) { diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index b866f44cc54..52a9bb686df 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -2,6 +2,7 @@ Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 506ba320853..cb43477ed59 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, MariaDB Corporation. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -24,6 +25,8 @@ Created 10/25/1995 Heikki Tuuri *******************************************************/ #include "fil0fil.h" +#include "fil0pagecompress.h" +#include "fsp0pagecompress.h" #include <debug_sync.h> #include <my_dbug.h> @@ -45,6 +48,7 @@ Created 10/25/1995 Heikki Tuuri #include "page0zip.h" #include "trx0sys.h" #include "row0mysql.h" +#include "os0file.h" #ifndef UNIV_HOTBACKUP # include "buf0lru.h" # include "ibuf0ibuf.h" @@ -54,6 +58,13 @@ Created 10/25/1995 Heikki Tuuri # include "srv0srv.h" static ulint srv_data_read, srv_data_written; #endif /* !UNIV_HOTBACKUP */ +#include "zlib.h" +#ifdef __linux__ +#include <linux/fs.h> +#include <sys/ioctl.h> +#include <fcntl.h> +#endif +#include "row0mysql.h" MYSQL_PLUGIN_IMPORT extern my_bool lower_case_file_system; @@ -262,11 +273,16 @@ fil_read( block size multiple */ void* buf, /*!< in/out: buffer where to store data read; in aio this must be appropriately aligned */ - void* message) /*!< in: message for aio handler if non-sync + void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ + ulint* write_size) /*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again 
if + actual page size does not decrease. */ { return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset, - byte_offset, len, buf, message)); + byte_offset, len, buf, message, write_size)); } /********************************************************************//** @@ -291,18 +307,22 @@ fil_write( be a block size multiple */ void* buf, /*!< in: buffer from which to write; in aio this must be appropriately aligned */ - void* message) /*!< in: message for aio handler if non-sync + void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ + ulint* write_size) /*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ { ut_ad(!srv_read_only_mode); return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset, - byte_offset, len, buf, message)); + byte_offset, len, buf, message, write_size)); } /*******************************************************************//** Returns the table space by a given id, NULL if not found. */ -UNIV_INLINE fil_space_t* fil_space_get_by_id( /*================*/ @@ -320,6 +340,19 @@ fil_space_get_by_id( return(space); } +/****************************************************************//** +Get space id from fil node */ +ulint +fil_node_get_space_id( +/*==================*/ + fil_node_t* node) /*!< in: Compressed node*/ +{ + ut_ad(node); + ut_ad(node->space); + + return (node->space->id); +} + /*******************************************************************//** Returns the table space by a given name, NULL if not found. 
*/ UNIV_INLINE @@ -540,8 +573,9 @@ fil_node_open_file( byte* buf2; byte* page; ulint space_id; - ulint flags; + ulint flags=0; ulint page_size; + ulint atomic_writes=0; ut_ad(mutex_own(&(system->mutex))); ut_a(node->n_pending == 0); @@ -558,7 +592,7 @@ fil_node_open_file( node->handle = os_file_create_simple_no_error_handling( innodb_file_data_key, node->name, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &success); + OS_FILE_READ_ONLY, &success, 0); if (!success) { /* The following call prints an error message */ os_file_get_last_error(true); @@ -575,6 +609,8 @@ fil_node_open_file( size_bytes = os_file_get_size(node->handle); ut_a(size_bytes != (os_offset_t) -1); + + node->file_block_size = os_file_get_block_size(node->handle, node->name); #ifdef UNIV_HOTBACKUP if (space->id == 0) { node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE); @@ -606,10 +642,14 @@ fil_node_open_file( set */ page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE)); - success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE); + success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE, + space->flags); + space_id = fsp_header_get_space_id(page); flags = fsp_header_get_flags(page); page_size = fsp_flags_get_page_size(flags); + atomic_writes = fsp_flags_get_atomic_writes(flags); + ut_free(buf2); @@ -660,6 +700,17 @@ fil_node_open_file( ut_error; } + if (UNIV_UNLIKELY(space->flags != flags)) { + if (!dict_tf_verify_flags(space->flags, flags)) { + fprintf(stderr, + "InnoDB: Error: table flags are 0x%lx" + " in the data dictionary\n" + "InnoDB: but the flags in file %s are 0x%lx!\n", + space->flags, node->name, flags); + ut_error; + } + } + if (size_bytes >= 1024 * 1024) { /* Truncate the size to whole megabytes. 
*/ size_bytes = ut_2pow_round(size_bytes, 1024 * 1024); @@ -679,6 +730,8 @@ add_size: space->size += node->size; } + atomic_writes = fsp_flags_get_atomic_writes(space->flags); + /* printf("Opening file %s\n", node->name); */ /* Open the file for reading and writing, in Windows normally in the @@ -689,18 +742,22 @@ add_size: node->handle = os_file_create(innodb_file_log_key, node->name, OS_FILE_OPEN, OS_FILE_AIO, OS_LOG_FILE, - &ret); + &ret, atomic_writes); } else if (node->is_raw_disk) { node->handle = os_file_create(innodb_file_data_key, node->name, OS_FILE_OPEN_RAW, OS_FILE_AIO, OS_DATA_FILE, - &ret); + &ret, atomic_writes); } else { node->handle = os_file_create(innodb_file_data_key, node->name, OS_FILE_OPEN, OS_FILE_AIO, OS_DATA_FILE, - &ret); + &ret, atomic_writes); + } + + if (node->file_block_size == 0) { + node->file_block_size = os_file_get_block_size(node->handle, node->name); } ut_a(ret); @@ -1071,7 +1128,6 @@ fil_space_create( DBUG_EXECUTE_IF("fil_space_create_failure", return(false);); ut_a(fil_system); - ut_a(fsp_flags_is_valid(flags)); /* Look for a matching tablespace and if found free it. 
*/ do { @@ -1148,6 +1204,7 @@ fil_space_create( space->flags = flags; space->magic_n = FIL_SPACE_MAGIC_N; + space->printed_compression_failure = false; rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP); @@ -1725,12 +1782,12 @@ fil_write_lsn_and_arch_no_to_file( buf = static_cast<byte*>(ut_align(buf1, UNIV_PAGE_SIZE)); err = fil_read(TRUE, space, 0, sum_of_sizes, 0, - UNIV_PAGE_SIZE, buf, NULL); + UNIV_PAGE_SIZE, buf, NULL, 0); if (err == DB_SUCCESS) { mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn); err = fil_write(TRUE, space, 0, sum_of_sizes, 0, - UNIV_PAGE_SIZE, buf, NULL); + UNIV_PAGE_SIZE, buf, NULL, 0); } mem_free(buf1); @@ -1870,8 +1927,10 @@ fil_read_first_page( #endif /* UNIV_LOG_ARCHIVE */ lsn_t* min_flushed_lsn, /*!< out: min of flushed lsn values in data files */ - lsn_t* max_flushed_lsn) /*!< out: max of flushed + lsn_t* max_flushed_lsn, /*!< out: max of flushed lsn values in data files */ + ulint orig_space_id) /*!< in: original file space + id */ { byte* buf; byte* page; @@ -1884,7 +1943,10 @@ fil_read_first_page( page = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE)); - os_file_read(data_file, page, 0, UNIV_PAGE_SIZE); + os_file_read(data_file, page, 0, UNIV_PAGE_SIZE, + orig_space_id != ULINT_UNDEFINED ? + fil_space_is_page_compressed(orig_space_id) : + FALSE); /* The FSP_HEADER on page 0 is only valid for the first file in a tablespace. So if this is not the first datafile, leave @@ -1893,12 +1955,21 @@ fil_read_first_page( if (!one_read_already) { *flags = fsp_header_get_flags(page); *space_id = fsp_header_get_space_id(page); + } - check_msg = fil_check_first_page(page); + /* Page is page compressed page, need to decompress, before + continue. 
*/ + if (fil_page_is_compressed(page)) { + ulint write_size=0; + fil_decompress_page(NULL, page, UNIV_PAGE_SIZE, &write_size); } flushed_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN); + if (!one_read_already) { + check_msg = fil_check_first_page(page); + } + ut_free(buf); if (check_msg) { @@ -3027,7 +3098,7 @@ fil_create_link_file( file = os_file_create_simple_no_error_handling( innodb_file_data_key, link_filepath, - OS_FILE_CREATE, OS_FILE_READ_WRITE, &success); + OS_FILE_CREATE, OS_FILE_READ_WRITE, &success, 0); if (!success) { /* The following call will print an error message */ @@ -3043,10 +3114,10 @@ fil_create_link_file( ut_print_filename(stderr, filepath); fputs(" already exists.\n", stderr); err = DB_TABLESPACE_EXISTS; - } else if (error == OS_FILE_DISK_FULL) { err = DB_OUT_OF_FILE_SPACE; - + } else if (error == OS_FILE_OPERATION_NOT_SUPPORTED) { + err = DB_UNSUPPORTED; } else { err = DB_ERROR; } @@ -3057,7 +3128,7 @@ fil_create_link_file( } if (!os_file_write(link_filepath, file, filepath, 0, - strlen(filepath))) { + strlen(filepath))) { err = DB_ERROR; } @@ -3136,8 +3207,9 @@ fil_open_linked_file( /*===============*/ const char* tablename, /*!< in: database/tablename */ char** remote_filepath,/*!< out: remote filepath */ - os_file_t* remote_file) /*!< out: remote file handle */ - + os_file_t* remote_file, /*!< out: remote file handle */ + ulint atomic_writes) /*!< in: atomic writes table option + value */ { ibool success; @@ -3151,7 +3223,7 @@ fil_open_linked_file( *remote_file = os_file_create_simple_no_error_handling( innodb_file_data_key, *remote_filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, - &success); + &success, atomic_writes); if (!success) { char* link_filepath = fil_make_isl_name(tablename); @@ -3206,6 +3278,7 @@ fil_create_new_single_table_tablespace( /* TRUE if a table is created with CREATE TEMPORARY TABLE */ bool is_temp = !!(flags2 & DICT_TF2_TEMPORARY); bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags); + ulint atomic_writes = 
FSP_FLAGS_GET_ATOMIC_WRITES(flags); ut_a(space_id > 0); ut_ad(!srv_read_only_mode); @@ -3238,7 +3311,8 @@ fil_create_new_single_table_tablespace( OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL, OS_DATA_FILE, - &ret); + &ret, + atomic_writes); if (ret == FALSE) { /* The following call will print an error message */ @@ -3265,6 +3339,11 @@ fil_create_new_single_table_tablespace( goto error_exit_3; } + if (error == OS_FILE_OPERATION_NOT_SUPPORTED) { + err = DB_UNSUPPORTED; + goto error_exit_3; + } + if (error == OS_FILE_DISK_FULL) { err = DB_OUT_OF_FILE_SPACE; goto error_exit_3; @@ -3303,6 +3382,7 @@ fil_create_new_single_table_tablespace( flags = fsp_flags_set_page_size(flags, UNIV_PAGE_SIZE); fsp_header_init_fields(page, space_id, flags); mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); + ut_ad(fsp_flags_is_valid(flags)); if (!(fsp_flags_is_compressed(flags))) { buf_flush_init_for_writing(page, NULL, 0); @@ -3479,16 +3559,25 @@ fil_open_single_table_tablespace( fsp_open_info remote; ulint tablespaces_found = 0; ulint valid_tablespaces_found = 0; + ulint atomic_writes = 0; #ifdef UNIV_SYNC_DEBUG ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); #endif /* UNIV_SYNC_DEBUG */ ut_ad(!fix_dict || mutex_own(&(dict_sys->mutex))); - if (!fsp_flags_is_valid(flags)) { + /* Table flags can be ULINT_UNDEFINED if + dict_tf_to_fsp_flags_failure is set. 
*/ + if (flags != ULINT_UNDEFINED) { + if (!fsp_flags_is_valid(flags)) { + return(DB_CORRUPTION); + } + } else { return(DB_CORRUPTION); } + atomic_writes = fsp_flags_get_atomic_writes(flags); + /* If the tablespace was relocated, we do not compare the DATA_DIR flag */ ulint mod_flags = flags & ~FSP_FLAGS_MASK_DATA_DIR; @@ -3513,7 +3602,7 @@ fil_open_single_table_tablespace( } link_file_found = fil_open_linked_file( - tablename, &remote.filepath, &remote.file); + tablename, &remote.filepath, &remote.file, atomic_writes); remote.success = link_file_found; if (remote.success) { /* possibility of multiple files. */ @@ -3541,7 +3630,7 @@ fil_open_single_table_tablespace( if (dict.filepath) { dict.file = os_file_create_simple_no_error_handling( innodb_file_data_key, dict.filepath, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &dict.success); + OS_FILE_READ_ONLY, &dict.success, atomic_writes); if (dict.success) { /* possibility of multiple files. */ validate = true; @@ -3553,7 +3642,7 @@ fil_open_single_table_tablespace( ut_a(def.filepath); def.file = os_file_create_simple_no_error_handling( innodb_file_data_key, def.filepath, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &def.success); + OS_FILE_READ_ONLY, &def.success, atomic_writes); if (def.success) { tablespaces_found++; } @@ -3572,7 +3661,7 @@ fil_open_single_table_tablespace( #ifdef UNIV_LOG_ARCHIVE &space_arch_log_no, &space_arch_log_no, #endif /* UNIV_LOG_ARCHIVE */ - &def.lsn, &def.lsn); + &def.lsn, &def.lsn, id); def.valid = !def.check_msg; /* Validate this single-table-tablespace with SYS_TABLES, @@ -3597,7 +3686,7 @@ fil_open_single_table_tablespace( #ifdef UNIV_LOG_ARCHIVE &remote.arch_log_no, &remote.arch_log_no, #endif /* UNIV_LOG_ARCHIVE */ - &remote.lsn, &remote.lsn); + &remote.lsn, &remote.lsn, id); remote.valid = !remote.check_msg; /* Validate this single-table-tablespace with SYS_TABLES, @@ -3623,7 +3712,7 @@ fil_open_single_table_tablespace( #ifdef UNIV_LOG_ARCHIVE &dict.arch_log_no, &dict.arch_log_no, #endif /* 
UNIV_LOG_ARCHIVE */ - &dict.lsn, &dict.lsn); + &dict.lsn, &dict.lsn, id); dict.valid = !dict.check_msg; /* Validate this single-table-tablespace with SYS_TABLES, @@ -3887,7 +3976,8 @@ fil_user_tablespace_find_space_id( for (ulint j = 0; j < page_count; ++j) { - st = os_file_read(fsp->file, page, (j* page_size), page_size); + st = os_file_read(fsp->file, page, (j* page_size), page_size, + fsp_flags_is_page_compressed(fsp->flags)); if (!st) { ib_logf(IB_LOG_LEVEL_INFO, @@ -4000,7 +4090,7 @@ fil_user_tablespace_restore_page( err = os_file_write(fsp->filepath, fsp->file, page, (zip_size ? zip_size : page_size) * page_no, - buflen); + buflen); os_file_flush(fsp->file); out: @@ -4027,7 +4117,7 @@ check_first_page: #ifdef UNIV_LOG_ARCHIVE &fsp->arch_log_no, &fsp->arch_log_no, #endif /* UNIV_LOG_ARCHIVE */ - &fsp->lsn, &fsp->lsn)) { + &fsp->lsn, &fsp->lsn, ULINT_UNDEFINED)) { ib_logf(IB_LOG_LEVEL_ERROR, "%s in tablespace %s (table %s)", check_msg, fsp->filepath, tablename); @@ -4100,9 +4190,7 @@ fil_load_single_table_tablespace( fsp_open_info def; fsp_open_info remote; os_offset_t size; -#ifdef UNIV_HOTBACKUP fil_space_t* space; -#endif memset(&def, 0, sizeof(def)); memset(&remote, 0, sizeof(remote)); @@ -4135,7 +4223,8 @@ fil_load_single_table_tablespace( one of them is sent to this function. So if this table has already been loaded, there is nothing to do.*/ mutex_enter(&fil_system->mutex); - if (fil_space_get_by_name(tablename)) { + space = fil_space_get_by_name(tablename); + if (space) { mem_free(tablename); mutex_exit(&fil_system->mutex); return; @@ -4160,7 +4249,7 @@ fil_load_single_table_tablespace( /* Check for a link file which locates a remote tablespace. 
*/ remote.success = fil_open_linked_file( - tablename, &remote.filepath, &remote.file); + tablename, &remote.filepath, &remote.file, FALSE); /* Read the first page of the remote tablespace */ if (remote.success) { @@ -4175,7 +4264,7 @@ fil_load_single_table_tablespace( /* Try to open the tablespace in the datadir. */ def.file = os_file_create_simple_no_error_handling( innodb_file_data_key, def.filepath, OS_FILE_OPEN, - OS_FILE_READ_WRITE, &def.success); + OS_FILE_READ_ONLY, &def.success, FALSE); /* Read the first page of the remote tablespace */ if (def.success) { @@ -4903,6 +4992,7 @@ retry: } page_size = fsp_flags_get_zip_size(space->flags); + if (!page_size) { page_size = UNIV_PAGE_SIZE; } @@ -4940,6 +5030,11 @@ retry: start_page_no = space->size; file_start_page_no = space->size - node->size; + /* Determine correct file block size */ + if (node->file_block_size == 0) { + node->file_block_size = os_file_get_block_size(node->handle, node->name); + } + #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { os_offset_t start_offset = start_page_no * page_size; @@ -4951,7 +5046,7 @@ retry: "space for file \'%s\' failed. 
Current size " INT64PF ", desired size " INT64PF "\n", node->name, start_offset, len+start_offset); - os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE); + os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE, __FILE__, __LINE__); success = FALSE; } else { success = TRUE; @@ -4961,9 +5056,11 @@ retry: success = FALSE; errno = 28; os_has_said_disk_full = TRUE;); mutex_enter(&fil_system->mutex); + if (success) { - node->size += n_pages; - space->size += n_pages; + node->size += (size_after_extend - start_page_no); + space->size += (size_after_extend - start_page_no); + os_has_said_disk_full = FALSE; } @@ -4999,7 +5096,7 @@ retry: success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC, node->name, node->handle, buf, offset, page_size * n_pages, - NULL, NULL); + node, NULL, 0, FALSE, 0); #endif /* UNIV_HOTBACKUP */ @@ -5098,7 +5195,7 @@ fil_extend_tablespaces_to_stored_len(void) single-threaded operation */ error = fil_read(TRUE, space->id, fsp_flags_get_zip_size(space->flags), - 0, 0, UNIV_PAGE_SIZE, buf, NULL); + 0, 0, UNIV_PAGE_SIZE, buf, NULL, 0); ut_a(error == DB_SUCCESS); size_in_header = fsp_get_size_low(buf); @@ -5378,8 +5475,13 @@ fil_io( void* buf, /*!< in/out: buffer where to store read data or from where to write; in aio this must be appropriately aligned */ - void* message) /*!< in: message for aio handler if non-sync + void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ + ulint* write_size) /*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. 
*/ { ulint mode; fil_space_t* space; @@ -5389,6 +5491,8 @@ fil_io( ulint wake_later; os_offset_t offset; ibool ignore_nonexistent_pages; + ibool page_compressed = FALSE; + ulint page_compression_level = 0; is_log = type & OS_FILE_LOG; type = type & ~OS_FILE_LOG; @@ -5442,6 +5546,11 @@ fil_io( } else if (type == OS_FILE_WRITE) { ut_ad(!srv_read_only_mode); srv_stats.data_written.add(len); + if (fil_page_is_index_page((byte *)buf)) { + srv_stats.index_pages_written.inc(); + } else { + srv_stats.non_index_pages_written.inc(); + } } /* Reserve the fil_system mutex and make sure that we can open at @@ -5567,6 +5676,9 @@ fil_io( ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0); ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0); + page_compressed = fsp_flags_is_page_compressed(space->flags); + page_compression_level = fsp_flags_get_page_compression_level(space->flags); + #ifdef UNIV_HOTBACKUP /* In mysqlbackup do normal i/o, not aio */ if (type == OS_FILE_READ) { @@ -5579,7 +5691,8 @@ fil_io( #else /* Queue the aio request */ ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, - offset, len, node, message); + offset, len, node, message, write_size, + page_compressed, page_compression_level); #endif /* UNIV_HOTBACKUP */ @@ -6119,7 +6232,8 @@ fil_iterate( ut_ad(!(n_bytes % iter.page_size)); if (!os_file_read(iter.file, io_buffer, offset, - (ulint) n_bytes)) { + (ulint) n_bytes, + fil_space_is_page_compressed(space_id))) { ib_logf(IB_LOG_LEVEL_ERROR, "os_file_read() failed"); @@ -6206,7 +6320,7 @@ fil_tablespace_iterate( file = os_file_create_simple_no_error_handling( innodb_file_data_key, filepath, - OS_FILE_OPEN, OS_FILE_READ_WRITE, &success); + OS_FILE_OPEN, OS_FILE_READ_WRITE, &success, FALSE); DBUG_EXECUTE_IF("fil_tablespace_iterate_failure", { @@ -6258,7 +6372,8 @@ fil_tablespace_iterate( /* Read the first page and determine the page and zip size. 
*/ - if (!os_file_read(file, page, 0, UNIV_PAGE_SIZE)) { + if (!os_file_read(file, page, 0, UNIV_PAGE_SIZE, + dict_tf_get_page_compression(table->flags))) { err = DB_IO_ERROR; @@ -6424,3 +6539,87 @@ fil_mtr_rename_log( 0, 0, new_name, old_name, mtr); } } + +/****************************************************************//** +Acquire fil_system mutex */ +void +fil_system_enter(void) +/*==================*/ +{ + ut_ad(!mutex_own(&fil_system->mutex)); + mutex_enter(&fil_system->mutex); +} + +/****************************************************************//** +Release fil_system mutex */ +void +fil_system_exit(void) +/*=================*/ +{ + ut_ad(mutex_own(&fil_system->mutex)); + mutex_exit(&fil_system->mutex); +} + +/*******************************************************************//** +Return space name */ +char* +fil_space_name( +/*===========*/ + fil_space_t* space) /*!< in: space */ +{ + return (space->name); +} + +/*******************************************************************//** +Return page type name */ +const char* +fil_get_page_type_name( +/*===================*/ + ulint page_type) /*!< in: FIL_PAGE_TYPE */ +{ + switch(page_type) { + case FIL_PAGE_PAGE_COMPRESSED: + return (const char*)"PAGE_COMPRESSED"; + case FIL_PAGE_INDEX: + return (const char*)"INDEX"; + case FIL_PAGE_UNDO_LOG: + return (const char*)"UNDO LOG"; + case FIL_PAGE_INODE: + return (const char*)"INODE"; + case FIL_PAGE_IBUF_FREE_LIST: + return (const char*)"IBUF_FREE_LIST"; + case FIL_PAGE_TYPE_ALLOCATED: + return (const char*)"ALLOCATED"; + case FIL_PAGE_IBUF_BITMAP: + return (const char*)"IBUF_BITMAP"; + case FIL_PAGE_TYPE_SYS: + return (const char*)"SYS"; + case FIL_PAGE_TYPE_TRX_SYS: + return (const char*)"TRX_SYS"; + case FIL_PAGE_TYPE_FSP_HDR: + return (const char*)"FSP_HDR"; + case FIL_PAGE_TYPE_XDES: + return (const char*)"XDES"; + case FIL_PAGE_TYPE_BLOB: + return (const char*)"BLOB"; + case FIL_PAGE_TYPE_ZBLOB: + return (const char*)"ZBLOB"; + case 
FIL_PAGE_TYPE_ZBLOB2: + return (const char*)"ZBLOB2"; + case FIL_PAGE_TYPE_COMPRESSED: + return (const char*)"ORACLE PAGE COMPRESSED"; + default: + return (const char*)"PAGE TYPE CORRUPTED"; + } +} +/****************************************************************//** +Get block size from fil node +@return block size*/ +ulint +fil_node_get_block_size( +/*====================*/ + fil_node_t* node) /*!< in: Node where to get block + size */ +{ + return (node->file_block_size); +} diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc new file mode 100644 index 00000000000..6baf1963c47 --- /dev/null +++ b/storage/innobase/fil/fil0pagecompress.cc @@ -0,0 +1,793 @@ +/***************************************************************************** + +Copyright (C) 2013, 2015, MariaDB Corporation. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file fil/fil0pagecompress.cc +Implementation for page compressed file spaces. 
+ +Created 11/12/2013 Jan Lindström jan.lindstrom@mariadb.com +Updated 14/02/2015 +***********************************************************************/ + +#include "fil0fil.h" +#include "fil0pagecompress.h" + +#include <debug_sync.h> +#include <my_dbug.h> + +#include "mem0mem.h" +#include "hash0hash.h" +#include "os0file.h" +#include "mach0data.h" +#include "buf0buf.h" +#include "buf0flu.h" +#include "log0recv.h" +#include "fsp0fsp.h" +#include "srv0srv.h" +#include "srv0start.h" +#include "mtr0mtr.h" +#include "mtr0log.h" +#include "dict0dict.h" +#include "page0page.h" +#include "page0zip.h" +#include "trx0sys.h" +#include "row0mysql.h" +#ifndef UNIV_HOTBACKUP +# include "buf0lru.h" +# include "ibuf0ibuf.h" +# include "sync0sync.h" +# include "os0sync.h" +#else /* !UNIV_HOTBACKUP */ +# include "srv0srv.h" +static ulint srv_data_read, srv_data_written; +#endif /* !UNIV_HOTBACKUP */ +#include "zlib.h" +#ifdef __linux__ +#include <linux/fs.h> +#include <sys/ioctl.h> +#include <fcntl.h> +#include <linux/falloc.h> +#endif +#include "row0mysql.h" +#ifdef HAVE_LZ4 +#include "lz4.h" +#endif +#ifdef HAVE_LZO +#include "lzo/lzo1x.h" +#endif +#ifdef HAVE_LZMA +#include "lzma.h" +#endif +#ifdef HAVE_BZIP2 +#include "bzlib.h" +#endif +#ifdef HAVE_SNAPPY +#include "snappy-c.h" +#endif + +/* Used for debugging */ +//#define UNIV_PAGECOMPRESS_DEBUG 1 + +/****************************************************************//** +For page compressed pages decompress the page after actual read +operation. */ +static +void +fil_decompress_page_2( +/*==================*/ + byte* page_buf, /*!< out: destination buffer for + uncompressed data */ + byte* buf, /*!< in: source compressed data */ + ulong len, /*!< in: length of output buffer.*/ + ulint* write_size) /*!< in/out: Actual payload size of + the compressed data. 
*/ +{ + ulint page_type = mach_read_from_2(buf + FIL_PAGE_TYPE); + + if (page_type != FIL_PAGE_TYPE_COMPRESSED) { + /* It is not a compressed page */ + return; + } + + ulint olen = 0; + byte* ptr = buf + FIL_PAGE_DATA; + ulint version = mach_read_from_1(buf + FIL_PAGE_VERSION); + int err = 0; + + ut_a(version == 1); + + /* Read the original page type, before we compressed the data. */ + page_type = mach_read_from_2(buf + FIL_PAGE_ORIGINAL_TYPE_V1); + + ulint original_len = mach_read_from_2(buf + FIL_PAGE_ORIGINAL_SIZE_V1); + + if (original_len < UNIV_PAGE_SIZE_MIN - (FIL_PAGE_DATA + 8) + || original_len > UNIV_PAGE_SIZE_MAX - FIL_PAGE_DATA + || len < original_len + FIL_PAGE_DATA) { + fprintf(stderr, + "InnoDB: Corruption: We try to uncompress corrupted page\n" + "InnoDB: Original len %lu len %lu.\n", + original_len, len); + + fflush(stderr); + ut_error; + + } + + ulint algorithm = mach_read_from_1(buf + FIL_PAGE_ALGORITHM_V1); + + switch(algorithm) { + case PAGE_ZLIB_ALGORITHM: { + + fprintf(stderr, "InnoDB: [Note]: zlib\n"); + + err = uncompress(page_buf, &len, ptr, original_len); + /* If uncompress fails it means that page is corrupted */ + if (err != Z_OK) { + + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but uncompress failed with error %d.\n" + "InnoDB: size %lu len %lu\n", + err, original_len, len); + + fflush(stderr); + + ut_error; + } + + break; + } +#ifdef HAVE_LZ4 + case PAGE_LZ4_ALGORITHM: { + fprintf(stderr, "InnoDB: [Note]: lz4\n"); + err = LZ4_decompress_fast( + (const char*) ptr, (char*) (page_buf), original_len); + + if (err < 0) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %d bytes.\n" + "InnoDB: size %lu len %lu\n", + err, original_len, len); + fflush(stderr); + + ut_error; + } + break; + } +#endif /* HAVE_LZ4 */ + +#ifdef HAVE_LZMA + case PAGE_LZMA_ALGORITHM: { + + lzma_ret ret; + size_t src_pos = 0; + size_t dst_pos = 0; + uint64_t memlimit 
= UINT64_MAX; + + fprintf(stderr, "InnoDB: [Note]: lzma\n"); + ret = lzma_stream_buffer_decode( + &memlimit, + 0, + NULL, + ptr, + &src_pos, + original_len, + (page_buf), + &dst_pos, + len); + + + if (ret != LZMA_OK || (dst_pos <= 0 || dst_pos > len)) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %ld bytes.\n" + "InnoDB: size %lu len %lu\n", + dst_pos, original_len, len); + fflush(stderr); + + ut_error; + } + + break; + } +#endif /* HAVE_LZMA */ + +#ifdef HAVE_LZO + case PAGE_LZO_ALGORITHM: { + fprintf(stderr, "InnoDB: [Note]: lzo \n"); + err = lzo1x_decompress((const unsigned char *)ptr, + original_len,(unsigned char *)(page_buf), &olen, NULL); + + if (err != LZO_E_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %ld bytes.\n" + "InnoDB: size %lu len %lu\n", + olen, original_len, len); + fflush(stderr); + + ut_error; + } + break; + } +#endif /* HAVE_LZO */ + + default: + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but compression algorithm %s\n" + "InnoDB: is not known.\n" + ,fil_get_compression_alg_name(algorithm)); + + fflush(stderr); + ut_error; + break; + } + + /* Leave the header alone */ + memmove(buf+FIL_PAGE_DATA, page_buf, original_len); + + mach_write_to_2(buf + FIL_PAGE_TYPE, page_type); + + ut_ad(memcmp(buf + FIL_PAGE_LSN + 4, + buf + (original_len + FIL_PAGE_DATA) + - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4) == 0); +} + +/****************************************************************//** +For page compressed pages compress the page before actual write +operation. +@return compressed page to be written*/ +byte* +fil_compress_page( +/*==============*/ + ulint space_id, /*!< in: tablespace id of the + table. 
*/ + byte* buf, /*!< in: buffer from which to write; in aio + this must be appropriately aligned */ + byte* out_buf, /*!< out: compressed buffer */ + ulint len, /*!< in: length of input buffer.*/ + ulint compression_level, /*!< in: compression level */ + ulint block_size, /*!< in: block size */ + ulint* out_len, /*!< out: actual length of compressed + page */ + byte* lzo_mem) /*!< in: temporary memory used by LZO */ +{ + int err = Z_OK; + int level = 0; + ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE; + ulint write_size=0; + ulint comp_method = innodb_compression_algorithm; /* Cache to avoid + change during + function execution */ + + ut_ad(buf); + ut_ad(out_buf); + ut_ad(len); + ut_ad(out_len); + + level = compression_level; + ut_ad(fil_space_is_page_compressed(space_id)); + + fil_system_enter(); + fil_space_t* space = fil_space_get_by_id(space_id); + fil_system_exit(); + + /* If no compression level was provided to this table, use system + default level */ + if (level == 0) { + level = page_zip_level; + } + +#ifdef UNIV_PAGECOMPRESS_DEBUG + fprintf(stderr, + "InnoDB: Note: Preparing for compress for space %lu name %s len %lu\n", + space_id, fil_space_name(space), len); +#endif /* UNIV_PAGECOMPRESS_DEBUG */ + + write_size = UNIV_PAGE_SIZE - header_len; + + switch(comp_method) { +#ifdef HAVE_LZ4 + case PAGE_LZ4_ALGORITHM: + err = LZ4_compress_limitedOutput((const char *)buf, + (char *)out_buf+header_len, len, write_size); + write_size = err; + + if (err == 0) { + /* If error we leave the actual page as it was */ + + if (space->printed_compression_failure == false) { + fprintf(stderr, + "InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n", + space_id, fil_space_name(space), len, err, write_size); + space->printed_compression_failure = true; + } + + srv_stats.pages_page_compression_error.inc(); + *out_len = len; + return (buf); + } + break; +#endif /* HAVE_LZ4 */ +#ifdef HAVE_LZO + case PAGE_LZO_ALGORITHM: + err = 
lzo1x_1_15_compress( + buf, len, out_buf+header_len, &write_size, lzo_mem); + + if (err != LZO_E_OK || write_size > UNIV_PAGE_SIZE-header_len) { + if (space->printed_compression_failure == false) { + fprintf(stderr, + "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu\n", + space_id, fil_space_name(space), len, err, write_size); + space->printed_compression_failure = true; + } + srv_stats.pages_page_compression_error.inc(); + *out_len = len; + return (buf); + } + + break; +#endif /* HAVE_LZO */ +#ifdef HAVE_LZMA + case PAGE_LZMA_ALGORITHM: { + size_t out_pos=0; + + err = lzma_easy_buffer_encode( + compression_level, + LZMA_CHECK_NONE, + NULL, /* No custom allocator, use malloc/free */ + reinterpret_cast<uint8_t*>(buf), + len, + reinterpret_cast<uint8_t*>(out_buf + header_len), + &out_pos, + (size_t)write_size); + + if (err != LZMA_OK || out_pos > UNIV_PAGE_SIZE-header_len) { + if (space->printed_compression_failure == false) { + fprintf(stderr, + "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu\n", + space_id, fil_space_name(space), len, err, out_pos); + space->printed_compression_failure = true; + } + + srv_stats.pages_page_compression_error.inc(); + *out_len = len; + return (buf); + } + + write_size = out_pos; + + break; + } +#endif /* HAVE_LZMA */ + +#ifdef HAVE_BZIP2 + case PAGE_BZIP2_ALGORITHM: { + + err = BZ2_bzBuffToBuffCompress( + (char *)(out_buf + header_len), + (unsigned int *)&write_size, + (char *)buf, + len, + 1, + 0, + 0); + + if (err != BZ_OK || write_size > UNIV_PAGE_SIZE-header_len) { + if (space->printed_compression_failure == false) { + fprintf(stderr, + "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu\n", + space_id, fil_space_name(space), len, err, write_size); + space->printed_compression_failure = true; + } + srv_stats.pages_page_compression_error.inc(); + *out_len = len; + return (buf); + } + break; + } +#endif /* HAVE_BZIP2 
*/ + +#ifdef HAVE_SNAPPY + case PAGE_SNAPPY_ALGORITHM: + { + snappy_status cstatus; + + cstatus = snappy_compress((const char *)buf, len, (char *)(out_buf+header_len), &write_size); + + if (cstatus != SNAPPY_OK || write_size > UNIV_PAGE_SIZE-header_len) { + if (space->printed_compression_failure == false) { + fprintf(stderr, + "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu\n", + space_id, fil_space_name(space), len, (int)cstatus, write_size); + space->printed_compression_failure = true; + } + srv_stats.pages_page_compression_error.inc(); + *out_len = len; + return (buf); + } + break; + } +#endif /* HAVE_SNAPPY */ + + case PAGE_ZLIB_ALGORITHM: + err = compress2(out_buf+header_len, (ulong*)&write_size, buf, len, level); + + if (err != Z_OK) { + /* If error we leave the actual page as it was */ + + if (space->printed_compression_failure == false) { + fprintf(stderr, + "InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n", + space_id, fil_space_name(space), len, err, write_size); + space->printed_compression_failure = true; + } + + srv_stats.pages_page_compression_error.inc(); + *out_len = len; + return (buf); + } + break; + + case PAGE_UNCOMPRESSED: + *out_len = len; + return (buf); + break; + + default: + ut_error; + break; + } + + /* Set up the page header */ + memcpy(out_buf, buf, FIL_PAGE_DATA); + /* Set up the checksum */ + mach_write_to_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC); + /* Set up the correct page type */ + mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED); + /* Set up the flush lsn to be compression algorithm */ + mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, comp_method); + /* Set up the actual payload length */ + mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size); + +#ifdef UNIV_DEBUG + /* Verify */ + ut_ad(fil_page_is_compressed(out_buf)); + ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC); + 
ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size); + ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == (ulint)comp_method); + + /* Verify that page can be decompressed */ + { + byte *comp_page; + byte *uncomp_page; + + comp_page = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*3)); + uncomp_page = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*3)); + memcpy(comp_page, out_buf, UNIV_PAGE_SIZE); + + fil_decompress_page(uncomp_page, comp_page, len, NULL); + if(buf_page_is_corrupted(false, uncomp_page, 0)) { + buf_page_print(uncomp_page, 0, BUF_PAGE_PRINT_NO_CRASH); + ut_error; + } + ut_free(comp_page); + ut_free(uncomp_page); + } +#endif /* UNIV_DEBUG */ + + write_size+=header_len; + + /* Actual write needs to be aligned on block size */ + if (write_size % block_size) { + size_t tmp = write_size; +#ifdef UNIV_DEBUG + ut_a(block_size > 0); +#endif + write_size = (size_t)ut_uint64_align_up((ib_uint64_t)write_size, block_size); +#ifdef UNIV_DEBUG + ut_a(write_size > 0 && ((write_size % block_size) == 0)); + ut_a(write_size >= tmp); +#endif + } + +#ifdef UNIV_PAGECOMPRESS_DEBUG + fprintf(stderr, + "InnoDB: Note: Compression succeeded for space %lu name %s len %lu out_len %lu\n", + space_id, fil_space_name(space), len, write_size); +#endif /* UNIV_PAGECOMPRESS_DEBUG */ + + + srv_stats.page_compression_saved.add((len - write_size)); + srv_stats.pages_page_compressed.inc(); + + /* If we do not persistently trim rest of page, we need to write it + all */ + if (!srv_use_trim) { + write_size = len; + } + + *out_len = write_size; + + return(out_buf); + +} + +/****************************************************************//** +For page compressed pages decompress the page after actual read +operation. 
*/ +void +fil_decompress_page( +/*================*/ + byte* page_buf, /*!< in: preallocated buffer or NULL */ + byte* buf, /*!< in/out: buffer from which to read; in aio + this must be appropriately aligned */ + ulong len, /*!< in: length of output buffer.*/ + ulint* write_size) /*!< in/out: Actual payload size of + the compressed data. */ +{ + int err = 0; + ulint actual_size = 0; + ulint compression_alg = 0; + byte *in_buf; + ulint olen=0; + ulint ptype; + + ut_ad(buf); + ut_ad(len); + + ptype = mach_read_from_2(buf+FIL_PAGE_TYPE); + + /* Do not try to uncompress pages that are not compressed */ + if (ptype != FIL_PAGE_PAGE_COMPRESSED && ptype != FIL_PAGE_TYPE_COMPRESSED) { + return; + } + + // If no buffer was given, we need to allocate temporary buffer + if (page_buf == NULL) { +#ifdef UNIV_PAGECOMPRESS_DEBUG + fprintf(stderr, + "InnoDB: Note: FIL: Compression buffer not given, allocating...\n"); +#endif /* UNIV_PAGECOMPRESS_DEBUG */ + in_buf = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*3)); + } else { + in_buf = page_buf; + } + + if (ptype == FIL_PAGE_TYPE_COMPRESSED) { + + fil_decompress_page_2(in_buf, buf, len, write_size); + // Need to free temporary buffer if no buffer was given + if (page_buf == NULL) { + ut_free(in_buf); + } + return; + } + + /* Before actual decompress, make sure that page type is correct */ + + if (mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM) != BUF_NO_CHECKSUM_MAGIC || + mach_read_from_2(buf+FIL_PAGE_TYPE) != FIL_PAGE_PAGE_COMPRESSED) { + fprintf(stderr, + "InnoDB: Corruption: We try to uncompress corrupted page\n" + "InnoDB: CRC %lu type %lu.\n" + "InnoDB: len %lu\n", + mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM), + mach_read_from_2(buf+FIL_PAGE_TYPE), len); + + fflush(stderr); + ut_error; + } + + /* Get compression algorithm */ + compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN); + + /* Get the actual size of compressed page */ + actual_size = mach_read_from_2(buf+FIL_PAGE_DATA); + /* Check if payload size is 
corrupted */ + if (actual_size == 0 || actual_size > UNIV_PAGE_SIZE) { + fprintf(stderr, + "InnoDB: Corruption: We try to uncompress corrupted page\n" + "InnoDB: actual size %lu compression %s\n", + actual_size, fil_get_compression_alg_name(compression_alg)); + fflush(stderr); + ut_error; + } + + /* Store actual payload size of the compressed data. This pointer + points to buffer pool. */ + if (write_size) { + *write_size = actual_size; + } + +#ifdef UNIV_PAGECOMPRESS_DEBUG + fprintf(stderr, + "InnoDB: Note: Preparing for decompress for len %lu\n", + actual_size); +#endif /* UNIV_PAGECOMPRESS_DEBUG */ + + + switch(compression_alg) { + case PAGE_ZLIB_ALGORITHM: + err= uncompress(in_buf, &len, buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (unsigned long)actual_size); + + /* If uncompress fails it means that page is corrupted */ + if (err != Z_OK) { + + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but uncompress failed with error %d.\n" + "InnoDB: size %lu len %lu\n", + err, actual_size, len); + + fflush(stderr); + + ut_error; + } + break; + +#ifdef HAVE_LZ4 + case PAGE_LZ4_ALGORITHM: + err = LZ4_decompress_fast((const char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (char *)in_buf, len); + + if (err != (int)actual_size) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %d bytes.\n" + "InnoDB: size %lu len %lu\n", + err, actual_size, len); + fflush(stderr); + + ut_error; + } + break; +#endif /* HAVE_LZ4 */ +#ifdef HAVE_LZO + case PAGE_LZO_ALGORITHM: + err = lzo1x_decompress((const unsigned char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, + actual_size,(unsigned char *)in_buf, &olen, NULL); + + if (err != LZO_E_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %ld bytes.\n" + "InnoDB: size %lu len %lu\n", + olen, actual_size, len); + fflush(stderr); + + 
ut_error; + } + break; +#endif /* HAVE_LZO */ +#ifdef HAVE_LZMA + case PAGE_LZMA_ALGORITHM: { + + lzma_ret ret; + size_t src_pos = 0; + size_t dst_pos = 0; + uint64_t memlimit = UINT64_MAX; + + ret = lzma_stream_buffer_decode( + &memlimit, + 0, + NULL, + buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, + &src_pos, + actual_size, + in_buf, + &dst_pos, + len); + + + if (ret != LZMA_OK || (dst_pos == 0 || dst_pos > UNIV_PAGE_SIZE)) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %ld bytes.\n" + "InnoDB: size %lu len %lu\n", + dst_pos, actual_size, len); + fflush(stderr); + + ut_error; + } + + break; + } +#endif /* HAVE_LZMA */ +#ifdef HAVE_BZIP2 + case PAGE_BZIP2_ALGORITHM: { + unsigned int dst_pos = UNIV_PAGE_SIZE; + + err = BZ2_bzBuffToBuffDecompress( + (char *)in_buf, + &dst_pos, + (char *)(buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE), + actual_size, + 1, + 0); + + if (err != BZ_OK || (dst_pos == 0 || dst_pos > UNIV_PAGE_SIZE)) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %du bytes.\n" + "InnoDB: size %lu len %lu err %d\n", + dst_pos, actual_size, len, err); + fflush(stderr); + + ut_error; + } + break; + } +#endif /* HAVE_BZIP2 */ +#ifdef HAVE_SNAPPY + case PAGE_SNAPPY_ALGORITHM: + { + snappy_status cstatus; + + cstatus = snappy_uncompress( + (const char *)(buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE), + actual_size, + (char *)in_buf, + &olen); + + if (cstatus != SNAPPY_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %lu bytes.\n" + "InnoDB: size %lu len %lu err %d\n", + olen, actual_size, len, (int)cstatus); + fflush(stderr); + + ut_error; + } + break; + } +#endif /* HAVE_SNAPPY */ + default: + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but compression algorithm %s\n" + "InnoDB: is not 
known.\n" + ,fil_get_compression_alg_name(compression_alg)); + + fflush(stderr); + ut_error; + break; + } + +#ifdef UNIV_PAGECOMPRESS_DEBUG + fprintf(stderr, + "InnoDB: Note: Decompression succeeded for len %lu \n", + len); +#endif /* UNIV_PAGECOMPRESS_DEBUG */ + + srv_stats.pages_page_decompressed.inc(); + + /* Copy the uncompressed page to the buffer pool, not + really any other options. */ + memcpy(buf, in_buf, len); + + // Need to free temporal buffer if no buffer was given + if (page_buf == NULL) { + ut_free(in_buf); + } +} + + diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 7b57f072493..10fc11eff5e 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -4,7 +4,7 @@ Copyright (c) 2000, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2014 SkySQL Ab. All Rights Reserved. +Copyright (c) 2013, 2015, MariaDB Corporation. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -101,6 +101,7 @@ this program; if not, write to the Free Software Foundation, Inc., #endif /* UNIV_DEBUG */ #include "fts0priv.h" #include "page0zip.h" +#include "fil0pagecompress.h" #define thd_get_trx_isolation(X) ((enum_tx_isolation)thd_tx_isolation(X)) @@ -502,6 +503,28 @@ ib_cb_t innodb_api_cb[] = { (ib_cb_t) ib_trx_read_only }; +/** + Structure for CREATE TABLE options (table options). + It needs to be called ha_table_option_struct. + + The option values can be specified in the CREATE TABLE at the end: + CREATE TABLE ( ... 
) *here* +*/ + +ha_create_table_option innodb_table_option_list[]= +{ + /* With this option user can enable page compression feature for the + table */ + HA_TOPTION_BOOL("PAGE_COMPRESSED", page_compressed, 0), + /* With this option user can set zip compression level for page + compression for this table*/ + HA_TOPTION_NUMBER("PAGE_COMPRESSION_LEVEL", page_compression_level, ULINT_UNDEFINED, 0, 9, 1), + /* With this option user can enable atomic writes feature for this table */ + HA_TOPTION_ENUM("ATOMIC_WRITES", atomic_writes, "DEFAULT,ON,OFF", 0), + HA_TOPTION_END +}; + + /*************************************************************//** Check whether valid argument given to innodb_ft_*_stopword_table. This function is registered as a callback with MySQL. @@ -537,7 +560,28 @@ static inline ulint innobase_map_isolation_level( /*=========================*/ - enum_tx_isolation iso); /*!< in: MySQL isolation level code */ + enum_tx_isolation iso); /*!< in: MySQL isolation level code + */ + +/*************************************************************//** +Check for a valid value of innobase_compression_algorithm. +@return 0 for valid innodb_compression_algorithm. 
*/ +static +int +innodb_compression_algorithm_validate( +/*==================================*/ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to system + variable */ + void* save, /*!< out: immediate result + for update function */ + struct st_mysql_value* value); /*!< in: incoming string */ + +static ibool innodb_have_lzo=IF_LZO(1, 0); +static ibool innodb_have_lz4=IF_LZ4(1, 0); +static ibool innodb_have_lzma=IF_LZMA(1, 0); +static ibool innodb_have_bzip2=IF_BZIP2(1, 0); +static ibool innodb_have_snappy=IF_SNAPPY(1, 0); static const char innobase_hton_name[]= "InnoDB"; @@ -690,6 +734,46 @@ static SHOW_VAR innodb_status_variables[]= { {"purge_view_trx_id_age", (char*) &export_vars.innodb_purge_view_trx_id_age, SHOW_LONG}, #endif /* UNIV_DEBUG */ + /* Status variables for page compression */ + {"page_compression_saved", + (char*) &export_vars.innodb_page_compression_saved, SHOW_LONGLONG}, + {"page_compression_trim_sect512", + (char*) &export_vars.innodb_page_compression_trim_sect512, SHOW_LONGLONG}, + {"page_compression_trim_sect1024", + (char*) &export_vars.innodb_page_compression_trim_sect1024, SHOW_LONGLONG}, + {"page_compression_trim_sect2048", + (char*) &export_vars.innodb_page_compression_trim_sect2048, SHOW_LONGLONG}, + {"page_compression_trim_sect4096", + (char*) &export_vars.innodb_page_compression_trim_sect4096, SHOW_LONGLONG}, + {"page_compression_trim_sect8192", + (char*) &export_vars.innodb_page_compression_trim_sect8192, SHOW_LONGLONG}, + {"page_compression_trim_sect16384", + (char*) &export_vars.innodb_page_compression_trim_sect16384, SHOW_LONGLONG}, + {"page_compression_trim_sect32768", + (char*) &export_vars.innodb_page_compression_trim_sect32768, SHOW_LONGLONG}, + {"num_index_pages_written", + (char*) &export_vars.innodb_index_pages_written, SHOW_LONGLONG}, + {"num_non_index_pages_written", + (char*) &export_vars.innodb_non_index_pages_written, SHOW_LONGLONG}, + {"num_pages_page_compressed", + (char*) 
&export_vars.innodb_pages_page_compressed, SHOW_LONGLONG}, + {"num_page_compressed_trim_op", + (char*) &export_vars.innodb_page_compressed_trim_op, SHOW_LONGLONG}, + {"num_page_compressed_trim_op_saved", + (char*) &export_vars.innodb_page_compressed_trim_op_saved, SHOW_LONGLONG}, + {"num_pages_page_decompressed", + (char*) &export_vars.innodb_pages_page_decompressed, SHOW_LONGLONG}, + {"have_lz4", + (char*) &innodb_have_lz4, SHOW_BOOL}, + {"have_lzo", + (char*) &innodb_have_lzo, SHOW_BOOL}, + {"have_lzma", + (char*) &innodb_have_lzma, SHOW_BOOL}, + {"have_bzip2", + (char*) &innodb_have_bzip2, SHOW_BOOL}, + {"have_snappy", + (char*) &innodb_have_snappy, SHOW_BOOL}, + {NullS, NullS, SHOW_LONG} }; @@ -2910,6 +2994,8 @@ innobase_init( if (srv_file_per_table) innobase_hton->tablefile_extensions = ha_innobase_exts; + innobase_hton->table_options = innodb_table_option_list; + ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR); #ifndef DBUG_OFF @@ -2944,6 +3030,51 @@ innobase_init( } } +#ifndef HAVE_LZ4 + if (innodb_compression_algorithm == PAGE_LZ4_ALGORITHM) { + sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" + "InnoDB: liblz4 is not installed. \n", + innodb_compression_algorithm); + goto error; + } +#endif + +#ifndef HAVE_LZO + if (innodb_compression_algorithm == PAGE_LZO_ALGORITHM) { + sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" + "InnoDB: liblzo is not installed. \n", + innodb_compression_algorithm); + goto error; + } +#endif + +#ifndef HAVE_LZMA + if (innodb_compression_algorithm == PAGE_LZMA_ALGORITHM) { + sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" + "InnoDB: liblzma is not installed. \n", + innodb_compression_algorithm); + goto error; + } +#endif + +#ifndef HAVE_BZIP2 + if (innodb_compression_algorithm == PAGE_BZIP2_ALGORITHM) { + sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" + "InnoDB: libbz2 is not installed. 
\n", + innodb_compression_algorithm); + goto error; + } +#endif + +#ifndef HAVE_SNAPPY + if (innodb_compression_algorithm == PAGE_SNAPPY_ALGORITHM) { + sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" + "InnoDB: libsnappy is not installed. \n", + innodb_compression_algorithm); + goto error; + } +#endif + os_innodb_umask = (ulint) my_umask; /* First calculate the default path for innodb_data_home_dir etc., @@ -9668,11 +9799,16 @@ innobase_table_flags( enum row_type row_format; rec_format_t innodb_row_format = REC_FORMAT_COMPACT; bool use_data_dir; + ha_table_option_struct *options= form->s->option_struct; /* Cache the value of innodb_file_format, in case it is modified by another thread while the table is being created. */ const ulint file_format_allowed = srv_file_format; + /* Cache the value of innobase_compression_level, in case it is + modified by another thread while the table is being created. */ + const ulint default_compression_level = page_zip_level; + *flags = 0; *flags2 = 0; @@ -9726,6 +9862,8 @@ index_bad: } } + row_format = form->s->row_type; + if (create_info->key_block_size) { /* The requested compressed page size (key_block_size) is given in kilobytes. 
If it is a valid number, store @@ -9735,7 +9873,7 @@ index_bad: ulint kbsize; /* Key Block Size */ for (zssize = kbsize = 1; zssize <= ut_min(UNIV_PAGE_SSIZE_MAX, - PAGE_ZIP_SSIZE_MAX); + PAGE_ZIP_SSIZE_MAX); zssize++, kbsize <<= 1) { if (kbsize == create_info->key_block_size) { zip_ssize = zssize; @@ -9763,8 +9901,8 @@ index_bad: } if (!zip_allowed - || zssize > ut_min(UNIV_PAGE_SSIZE_MAX, - PAGE_ZIP_SSIZE_MAX)) { + || zssize > ut_min(UNIV_PAGE_SSIZE_MAX, + PAGE_ZIP_SSIZE_MAX)) { push_warning_printf( thd, Sql_condition::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, @@ -9773,8 +9911,6 @@ index_bad: } } - row_format = form->s->row_type; - if (zip_ssize && zip_allowed) { /* if ROW_FORMAT is set to default, automatically change it to COMPRESSED.*/ @@ -9811,7 +9947,6 @@ index_bad: case ROW_TYPE_REDUNDANT: innodb_row_format = REC_FORMAT_REDUNDANT; break; - case ROW_TYPE_COMPRESSED: case ROW_TYPE_DYNAMIC: if (!use_tablespace) { @@ -9829,10 +9964,18 @@ index_bad: " innodb_file_format > Antelope.", get_row_format_name(row_format)); } else { - innodb_row_format = (row_format == ROW_TYPE_DYNAMIC - ? REC_FORMAT_DYNAMIC - : REC_FORMAT_COMPRESSED); - break; + switch(row_format) { + case ROW_TYPE_COMPRESSED: + innodb_row_format = REC_FORMAT_COMPRESSED; + break; + case ROW_TYPE_DYNAMIC: + innodb_row_format = REC_FORMAT_DYNAMIC; + break; + default: + /* Not possible, avoid compiler warning */ + break; + } + break; /* Correct row_format */ } zip_allowed = FALSE; /* fall through to set row_format = COMPACT */ @@ -9859,7 +10002,15 @@ index_bad: && ((create_info->data_file_name != NULL) && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)); - dict_tf_set(flags, innodb_row_format, zip_ssize, use_data_dir); + /* Set up table dictionary flags */ + dict_tf_set(flags, + innodb_row_format, + zip_ssize, + use_data_dir, + options->page_compressed, + (ulint)options->page_compression_level == ULINT_UNDEFINED ? 
+ default_compression_level : options->page_compression_level, + options->atomic_writes); if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { *flags2 |= DICT_TF2_TEMPORARY; @@ -9877,6 +10028,114 @@ index_bad: DBUG_RETURN(true); } + +/*****************************************************************//** +Check engine specific table options not handled by SQL-parser. +@return NULL if valid, otherwise the name of the first offending option */ +UNIV_INTERN +const char* +ha_innobase::check_table_options( + THD *thd, /*!< in: thread handle */ + TABLE* table, /*!< in: information on table + columns and indexes */ + HA_CREATE_INFO* create_info, /*!< in: more information of the + created table, contains also the + create statement string */ + const bool use_tablespace, /*!< in: use file per table */ + const ulint file_format) /*!< in: file format; PAGE_COMPRESSED requires at least UNIV_FORMAT_B */ +{ + enum row_type row_format = table->s->row_type; + ha_table_option_struct *options= table->s->option_struct; + atomic_writes_t awrites = (atomic_writes_t)options->atomic_writes; + + /* Check page compression requirements */ + if (options->page_compressed) { + + if (row_format == ROW_TYPE_COMPRESSED) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED table can't have" + " ROW_TYPE=COMPRESSED"); + return "PAGE_COMPRESSED"; + } + + if (row_format == ROW_TYPE_REDUNDANT) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED table can't have" + " ROW_TYPE=REDUNDANT"); + return "PAGE_COMPRESSED"; + } + + if (!use_tablespace) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED requires" + " innodb_file_per_table."); + return "PAGE_COMPRESSED"; + } + + if (file_format < UNIV_FORMAT_B) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED requires" + " innodb_file_format > Antelope."); + return "PAGE_COMPRESSED"; + } + + if (create_info->key_block_size) { + 
push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED table can't have" + " key_block_size"); + return "PAGE_COMPRESSED"; + } + } + + /* Check page compression level requirements, some of them are + already checked above */ + if ((ulint)options->page_compression_level != ULINT_UNDEFINED) { + if (options->page_compressed == false) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSION_LEVEL requires" + " PAGE_COMPRESSED"); + return "PAGE_COMPRESSION_LEVEL"; + } + + if (options->page_compression_level < 0 || options->page_compression_level > 9) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: invalid PAGE_COMPRESSION_LEVEL = %d." + " Valid values are [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", + options->page_compression_level); + return "PAGE_COMPRESSION_LEVEL"; + } + } + + /* Check atomic writes requirements */ + if (awrites == ATOMIC_WRITES_ON || + (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) { + if (!use_tablespace) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: ATOMIC_WRITES requires" + " innodb_file_per_table."); + return "ATOMIC_WRITES"; + } + } + + return NULL; +} + /*****************************************************************//** Creates a new table to an InnoDB database. @return error number */ @@ -9908,6 +10167,7 @@ ha_innobase::create( while creating the table. So we read the current value here and make all further decisions based on this. 
*/ bool use_tablespace = srv_file_per_table; + const ulint file_format = srv_file_format; /* Zip Shift Size - log2 - 9 of compressed page size, zero for uncompressed */ @@ -9931,6 +10191,12 @@ ha_innobase::create( /* Create the table definition in InnoDB */ + /* Validate table options not handled by the SQL-parser */ + if(check_table_options(thd, form, create_info, use_tablespace, + file_format)) { + DBUG_RETURN(HA_WRONG_CREATE_OPTION); + } + /* Validate create options if innodb_strict_mode is set. */ if (create_options_are_invalid( thd, form, create_info, use_tablespace)) { @@ -14227,6 +14493,12 @@ ha_innobase::check_if_incompatible_data( HA_CREATE_INFO* info, uint table_changes) { + ha_table_option_struct *param_old, *param_new; + + /* Cache engine specific options */ + param_new = info->option_struct; + param_old = table->s->option_struct; + innobase_copy_frm_flags_from_create_info(prebuilt->table, info); if (table_changes != IS_EQUAL_YES) { @@ -14253,6 +14525,13 @@ ha_innobase::check_if_incompatible_data( return(COMPATIBLE_DATA_NO); } + /* Changes on engine specific table options requests a rebuild of the table. */ + if (param_new->page_compressed != param_old->page_compressed || + param_new->page_compression_level != param_old->page_compression_level || + param_new->atomic_writes != param_old->atomic_writes) { + return(COMPATIBLE_DATA_NO); + } + return(COMPATIBLE_DATA_YES); } @@ -16426,7 +16705,7 @@ static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay, static MYSQL_SYSVAR_UINT(compression_level, page_zip_level, PLUGIN_VAR_RQCMDARG, - "Compression level used for compressed row format. 0 is no compression" + "Compression level used for zlib compression. 
0 is no compression" ", 1 is fastest, 9 is best compression and default is 6.", NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0); @@ -16940,6 +17219,41 @@ static MYSQL_SYSVAR_ULONG(saved_page_number_debug, NULL, innodb_save_page_no, 0, 0, UINT_MAX32, 0); #endif /* UNIV_DEBUG */ +static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim, + PLUGIN_VAR_OPCMDARG, + "Use trim. Default FALSE.", + NULL, NULL, FALSE); + +static const char *page_compression_algorithms[]= { "none", "zlib", "lz4", "lzo", "lzma", "bzip2", "snappy", 0 }; +static TYPELIB page_compression_algorithms_typelib= +{ + array_elements(page_compression_algorithms) - 1, 0, + page_compression_algorithms, 0 +}; +static MYSQL_SYSVAR_ENUM(compression_algorithm, innodb_compression_algorithm, + PLUGIN_VAR_OPCMDARG, + "Compression algorithm used on page compression. One of: none, zlib, lz4, lzo, lzma, bzip2, or snappy", + innodb_compression_algorithm_validate, NULL, + /* The default is PAGE_UNCOMPRESSED. Whether support for a requested + method was compiled in (HAVE_LZ4, HAVE_LZO, ...) is verified by the + innodb_compression_algorithm_validate function. */ + PAGE_UNCOMPRESSED, + &page_compression_algorithms_typelib); + +static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Number of multi-threaded flush threads", + NULL, NULL, + MTFLUSH_DEFAULT_WORKER, /* Default setting */ + 1, /* Minimum setting */ + MTFLUSH_MAX_WORKER, /* Max setting */ + 0); + +static MYSQL_SYSVAR_BOOL(use_mtflush, srv_use_mtflush, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Use multi-threaded flush. 
Default FALSE.", + NULL, NULL, FALSE); + static MYSQL_SYSVAR_UINT(simulate_comp_failures, srv_simulate_comp_failures, PLUGIN_VAR_NOCMDARG, "Simulate compression failures.", @@ -17104,6 +17418,10 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(fil_make_page_dirty_debug), MYSQL_SYSVAR(saved_page_number_debug), #endif /* UNIV_DEBUG */ + MYSQL_SYSVAR(use_trim), + MYSQL_SYSVAR(compression_algorithm), + MYSQL_SYSVAR(mtflush_threads), + MYSQL_SYSVAR(use_mtflush), MYSQL_SYSVAR(simulate_comp_failures), NULL }; @@ -17447,6 +17765,9 @@ ib_senderrf( case IB_LOG_LEVEL_FATAL: l = 0; break; + default: + l = 0; + break; } my_printv_error(code, format, MYF(l), args); @@ -17604,6 +17925,108 @@ innobase_convert_to_system_charset( static_cast<uint>(len), errors)); } +/*************************************************************//** +Check for a valid value of innodb_compression_algorithm. +@return 0 if the value is valid and the method is compiled in, 1 if not */ +static +int +innodb_compression_algorithm_validate( +/*==================================*/ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to system + variable */ + void* save, /*!< out: immediate result + for update function */ + struct st_mysql_value* value) /*!< in: incoming string */ +{ + long compression_algorithm; + DBUG_ENTER("innodb_compression_algorithm_validate"); + + if (value->value_type(value) == MYSQL_VALUE_TYPE_STRING) { + char buff[STRING_BUFFER_USUAL_SIZE]; + const char *str; + int length= sizeof(buff); + + if (!(str= value->val_str(value, buff, &length))) { + DBUG_RETURN(1); + } + + if ((compression_algorithm= (long)find_type(str, &page_compression_algorithms_typelib, 0) - 1) < 0) { + DBUG_RETURN(1); + } + } else { + long long tmp; + + if (value->val_int(value, &tmp)) { + DBUG_RETURN(1); + } + + if (tmp < 0 || tmp >= page_compression_algorithms_typelib.count) { + DBUG_RETURN(1); + } + + compression_algorithm= (long) tmp; + } + + 
*reinterpret_cast<ulong*>(save) = compression_algorithm; + +#ifndef HAVE_LZ4 + if (compression_algorithm == PAGE_LZ4_ALGORITHM) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "InnoDB: innodb_compression_algorithm = %ld unsupported.\n" + "InnoDB: liblz4 is not installed. \n", + compression_algorithm); + DBUG_RETURN(1); + } +#endif + +#ifndef HAVE_LZO + if (compression_algorithm == PAGE_LZO_ALGORITHM) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "InnoDB: innodb_compression_algorithm = %ld unsupported.\n" + "InnoDB: liblzo is not installed. \n", + compression_algorithm); + DBUG_RETURN(1); + } +#endif + +#ifndef HAVE_LZMA + if (compression_algorithm == PAGE_LZMA_ALGORITHM) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "InnoDB: innodb_compression_algorithm = %ld unsupported.\n" + "InnoDB: liblzma is not installed. \n", + compression_algorithm); + DBUG_RETURN(1); + } +#endif + +#ifndef HAVE_BZIP2 + if (compression_algorithm == PAGE_BZIP2_ALGORITHM) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "InnoDB: innodb_compression_algorithm = %ld unsupported.\n" + "InnoDB: libbz2 is not installed. \n", + compression_algorithm); + DBUG_RETURN(1); + } +#endif + +#ifndef HAVE_SNAPPY + if (compression_algorithm == PAGE_SNAPPY_ALGORITHM) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "InnoDB: innodb_compression_algorithm = %ld unsupported.\n" + "InnoDB: libsnappy is not installed. \n", + compression_algorithm); + DBUG_RETURN(1); + } +#endif + DBUG_RETURN(0); +} + /********************************************************************** Issue a warning that the row is too big. 
*/ UNIV_INTERN diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h index 5cebc425769..2fc7f773a8d 100644 --- a/storage/innobase/handler/ha_innodb.h +++ b/storage/innobase/handler/ha_innodb.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 2000, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -56,6 +57,22 @@ typedef struct st_innobase_share { /** Prebuilt structures in an InnoDB table handle used within MySQL */ struct row_prebuilt_t; +/** Engine specific table options are defined using this struct */ +struct ha_table_option_struct +{ + bool page_compressed; /*!< Table is using page compression + if this option is true. */ + int page_compression_level; /*!< Table page compression level, or + unspecified ((ulint) == ULINT_UNDEFINED). */ + uint atomic_writes; /*!< Use atomic writes for this + table if this option is ON or + in DEFAULT if + srv_use_atomic_writes=1. 
+ Atomic writes are not used if + value is OFF. */ +}; + + /** The class defining a handle to an Innodb table */ class ha_innobase: public handler { @@ -175,6 +192,8 @@ class ha_innobase: public handler char* norm_name, char* temp_path, char* remote_path); + const char* check_table_options(THD *thd, TABLE* table, + HA_CREATE_INFO* create_info, const bool use_tablespace, const ulint file_format); int create(const char *name, register TABLE *form, HA_CREATE_INFO *create_info); int truncate(); diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 3f393d9d431..96a779868fb 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 2005, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -263,6 +264,22 @@ ha_innobase::check_if_supported_inplace_alter( update_thd(); trx_search_latch_release_if_reserved(prebuilt->trx); + /* Change on engine specific table options require rebuild of the + table */ + if (ha_alter_info->handler_flags + == Alter_inplace_info::CHANGE_CREATE_OPTION) { + ha_table_option_struct *new_options= ha_alter_info->create_info->option_struct; + ha_table_option_struct *old_options= table->s->option_struct; + + if (new_options->page_compressed != old_options->page_compressed || + new_options->page_compression_level != old_options->page_compression_level || + new_options->atomic_writes != old_options->atomic_writes) { + ha_alter_info->unsupported_reason = innobase_get_err_msg( + ER_ALTER_OPERATION_NOT_SUPPORTED_REASON); + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + } + if (ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE | 
INNOBASE_ALTER_NOREBUILD @@ -3397,6 +3414,17 @@ ha_innobase::prepare_inplace_alter_table( if (ha_alter_info->handler_flags & Alter_inplace_info::CHANGE_CREATE_OPTION) { + /* Check engine specific table options */ + if (const char* invalid_tbopt = check_table_options( + user_thd, altered_table, + ha_alter_info->create_info, + prebuilt->table->space != 0, + srv_file_format)) { + my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0), + table_type(), invalid_tbopt); + goto err_exit_no_heap; + } + if (const char* invalid_opt = create_options_are_invalid( user_thd, altered_table, ha_alter_info->create_info, diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc index 6168ce0f0d2..2211243fbf3 100644 --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ -92,6 +92,7 @@ static buf_page_desc_t i_s_page_type[] = { {"COMPRESSED_BLOB", FIL_PAGE_TYPE_ZBLOB}, {"COMPRESSED_BLOB2", FIL_PAGE_TYPE_ZBLOB2}, {"IBUF_INDEX", I_S_PAGE_TYPE_IBUF}, + {"PAGE COMPRESSED", FIL_PAGE_PAGE_COMPRESSED}, {"UNKNOWN", I_S_PAGE_TYPE_UNKNOWN} }; diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 4b2556524fa..ebed2bb62a6 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1499,6 +1500,11 @@ struct buf_page_t{ state == BUF_BLOCK_ZIP_PAGE and zip.data == NULL means an active buf_pool->watch */ + + ulint write_size; /* Write size is set when this + page is first time written and then + if written again we check is TRIM + operation needed. 
*/ #ifndef UNIV_HOTBACKUP buf_page_t* hash; /*!< node used in chaining to buf_pool->page_hash or diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h index f116720574b..66f9f7f4b7e 100644 --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2014, 2014, SkySQL Ab. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -36,6 +37,14 @@ Created 11/5/1995 Heikki Tuuri /** Flag indicating if the page_cleaner is in active state. */ extern ibool buf_page_cleaner_is_active; +/** Handled page counters for a single flush */ +struct flush_counters_t { + ulint flushed; /*!< number of dirty pages flushed */ + ulint evicted; /*!< number of clean pages evicted */ + ulint unzip_LRU_evicted;/*!< number of uncompressed page images + evicted */ +}; + /********************************************************************//** Remove a block from the flush list of modified blocks. 
*/ UNIV_INTERN @@ -279,6 +288,56 @@ buf_flush_get_dirty_pages_count( #endif /* !UNIV_HOTBACKUP */ +/******************************************************************//** +Start a buffer flush batch for LRU or flush list */ +ibool +buf_flush_start( +/*============*/ + buf_pool_t* buf_pool, /*!< buffer pool instance */ + buf_flush_t flush_type); /*!< in: BUF_FLUSH_LRU + or BUF_FLUSH_LIST */ +/******************************************************************//** +End a buffer flush batch for LRU or flush list */ +void +buf_flush_end( +/*==========*/ + buf_pool_t* buf_pool, /*!< buffer pool instance */ + buf_flush_t flush_type); /*!< in: BUF_FLUSH_LRU + or BUF_FLUSH_LIST */ +/******************************************************************//** +Gather the aggregated stats for both flush list and LRU list flushing */ +void +buf_flush_common( +/*=============*/ + buf_flush_t flush_type, /*!< in: type of flush */ + ulint page_count); /*!< in: number of pages flushed */ + +/*******************************************************************//** +This utility flushes dirty blocks from the end of the LRU list or flush_list. +NOTE 1: in the case of an LRU flush the calling thread may own latches to +pages: to avoid deadlocks, this function must be written so that it cannot +end up waiting for these latches! NOTE 2: in the case of a flush list flush, +the calling thread is not allowed to own any latches on pages! 
*/ +void +buf_flush_batch( +/*============*/ + buf_pool_t* buf_pool, /*!< in: buffer pool instance */ + buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU or + BUF_FLUSH_LIST; if BUF_FLUSH_LIST, + then the caller must not own any + latches on pages */ + ulint min_n, /*!< in: wished minimum number of blocks + flushed (it is not guaranteed that the + actual number is that big, though) */ + lsn_t lsn_limit, /*!< in: in the case of BUF_FLUSH_LIST + all blocks whose oldest_modification is + smaller than this should be flushed + (if their number does not exceed + min_n), otherwise ignored */ + flush_counters_t* n); /*!< out: flushed/evicted page + counts */ + + #ifndef UNIV_NONINL #include "buf0flu.ic" #endif diff --git a/storage/innobase/include/buf0mtflu.h b/storage/innobase/include/buf0mtflu.h new file mode 100644 index 00000000000..0475335bbf5 --- /dev/null +++ b/storage/innobase/include/buf0mtflu.h @@ -0,0 +1,95 @@ +/***************************************************************************** + +Copyright (C) 2014 SkySQL Ab. All Rights Reserved. +Copyright (C) 2014 Fusion-io. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/buf0mtflu.h +Multi-threaded flush method interface function prototypes + +Created 06/02/2014 Jan Lindström jan.lindstrom@skysql.com + Dhananjoy Das DDas@fusionio.com +***********************************************************************/ + +#ifndef buf0mtflu_h +#define buf0mtflu_h + +/******************************************************************//** +Add exit work item to work queue to signal multi-threaded flush +threads that they should exit. +*/ +void +buf_mtflu_io_thread_exit(void); +/*===========================*/ + +/******************************************************************//** +Initialize multi-threaded flush thread synchronization data. +@return Initialized multi-threaded flush thread synchronization data. */ +void* +buf_mtflu_handler_init( +/*===================*/ + ulint n_threads, /*!< in: Number of threads to create */ + ulint wrk_cnt); /*!< in: Number of work items */ + +/******************************************************************//** +Return true if multi-threaded flush is initialized +@return true if initialized, false if not */ +bool +buf_mtflu_init_done(void); +/*======================*/ + +/*********************************************************************//** +Clears up tail of the LRU lists: +* Put replaceable pages at the tail of LRU to the free list +* Flush dirty pages at the tail of LRU to the disk +The depth to which we scan each buffer pool is controlled by dynamic +config parameter innodb_LRU_scan_depth. 
+@return total pages flushed */ +UNIV_INTERN +ulint +buf_mtflu_flush_LRU_tail(void); +/*===========================*/ + +/*******************************************************************//** +Multi-threaded version of buf_flush_list +*/ +bool +buf_mtflu_flush_list( +/*=================*/ + ulint min_n, /*!< in: wished minimum number of blocks + flushed (it is not guaranteed that the + actual number is that big, though) */ + lsn_t lsn_limit, /*!< in: in the case of BUF_FLUSH_LIST all + blocks whose oldest_modification is + smaller than this should be flushed + (if their number does not exceed + min_n), otherwise ignored */ + ulint* n_processed); /*!< out: the number of pages + which were processed is passed + back to caller. Ignored if NULL */ + +/*********************************************************************//** +Set correct thread identifiers to io thread array based on +information we have. */ +void +buf_mtflu_set_thread_ids( +/*=====================*/ + ulint n_threads, /*!< in: Number of threads to fill */ + void* ctx, /*!< in: thread context */ + os_thread_id_t* thread_ids); /*!< in: thread id array */ + +#endif diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index 9e007809471..db39d502db6 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -2,6 +2,7 @@ Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -42,6 +43,8 @@ Created 1/8/1996 Heikki Tuuri #include "ut0byte.h" #include "trx0types.h" #include "row0types.h" +#include "fsp0fsp.h" +#include "dict0pagecompress.h" extern bool innodb_table_stats_not_found; extern bool innodb_index_stats_not_found; @@ -918,7 +921,14 @@ dict_tf_set( ulint* flags, /*!< in/out: table */ rec_format_t format, /*!< in: file format */ ulint zip_ssize, /*!< in: zip shift size */ - bool remote_path) /*!< in: table uses DATA DIRECTORY */ + bool remote_path, /*!< in: table uses DATA DIRECTORY + */ + bool page_compressed,/*!< in: table uses page compressed + pages */ + ulint page_compression_level, /*!< in: table page compression + level */ + ulint atomic_writes) /*!< in: table atomic + writes option value*/ __attribute__((nonnull)); /********************************************************************//** Convert a 32 bit integer table flags to the 32 bit integer that is @@ -946,6 +956,7 @@ dict_tf_get_zip_size( /*=================*/ ulint flags) /*!< in: flags */ __attribute__((const)); + /********************************************************************//** Check whether the table uses the compressed compact page format. @return compressed page size, or 0 if not compressed */ @@ -1848,6 +1859,7 @@ dict_table_get_index_on_first_col( #endif /* !UNIV_HOTBACKUP */ + #ifndef UNIV_NONINL #include "dict0dict.ic" #endif diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index 066ffe47e4a..84d5c57f720 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -537,9 +538,25 @@ dict_tf_is_valid( ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags); ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(flags); ulint unused = DICT_TF_GET_UNUSED(flags); + ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(flags); + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags); + ulint data_dir = DICT_TF_HAS_DATA_DIR(flags); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(flags); /* Make sure there are no bits that we do not know about. */ if (unused != 0) { + fprintf(stderr, + "InnoDB: Error: table unused flags are %ld" + " in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + unused, + compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); return(false); @@ -550,12 +567,34 @@ dict_tf_is_valid( data stored off-page in the clustered index. */ if (!compact) { + fprintf(stderr, + "InnoDB: Error: table compact flags are %ld" + " in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + compact, compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); return(false); } } else if (zip_ssize) { /* Antelope does not support COMPRESSED row format. 
*/ + fprintf(stderr, + "InnoDB: Error: table flags are %ld" + " in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + flags, compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); return(false); } @@ -568,6 +607,58 @@ dict_tf_is_valid( || !atomic_blobs || zip_ssize > PAGE_ZIP_SSIZE_MAX) { + fprintf(stderr, + "InnoDB: Error: table compact flags are %ld in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + flags, + compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + + ); + return(false); + } + } + + if (page_compression || page_compression_level) { + /* Page compression format must have compact and + atomic_blobs and page_compression_level requires + page_compression */ + if (!compact + || !page_compression + || !atomic_blobs) { + + fprintf(stderr, + "InnoDB: Error: table flags are %ld in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + flags, compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); + return(false); + } + } + + if (atomic_writes) { + + if(atomic_writes > ATOMIC_WRITES_OFF) { + + fprintf(stderr, + "InnoDB: Error: table flags are %ld in the data dictionary and are corrupted\n" + "InnoDB: Error: data 
dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + flags, compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); return(false); } } @@ -594,6 +685,11 @@ dict_sys_tables_type_validate( ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(type); ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(type); ulint unused = DICT_TF_GET_UNUSED(type); + ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(type); + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(type); + + ut_a(atomic_writes <= ATOMIC_WRITES_OFF); /* The low order bit of SYS_TABLES.TYPE is always set to 1. If the format is UNIV_FORMAT_B or higher, this field is the same @@ -604,12 +700,16 @@ dict_sys_tables_type_validate( if (redundant) { if (zip_ssize || atomic_blobs) { + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=Redundant, zip_ssize %lu atomic_blobs %lu\n", + zip_ssize, atomic_blobs); return(ULINT_UNDEFINED); } } /* Make sure there are no bits that we do not know about. */ if (unused) { + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, unused %lu\n", + type, unused); return(ULINT_UNDEFINED); } @@ -624,6 +724,8 @@ dict_sys_tables_type_validate( } else if (zip_ssize) { /* Antelope does not support COMPRESSED format. */ + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu\n", + type, zip_ssize); return(ULINT_UNDEFINED); } @@ -633,11 +735,15 @@ dict_sys_tables_type_validate( should be in N_COLS, but we already know about the low_order_bit and DICT_N_COLS_COMPACT flags. */ if (!atomic_blobs) { + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu atomic_blobs %lu\n", + type, zip_ssize, atomic_blobs); return(ULINT_UNDEFINED); } /* Validate that the number is within allowed range. 
*/ if (zip_ssize > PAGE_ZIP_SSIZE_MAX) { + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu max %d\n", + type, zip_ssize, PAGE_ZIP_SSIZE_MAX); return(ULINT_UNDEFINED); } } @@ -647,6 +753,27 @@ dict_sys_tables_type_validate( format, so the DATA_DIR flag is compatible with any other table flags. However, it is not used with TEMPORARY tables.*/ + if (page_compression || page_compression_level) { + /* page compressed row format must have low_order_bit and + atomic_blobs bits set and the DICT_N_COLS_COMPACT flag + should be in N_COLS, but we already know about the + low_order_bit and DICT_N_COLS_COMPACT flags. */ + + if (!atomic_blobs || !page_compression) { + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, page_compression %lu page_compression_level %lu\n" + "InnoDB: Error: atomic_blobs %lu\n", + type, page_compression, page_compression_level, atomic_blobs); + return(ULINT_UNDEFINED); + } + } + + /* Validate that the atomic writes number is within allowed range. */ + if (atomic_writes > ATOMIC_WRITES_OFF) { + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, atomic_writes %lu\n", + type, atomic_writes); + return(ULINT_UNDEFINED); + } + /* Return the validated SYS_TABLES.TYPE. 
*/ return(type); } @@ -719,8 +846,16 @@ dict_tf_set( ulint* flags, /*!< in/out: table flags */ rec_format_t format, /*!< in: file format */ ulint zip_ssize, /*!< in: zip shift size */ - bool use_data_dir) /*!< in: table uses DATA DIRECTORY */ + bool use_data_dir, /*!< in: table uses DATA DIRECTORY + */ + bool page_compressed,/*!< in: table uses page compressed + pages */ + ulint page_compression_level, /*!< in: table page compression + level */ + ulint atomic_writes) /*!< in: table atomic writes setup */ { + atomic_writes_t awrites = (atomic_writes_t)atomic_writes; + switch (format) { case REC_FORMAT_REDUNDANT: *flags = 0; @@ -742,6 +877,19 @@ dict_tf_set( break; } + if (page_compressed) { + *flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS) + | (1 << DICT_TF_POS_PAGE_COMPRESSION) + | (page_compression_level << DICT_TF_POS_PAGE_COMPRESSION_LEVEL); + + ut_ad(zip_ssize == 0); + ut_ad(dict_tf_get_page_compression(*flags) == TRUE); + ut_ad(dict_tf_get_page_compression_level(*flags) == page_compression_level); + } + + *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES); + ut_a(dict_tf_get_atomic_writes(*flags) == awrites); + if (use_data_dir) { *flags |= (1 << DICT_TF_POS_DATA_DIR); } @@ -765,6 +913,9 @@ dict_tf_to_fsp_flags( ulint table_flags) /*!< in: dict_table_t::flags */ { ulint fsp_flags; + ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags); + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags); DBUG_EXECUTE_IF("dict_tf_to_fsp_flags_failure", return(ULINT_UNDEFINED);); @@ -783,7 +934,20 @@ dict_tf_to_fsp_flags( fsp_flags |= DICT_TF_HAS_DATA_DIR(table_flags) ? FSP_FLAGS_MASK_DATA_DIR : 0; + /* In addition, tablespace flags also contain if the page + compression is used for this table. 
*/ + fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION(fsp_flags, page_compression); + + /* In addition, tablespace flags also contain page compression level + if page compression is used for this table. */ + fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(fsp_flags, page_compression_level); + + /* In addition, tablespace flags also contain flag if atomic writes + is used for this table */ + fsp_flags |= FSP_FLAGS_SET_ATOMIC_WRITES(fsp_flags, atomic_writes); + ut_a(fsp_flags_is_valid(fsp_flags)); + ut_a(dict_tf_verify_flags(table_flags, fsp_flags)); return(fsp_flags); } @@ -811,10 +975,15 @@ dict_sys_tables_type_to_tf( /* Adjust bit zero. */ flags = redundant ? 0 : 1; - /* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are the same. */ + /* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION, + PAGE_COMPRESSION_LEVEL, ATOMIC_WRITES are the same. */ flags |= type & (DICT_TF_MASK_ZIP_SSIZE | DICT_TF_MASK_ATOMIC_BLOBS - | DICT_TF_MASK_DATA_DIR); + | DICT_TF_MASK_DATA_DIR + | DICT_TF_MASK_PAGE_COMPRESSION + | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL + | DICT_TF_MASK_ATOMIC_WRITES + ); return(flags); } @@ -842,10 +1011,14 @@ dict_tf_to_sys_tables_type( /* Adjust bit zero. It is always 1 in SYS_TABLES.TYPE */ type = 1; - /* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are the same. */ + /* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION, + PAGE_COMPRESSION_LEVEL, ATOMIC_WRITES are the same. */ type |= flags & (DICT_TF_MASK_ZIP_SSIZE | DICT_TF_MASK_ATOMIC_BLOBS - | DICT_TF_MASK_DATA_DIR); + | DICT_TF_MASK_DATA_DIR + | DICT_TF_MASK_PAGE_COMPRESSION + | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL + | DICT_TF_MASK_ATOMIC_WRITES); return(type); } diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 5f6811f0719..e64b4e18a2e 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -2,6 +2,7 @@ Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, SkySQL Ab. 
All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -125,11 +126,26 @@ This flag prevents older engines from attempting to open the table and allows InnoDB to update_create_info() accordingly. */ #define DICT_TF_WIDTH_DATA_DIR 1 +/** +Width of the page compression flag +*/ +#define DICT_TF_WIDTH_PAGE_COMPRESSION 1 +#define DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL 4 + +/** +Width of atomic writes flag +DEFAULT=0, ON = 1, OFF = 2 +*/ +#define DICT_TF_WIDTH_ATOMIC_WRITES 2 + /** Width of all the currently known table flags */ #define DICT_TF_BITS (DICT_TF_WIDTH_COMPACT \ + DICT_TF_WIDTH_ZIP_SSIZE \ + DICT_TF_WIDTH_ATOMIC_BLOBS \ - + DICT_TF_WIDTH_DATA_DIR) + + DICT_TF_WIDTH_DATA_DIR \ + + DICT_TF_WIDTH_PAGE_COMPRESSION \ + + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL \ + + DICT_TF_WIDTH_ATOMIC_WRITES) /** A mask of all the known/used bits in table flags */ #define DICT_TF_BIT_MASK (~(~0 << DICT_TF_BITS)) @@ -145,9 +161,19 @@ allows InnoDB to update_create_info() accordingly. 
*/ /** Zero relative shift position of the DATA_DIR field */ #define DICT_TF_POS_DATA_DIR (DICT_TF_POS_ATOMIC_BLOBS \ + DICT_TF_WIDTH_ATOMIC_BLOBS) +/** Zero relative shift position of the PAGE_COMPRESSION field */ +#define DICT_TF_POS_PAGE_COMPRESSION (DICT_TF_POS_DATA_DIR \ + + DICT_TF_WIDTH_DATA_DIR) +/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */ +#define DICT_TF_POS_PAGE_COMPRESSION_LEVEL (DICT_TF_POS_PAGE_COMPRESSION \ + + DICT_TF_WIDTH_PAGE_COMPRESSION) +/** Zero relative shift position of the ATOMIC_WRITES field */ +#define DICT_TF_POS_ATOMIC_WRITES (DICT_TF_POS_PAGE_COMPRESSION_LEVEL \ + + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL) + /** Zero relative shift position of the start of the UNUSED bits */ -#define DICT_TF_POS_UNUSED (DICT_TF_POS_DATA_DIR \ - + DICT_TF_WIDTH_DATA_DIR) +#define DICT_TF_POS_UNUSED (DICT_TF_POS_ATOMIC_WRITES \ + + DICT_TF_WIDTH_ATOMIC_WRITES) /** Bit mask of the COMPACT field */ #define DICT_TF_MASK_COMPACT \ @@ -165,6 +191,18 @@ allows InnoDB to update_create_info() accordingly. */ #define DICT_TF_MASK_DATA_DIR \ ((~(~0 << DICT_TF_WIDTH_DATA_DIR)) \ << DICT_TF_POS_DATA_DIR) +/** Bit mask of the PAGE_COMPRESSION field */ +#define DICT_TF_MASK_PAGE_COMPRESSION \ + ((~(~0 << DICT_TF_WIDTH_PAGE_COMPRESSION)) \ + << DICT_TF_POS_PAGE_COMPRESSION) +/** Bit mask of the PAGE_COMPRESSION_LEVEL field */ +#define DICT_TF_MASK_PAGE_COMPRESSION_LEVEL \ + ((~(~0 << DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)) \ + << DICT_TF_POS_PAGE_COMPRESSION_LEVEL) +/** Bit mask of the ATOMIC_WRITES field */ +#define DICT_TF_MASK_ATOMIC_WRITES \ + ((~(~0 << DICT_TF_WIDTH_ATOMIC_WRITES)) \ + << DICT_TF_POS_ATOMIC_WRITES) /** Return the value of the COMPACT field */ #define DICT_TF_GET_COMPACT(flags) \ @@ -182,6 +220,19 @@ allows InnoDB to update_create_info() accordingly. 
*/ #define DICT_TF_HAS_DATA_DIR(flags) \ ((flags & DICT_TF_MASK_DATA_DIR) \ >> DICT_TF_POS_DATA_DIR) +/** Return the value of the PAGE_COMPRESSION field */ +#define DICT_TF_GET_PAGE_COMPRESSION(flags) \ + ((flags & DICT_TF_MASK_PAGE_COMPRESSION) \ + >> DICT_TF_POS_PAGE_COMPRESSION) +/** Return the value of the PAGE_COMPRESSION_LEVEL field */ +#define DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags) \ + ((flags & DICT_TF_MASK_PAGE_COMPRESSION_LEVEL) \ + >> DICT_TF_POS_PAGE_COMPRESSION_LEVEL) +/** Return the value of the ATOMIC_WRITES field */ +#define DICT_TF_GET_ATOMIC_WRITES(flags) \ + ((flags & DICT_TF_MASK_ATOMIC_WRITES) \ + >> DICT_TF_POS_ATOMIC_WRITES) + /** Return the contents of the UNUSED bits */ #define DICT_TF_GET_UNUSED(flags) \ (flags >> DICT_TF_POS_UNUSED) diff --git a/storage/innobase/include/dict0pagecompress.h b/storage/innobase/include/dict0pagecompress.h new file mode 100644 index 00000000000..19a2a6c52f3 --- /dev/null +++ b/storage/innobase/include/dict0pagecompress.h @@ -0,0 +1,94 @@ +/***************************************************************************** + +Copyright (C) 2013 SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/dict0pagecompress.h +Helper functions for extracting/storing page compression information +to dictionary. + +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +#ifndef dict0pagecompress_h +#define dict0pagecompress_h + +/********************************************************************//** +Extract the page compression level from table flags. +@return page compression level, or 0 if not compressed */ +UNIV_INLINE +ulint +dict_tf_get_page_compression_level( +/*===============================*/ + ulint flags) /*!< in: flags */ + __attribute__((const)); +/********************************************************************//** +Extract the page compression flag from table flags +@return page compression flag, or false if not compressed */ +UNIV_INLINE +ibool +dict_tf_get_page_compression( +/*==========================*/ + ulint flags) /*!< in: flags */ + __attribute__((const)); + +/********************************************************************//** +Check whether the table uses the page compressed page format. 
+@return page compression level, or 0 if not compressed */ +UNIV_INLINE +ulint +dict_table_page_compression_level( +/*==============================*/ + const dict_table_t* table) /*!< in: table */ + __attribute__((const)); + +/********************************************************************//** +Verify that dictionary flags match tablespace flags +@return true if flags match, false if not */ +UNIV_INLINE +ibool +dict_tf_verify_flags( +/*=================*/ + ulint table_flags, /*!< in: dict_table_t::flags */ + ulint fsp_flags) /*!< in: fil_space_t::flags */ + __attribute__((const)); + +/********************************************************************//** +Extract the atomic writes flag from table flags. +@return enumerated value of atomic writes (atomic_writes_t) */ +UNIV_INLINE +atomic_writes_t +dict_tf_get_atomic_writes( +/*======================*/ + ulint flags) /*!< in: flags */ + __attribute__((const)); + +/********************************************************************//** +Get the atomic writes setting of the table. +@return enumerated value of atomic writes (atomic_writes_t) */ +UNIV_INLINE +atomic_writes_t +dict_table_get_atomic_writes( +/*=========================*/ + const dict_table_t* table); /*!< in: table */ + + +#ifndef UNIV_NONINL +#include "dict0pagecompress.ic" +#endif + +#endif diff --git a/storage/innobase/include/dict0pagecompress.ic b/storage/innobase/include/dict0pagecompress.ic new file mode 100644 index 00000000000..811976434a8 --- /dev/null +++ b/storage/innobase/include/dict0pagecompress.ic @@ -0,0 +1,191 @@ +/***************************************************************************** + +Copyright (C) 2013 SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License.
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/dict0pagecompress.ic +Inline implementation for helper functions for extracting/storing +page compression and atomic writes information to dictionary. + +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +/********************************************************************//** +Verify that dictionary flags match tablespace flags (zip_ssize, atomic_blobs, page_compression, page_compression_level and atomic_writes are compared; the first mismatch is printed to stderr) +@return TRUE if flags match, FALSE if not */ +UNIV_INLINE +ibool +dict_tf_verify_flags( +/*=================*/ + ulint table_flags, /*!< in: dict_table_t::flags */ + ulint fsp_flags) /*!< in: fil_space_t::flags */ +{ + ulint table_unused = DICT_TF_GET_UNUSED(table_flags); + ulint compact = DICT_TF_GET_COMPACT(table_flags); + ulint ssize = DICT_TF_GET_ZIP_SSIZE(table_flags); + ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(table_flags); + ulint data_dir = DICT_TF_HAS_DATA_DIR(table_flags); + ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags); + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags); + ulint post_antelope = FSP_FLAGS_GET_POST_ANTELOPE(fsp_flags); + ulint zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(fsp_flags); + ulint fsp_atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(fsp_flags); + ulint page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(fsp_flags); + ulint
fsp_unused = FSP_FLAGS_GET_UNUSED(fsp_flags); + ulint fsp_page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(fsp_flags); + ulint fsp_page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(fsp_flags); + ulint fsp_atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(fsp_flags); + + DBUG_EXECUTE_IF("dict_tf_verify_flags_failure", + return(ULINT_UNDEFINED);); + + ut_a(!table_unused); + ut_a(!fsp_unused); + ut_a(page_ssize == 0 || page_ssize != 0); /* silence compiler */ + ut_a(compact == 0 || compact == 1); /* silence compiler */ + ut_a(data_dir == 0 || data_dir == 1); /* silence compiler */ + ut_a(post_antelope == 0 || post_antelope == 1); /* silence compiler */ + + if (ssize != zip_ssize) { + fprintf(stderr, + "InnoDB: Error: table flags has zip_ssize %lu" + " in the data dictionary\n" + "InnoDB: but the flags in file has zip_ssize %lu\n", + ssize, zip_ssize); + return (FALSE); + } + if (atomic_blobs != fsp_atomic_blobs) { + fprintf(stderr, + "InnoDB: Error: table flags has atomic_blobs %lu" + " in the data dictionary\n" + "InnoDB: but the flags in file has atomic_blobs %lu\n", + atomic_blobs, fsp_atomic_blobs); + + return (FALSE); + } + if (page_compression != fsp_page_compression) { + fprintf(stderr, + "InnoDB: Error: table flags has page_compression %lu" + " in the data dictionary\n" + "InnoDB: but the flags in file has page_compression %lu\n", + page_compression, fsp_page_compression); + + return (FALSE); + } + if (page_compression_level != fsp_page_compression_level) { + fprintf(stderr, + "InnoDB: Error: table flags has page_compression_level %lu" + " in the data dictionary\n" + "InnoDB: but the flags in file has page_compression_level %lu\n", + page_compression_level, fsp_page_compression_level); + + return (FALSE); + } + + if (atomic_writes != fsp_atomic_writes) { + fprintf(stderr, + "InnoDB: Error: table flags has atomic writes %lu" + " in the data dictionary\n" + "InnoDB: but the flags in file has atomic_writes %lu\n", + atomic_writes, fsp_atomic_writes); + +
return (FALSE); + } + + return(TRUE); +} + +/********************************************************************//** +Extract the page compression level from dict_table_t::flags. +These flags are in memory, so assert that they are valid. +@return page compression level, or 0 if not compressed */ +UNIV_INLINE +ulint +dict_tf_get_page_compression_level( +/*===============================*/ + ulint flags) /*!< in: flags */ +{ + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags); + + ut_ad(page_compression_level <= 9); + + return(page_compression_level); +} + +/********************************************************************//** +Get the page compression level of a table; ut_ad asserts that the table uses the page compression page format. +@return page compression level, or 0 if not compressed */ +UNIV_INLINE +ulint +dict_table_page_compression_level( +/*==============================*/ + const dict_table_t* table) /*!< in: table */ +{ + ut_ad(table); + ut_ad(dict_tf_get_page_compression(table->flags)); + + return(dict_tf_get_page_compression_level(table->flags)); +} + +/********************************************************************//** +Extract the page compression flag from table flags. +@return true if page compressed, false if not */ +UNIV_INLINE +ibool +dict_tf_get_page_compression( +/*=========================*/ + ulint flags) /*!< in: flags */ +{ + return(DICT_TF_GET_PAGE_COMPRESSION(flags)); +} + +/********************************************************************//** +Check whether the table uses the page compression page format. +@return true if page compressed, false if not */ +UNIV_INLINE +ibool +dict_table_is_page_compressed( +/*==========================*/ + const dict_table_t* table) /*!< in: table */ +{ + return (dict_tf_get_page_compression(table->flags)); +} + +/********************************************************************//** +Extract the atomic writes flag from table flags.
+@return enumerated value of atomic writes */ +UNIV_INLINE +atomic_writes_t +dict_tf_get_atomic_writes( +/*======================*/ + ulint flags) /*!< in: flags */ +{ + return((atomic_writes_t)DICT_TF_GET_ATOMIC_WRITES(flags)); +} + +/********************************************************************//** +Get the atomic writes setting stored in the flags of the table. +@return enumerated value of atomic writes */ +UNIV_INLINE +atomic_writes_t +dict_table_get_atomic_writes( +/*=========================*/ + const dict_table_t* table) /*!< in: table */ +{ + return ((atomic_writes_t)dict_tf_get_atomic_writes(table->flags)); +} diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h index d34b6f7eab3..35430e8ea62 100644 --- a/storage/innobase/include/dict0types.h +++ b/storage/innobase/include/dict0types.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -75,6 +76,13 @@ enum ib_quiesce_t { QUIESCE_COMPLETE /*!< All done */ }; +/** Enum values for atomic_writes table option */ +typedef enum { + ATOMIC_WRITES_DEFAULT = 0, + ATOMIC_WRITES_ON = 1, + ATOMIC_WRITES_OFF = 2 +} atomic_writes_t; + /** Prefix for tmp tables, adopted from sql/table.h */ #define tmp_file_prefix "#sql" #define tmp_file_prefix_length 4 diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index da2ee1c5730..e3f9f86d414 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab.
All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -131,11 +132,33 @@ extern fil_addr_t fil_addr_null; data file (ibdata*, not *.ibd): the file has been flushed to disk at least up to this lsn */ +/** If page type is FIL_PAGE_COMPRESSED then the 8 bytes starting at +FIL_PAGE_FILE_FLUSH_LSN are broken down as follows: */ + +/** Control information version format (u8) */ +static const ulint FIL_PAGE_VERSION = FIL_PAGE_FILE_FLUSH_LSN; + +/** Compression algorithm (u8) */ +static const ulint FIL_PAGE_ALGORITHM_V1 = FIL_PAGE_VERSION + 1; + +/** Original page type (u16) */ +static const ulint FIL_PAGE_ORIGINAL_TYPE_V1 = FIL_PAGE_ALGORITHM_V1 + 1; + +/** Original data size in bytes (u16)*/ +static const ulint FIL_PAGE_ORIGINAL_SIZE_V1 = FIL_PAGE_ORIGINAL_TYPE_V1 + 2; + +/** Size after compression (u16)*/ +static const ulint FIL_PAGE_COMPRESS_SIZE_V1 = FIL_PAGE_ORIGINAL_SIZE_V1 + 2; + #define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this contains the space id of the page */ #define FIL_PAGE_SPACE_ID FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID #define FIL_PAGE_DATA 38 /*!< start of the data on the page */ +/* Following are used when page compression is used */ +#define FIL_PAGE_COMPRESSED_SIZE 2 /*!< Number of bytes used to store + actual payload data size on + compressed pages. 
*/ /* @} */ /** File page trailer @{ */ #define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /*!< the low 4 bytes of this are used @@ -146,6 +169,7 @@ extern fil_addr_t fil_addr_null; /* @} */ /** File page types (values of FIL_PAGE_TYPE) @{ */ +#define FIL_PAGE_PAGE_COMPRESSED 34354 /*!< page compressed page */ #define FIL_PAGE_INDEX 17855 /*!< B-tree node */ #define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */ #define FIL_PAGE_INODE 3 /*!< Index node */ @@ -160,7 +184,8 @@ extern fil_addr_t fil_addr_null; #define FIL_PAGE_TYPE_BLOB 10 /*!< Uncompressed BLOB page */ #define FIL_PAGE_TYPE_ZBLOB 11 /*!< First compressed BLOB page */ #define FIL_PAGE_TYPE_ZBLOB2 12 /*!< Subsequent compressed BLOB page */ -#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_ZBLOB2 +#define FIL_PAGE_TYPE_COMPRESSED 13 /*!< Compressed page */ +#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_COMPRESSED /*!< Last page type */ /* @} */ @@ -228,6 +253,7 @@ struct fil_node_t { ib_int64_t flush_counter;/*!< up to what modification_counter value we have flushed the modifications to disk */ + ulint file_block_size; UT_LIST_NODE_T(fil_node_t) chain; /*!< link field for the file chain */ UT_LIST_NODE_T(fil_node_t) LRU; @@ -305,6 +331,9 @@ struct fil_space_t { bool is_in_unflushed_spaces; /*!< true if this space is currently in unflushed_spaces */ + bool printed_compression_failure; + /*!< true if we have already printed + compression failure */ UT_LIST_NODE_T(fil_space_t) space_list; /*!< list of all spaces */ ulint magic_n;/*!< FIL_SPACE_MAGIC_N */ @@ -401,6 +430,7 @@ ulint fil_space_get_type( /*===============*/ ulint id); /*!< in: space id */ + #endif /* !UNIV_HOTBACKUP */ /*******************************************************************//** Appends a new file to the chain of files of a space. File must be closed. 
@@ -580,8 +610,10 @@ fil_read_first_page( #endif /* UNIV_LOG_ARCHIVE */ lsn_t* min_flushed_lsn, /*!< out: min of flushed lsn values in data files */ - lsn_t* max_flushed_lsn) /*!< out: max of flushed + lsn_t* max_flushed_lsn, /*!< out: max of flushed lsn values in data files */ + ulint orig_space_id) /*!< in: file space id or + ULINT_UNDEFINED */ __attribute__((warn_unused_result)); /*******************************************************************//** Increments the count of pending operation, if space is not being deleted. @@ -945,8 +977,13 @@ fil_io( void* buf, /*!< in/out: buffer where to store read data or from where to write; in aio this must be appropriately aligned */ - void* message) /*!< in: message for aio handler if non-sync + void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ + ulint* write_size) /*!< in/out: Actual write size initialized + after first successful trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ __attribute__((nonnull(8))); /**********************************************************************//** Waits for an aio operation to complete. This function is used to write the @@ -1198,4 +1235,38 @@ fil_user_tablespace_restore_page( write buffer */ #endif /* !UNIV_INNOCHECKSUM */ + +/****************************************************************//** +Acquire fil_system mutex */ +void +fil_system_enter(void); +/*==================*/ +/****************************************************************//** +Release fil_system mutex */ +void +fil_system_exit(void); +/*==================*/ + +#ifndef UNIV_INNOCHECKSUM +/*******************************************************************//** +Returns the table space by a given id, NULL if not found.
*/ +fil_space_t* +fil_space_get_by_id( +/*================*/ + ulint id); /*!< in: space id */ +/*******************************************************************//** +Return space name */ +char* +fil_space_name( +/*===========*/ + fil_space_t* space); /*!< in: space */ +#endif + +/*******************************************************************//** +Return page type name */ +const char* +fil_get_page_type_name( +/*===================*/ + ulint page_type); /*!< in: FIL_PAGE_TYPE */ + #endif /* fil0fil_h */ diff --git a/storage/innobase/include/fil0pagecompress.h b/storage/innobase/include/fil0pagecompress.h new file mode 100644 index 00000000000..c797c221efc --- /dev/null +++ b/storage/innobase/include/fil0pagecompress.h @@ -0,0 +1,145 @@ +/***************************************************************************** + +Copyright (C) 2013, 2014 SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +#ifndef fil0pagecompress_h +#define fil0pagecompress_h + +#include "fsp0fsp.h" +#include "fsp0pagecompress.h" + +/******************************************************************//** +@file include/fil0pagecompress.h +Helper functions for extracting/storing page compression and +atomic writes information to table space. 
+ +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +/*******************************************************************//** +Returns the page compression level flag of the space, or 0 if the space +is not compressed. The tablespace must be cached in the memory cache. +@return page compression level if page compressed, ULINT_UNDEFINED if space not found */ +ulint +fil_space_get_page_compression_level( +/*=================================*/ + ulint id); /*!< in: space id */ +/*******************************************************************//** +Returns the page compression flag of the space, or false if the space +is not compressed. The tablespace must be cached in the memory cache. +@return true if page compressed, false if not or space not found */ +ibool +fil_space_is_page_compressed( +/*=========================*/ + ulint id); /*!< in: space id */ +/*******************************************************************//** +Returns the page compression flag of the space, or false if the space +is not compressed. The tablespace must be cached in the memory cache. +@return true if page compressed, false if not or space not found */ +ibool +fil_space_get_page_compressed( +/*=========================*/ + fil_space_t* space); /*!< in: space */ +/*******************************************************************//** +Returns the atomic writes table option value of the space (an +atomic_writes_t value, not a boolean). The tablespace must be cached in the memory cache.
+@return atomic write table option value */ +atomic_writes_t +fil_space_get_atomic_writes( +/*=========================*/ + ulint id); /*!< in: space id */ +/*******************************************************************//** +Find out whether the page is index page or not +@return true if page type is index page, false if not */ +ibool +fil_page_is_index_page( +/*===================*/ + byte *buf); /*!< in: page */ + +/****************************************************************//** +Get the name of the compression algorithm used for page +compression. +@return compression algorithm name or "UNKNOWN" if not known*/ +const char* +fil_get_compression_alg_name( +/*=========================*/ + ulint comp_alg); /*!<in: compression algorithm number */ + +/****************************************************************//** +For page compressed pages compress the page before actual write +operation. +@return compressed page to be written*/ +byte* +fil_compress_page( +/*==============*/ + ulint space_id, /*!< in: tablespace id of the + table. */ + byte* buf, /*!< in: buffer from which to write; in aio + this must be appropriately aligned */ + byte* out_buf, /*!< out: compressed buffer */ + ulint len, /*!< in: length of input buffer.*/ + ulint compression_level, /*!< in: compression level */ + ulint block_size, /*!< in: block size */ + ulint* out_len, /*!< out: actual length of compressed + page */ + byte* lzo_mem); /*!< in: temporary memory used by LZO */ + +/****************************************************************//** +For page compressed pages decompress the page after actual read +operation. +No return value (void); presumably the uncompressed page is left in buf - TODO confirm */ +void +fil_decompress_page( +/*================*/ + byte* page_buf, /*!< in: preallocated buffer or NULL */ + byte* buf, /*!< out: buffer from which to read; in aio + this must be appropriately aligned */ + ulong len, /*!< in: length of output buffer.*/ + ulint* write_size); /*!< in/out: Actual payload size of + the compressed data.
*/ + +/****************************************************************//** +Get space id from fil node +@return space id*/ +ulint +fil_node_get_space_id( +/*==================*/ + fil_node_t* node); /*!< in: Node where to get space id*/ + +/****************************************************************//** +Get block size from fil node +@return block size*/ +ulint +fil_node_get_block_size( + fil_node_t* node); /*!< in: Node where to get block + size */ +/*******************************************************************//** +Find out whether the page is page compressed +@return true if page is page compressed, false if not */ +ibool +fil_page_is_compressed( +/*===================*/ + byte *buf); /*!< in: page */ + +/*******************************************************************//** +Find out whether the page is page compressed with lzo method +@return true if page is page compressed with lzo method, false if not */ +ibool +fil_page_is_lzo_compressed( +/*=======================*/ + byte *buf); /*!< in: page */ +#endif diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h index a587ccc9f20..87f1f5a636d 100644 --- a/storage/innobase/include/fsp0fsp.h +++ b/storage/innobase/include/fsp0fsp.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -53,12 +54,21 @@ to the two Barracuda row formats COMPRESSED and DYNAMIC. */ /** Width of the DATA_DIR flag. This flag indicates that the tablespace is found in a remote location, not the default data directory.
*/ #define FSP_FLAGS_WIDTH_DATA_DIR 1 +/** Number of flag bits used to indicate the page compression and compression level */ +#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION 1 +#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL 4 +/** Number of flag bits used to indicate atomic writes for this tablespace */ +#define FSP_FLAGS_WIDTH_ATOMIC_WRITES 2 + /** Width of all the currently known tablespace flags */ #define FSP_FLAGS_WIDTH (FSP_FLAGS_WIDTH_POST_ANTELOPE \ + FSP_FLAGS_WIDTH_ZIP_SSIZE \ + FSP_FLAGS_WIDTH_ATOMIC_BLOBS \ + FSP_FLAGS_WIDTH_PAGE_SSIZE \ - + FSP_FLAGS_WIDTH_DATA_DIR) + + FSP_FLAGS_WIDTH_DATA_DIR \ + + FSP_FLAGS_WIDTH_PAGE_COMPRESSION \ + + FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL \ + + FSP_FLAGS_WIDTH_ATOMIC_WRITES) /** A mask of all the known/used bits in tablespace flags */ #define FSP_FLAGS_MASK (~(~0 << FSP_FLAGS_WIDTH)) @@ -71,9 +81,20 @@ is found in a remote location, not the default data directory. */ /** Zero relative shift position of the ATOMIC_BLOBS field */ #define FSP_FLAGS_POS_ATOMIC_BLOBS (FSP_FLAGS_POS_ZIP_SSIZE \ + FSP_FLAGS_WIDTH_ZIP_SSIZE) -/** Zero relative shift position of the PAGE_SSIZE field */ -#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_BLOBS \ +/** Note that these need to be before the page size to be compatible with +dictionary */ +/** Zero relative shift position of the PAGE_COMPRESSION field */ +#define FSP_FLAGS_POS_PAGE_COMPRESSION (FSP_FLAGS_POS_ATOMIC_BLOBS \ + FSP_FLAGS_WIDTH_ATOMIC_BLOBS) +/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */ +#define FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL (FSP_FLAGS_POS_PAGE_COMPRESSION \ + + FSP_FLAGS_WIDTH_PAGE_COMPRESSION) +/** Zero relative shift position of the ATOMIC_WRITES field */ +#define FSP_FLAGS_POS_ATOMIC_WRITES (FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL \ + + FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL) + /** Zero relative shift position of the PAGE_SSIZE field */ +#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_WRITES \ + + 
FSP_FLAGS_WIDTH_ATOMIC_WRITES) /** Zero relative shift position of the start of the UNUSED bits */ #define FSP_FLAGS_POS_DATA_DIR (FSP_FLAGS_POS_PAGE_SSIZE \ + FSP_FLAGS_WIDTH_PAGE_SSIZE) @@ -101,6 +122,18 @@ is found in a remote location, not the default data directory. */ #define FSP_FLAGS_MASK_DATA_DIR \ ((~(~0 << FSP_FLAGS_WIDTH_DATA_DIR)) \ << FSP_FLAGS_POS_DATA_DIR) +/** Bit mask of the PAGE_COMPRESSION field */ +#define FSP_FLAGS_MASK_PAGE_COMPRESSION \ + ((~(~0 << FSP_FLAGS_WIDTH_PAGE_COMPRESSION)) \ + << FSP_FLAGS_POS_PAGE_COMPRESSION) +/** Bit mask of the PAGE_COMPRESSION_LEVEL field */ +#define FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL \ + ((~(~0 << FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL)) \ + << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL) +/** Bit mask of the ATOMIC_WRITES field */ +#define FSP_FLAGS_MASK_ATOMIC_WRITES \ + ((~(~0 << FSP_FLAGS_WIDTH_ATOMIC_WRITES)) \ + << FSP_FLAGS_POS_ATOMIC_WRITES) /** Return the value of the POST_ANTELOPE field */ #define FSP_FLAGS_GET_POST_ANTELOPE(flags) \ @@ -126,11 +159,38 @@ is found in a remote location, not the default data directory. */ #define FSP_FLAGS_GET_UNUSED(flags) \ (flags >> FSP_FLAGS_POS_UNUSED) +/** Return the value of the PAGE_COMPRESSION field */ +#define FSP_FLAGS_GET_PAGE_COMPRESSION(flags) \ + ((flags & FSP_FLAGS_MASK_PAGE_COMPRESSION) \ + >> FSP_FLAGS_POS_PAGE_COMPRESSION) +/** Return the value of the PAGE_COMPRESSION_LEVEL field */ +#define FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags) \ + ((flags & FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL) \ + >> FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL) +/** Return the value of the ATOMIC_WRITES field */ +#define FSP_FLAGS_GET_ATOMIC_WRITES(flags) \ + ((flags & FSP_FLAGS_MASK_ATOMIC_WRITES) \ + >> FSP_FLAGS_POS_ATOMIC_WRITES) + /** Set a PAGE_SSIZE into the correct bits in a given tablespace flags. */ #define FSP_FLAGS_SET_PAGE_SSIZE(flags, ssize) \ (flags | (ssize << FSP_FLAGS_POS_PAGE_SSIZE)) +/** Set a PAGE_COMPRESSION into the correct bits in a given +tablespace flags. 
*/ +#define FSP_FLAGS_SET_PAGE_COMPRESSION(flags, compression) \ + ((flags) | ((compression) << FSP_FLAGS_POS_PAGE_COMPRESSION)) + +/** Set a PAGE_COMPRESSION_LEVEL into the correct bits in a given +tablespace flags. Both arguments are parenthesized so that compound +expressions passed by a caller expand with the intended precedence. */ +#define FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(flags, level) \ + ((flags) | ((level) << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL)) +/** Set an ATOMIC_WRITES value into the correct bits in a given +tablespace flags. Both arguments are parenthesized so that compound +expressions passed by a caller expand with the intended precedence. */ +#define FSP_FLAGS_SET_ATOMIC_WRITES(flags, atomics) \ + ((flags) | ((atomics) << FSP_FLAGS_POS_ATOMIC_WRITES)) + /* @} */ /* @defgroup Tablespace Header Constants (moved from fsp0fsp.c) @{ */ diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic index 0d81e817cc9..3a3eb21a61a 100644 --- a/storage/innobase/include/fsp0fsp.ic +++ b/storage/innobase/include/fsp0fsp.ic @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -63,12 +64,17 @@ fsp_flags_is_valid( ulint atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags); ulint page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags); ulint unused = FSP_FLAGS_GET_UNUSED(flags); + ulint page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(flags); + ulint page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags); + ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); DBUG_EXECUTE_IF("fsp_flags_is_valid_failure", return(false);); /* fsp_flags is zero unless atomic_blobs is set. */ /* Make sure there are no bits that we do not know about. 
*/ if (unused != 0 || flags == 1) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted unused %lu\n", + flags, unused); return(false); } else if (post_antelope) { /* The Antelope row formats REDUNDANT and COMPACT did @@ -76,6 +82,8 @@ fsp_flags_is_valid( 4-byte field is zero for Antelope row formats. */ if (!atomic_blobs) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted atomic_blobs %lu\n", + flags, atomic_blobs); return(false); } } @@ -87,10 +95,14 @@ fsp_flags_is_valid( externally stored parts. */ if (post_antelope || zip_ssize != 0) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted zip_ssize %lu atomic_blobs %lu\n", + flags, zip_ssize, atomic_blobs); return(false); } } else if (!post_antelope || zip_ssize > PAGE_ZIP_SSIZE_MAX) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted zip_ssize %lu max %d\n", + flags, zip_ssize, PAGE_ZIP_SSIZE_MAX); return(false); } else if (page_ssize > UNIV_PAGE_SSIZE_MAX) { @@ -98,12 +110,33 @@ fsp_flags_is_valid( be zero for an original 16k page size. Validate the page shift size is within allowed range. 
*/ + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_ssize %lu max %lu\n", + flags, page_ssize, UNIV_PAGE_SSIZE_MAX); return(false); } else if (UNIV_PAGE_SIZE != UNIV_PAGE_SIZE_ORIG && !page_ssize) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_ssize %lu max %lu:%d\n", + flags, page_ssize, UNIV_PAGE_SIZE, UNIV_PAGE_SIZE_ORIG); return(false); } + /* Page compression level requires page compression and atomic blobs + to be set */ + if (page_compression_level || page_compression) { + if (!page_compression || !atomic_blobs) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_compression %lu\n" + "InnoDB: Error: page_compression_level %lu atomic_blobs %lu\n", + flags, page_compression, page_compression_level, atomic_blobs); + return(false); + } + } + + if (atomic_writes > ATOMIC_WRITES_OFF) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted atomic_writes %lu\n", + flags, atomic_writes); + return (false); + } + #if UNIV_FORMAT_MAX != UNIV_FORMAT_B # error "UNIV_FORMAT_MAX != UNIV_FORMAT_B, Add more validations." #endif @@ -312,3 +345,4 @@ xdes_calc_descriptor_page( } #endif /* !UNIV_INNOCHECKSUM */ + diff --git a/storage/innobase/include/fsp0pagecompress.h b/storage/innobase/include/fsp0pagecompress.h new file mode 100644 index 00000000000..5f943ee2b83 --- /dev/null +++ b/storage/innobase/include/fsp0pagecompress.h @@ -0,0 +1,84 @@ +/***************************************************************************** + +Copyright (C) 2013, 2015, MariaDB Corporation. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/fsp0pagecompress.h +Helper functions for extracting/storing page compression and +atomic writes information to file space. + +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +#ifndef fsp0pagecompress_h +#define fsp0pagecompress_h + +/* Supported page compression methods */ + +#define PAGE_UNCOMPRESSED 0 +#define PAGE_ZLIB_ALGORITHM 1 +#define PAGE_LZ4_ALGORITHM 2 +#define PAGE_LZO_ALGORITHM 3 +#define PAGE_LZMA_ALGORITHM 4 +#define PAGE_BZIP2_ALGORITHM 5 +#define PAGE_SNAPPY_ALGORITHM 6 +#define PAGE_ALGORITHM_LAST PAGE_SNAPPY_ALGORITHM + +/**********************************************************************//** +Reads the page compression level from the first page of a tablespace. +@return page compression level, or 0 if uncompressed */ +UNIV_INTERN +ulint +fsp_header_get_compression_level( +/*=============================*/ + const page_t* page); /*!< in: first page of a tablespace */ + +/********************************************************************//** +Determine if the tablespace is page compressed from dict_table_t::flags. +@return TRUE if page compressed, FALSE if not compressed */ +UNIV_INLINE +ibool +fsp_flags_is_page_compressed( +/*=========================*/ + ulint flags); /*!< in: tablespace flags */ + +/********************************************************************//** +Extract the page compression level from tablespace flags. +A tablespace has only one physical page compression level +whether that page is compressed or not. 
+@return page compression level of the file-per-table tablespace, +or zero if the table is not compressed. */ +UNIV_INLINE +ulint +fsp_flags_get_page_compression_level( +/*=================================*/ + ulint flags); /*!< in: tablespace flags */ + +/********************************************************************//** +Determine the tablespace is using atomic writes from dict_table_t::flags. +@return true if atomic writes is used, false if not */ +UNIV_INLINE +atomic_writes_t +fsp_flags_get_atomic_writes( +/*========================*/ + ulint flags); /*!< in: tablespace flags */ + +#ifndef UNIV_NONINL +#include "fsp0pagecompress.ic" +#endif + +#endif diff --git a/storage/innobase/include/fsp0pagecompress.ic b/storage/innobase/include/fsp0pagecompress.ic new file mode 100644 index 00000000000..3e59106b05d --- /dev/null +++ b/storage/innobase/include/fsp0pagecompress.ic @@ -0,0 +1,197 @@ +/***************************************************************************** + +Copyright (C) 2013,2014 SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/fsp0pagecompress.ic +Implementation for helper functions for extracting/storing page +compression and atomic writes information to file space. + +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +/********************************************************************//** +Determine if the tablespace is page compressed from dict_table_t::flags. +@return TRUE if page compressed, FALSE if not page compressed */ +UNIV_INLINE +ibool +fsp_flags_is_page_compressed( +/*=========================*/ + ulint flags) /*!< in: tablespace flags */ +{ + return(FSP_FLAGS_GET_PAGE_COMPRESSION(flags)); +} + +/********************************************************************//** +Determine the tablespace is page compression level from dict_table_t::flags. +@return page compression level or 0 if not compressed*/ +UNIV_INLINE +ulint +fsp_flags_get_page_compression_level( +/*=================================*/ + ulint flags) /*!< in: tablespace flags */ +{ + return(FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags)); +} + +/********************************************************************//** +Determine the tablespace is using atomic writes from dict_table_t::flags. 
+@return atomic writes table option value stored in the flags */ +UNIV_INLINE +atomic_writes_t +fsp_flags_get_atomic_writes( +/*========================*/ + ulint flags) /*!< in: tablespace flags */ +{ + return((atomic_writes_t)FSP_FLAGS_GET_ATOMIC_WRITES(flags)); +} + +/*******************************************************************//** +Find out whether the page is index page or not +@return true if page type index page, false if not */ +UNIV_INLINE +ibool +fil_page_is_index_page( +/*===================*/ + byte *buf) /*!< in: page */ +{ + return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_INDEX); +} + +/*******************************************************************//** +Find out whether the page is page compressed +@return true if page is page compressed, false if not */ +UNIV_INLINE +ibool +fil_page_is_compressed( +/*===================*/ + byte *buf) /*!< in: page */ +{ + return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED); +} + +/*******************************************************************//** +Returns the page compression level of the space, or 0 if the space +is not compressed. The tablespace must be cached in the memory cache. +@return page compression level, ULINT_UNDEFINED if space not found */ +UNIV_INLINE +ulint +fil_space_get_page_compression_level( +/*=================================*/ + ulint id) /*!< in: space id */ +{ + ulint flags; + + flags = fil_space_get_flags(id); + + if (flags && flags != ULINT_UNDEFINED) { + + return(fsp_flags_get_page_compression_level(flags)); + } + + /* flags is 0 or ULINT_UNDEFINED here and is passed through as is */ + return(flags); +} + +/*******************************************************************//** +Extract the page compression from space. +@return true if space is page compressed, false if space is not found +or space is not page compressed. 
*/ +UNIV_INLINE +ibool +fil_space_is_page_compressed( +/*=========================*/ + ulint id) /*!< in: space id */ +{ + ulint flags; + + flags = fil_space_get_flags(id); + + if (flags && flags != ULINT_UNDEFINED) { + + return(fsp_flags_is_page_compressed(flags)); + } + + /* flags is either 0 or ULINT_UNDEFINED here. Returning + ULINT_UNDEFINED as an ibool would read as true, so report + FALSE explicitly in both cases. */ + return(FALSE); +} + +/****************************************************************//** +Get the name of the compression algorithm used for page +compression. +@return compression algorithm name or "UNKNOWN" if not known*/ +UNIV_INLINE +const char* +fil_get_compression_alg_name( +/*=========================*/ + ulint comp_alg) /*!<in: compression algorithm number */ +{ + /* One case per PAGE_*_ALGORITHM constant up to + PAGE_ALGORITHM_LAST; any other value maps to "UNKNOWN". */ + switch(comp_alg) { + case PAGE_UNCOMPRESSED: + return ("uncompressed"); + case PAGE_ZLIB_ALGORITHM: + return ("ZLIB"); + case PAGE_LZ4_ALGORITHM: + return ("LZ4"); + case PAGE_LZO_ALGORITHM: + return ("LZO"); + case PAGE_LZMA_ALGORITHM: + return ("LZMA"); + case PAGE_BZIP2_ALGORITHM: + return ("BZIP2"); + case PAGE_SNAPPY_ALGORITHM: + return ("SNAPPY"); + default: + return("UNKNOWN"); + } +} + +/*******************************************************************//** +Returns the atomic writes flag of the space, or false if the space +is not using atomic writes. The tablespace must be cached in the memory cache. 
+@return atomic writes table option value */ +UNIV_INLINE +atomic_writes_t +fil_space_get_atomic_writes( +/*========================*/ + ulint id) /*!< in: space id */ +{ + ulint flags; + + flags = fil_space_get_flags(id); + + if (flags && flags != ULINT_UNDEFINED) { + + return((atomic_writes_t)fsp_flags_get_atomic_writes(flags)); + } + + return((atomic_writes_t)0); +} + +/*******************************************************************//** +Find out whether the page is page compressed with lzo method +@return true if page is page compressed with lzo method, false if not */ +UNIV_INLINE +ibool +fil_page_is_lzo_compressed( +/*=======================*/ + byte *buf) /*!< in: page */ +{ + return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED && + mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN) == PAGE_LZO_ALGORITHM); +} diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h index 94fd908ab0c..e5c1734b842 100644 --- a/storage/innobase/include/fsp0types.h +++ b/storage/innobase/include/fsp0types.h @@ -29,6 +29,7 @@ Created May 26, 2009 Vasil Dimov #include "univ.i" #include "fil0fil.h" /* for FIL_PAGE_DATA */ +#include "ut0byte.h" /** @name Flags for inserting records in order If records are inserted in order, there are the following diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index 5077c9e37eb..4126be51ae9 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -2,6 +2,7 @@ Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Percona Inc.. 
Those modifications are @@ -151,10 +152,9 @@ enum os_file_create_t { #define OS_FILE_INSUFFICIENT_RESOURCE 78 #define OS_FILE_AIO_INTERRUPTED 79 #define OS_FILE_OPERATION_ABORTED 80 - #define OS_FILE_ACCESS_VIOLATION 81 - -#define OS_FILE_ERROR_MAX 100 +#define OS_FILE_OPERATION_NOT_SUPPORTED 125 +#define OS_FILE_ERROR_MAX 200 /* @} */ /** Types for aio operations @{ */ @@ -295,33 +295,35 @@ os_file_write The wrapper functions have the prefix of "innodb_". */ #ifdef UNIV_PFS_IO -# define os_file_create(key, name, create, purpose, type, success) \ +# define os_file_create(key, name, create, purpose, type, success, atomic_writes) \ pfs_os_file_create_func(key, name, create, purpose, type, \ - success, __FILE__, __LINE__) + success, atomic_writes, __FILE__, __LINE__) # define os_file_create_simple(key, name, create, access, success) \ pfs_os_file_create_simple_func(key, name, create, access, \ success, __FILE__, __LINE__) # define os_file_create_simple_no_error_handling( \ - key, name, create_mode, access, success) \ + key, name, create_mode, access, success, atomic_writes) \ pfs_os_file_create_simple_no_error_handling_func( \ - key, name, create_mode, access, success, __FILE__, __LINE__) + key, name, create_mode, access, success, atomic_writes, __FILE__, __LINE__) # define os_file_close(file) \ pfs_os_file_close_func(file, __FILE__, __LINE__) # define os_aio(type, mode, name, file, buf, offset, \ - n, message1, message2) \ + n, message1, message2, write_size, \ + page_compression, page_compression_level) \ pfs_os_aio_func(type, mode, name, file, buf, offset, \ - n, message1, message2, __FILE__, __LINE__) + n, message1, message2, write_size, \ + page_compression, page_compression_level, __FILE__, __LINE__) -# define os_file_read(file, buf, offset, n) \ - pfs_os_file_read_func(file, buf, offset, n, __FILE__, __LINE__) +# define os_file_read(file, buf, offset, n, compressed) \ + pfs_os_file_read_func(file, buf, offset, n, compressed, __FILE__, __LINE__) -# define 
os_file_read_no_error_handling(file, buf, offset, n) \ +# define os_file_read_no_error_handling(file, buf, offset, n, compressed) \ pfs_os_file_read_no_error_handling_func(file, buf, offset, n, \ - __FILE__, __LINE__) + compressed, __FILE__, __LINE__) # define os_file_write(name, file, buf, offset, n) \ pfs_os_file_write_func(name, file, buf, offset, \ @@ -342,28 +344,28 @@ The wrapper functions have the prefix of "innodb_". */ /* If UNIV_PFS_IO is not defined, these I/O APIs point to original un-instrumented file I/O APIs */ -# define os_file_create(key, name, create, purpose, type, success) \ - os_file_create_func(name, create, purpose, type, success) +# define os_file_create(key, name, create, purpose, type, success, atomic_writes) \ + os_file_create_func(name, create, purpose, type, success, atomic_writes) -# define os_file_create_simple(key, name, create_mode, access, success) \ +# define os_file_create_simple(key, name, create_mode, access, success) \ os_file_create_simple_func(name, create_mode, access, success) # define os_file_create_simple_no_error_handling( \ - key, name, create_mode, access, success) \ - os_file_create_simple_no_error_handling_func( \ - name, create_mode, access, success) + key, name, create_mode, access, success, atomic_writes) \ + os_file_create_simple_no_error_handling_func( \ + name, create_mode, access, success, atomic_writes) # define os_file_close(file) os_file_close_func(file) -# define os_aio(type, mode, name, file, buf, offset, n, message1, message2) \ +# define os_aio(type, mode, name, file, buf, offset, n, message1, message2, write_size, page_compression, page_compression_level) \ os_aio_func(type, mode, name, file, buf, offset, n, \ - message1, message2) + message1, message2, write_size, page_compression, page_compression_level) -# define os_file_read(file, buf, offset, n) \ - os_file_read_func(file, buf, offset, n) +# define os_file_read(file, buf, offset, n, compressed) \ + os_file_read_func(file, buf, offset, n, 
compressed) -# define os_file_read_no_error_handling(file, buf, offset, n) \ - os_file_read_no_error_handling_func(file, buf, offset, n) +# define os_file_read_no_error_handling(file, buf, offset, n, compressed) \ + os_file_read_no_error_handling_func(file, buf, offset, n, compressed) # define os_file_write(name, file, buf, offset, n) \ os_file_write_func(name, file, buf, offset, n) @@ -524,7 +526,9 @@ os_file_create_simple_no_error_handling_func( OS_FILE_READ_WRITE, or OS_FILE_READ_ALLOW_DELETE; the last option is used by a backup program reading the file */ - ibool* success)/*!< out: TRUE if succeed, FALSE if error */ + ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes)/*!< in: atomic writes table option + value */ __attribute__((nonnull, warn_unused_result)); /****************************************************************//** Tries to disable OS caching on an opened file descriptor. */ @@ -558,7 +562,9 @@ os_file_create_func( async i/o or unbuffered i/o: look in the function source code for the exact rules */ ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */ - ibool* success)/*!< out: TRUE if succeed, FALSE if error */ + ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes)/*!< in: atomic writes table option + value */ __attribute__((nonnull, warn_unused_result)); /***********************************************************************//** Deletes a file. The file has to be closed before calling this. 
@@ -648,6 +654,8 @@ pfs_os_file_create_simple_no_error_handling_func( OS_FILE_READ_ALLOW_DELETE; the last option is used by a backup program reading the file */ ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes,/*!< in: atomic writes table option + value */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ __attribute__((nonnull, warn_unused_result)); @@ -676,6 +684,8 @@ pfs_os_file_create_func( function source code for the exact rules */ ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */ ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes,/*!< in: atomic writes table option + value*/ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ __attribute__((nonnull, warn_unused_result)); @@ -706,6 +716,8 @@ pfs_os_file_read_func( void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ + ibool compressed, /*!< in: is this file space + compressed ? */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ @@ -724,6 +736,8 @@ pfs_os_file_read_no_error_handling_func( void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ + ibool compressed, /*!< in: is this file space + compressed ? */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ @@ -754,6 +768,15 @@ pfs_os_aio_func( (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ + ulint* write_size,/*!< in/out: Actual write size initialized + after first successful trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. 
*/ + ibool page_compression, /*!< in: is page compression used + on this file space */ + ulint page_compression_level, /*!< page compression + level to be used */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ /*******************************************************************//** @@ -910,7 +933,9 @@ os_file_read_func( os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ - ulint n); /*!< in: number of bytes to read */ + ulint n, /*!< in: number of bytes to read */ + ibool compressed); /*!< in: is this file space + compressed ? */ /*******************************************************************//** Rewind file to its start, read at most size - 1 bytes from it to str, and NUL-terminate str. All errors are silently ignored. This function is @@ -935,7 +960,9 @@ os_file_read_no_error_handling_func( os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ - ulint n); /*!< in: number of bytes to read */ + ulint n, /*!< in: number of bytes to read */ + ibool compressed); /*!< in: is this file space + compressed ? */ /*******************************************************************//** NOTE! Use the corresponding macro os_file_write(), not directly this @@ -952,6 +979,7 @@ os_file_write_func( const void* buf, /*!< in: buffer from which to write */ os_offset_t offset, /*!< in: file offset where to write */ ulint n); /*!< in: number of bytes to write */ + /*******************************************************************//** Check the existence and type of the given file. 
@return TRUE if call succeeded */ @@ -1114,10 +1142,20 @@ os_aio_func( (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ - void* message2);/*!< in: message for the aio handler + void* message2,/*!< in: message for the aio handler (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ + ulint* write_size,/*!< in/out: Actual write size initialized + after first successful trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ + ibool page_compression, /*!< in: is page compression used + on this file space */ + ulint page_compression_level); /*!< page compression + level to be used */ + /************************************************************************//** Wakes up all async i/o threads so that they know to exit themselves in shutdown. */ @@ -1291,8 +1329,20 @@ os_file_handle_error_no_exit( /*=========================*/ const char* name, /*!< in: name of a file or NULL */ const char* operation, /*!< in: operation */ - ibool on_error_silent);/*!< in: if TRUE then don't print + ibool on_error_silent,/*!< in: if TRUE then don't print any message to the log. */ + const char* file, /*!< in: file name */ + const ulint line); /*!< in: line */ + +/***********************************************************************//** +Try to get number of bytes per sector from file system. 
+@return file block size */ +UNIV_INTERN +ulint +os_file_get_block_size( +/*===================*/ + os_file_t file, /*!< in: handle to a file */ + const char* name); /*!< in: file name */ #ifndef UNIV_NONINL #include "os0file.ic" diff --git a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic index defd8204ba3..8e1cea585e6 100644 --- a/storage/innobase/include/os0file.ic +++ b/storage/innobase/include/os0file.ic @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -88,6 +89,8 @@ pfs_os_file_create_simple_no_error_handling_func( OS_FILE_READ_ALLOW_DELETE; the last option is used by a backup program reading the file */ ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes,/*!< in: atomic writes table option + value */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -103,7 +106,7 @@ pfs_os_file_create_simple_no_error_handling_func( name, src_file, src_line); file = os_file_create_simple_no_error_handling_func( - name, create_mode, access_type, success); + name, create_mode, access_type, success, atomic_writes); register_pfs_file_open_end(locker, file); @@ -134,6 +137,8 @@ pfs_os_file_create_func( function source code for the exact rules */ ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */ ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes, /*!< in: atomic writes table option + value */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -148,7 +153,7 @@ pfs_os_file_create_func( : PSI_FILE_OPEN), name, src_file, 
src_line); - file = os_file_create_func(name, create_mode, purpose, type, success); + file = os_file_create_func(name, create_mode, purpose, type, success, atomic_writes); register_pfs_file_open_end(locker, file); @@ -210,6 +215,15 @@ pfs_os_aio_func( (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ + ulint* write_size,/*!< in/out: Actual write size initialized + after first successful trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ + ibool page_compression, /*!< in: is page compression used + on this file space */ + ulint page_compression_level, /*!< page compression + level to be used */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -225,7 +239,8 @@ pfs_os_aio_func( src_file, src_line); result = os_aio_func(type, mode, name, file, buf, offset, - n, message1, message2); + n, message1, message2, write_size, + page_compression, page_compression_level); register_pfs_file_io_end(locker, n); @@ -246,6 +261,8 @@ pfs_os_file_read_func( void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ + ibool compressed, /*!< in: is this file space + compressed ? 
*/ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -256,7 +273,7 @@ pfs_os_file_read_func( register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ, src_file, src_line); - result = os_file_read_func(file, buf, offset, n); + result = os_file_read_func(file, buf, offset, n, compressed); register_pfs_file_io_end(locker, n); @@ -279,6 +296,8 @@ pfs_os_file_read_no_error_handling_func( void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ + ibool compressed, /*!< in: is this file space + compressed ? */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -289,7 +308,7 @@ pfs_os_file_read_no_error_handling_func( register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ, src_file, src_line); - result = os_file_read_no_error_handling_func(file, buf, offset, n); + result = os_file_read_no_error_handling_func(file, buf, offset, n, compressed); register_pfs_file_io_end(locker, n); diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h index 2d90f47eefe..90fafb05047 100644 --- a/storage/innobase/include/srv0mon.h +++ b/storage/innobase/include/srv0mon.h @@ -2,6 +2,7 @@ Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -163,6 +164,8 @@ enum monitor_id_t { MONITOR_OVLD_BUF_POOL_PAGES_FREE, MONITOR_OVLD_PAGE_CREATED, MONITOR_OVLD_PAGES_WRITTEN, + MONITOR_OVLD_INDEX_PAGES_WRITTEN, + MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN, MONITOR_OVLD_PAGES_READ, MONITOR_OVLD_BYTE_READ, MONITOR_OVLD_BYTE_WRITTEN, @@ -304,6 +307,20 @@ enum monitor_id_t { MONITOR_PAGE_DECOMPRESS, MONITOR_PAD_INCREMENTS, MONITOR_PAD_DECREMENTS, + /* New monitor variables for page compression */ + MONITOR_OVLD_PAGE_COMPRESS_SAVED, + MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512, + MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024, + MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048, + MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096, + MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192, + MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384, + MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768, + MONITOR_OVLD_PAGES_PAGE_COMPRESSED, + MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP, + MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED, + MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED, + MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR, /* Index related counters */ MONITOR_MODULE_INDEX, diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index d06a14a9153..24a1678c38b 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -103,6 +103,37 @@ struct srv_stats_t { a disk page */ ulint_ctr_1_t buf_pool_reads; + /** Number of bytes saved by page compression */ + ulint_ctr_64_t page_compression_saved; + /** Number of 512Byte TRIM by page compression */ + ulint_ctr_64_t page_compression_trim_sect512; + /** Number of 1K TRIM by page compression */ + ulint_ctr_64_t page_compression_trim_sect1024; + /** Number of 2K TRIM by page compression */ + ulint_ctr_64_t page_compression_trim_sect2048; + /** Number of 4K TRIM by page compression */ + ulint_ctr_64_t page_compression_trim_sect4096; + /** Number of 8K TRIM by page 
compression */ + ulint_ctr_64_t page_compression_trim_sect8192; + /** Number of 16K TRIM by page compression */ + ulint_ctr_64_t page_compression_trim_sect16384; + /** Number of 32K TRIM by page compression */ + ulint_ctr_64_t page_compression_trim_sect32768; + /* Number of index pages written */ + ulint_ctr_64_t index_pages_written; + /* Number of non index pages written */ + ulint_ctr_64_t non_index_pages_written; + /* Number of pages compressed with page compression */ + ulint_ctr_64_t pages_page_compressed; + /* Number of TRIM operations induced by page compression */ + ulint_ctr_64_t page_compressed_trim_op; + /* Number of TRIM operations saved by using actual write size knowledge */ + ulint_ctr_64_t page_compressed_trim_op_saved; + /* Number of pages decompressed with page compression */ + ulint_ctr_64_t pages_page_decompressed; + /* Number of page compression errors */ + ulint_ctr_64_t pages_page_compression_error; + /** Number of data read in total (in bytes) */ ulint_ctr_1_t data_read; @@ -230,6 +261,31 @@ OS (provided we compiled Innobase with it in), otherwise we will use simulated aio we build below with threads. Currently we support native aio on windows and linux */ extern my_bool srv_use_native_aio; + +/* Use trim operation */ +extern my_bool srv_use_trim; + +/* Use posix fallocate */ +#ifdef HAVE_POSIX_FALLOCATE +extern my_bool srv_use_posix_fallocate; +#endif + +/* Use atomic writes i.e disable doublewrite buffer */ +extern my_bool srv_use_atomic_writes; + +/* Compression algorithm*/ +extern ulong innodb_compression_algorithm; + +/* Number of flush threads */ +#define MTFLUSH_MAX_WORKER 64 +#define MTFLUSH_DEFAULT_WORKER 8 + +/* Number of threads used for multi-threaded flush */ +extern long srv_mtflush_threads; + +/* If this flag is TRUE, then we will use multi threaded flush. 
*/ +extern my_bool srv_use_mtflush; + #ifdef __WIN__ extern ibool srv_use_native_conditions; #endif /* __WIN__ */ @@ -362,12 +418,8 @@ extern my_bool srv_stats_sample_traditional; extern ibool srv_use_doublewrite_buf; extern ulong srv_doublewrite_batch_size; -extern ibool srv_use_atomic_writes; -#ifdef HAVE_POSIX_FALLOCATE -extern ibool srv_use_posix_fallocate; -#endif - extern double srv_max_buf_pool_modified_pct; + extern ulong srv_max_purge_lag; extern ulong srv_max_purge_lag_delay; @@ -871,6 +923,38 @@ struct export_var_t{ ulint innodb_purge_view_trx_id_age; /*!< rw_max_trx_id - purged view's min trx_id */ #endif /* UNIV_DEBUG */ + + ib_int64_t innodb_page_compression_saved;/*!< Number of bytes saved + by page compression */ + ib_int64_t innodb_page_compression_trim_sect512;/*!< Number of 512b TRIM + by page compression */ + ib_int64_t innodb_page_compression_trim_sect1024;/*!< Number of 1K TRIM + by page compression */ + ib_int64_t innodb_page_compression_trim_sect2048;/*!< Number of 2K TRIM + by page compression */ + ib_int64_t innodb_page_compression_trim_sect4096;/*!< Number of 4K byte TRIM + by page compression */ + ib_int64_t innodb_page_compression_trim_sect8192;/*!< Number of 8K TRIM + by page compression */ + ib_int64_t innodb_page_compression_trim_sect16384;/*!< Number of 16K TRIM + by page compression */ + ib_int64_t innodb_page_compression_trim_sect32768;/*!< Number of 32K TRIM + by page compression */ + ib_int64_t innodb_index_pages_written; /*!< Number of index pages + written */ + ib_int64_t innodb_non_index_pages_written; /*!< Number of non index pages + written */ + ib_int64_t innodb_pages_page_compressed;/*!< Number of pages + compressed by page compression */ + ib_int64_t innodb_page_compressed_trim_op;/*!< Number of TRIM operations + induced by page compression */ + ib_int64_t innodb_page_compressed_trim_op_saved;/*!< Number of TRIM operations + saved by page compression */ + ib_int64_t innodb_pages_page_decompressed;/*!< Number of pages + 
decompressed by page + compression */ + ib_int64_t innodb_pages_page_compression_error;/*!< Number of page + compression errors */ }; /** Thread slot in the thread table. */ diff --git a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h index 40d502f4459..e1c19982ba5 100644 --- a/storage/innobase/include/srv0start.h +++ b/storage/innobase/include/srv0start.h @@ -37,7 +37,8 @@ Created 10/10/1995 Heikki Tuuri #endif /*********************************************************************//** -Normalizes a directory path for Windows: converts slashes to backslashes. */ +Normalizes a directory path for Windows: converts slashes to backslashes. +*/ UNIV_INTERN void srv_normalize_path_for_win( diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index 3e17f65e4bc..76c0d21fab8 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -2,6 +2,7 @@ Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. +Copyright (c) 2013, 2015, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. 
Those modifications are gratefully acknowledged and are described @@ -331,6 +332,36 @@ typedef enum innodb_file_formats_enum innodb_file_formats_t; /** The 2-logarithm of UNIV_PAGE_SIZE: */ #define UNIV_PAGE_SIZE_SHIFT srv_page_size_shift +#ifdef HAVE_LZO +#define IF_LZO(A,B) A +#else +#define IF_LZO(A,B) B +#endif + +#ifdef HAVE_LZ4 +#define IF_LZ4(A,B) A +#else +#define IF_LZ4(A,B) B +#endif + +#ifdef HAVE_LZMA +#define IF_LZMA(A,B) A +#else +#define IF_LZMA(A,B) B +#endif + +#ifdef HAVE_BZIP2 +#define IF_BZIP2(A,B) A +#else +#define IF_BZIP2(A,B) B +#endif + +#ifdef HAVE_SNAPPY +#define IF_SNAPPY(A,B) A +#else +#define IF_SNAPPY(A,B) B +#endif + /** The universal page size of the database */ #define UNIV_PAGE_SIZE ((ulint) srv_page_size) diff --git a/storage/innobase/include/ut0list.h b/storage/innobase/include/ut0list.h index 29fc8669ce4..796a272db59 100644 --- a/storage/innobase/include/ut0list.h +++ b/storage/innobase/include/ut0list.h @@ -150,6 +150,15 @@ ib_list_is_empty( /* out: TRUE if empty else */ const ib_list_t* list); /* in: list */ +/******************************************************************** +Get number of items on list. +@return number of items on list */ +UNIV_INLINE +ulint +ib_list_len( +/*========*/ + const ib_list_t* list); /*!< in: list */ + /* List. */ struct ib_list_t { ib_list_node_t* first; /*!< first node */ diff --git a/storage/innobase/include/ut0list.ic b/storage/innobase/include/ut0list.ic index d9dcb2eac99..7a7f53adb2f 100644 --- a/storage/innobase/include/ut0list.ic +++ b/storage/innobase/include/ut0list.ic @@ -58,3 +58,23 @@ ib_list_is_empty( { return(!(list->first || list->last)); } + +/******************************************************************** +Get number of items on list. +@return number of items on list */ +UNIV_INLINE +ulint +ib_list_len( +/*========*/ + const ib_list_t* list) /*!< 
in: list */ +{ + ulint len = 0; + ib_list_node_t* node = list->first; + + while(node) { + len++; + node = node->next; + } + + return (len); +} diff --git a/storage/innobase/include/ut0wqueue.h b/storage/innobase/include/ut0wqueue.h index 33385ddf2d4..9906e299808 100644 --- a/storage/innobase/include/ut0wqueue.h +++ b/storage/innobase/include/ut0wqueue.h @@ -95,6 +95,23 @@ ib_wqueue_timedwait( ib_wqueue_t* wq, /* in: work queue */ ib_time_t wait_in_usecs); /* in: wait time in micro seconds */ +/******************************************************************** +Return first item on work queue or NULL if queue is empty +@return work item or NULL */ +void* +ib_wqueue_nowait( +/*=============*/ + ib_wqueue_t* wq); /*!< in: work queue */ + +/******************************************************************** +Get number of items on queue. +@return number of items on queue */ +ulint +ib_wqueue_len( +/*==========*/ + ib_wqueue_t* wq); /*!< in: work queue */ + + /* Work queue. */ struct ib_wqueue_t { ib_mutex_t mutex; /*!< mutex protecting everything */ diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 1850e798ed3..d65baa316d8 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -2,6 +2,7 @@ Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Google Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Google, Inc. 
Those modifications are gratefully acknowledged and are described @@ -1272,7 +1273,7 @@ log_group_file_header_flush( (ulint) (dest_offset / UNIV_PAGE_SIZE), (ulint) (dest_offset % UNIV_PAGE_SIZE), OS_FILE_LOG_BLOCK_SIZE, - buf, group); + buf, group, 0); srv_stats.os_log_pending_writes.dec(); } @@ -1400,7 +1401,7 @@ loop: fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0, (ulint) (next_offset / UNIV_PAGE_SIZE), (ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf, - group); + group, 0); srv_stats.os_log_pending_writes.dec(); @@ -1966,7 +1967,7 @@ log_group_checkpoint( write_offset / UNIV_PAGE_SIZE, write_offset % UNIV_PAGE_SIZE, OS_FILE_LOG_BLOCK_SIZE, - buf, ((byte*) group + 1)); + buf, ((byte*) group + 1), 0); ut_ad(((ulint) group & 0x1UL) == 0); } @@ -2046,7 +2047,7 @@ log_group_read_checkpoint_info( fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->space_id, 0, field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE, - OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL); + OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL, 0); } /******************************************************//** @@ -2340,7 +2341,7 @@ loop: fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0, (ulint) (source_offset / UNIV_PAGE_SIZE), (ulint) (source_offset % UNIV_PAGE_SIZE), - len, buf, NULL); + len, buf, NULL, 0); start_lsn += len; buf += len; @@ -2405,7 +2406,7 @@ log_group_archive_file_header_write( dest_offset / UNIV_PAGE_SIZE, dest_offset % UNIV_PAGE_SIZE, 2 * OS_FILE_LOG_BLOCK_SIZE, - buf, &log_archive_io); + buf, &log_archive_io, 0); } /******************************************************//** @@ -2441,7 +2442,7 @@ log_group_archive_completed_header_write( dest_offset % UNIV_PAGE_SIZE, OS_FILE_LOG_BLOCK_SIZE, buf + LOG_FILE_ARCH_COMPLETED, - &log_archive_io); + &log_archive_io, 0); } /******************************************************//** @@ -2569,7 +2570,7 @@ loop: (ulint) (next_offset / UNIV_PAGE_SIZE), (ulint) (next_offset % UNIV_PAGE_SIZE), 
ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf, - &log_archive_io); + &log_archive_io, 0); start_lsn += len; next_offset += len; diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 9affec63252..3632c45d603 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -2,6 +2,7 @@ Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -2078,7 +2079,7 @@ recv_apply_log_recs_for_backup(void) error = fil_io(OS_FILE_READ, true, recv_addr->space, zip_size, recv_addr->page_no, 0, zip_size, - block->page.zip.data, NULL); + block->page.zip.data, NULL, 0); if (error == DB_SUCCESS && !buf_zip_decompress(block, TRUE)) { exit(1); @@ -2088,7 +2089,7 @@ recv_apply_log_recs_for_backup(void) recv_addr->space, 0, recv_addr->page_no, 0, UNIV_PAGE_SIZE, - block->frame, NULL); + block->frame, NULL, 0); } if (error != DB_SUCCESS) { @@ -2117,13 +2118,13 @@ recv_apply_log_recs_for_backup(void) recv_addr->space, zip_size, recv_addr->page_no, 0, zip_size, - block->page.zip.data, NULL); + block->page.zip.data, NULL, 0); } else { error = fil_io(OS_FILE_WRITE, true, recv_addr->space, 0, recv_addr->page_no, 0, UNIV_PAGE_SIZE, - block->frame, NULL); + block->frame, NULL, 0); } skip_this_recv_addr: recv_addr = HASH_GET_NEXT(addr_hash, recv_addr); @@ -3082,7 +3083,7 @@ recv_recovery_from_checkpoint_start_func( fil_io(OS_FILE_READ | OS_FILE_LOG, true, max_cp_group->space_id, 0, 0, 0, LOG_FILE_HDR_SIZE, - log_hdr_buf, max_cp_group); + log_hdr_buf, max_cp_group, 0); if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, (byte*)"ibbackup", (sizeof "ibbackup") - 1)) { @@ -3113,7 +3114,7 @@ recv_recovery_from_checkpoint_start_func( fil_io(OS_FILE_WRITE | 
OS_FILE_LOG, true, max_cp_group->space_id, 0, 0, 0, OS_FILE_LOG_BLOCK_SIZE, - log_hdr_buf, max_cp_group); + log_hdr_buf, max_cp_group, 0); } #ifdef UNIV_LOG_ARCHIVE @@ -3743,7 +3744,7 @@ ask_again: /* Read the archive file header */ fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->archive_space_id, 0, 0, - LOG_FILE_HDR_SIZE, buf, NULL); + LOG_FILE_HDR_SIZE, buf, NULL, 0); /* Check if the archive file header is consistent */ @@ -3816,7 +3817,7 @@ ask_again: fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->archive_space_id, read_offset / UNIV_PAGE_SIZE, - read_offset % UNIV_PAGE_SIZE, len, buf, NULL); + read_offset % UNIV_PAGE_SIZE, len, buf, NULL, 0); ret = recv_scan_log_recs( (buf_pool_get_n_pages() diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index e1c98f6ace3..525b537ddd7 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -2,6 +2,7 @@ Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. +Copyright (c) 2013, 2015, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Percona Inc.. 
Those modifications are @@ -42,8 +43,15 @@ Created 10/21/1995 Heikki Tuuri #include "srv0srv.h" #include "srv0start.h" #include "fil0fil.h" +#include "fil0pagecompress.h" #include "buf0buf.h" #include "srv0mon.h" +#include "srv0srv.h" +#ifdef HAVE_POSIX_FALLOCATE +#include "unistd.h" +#include "fcntl.h" +#include "linux/falloc.h" +#endif #ifndef UNIV_HOTBACKUP # include "os0sync.h" # include "os0thread.h" @@ -60,6 +68,38 @@ Created 10/21/1995 Heikki Tuuri #include <libaio.h> #endif +#if defined(UNIV_LINUX) && defined(HAVE_SYS_IOCTL_H) +# include <sys/ioctl.h> +# ifndef DFS_IOCTL_ATOMIC_WRITE_SET +# define DFS_IOCTL_ATOMIC_WRITE_SET _IOW(0x95, 2, uint) +# endif +#endif + +#if defined(UNIV_LINUX) && defined(HAVE_SYS_STATVFS_H) +#include <sys/statvfs.h> +#endif + +#if defined(UNIV_LINUX) && defined(HAVE_LINUX_FALLOC_H) +#include <linux/falloc.h> +#endif + +#if defined(HAVE_FALLOCATE) +#ifndef FALLOC_FL_KEEP_SIZE +#define FALLOC_FL_KEEP_SIZE 0x01 +#endif +#ifndef FALLOC_FL_PUNCH_HOLE +#define FALLOC_FL_PUNCH_HOLE 0x02 +#endif +#endif + +#ifdef HAVE_LZO +#include "lzo/lzo1x.h" +#endif + +#ifdef HAVE_SNAPPY +#include "snappy-c.h" +#endif + /** Insert buffer segment id */ static const ulint IO_IBUF_SEGMENT = 0; @@ -175,6 +215,32 @@ struct os_aio_slot_t{ and which can be used to identify which pending aio operation was completed */ + ulint bitmap; + + byte* page_compression_page; /*!< Memory allocated for + page compressed page and + freed after the write + has been completed */ + + ibool page_compression; + ulint page_compression_level; + + ulint* write_size; /*!< Actual write size initialized + after first successful trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. 
*/ + + byte* page_buf; /*!< Actual page buffer for + page compressed pages, do not + free this */ + + ibool page_compress_success; + /*!< TRUE if page compression was + successfull, false if not */ + + ulint file_block_size;/*!< file block size */ + #ifdef WIN_ASYNC_IO HANDLE handle; /*!< handle object we need in the OVERLAPPED struct */ @@ -185,6 +251,7 @@ struct os_aio_slot_t{ int n_bytes; /* bytes written/read. */ int ret; /* AIO return code */ #endif /* WIN_ASYNC_IO */ + byte *lzo_mem; /* Temporal memory used by LZO */ }; /** The asynchronous i/o array structure */ @@ -294,6 +361,88 @@ UNIV_INTERN ulint os_n_pending_writes = 0; /** Number of pending read operations */ UNIV_INTERN ulint os_n_pending_reads = 0; +/** After first fallocate failure we will disable os_file_trim */ +UNIV_INTERN ibool os_fallocate_failed = FALSE; + +/**********************************************************************//** +Directly manipulate the allocated disk space by deallocating for the file referred to +by fd for the byte range starting at offset and continuing for len bytes. +Within the specified range, partial file system blocks are zeroed, and whole +file system blocks are removed from the file. After a successful call, +subsequent reads from this range will return zeroes. +@return true if success, false if error */ +UNIV_INTERN +ibool +os_file_trim( +/*=========*/ + os_aio_slot_t* slot); /*!< in: slot structure */ + +/**********************************************************************//** +Allocate memory for temporal buffer used for page compression. This +buffer is freed later. 
*/ +UNIV_INTERN +void +os_slot_alloc_page_buf( +/*===================*/ + os_aio_slot_t* slot); /*!< in: slot structure */ + +#ifdef HAVE_LZO +/**********************************************************************//** +Allocate memory for temporal memory used for page compression when +LZO compression method is used */ +UNIV_INTERN +void +os_slot_alloc_lzo_mem( +/*===================*/ + os_aio_slot_t* slot); /*!< in: slot structure */ +#endif + +/****************************************************************//** +Does error handling when a file operation fails. +@return TRUE if we should retry the operation */ +ibool +os_file_handle_error_no_exit( +/*=========================*/ + const char* name, /*!< in: name of a file or NULL */ + const char* operation, /*!< in: operation */ + ibool on_error_silent,/*!< in: if TRUE then don't print + any message to the log. */ + const char* file, /*!< in: file name */ + const ulint line); /*!< in: line */ + +/****************************************************************//** +Tries to enable the atomic write feature, if available, for the specified file +handle. 
+@return TRUE if success */ +static __attribute__((warn_unused_result)) +ibool +os_file_set_atomic_writes( +/*======================*/ + const char* name /*!< in: name of the file */ + __attribute__((unused)), + os_file_t file /*!< in: handle to the file */ + __attribute__((unused))) +{ +#ifdef DFS_IOCTL_ATOMIC_WRITE_SET + int atomic_option = 1; + + if (ioctl(file, DFS_IOCTL_ATOMIC_WRITE_SET, &atomic_option)) { + + fprintf(stderr, "InnoDB: Warning:Trying to enable atomic writes on " + "file %s on non-supported platform!\n", name); + os_file_handle_error_no_exit(name, "ioctl", FALSE, __FILE__, __LINE__); + return(FALSE); + } + + return(TRUE); +#else + fprintf(stderr, "InnoDB: Error: trying to enable atomic writes on " + "file %s on non-supported platform!\n", name); + return(FALSE); +#endif +} + + #ifdef UNIV_DEBUG # ifndef UNIV_HOTBACKUP /**********************************************************************//** @@ -439,6 +588,19 @@ os_file_get_last_error_low( "InnoDB: because of either a thread exit" " or an application request.\n" "InnoDB: Retry attempt is made.\n"); + } else if (err == ECANCELED || err == ENOTTY) { + if (strerror(err) != NULL) { + fprintf(stderr, + "InnoDB: Error number %d" + " means '%s'.\n", + err, strerror(err)); + } + + if(srv_use_atomic_writes) { + fprintf(stderr, + "InnoDB: Error trying to enable atomic writes on " + "non-supported destination!\n"); + } } else { fprintf(stderr, "InnoDB: Some operating system error numbers" @@ -503,6 +665,19 @@ os_file_get_last_error_low( "InnoDB: The error means mysqld does not have" " the access rights to\n" "InnoDB: the directory.\n"); + } else if (err == ECANCELED || err == ENOTTY) { + if (strerror(err) != NULL) { + fprintf(stderr, + "InnoDB: Error number %d" + " means '%s'.\n", + err, strerror(err)); + } + + if(srv_use_atomic_writes) { + fprintf(stderr, + "InnoDB: Error trying to enable atomic writes on " + "non-supported destination!\n"); + } } else { if (strerror(err) != NULL) { fprintf(stderr, @@ 
-536,6 +711,9 @@ os_file_get_last_error_low( case ENOTDIR: case EISDIR: return(OS_FILE_PATH_ERROR); + case ECANCELED: + case ENOTTY: + return(OS_FILE_OPERATION_NOT_SUPPORTED); case EAGAIN: if (srv_use_native_aio) { return(OS_FILE_AIO_RESOURCES_RESERVED); @@ -582,9 +760,11 @@ os_file_handle_error_cond_exit( const char* operation, /*!< in: operation */ ibool should_exit, /*!< in: call exit(3) if unknown error and this parameter is TRUE */ - ibool on_error_silent)/*!< in: if TRUE then don't print + ibool on_error_silent,/*!< in: if TRUE then don't print any message to the log iff it is an unknown non-fatal error */ + const char* file, /*!< in: file name */ + const ulint line) /*!< in: line */ { ulint err; @@ -614,6 +794,9 @@ os_file_handle_error_cond_exit( " InnoDB: Disk is full. Try to clean the disk" " to free space.\n"); + fprintf(stderr, + " InnoDB: at file %s and at line %ld\n", file, line); + os_has_said_disk_full = TRUE; fflush(stderr); @@ -655,6 +838,9 @@ os_file_handle_error_cond_exit( ? 
" Cannot continue operation" : ""); } + fprintf(stderr, + " InnoDB: at file %s and at line %ld\n", file, line); + if (should_exit) { exit(1); } @@ -671,10 +857,12 @@ ibool os_file_handle_error( /*=================*/ const char* name, /*!< in: name of a file or NULL */ - const char* operation) /*!< in: operation */ + const char* operation, /*!< in: operation */ + const char* file, /*!< in: file name */ + const ulint line) /*!< in: line */ { /* exit in case of unknown error */ - return(os_file_handle_error_cond_exit(name, operation, TRUE, FALSE)); + return(os_file_handle_error_cond_exit(name, operation, TRUE, FALSE, file, line)); } /****************************************************************//** @@ -685,12 +873,14 @@ os_file_handle_error_no_exit( /*=========================*/ const char* name, /*!< in: name of a file or NULL */ const char* operation, /*!< in: operation */ - ibool on_error_silent)/*!< in: if TRUE then don't print + ibool on_error_silent,/*!< in: if TRUE then don't print any message to the log. 
*/ + const char* file, /*!< in: file name */ + const ulint line) /*!< in: line */ { /* don't exit in case of unknown error */ return(os_file_handle_error_cond_exit( - name, operation, FALSE, on_error_silent)); + name, operation, FALSE, on_error_silent, file, line)); } #undef USE_FILE_LOCK @@ -830,7 +1020,7 @@ os_file_opendir( if (dir == INVALID_HANDLE_VALUE) { if (error_is_fatal) { - os_file_handle_error(dirname, "opendir"); + os_file_handle_error(dirname, "opendir", __FILE__, __LINE__); } return(NULL); @@ -841,7 +1031,7 @@ os_file_opendir( dir = opendir(dirname); if (dir == NULL && error_is_fatal) { - os_file_handle_error(dirname, "opendir"); + os_file_handle_error(dirname, "opendir", __FILE__, __LINE__); } return(dir); @@ -863,7 +1053,7 @@ os_file_closedir( ret = FindClose(dir); if (!ret) { - os_file_handle_error_no_exit(NULL, "closedir", FALSE); + os_file_handle_error_no_exit(NULL, "closedir", FALSE, __FILE__, __LINE__); return(-1); } @@ -875,7 +1065,7 @@ os_file_closedir( ret = closedir(dir); if (ret) { - os_file_handle_error_no_exit(NULL, "closedir", FALSE); + os_file_handle_error_no_exit(NULL, "closedir", FALSE, __FILE__, __LINE__); } return(ret); @@ -947,7 +1137,7 @@ next_file: return(1); } else { - os_file_handle_error_no_exit(NULL, "readdir_next_file", FALSE); + os_file_handle_error_no_exit(NULL, "readdir_next_file", FALSE, __FILE__, __LINE__); return(-1); } #else @@ -1033,7 +1223,7 @@ next_file: goto next_file; } - os_file_handle_error_no_exit(full_path, "stat", FALSE); + os_file_handle_error_no_exit(full_path, "stat", FALSE, __FILE__, __LINE__); ut_free(full_path); @@ -1084,7 +1274,7 @@ os_file_create_directory( && !fail_if_exists))) { os_file_handle_error_no_exit( - pathname, "CreateDirectory", FALSE); + pathname, "CreateDirectory", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -1097,7 +1287,7 @@ os_file_create_directory( if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) { /* failure */ - os_file_handle_error_no_exit(pathname, "mkdir", 
FALSE); + os_file_handle_error_no_exit(pathname, "mkdir", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -1207,7 +1397,7 @@ os_file_create_simple_func( retry = os_file_handle_error( name, create_mode == OS_FILE_OPEN ? - "open" : "create"); + "open" : "create", __FILE__, __LINE__); } else { *success = TRUE; @@ -1275,7 +1465,7 @@ os_file_create_simple_func( retry = os_file_handle_error( name, create_mode == OS_FILE_OPEN - ? "open" : "create"); + ? "open" : "create", __FILE__, __LINE__); } else { *success = TRUE; retry = false; @@ -1317,9 +1507,12 @@ os_file_create_simple_no_error_handling_func( OS_FILE_READ_WRITE, or OS_FILE_READ_ALLOW_DELETE; the last option is used by a backup program reading the file */ - ibool* success)/*!< out: TRUE if succeed, FALSE if error */ + ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes) /*! in: atomic writes table option + value */ { os_file_t file; + atomic_writes_t awrites = (atomic_writes_t) atomic_writes; *success = FALSE; #ifdef __WIN__ @@ -1380,6 +1573,23 @@ os_file_create_simple_no_error_handling_func( attributes, NULL); // No template file + /* If we have proper file handle and atomic writes should be used, + try to set atomic writes and if that fails when creating a new + table, produce a error. 
If atomic writes are used on existing + file, ignore error and use traditional writes for that file */ + if (file != INVALID_HANDLE_VALUE + && (awrites == ATOMIC_WRITES_ON || + (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) + && !os_file_set_atomic_writes(name, file)) { + if (create_mode == OS_FILE_CREATE) { + fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); + CloseHandle(file); + os_file_delete_if_exists_func(name); + *success = FALSE; + file = INVALID_HANDLE_VALUE; + } + } + *success = (file != INVALID_HANDLE_VALUE); #else /* __WIN__ */ int create_flag; @@ -1440,6 +1650,24 @@ os_file_create_simple_no_error_handling_func( } #endif /* USE_FILE_LOCK */ + /* If we have proper file handle and atomic writes should be used, + try to set atomic writes and if that fails when creating a new + table, produce a error. If atomic writes are used on existing + file, ignore error and use traditional writes for that file */ + if (file != -1 + && (awrites == ATOMIC_WRITES_ON || + (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) + && !os_file_set_atomic_writes(name, file)) { + if (create_mode == OS_FILE_CREATE) { + fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); + close(file); + os_file_delete_if_exists_func(name); + *success = FALSE; + file = -1; + } + } + + #endif /* __WIN__ */ return(file); @@ -1524,12 +1752,15 @@ os_file_create_func( async i/o or unbuffered i/o: look in the function source code for the exact rules */ ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */ - ibool* success)/*!< out: TRUE if succeed, FALSE if error */ + ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes) /*! 
in: atomic writes table option + value */ { os_file_t file; ibool retry; ibool on_error_no_exit; ibool on_error_silent; + atomic_writes_t awrites = (atomic_writes_t) atomic_writes; #ifdef __WIN__ DBUG_EXECUTE_IF( @@ -1662,9 +1893,9 @@ os_file_create_func( if (on_error_no_exit) { retry = os_file_handle_error_no_exit( - name, operation, on_error_silent); + name, operation, on_error_silent, __FILE__, __LINE__); } else { - retry = os_file_handle_error(name, operation); + retry = os_file_handle_error(name, operation, __FILE__, __LINE__); } } else { *success = TRUE; @@ -1673,6 +1904,22 @@ os_file_create_func( } while (retry); + /* If we have proper file handle and atomic writes should be used, + try to set atomic writes and if that fails when creating a new + table, produce a error. If atomic writes are used on existing + file, ignore error and use traditional writes for that file */ + if (file != INVALID_HANDLE_VALUE && type == OS_DATA_FILE + && (awrites == ATOMIC_WRITES_ON || + (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) + && !os_file_set_atomic_writes(name, file)) { + if (create_mode == OS_FILE_CREATE) { + fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); + CloseHandle(file); + os_file_delete_if_exists_func(name); + *success = FALSE; + file = INVALID_HANDLE_VALUE; + } + } #else /* __WIN__ */ int create_flag; const char* mode_str = NULL; @@ -1747,9 +1994,9 @@ os_file_create_func( if (on_error_no_exit) { retry = os_file_handle_error_no_exit( - name, operation, on_error_silent); + name, operation, on_error_silent, __FILE__, __LINE__); } else { - retry = os_file_handle_error(name, operation); + retry = os_file_handle_error(name, operation, __FILE__, __LINE__); } } else { *success = TRUE; @@ -1801,6 +2048,22 @@ os_file_create_func( } #endif /* USE_FILE_LOCK */ + /* If we have proper file handle and atomic writes should be used, + try to set atomic writes and if that fails when creating a new + table, produce a error. 
If atomic writes are used on existing + file, ignore error and use traditional writes for that file */ + if (file != -1 && type == OS_DATA_FILE + && (awrites == ATOMIC_WRITES_ON || + (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) + && !os_file_set_atomic_writes(name, file)) { + if (create_mode == OS_FILE_CREATE) { + fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); + close(file); + os_file_delete_if_exists_func(name); + *success = FALSE; + file = -1; + } + } #endif /* __WIN__ */ return(file); @@ -1859,7 +2122,7 @@ loop: ret = unlink(name); if (ret != 0 && errno != ENOENT) { - os_file_handle_error_no_exit(name, "delete", FALSE); + os_file_handle_error_no_exit(name, "delete", FALSE, __FILE__, __LINE__); return(false); } @@ -1923,7 +2186,7 @@ loop: ret = unlink(name); if (ret != 0) { - os_file_handle_error_no_exit(name, "delete", FALSE); + os_file_handle_error_no_exit(name, "delete", FALSE, __FILE__, __LINE__); return(false); } @@ -1967,7 +2230,7 @@ os_file_rename_func( return(TRUE); } - os_file_handle_error_no_exit(oldpath, "rename", FALSE); + os_file_handle_error_no_exit(oldpath, "rename", FALSE, __FILE__, __LINE__); return(FALSE); #else @@ -1976,7 +2239,7 @@ os_file_rename_func( ret = rename(oldpath, newpath); if (ret != 0) { - os_file_handle_error_no_exit(oldpath, "rename", FALSE); + os_file_handle_error_no_exit(oldpath, "rename", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -2005,7 +2268,7 @@ os_file_close_func( return(TRUE); } - os_file_handle_error(NULL, "close"); + os_file_handle_error(NULL, "close", __FILE__, __LINE__); return(FALSE); #else @@ -2014,7 +2277,7 @@ os_file_close_func( ret = close(file); if (ret == -1) { - os_file_handle_error(NULL, "close"); + os_file_handle_error(NULL, "close", __FILE__, __LINE__); return(FALSE); } @@ -2106,6 +2369,11 @@ os_file_set_size( current_size = 0; +#ifdef UNIV_DEBUG + fprintf(stderr, "InnoDB: Note: File %s current_size %lu extended_size %lu\n", + name, os_file_get_size(file), 
size); +#endif + #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { @@ -2114,15 +2382,15 @@ os_file_set_size( fprintf(stderr, "InnoDB: Error: preallocating file " "space for file \'%s\' failed. Current size " "%lu, desired size %lu\n", - name, (long unsigned) current_size, (long unsigned) size); - os_file_handle_error_no_exit(name, "posix_fallocate", FALSE); + name, current_size, size); + os_file_handle_error_no_exit(name, "posix_fallocate", FALSE, __FILE__, __LINE__); + return(FALSE); } return(TRUE); } #endif - /* Write up to 1 megabyte at a time. */ buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE)) * UNIV_PAGE_SIZE; @@ -2149,6 +2417,7 @@ os_file_set_size( } ret = os_file_write(name, file, buf, current_size, n_bytes); + if (!ret) { ut_free(buf2); goto error_handling; @@ -2279,7 +2548,7 @@ os_file_flush_func( return(TRUE); } - os_file_handle_error(NULL, "flush"); + os_file_handle_error(NULL, "flush", __FILE__, __LINE__); /* It is a fatal error if a file flush does not succeed, because then the database can get corrupt on disk */ @@ -2333,7 +2602,7 @@ os_file_flush_func( ib_logf(IB_LOG_LEVEL_ERROR, "The OS said file flush did not succeed"); - os_file_handle_error(NULL, "flush"); + os_file_handle_error(NULL, "flush", __FILE__, __LINE__); /* It is a fatal error if a file flush does not succeed, because then the database can get corrupt on disk */ @@ -2571,7 +2840,9 @@ os_file_read_func( os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ - ulint n) /*!< in: number of bytes to read */ + ulint n, /*!< in: number of bytes to read */ + ibool compressed) /*!< in: is this file space + compressed ? 
*/ { #ifdef __WIN__ BOOL ret; @@ -2639,6 +2910,14 @@ try_again: os_mutex_exit(os_file_count_mutex); if (ret && len == n) { + /* Note that InnoDB writes files that are not formated + as file spaces and they do not have FIL_PAGE_TYPE + field, thus we must use here information is the actual + file space compressed. */ + if (fil_page_is_compressed((byte *)buf)) { + fil_decompress_page(NULL, (byte *)buf, len, NULL); + } + return(TRUE); } #else /* __WIN__ */ @@ -2651,6 +2930,13 @@ try_again: ret = os_file_pread(file, buf, n, offset); if ((ulint) ret == n) { + /* Note that InnoDB writes files that are not formated + as file spaces and they do not have FIL_PAGE_TYPE + field, thus we must use here information is the actual + file space compressed. */ + if (fil_page_is_compressed((byte *)buf)) { + fil_decompress_page(NULL, (byte *)buf, n, NULL); + } return(TRUE); } else if (ret == -1) { ib_logf(IB_LOG_LEVEL_ERROR, @@ -2667,7 +2953,7 @@ try_again: #ifdef __WIN__ error_handling: #endif - retry = os_file_handle_error(NULL, "read"); + retry = os_file_handle_error(NULL, "read", __FILE__, __LINE__); if (retry) { goto try_again; @@ -2702,7 +2988,9 @@ os_file_read_no_error_handling_func( os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ - ulint n) /*!< in: number of bytes to read */ + ulint n, /*!< in: number of bytes to read */ + ibool compressed) /*!< in: is this file space + compressed ? */ { #ifdef __WIN__ BOOL ret; @@ -2770,6 +3058,15 @@ try_again: os_mutex_exit(os_file_count_mutex); if (ret && len == n) { + + /* Note that InnoDB writes files that are not formated + as file spaces and they do not have FIL_PAGE_TYPE + field, thus we must use here information is the actual + file space compressed. 
*/ + if (fil_page_is_compressed((byte *)buf)) { + fil_decompress_page(NULL, (byte *)buf, n, NULL); + } + return(TRUE); } #else /* __WIN__ */ @@ -2782,6 +3079,13 @@ try_again: ret = os_file_pread(file, buf, n, offset); if ((ulint) ret == n) { + /* Note that InnoDB writes files that are not formated + as file spaces and they do not have FIL_PAGE_TYPE + field, thus we must use here information is the actual + file space compressed. */ + if (fil_page_is_compressed((byte *)buf)) { + fil_decompress_page(NULL, (byte *)buf, n, NULL); + } return(TRUE); } else if (ret == -1) { ib_logf(IB_LOG_LEVEL_ERROR, @@ -2798,7 +3102,7 @@ try_again: #ifdef __WIN__ error_handling: #endif - retry = os_file_handle_error_no_exit(NULL, "read", FALSE); + retry = os_file_handle_error_no_exit(NULL, "read", FALSE, __FILE__, __LINE__); if (retry) { goto try_again; @@ -2869,6 +3173,7 @@ os_file_write_func( ut_ad(buf); ut_ad(n > 0); + retry: low = (DWORD) offset & 0xFFFFFFFF; high = (DWORD) (offset >> 32); @@ -3073,7 +3378,7 @@ os_file_status( } else if (ret) { /* file exists, but stat call failed */ - os_file_handle_error_no_exit(path, "stat", FALSE); + os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -3101,7 +3406,7 @@ os_file_status( } else if (ret) { /* file exists, but stat call failed */ - os_file_handle_error_no_exit(path, "stat", FALSE); + os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -3150,7 +3455,7 @@ os_file_get_status( } else if (ret) { /* file exists, but stat call failed */ - os_file_handle_error_no_exit(path, "stat", FALSE); + os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__); return(DB_FAIL); @@ -3203,7 +3508,7 @@ os_file_get_status( } else if (ret) { /* file exists, but stat call failed */ - os_file_handle_error_no_exit(path, "stat", FALSE); + os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__); return(DB_FAIL); @@ -3770,7 +4075,8 @@ os_aio_array_create( 
array->slots = static_cast<os_aio_slot_t*>( ut_malloc(n * sizeof(*array->slots))); - memset(array->slots, 0x0, sizeof(n * sizeof(*array->slots))); + memset(array->slots, 0x0, n * sizeof(*array->slots)); + #ifdef __WIN__ array->handles = static_cast<HANDLE*>(ut_malloc(n * sizeof(HANDLE))); #endif /* __WIN__ */ @@ -3858,8 +4164,8 @@ os_aio_array_free( /*==============*/ os_aio_array_t*& array) /*!< in, own: array to free */ { -#ifdef WIN_ASYNC_IO ulint i; +#ifdef WIN_ASYNC_IO for (i = 0; i < array->n_slots; i++) { os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); @@ -3881,6 +4187,19 @@ os_aio_array_free( } #endif /* LINUX_NATIVE_AIO */ + for (i = 0; i < array->n_slots; i++) { + os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); + if (slot->page_compression_page) { + ut_free(slot->page_compression_page); + slot->page_compression_page = NULL; + } + + if (slot->lzo_mem) { + ut_free(slot->lzo_mem); + slot->lzo_mem = NULL; + } + } + ut_free(array->slots); ut_free(array); @@ -4214,7 +4533,16 @@ os_aio_array_reserve_slot( void* buf, /*!< in: buffer where to read or from which to write */ os_offset_t offset, /*!< in: file offset */ - ulint len) /*!< in: length of the block to read or write */ + ulint len, /*!< in: length of the block to read or write */ + ulint* write_size,/*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. 
*/ + ibool page_compression, /*!< in: is page compression used + on this file space */ + ulint page_compression_level) /*!< page compression + level to be used */ { os_aio_slot_t* slot = NULL; #ifdef WIN_ASYNC_IO @@ -4304,6 +4632,61 @@ found: slot->buf = static_cast<byte*>(buf); slot->offset = offset; slot->io_already_done = FALSE; + slot->page_compress_success = FALSE; + slot->write_size = write_size; + slot->page_compression_level = page_compression_level; + slot->page_compression = page_compression; + + if (message1) { + slot->file_block_size = fil_node_get_block_size(message1); + } + + /* If the space is page compressed and this is write operation + then we compress the page */ + if (message1 && type == OS_FILE_WRITE && page_compression ) { + ulint real_len = len; + byte* tmp = NULL; + + /* Release the array mutex while compressing */ + os_mutex_exit(array->mutex); + + // We allocate memory for page compressed buffer if and only + // if it is not yet allocated. + os_slot_alloc_page_buf(slot); + +#ifdef HAVE_LZO + if (innodb_compression_algorithm == 3) { + os_slot_alloc_lzo_mem(slot); + } +#endif + + /* Call page compression */ + tmp = fil_compress_page(fil_node_get_space_id(slot->message1), + (byte *)buf, + slot->page_buf, + len, + page_compression_level, + fil_node_get_block_size(slot->message1), + &real_len, + slot->lzo_mem + ); + + /* If compression succeeded, set up the length and buffer */ + if (tmp != buf) { + len = real_len; + buf = slot->page_buf; + slot->len = real_len; + slot->page_compress_success = TRUE; + } else { + slot->page_compress_success = FALSE; + } + + /* Take array mutex back, not sure if this is really needed + below */ + os_mutex_enter(array->mutex); + + } + #ifdef WIN_ASYNC_IO control = &slot->control; @@ -4578,10 +4961,19 @@ os_aio_func( (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ - void* message2)/*!< in: message for the aio handler + void* message2,/*!< in: message for the aio handler (can 
be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ + ulint* write_size,/*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ + ibool page_compression, /*!< in: is page compression used + on this file space */ + ulint page_compression_level) /*!< page compression + level to be used */ { os_aio_array_t* array; os_aio_slot_t* slot; @@ -4631,7 +5023,8 @@ os_aio_func( and os_file_write_func() */ if (type == OS_FILE_READ) { - ret = os_file_read_func(file, buf, offset, n); + ret = os_file_read_func(file, buf, offset, n, + page_compression); } else { ut_ad(!srv_read_only_mode); @@ -4643,10 +5036,9 @@ os_aio_func( os_has_said_disk_full = FALSE; ret = 0; errno = 28;); if (!ret) { - os_file_handle_error_cond_exit(name, "os_file_write_func", TRUE, FALSE); + os_file_handle_error_cond_exit(name, "os_file_write_func", TRUE, FALSE, __FILE__, __LINE__); } } - return ret; } @@ -4693,7 +5085,8 @@ try_again: } slot = os_aio_array_reserve_slot(type, array, message1, message2, file, - name, buf, offset, n); + name, buf, offset, n, write_size, page_compression, page_compression_level); + if (type == OS_FILE_READ) { if (srv_use_native_aio) { os_n_file_reads++; @@ -4773,7 +5166,7 @@ err_exit: os_aio_array_free_slot(array, slot); if (os_file_handle_error( - name,type == OS_FILE_READ ? "aio read" : "aio write")) { + name,type == OS_FILE_READ ? 
"aio read" : "aio write", __FILE__, __LINE__)) { goto try_again; } @@ -4886,9 +5279,17 @@ os_aio_windows_handle( if (ret && len == slot->len) { ret_val = TRUE; - } else if (os_file_handle_error(slot->name, "Windows aio")) { + } else if (!ret || (len != slot->len)) { - retry = TRUE; + if (!ret) { + if (os_file_handle_error(slot->name, "Windows aio", __FILE__, __LINE__)) { + retry = TRUE; + } else { + ret_val = FALSE; + } + } else { + retry = TRUE; + } } else { ret_val = FALSE; @@ -4916,9 +5317,18 @@ os_aio_windows_handle( switch (slot->type) { case OS_FILE_WRITE: - ret = WriteFile(slot->file, slot->buf, + if (slot->message1 && + slot->page_compression && + slot->page_compress_success && + slot->page_buf) { + ret = WriteFile(slot->file, slot->page_buf, + (DWORD) slot->len, &len, + &(slot->control)); + } else { + ret = WriteFile(slot->file, slot->buf, (DWORD) slot->len, &len, &(slot->control)); + } break; case OS_FILE_READ: @@ -4950,6 +5360,28 @@ os_aio_windows_handle( ret_val = ret && len == slot->len; } + if (slot->type == OS_FILE_READ) { + if(fil_page_is_compressed(slot->buf)) { + os_slot_alloc_page_buf(slot); + +#ifdef HAVE_LZO + if (fil_page_is_lzo_compressed(slot->buf)) { + os_slot_alloc_lzo_mem(slot); + } +#endif + + fil_decompress_page(slot->page_buf, slot->buf, slot->len, slot->write_size); + } + } else { + /* OS_FILE_WRITE */ + if (slot->page_compress_success && fil_page_is_compressed(slot->page_buf)) { + if (srv_use_trim && os_fallocate_failed == FALSE) { + // Deallocate unused blocks from file system + os_file_trim(slot); + } + } + } + os_aio_array_free_slot(array, slot); return(ret_val); @@ -5039,6 +5471,34 @@ retry: /* We have not overstepped to next segment. */ ut_a(slot->pos < end_pos); + if (slot->type == OS_FILE_READ) { + /* If the table is page compressed and this is read, + we decompress before we annouce the read is + complete. For writes, we free the compressed page. 
*/ + if (fil_page_is_compressed(slot->buf)) { + // We allocate memory for page compressed buffer if and only + // if it is not yet allocated. + os_slot_alloc_page_buf(slot); +#ifdef HAVE_LZO + if (fil_page_is_lzo_compressed(slot->buf)) { + os_slot_alloc_lzo_mem(slot); + } +#endif + + fil_decompress_page(slot->page_buf, slot->buf, slot->len, slot->write_size); + } + } else { + /* OS_FILE_WRITE */ + if (slot->page_compress_success && + fil_page_is_compressed(slot->page_buf)) { + ut_ad(slot->page_compression_page); + if (srv_use_trim && os_fallocate_failed == FALSE) { + // Deallocate unused blocks from file system + os_file_trim(slot); + } + } + } + /* Mark this request as completed. The error handling will be done in the calling function. */ os_mutex_enter(array->mutex); @@ -5182,6 +5642,13 @@ found: } else { errno = -slot->ret; + if (slot->ret == 0) { + fprintf(stderr, + "InnoDB: Number of bytes after aio %d requested %lu\n" + "InnoDB: from file %s\n", + slot->n_bytes, slot->len, slot->name); + } + /* os_file_handle_error does tell us if we should retry this IO. As it stands now, we don't do this retry when reaping requests from a different context than @@ -5189,7 +5656,7 @@ found: windows and linux native AIO. We should probably look into this to transparently re-submit the IO. 
*/ - os_file_handle_error(slot->name, "Linux aio"); + os_file_handle_error(slot->name, "Linux aio", __FILE__, __LINE__); ret = FALSE; } @@ -5473,13 +5940,13 @@ consecutive_loop: errno = 28;); if (!ret) { - os_file_handle_error_cond_exit(aio_slot->name, "os_file_write_func", TRUE, FALSE); + os_file_handle_error_cond_exit(aio_slot->name, "os_file_write_func", TRUE, FALSE, __FILE__, __LINE__); } } else { ret = os_file_read( aio_slot->file, combined_buf, - aio_slot->offset, total_len); + aio_slot->offset, total_len, aio_slot->page_compression); } srv_set_io_thread_op_info(global_segment, "file i/o done"); @@ -5869,4 +6336,290 @@ os_aio_all_slots_free(void) } #endif /* UNIV_DEBUG */ +#ifdef _WIN32 +#include <winioctl.h> +#ifndef FSCTL_FILE_LEVEL_TRIM +#define FSCTL_FILE_LEVEL_TRIM CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 130, METHOD_BUFFERED, FILE_WRITE_DATA) +typedef struct _FILE_LEVEL_TRIM_RANGE { + DWORDLONG Offset; + DWORDLONG Length; +} FILE_LEVEL_TRIM_RANGE, *PFILE_LEVEL_TRIM_RANGE; + +typedef struct _FILE_LEVEL_TRIM { + DWORD Key; + DWORD NumRanges; + FILE_LEVEL_TRIM_RANGE Ranges[1]; +} FILE_LEVEL_TRIM, *PFILE_LEVEL_TRIM; +#endif +#endif + +/**********************************************************************//** +Directly manipulate the allocated disk space by deallocating for the file referred to +by fd for the byte range starting at offset and continuing for len bytes. +Within the specified range, partial file system blocks are zeroed, and whole +file system blocks are removed from the file. After a successful call, +subsequent reads from this range will return zeroes. 
+@return true if success, false if error */ +UNIV_INTERN +ibool +os_file_trim( +/*=========*/ + os_aio_slot_t* slot) /*!< in: slot structure */ +{ + + size_t len = slot->len; + size_t trim_len = UNIV_PAGE_SIZE - len; + os_offset_t off = slot->offset + len; + size_t bsize = slot->file_block_size; + + // len here should be alligned to sector size + ut_a((trim_len % bsize) == 0); + ut_a((len % bsize) == 0); + ut_a(bsize != 0); + ut_a((off % bsize) == 0); + +#ifdef UNIV_DEBUG + fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu off %lu block_size %lu\n", + *slot->write_size, trim_len, len, off, bsize); +#endif + + // Nothing to do if trim length is zero or if actual write + // size is initialized and it is smaller than current write size. + // In first write if we trim we set write_size to actual bytes + // written and rest of the page is trimmed. In following writes + // there is no need to trim again if write_size only increases + // because rest of the page is already trimmed. If actual write + // size decreases we need to trim again. 
+ if (trim_len == 0 || + (slot->write_size && + *slot->write_size > 0 && + len >= *slot->write_size)) { + +#ifdef UNIV_PAGECOMPRESS_DEBUG + fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu\n", + *slot->write_size, trim_len, len); +#endif + + if (*slot->write_size > 0 && len >= *slot->write_size) { + srv_stats.page_compressed_trim_op_saved.inc(); + } + + *slot->write_size = len; + + return (TRUE); + } + +#ifdef __linux__ +#if defined(HAVE_FALLOCATE) + int ret = fallocate(slot->file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len); + + if (ret) { + /* After first failure do not try to trim again */ + os_fallocate_failed = TRUE; + srv_use_trim = FALSE; + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: [Warning] fallocate call failed with error code %d.\n" + " InnoDB: start: %lu len: %lu payload: %lu\n" + " InnoDB: Disabling fallocate for now.\n", ret, off, trim_len, len); + + os_file_handle_error_no_exit(slot->name, + " fallocate(FALLOC_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE) ", + FALSE, __FILE__, __LINE__); + + if (slot->write_size) { + *slot->write_size = 0; + } + + return (FALSE); + } else { + if (slot->write_size) { + *slot->write_size = len; + } + } +#else + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: [Warning] fallocate not supported on this installation." + " InnoDB: Disabling fallocate for now."); + os_fallocate_failed = TRUE; + srv_use_trim = FALSE; + if (slot->write_size) { + *slot->write_size = 0; + } + +#endif /* HAVE_FALLOCATE ... 
*/ + +#elif defined(_WIN32) + FILE_LEVEL_TRIM flt; + flt.Key = 0; + flt.NumRanges = 1; + flt.Ranges[0].Offset = off; + flt.Ranges[0].Length = trim_len; + + BOOL ret = DeviceIoControl(slot->file, FSCTL_FILE_LEVEL_TRIM, + &flt, sizeof(flt), NULL, NULL, NULL, NULL); + + if (!ret) { + /* After first failure do not try to trim again */ + os_fallocate_failed = TRUE; + srv_use_trim=FALSE; + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: [Warning] fallocate call failed with error.\n" + " InnoDB: start: %lx len: %du payload: %lu\n" + " InnoDB: Disabling fallocate for now.\n", (slot->offset+len), trim_len, len); + + os_file_handle_error_no_exit(slot->name, + " DeviceIOControl(FSCTL_FILE_LEVEL_TRIM) ", + FALSE, __FILE__, __LINE__); + + if (slot->write_size) { + *slot->write_size = 0; + } + return (FALSE); + } else { + if (slot->write_size) { + *slot->write_size = len; + } + } +#endif + + switch(bsize) { + case 512: + srv_stats.page_compression_trim_sect512.add((trim_len / bsize)); + break; + case 1024: + srv_stats.page_compression_trim_sect1024.add((trim_len / bsize)); + break; + case 2948: + srv_stats.page_compression_trim_sect2048.add((trim_len / bsize)); + break; + case 4096: + srv_stats.page_compression_trim_sect4096.add((trim_len / bsize)); + break; + case 8192: + srv_stats.page_compression_trim_sect8192.add((trim_len / bsize)); + break; + case 16384: + srv_stats.page_compression_trim_sect16384.add((trim_len / bsize)); + break; + case 32768: + srv_stats.page_compression_trim_sect32768.add((trim_len / bsize)); + break; + default: + break; + } + + srv_stats.page_compressed_trim_op.inc(); + + return (TRUE); + +} #endif /* !UNIV_HOTBACKUP */ + +/**********************************************************************//** +Allocate memory for temporal buffer used for page compression. This +buffer is freed later. 
*/ +UNIV_INTERN +void +os_slot_alloc_page_buf( +/*===================*/ + os_aio_slot_t* slot) /*!< in: slot structure */ +{ + byte* cbuf2; + byte* cbuf; + ulint asize = UNIV_PAGE_SIZE; + + ut_a(slot != NULL); + if (slot->page_compression_page == NULL) { + /* We allocate extra to avoid memory overwrite on compression */ +#ifdef HAVE_SNAPPY + asize += snappy_max_compressed_length(asize) - UNIV_PAGE_SIZE; +#endif + cbuf2 = static_cast<byte *>(ut_malloc(asize*2)); + cbuf = static_cast<byte *>(ut_align(cbuf2, UNIV_PAGE_SIZE)); + slot->page_compression_page = static_cast<byte *>(cbuf2); + slot->page_buf = static_cast<byte *>(cbuf); + memset(slot->page_compression_page, 0, asize*2); + ut_a(slot->page_buf != NULL); + } +} + +#ifdef HAVE_LZO +/**********************************************************************//** +Allocate memory for temporal memory used for page compression when +LZO compression method is used */ +UNIV_INTERN +void +os_slot_alloc_lzo_mem( +/*===================*/ + os_aio_slot_t* slot) /*!< in: slot structure */ +{ + ut_a(slot != NULL); + if(slot->lzo_mem == NULL) { + slot->lzo_mem = static_cast<byte *>(ut_malloc(LZO1X_1_15_MEM_COMPRESS)); + memset(slot->lzo_mem, 0, LZO1X_1_15_MEM_COMPRESS); + ut_a(slot->lzo_mem != NULL); + } +} +#endif + +/***********************************************************************//** +Try to get number of bytes per sector from file system. 
+@return file block size */ +UNIV_INTERN +ulint +os_file_get_block_size( +/*===================*/ + os_file_t file, /*!< in: handle to a file */ + const char* name) /*!< in: file name */ +{ + ulint fblock_size = 512; + +#if defined(UNIV_LINUX) && defined(HAVE_SYS_STATVFS_H) + struct statvfs fstat; + int err; + + err = fstatvfs(file, &fstat); + + if (err != 0) { + fprintf(stderr, "InnoDB: Warning: fstatvfs() failed on file %s\n", name); + os_file_handle_error_no_exit(name, "fstatvfs()", FALSE, __FILE__, __LINE__); + } else { + fblock_size = fstat.f_bsize; + } +#endif /* UNIV_LINUX */ +#ifdef __WIN__ + { + DWORD SectorsPerCluster = 0; + DWORD BytesPerSector = 0; + DWORD NumberOfFreeClusters = 0; + DWORD TotalNumberOfClusters = 0; + + if (GetFreeSpace((LPCTSTR)name, &SectorsPerCluster, &BytesPerSector, &NumberOfFreeClusters, &TotalNumberOfClusters)) { + fblock_size = BytesPerSector; + } else { + fprintf(stderr, "InnoDB: Warning: GetFreeSpace() failed on file %s\n", name); + os_file_handle_error_no_exit(name, "GetFreeSpace()", FALSE, __FILE__, __LINE__); + } + } +#endif /* __WIN__*/ + + if (fblock_size > UNIV_PAGE_SIZE/2 || fblock_size < 512) { + fprintf(stderr, "InnoDB: Note: File system for file %s has " + "file block size %lu not supported for page_size %lu\n", + name, fblock_size, UNIV_PAGE_SIZE); + + if (fblock_size < 512) { + fblock_size = 512; + } else { + fblock_size = UNIV_PAGE_SIZE/2; + } + + fprintf(stderr, "InnoDB: Note: Using file block size %ld for file %s\n", + fblock_size, name); + } + + return fblock_size; +} diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index 74ebe159677..018bf44fb8d 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -2567,7 +2567,7 @@ all_done: success = os_file_read_no_error_handling( OS_FILE_FROM_FD(index->online_log->fd), index->online_log->head.block, ofs, - srv_sort_buf_size); + srv_sort_buf_size, FALSE); if (!success) { fprintf(stderr, "InnoDB: unable to read 
temporary file" @@ -3398,7 +3398,7 @@ all_done: success = os_file_read_no_error_handling( OS_FILE_FROM_FD(index->online_log->fd), index->online_log->head.block, ofs, - srv_sort_buf_size); + srv_sort_buf_size, FALSE); if (!success) { fprintf(stderr, "InnoDB: unable to read temporary file" diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index 284081d4b0c..7ebcdefdc3a 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -870,7 +870,8 @@ row_merge_read( #endif /* UNIV_DEBUG */ success = os_file_read_no_error_handling(OS_FILE_FROM_FD(fd), buf, - ofs, srv_sort_buf_size); + ofs, srv_sort_buf_size, FALSE); + #ifdef POSIX_FADV_DONTNEED /* Each block is read exactly once. Free up the file cache. */ posix_fadvise(fd, ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED); diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc index a0dd32c203f..5e15dd15db2 100644 --- a/storage/innobase/srv/srv0mon.cc +++ b/storage/innobase/srv/srv0mon.cc @@ -2,6 +2,7 @@ Copyright (c) 2010, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. 
+Copyright (c) 2013, 2014, MariaDB Corporation This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -290,6 +291,18 @@ static monitor_info_t innodb_counter_info[] = MONITOR_EXISTING | MONITOR_DEFAULT_ON), MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_WRITTEN}, + {"buffer_index_pages_written", "buffer", + "Number of index pages written (innodb_index_pages_written)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_INDEX_PAGES_WRITTEN}, + + {"buffer_non_index_pages_written", "buffer", + "Number of non index pages written (innodb_non_index_pages_written)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN}, + {"buffer_pages_read", "buffer", "Number of pages read (innodb_pages_read)", static_cast<monitor_type_t>( @@ -879,6 +892,71 @@ static monitor_info_t innodb_counter_info[] = MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_PAD_DECREMENTS}, + {"compress_saved", "compression", + "Number of bytes saved by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_SAVED}, + + {"compress_trim_sect512", "compression", + "Number of sect-512 TRIMed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512}, + + {"compress_trim_sect1024", "compression", + "Number of sect-1024 TRIMed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024}, + + {"compress_trim_sect2048", "compression", + "Number of sect-2048 TRIMed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048}, + + {"compress_trim_sect4096", "compression", + "Number of sect-4K TRIMed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096}, + + 
{"compress_trim_sect8192", "compression", + "Number of sect-8K TRIMed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192}, + + {"compress_trim_sect16384", "compression", + "Number of sect-16K TRIMed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384}, + + {"compress_trim_sect32768", "compression", + "Number of sect-32K TRIMed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768}, + + {"compress_pages_page_compressed", "compression", + "Number of pages compressed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSED}, + + {"compress_page_compressed_trim_op", "compression", + "Number of TRIM operation performed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP}, + + {"compress_page_compressed_trim_op_saved", "compression", + "Number of TRIM operation saved by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED}, + + {"compress_pages_page_decompressed", "compression", + "Number of pages decompressed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED}, + + {"compress_pages_page_compression_error", "compression", + "Number of page compression errors", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR}, + /* ========== Counters for Index ========== */ {"module_index", "index", "Index Manager", MONITOR_MODULE, @@ -1573,6 +1651,16 @@ srv_mon_process_existing_counter( value = stat.n_pages_written; break; + /* innodb_index_pages_written, the number of index pages written */ + case MONITOR_OVLD_INDEX_PAGES_WRITTEN: + value = srv_stats.index_pages_written; + break; + + /* innodb_non_index_pages_written, the number of non index pages written */ + case MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN: + value = 
srv_stats.non_index_pages_written; + break; + /* innodb_pages_read */ case MONITOR_OVLD_PAGES_READ: buf_get_total_stat(&stat); @@ -1834,6 +1922,46 @@ srv_mon_process_existing_counter( value = btr_cur_n_non_sea; break; + case MONITOR_OVLD_PAGE_COMPRESS_SAVED: + value = srv_stats.page_compression_saved; + break; + case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512: + value = srv_stats.page_compression_trim_sect512; + break; + case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024: + value = srv_stats.page_compression_trim_sect1024; + break; + case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048: + value = srv_stats.page_compression_trim_sect2048; + break; + case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096: + value = srv_stats.page_compression_trim_sect4096; + break; + case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192: + value = srv_stats.page_compression_trim_sect8192; + break; + case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384: + value = srv_stats.page_compression_trim_sect16384; + break; + case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768: + value = srv_stats.page_compression_trim_sect32768; + break; + case MONITOR_OVLD_PAGES_PAGE_COMPRESSED: + value = srv_stats.pages_page_compressed; + break; + case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP: + value = srv_stats.page_compressed_trim_op; + break; + case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED: + value = srv_stats.page_compressed_trim_op_saved; + break; + case MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED: + value = srv_stats.pages_page_decompressed; + break; + case MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR: + value = srv_stats.pages_page_compression_error; + break; + default: ut_error; } diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index cd3bed9e2fe..129b33f0da9 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -71,6 +71,7 @@ Created 10/8/1995 Heikki Tuuri #include "mysql/plugin.h" #include "mysql/service_thd_wait.h" +#include "fil0pagecompress.h" /* The following is the maximum allowed 
duration of a lock wait. */ UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600; @@ -146,6 +147,20 @@ use simulated aio we build below with threads. Currently we support native aio on windows and linux */ UNIV_INTERN my_bool srv_use_native_aio = TRUE; +/* If this flag is TRUE, then we will use fallocate(PUNCH_HOLE) +on the pages */ +UNIV_INTERN my_bool srv_use_trim = FALSE; +/* If this flag is TRUE, then we will use posix fallocate for file extension */ +UNIV_INTERN my_bool srv_use_posix_fallocate = FALSE; +/* If this flag is TRUE, then we disable doublewrite buffer */ +UNIV_INTERN my_bool srv_use_atomic_writes = FALSE; +/* Algorithm used for page compression of the pages; the default is zlib */ +UNIV_INTERN ulong innodb_compression_algorithm = PAGE_ZLIB_ALGORITHM; +/* Number of threads used for multi-threaded flush */ +UNIV_INTERN long srv_mtflush_threads = MTFLUSH_DEFAULT_WORKER; +/* If this flag is TRUE, then we will use multi threaded flush. */ +UNIV_INTERN my_bool srv_use_mtflush = FALSE; + #ifdef __WIN__ /* Windows native condition variables. We use runtime loading / function pointers, because they are not available on Windows Server 2003 and @@ -356,11 +371,6 @@ batch flushing i.e.: LRU flushing and flush_list flushing. The rest of the pages are used for single page flushing. 
*/ UNIV_INTERN ulong srv_doublewrite_batch_size = 120; -UNIV_INTERN ibool srv_use_atomic_writes = FALSE; -#ifdef HAVE_POSIX_FALLOCATE -UNIV_INTERN ibool srv_use_posix_fallocate = TRUE; -#endif - UNIV_INTERN ulong srv_replication_delay = 0; /*-------------------------------------------*/ @@ -393,6 +403,17 @@ static ulint srv_n_system_rows_read_old = 0; UNIV_INTERN ulint srv_truncated_status_writes = 0; UNIV_INTERN ulint srv_available_undo_logs = 0; +UNIV_INTERN ib_uint64_t srv_page_compression_saved = 0; +UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect512 = 0; +UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect4096 = 0; +UNIV_INTERN ib_uint64_t srv_index_pages_written = 0; +UNIV_INTERN ib_uint64_t srv_non_index_pages_written = 0; +UNIV_INTERN ib_uint64_t srv_pages_page_compressed = 0; +UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op = 0; +UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op_saved = 0; +UNIV_INTERN ib_uint64_t srv_index_page_decompressed = 0; + + /* Set the following to 0 if you want InnoDB to write messages on stderr on startup/shutdown. 
*/ UNIV_INTERN ibool srv_print_verbose_log = TRUE; @@ -1518,6 +1539,15 @@ srv_export_innodb_status(void) srv_truncated_status_writes; export_vars.innodb_available_undo_logs = srv_available_undo_logs; + export_vars.innodb_page_compression_saved = srv_stats.page_compression_saved; + export_vars.innodb_page_compression_trim_sect512 = srv_stats.page_compression_trim_sect512; + export_vars.innodb_page_compression_trim_sect4096 = srv_stats.page_compression_trim_sect4096; + export_vars.innodb_index_pages_written = srv_stats.index_pages_written; + export_vars.innodb_non_index_pages_written = srv_stats.non_index_pages_written; + export_vars.innodb_pages_page_compressed = srv_stats.pages_page_compressed; + export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op; + export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved; + export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed; #ifdef UNIV_DEBUG rw_lock_s_lock(&purge_sys->latch); diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 7048a44ae97..f2de5e954ad 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -3,6 +3,7 @@ Copyright (c) 1996, 2015, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2009, Percona Inc. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Google, Inc. 
Those modifications are gratefully acknowledged and are described @@ -72,6 +73,7 @@ Created 2/16/1996 Heikki Tuuri # include "sync0sync.h" # include "buf0flu.h" # include "buf0rea.h" +# include "buf0mtflu.h" # include "dict0boot.h" # include "dict0load.h" # include "dict0stats_bg.h" @@ -129,10 +131,14 @@ static os_file_t files[1000]; /** io_handler_thread parameters for thread identification */ static ulint n[SRV_MAX_N_IO_THREADS + 6]; /** io_handler_thread identifiers, 32 is the maximum number of purge threads */ -static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 + 32]; +/** 6 is the ? */ +#define START_OLD_THREAD_CNT (SRV_MAX_N_IO_THREADS + 6 + 32) +static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 + 32 + MTFLUSH_MAX_WORKER]; +/* Thread contex data for multi-threaded flush */ +void *mtflush_ctx=NULL; /** Thead handles */ -static os_thread_t thread_handles[SRV_MAX_N_IO_THREADS + 6 + 32]; +static os_thread_t thread_handles[SRV_MAX_N_IO_THREADS + 6 + 32 + MTFLUSH_MAX_WORKER]; static os_thread_t buf_flush_page_cleaner_thread_handle; static os_thread_t buf_dump_thread_handle; static os_thread_t dict_stats_thread_handle; @@ -544,7 +550,7 @@ create_log_file( *file = os_file_create( innodb_file_log_key, name, OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL, - OS_LOG_FILE, &ret); + OS_LOG_FILE, &ret, FALSE); if (!ret) { ib_logf(IB_LOG_LEVEL_ERROR, "Cannot create %s", name); @@ -751,7 +757,7 @@ open_log_file( *file = os_file_create(innodb_file_log_key, name, OS_FILE_OPEN, OS_FILE_AIO, - OS_LOG_FILE, &ret); + OS_LOG_FILE, &ret, FALSE); if (!ret) { ib_logf(IB_LOG_LEVEL_ERROR, "Unable to open '%s'", name); return(DB_ERROR); @@ -842,7 +848,7 @@ open_or_create_data_files( files[i] = os_file_create( innodb_file_data_key, name, OS_FILE_CREATE, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); if (srv_read_only_mode) { @@ -885,7 +891,7 @@ open_or_create_data_files( files[i] = os_file_create( innodb_file_data_key, name, 
OS_FILE_OPEN_RAW, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); if (!ret) { ib_logf(IB_LOG_LEVEL_ERROR, @@ -900,7 +906,7 @@ open_or_create_data_files( #ifdef UNIV_LOG_ARCHIVE min_arch_log_no, max_arch_log_no, #endif /* UNIV_LOG_ARCHIVE */ - min_flushed_lsn, max_flushed_lsn); + min_flushed_lsn, max_flushed_lsn, ULINT_UNDEFINED); /* If first page is valid, don't overwrite DB. It prevents overwriting DB when mysql_install_db @@ -936,17 +942,17 @@ open_or_create_data_files( files[i] = os_file_create( innodb_file_data_key, name, OS_FILE_OPEN_RAW, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); } else if (i == 0) { files[i] = os_file_create( innodb_file_data_key, name, OS_FILE_OPEN_RETRY, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); } else { files[i] = os_file_create( innodb_file_data_key, name, OS_FILE_OPEN, OS_FILE_NORMAL, - OS_DATA_FILE, &ret); + OS_DATA_FILE, &ret, FALSE); } if (!ret) { @@ -1031,7 +1037,8 @@ check_first_page: #ifdef UNIV_LOG_ARCHIVE min_arch_log_no, max_arch_log_no, #endif /* UNIV_LOG_ARCHIVE */ - min_flushed_lsn, max_flushed_lsn); + min_flushed_lsn, max_flushed_lsn, + ULINT_UNDEFINED); if (check_msg) { @@ -1166,7 +1173,7 @@ srv_undo_tablespace_create( innodb_file_data_key, name, srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); if (srv_read_only_mode && ret) { ib_logf(IB_LOG_LEVEL_INFO, @@ -1253,7 +1260,8 @@ srv_undo_tablespace_open( | OS_FILE_ON_ERROR_SILENT, OS_FILE_NORMAL, OS_DATA_FILE, - &ret); + &ret, + FALSE); /* If the file open was successful then load the tablespace. 
*/ @@ -2754,6 +2762,24 @@ files_checked: } if (!srv_read_only_mode) { + if (srv_use_mtflush) { + /* Start multi-threaded flush threads */ + mtflush_ctx = buf_mtflu_handler_init( + srv_mtflush_threads, + srv_buf_pool_instances); + + /* Set up the thread ids */ + buf_mtflu_set_thread_ids( + srv_mtflush_threads, + mtflush_ctx, + (thread_ids + 6 + 32)); + +#if UNIV_DEBUG + fprintf(stderr, "InnoDB: Note: %s:%d buf-pool-instances:%lu mtflush_threads %lu\n", + __FILE__, __LINE__, srv_buf_pool_instances, srv_mtflush_threads); +#endif + } + buf_flush_page_cleaner_thread_handle = os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL); buf_flush_page_cleaner_thread_started = true; } @@ -3021,6 +3047,13 @@ innobase_shutdown_for_mysql(void) logs_empty_and_mark_files_at_shutdown() and should have already quit or is quitting right now. */ + + if (srv_use_mtflush) { + /* g. Exit the multi threaded flush threads */ + + buf_mtflu_io_thread_exit(); + } + os_mutex_enter(os_sync_mutex); if (os_thread_count == 0) { diff --git a/storage/innobase/ut/ut0wqueue.cc b/storage/innobase/ut/ut0wqueue.cc index d1ba36b3b00..1607e535a94 100644 --- a/storage/innobase/ut/ut0wqueue.cc +++ b/storage/innobase/ut/ut0wqueue.cc @@ -162,6 +162,38 @@ ib_wqueue_timedwait( } /******************************************************************** +Remove and return the first item on the work queue without waiting; the queue event is reset when the list is empty afterwards +@return work item or NULL if the queue is empty */ +void* +ib_wqueue_nowait( +/*=============*/ + ib_wqueue_t* wq) /*!< in: work queue */ +{ + ib_list_node_t* node = NULL; + + mutex_enter(&wq->mutex); + + if(!ib_list_is_empty(wq->items)) { + node = ib_list_get_first(wq->items); + + if (node) { + ib_list_remove(wq->items, node); + + } + } + + /* We must reset the event when the list + gets emptied. */ + if(ib_list_is_empty(wq->items)) { + os_event_reset(wq->event); + } + + mutex_exit(&wq->mutex); + + return (node ? 
node->data : NULL); +} + +/******************************************************************** Check if queue is empty. */ ibool @@ -173,3 +205,20 @@ ib_wqueue_is_empty( { return(ib_list_is_empty(wq->items)); } + +/******************************************************************** +Get number of items on queue; the count is read while holding the queue mutex. +@return number of items on queue */ +ulint +ib_wqueue_len( +/*==========*/ + ib_wqueue_t* wq) /*!< in: work queue */ +{ + ulint len = 0; + + mutex_enter(&wq->mutex); + len = ib_list_len(wq->items); + mutex_exit(&wq->mutex); + + return(len); +} diff --git a/storage/xtradb/CMakeLists.txt b/storage/xtradb/CMakeLists.txt index 2f74456e7a7..75e994aa34b 100644 --- a/storage/xtradb/CMakeLists.txt +++ b/storage/xtradb/CMakeLists.txt @@ -18,6 +18,17 @@ INCLUDE(CheckFunctionExists) INCLUDE(CheckCSourceCompiles) INCLUDE(CheckCSourceRuns) +INCLUDE(lz4) +INCLUDE(lzo) +INCLUDE(lzma) +INCLUDE(bzip2) +INCLUDE(snappy) + +MYSQL_CHECK_LZ4() +MYSQL_CHECK_LZO() +MYSQL_CHECK_LZMA() +MYSQL_CHECK_BZIP2() +MYSQL_CHECK_SNAPPY() # OS tests IF(UNIX) @@ -337,6 +348,7 @@ SET(INNOBASE_SOURCES buf/buf0flu.cc buf/buf0lru.cc buf/buf0rea.cc + buf/buf0mtflu.cc data/data0data.cc data/data0type.cc dict/dict0boot.cc @@ -350,6 +362,7 @@ SET(INNOBASE_SOURCES eval/eval0eval.cc eval/eval0proc.cc fil/fil0fil.cc + fil/fil0pagecompress.cc fsp/fsp0fsp.cc fut/fut0fut.cc fut/fut0lst.cc diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc index d38e080051c..97781a12d20 100644 --- a/storage/xtradb/buf/buf0buf.cc +++ b/storage/xtradb/buf/buf0buf.cc @@ -2,6 +2,7 @@ Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Google, Inc. 
Those modifications are gratefully acknowledged and are described @@ -910,6 +911,11 @@ buf_page_print( mach_read_from_4(read_buf + FIL_PAGE_OFFSET), mach_read_from_4(read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID)); + + ulint page_type = mach_read_from_2(read_buf + FIL_PAGE_TYPE); + + fprintf(stderr, "InnoDB: page type %lu meaning %s\n", page_type, + fil_get_page_type_name(page_type)); } #ifndef UNIV_HOTBACKUP @@ -3544,6 +3550,7 @@ buf_page_init_low( bpage->access_time = 0; bpage->newest_modification = 0; bpage->oldest_modification = 0; + bpage->write_size = 0; HASH_INVALIDATE(bpage, hash); bpage->is_corrupt = FALSE; #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG @@ -5706,3 +5713,24 @@ buf_page_init_for_backup_restore( } } #endif /* !UNIV_HOTBACKUP */ + +/*********************************************************************//** +Acquire LRU list mutex; asserts with ut_ad() that the caller does not already own it */ +void +buf_pool_mutex_enter( +/*=================*/ + buf_pool_t* buf_pool) /*!< in: buffer pool */ +{ + ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); + mutex_enter(&buf_pool->LRU_list_mutex); +} +/*********************************************************************//** +Release LRU list mutex; asserts with ut_ad() that the caller owns it */ +void +buf_pool_mutex_exit( +/*================*/ + buf_pool_t* buf_pool) /*!< in: buffer pool */ +{ + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); + mutex_exit(&buf_pool->LRU_list_mutex); +} diff --git a/storage/xtradb/buf/buf0dblwr.cc b/storage/xtradb/buf/buf0dblwr.cc index f4d1c637e3e..dc84bd194ef 100644 --- a/storage/xtradb/buf/buf0dblwr.cc +++ b/storage/xtradb/buf/buf0dblwr.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -382,7 +383,7 @@ buf_dblwr_init_or_load_pages( /* Read the trx sys header to check if we are using the doublewrite buffer */ off_t trx_sys_page = TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE; - os_file_read(file, read_buf, trx_sys_page, UNIV_PAGE_SIZE); + os_file_read(file, read_buf, trx_sys_page, UNIV_PAGE_SIZE, FALSE); doublewrite = read_buf + TRX_SYS_DOUBLEWRITE; @@ -416,12 +417,11 @@ buf_dblwr_init_or_load_pages( } /* Read the pages from the doublewrite buffer to memory */ - block_bytes = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE; - os_file_read(file, buf, block1 * UNIV_PAGE_SIZE, block_bytes); + os_file_read(file, buf, block1 * UNIV_PAGE_SIZE, block_bytes, FALSE); os_file_read(file, buf + block_bytes, block2 * UNIV_PAGE_SIZE, - block_bytes); + block_bytes, FALSE); /* Check if any of these pages is half-written in data files, in the intended position */ @@ -514,7 +514,7 @@ buf_dblwr_process() fil_io(OS_FILE_READ, true, space_id, zip_size, page_no, 0, zip_size ? zip_size : UNIV_PAGE_SIZE, - read_buf, NULL); + read_buf, NULL, 0); /* Check if the page is corrupt */ @@ -566,7 +566,7 @@ buf_dblwr_process() fil_io(OS_FILE_WRITE, true, space_id, zip_size, page_no, 0, zip_size ? zip_size : UNIV_PAGE_SIZE, - page, NULL); + page, NULL, 0); ib_logf(IB_LOG_LEVEL_INFO, "Recovered the page from" @@ -586,7 +586,7 @@ buf_dblwr_process() zip_size, page_no, 0, zip_size ? 
zip_size : UNIV_PAGE_SIZE, - page, NULL); + page, NULL, NULL); } } } @@ -798,7 +798,7 @@ buf_dblwr_write_block_to_datafile( buf_page_get_page_no(bpage), 0, buf_page_get_zip_size(bpage), (void*) bpage->zip.data, - (void*) bpage); + (void*) bpage, 0); return; } @@ -810,8 +810,8 @@ buf_dblwr_write_block_to_datafile( fil_io(flags, sync, buf_block_get_space(block), 0, buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE, - (void*) block->frame, (void*) block); - + (void*) block->frame, (void*) block, + (ulint *)&bpage->write_size); } /********************************************************************//** @@ -905,7 +905,7 @@ try_again: fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, buf_dblwr->block1, 0, len, - (void*) write_buf, NULL); + (void*) write_buf, NULL, 0); if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { /* No unwritten pages in the second block. */ @@ -921,7 +921,7 @@ try_again: fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, buf_dblwr->block2, 0, len, - (void*) write_buf, NULL); + (void*) write_buf, NULL, 0); flush: /* increment the doublewrite flushed pages counter */ @@ -1151,14 +1151,14 @@ retry: fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, offset, 0, UNIV_PAGE_SIZE, (void*) (buf_dblwr->write_buf - + UNIV_PAGE_SIZE * i), NULL); + + UNIV_PAGE_SIZE * i), NULL, 0); } else { /* It is a regular page. Write it directly to the doublewrite buffer */ fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, offset, 0, UNIV_PAGE_SIZE, (void*) ((buf_block_t*) bpage)->frame, - NULL); + NULL, 0); } /* Now flush the doublewrite buffer data to disk */ diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index 0f39c5de2ca..7c9e5e091c5 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -1,6 +1,8 @@ /***************************************************************************** Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. 
+Copyright (c) 2013, 2014, Fusion-io. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -30,6 +32,7 @@ Created 11/11/1995 Heikki Tuuri #endif #include "buf0buf.h" +#include "buf0mtflu.h" #include "buf0checksum.h" #include "srv0start.h" #include "srv0srv.h" @@ -44,10 +47,12 @@ Created 11/11/1995 Heikki Tuuri #include "ibuf0ibuf.h" #include "log0log.h" #include "os0file.h" +#include "os0sync.h" #include "trx0sys.h" #include "srv0mon.h" #include "mysql/plugin.h" #include "mysql/service_thd_wait.h" +#include "fil0pagecompress.h" /** Number of pages flushed through non flush_list flushes. */ // static ulint buf_lru_flush_page_count = 0; @@ -75,15 +80,6 @@ in thrashing. */ /* @} */ -/** Handled page counters for a single flush */ -struct flush_counters_t { - ulint flushed; /*!< number of dirty pages flushed */ - ulint evicted; /*!< number of clean pages evicted, including - evicted uncompressed page images */ - ulint unzip_LRU_evicted;/*!< number of uncompressed page images - evicted */ -}; - /******************************************************************//** Increases flush_list size in bytes with zip_size for compressed page, UNIV_PAGE_SIZE for uncompressed page in inline function */ @@ -724,8 +720,10 @@ buf_flush_write_complete( buf_pool->n_flush[flush_type]--; - /* fprintf(stderr, "n pending flush %lu\n", - buf_pool->n_flush[flush_type]); */ +#ifdef UNIV_MTFLUSH_DEBUG + fprintf(stderr, "n pending flush %lu\n", + buf_pool->n_flush[flush_type]); +#endif if (buf_pool->n_flush[flush_type] == 0 && buf_pool->init_flush[flush_type] == FALSE) { @@ -881,6 +879,8 @@ buf_flush_write_block_low( { ulint zip_size = buf_page_get_zip_size(bpage); page_t* frame = NULL; + ulint space_id = buf_page_get_space(bpage); + atomic_writes_t awrites = fil_space_get_atomic_writes(space_id); #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); @@ 
-956,12 +956,26 @@ buf_flush_write_block_low( sync, buf_page_get_space(bpage), zip_size, buf_page_get_page_no(bpage), 0, zip_size ? zip_size : UNIV_PAGE_SIZE, - frame, bpage); - } else if (flush_type == BUF_FLUSH_SINGLE_PAGE) { - buf_dblwr_write_single_page(bpage, sync); + frame, bpage, &bpage->write_size); } else { - ut_ad(!sync); - buf_dblwr_add_to_batch(bpage); + /* InnoDB uses doublewrite buffer and doublewrite buffer + is initialized. User can define do we use atomic writes + on a file space (table) or not. If atomic writes are + not used we should use doublewrite buffer and if + atomic writes should be used, no doublewrite buffer + is used. */ + + if (awrites == ATOMIC_WRITES_ON) { + fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, + FALSE, buf_page_get_space(bpage), zip_size, + buf_page_get_page_no(bpage), 0, + zip_size ? zip_size : UNIV_PAGE_SIZE, + frame, bpage, &bpage->write_size); + } else if (flush_type == BUF_FLUSH_SINGLE_PAGE) { + buf_dblwr_write_single_page(bpage, sync); + } else { + buf_dblwr_add_to_batch(bpage); + } } /* When doing single page flushing the IO is done synchronously @@ -1753,7 +1767,6 @@ end up waiting for these latches! NOTE 2: in the case of a flush list flush, the calling thread is not allowed to own any latches on pages! 
@return number of blocks for which the write request was queued */ __attribute__((nonnull)) -static void buf_flush_batch( /*============*/ @@ -1812,7 +1825,6 @@ buf_flush_batch( /******************************************************************//** Gather the aggregated stats for both flush list and LRU list flushing */ -static void buf_flush_common( /*=============*/ @@ -1839,7 +1851,6 @@ buf_flush_common( /******************************************************************//** Start a buffer flush batch for LRU or flush list */ -static ibool buf_flush_start( /*============*/ @@ -1854,6 +1865,11 @@ buf_flush_start( /* There is already a flush batch of the same type running */ +#ifdef UNIV_PAGECOMPRESS_DEBUG + fprintf(stderr, "Error: flush_type %d n_flush %lu init_flush %lu\n", + flush_type, buf_pool->n_flush[flush_type], buf_pool->init_flush[flush_type]); +#endif + mutex_exit(&buf_pool->flush_state_mutex); return(FALSE); @@ -1868,7 +1884,6 @@ buf_flush_start( /******************************************************************//** End a buffer flush batch for LRU or flush list */ -static void buf_flush_end( /*==========*/ @@ -1923,6 +1938,24 @@ buf_flush_wait_batch_end( } } +/* JAN: TODO: wrappers that enter / exit the buffer pool LRU list mutex, asserting with ut_ad() that the mutex is not owned / is owned beforehand */ + +void buf_pool_enter_LRU_mutex( + buf_pool_t* buf_pool) +{ + ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); + mutex_enter(&buf_pool->LRU_list_mutex); +} + +void buf_pool_exit_LRU_mutex( + buf_pool_t* buf_pool) +{ + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); + mutex_exit(&buf_pool->LRU_list_mutex); +} + +/* JAN: TODO: END: */ + /*******************************************************************//** This utility flushes dirty blocks from the end of the LRU list and also puts replaceable clean pages from the end of the LRU list to the free @@ -1993,6 +2026,10 @@ buf_flush_list( bool timeout = false; ulint flush_start_time = 0; + if (buf_mtflu_init_done()) { + return(buf_mtflu_flush_list(min_n, lsn_limit, n_processed)); + } + for (i = 0; i < srv_buf_pool_instances; i++) { 
requested_pages[i] = 0; active_instance[i] = true; @@ -2220,6 +2257,11 @@ buf_flush_LRU_tail(void) ulint free_list_lwm = srv_LRU_scan_depth / 100 * srv_cleaner_free_list_lwm; + if(buf_mtflu_init_done()) + { + return(buf_mtflu_flush_LRU_tail()); + } + for (ulint i = 0; i < srv_buf_pool_instances; i++) { const buf_pool_t* buf_pool = buf_pool_from_array(i); diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc new file mode 100644 index 00000000000..223edab2e9c --- /dev/null +++ b/storage/xtradb/buf/buf0mtflu.cc @@ -0,0 +1,733 @@ +/***************************************************************************** + +Copyright (C) 2013, 2014, Fusion-io. All Rights Reserved. +Copyright (C) 2013, 2015, MariaDB Corporation. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file buf/buf0mtflu.cc +Multi-threaded flush method implementation + +Created 06/11/2013 Dhananjoy Das DDas@fusionio.com +Modified 12/12/2013 Jan Lindström jan.lindstrom@skysql.com +Modified 03/02/2014 Dhananjoy Das DDas@fusionio.com +Modified 06/02/2014 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +#include "buf0buf.h" +#include "buf0flu.h" +#include "buf0mtflu.h" +#include "buf0checksum.h" +#include "srv0start.h" +#include "srv0srv.h" +#include "page0zip.h" +#include "ut0byte.h" +#include "ut0lst.h" +#include "page0page.h" +#include "fil0fil.h" +#include "buf0lru.h" +#include "buf0rea.h" +#include "ibuf0ibuf.h" +#include "log0log.h" +#include "os0file.h" +#include "os0sync.h" +#include "trx0sys.h" +#include "srv0mon.h" +#include "mysql/plugin.h" +#include "mysql/service_thd_wait.h" +#include "fil0pagecompress.h" + +#define MT_COMP_WATER_MARK 50 +/** Time to wait for a message. 
*/ +#define MT_WAIT_IN_USECS 5000000 + +/* Work item status */ +typedef enum wrk_status { + WRK_ITEM_UNSET=0, /*!< Work item is not set */ + WRK_ITEM_START=1, /*!< Processing of work item has started */ + WRK_ITEM_DONE=2, /*!< Processing is done usually set to + SUCCESS/FAILED (same value as WRK_ITEM_SUCCESS) */ + WRK_ITEM_SUCCESS=2, /*!< Work item successfully processed */ + WRK_ITEM_FAILED=3, /*!< Work item process failed */ + WRK_ITEM_EXIT=4, /*!< Exiting */ + WRK_ITEM_SET=5, /*!< Work item is set */ + WRK_ITEM_STATUS_UNDEFINED +} wrk_status_t; + +/* Work item task type */ +typedef enum mt_wrk_tsk { + MT_WRK_NONE=0, /*!< Exit queue-wait */ + MT_WRK_WRITE=1, /*!< Flush operation */ + MT_WRK_READ=2, /*!< Read operation */ + MT_WRK_UNDEFINED +} mt_wrk_tsk_t; + +/* Work thread status */ +typedef enum wthr_status { + WTHR_NOT_INIT=0, /*!< Work thread not initialized */ + WTHR_INITIALIZED=1, /*!< Work thread initialized */ + WTHR_SIG_WAITING=2, /*!< Work thread waiting for signal */ + WTHR_RUNNING=3, /*!< Work thread running */ + WTHR_NO_WORK=4, /*!< Work thread has no work */ + WTHR_KILL_IT=5, /*!< Work thread should exit */ + WTHR_STATUS_UNDEFINED +} wthr_status_t; + +/* Write work task */ +typedef struct wr_tsk { + buf_pool_t *buf_pool; /*!< buffer-pool instance */ + buf_flush_t flush_type; /*!< flush-type for buffer-pool + flush operation */ + ulint min; /*!< minimum number of pages + requested to be flushed */ + lsn_t lsn_limit; /*!< lsn limit for the buffer-pool + flush operation */ +} wr_tsk_t; + +/* Read work task */ +typedef struct rd_tsk { + buf_pool_t *page_pool; /*!< list of pages to decompress; */ +} rd_tsk_t; + +/* Work item */ +typedef struct wrk_itm +{ + mt_wrk_tsk_t tsk; /*!< Task type. 
Based on task-type + one of the entries wr_tsk/rd_tsk + will be used */ + wr_tsk_t wr; /*!< Flush page list */ + rd_tsk_t rd; /*!< Decompress page list */ + ulint n_flushed; /*!< Flushed pages count */ + os_thread_id_t id_usr; /*!< Thread-id currently working */ + wrk_status_t wi_status; /*!< Work item status */ + mem_heap_t *wheap; /*!< Heap where to allocate memory + for queue nodes */ + mem_heap_t *rheap; /*!< Heap passed to ib_wqueue_add() when the item is posted to the write completion queue */ +} wrk_t; + +typedef struct thread_data +{ + os_thread_id_t wthread_id; /*!< Thread identifier */ + os_thread_t wthread; /*!< Thread handle */ + wthr_status_t wt_status; /*!< Worker thread status */ +} thread_data_t; + +/* Thread synchronization data */ +typedef struct thread_sync +{ + /* Global variables used by all threads */ + os_fast_mutex_t thread_global_mtx; /*!< Mutex protecting the + variables below */ + ulint n_threads; /*!< Number of threads */ + ib_wqueue_t *wq; /*!< Work Queue */ + ib_wqueue_t *wr_cq; /*!< Write Completion Queue */ + ib_wqueue_t *rd_cq; /*!< Read Completion Queue */ + mem_heap_t* wheap; /*!< Work heap where memory + is allocated */ + mem_heap_t* rheap; /*!< Work heap where memory + is allocated */ + wthr_status_t gwt_status; /*!< Global thread status */ + + /* Variables used by only one thread at a time */ + thread_data_t* thread_data; /*!< Thread specific data */ + +} thread_sync_t; + +static int mtflush_work_initialized = -1; +static thread_sync_t* mtflush_ctx=NULL; +static os_fast_mutex_t mtflush_mtx; + +/******************************************************************//** +Set multi-threaded flush work initialized. 
*/ +static inline +void +buf_mtflu_work_init(void) +/*=====================*/ +{ + mtflush_work_initialized = 1; +} + +/******************************************************************//** +Return true if multi-threaded flush is initialized +@return true if initialized */ +bool +buf_mtflu_init_done(void) +/*=====================*/ +{ + return(mtflush_work_initialized == 1); +} + +/******************************************************************//** +Flush the buffer pool instance described by the work item; for LRU flushes the requested page count is capped by srv_LRU_scan_depth and the current LRU list length. +@return number of flushed pages, or 0 if error happened +*/ +static +ulint +buf_mtflu_flush_pool_instance( +/*==========================*/ + wrk_t *work_item) /*!< inout: work item to be flushed */ +{ + flush_counters_t n; + ut_a(work_item != NULL); + ut_a(work_item->wr.buf_pool != NULL); + + if (!buf_flush_start(work_item->wr.buf_pool, work_item->wr.flush_type)) { + /* We have two choices here. If lsn_limit was + specified then skipping an instance of buffer + pool means we cannot guarantee that all pages + up to lsn_limit has been flushed. We can + return right now with failure or we can try + to flush remaining buffer pools up to the + lsn_limit. We attempt to flush other buffer + pools based on the assumption that it will + help in the retry which will follow the + failure. */ +#ifdef UNIV_MTFLUSH_DEBUG + fprintf(stderr, "InnoDB: Note: buf flush start failed there is already active flush for this buffer pool.\n"); +#endif + return 0; + } + + memset(&n, 0, sizeof(flush_counters_t)); + + if (work_item->wr.flush_type == BUF_FLUSH_LRU) { + /* srv_LRU_scan_depth can be arbitrarily large value. + * We cap it with current LRU size. 
+ */ + buf_pool_mutex_enter(work_item->wr.buf_pool); + work_item->wr.min = UT_LIST_GET_LEN(work_item->wr.buf_pool->LRU); + buf_pool_mutex_exit(work_item->wr.buf_pool); + work_item->wr.min = ut_min(srv_LRU_scan_depth,work_item->wr.min); + } + + buf_flush_batch(work_item->wr.buf_pool, + work_item->wr.flush_type, + work_item->wr.min, + work_item->wr.lsn_limit, + false, + &n); + + work_item->n_flushed = n.flushed; + buf_flush_end(work_item->wr.buf_pool, work_item->wr.flush_type); + buf_flush_common(work_item->wr.flush_type, work_item->n_flushed); + + return work_item->n_flushed; +} + +/******************************************************************//** +Worker function that takes one work item from the work queue, processes it and +posts the reply to the write completion queue. +*/ +static +void +mtflush_service_io( +/*===============*/ + thread_sync_t* mtflush_io, /*!< inout: multi-threaded flush + synchronization data */ + thread_data_t* thread_data) /*!< inout: thread status data */ +{ + wrk_t *work_item = NULL; + ulint n_flushed=0; + + ut_a(mtflush_io != NULL); + ut_a(thread_data != NULL); + + thread_data->wt_status = WTHR_SIG_WAITING; + + work_item = (wrk_t *)ib_wqueue_nowait(mtflush_io->wq); + + if (work_item == NULL) { + work_item = (wrk_t *)ib_wqueue_wait(mtflush_io->wq); + } + + if (work_item) { + thread_data->wt_status = WTHR_RUNNING; + } else { + /* Thread did not get any work */ + thread_data->wt_status = WTHR_NO_WORK; + return; + } + + if (work_item->wi_status != WRK_ITEM_EXIT) { + work_item->wi_status = WRK_ITEM_SET; + } + +#ifdef UNIV_MTFLUSH_DEBUG + ut_a(work_item->id_usr == 0); +#endif + work_item->id_usr = os_thread_get_curr_id(); + + /* This works as a producer/consumer model, where in tasks are + * inserted into the work-queue (wq) and completions are based + * on the type of operations performed and as a result the WRITE/ + * compression/flush operation completions get posted to wr_cq. + * And READ/decompress operations completions get posted to rd_cq. + * in future we may have others. 
+ */ + + switch(work_item->tsk) { + case MT_WRK_NONE: + ut_a(work_item->wi_status == WRK_ITEM_EXIT); + work_item->wi_status = WRK_ITEM_EXIT; + ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->rheap); + thread_data->wt_status = WTHR_KILL_IT; + break; + + case MT_WRK_WRITE: + ut_a(work_item->wi_status == WRK_ITEM_SET); + work_item->wi_status = WRK_ITEM_START; + /* Process work item */ + n_flushed = buf_mtflu_flush_pool_instance(work_item); + /* Zero flushed pages is reported as failure; the status must not be overwritten afterwards */ + work_item->wi_status = (n_flushed == 0) + ? WRK_ITEM_FAILED : WRK_ITEM_SUCCESS; + ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->rheap); + break; + + case MT_WRK_READ: + ut_a(0); + break; + + default: + /* None other than Write/Read handling planned */ + ut_a(0); + break; + } +} + +/******************************************************************//** +Thead used to flush dirty pages when multi-threaded flush is +used. +@return a dummy parameter*/ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(mtflush_io_thread)( +/*==============================*/ + void * arg) +{ + thread_sync_t *mtflush_io = ((thread_sync_t *)arg); + thread_data_t *this_thread_data = NULL; + ulint i; + + /* Find correct slot for this thread */ + os_fast_mutex_lock(&(mtflush_io->thread_global_mtx)); + for(i=0; i < mtflush_io->n_threads; i ++) { + if (mtflush_io->thread_data[i].wthread_id == os_thread_get_curr_id()) { + break; + } + } + + ut_a(i <= mtflush_io->n_threads); + this_thread_data = &mtflush_io->thread_data[i]; + os_fast_mutex_unlock(&(mtflush_io->thread_global_mtx)); + + while (TRUE) { + +#ifdef UNIV_MTFLUSH_DEBUG + fprintf(stderr, "InnoDB: Note. 
Thread %lu work queue len %lu return queue len %lu\n", + os_thread_get_curr_id(), + ib_wqueue_len(mtflush_io->wq), + ib_wqueue_len(mtflush_io->wr_cq)); +#endif /* UNIV_MTFLUSH_DEBUG */ + + mtflush_service_io(mtflush_io, this_thread_data); + + + if (this_thread_data->wt_status == WTHR_KILL_IT) { + break; + } + } + + os_thread_exit(NULL); + OS_THREAD_DUMMY_RETURN; +} + +/******************************************************************//** +Add exit work item to work queue to signal multi-threded flush +threads that they should exit. +*/ +void +buf_mtflu_io_thread_exit(void) +/*==========================*/ +{ + ulint i; + thread_sync_t* mtflush_io = mtflush_ctx; + wrk_t* work_item = NULL; + + ut_a(mtflush_io != NULL); + + /* Allocate work items for shutdown message */ + work_item = (wrk_t*)mem_heap_alloc(mtflush_io->wheap, sizeof(wrk_t)*srv_mtflush_threads); + + /* Confirm if the io-thread KILL is in progress, bailout */ + if (mtflush_io->gwt_status == WTHR_KILL_IT) { + return; + } + + mtflush_io->gwt_status = WTHR_KILL_IT; + + /* This lock is to safequard against timing bug: flush request take + this mutex before sending work items to be processed by flush + threads. Inside flush thread we assume that work queue contains only + a constant number of items. Thus, we may not install new work items + below before all previous ones are processed. This mutex is released + by flush request after all work items sent to flush threads have + been processed. Thus, we can get this mutex if and only if work + queue is empty. 
*/ + + os_fast_mutex_lock(&mtflush_mtx); + + /* Make sure the work queue is empty */ + ut_a(ib_wqueue_is_empty(mtflush_io->wq)); + + /* Send one exit work item/thread */ + for (i=0; i < (ulint)srv_mtflush_threads; i++) { + work_item[i].tsk = MT_WRK_NONE; + work_item[i].wi_status = WRK_ITEM_EXIT; + work_item[i].wheap = mtflush_io->wheap; + work_item[i].rheap = mtflush_io->rheap; + work_item[i].id_usr = 0; + + ib_wqueue_add(mtflush_io->wq, + (void *)&(work_item[i]), + mtflush_io->wheap); + } + + /* Requests sent */ + os_fast_mutex_unlock(&mtflush_mtx); + + /* Wait until all work items on a work queue are processed */ + while(!ib_wqueue_is_empty(mtflush_io->wq)) { + /* Wait */ + os_thread_sleep(MT_WAIT_IN_USECS); + } + + ut_a(ib_wqueue_is_empty(mtflush_io->wq)); + + /* Collect all work done items */ + for (i=0; i < (ulint)srv_mtflush_threads;) { + wrk_t* work_item = NULL; + + work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wr_cq, MT_WAIT_IN_USECS); + + /* If we receive reply to work item and it's status is exit, + thead has processed this message and existed */ + if (work_item && work_item->wi_status == WRK_ITEM_EXIT) { + i++; + } + } + + /* Wait about 1/2 sec to allow threads really exit */ + os_thread_sleep(MT_WAIT_IN_USECS); + + /* Make sure that work queue is empty */ + while(!ib_wqueue_is_empty(mtflush_io->wq)) + { + ib_wqueue_nowait(mtflush_io->wq); + } + + os_fast_mutex_lock(&mtflush_mtx); + + ut_a(ib_wqueue_is_empty(mtflush_io->wq)); + ut_a(ib_wqueue_is_empty(mtflush_io->wr_cq)); + ut_a(ib_wqueue_is_empty(mtflush_io->rd_cq)); + + /* Free all queues */ + ib_wqueue_free(mtflush_io->wq); + ib_wqueue_free(mtflush_io->wr_cq); + ib_wqueue_free(mtflush_io->rd_cq); + + mtflush_io->wq = NULL; + mtflush_io->wr_cq = NULL; + mtflush_io->rd_cq = NULL; + mtflush_work_initialized = 0; + + /* Free heap */ + mem_heap_free(mtflush_io->wheap); + mem_heap_free(mtflush_io->rheap); + + os_fast_mutex_unlock(&mtflush_mtx); + os_fast_mutex_free(&mtflush_mtx); + 
os_fast_mutex_free(&mtflush_io->thread_global_mtx); +} + +/******************************************************************//** +Initialize multi-threaded flush thread syncronization data. +@return Initialized multi-threaded flush thread syncroniztion data. */ +void* +buf_mtflu_handler_init( +/*===================*/ + ulint n_threads, /*!< in: Number of threads to create */ + ulint wrk_cnt) /*!< in: Number of work items */ +{ + ulint i; + mem_heap_t* mtflush_heap; + mem_heap_t* mtflush_heap2; + + /* Create heap, work queue, write completion queue, read + completion queue for multi-threaded flush, and init + handler. */ + mtflush_heap = mem_heap_create(0); + ut_a(mtflush_heap != NULL); + mtflush_heap2 = mem_heap_create(0); + ut_a(mtflush_heap2 != NULL); + + mtflush_ctx = (thread_sync_t *)mem_heap_alloc(mtflush_heap, + sizeof(thread_sync_t)); + memset(mtflush_ctx, 0, sizeof(thread_sync_t)); + ut_a(mtflush_ctx != NULL); + mtflush_ctx->thread_data = (thread_data_t*)mem_heap_alloc( + mtflush_heap, sizeof(thread_data_t) * n_threads); + ut_a(mtflush_ctx->thread_data); + memset(mtflush_ctx->thread_data, 0, sizeof(thread_data_t) * n_threads); + + mtflush_ctx->n_threads = n_threads; + mtflush_ctx->wq = ib_wqueue_create(); + ut_a(mtflush_ctx->wq); + mtflush_ctx->wr_cq = ib_wqueue_create(); + ut_a(mtflush_ctx->wr_cq); + mtflush_ctx->rd_cq = ib_wqueue_create(); + ut_a(mtflush_ctx->rd_cq); + mtflush_ctx->wheap = mtflush_heap; + mtflush_ctx->rheap = mtflush_heap2; + + os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_ctx->thread_global_mtx); + os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx); + + /* Create threads for page-compression-flush */ + for(i=0; i < n_threads; i++) { + os_thread_id_t new_thread_id; + + mtflush_ctx->thread_data[i].wt_status = WTHR_INITIALIZED; + + mtflush_ctx->thread_data[i].wthread = os_thread_create( + mtflush_io_thread, + ((void *) mtflush_ctx), + &new_thread_id); + + mtflush_ctx->thread_data[i].wthread_id = new_thread_id; + } + + 
buf_mtflu_work_init(); + + return((void *)mtflush_ctx); +} + +/******************************************************************//** +Flush buffer pool instances. +@return number of pages flushed. */ +ulint +buf_mtflu_flush_work_items( +/*=======================*/ + ulint buf_pool_inst, /*!< in: Number of buffer pool instances */ + ulint *per_pool_pages_flushed, /*!< out: Number of pages + flushed/instance */ + buf_flush_t flush_type, /*!< in: Type of flush */ + ulint min_n, /*!< in: Wished minimum number of + blocks to be flushed */ + lsn_t lsn_limit) /*!< in: All blocks whose + oldest_modification is smaller than + this should be flushed (if their + number does not exceed min_n) */ +{ + ulint n_flushed=0, i; + mem_heap_t* work_heap; + mem_heap_t* reply_heap; + wrk_t work_item[MTFLUSH_MAX_WORKER]; + + if (mtflush_ctx->gwt_status == WTHR_KILL_IT) { + return 0; + } + + /* Allocate heap where all work items used and queue + node items areallocated */ + work_heap = mem_heap_create(0); + reply_heap = mem_heap_create(0); + + + for(i=0;i<buf_pool_inst; i++) { + work_item[i].tsk = MT_WRK_WRITE; + work_item[i].wr.buf_pool = buf_pool_from_array(i); + work_item[i].wr.flush_type = flush_type; + work_item[i].wr.min = min_n; + work_item[i].wr.lsn_limit = lsn_limit; + work_item[i].wi_status = WRK_ITEM_UNSET; + work_item[i].wheap = work_heap; + work_item[i].rheap = reply_heap; + work_item[i].n_flushed = 0; + work_item[i].id_usr = 0; + + ib_wqueue_add(mtflush_ctx->wq, + (void *)(work_item + i), + work_heap); + } + + /* wait on the completion to arrive */ + for(i=0; i< buf_pool_inst;) { + wrk_t *done_wi = NULL; + done_wi = (wrk_t *)ib_wqueue_wait(mtflush_ctx->wr_cq); + + if (done_wi != NULL) { + per_pool_pages_flushed[i] = done_wi->n_flushed; + +#ifdef UNIV_MTFLUSH_DEBUG + if((int)done_wi->id_usr == 0 && + (done_wi->wi_status == WRK_ITEM_SET || + done_wi->wi_status == WRK_ITEM_UNSET)) { + fprintf(stderr, + "**Set/Unused work_item[%lu] flush_type=%d\n", + i, + 
done_wi->wr.flush_type); + ut_a(0); + } +#endif + + n_flushed+= done_wi->n_flushed; + i++; + } + } + + /* Release used work_items and queue nodes */ + mem_heap_free(work_heap); + mem_heap_free(reply_heap); + + return(n_flushed); +} + +/*******************************************************************//** +Multi-threaded version of buf_flush_list +*/ +bool +buf_mtflu_flush_list( +/*=================*/ + ulint min_n, /*!< in: wished minimum mumber of blocks + flushed (it is not guaranteed that the + actual number is that big, though) */ + lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all + blocks whose oldest_modification is + smaller than this should be flushed + (if their number does not exceed + min_n), otherwise ignored */ + ulint* n_processed) /*!< out: the number of pages + which were processed is passed + back to caller. Ignored if NULL */ + +{ + ulint i; + bool success = true; + ulint cnt_flush[MTFLUSH_MAX_WORKER]; + + if (n_processed) { + *n_processed = 0; + } + + if (min_n != ULINT_MAX) { + /* Ensure that flushing is spread evenly amongst the + buffer pool instances. When min_n is ULINT_MAX + we need to flush everything up to the lsn limit + so no limit here. */ + min_n = (min_n + srv_buf_pool_instances - 1) + / srv_buf_pool_instances; + } + + /* This lock is to safequard against re-entry if any. 
*/ + os_fast_mutex_lock(&mtflush_mtx); + buf_mtflu_flush_work_items(srv_buf_pool_instances, + cnt_flush, BUF_FLUSH_LIST, + min_n, lsn_limit); + os_fast_mutex_unlock(&mtflush_mtx); + + for (i = 0; i < srv_buf_pool_instances; i++) { + if (n_processed) { + *n_processed += cnt_flush[i]; + } + if (cnt_flush[i]) { + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_FLUSH_BATCH_TOTAL_PAGE, + MONITOR_FLUSH_BATCH_COUNT, + MONITOR_FLUSH_BATCH_PAGES, + cnt_flush[i]); + } + } +#ifdef UNIV_MTFLUSH_DEBUG + fprintf(stderr, "%s: [1] [*n_processed: (min:%lu)%lu ]\n", + __FUNCTION__, (min_n * srv_buf_pool_instances), *n_processed); +#endif + return(success); +} + +/*********************************************************************//** +Clears up tail of the LRU lists: +* Put replaceable pages at the tail of LRU to the free list +* Flush dirty pages at the tail of LRU to the disk +The depth to which we scan each buffer pool is controlled by dynamic +config parameter innodb_LRU_scan_depth. +@return total pages flushed */ +UNIV_INTERN +ulint +buf_mtflu_flush_LRU_tail(void) +/*==========================*/ +{ + ulint total_flushed=0, i; + ulint cnt_flush[MTFLUSH_MAX_WORKER]; + + ut_a(buf_mtflu_init_done()); + + /* This lock is to safeguard against re-entry if any */ + os_fast_mutex_lock(&mtflush_mtx); + buf_mtflu_flush_work_items(srv_buf_pool_instances, + cnt_flush, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0); + os_fast_mutex_unlock(&mtflush_mtx); + + for (i = 0; i < srv_buf_pool_instances; i++) { + if (cnt_flush[i]) { + total_flushed += cnt_flush[i]; + + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_LRU_BATCH_TOTAL_PAGE, + MONITOR_LRU_BATCH_COUNT, + MONITOR_LRU_BATCH_PAGES, + cnt_flush[i]); + } + } + +#if UNIV_MTFLUSH_DEBUG + fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu ]\n", ( + srv_LRU_scan_depth * srv_buf_pool_instances), total_flushed); +#endif + + return(total_flushed); +} + +/*********************************************************************//** +Set correct thread identifiers to io thread 
array based on +information we have. */ +void +buf_mtflu_set_thread_ids( +/*=====================*/ + ulint n_threads, /*!<in: Number of threads to fill */ + void* ctx, /*!<in: thread context */ + os_thread_id_t* thread_ids) /*!<in: thread id array */ +{ + thread_sync_t *mtflush_io = ((thread_sync_t *)ctx); + ulint i; + ut_a(mtflush_io != NULL); + ut_a(thread_ids != NULL); + + for(i = 0; i < n_threads; i++) { + thread_ids[i] = mtflush_io->thread_data[i].wthread_id; + } +} diff --git a/storage/xtradb/buf/buf0rea.cc b/storage/xtradb/buf/buf0rea.cc index c28df72df92..63d2fdf7726 100644 --- a/storage/xtradb/buf/buf0rea.cc +++ b/storage/xtradb/buf/buf0rea.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -229,14 +230,14 @@ not_to_recover: *err = _fil_io(OS_FILE_READ | wake_later | ignore_nonexistent_pages, sync, space, zip_size, offset, 0, zip_size, - bpage->zip.data, bpage, trx); + bpage->zip.data, bpage, 0, trx); } else { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); *err = _fil_io(OS_FILE_READ | wake_later | ignore_nonexistent_pages, sync, space, 0, offset, 0, UNIV_PAGE_SIZE, - ((buf_block_t*) bpage)->frame, bpage, trx); + ((buf_block_t*) bpage)->frame, bpage, &bpage->write_size, trx); } if (sync) { diff --git a/storage/xtradb/dict/dict0dict.cc b/storage/xtradb/dict/dict0dict.cc index 8d8347ca726..a76121544b0 100644 --- a/storage/xtradb/dict/dict0dict.cc +++ b/storage/xtradb/dict/dict0dict.cc @@ -2,6 +2,7 @@ Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index da61d29f6f8..d1f35480ecf 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, MariaDB Corporation. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -54,6 +55,14 @@ Created 10/25/1995 Heikki Tuuri # include "srv0srv.h" static ulint srv_data_read, srv_data_written; #endif /* !UNIV_HOTBACKUP */ +#include "fil0pagecompress.h" +#include "zlib.h" +#ifdef __linux__ +#include <linux/fs.h> +#include <sys/ioctl.h> +#include <fcntl.h> +#endif +#include "row0mysql.h" MYSQL_PLUGIN_IMPORT extern my_bool lower_case_file_system; @@ -162,7 +171,7 @@ fil_system_t* fil_system = NULL; && srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)\ || ((s)->purpose == FIL_LOG \ && srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT)) - + #else /* __WIN__ */ # define fil_buffering_disabled(s) (0) #endif /* __WIN__ */ @@ -270,11 +279,16 @@ fil_read( block size multiple */ void* buf, /*!< in/out: buffer where to store data read; in aio this must be appropriately aligned */ - void* message) /*!< in: message for aio handler if non-sync - aio used, else ignored */ + void* message, /*!< in: message for aio handler if non-sync + aio used, else ignored */ + ulint* write_size) /*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. 
*/ { return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset, - byte_offset, len, buf, message)); + byte_offset, len, buf, message, write_size)); } /********************************************************************//** @@ -299,18 +313,22 @@ fil_write( be a block size multiple */ void* buf, /*!< in: buffer from which to write; in aio this must be appropriately aligned */ - void* message) /*!< in: message for aio handler if non-sync - aio used, else ignored */ + void* message, /*!< in: message for aio handler if non-sync + aio used, else ignored */ + ulint* write_size) /*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ { ut_ad(!srv_read_only_mode); return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset, - byte_offset, len, buf, message)); + byte_offset, len, buf, message, write_size)); } /*******************************************************************//** Returns the table space by a given id, NULL if not found. */ -UNIV_INLINE fil_space_t* fil_space_get_by_id( /*================*/ @@ -328,6 +346,19 @@ fil_space_get_by_id( return(space); } +/****************************************************************//** +Get space id from fil node */ +ulint +fil_node_get_space_id( +/*==================*/ + fil_node_t* node) /*!< in: Compressed node*/ +{ + ut_ad(node); + ut_ad(node->space); + + return (node->space->id); +} + /*******************************************************************//** Returns the table space by a given name, NULL if not found. 
*/ UNIV_INLINE @@ -548,8 +579,9 @@ fil_node_open_file( byte* buf2; byte* page; ulint space_id; - ulint flags; + ulint flags=0; ulint page_size; + ulint atomic_writes=0; ut_ad(mutex_own(&(system->mutex))); ut_a(node->n_pending == 0); @@ -566,7 +598,7 @@ fil_node_open_file( node->handle = os_file_create_simple_no_error_handling( innodb_file_data_key, node->name, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &success); + OS_FILE_READ_ONLY, &success, 0); if (!success) { /* The following call prints an error message */ os_file_get_last_error(true); @@ -583,6 +615,8 @@ fil_node_open_file( size_bytes = os_file_get_size(node->handle); ut_a(size_bytes != (os_offset_t) -1); + + node->file_block_size = os_file_get_block_size(node->handle, node->name); #ifdef UNIV_HOTBACKUP if (space->id == 0) { node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE); @@ -614,10 +648,13 @@ fil_node_open_file( set */ page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE)); - success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE); + success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE, + space->flags); + space_id = fsp_header_get_space_id(page); flags = fsp_header_get_flags(page); page_size = fsp_flags_get_page_size(flags); + atomic_writes = fsp_flags_get_atomic_writes(flags); ut_free(buf2); @@ -668,6 +705,17 @@ fil_node_open_file( ut_error; } + if (UNIV_UNLIKELY(space->flags != flags)) { + if (!dict_tf_verify_flags(space->flags, flags)) { + fprintf(stderr, + "InnoDB: Error: table flags are 0x%lx" + " in the data dictionary\n" + "InnoDB: but the flags in file %s are 0x%lx!\n", + space->flags, node->name, flags); + ut_error; + } + } + if (size_bytes >= 1024 * 1024) { /* Truncate the size to whole megabytes. 
*/ size_bytes = ut_2pow_round(size_bytes, 1024 * 1024); @@ -687,6 +735,8 @@ add_size: space->size += node->size; } + atomic_writes = fsp_flags_get_atomic_writes(space->flags); + /* printf("Opening file %s\n", node->name); */ /* Open the file for reading and writing, in Windows normally in the @@ -697,18 +747,22 @@ add_size: node->handle = os_file_create(innodb_file_log_key, node->name, OS_FILE_OPEN, OS_FILE_AIO, OS_LOG_FILE, - &ret); + &ret, atomic_writes); } else if (node->is_raw_disk) { node->handle = os_file_create(innodb_file_data_key, node->name, OS_FILE_OPEN_RAW, OS_FILE_AIO, OS_DATA_FILE, - &ret); + &ret, atomic_writes); } else { node->handle = os_file_create(innodb_file_data_key, node->name, OS_FILE_OPEN, OS_FILE_AIO, OS_DATA_FILE, - &ret); + &ret, atomic_writes); + } + + if (node->file_block_size == 0) { + node->file_block_size = os_file_get_block_size(node->handle, node->name); } ut_a(ret); @@ -1112,7 +1166,6 @@ fil_space_create( DBUG_EXECUTE_IF("fil_space_create_failure", return(false);); ut_a(fil_system); - ut_a(fsp_flags_is_valid(flags)); /* Look for a matching tablespace and if found free it. 
*/ do { @@ -1189,6 +1242,7 @@ fil_space_create( space->flags = flags; space->magic_n = FIL_SPACE_MAGIC_N; + space->printed_compression_failure = false; rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP); @@ -1774,12 +1828,12 @@ fil_write_lsn_and_arch_no_to_file( buf = static_cast<byte*>(ut_align(buf1, UNIV_PAGE_SIZE)); err = fil_read(TRUE, space, 0, sum_of_sizes, 0, - UNIV_PAGE_SIZE, buf, NULL); + UNIV_PAGE_SIZE, buf, NULL, 0); if (err == DB_SUCCESS) { mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn); err = fil_write(TRUE, space, 0, sum_of_sizes, 0, - UNIV_PAGE_SIZE, buf, NULL); + UNIV_PAGE_SIZE, buf, NULL, 0); } mem_free(buf1); @@ -1913,8 +1967,10 @@ fil_read_first_page( ulint* space_id, /*!< out: tablespace ID */ lsn_t* min_flushed_lsn, /*!< out: min of flushed lsn values in data files */ - lsn_t* max_flushed_lsn) /*!< out: max of flushed + lsn_t* max_flushed_lsn, /*!< out: max of flushed lsn values in data files */ + ulint orig_space_id) /*!< in: original file space + id */ { byte* buf; byte* page; @@ -1927,7 +1983,10 @@ fil_read_first_page( page = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE)); - os_file_read(data_file, page, 0, UNIV_PAGE_SIZE); + os_file_read(data_file, page, 0, UNIV_PAGE_SIZE, + orig_space_id != ULINT_UNDEFINED ? + fil_space_is_page_compressed(orig_space_id) : + FALSE); /* The FSP_HEADER on page 0 is only valid for the first file in a tablespace. So if this is not the first datafile, leave @@ -1936,12 +1995,21 @@ fil_read_first_page( if (!one_read_already) { *flags = fsp_header_get_flags(page); *space_id = fsp_header_get_space_id(page); + } - check_msg = fil_check_first_page(page); + /* Page is page compressed page, need to decompress, before + continue. 
*/ + if (fil_page_is_compressed(page)) { + ulint write_size=0; + fil_decompress_page(NULL, page, UNIV_PAGE_SIZE, &write_size); } flushed_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN); + if (!one_read_already) { + check_msg = fil_check_first_page(page); + } + ut_free(buf); if (check_msg) { @@ -3064,7 +3132,7 @@ fil_create_link_file( file = os_file_create_simple_no_error_handling( innodb_file_data_key, link_filepath, - OS_FILE_CREATE, OS_FILE_READ_WRITE, &success); + OS_FILE_CREATE, OS_FILE_READ_WRITE, &success, 0); if (!success) { /* The following call will print an error message */ @@ -3084,6 +3152,8 @@ fil_create_link_file( } else if (error == OS_FILE_DISK_FULL) { err = DB_OUT_OF_FILE_SPACE; + } else if (error == OS_FILE_OPERATION_NOT_SUPPORTED) { + err = DB_UNSUPPORTED; } else { err = DB_ERROR; } @@ -3094,7 +3164,7 @@ fil_create_link_file( } if (!os_file_write(link_filepath, file, filepath, 0, - strlen(filepath))) { + strlen(filepath))) { err = DB_ERROR; } @@ -3173,8 +3243,9 @@ fil_open_linked_file( /*===============*/ const char* tablename, /*!< in: database/tablename */ char** remote_filepath,/*!< out: remote filepath */ - os_file_t* remote_file) /*!< out: remote file handle */ - + os_file_t* remote_file, /*!< out: remote file handle */ + ulint atomic_writes) /*!< in: atomic writes table option + value */ { ibool success; @@ -3188,7 +3259,7 @@ fil_open_linked_file( *remote_file = os_file_create_simple_no_error_handling( innodb_file_data_key, *remote_filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, - &success); + &success, atomic_writes); if (!success) { char* link_filepath = fil_make_isl_name(tablename); @@ -3243,6 +3314,7 @@ fil_create_new_single_table_tablespace( /* TRUE if a table is created with CREATE TEMPORARY TABLE */ bool is_temp = !!(flags2 & DICT_TF2_TEMPORARY); bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags); + ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); ut_a(space_id > 0); ut_ad(!srv_read_only_mode); @@ -3275,7 +3347,8 @@ 
fil_create_new_single_table_tablespace( OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL, OS_DATA_FILE, - &ret); + &ret, + atomic_writes); if (ret == FALSE) { /* The following call will print an error message */ @@ -3302,6 +3375,11 @@ fil_create_new_single_table_tablespace( goto error_exit_3; } + if (error == OS_FILE_OPERATION_NOT_SUPPORTED) { + err = DB_UNSUPPORTED; + goto error_exit_3; + } + if (error == OS_FILE_DISK_FULL) { err = DB_OUT_OF_FILE_SPACE; goto error_exit_3; @@ -3340,6 +3418,7 @@ fil_create_new_single_table_tablespace( flags = fsp_flags_set_page_size(flags, UNIV_PAGE_SIZE); fsp_header_init_fields(page, space_id, flags); mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); + ut_ad(fsp_flags_is_valid(flags)); if (!(fsp_flags_is_compressed(flags))) { buf_flush_init_for_writing(page, NULL, 0); @@ -3516,16 +3595,25 @@ fil_open_single_table_tablespace( fsp_open_info remote; ulint tablespaces_found = 0; ulint valid_tablespaces_found = 0; + ulint atomic_writes = 0; #ifdef UNIV_SYNC_DEBUG ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); #endif /* UNIV_SYNC_DEBUG */ ut_ad(!fix_dict || mutex_own(&(dict_sys->mutex))); - if (!fsp_flags_is_valid(flags)) { + /* Table flags can be ULINT_UNDEFINED if + dict_tf_to_fsp_flags_failure is set. */ + if (flags != ULINT_UNDEFINED) { + if (!fsp_flags_is_valid(flags)) { + return(DB_CORRUPTION); + } + } else { return(DB_CORRUPTION); } + atomic_writes = fsp_flags_get_atomic_writes(flags); + /* If the tablespace was relocated, we do not compare the DATA_DIR flag */ ulint mod_flags = flags & ~FSP_FLAGS_MASK_DATA_DIR; @@ -3550,7 +3638,7 @@ fil_open_single_table_tablespace( } link_file_found = fil_open_linked_file( - tablename, &remote.filepath, &remote.file); + tablename, &remote.filepath, &remote.file, atomic_writes); remote.success = link_file_found; if (remote.success) { /* possibility of multiple files. 
*/ @@ -3578,7 +3666,7 @@ fil_open_single_table_tablespace( if (dict.filepath) { dict.file = os_file_create_simple_no_error_handling( innodb_file_data_key, dict.filepath, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &dict.success); + OS_FILE_READ_ONLY, &dict.success, atomic_writes); if (dict.success) { /* possibility of multiple files. */ validate = true; @@ -3590,7 +3678,7 @@ fil_open_single_table_tablespace( ut_a(def.filepath); def.file = os_file_create_simple_no_error_handling( innodb_file_data_key, def.filepath, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &def.success); + OS_FILE_READ_ONLY, &def.success, atomic_writes); if (def.success) { tablespaces_found++; } @@ -3606,7 +3694,7 @@ fil_open_single_table_tablespace( if (def.success) { def.check_msg = fil_read_first_page( def.file, FALSE, &def.flags, &def.id, - &def.lsn, &def.lsn); + &def.lsn, &def.lsn, id); def.valid = !def.check_msg; /* Validate this single-table-tablespace with SYS_TABLES, @@ -3628,7 +3716,7 @@ fil_open_single_table_tablespace( if (remote.success) { remote.check_msg = fil_read_first_page( remote.file, FALSE, &remote.flags, &remote.id, - &remote.lsn, &remote.lsn); + &remote.lsn, &remote.lsn, id); remote.valid = !remote.check_msg; /* Validate this single-table-tablespace with SYS_TABLES, @@ -3651,7 +3739,7 @@ fil_open_single_table_tablespace( if (dict.success) { dict.check_msg = fil_read_first_page( dict.file, FALSE, &dict.flags, &dict.id, - &dict.lsn, &dict.lsn); + &dict.lsn, &dict.lsn, id); dict.valid = !dict.check_msg; /* Validate this single-table-tablespace with SYS_TABLES, @@ -3915,7 +4003,8 @@ fil_user_tablespace_find_space_id( for (ulint j = 0; j < page_count; ++j) { - st = os_file_read(fsp->file, page, (j* page_size), page_size); + st = os_file_read(fsp->file, page, (j* page_size), page_size, + fsp_flags_is_page_compressed(fsp->flags)); if (!st) { ib_logf(IB_LOG_LEVEL_INFO, @@ -4028,7 +4117,7 @@ fil_user_tablespace_restore_page( err = os_file_write(fsp->filepath, fsp->file, page, (zip_size ? 
zip_size : page_size) * page_no, - buflen); + buflen); os_file_flush(fsp->file); out: @@ -4052,7 +4141,7 @@ check_first_page: fsp->success = TRUE; if (const char* check_msg = fil_read_first_page( fsp->file, FALSE, &fsp->flags, &fsp->id, - &fsp->lsn, &fsp->lsn)) { + &fsp->lsn, &fsp->lsn, ULINT_UNDEFINED)) { ib_logf(IB_LOG_LEVEL_ERROR, "%s in tablespace %s (table %s)", check_msg, fsp->filepath, tablename); @@ -4125,9 +4214,7 @@ fil_load_single_table_tablespace( fsp_open_info def; fsp_open_info remote; os_offset_t size; -#ifdef UNIV_HOTBACKUP fil_space_t* space; -#endif memset(&def, 0, sizeof(def)); memset(&remote, 0, sizeof(remote)); @@ -4160,7 +4247,8 @@ fil_load_single_table_tablespace( one of them is sent to this function. So if this table has already been loaded, there is nothing to do.*/ mutex_enter(&fil_system->mutex); - if (fil_space_get_by_name(tablename)) { + space = fil_space_get_by_name(tablename); + if (space) { mem_free(tablename); mutex_exit(&fil_system->mutex); return; @@ -4185,7 +4273,7 @@ fil_load_single_table_tablespace( /* Check for a link file which locates a remote tablespace. */ remote.success = fil_open_linked_file( - tablename, &remote.filepath, &remote.file); + tablename, &remote.filepath, &remote.file, FALSE); /* Read the first page of the remote tablespace */ if (remote.success) { @@ -4200,7 +4288,7 @@ fil_load_single_table_tablespace( /* Try to open the tablespace in the datadir. 
*/ def.file = os_file_create_simple_no_error_handling( innodb_file_data_key, def.filepath, OS_FILE_OPEN, - OS_FILE_READ_WRITE, &def.success); + OS_FILE_READ_ONLY, &def.success, FALSE); /* Read the first page of the remote tablespace */ if (def.success) { @@ -4965,6 +5053,11 @@ retry: start_page_no = space->size; file_start_page_no = space->size - node->size; + /* Determine correct file block size */ + if (node->file_block_size == 0) { + node->file_block_size = os_file_get_block_size(node->handle, node->name); + } + #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { os_offset_t start_offset = start_page_no * page_size; @@ -4976,7 +5069,7 @@ retry: "space for file \'%s\' failed. Current size " INT64PF ", desired size " INT64PF "\n", node->name, start_offset, len+start_offset); - os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE); + os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE, __FILE__, __LINE__); success = FALSE; } else { success = TRUE; @@ -5025,7 +5118,7 @@ retry: success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC, node->name, node->handle, buf, offset, page_size * n_pages, - NULL, NULL, space_id, NULL); + node, NULL, space_id, NULL, 0, 0, 0); #endif /* UNIV_HOTBACKUP */ DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", @@ -5404,7 +5497,12 @@ _fil_io( or from where to write; in aio this must be appropriately aligned */ void* message, /*!< in: message for aio handler if non-sync - aio used, else ignored */ + aio used, else ignored */ + ulint* write_size, /*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. 
*/ trx_t* trx) { ulint mode; @@ -5415,6 +5513,8 @@ _fil_io( ulint wake_later; os_offset_t offset; ibool ignore_nonexistent_pages; + ibool page_compressed = FALSE; + ulint page_compression_level = 0; is_log = type & OS_FILE_LOG; type = type & ~OS_FILE_LOG; @@ -5468,6 +5568,11 @@ _fil_io( } else if (type == OS_FILE_WRITE) { ut_ad(!srv_read_only_mode); srv_stats.data_written.add(len); + if (fil_page_is_index_page((byte *)buf)) { + srv_stats.index_pages_written.inc(); + } else { + srv_stats.non_index_pages_written.inc(); + } } /* Reserve the fil_system mutex and make sure that we can open at @@ -5477,6 +5582,8 @@ _fil_io( space = fil_space_get_by_id(space_id); + page_compressed = space ? fsp_flags_is_page_compressed(space->flags) : FALSE; /* space may be NULL here; it is only checked below */ + page_compression_level = space ? fsp_flags_get_page_compression_level(space->flags) : 0; /* If we are deleting a tablespace we don't allow any read operations on that. However, we do allow write operations. */ if (space == 0 || (type == OS_FILE_READ && space->stop_new_ops)) { @@ -5622,7 +5729,8 @@ _fil_io( /* Queue the aio request */ ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, - offset, len, node, message, space_id, trx); + offset, len, node, message, space_id, trx, + page_compressed, page_compression_level, write_size); #else /* In mysqlbackup do normal i/o, not aio */ @@ -6173,7 +6281,8 @@ fil_iterate( ut_ad(!(n_bytes % iter.page_size)); if (!os_file_read(iter.file, io_buffer, offset, - (ulint) n_bytes)) { + (ulint) n_bytes, + fil_space_is_page_compressed(space_id))) { ib_logf(IB_LOG_LEVEL_ERROR, "os_file_read() failed"); @@ -6260,7 +6369,7 @@ fil_tablespace_iterate( file = os_file_create_simple_no_error_handling( innodb_file_data_key, filepath, - OS_FILE_OPEN, OS_FILE_READ_WRITE, &success); + OS_FILE_OPEN, OS_FILE_READ_WRITE, &success, FALSE); DBUG_EXECUTE_IF("fil_tablespace_iterate_failure", { @@ -6312,7 +6421,8 @@ fil_tablespace_iterate( /* Read the first page and determine the page and zip size. 
*/ - if (!os_file_read(file, page, 0, UNIV_PAGE_SIZE)) { + if (!os_file_read(file, page, 0, UNIV_PAGE_SIZE, + dict_tf_get_page_compression(table->flags))) { err = DB_IO_ERROR; @@ -6547,3 +6657,87 @@ fil_space_set_corrupt( mutex_exit(&fil_system->mutex); } + +/****************************************************************//** +Acquire fil_system mutex */ +void +fil_system_enter(void) +/*==================*/ +{ + ut_ad(!mutex_own(&fil_system->mutex)); + mutex_enter(&fil_system->mutex); +} + +/****************************************************************//** +Release fil_system mutex */ +void +fil_system_exit(void) +/*=================*/ +{ + ut_ad(mutex_own(&fil_system->mutex)); + mutex_exit(&fil_system->mutex); +} + +/*******************************************************************//** +Return space name */ +char* +fil_space_name( +/*===========*/ + fil_space_t* space) /*!< in: space */ +{ + return (space->name); +} + +/*******************************************************************//** +Return page type name */ +const char* +fil_get_page_type_name( +/*===================*/ + ulint page_type) /*!< in: FIL_PAGE_TYPE */ +{ + switch(page_type) { + case FIL_PAGE_PAGE_COMPRESSED: + return (const char*)"PAGE_COMPRESSED"; + case FIL_PAGE_INDEX: + return (const char*)"INDEX"; + case FIL_PAGE_UNDO_LOG: + return (const char*)"UNDO LOG"; + case FIL_PAGE_INODE: + return (const char*)"INODE"; + case FIL_PAGE_IBUF_FREE_LIST: + return (const char*)"IBUF_FREE_LIST"; + case FIL_PAGE_TYPE_ALLOCATED: + return (const char*)"ALLOCATED"; + case FIL_PAGE_IBUF_BITMAP: + return (const char*)"IBUF_BITMAP"; + case FIL_PAGE_TYPE_SYS: + return (const char*)"SYS"; + case FIL_PAGE_TYPE_TRX_SYS: + return (const char*)"TRX_SYS"; + case FIL_PAGE_TYPE_FSP_HDR: + return (const char*)"FSP_HDR"; + case FIL_PAGE_TYPE_XDES: + return (const char*)"XDES"; + case FIL_PAGE_TYPE_BLOB: + return (const char*)"BLOB"; + case FIL_PAGE_TYPE_ZBLOB: + return (const char*)"ZBLOB"; + case 
FIL_PAGE_TYPE_ZBLOB2: + return (const char*)"ZBLOB2"; + case FIL_PAGE_TYPE_COMPRESSED: + return (const char*)"ORACLE PAGE COMPRESSED"; + default: + return (const char*)"PAGE TYPE CORRUPTED"; + } +} +/****************************************************************//** +Get block size from fil node +@return block size*/ +ulint +fil_node_get_block_size( +/*====================*/ + fil_node_t* node) /*!< in: Node where to get block + size */ +{ + return (node->file_block_size); +} diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc new file mode 100644 index 00000000000..686f98c83c5 --- /dev/null +++ b/storage/xtradb/fil/fil0pagecompress.cc @@ -0,0 +1,790 @@ +/***************************************************************************** + +Copyright (C) 2013, 2015, MariaDB Corporation. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file fil/fil0pagecompress.cc +Implementation for page compressed file spaces. 
+ +Created 11/12/2013 Jan Lindström jan.lindstrom@mariadb.com +Updated 14/02/2015 +***********************************************************************/ + +#include "fil0fil.h" +#include "fil0pagecompress.h" + +#include <debug_sync.h> +#include <my_dbug.h> + +#include "mem0mem.h" +#include "hash0hash.h" +#include "os0file.h" +#include "mach0data.h" +#include "buf0buf.h" +#include "buf0flu.h" +#include "log0recv.h" +#include "fsp0fsp.h" +#include "srv0srv.h" +#include "srv0start.h" +#include "mtr0mtr.h" +#include "mtr0log.h" +#include "dict0dict.h" +#include "page0page.h" +#include "page0zip.h" +#include "trx0sys.h" +#include "row0mysql.h" +#ifndef UNIV_HOTBACKUP +# include "buf0lru.h" +# include "ibuf0ibuf.h" +# include "sync0sync.h" +# include "os0sync.h" +#else /* !UNIV_HOTBACKUP */ +# include "srv0srv.h" +static ulint srv_data_read, srv_data_written; +#endif /* !UNIV_HOTBACKUP */ +#include "zlib.h" +#ifdef __linux__ +#include <linux/fs.h> +#include <sys/ioctl.h> +#include <fcntl.h> +#include <linux/falloc.h> +#endif +#include "row0mysql.h" +#ifdef HAVE_LZ4 +#include "lz4.h" +#endif +#ifdef HAVE_LZO +#include "lzo/lzo1x.h" +#endif +#ifdef HAVE_LZMA +#include "lzma.h" +#endif +#ifdef HAVE_BZIP2 +#include "bzlib.h" +#endif +#ifdef HAVE_SNAPPY +#include "snappy-c.h" +#endif + +/* Used for debugging */ +//#define UNIV_PAGECOMPRESS_DEBUG 1 + +/****************************************************************//** +For page compressed pages decompress the page after actual read +operation. */ +static +void +fil_decompress_page_2( +/*==================*/ + byte* page_buf, /*!< out: destination buffer for + uncompressed data */ + byte* buf, /*!< in: source compressed data */ + ulong len, /*!< in: length of output buffer.*/ + ulint* write_size) /*!< in/out: Actual payload size of + the compressed data. 
*/ +{ + ulint page_type = mach_read_from_2(buf + FIL_PAGE_TYPE); + + if (page_type != FIL_PAGE_TYPE_COMPRESSED) { + /* It is not a compressed page */ + return; + } + + ulint olen = 0; + byte* ptr = buf + FIL_PAGE_DATA; + ulint version = mach_read_from_1(buf + FIL_PAGE_VERSION); + int err = 0; + + ut_a(version == 1); + + /* Read the original page type, before we compressed the data. */ + page_type = mach_read_from_2(buf + FIL_PAGE_ORIGINAL_TYPE_V1); + + ulint original_len = mach_read_from_2(buf + FIL_PAGE_ORIGINAL_SIZE_V1); + + if (original_len < UNIV_PAGE_SIZE_MIN - (FIL_PAGE_DATA + 8) + || original_len > UNIV_PAGE_SIZE_MAX - FIL_PAGE_DATA + || len < original_len + FIL_PAGE_DATA) { + fprintf(stderr, + "InnoDB: Corruption: We try to uncompress corrupted page\n" + "InnoDB: Original len %lu len %lu.\n", + original_len, len); + + fflush(stderr); + ut_error; + + } + + ulint algorithm = mach_read_from_1(buf + FIL_PAGE_ALGORITHM_V1); + + switch(algorithm) { + case PAGE_ZLIB_ALGORITHM: { + + fprintf(stderr, "InnoDB: [Note]: zlib\n"); + + err = uncompress(page_buf, &len, ptr, original_len); + /* If uncompress fails it means that page is corrupted */ + if (err != Z_OK) { + + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but uncompress failed with error %d.\n" + "InnoDB: size %lu len %lu\n", + err, original_len, len); + + fflush(stderr); + + ut_error; + } + + break; + } +#ifdef HAVE_LZ4 + case PAGE_LZ4_ALGORITHM: { + fprintf(stderr, "InnoDB: [Note]: lz4\n"); + err = LZ4_decompress_fast( + (const char*) ptr, (char*) (page_buf), original_len); + + if (err < 0) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %d bytes.\n" + "InnoDB: size %lu len %lu\n", + err, original_len, len); + fflush(stderr); + + ut_error; + } + break; + } +#endif /* HAVE_LZ4 */ + +#ifdef HAVE_LZMA + case PAGE_LZMA_ALGORITHM: { + + lzma_ret ret; + size_t src_pos = 0; + size_t dst_pos = 0; + uint64_t memlimit 
= UINT64_MAX; + + fprintf(stderr, "InnoDB: [Note]: lzma\n"); + ret = lzma_stream_buffer_decode( + &memlimit, + 0, + NULL, + ptr, + &src_pos, + original_len, + (page_buf), + &dst_pos, + len); + + + if (ret != LZMA_OK || (dst_pos <= 0 || dst_pos > len)) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %ld bytes.\n" + "InnoDB: size %lu len %lu\n", + dst_pos, original_len, len); + fflush(stderr); + + ut_error; + } + + break; + } +#endif /* HAVE_LZMA */ + +#ifdef HAVE_LZO + case PAGE_LZO_ALGORITHM: { + fprintf(stderr, "InnoDB: [Note]: lzo \n"); + err = lzo1x_decompress((const unsigned char *)ptr, + original_len,(unsigned char *)(page_buf), &olen, NULL); + + if (err != LZO_E_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %ld bytes.\n" + "InnoDB: size %lu len %lu\n", + olen, original_len, len); + fflush(stderr); + + ut_error; + } + break; + } +#endif /* HAVE_LZO */ + + default: + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but compression algorithm %s\n" + "InnoDB: is not known.\n" + ,fil_get_compression_alg_name(algorithm)); + + fflush(stderr); + ut_error; + break; + } + + /* Leave the header alone */ + memmove(buf+FIL_PAGE_DATA, page_buf, original_len); + + mach_write_to_2(buf + FIL_PAGE_TYPE, page_type); + + ut_ad(memcmp(buf + FIL_PAGE_LSN + 4, + buf + (original_len + FIL_PAGE_DATA) + - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4) == 0); +} + +/****************************************************************//** +For page compressed pages compress the page before actual write +operation. +@return compressed page to be written*/ +byte* +fil_compress_page( +/*==============*/ + ulint space_id, /*!< in: tablespace id of the + table. 
*/ + byte* buf, /*!< in: buffer from which to write; in aio + this must be appropriately aligned */ + byte* out_buf, /*!< out: compressed buffer */ + ulint len, /*!< in: length of input buffer.*/ + ulint compression_level, /* in: compression level */ + ulint block_size, /*!< in: block size */ + ulint* out_len, /*!< out: actual length of compressed + page */ + byte* lzo_mem) /*!< in: temporal memory used by LZO */ +{ + int err = Z_OK; + int level = 0; + ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE; + ulint write_size=0; + ulint comp_method = innodb_compression_algorithm; /* Cache to avoid + change during + function execution */ + ut_ad(buf); + ut_ad(out_buf); + ut_ad(len); + ut_ad(out_len); + + level = compression_level; + ut_ad(fil_space_is_page_compressed(space_id)); + + fil_system_enter(); + fil_space_t* space = fil_space_get_by_id(space_id); + fil_system_exit(); + + /* If no compression level was provided to this table, use system + default level */ + if (level == 0) { + level = page_zip_level; + } + +#ifdef UNIV_PAGECOMPRESS_DEBUG + fprintf(stderr, + "InnoDB: Note: Preparing for compress for space %lu name %s len %lu\n", + space_id, fil_space_name(space), len); +#endif /* UNIV_PAGECOMPRESS_DEBUG */ + + write_size = UNIV_PAGE_SIZE - header_len; + + switch(comp_method) { +#ifdef HAVE_LZ4 + case PAGE_LZ4_ALGORITHM: + err = LZ4_compress_limitedOutput((const char *)buf, + (char *)out_buf+header_len, len, write_size); + write_size = err; + + if (err == 0) { + /* If error we leave the actual page as it was */ + + if (space->printed_compression_failure == false) { + fprintf(stderr, + "InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n", + space_id, fil_space_name(space), len, err, write_size); + space->printed_compression_failure = true; + } + + srv_stats.pages_page_compression_error.inc(); + *out_len = len; + return (buf); + } + break; +#endif /* HAVE_LZ4 */ +#ifdef HAVE_LZO + case PAGE_LZO_ALGORITHM: + err = 
lzo1x_1_15_compress( + buf, len, out_buf+header_len, &write_size, lzo_mem); + + if (err != LZO_E_OK || write_size > UNIV_PAGE_SIZE-header_len) { + if (space->printed_compression_failure == false) { + fprintf(stderr, + "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu\n", + space_id, fil_space_name(space), len, err, write_size); + space->printed_compression_failure = true; + } + srv_stats.pages_page_compression_error.inc(); + *out_len = len; + return (buf); + } + + break; +#endif /* HAVE_LZO */ +#ifdef HAVE_LZMA + case PAGE_LZMA_ALGORITHM: { + size_t out_pos=0; + + err = lzma_easy_buffer_encode( + level, /* table level, or page_zip_level when the table has none */ + LZMA_CHECK_NONE, + NULL, /* No custom allocator, use malloc/free */ + reinterpret_cast<uint8_t*>(buf), + len, + reinterpret_cast<uint8_t*>(out_buf + header_len), + &out_pos, + (size_t)write_size); + + if (err != LZMA_OK || out_pos > UNIV_PAGE_SIZE-header_len) { + if (space->printed_compression_failure == false) { + fprintf(stderr, + "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu\n", + space_id, fil_space_name(space), len, err, out_pos); + space->printed_compression_failure = true; + } + srv_stats.pages_page_compression_error.inc(); + *out_len = len; + return (buf); + } + + write_size = out_pos; + + break; + } +#endif /* HAVE_LZMA */ + +#ifdef HAVE_BZIP2 + case PAGE_BZIP2_ALGORITHM: { + unsigned int bz_len = (unsigned int) write_size; /* bzip2 takes unsigned int*, so do not cast the ulint* */ + err = BZ2_bzBuffToBuffCompress( + (char *)(out_buf + header_len), + &bz_len, + (char *)buf, + len, + 1, + 0, + 0); + write_size = bz_len; + if (err != BZ_OK || write_size > UNIV_PAGE_SIZE-header_len) { + if (space->printed_compression_failure == false) { + fprintf(stderr, + "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu\n", + space_id, fil_space_name(space), len, err, write_size); + space->printed_compression_failure = true; + } + srv_stats.pages_page_compression_error.inc(); + *out_len = len; + return (buf); + } + break; + } +#endif /* HAVE_BZIP2 
*/ + +#ifdef HAVE_SNAPPY + case PAGE_SNAPPY_ALGORITHM: + { + snappy_status cstatus; + + cstatus = snappy_compress((const char *)buf, len, (char *)(out_buf+header_len), &write_size); + + if (cstatus != SNAPPY_OK || write_size > UNIV_PAGE_SIZE-header_len) { + if (space->printed_compression_failure == false) { + fprintf(stderr, + "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu\n", + space_id, fil_space_name(space), len, (int)cstatus, write_size); + space->printed_compression_failure = true; + } + srv_stats.pages_page_compression_error.inc(); + *out_len = len; + return (buf); + } + break; + } +#endif /* HAVE_SNAPPY */ + + case PAGE_ZLIB_ALGORITHM: + err = compress2(out_buf+header_len, (ulong*)&write_size, buf, len, level); + + if (err != Z_OK) { + /* If error we leave the actual page as it was */ + + if (space->printed_compression_failure == false) { + fprintf(stderr, + "InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n", + space_id, fil_space_name(space), len, err, write_size); + space->printed_compression_failure = true; + } + + srv_stats.pages_page_compression_error.inc(); + *out_len = len; + return (buf); + } + break; + + case PAGE_UNCOMPRESSED: + *out_len = len; + return (buf); + break; + default: + ut_error; + break; + } + + /* Set up the page header */ + memcpy(out_buf, buf, FIL_PAGE_DATA); + /* Set up the checksum */ + mach_write_to_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC); + /* Set up the correct page type */ + mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED); + /* Set up the flush lsn to be compression algorithm */ + mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, comp_method); + /* Set up the actual payload lenght */ + mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size); + +#ifdef UNIV_DEBUG + /* Verify */ + ut_ad(fil_page_is_compressed(out_buf)); + ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC); + 
ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size); + ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == (ulint)comp_method); + + /* Verify that page can be decompressed */ + { + byte *comp_page; + byte *uncomp_page; + + comp_page = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*3)); + uncomp_page = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*3)); + memcpy(comp_page, out_buf, UNIV_PAGE_SIZE); + + fil_decompress_page(uncomp_page, comp_page, len, NULL); + if(buf_page_is_corrupted(false, uncomp_page, 0)) { + buf_page_print(uncomp_page, 0, BUF_PAGE_PRINT_NO_CRASH); + ut_error; + } + ut_free(comp_page); + ut_free(uncomp_page); + } +#endif /* UNIV_DEBUG */ + + write_size+=header_len; + + /* Actual write needs to be aligned on block size. Check the block size in every build: a zero value would otherwise divide by zero in the modulo below. */ + ut_a(block_size > 0); + if (write_size % block_size) { +#ifdef UNIV_DEBUG + size_t tmp = write_size; +#endif + write_size = (size_t)ut_uint64_align_up((ib_uint64_t)write_size, block_size); +#ifdef UNIV_DEBUG + ut_a(write_size > 0 && ((write_size % block_size) == 0)); + ut_a(write_size >= tmp); +#endif + } + +#ifdef UNIV_PAGECOMPRESS_DEBUG + fprintf(stderr, + "InnoDB: Note: Compression succeeded for space %lu name %s len %lu out_len %lu\n", + space_id, fil_space_name(space), len, write_size); +#endif /* UNIV_PAGECOMPRESS_DEBUG */ + + + srv_stats.page_compression_saved.add((len - write_size)); + srv_stats.pages_page_compressed.inc(); + + /* If we do not persistently trim rest of page, we need to write it + all */ + if (!srv_use_trim) { + write_size = len; + } + + *out_len = write_size; + + return(out_buf); + +} + +/****************************************************************//** +For page compressed pages decompress the page after actual read +operation. 
*/ +void +fil_decompress_page( +/*================*/ + byte* page_buf, /*!< in: preallocated buffer or NULL */ + byte* buf, /*!< out: buffer from which to read; in aio + this must be appropriately aligned */ + ulong len, /*!< in: length of output buffer.*/ + ulint* write_size) /*!< in/out: Actual payload size of + the compressed data. */ +{ + int err = 0; + ulint actual_size = 0; + ulint compression_alg = 0; + byte *in_buf; + ulint olen=0; + ulint ptype; + + ut_ad(buf); + ut_ad(len); + + ptype = mach_read_from_2(buf+FIL_PAGE_TYPE); + + /* Do not try to uncompressed pages that are not compressed */ + if (ptype != FIL_PAGE_PAGE_COMPRESSED && ptype != FIL_PAGE_TYPE_COMPRESSED) { + return; + } + + // If no buffer was given, we need to allocate temporal buffer + if (page_buf == NULL) { +#ifdef UNIV_PAGECOMPRESS_DEBUG + fprintf(stderr, + "InnoDB: Note: FIL: Compression buffer not given, allocating...\n"); +#endif /* UNIV_PAGECOMPRESS_DEBUG */ + in_buf = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*3)); + } else { + in_buf = page_buf; + } + + if (ptype == FIL_PAGE_TYPE_COMPRESSED) { + + fil_decompress_page_2(in_buf, buf, len, write_size); + // Need to free temporal buffer if no buffer was given + if (page_buf == NULL) { + ut_free(in_buf); + } + return; + } + + /* Before actual decompress, make sure that page type is correct */ + + if (mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM) != BUF_NO_CHECKSUM_MAGIC || + mach_read_from_2(buf+FIL_PAGE_TYPE) != FIL_PAGE_PAGE_COMPRESSED) { + fprintf(stderr, + "InnoDB: Corruption: We try to uncompress corrupted page\n" + "InnoDB: CRC %lu type %lu.\n" + "InnoDB: len %lu\n", + mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM), + mach_read_from_2(buf+FIL_PAGE_TYPE), len); + + fflush(stderr); + ut_error; + } + + /* Get compression algorithm */ + compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN); + + /* Get the actual size of compressed page */ + actual_size = mach_read_from_2(buf+FIL_PAGE_DATA); + /* Check if payload size is 
corrupted */ + if (actual_size == 0 || actual_size > UNIV_PAGE_SIZE) { + fprintf(stderr, + "InnoDB: Corruption: We try to uncompress corrupted page\n" + "InnoDB: actual size %lu compression %s\n", + actual_size, fil_get_compression_alg_name(compression_alg)); + fflush(stderr); + ut_error; + } + + /* Store actual payload size of the compressed data. This pointer + points to buffer pool. */ + if (write_size) { + *write_size = actual_size; + } + +#ifdef UNIV_PAGECOMPRESS_DEBUG + fprintf(stderr, + "InnoDB: Note: Preparing for decompress for len %lu\n", + actual_size); +#endif /* UNIV_PAGECOMPRESS_DEBUG */ + + + switch(compression_alg) { + case PAGE_ZLIB_ALGORITHM: + err= uncompress(in_buf, &len, buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (unsigned long)actual_size); + + /* If uncompress fails it means that page is corrupted */ + if (err != Z_OK) { + + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but uncompress failed with error %d.\n" + "InnoDB: size %lu len %lu\n", + err, actual_size, len); + + fflush(stderr); + + ut_error; + } + break; + +#ifdef HAVE_LZ4 + case PAGE_LZ4_ALGORITHM: + err = LZ4_decompress_fast((const char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (char *)in_buf, len); + + if (err != (int)actual_size) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %d bytes.\n" + "InnoDB: size %lu len %lu\n", + err, actual_size, len); + fflush(stderr); + + ut_error; + } + break; +#endif /* HAVE_LZ4 */ +#ifdef HAVE_LZO + case PAGE_LZO_ALGORITHM: + err = lzo1x_decompress((const unsigned char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, + actual_size,(unsigned char *)in_buf, &olen, NULL); + + if (err != LZO_E_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %ld bytes.\n" + "InnoDB: size %lu len %lu\n", + olen, actual_size, len); + fflush(stderr); + + 
ut_error; + } + break; +#endif /* HAVE_LZO */ +#ifdef HAVE_LZMA + case PAGE_LZMA_ALGORITHM: { + + lzma_ret ret; + size_t src_pos = 0; + size_t dst_pos = 0; + uint64_t memlimit = UINT64_MAX; + + ret = lzma_stream_buffer_decode( + &memlimit, + 0, + NULL, + buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, + &src_pos, + actual_size, + in_buf, + &dst_pos, + len); + + + if (ret != LZMA_OK || (dst_pos == 0 || dst_pos > UNIV_PAGE_SIZE)) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %lu bytes.\n" + "InnoDB: size %lu len %lu\n", + (ulong) dst_pos, actual_size, len); + fflush(stderr); + + ut_error; + } + + break; + } +#endif /* HAVE_LZMA */ +#ifdef HAVE_BZIP2 + case PAGE_BZIP2_ALGORITHM: { + unsigned int dst_pos = UNIV_PAGE_SIZE; + + err = BZ2_bzBuffToBuffDecompress( + (char *)in_buf, + &dst_pos, + (char *)(buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE), + actual_size, + 1, + 0); + + if (err != BZ_OK || (dst_pos == 0 || dst_pos > UNIV_PAGE_SIZE)) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %u bytes.\n" + "InnoDB: size %lu len %lu err %d\n", + dst_pos, actual_size, len, err); + fflush(stderr); + + ut_error; + } + break; + } +#endif /* HAVE_BZIP2 */ +#ifdef HAVE_SNAPPY + case PAGE_SNAPPY_ALGORITHM: + { + snappy_status cstatus; + + cstatus = snappy_uncompress( + (const char *)(buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE), + actual_size, + (char *)in_buf, + &olen); + + if (cstatus != SNAPPY_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %lu bytes.\n" + "InnoDB: size %lu len %lu err %d\n", + olen, actual_size, len, (int)cstatus); + fflush(stderr); + + ut_error; + } + break; + } +#endif /* HAVE_SNAPPY */ + default: + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but compression algorithm %s\n" + "InnoDB: is not 
known.\n" + ,fil_get_compression_alg_name(compression_alg)); + + fflush(stderr); + ut_error; + break; + } + +#ifdef UNIV_PAGECOMPRESS_DEBUG + fprintf(stderr, + "InnoDB: Note: Decompression succeeded for len %lu \n", + len); +#endif /* UNIV_PAGECOMPRESS_DEBUG */ + + srv_stats.pages_page_decompressed.inc(); + + /* Copy the uncompressed page to the buffer pool, not + really any other options. */ + memcpy(buf, in_buf, len); + + // Need to free temporal buffer if no buffer was given + if (page_buf == NULL) { + ut_free(in_buf); + } +} + + diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 0bb72ada5af..ef6aae7889e 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -4,7 +4,7 @@ Copyright (c) 2000, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2014 SkySQL Ab. All Rights Reserved. +Copyright (c) 2013, 2015, MariaDB Corporation. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -104,6 +104,7 @@ this program; if not, write to the Free Software Foundation, Inc., #endif /* UNIV_DEBUG */ #include "fts0priv.h" #include "page0zip.h" +#include "fil0pagecompress.h" #define thd_get_trx_isolation(X) ((enum_tx_isolation)thd_tx_isolation(X)) @@ -562,6 +563,27 @@ ib_cb_t innodb_api_cb[] = { (ib_cb_t) ib_cursor_stmt_begin }; +/** + Structure for CREATE TABLE options (table options). + It needs to be called ha_table_option_struct. + + The option values can be specified in the CREATE TABLE at the end: + CREATE TABLE ( ... 
) *here* +*/ + +ha_create_table_option innodb_table_option_list[]= +{ + /* With this option user can enable page compression feature for the + table */ + HA_TOPTION_BOOL("PAGE_COMPRESSED", page_compressed, 0), + /* With this option user can set zip compression level for page + compression for this table*/ + HA_TOPTION_NUMBER("PAGE_COMPRESSION_LEVEL", page_compression_level, ULINT_UNDEFINED, 0, 9, 1), + /* With this option user can enable atomic writes feature for this table */ + HA_TOPTION_ENUM("ATOMIC_WRITES", atomic_writes, "DEFAULT,ON,OFF", 0), + HA_TOPTION_END +}; + /*************************************************************//** Check whether valid argument given to innodb_ft_*_stopword_table. This function is registered as a callback with MySQL. @@ -632,6 +654,20 @@ static int innobase_checkpoint_state(handlerton *hton, bool disable) return 0; } +/*************************************************************//** +Check for a valid value of innobase_compression_algorithm. +@return 0 for valid innodb_compression_algorithm. */ +static +int +innodb_compression_algorithm_validate( +/*==================================*/ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to system + variable */ + void* save, /*!< out: immediate result + for update function */ + struct st_mysql_value* value); /*!< in: incoming string */ + static const char innobase_hton_name[]= "InnoDB"; static MYSQL_THDVAR_BOOL(support_xa, PLUGIN_VAR_OPCMDARG, @@ -681,6 +717,11 @@ static MYSQL_THDVAR_BOOL(fake_changes, PLUGIN_VAR_OPCMDARG, "This is to cause replication prefetch IO. 
ATTENTION: the transaction started after enabled is affected.", NULL, NULL, FALSE); +static ibool innodb_have_lzo=IF_LZO(1, 0); +static ibool innodb_have_lz4=IF_LZ4(1, 0); +static ibool innodb_have_lzma=IF_LZMA(1, 0); +static ibool innodb_have_bzip2=IF_BZIP2(1, 0); +static ibool innodb_have_snappy=IF_SNAPPY(1, 0); static SHOW_VAR innodb_status_variables[]= { {"available_undo_logs", @@ -885,6 +926,47 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_x_lock_spin_rounds, SHOW_LONGLONG}, {"x_lock_spin_waits", (char*) &export_vars.innodb_x_lock_spin_waits, SHOW_LONGLONG}, + + /* Status variables for page compression */ + {"page_compression_saved", + (char*) &export_vars.innodb_page_compression_saved, SHOW_LONGLONG}, + {"page_compression_trim_sect512", + (char*) &export_vars.innodb_page_compression_trim_sect512, SHOW_LONGLONG}, + {"page_compression_trim_sect1024", + (char*) &export_vars.innodb_page_compression_trim_sect1024, SHOW_LONGLONG}, + {"page_compression_trim_sect2048", + (char*) &export_vars.innodb_page_compression_trim_sect2048, SHOW_LONGLONG}, + {"page_compression_trim_sect4096", + (char*) &export_vars.innodb_page_compression_trim_sect4096, SHOW_LONGLONG}, + {"page_compression_trim_sect8192", + (char*) &export_vars.innodb_page_compression_trim_sect8192, SHOW_LONGLONG}, + {"page_compression_trim_sect16384", + (char*) &export_vars.innodb_page_compression_trim_sect16384, SHOW_LONGLONG}, + {"page_compression_trim_sect32768", + (char*) &export_vars.innodb_page_compression_trim_sect32768, SHOW_LONGLONG}, + {"num_index_pages_written", + (char*) &export_vars.innodb_index_pages_written, SHOW_LONGLONG}, + {"num_non_index_pages_written", + (char*) &export_vars.innodb_non_index_pages_written, SHOW_LONGLONG}, + {"num_pages_page_compressed", + (char*) &export_vars.innodb_pages_page_compressed, SHOW_LONGLONG}, + {"num_page_compressed_trim_op", + (char*) &export_vars.innodb_page_compressed_trim_op, SHOW_LONGLONG}, + 
{"num_page_compressed_trim_op_saved", + (char*) &export_vars.innodb_page_compressed_trim_op_saved, SHOW_LONGLONG}, + {"num_pages_page_decompressed", + (char*) &export_vars.innodb_pages_page_decompressed, SHOW_LONGLONG}, + {"have_lz4", + (char*) &innodb_have_lz4, SHOW_BOOL}, + {"have_lzo", + (char*) &innodb_have_lzo, SHOW_BOOL}, + {"have_lzma", + (char*) &innodb_have_lzma, SHOW_BOOL}, + {"have_bzip2", + (char*) &innodb_have_bzip2, SHOW_BOOL}, + {"have_snappy", + (char*) &innodb_have_snappy, SHOW_BOOL}, + {NullS, NullS, SHOW_LONG} }; @@ -3279,6 +3361,8 @@ innobase_init( if (srv_file_per_table) innobase_hton->tablefile_extensions = ha_innobase_exts; + innobase_hton->table_options = innodb_table_option_list; + ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR); #ifndef DBUG_OFF @@ -3347,6 +3431,51 @@ innobase_init( } } +#ifndef HAVE_LZ4 + if (innodb_compression_algorithm == PAGE_LZ4_ALGORITHM) { + sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" + "InnoDB: liblz4 is not installed. \n", + innodb_compression_algorithm); + goto error; + } +#endif + +#ifndef HAVE_LZO + if (innodb_compression_algorithm == PAGE_LZO_ALGORITHM) { + sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" + "InnoDB: liblzo is not installed. \n", + innodb_compression_algorithm); + goto error; + } +#endif + +#ifndef HAVE_LZMA + if (innodb_compression_algorithm == PAGE_LZMA_ALGORITHM) { + sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" + "InnoDB: liblzma is not installed. \n", + innodb_compression_algorithm); + goto error; + } +#endif + +#ifndef HAVE_BZIP2 + if (innodb_compression_algorithm == PAGE_BZIP2_ALGORITHM) { + sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" + "InnoDB: libbz2 is not installed. 
\n", + innodb_compression_algorithm); + goto error; + } +#endif + +#ifndef HAVE_SNAPPY + if (innodb_compression_algorithm == PAGE_SNAPPY_ALGORITHM) { + sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" + "InnoDB: libsnappy is not installed. \n", + innodb_compression_algorithm); + goto error; + } +#endif + os_innodb_umask = (ulint) my_umask; /* First calculate the default path for innodb_data_home_dir etc., @@ -10299,11 +10428,16 @@ innobase_table_flags( enum row_type row_format; rec_format_t innodb_row_format = REC_FORMAT_COMPACT; bool use_data_dir; + ha_table_option_struct *options= form->s->option_struct; /* Cache the value of innodb_file_format, in case it is modified by another thread while the table is being created. */ const ulint file_format_allowed = srv_file_format; + /* Cache the value of innobase_compression_level, in case it is + modified by another thread while the table is being created. */ + const ulint default_compression_level = page_zip_level; + *flags = 0; *flags2 = 0; @@ -10352,6 +10486,8 @@ index_bad: } } + row_format = form->s->row_type; + if (create_info->key_block_size) { /* The requested compressed page size (key_block_size) is given in kilobytes. If it is a valid number, store @@ -10399,8 +10535,6 @@ index_bad: } } - row_format = form->s->row_type; - if (zip_ssize && zip_allowed) { /* if ROW_FORMAT is set to default, automatically change it to COMPRESSED.*/ @@ -10455,10 +10589,18 @@ index_bad: " innodb_file_format > Antelope.", get_row_format_name(row_format)); } else { - innodb_row_format = (row_format == ROW_TYPE_DYNAMIC - ? 
REC_FORMAT_DYNAMIC - : REC_FORMAT_COMPRESSED); - break; + switch(row_format) { + case ROW_TYPE_COMPRESSED: + innodb_row_format = REC_FORMAT_COMPRESSED; + break; + case ROW_TYPE_DYNAMIC: + innodb_row_format = REC_FORMAT_DYNAMIC; + break; + default: + /* Not possible, avoid compiler warning */ + break; + } + break; /* Correct row_format */ } zip_allowed = FALSE; /* fall through to set row_format = COMPACT */ @@ -10486,7 +10628,15 @@ index_bad: && ((create_info->data_file_name != NULL) && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)); - dict_tf_set(flags, innodb_row_format, zip_ssize, use_data_dir); + /* Set up table dictionary flags */ + dict_tf_set(flags, + innodb_row_format, + zip_ssize, + use_data_dir, + options->page_compressed, + (ulint)options->page_compression_level == ULINT_UNDEFINED ? + default_compression_level : options->page_compression_level, + options->atomic_writes); if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { *flags2 |= DICT_TF2_TEMPORARY; @@ -10505,6 +10655,113 @@ index_bad: } /*****************************************************************//** +Check engine specific table options not handled by SQL-parser. 
+@return NULL if valid, string if not */ +UNIV_INTERN +const char* +ha_innobase::check_table_options( + THD *thd, /*!< in: thread handle */ + TABLE* table, /*!< in: information on table + columns and indexes */ + HA_CREATE_INFO* create_info, /*!< in: more information of the + created table, contains also the + create statement string */ + const bool use_tablespace, /*!< in: use file par table */ + const ulint file_format) +{ + enum row_type row_format = table->s->row_type;; + ha_table_option_struct *options= table->s->option_struct; + atomic_writes_t awrites = (atomic_writes_t)options->atomic_writes; + + /* Check page compression requirements */ + if (options->page_compressed) { + + if (row_format == ROW_TYPE_COMPRESSED) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED table can't have" + " ROW_TYPE=COMPRESSED"); + return "PAGE_COMPRESSED"; + } + + if (row_format == ROW_TYPE_REDUNDANT) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED table can't have" + " ROW_TYPE=REDUNDANT"); + return "PAGE_COMPRESSED"; + } + + if (!use_tablespace) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED requires" + " innodb_file_per_table."); + return "PAGE_COMPRESSED"; + } + + if (file_format < UNIV_FORMAT_B) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED requires" + " innodb_file_format > Antelope."); + return "PAGE_COMPRESSED"; + } + + if (create_info->key_block_size) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED table can't have" + " key_block_size"); + return "PAGE_COMPRESSED"; + } + } + + /* Check page compression level requirements, some of them are + already checked above */ + if ((ulint)options->page_compression_level != ULINT_UNDEFINED) { + if (options->page_compressed == 
false) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSION_LEVEL requires" + " PAGE_COMPRESSED"); + return "PAGE_COMPRESSION_LEVEL"; + } + + if (options->page_compression_level < 0 || options->page_compression_level > 9) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: invalid PAGE_COMPRESSION_LEVEL = %lu." + " Valid values are [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", + options->page_compression_level); + return "PAGE_COMPRESSION_LEVEL"; + } + } + + /* Check atomic writes requirements */ + if (awrites == ATOMIC_WRITES_ON || + (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) { + if (!use_tablespace) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: ATOMIC_WRITES requires" + " innodb_file_per_table."); + return "ATOMIC_WRITES"; + } + } + + return 0; +} + +/*****************************************************************//** Creates a new table to an InnoDB database. @return error number */ UNIV_INTERN @@ -10535,6 +10792,7 @@ ha_innobase::create( while creating the table. So we read the current value here and make all further decisions based on this. */ bool use_tablespace = srv_file_per_table; + const ulint file_format = srv_file_format; /* Zip Shift Size - log2 - 9 of compressed page size, zero for uncompressed */ @@ -10558,6 +10816,12 @@ ha_innobase::create( /* Create the table definition in InnoDB */ + /* Validate table options not handled by the SQL-parser */ + if(check_table_options(thd, form, create_info, use_tablespace, + file_format)) { + DBUG_RETURN(HA_WRONG_CREATE_OPTION); + } + /* Validate create options if innodb_strict_mode is set. 
*/ if (create_options_are_invalid( thd, form, create_info, use_tablespace)) { @@ -14949,6 +15213,12 @@ ha_innobase::check_if_incompatible_data( HA_CREATE_INFO* info, uint table_changes) { + ha_table_option_struct *param_old, *param_new; + + /* Cache engine specific options */ + param_new = info->option_struct; + param_old = table->s->option_struct; + innobase_copy_frm_flags_from_create_info(prebuilt->table, info); if (table_changes != IS_EQUAL_YES) { @@ -14975,6 +15245,13 @@ ha_innobase::check_if_incompatible_data( return(COMPATIBLE_DATA_NO); } + /* Changes on engine specific table options requests a rebuild of the table. */ + if (param_new->page_compressed != param_old->page_compressed || + param_new->page_compression_level != param_old->page_compression_level || + param_new->atomic_writes != param_old->atomic_writes) { + return(COMPATIBLE_DATA_NO); + } + return(COMPATIBLE_DATA_YES); } @@ -17521,12 +17798,6 @@ static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay, "innodb_thread_concurrency is reached (0 by default)", NULL, NULL, 0, 0, ~0UL, 0); -static MYSQL_SYSVAR_UINT(compression_level, page_zip_level, - PLUGIN_VAR_RQCMDARG, - "Compression level used for compressed row format. 0 is no compression" - ", 1 is fastest, 9 is best compression and default is 6.", - NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0); - static MYSQL_SYSVAR_BOOL(log_compressed_pages, page_zip_log_pages, PLUGIN_VAR_OPCMDARG, "Enables/disables the logging of entire compressed page images." @@ -18225,6 +18496,47 @@ static MYSQL_SYSVAR_BOOL(use_stacktrace, srv_use_stacktrace, "Print stacktrace on long semaphore wait (off by default supported only on linux)", NULL, NULL, FALSE); +static MYSQL_SYSVAR_UINT(compression_level, page_zip_level, + PLUGIN_VAR_RQCMDARG, + "Compression level used for zlib compression. 
0 is no compression" + ", 1 is fastest, 9 is best compression and default is 6.", + NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0); + +static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim, + PLUGIN_VAR_OPCMDARG, + "Use trim. Default FALSE.", + NULL, NULL, FALSE); + +static const char *page_compression_algorithms[]= { "none", "zlib", "lz4", "lzo", "lzma", "bzip2", "snappy", 0 }; +static TYPELIB page_compression_algorithms_typelib= +{ + array_elements(page_compression_algorithms) - 1, 0, + page_compression_algorithms, 0 +}; +static MYSQL_SYSVAR_ENUM(compression_algorithm, innodb_compression_algorithm, + PLUGIN_VAR_OPCMDARG, + "Compression algorithm used on page compression. One of: none, zlib, lz4, lzo, lzma, or bzip2", + innodb_compression_algorithm_validate, NULL, + /* We use here the largest number of supported compression method to + enable all those methods that are available. Availability of compression + method is verified on innodb_compression_algorithm_validate function. */ + PAGE_UNCOMPRESSED, + &page_compression_algorithms_typelib); + +static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Number of multi-threaded flush threads", + NULL, NULL, + MTFLUSH_DEFAULT_WORKER, /* Default setting */ + 1, /* Minimum setting */ + MTFLUSH_MAX_WORKER, /* Max setting */ + 0); + +static MYSQL_SYSVAR_BOOL(use_mtflush, srv_use_mtflush, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Use multi-threaded flush. 
Default FALSE.", + NULL, NULL, FALSE); + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(log_block_size), MYSQL_SYSVAR(additional_mem_pool_size), @@ -18422,6 +18734,10 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(fake_changes), MYSQL_SYSVAR(locking_fake_changes), MYSQL_SYSVAR(use_stacktrace), + MYSQL_SYSVAR(use_trim), + MYSQL_SYSVAR(compression_algorithm), + MYSQL_SYSVAR(mtflush_threads), + MYSQL_SYSVAR(use_mtflush), MYSQL_SYSVAR(simulate_comp_failures), NULL }; @@ -18714,6 +19030,9 @@ ib_senderrf( case IB_LOG_LEVEL_FATAL: l = 0; break; + default: + l = 0; + break; } my_printv_error(code, format, MYF(l), args); @@ -18920,15 +19239,117 @@ int ha_innobase::multi_range_read_explain_info(uint mrr_mode, char *str, size_t return ds_mrr.dsmrr_explain_info(mrr_mode, str, size); } -/* +/* A helper function used only in index_cond_func_innodb */ bool ha_innobase::is_thd_killed() -{ +{ return thd_kill_level(user_thd); } +/*************************************************************//** +Check for a valid value of innobase_compression_algorithm. +@return 0 for valid innodb_compression_algorithm. 
*/ +static +int +innodb_compression_algorithm_validate( +/*==================================*/ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to system + variable */ + void* save, /*!< out: immediate result + for update function */ + struct st_mysql_value* value) /*!< in: incoming string */ +{ + long compression_algorithm; + DBUG_ENTER("innobase_compression_algorithm_validate"); + + if (value->value_type(value) == MYSQL_VALUE_TYPE_STRING) { + char buff[STRING_BUFFER_USUAL_SIZE]; + const char *str; + int length= sizeof(buff); + + if (!(str= value->val_str(value, buff, &length))) { + DBUG_RETURN(1); + } + + if ((compression_algorithm= (long)find_type(str, &page_compression_algorithms_typelib, 0) - 1) < 0) { + DBUG_RETURN(1); + } + } else { + long long tmp; + + if (value->val_int(value, &tmp)) { + DBUG_RETURN(1); + } + + if (tmp < 0 || tmp >= page_compression_algorithms_typelib.count) { + DBUG_RETURN(1); + } + + compression_algorithm= (long) tmp; + } + + *reinterpret_cast<ulong*>(save) = compression_algorithm; + +#ifndef HAVE_LZ4 + if (compression_algorithm == PAGE_LZ4_ALGORITHM) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "InnoDB: innodb_compression_algorithm = %lu unsupported.\n" + "InnoDB: liblz4 is not installed. \n", + compression_algorithm); + DBUG_RETURN(1); + } +#endif + +#ifndef HAVE_LZO + if (compression_algorithm == PAGE_LZO_ALGORITHM) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "InnoDB: innodb_compression_algorithm = %lu unsupported.\n" + "InnoDB: liblzo is not installed. \n", + compression_algorithm); + DBUG_RETURN(1); + } +#endif + +#ifndef HAVE_LZMA + if (compression_algorithm == PAGE_LZMA_ALGORITHM) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "InnoDB: innodb_compression_algorithm = %lu unsupported.\n" + "InnoDB: liblzma is not installed. 
\n", + compression_algorithm); + DBUG_RETURN(1); + } +#endif + +#ifndef HAVE_BZIP2 + if (compression_algorithm == PAGE_BZIP2_ALGORITHM) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "InnoDB: innodb_compression_algorithm = %lu unsupported.\n" + "InnoDB: libbz2 is not installed. \n", + compression_algorithm); + DBUG_RETURN(1); + } +#endif + +#ifndef HAVE_SNAPPY + if (compression_algorithm == PAGE_SNAPPY_ALGORITHM) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "InnoDB: innodb_compression_algorithm = %lu unsupported.\n" + "InnoDB: libsnappy is not installed. \n", + compression_algorithm); + DBUG_RETURN(1); + } +#endif + DBUG_RETURN(0); +} + /********************************************************************** Issue a warning that the row is too big. */ UNIV_INTERN diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index 823d136d54b..6ed4174f042 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 2000, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -57,6 +58,21 @@ typedef struct st_innobase_share { /** Prebuilt structures in an InnoDB table handle used within MySQL */ struct row_prebuilt_t; +/** Engine specific table options are definined using this struct */ +struct ha_table_option_struct +{ + bool page_compressed; /*!< Table is using page compression + if this option is true. */ + int page_compression_level; /*!< Table page compression level + or UNIV_UNSPECIFIED. */ + uint atomic_writes; /*!< Use atomic writes for this + table if this options is ON or + in DEFAULT if + srv_use_atomic_writes=1. 
+ Atomic writes are not used if + value OFF.*/ +}; + /** The class defining a handle to an Innodb table */ class ha_innobase: public handler { @@ -176,6 +192,8 @@ class ha_innobase: public handler char* norm_name, char* temp_path, char* remote_path); + const char* check_table_options(THD *thd, TABLE* table, + HA_CREATE_INFO* create_info, const bool use_tablespace, const ulint file_format); int create(const char *name, register TABLE *form, HA_CREATE_INFO *create_info); int truncate(); diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index cc18f709043..439c92b0638 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 2005, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -263,6 +264,22 @@ ha_innobase::check_if_supported_inplace_alter( update_thd(); trx_search_latch_release_if_reserved(prebuilt->trx); + /* Change on engine specific table options require rebuild of the + table */ + if (ha_alter_info->handler_flags + == Alter_inplace_info::CHANGE_CREATE_OPTION) { + ha_table_option_struct *new_options= ha_alter_info->create_info->option_struct; + ha_table_option_struct *old_options= table->s->option_struct; + + if (new_options->page_compressed != old_options->page_compressed || + new_options->page_compression_level != old_options->page_compression_level || + new_options->atomic_writes != old_options->atomic_writes) { + ha_alter_info->unsupported_reason = innobase_get_err_msg( + ER_ALTER_OPERATION_NOT_SUPPORTED_REASON); + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + } + if (ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE | 
INNOBASE_ALTER_NOREBUILD @@ -3414,6 +3431,17 @@ ha_innobase::prepare_inplace_alter_table( if (ha_alter_info->handler_flags & Alter_inplace_info::CHANGE_CREATE_OPTION) { + /* Check engine specific table options */ + if (const char* invalid_tbopt = check_table_options( + user_thd, altered_table, + ha_alter_info->create_info, + prebuilt->table->space != 0, + srv_file_format)) { + my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0), + table_type(), invalid_tbopt); + goto err_exit_no_heap; + } + if (const char* invalid_opt = create_options_are_invalid( user_thd, altered_table, ha_alter_info->create_info, diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc index 02d2a2100a4..af5a7467c8e 100644 --- a/storage/xtradb/handler/i_s.cc +++ b/storage/xtradb/handler/i_s.cc @@ -104,6 +104,7 @@ static buf_page_desc_t i_s_page_type[] = { {"COMPRESSED_BLOB", FIL_PAGE_TYPE_ZBLOB}, {"COMPRESSED_BLOB2", FIL_PAGE_TYPE_ZBLOB2}, {"IBUF_INDEX", I_S_PAGE_TYPE_IBUF}, + {"PAGE COMPRESSED", FIL_PAGE_PAGE_COMPRESSED}, {"UNKNOWN", I_S_PAGE_TYPE_UNKNOWN} }; diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index 797287ad222..d8a3e77d820 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1533,6 +1534,12 @@ struct buf_page_t{ state == BUF_BLOCK_ZIP_PAGE and zip.data == NULL means an active buf_pool->watch */ + + ulint write_size; /* Write size is set when this + page is first time written and then + if written again we check is TRIM + operation needed. 
*/ + #ifndef UNIV_HOTBACKUP buf_page_t* hash; /*!< node used in chaining to buf_pool->page_hash or @@ -2174,6 +2181,20 @@ struct CheckUnzipLRUAndLRUList { }; #endif /* UNIV_DEBUG || defined UNIV_BUF_DEBUG */ +/*********************************************************************//** +Aquire LRU list mutex */ +void +buf_pool_mutex_enter( +/*=================*/ + buf_pool_t* buf_pool); /*!< in: buffer pool */ +/*********************************************************************//** +Exit LRU list mutex */ +void +buf_pool_mutex_exit( +/*================*/ + buf_pool_t* buf_pool); /*!< in: buffer pool */ + + #ifndef UNIV_NONINL #include "buf0buf.ic" #endif diff --git a/storage/xtradb/include/buf0flu.h b/storage/xtradb/include/buf0flu.h index 56b0c314b5c..5cc0eb9d4cf 100644 --- a/storage/xtradb/include/buf0flu.h +++ b/storage/xtradb/include/buf0flu.h @@ -36,7 +36,14 @@ Created 11/5/1995 Heikki Tuuri /** Flag indicating if the page_cleaner is in active state. */ extern ibool buf_page_cleaner_is_active; -/** Flag indicating if the lru_manager is in active state. 
*/ +/** Handled page counters for a single flush */ +struct flush_counters_t { + ulint flushed; /*!< number of dirty pages flushed */ + ulint evicted; /*!< number of clean pages evicted */ + ulint unzip_LRU_evicted;/*!< number of uncompressed page images + evicted */ +}; + extern bool buf_lru_manager_is_active; /********************************************************************//** @@ -304,6 +311,63 @@ buf_flush_flush_list_in_progress(void) /*==================================*/ __attribute__((warn_unused_result)); +/******************************************************************//** +Start a buffer flush batch for LRU or flush list */ +ibool +buf_flush_start( +/*============*/ + buf_pool_t* buf_pool, /*!< buffer pool instance */ + buf_flush_t flush_type); /*!< in: BUF_FLUSH_LRU + or BUF_FLUSH_LIST */ + +/******************************************************************//** +End a buffer flush batch for LRU or flush list */ +void +buf_flush_end( +/*==========*/ + buf_pool_t* buf_pool, /*!< buffer pool instance */ + buf_flush_t flush_type); /*!< in: BUF_FLUSH_LRU + or BUF_FLUSH_LIST */ + +/*******************************************************************//** +This utility flushes dirty blocks from the end of the LRU list or flush_list. +NOTE 1: in the case of an LRU flush the calling thread may own latches to +pages: to avoid deadlocks, this function must be written so that it cannot +end up waiting for these latches! NOTE 2: in the case of a flush list flush, +the calling thread is not allowed to own any latches on pages! 
+@return number of blocks for which the write request was queued */ +__attribute__((nonnull)) +void +buf_flush_batch( +/*============*/ + buf_pool_t* buf_pool, /*!< in: buffer pool instance */ + buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU or + BUF_FLUSH_LIST; if BUF_FLUSH_LIST, + then the caller must not own any + latches on pages */ + ulint min_n, /*!< in: wished minimum mumber of blocks + flushed (it is not guaranteed that the + actual number is that big, though) */ + lsn_t lsn_limit, /*!< in: in the case of BUF_FLUSH_LIST + all blocks whose oldest_modification is + smaller than this should be flushed + (if their number does not exceed + min_n), otherwise ignored */ + bool limited_lru_scan,/*!< in: for LRU flushes, if true, + allow to scan only up to + srv_LRU_scan_depth pages in total */ + flush_counters_t* n); /*!< out: flushed/evicted page + counts */ + + +/******************************************************************//** +Gather the aggregated stats for both flush list and LRU list flushing */ +void +buf_flush_common( +/*=============*/ + buf_flush_t flush_type, /*!< in: type of flush */ + ulint page_count); /*!< in: number of pages flushed */ + #ifndef UNIV_NONINL #include "buf0flu.ic" #endif diff --git a/storage/xtradb/include/buf0mtflu.h b/storage/xtradb/include/buf0mtflu.h new file mode 100644 index 00000000000..0475335bbf5 --- /dev/null +++ b/storage/xtradb/include/buf0mtflu.h @@ -0,0 +1,95 @@ +/***************************************************************************** + +Copyright (C) 2014 SkySQL Ab. All Rights Reserved. +Copyright (C) 2014 Fusion-io. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/buf0mtflu.h +Multi-threadef flush method interface function prototypes + +Created 06/02/2014 Jan Lindström jan.lindstrom@skysql.com + Dhananjoy Das DDas@fusionio.com +***********************************************************************/ + +#ifndef buf0mtflu_h +#define buf0mtflu_h + +/******************************************************************//** +Add exit work item to work queue to signal multi-threded flush +threads that they should exit. +*/ +void +buf_mtflu_io_thread_exit(void); +/*===========================*/ + +/******************************************************************//** +Initialize multi-threaded flush thread syncronization data. +@return Initialized multi-threaded flush thread syncroniztion data. 
*/ +void* +buf_mtflu_handler_init( +/*===================*/ + ulint n_threads, /*!< in: Number of threads to create */ + ulint wrk_cnt); /*!< in: Number of work items */ + +/******************************************************************//** +Return true if multi-threaded flush is initialized +@return true if initialized, false if not */ +bool +buf_mtflu_init_done(void); +/*======================*/ + +/*********************************************************************//** +Clears up tail of the LRU lists: +* Put replaceable pages at the tail of LRU to the free list +* Flush dirty pages at the tail of LRU to the disk +The depth to which we scan each buffer pool is controlled by dynamic +config parameter innodb_LRU_scan_depth. +@return total pages flushed */ +UNIV_INTERN +ulint +buf_mtflu_flush_LRU_tail(void); +/*===========================*/ + +/*******************************************************************//** +Multi-threaded version of buf_flush_list +*/ +bool +buf_mtflu_flush_list( +/*=================*/ + ulint min_n, /*!< in: wished minimum mumber of blocks + flushed (it is not guaranteed that the + actual number is that big, though) */ + lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all + blocks whose oldest_modification is + smaller than this should be flushed + (if their number does not exceed + min_n), otherwise ignored */ + ulint* n_processed); /*!< out: the number of pages + which were processed is passed + back to caller. Ignored if NULL */ + +/*********************************************************************//** +Set correct thread identifiers to io thread array based on +information we have. 
*/ +void +buf_mtflu_set_thread_ids( +/*=====================*/ + ulint n_threads, /*!<in: Number of threads to fill */ + void* ctx, /*!<in: thread context */ + os_thread_id_t* thread_ids); /*!<in: thread id array */ + +#endif diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index def7b246ead..43fa613e756 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -2,6 +2,7 @@ Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -42,6 +43,8 @@ Created 1/8/1996 Heikki Tuuri #include "ut0byte.h" #include "trx0types.h" #include "row0types.h" +#include "fsp0fsp.h" +#include "dict0pagecompress.h" extern bool innodb_table_stats_not_found; extern bool innodb_index_stats_not_found; @@ -918,7 +921,14 @@ dict_tf_set( ulint* flags, /*!< in/out: table */ rec_format_t format, /*!< in: file format */ ulint zip_ssize, /*!< in: zip shift size */ - bool remote_path) /*!< in: table uses DATA DIRECTORY */ + bool remote_path, /*!< in: table uses DATA DIRECTORY + */ + bool page_compressed,/*!< in: table uses page compressed + pages */ + ulint page_compression_level, /*!< in: table page compression + level */ + ulint atomic_writes) /*!< in: table atomic + writes option value*/ __attribute__((nonnull)); /********************************************************************//** Convert a 32 bit integer table flags to the 32 bit integer that is diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic index 6bfd7f6cdae..2b698dd7218 100644 --- a/storage/xtradb/include/dict0dict.ic +++ b/storage/xtradb/include/dict0dict.ic @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 
1996, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -537,10 +538,27 @@ dict_tf_is_valid( ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags); ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(flags); ulint unused = DICT_TF_GET_UNUSED(flags); + ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(flags); + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags); + ulint data_dir = DICT_TF_HAS_DATA_DIR(flags); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(flags); /* Make sure there are no bits that we do not know about. */ if (unused != 0) { + fprintf(stderr, + "InnoDB: Error: table unused flags are %ld" + " in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + unused, + compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); + return(false); } else if (atomic_blobs) { @@ -550,12 +568,36 @@ dict_tf_is_valid( data stored off-page in the clustered index. */ if (!compact) { + fprintf(stderr, + "InnoDB: Error: table compact flags are %ld" + " in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + compact, compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); + return(false); } } else if (zip_ssize) { /* Antelope does not support COMPRESSED row format. 
*/ + fprintf(stderr, + "InnoDB: Error: table flags are %ld" + " in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + flags, compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); + return(false); } @@ -568,6 +610,59 @@ dict_tf_is_valid( || !atomic_blobs || zip_ssize > PAGE_ZIP_SSIZE_MAX) { + fprintf(stderr, + "InnoDB: Error: table compact flags are %ld in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + flags, + compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + + ); + return(false); + } + } + + if (page_compression || page_compression_level) { + /* Page compression format must have compact and + atomic_blobs and page_compression_level requires + page_compression */ + if (!compact + || !page_compression + || !atomic_blobs) { + + fprintf(stderr, + "InnoDB: Error: table flags are %ld in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + flags, compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); + + return(false); + } + } + + if (atomic_writes) { + + if(atomic_writes > ATOMIC_WRITES_OFF) { + + fprintf(stderr, + "InnoDB: Error: table flags are %ld in the data dictionary and are corrupted\n" + "InnoDB: Error: 
data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + flags, compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); return(false); } } @@ -594,6 +689,11 @@ dict_sys_tables_type_validate( ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(type); ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(type); ulint unused = DICT_TF_GET_UNUSED(type); + ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(type); + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(type); + + ut_a(atomic_writes <= ATOMIC_WRITES_OFF); /* The low order bit of SYS_TABLES.TYPE is always set to 1. If the format is UNIV_FORMAT_B or higher, this field is the same @@ -604,12 +704,16 @@ dict_sys_tables_type_validate( if (redundant) { if (zip_ssize || atomic_blobs) { + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=Redundant, zip_ssize %lu atomic_blobs %lu\n", + zip_ssize, atomic_blobs); return(ULINT_UNDEFINED); } } /* Make sure there are no bits that we do not know about. */ if (unused) { + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, unused %lu\n", + type, unused); return(ULINT_UNDEFINED); } @@ -624,6 +728,8 @@ dict_sys_tables_type_validate( } else if (zip_ssize) { /* Antelope does not support COMPRESSED format. */ + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu\n", + type, zip_ssize); return(ULINT_UNDEFINED); } @@ -633,11 +739,15 @@ dict_sys_tables_type_validate( should be in N_COLS, but we already know about the low_order_bit and DICT_N_COLS_COMPACT flags. */ if (!atomic_blobs) { + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu atomic_blobs %lu\n", + type, zip_ssize, atomic_blobs); return(ULINT_UNDEFINED); } /* Validate that the number is within allowed range. 
*/ if (zip_ssize > PAGE_ZIP_SSIZE_MAX) { + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu max %d\n", + type, zip_ssize, PAGE_ZIP_SSIZE_MAX); return(ULINT_UNDEFINED); } } @@ -647,6 +757,27 @@ dict_sys_tables_type_validate( format, so the DATA_DIR flag is compatible with any other table flags. However, it is not used with TEMPORARY tables.*/ + if (page_compression || page_compression_level) { + /* page compressed row format must have low_order_bit and + atomic_blobs bits set and the DICT_N_COLS_COMPACT flag + should be in N_COLS, but we already know about the + low_order_bit and DICT_N_COLS_COMPACT flags. */ + + if (!atomic_blobs || !page_compression) { + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, page_compression %lu page_compression_level %lu\n" + "InnoDB: Error: atomic_blobs %lu\n", + type, page_compression, page_compression_level, atomic_blobs); + return(ULINT_UNDEFINED); + } + } + + /* Validate that the atomic writes number is within allowed range. */ + if (atomic_writes > ATOMIC_WRITES_OFF) { + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, atomic_writes %lu\n", + type, atomic_writes); + return(ULINT_UNDEFINED); + } + /* Return the validated SYS_TABLES.TYPE. 
*/ return(type); } @@ -719,8 +850,16 @@ dict_tf_set( ulint* flags, /*!< in/out: table flags */ rec_format_t format, /*!< in: file format */ ulint zip_ssize, /*!< in: zip shift size */ - bool use_data_dir) /*!< in: table uses DATA DIRECTORY */ + bool use_data_dir, /*!< in: table uses DATA DIRECTORY + */ + bool page_compressed,/*!< in: table uses page compressed + pages */ + ulint page_compression_level, /*!< in: table page compression + level */ + ulint atomic_writes) /*!< in: table atomic writes setup */ { + atomic_writes_t awrites = (atomic_writes_t)atomic_writes; + switch (format) { case REC_FORMAT_REDUNDANT: *flags = 0; @@ -745,6 +884,19 @@ dict_tf_set( if (use_data_dir) { *flags |= (1 << DICT_TF_POS_DATA_DIR); } + + if (page_compressed) { + *flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS) + | (1 << DICT_TF_POS_PAGE_COMPRESSION) + | (page_compression_level << DICT_TF_POS_PAGE_COMPRESSION_LEVEL); + + ut_ad(zip_ssize == 0); + ut_ad(dict_tf_get_page_compression(*flags) == TRUE); + ut_ad(dict_tf_get_page_compression_level(*flags) == page_compression_level); + } + + *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES); + ut_a(dict_tf_get_atomic_writes(*flags) == awrites); } /********************************************************************//** @@ -765,6 +917,9 @@ dict_tf_to_fsp_flags( ulint table_flags) /*!< in: dict_table_t::flags */ { ulint fsp_flags; + ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags); + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags); DBUG_EXECUTE_IF("dict_tf_to_fsp_flags_failure", return(ULINT_UNDEFINED);); @@ -783,7 +938,20 @@ dict_tf_to_fsp_flags( fsp_flags |= DICT_TF_HAS_DATA_DIR(table_flags) ? FSP_FLAGS_MASK_DATA_DIR : 0; + /* In addition, tablespace flags also contain if the page + compression is used for this table. 
*/ + fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION(fsp_flags, page_compression); + + /* In addition, tablespace flags also contain page compression level + if page compression is used for this table. */ + fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(fsp_flags, page_compression_level); + + /* In addition, tablespace flags also contain flag if atomic writes + is used for this table */ + fsp_flags |= FSP_FLAGS_SET_ATOMIC_WRITES(fsp_flags, atomic_writes); + ut_a(fsp_flags_is_valid(fsp_flags)); + ut_a(dict_tf_verify_flags(table_flags, fsp_flags)); return(fsp_flags); } @@ -811,10 +979,15 @@ dict_sys_tables_type_to_tf( /* Adjust bit zero. */ flags = redundant ? 0 : 1; - /* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are the same. */ + /* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION, + PAGE_COMPRESSION_LEVEL, ATOMIC_WRITES are the same. */ flags |= type & (DICT_TF_MASK_ZIP_SSIZE | DICT_TF_MASK_ATOMIC_BLOBS - | DICT_TF_MASK_DATA_DIR); + | DICT_TF_MASK_DATA_DIR + | DICT_TF_MASK_PAGE_COMPRESSION + | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL + | DICT_TF_MASK_ATOMIC_WRITES + ); return(flags); } @@ -842,10 +1015,14 @@ dict_tf_to_sys_tables_type( /* Adjust bit zero. It is always 1 in SYS_TABLES.TYPE */ type = 1; - /* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are the same. */ + /* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION, + PAGE_COMPRESSION_LEVEL, ATOMIC_WRITES are the same. */ type |= flags & (DICT_TF_MASK_ZIP_SSIZE | DICT_TF_MASK_ATOMIC_BLOBS - | DICT_TF_MASK_DATA_DIR); + | DICT_TF_MASK_DATA_DIR + | DICT_TF_MASK_PAGE_COMPRESSION + | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL + | DICT_TF_MASK_ATOMIC_WRITES); return(type); } diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h index b8df96acc30..473aefec418 100644 --- a/storage/xtradb/include/dict0mem.h +++ b/storage/xtradb/include/dict0mem.h @@ -2,6 +2,7 @@ Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, SkySQL Ab. 
All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -130,11 +131,26 @@ This flag prevents older engines from attempting to open the table and allows InnoDB to update_create_info() accordingly. */ #define DICT_TF_WIDTH_DATA_DIR 1 +/** +Width of the page compression flag +*/ +#define DICT_TF_WIDTH_PAGE_COMPRESSION 1 +#define DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL 4 + +/** +Width of atomic writes flag +DEFAULT=0, ON = 1, OFF = 2 +*/ +#define DICT_TF_WIDTH_ATOMIC_WRITES 2 + /** Width of all the currently known table flags */ #define DICT_TF_BITS (DICT_TF_WIDTH_COMPACT \ + DICT_TF_WIDTH_ZIP_SSIZE \ + DICT_TF_WIDTH_ATOMIC_BLOBS \ - + DICT_TF_WIDTH_DATA_DIR) + + DICT_TF_WIDTH_DATA_DIR \ + + DICT_TF_WIDTH_PAGE_COMPRESSION \ + + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL \ + + DICT_TF_WIDTH_ATOMIC_WRITES) /** A mask of all the known/used bits in table flags */ #define DICT_TF_BIT_MASK (~(~0 << DICT_TF_BITS)) @@ -150,9 +166,18 @@ allows InnoDB to update_create_info() accordingly. 
*/ /** Zero relative shift position of the DATA_DIR field */ #define DICT_TF_POS_DATA_DIR (DICT_TF_POS_ATOMIC_BLOBS \ + DICT_TF_WIDTH_ATOMIC_BLOBS) +/** Zero relative shift position of the PAGE_COMPRESSION field */ +#define DICT_TF_POS_PAGE_COMPRESSION (DICT_TF_POS_DATA_DIR \ + + DICT_TF_WIDTH_DATA_DIR) +/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */ +#define DICT_TF_POS_PAGE_COMPRESSION_LEVEL (DICT_TF_POS_PAGE_COMPRESSION \ + + DICT_TF_WIDTH_PAGE_COMPRESSION) +/** Zero relative shift position of the ATOMIC_WRITES field */ +#define DICT_TF_POS_ATOMIC_WRITES (DICT_TF_POS_PAGE_COMPRESSION_LEVEL \ + + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL) /** Zero relative shift position of the start of the UNUSED bits */ -#define DICT_TF_POS_UNUSED (DICT_TF_POS_DATA_DIR \ - + DICT_TF_WIDTH_DATA_DIR) +#define DICT_TF_POS_UNUSED (DICT_TF_POS_ATOMIC_WRITES \ + + DICT_TF_WIDTH_ATOMIC_WRITES) /** Bit mask of the COMPACT field */ #define DICT_TF_MASK_COMPACT \ @@ -170,6 +195,18 @@ allows InnoDB to update_create_info() accordingly. */ #define DICT_TF_MASK_DATA_DIR \ ((~(~0 << DICT_TF_WIDTH_DATA_DIR)) \ << DICT_TF_POS_DATA_DIR) +/** Bit mask of the PAGE_COMPRESSION field */ +#define DICT_TF_MASK_PAGE_COMPRESSION \ + ((~(~0 << DICT_TF_WIDTH_PAGE_COMPRESSION)) \ + << DICT_TF_POS_PAGE_COMPRESSION) +/** Bit mask of the PAGE_COMPRESSION_LEVEL field */ +#define DICT_TF_MASK_PAGE_COMPRESSION_LEVEL \ + ((~(~0 << DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)) \ + << DICT_TF_POS_PAGE_COMPRESSION_LEVEL) +/** Bit mask of the ATOMIC_WRITES field */ +#define DICT_TF_MASK_ATOMIC_WRITES \ + ((~(~0 << DICT_TF_WIDTH_ATOMIC_WRITES)) \ + << DICT_TF_POS_ATOMIC_WRITES) /** Return the value of the COMPACT field */ #define DICT_TF_GET_COMPACT(flags) \ @@ -190,6 +227,19 @@ allows InnoDB to update_create_info() accordingly. 
*/ /** Return the contents of the UNUSED bits */ #define DICT_TF_GET_UNUSED(flags) \ (flags >> DICT_TF_POS_UNUSED) + +/** Return the value of the PAGE_COMPRESSION field */ +#define DICT_TF_GET_PAGE_COMPRESSION(flags) \ + ((flags & DICT_TF_MASK_PAGE_COMPRESSION) \ + >> DICT_TF_POS_PAGE_COMPRESSION) +/** Return the value of the PAGE_COMPRESSION_LEVEL field */ +#define DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags) \ + ((flags & DICT_TF_MASK_PAGE_COMPRESSION_LEVEL) \ + >> DICT_TF_POS_PAGE_COMPRESSION_LEVEL) +/** Return the value of the ATOMIC_WRITES field */ +#define DICT_TF_GET_ATOMIC_WRITES(flags) \ + ((flags & DICT_TF_MASK_ATOMIC_WRITES) \ + >> DICT_TF_POS_ATOMIC_WRITES) /* @} */ #ifndef UNIV_INNOCHECKSUM diff --git a/storage/xtradb/include/dict0pagecompress.h b/storage/xtradb/include/dict0pagecompress.h new file mode 100644 index 00000000000..19a2a6c52f3 --- /dev/null +++ b/storage/xtradb/include/dict0pagecompress.h @@ -0,0 +1,94 @@ +/***************************************************************************** + +Copyright (C) 2013 SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/dict0pagecompress.h +Helper functions for extracting/storing page compression information +to dictionary. 
+ +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +#ifndef dict0pagecompress_h +#define dict0pagecompress_h + +/********************************************************************//** +Extract the page compression level from table flags. +@return page compression level, or 0 if not compressed */ +UNIV_INLINE +ulint +dict_tf_get_page_compression_level( +/*===============================*/ + ulint flags) /*!< in: flags */ + __attribute__((const)); +/********************************************************************//** +Extract the page compression flag from table flags +@return page compression flag, or false if not compressed */ +UNIV_INLINE +ibool +dict_tf_get_page_compression( +/*==========================*/ + ulint flags) /*!< in: flags */ + __attribute__((const)); + +/********************************************************************//** +Check whether the table uses the page compressed page format. +@return page compression level, or 0 if not compressed */ +UNIV_INLINE +ulint +dict_table_page_compression_level( +/*==============================*/ + const dict_table_t* table) /*!< in: table */ + __attribute__((const)); + +/********************************************************************//** +Verify that dictionary flags match tablespace flags +@return true if flags match, false if not */ +UNIV_INLINE +ibool +dict_tf_verify_flags( +/*=================*/ + ulint table_flags, /*!< in: dict_table_t::flags */ + ulint fsp_flags) /*!< in: fil_space_t::flags */ + __attribute__((const)); + +/********************************************************************//** +Extract the atomic writes flag from table flags. 
+@return atomic writes setting as an atomic_writes_t value */ +UNIV_INLINE +atomic_writes_t +dict_tf_get_atomic_writes( +/*======================*/ + ulint flags) /*!< in: flags */ + __attribute__((const)); + +/********************************************************************//** +Check whether the table uses the atomic writes. +@return atomic writes setting as an atomic_writes_t value */ +UNIV_INLINE +atomic_writes_t +dict_table_get_atomic_writes( +/*=========================*/ + const dict_table_t* table); /*!< in: table */ + + +#ifndef UNIV_NONINL +#include "dict0pagecompress.ic" +#endif + +#endif diff --git a/storage/xtradb/include/dict0pagecompress.ic b/storage/xtradb/include/dict0pagecompress.ic new file mode 100644 index 00000000000..811976434a8 --- /dev/null +++ b/storage/xtradb/include/dict0pagecompress.ic @@ -0,0 +1,191 @@ +/***************************************************************************** + +Copyright (C) 2013 SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/dict0pagecompress.ic +Inline implementation for helper functions for extracting/storing +page compression and atomic writes information to dictionary. 
+ +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +/********************************************************************//** +Verify that dictionary flags match tablespace flags +@return true if flags match, false if not */ +UNIV_INLINE +ibool +dict_tf_verify_flags( +/*=================*/ + ulint table_flags, /*!< in: dict_table_t::flags */ + ulint fsp_flags) /*!< in: fil_space_t::flags */ +{ + ulint table_unused = DICT_TF_GET_UNUSED(table_flags); + ulint compact = DICT_TF_GET_COMPACT(table_flags); + ulint ssize = DICT_TF_GET_ZIP_SSIZE(table_flags); + ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(table_flags); + ulint data_dir = DICT_TF_HAS_DATA_DIR(table_flags); + ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags); + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags); + ulint post_antelope = FSP_FLAGS_GET_POST_ANTELOPE(fsp_flags); + ulint zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(fsp_flags); + ulint fsp_atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(fsp_flags); + ulint page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(fsp_flags); + ulint fsp_unused = FSP_FLAGS_GET_UNUSED(fsp_flags); + ulint fsp_page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(fsp_flags); + ulint fsp_page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(fsp_flags); + ulint fsp_atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(fsp_flags); + + DBUG_EXECUTE_IF("dict_tf_verify_flags_failure", + return(ULINT_UNDEFINED);); + + ut_a(!table_unused); + ut_a(!fsp_unused); + ut_a(page_ssize == 0 || page_ssize != 0); /* silence compiler */ + ut_a(compact == 0 || compact == 1); /* silence compiler */ + ut_a(data_dir == 0 || data_dir == 1); /* silence compiler */ + ut_a(post_antelope == 0 || post_antelope == 1); /* silence compiler */ + + if (ssize != zip_ssize) { + fprintf(stderr, + "InnoDB: Error: table flags has zip_ssize %ld" + " in the 
data dictionary\n" + "InnoDB: but the flags in file has zip_ssize %ld\n", + ssize, zip_ssize); + return (FALSE); + } + if (atomic_blobs != fsp_atomic_blobs) { + fprintf(stderr, + "InnoDB: Error: table flags has atomic_blobs %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file has atomic_blobs %ld\n", + atomic_blobs, fsp_atomic_blobs); + + return (FALSE); + } + if (page_compression != fsp_page_compression) { + fprintf(stderr, + "InnoDB: Error: table flags has page_compression %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file ahas page_compression %ld\n", + page_compression, fsp_page_compression); + + return (FALSE); + } + if (page_compression_level != fsp_page_compression_level) { + fprintf(stderr, + "InnoDB: Error: table flags has page_compression_level %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file has page_compression_level %ld\n", + page_compression_level, fsp_page_compression_level); + + return (FALSE); + } + + if (atomic_writes != fsp_atomic_writes) { + fprintf(stderr, + "InnoDB: Error: table flags has atomic writes %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file has atomic_writes %ld\n", + atomic_writes, fsp_atomic_writes); + + return (FALSE); + } + + return(TRUE); +} + +/********************************************************************//** +Extract the page compression level from dict_table_t::flags. +These flags are in memory, so assert that they are valid. +@return page compression level, or 0 if not compressed */ +UNIV_INLINE +ulint +dict_tf_get_page_compression_level( +/*===============================*/ + ulint flags) /*!< in: flags */ +{ + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags); + + ut_ad(page_compression_level <= 9); + + return(page_compression_level); +} + +/********************************************************************//** +Check whether the table uses the page compression page format. 
+@return page compression level, or 0 if not compressed */ +UNIV_INLINE +ulint +dict_table_page_compression_level( +/*==============================*/ + const dict_table_t* table) /*!< in: table */ +{ + ut_ad(table); + ut_ad(dict_tf_get_page_compression(table->flags)); + + return(dict_tf_get_page_compression_level(table->flags)); +} + +/********************************************************************//** +Check whether the table uses the page compression page format. +@return true if page compressed, false if not */ +UNIV_INLINE +ibool +dict_tf_get_page_compression( +/*=========================*/ + ulint flags) /*!< in: flags */ +{ + return(DICT_TF_GET_PAGE_COMPRESSION(flags)); +} + +/********************************************************************//** +Check whether the table uses the page compression page format. +@return true if page compressed, false if not */ +UNIV_INLINE +ibool +dict_table_is_page_compressed( +/*==========================*/ + const dict_table_t* table) /*!< in: table */ +{ + return (dict_tf_get_page_compression(table->flags)); +} + +/********************************************************************//** +Extract the atomic writes flag from table flags. +@return enumerated value of atomic writes */ +UNIV_INLINE +atomic_writes_t +dict_tf_get_atomic_writes( +/*======================*/ + ulint flags) /*!< in: flags */ +{ + return((atomic_writes_t)DICT_TF_GET_ATOMIC_WRITES(flags)); +} + +/********************************************************************//** +Check whether the table uses the atomic writes. 
+@return enumerated value of atomic writes */ +UNIV_INLINE +atomic_writes_t +dict_table_get_atomic_writes( +/*=========================*/ + const dict_table_t* table) /*!< in: table */ +{ + return ((atomic_writes_t)dict_tf_get_atomic_writes(table->flags)); +} diff --git a/storage/xtradb/include/dict0types.h b/storage/xtradb/include/dict0types.h index d34b6f7eab3..909fdf9cf3d 100644 --- a/storage/xtradb/include/dict0types.h +++ b/storage/xtradb/include/dict0types.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -83,6 +84,14 @@ enum ib_quiesce_t { #define TEMP_TABLE_PREFIX "#sql" #define TEMP_TABLE_PATH_PREFIX "/" TEMP_TABLE_PREFIX + +/** Enum values for atomic_writes table option */ +typedef enum { + ATOMIC_WRITES_DEFAULT = 0, + ATOMIC_WRITES_ON = 1, + ATOMIC_WRITES_OFF = 2 +} atomic_writes_t; + #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG /** Flag to control insert buffer debugging. */ extern uint ibuf_debug; diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index 7edf79043d3..4fd84ad9fbe 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -127,11 +128,36 @@ extern fil_addr_t fil_addr_null; data file (ibdata*, not *.ibd): the file has been flushed to disk at least up to this lsn */ +/** If page type is FIL_PAGE_COMPRESSED then the 8 bytes starting at +FIL_PAGE_FILE_FLUSH_LSN are broken down as follows: */ + +/** Control information version format (u8) */ +static const ulint FIL_PAGE_VERSION = FIL_PAGE_FILE_FLUSH_LSN; + +/** Compression algorithm (u8) */ +static const ulint FIL_PAGE_ALGORITHM_V1 = FIL_PAGE_VERSION + 1; + +/** Original page type (u16) */ +static const ulint FIL_PAGE_ORIGINAL_TYPE_V1 = FIL_PAGE_ALGORITHM_V1 + 1; + +/** Original data size in bytes (u16)*/ +static const ulint FIL_PAGE_ORIGINAL_SIZE_V1 = FIL_PAGE_ORIGINAL_TYPE_V1 + 2; + +/** Size after compression (u16)*/ +static const ulint FIL_PAGE_COMPRESS_SIZE_V1 = FIL_PAGE_ORIGINAL_SIZE_V1 + 2; + #define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this contains the space id of the page */ #define FIL_PAGE_SPACE_ID FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID #define FIL_PAGE_DATA 38 /*!< start of the data on the page */ +/* Following are used when page compression is used */ +#define FIL_PAGE_COMPRESSED_SIZE 2 /*!< Number of bytes used to store + actual payload data size on + compressed pages. */ +#define FIL_PAGE_COMPRESSION_ZLIB 1 /*!< Compression algorithm ZLIB. */ +#define FIL_PAGE_COMPRESSION_LZ4 2 /*!< Compression algorithm LZ4. 
*/ + /* @} */ /** File page trailer @{ */ #define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /*!< the low 4 bytes of this are used @@ -142,6 +168,7 @@ extern fil_addr_t fil_addr_null; /* @} */ /** File page types (values of FIL_PAGE_TYPE) @{ */ +#define FIL_PAGE_PAGE_COMPRESSED 34354 /*!< Page compressed page */ #define FIL_PAGE_INDEX 17855 /*!< B-tree node */ #define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */ #define FIL_PAGE_INODE 3 /*!< Index node */ @@ -156,7 +183,8 @@ extern fil_addr_t fil_addr_null; #define FIL_PAGE_TYPE_BLOB 10 /*!< Uncompressed BLOB page */ #define FIL_PAGE_TYPE_ZBLOB 11 /*!< First compressed BLOB page */ #define FIL_PAGE_TYPE_ZBLOB2 12 /*!< Subsequent compressed BLOB page */ -#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_ZBLOB2 +#define FIL_PAGE_TYPE_COMPRESSED 13 /*!< Compressed page */ +#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_COMPRESSED /*!< Last page type */ /* @} */ @@ -221,6 +249,7 @@ struct fil_node_t { ib_int64_t flush_counter;/*!< up to what modification_counter value we have flushed the modifications to disk */ + ulint file_block_size; UT_LIST_NODE_T(fil_node_t) chain; /*!< link field for the file chain */ UT_LIST_NODE_T(fil_node_t) LRU; @@ -299,6 +328,9 @@ struct fil_space_t { /*!< true if this space is currently in unflushed_spaces */ ibool is_corrupt; + bool printed_compression_failure; + /*!< true if we have already printed + compression failure */ UT_LIST_NODE_T(fil_space_t) space_list; /*!< list of all spaces */ ulint magic_n;/*!< FIL_SPACE_MAGIC_N */ @@ -576,8 +608,10 @@ fil_read_first_page( ulint* space_id, /*!< out: tablespace ID */ lsn_t* min_flushed_lsn, /*!< out: min of flushed lsn values in data files */ - lsn_t* max_flushed_lsn) /*!< out: max of flushed + lsn_t* max_flushed_lsn, /*!< out: max of flushed lsn values in data files */ + ulint orig_space_id) /*!< in: file space id or + ULINT_UNDEFINED */ __attribute__((warn_unused_result)); /*******************************************************************//** Increments the count of 
pending operation, if space is not being deleted. @@ -914,8 +948,8 @@ fil_space_get_n_reserved_extents( Reads or writes data. This operation is asynchronous (aio). @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do i/o on a tablespace which does not exist */ -#define fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message) \ - _fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, NULL) +#define fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, write_size) \ + _fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, write_size, NULL) UNIV_INTERN dberr_t @@ -945,7 +979,12 @@ _fil_io( or from where to write; in aio this must be appropriately aligned */ void* message, /*!< in: message for aio handler if non-sync - aio used, else ignored */ + aio used, else ignored */ + ulint* write_size, /*!< in/out: Actual write size initialized + after first successful trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ trx_t* trx) __attribute__((nonnull(8))); /**********************************************************************//** @@ -1223,4 +1262,50 @@ fil_space_set_corrupt( /*==================*/ ulint space_id); +/****************************************************************//** +Acquire fil_system mutex */ +void +fil_system_enter(void); +/*==================*/ +/****************************************************************//** +Release fil_system mutex */ +void +fil_system_exit(void); +/*==================*/ + +#ifndef UNIV_INNOCHECKSUM +/*******************************************************************//** +Returns the table space by a given id, NULL if not found. 
*/ +fil_space_t* +fil_space_get_by_id( +/*================*/ + ulint id); /*!< in: space id */ +/*******************************************************************//** +Return space name */ +char* +fil_space_name( +/*===========*/ + fil_space_t* space); /*!< in: space */ +#endif + +/****************************************************************//** +Does error handling when a file operation fails. +@return TRUE if we should retry the operation */ +ibool +os_file_handle_error_no_exit( +/*=========================*/ + const char* name, /*!< in: name of a file or NULL */ + const char* operation, /*!< in: operation */ + ibool on_error_silent,/*!< in: if TRUE then don't print + any message to the log. */ + const char* file, /*!< in: file name */ + const ulint line); /*!< in: line */ + +/*******************************************************************//** +Return page type name */ +const char* +fil_get_page_type_name( +/*===================*/ + ulint page_type); /*!< in: FIL_PAGE_TYPE */ + #endif /* fil0fil_h */ diff --git a/storage/xtradb/include/fil0pagecompress.h b/storage/xtradb/include/fil0pagecompress.h new file mode 100644 index 00000000000..c797c221efc --- /dev/null +++ b/storage/xtradb/include/fil0pagecompress.h @@ -0,0 +1,145 @@ +/***************************************************************************** + +Copyright (C) 2013, 2014 SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +#ifndef fil0pagecompress_h +#define fil0pagecompress_h + +#include "fsp0fsp.h" +#include "fsp0pagecompress.h" + +/******************************************************************//** +@file include/fil0pagecompress.h +Helper functions for extracting/storing page compression and +atomic writes information to table space. + +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +/*******************************************************************//** +Returns the page compression level flag of the space, or 0 if the space +is not compressed. The tablespace must be cached in the memory cache. +@return page compression level if page compressed, ULINT_UNDEFINED if space not found */ +ulint +fil_space_get_page_compression_level( +/*=================================*/ + ulint id); /*!< in: space id */ +/*******************************************************************//** +Returns the page compression flag of the space, or false if the space +is not compressed. The tablespace must be cached in the memory cache. +@return true if page compressed, false if not or space not found */ +ibool +fil_space_is_page_compressed( +/*=========================*/ + ulint id); /*!< in: space id */ +/*******************************************************************//** +Returns the page compression flag of the space, or false if the space +is not compressed. The tablespace must be cached in the memory cache. 
+@return true if page compressed, false if not or space not found */ +ibool +fil_space_get_page_compressed( +/*=========================*/ + fil_space_t* space); /*!< in: tablespace */ +/*******************************************************************//** +Returns the atomic writes flag of the space, or false if the space +is not using atomic writes. The tablespace must be cached in the memory cache. +@return atomic write table option value */ +atomic_writes_t +fil_space_get_atomic_writes( +/*=========================*/ + ulint id); /*!< in: space id */ +/*******************************************************************//** +Find out whether the page is an index page or not +@return true if page type index page, false if not */ +ibool +fil_page_is_index_page( +/*===================*/ + byte *buf); /*!< in: page */ + +/****************************************************************//** +Get the name of the compression algorithm used for page +compression. +@return compression algorithm name or "UNKNOWN" if not known*/ +const char* +fil_get_compression_alg_name( +/*=========================*/ + ulint comp_alg); /*!<in: compression algorithm number */ + +/****************************************************************//** +For page compressed pages compress the page before actual write +operation. +@return compressed page to be written*/ +byte* +fil_compress_page( +/*==============*/ + ulint space_id, /*!< in: tablespace id of the + table. 
*/ + byte* buf, /*!< in: buffer from which to write; in aio + this must be appropriately aligned */ + byte* out_buf, /*!< out: compressed buffer */ + ulint len, /*!< in: length of input buffer.*/ + ulint compression_level, /*!< in: compression level */ + ulint block_size, /*!< in: block size */ + ulint* out_len, /*!< out: actual length of compressed + page */ + byte* lzo_mem); /*!< in: temporary memory used by LZO */ + +/****************************************************************//** +For page compressed pages decompress the page after actual read +operation. +@return nothing; the function is declared void */ +void +fil_decompress_page( +/*================*/ + byte* page_buf, /*!< in: preallocated buffer or NULL */ + byte* buf, /*!< out: buffer from which to read; in aio + this must be appropriately aligned */ + ulong len, /*!< in: length of output buffer.*/ + ulint* write_size); /*!< in/out: Actual payload size of + the compressed data. */ + +/****************************************************************//** +Get space id from fil node +@return space id*/ +ulint +fil_node_get_space_id( +/*==================*/ + fil_node_t* node); /*!< in: Node where to get space id*/ + +/****************************************************************//** +Get block size from fil node +@return block size*/ +ulint +fil_node_get_block_size( + fil_node_t* node); /*!< in: Node where to get block + size */ +/*******************************************************************//** +Find out whether the page is page compressed +@return true if page is page compressed*/ +ibool +fil_page_is_compressed( +/*===================*/ + byte *buf); /*!< in: page */ + +/*******************************************************************//** +Find out whether the page is page compressed with lzo method +@return true if page is page compressed with lzo method*/ +ibool +fil_page_is_lzo_compressed( +/*=======================*/ + byte *buf); /*!< in: page */ +#endif diff --git a/storage/xtradb/include/fsp0fsp.h
b/storage/xtradb/include/fsp0fsp.h index a587ccc9f20..6fe44a0ef16 100644 --- a/storage/xtradb/include/fsp0fsp.h +++ b/storage/xtradb/include/fsp0fsp.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -53,12 +54,21 @@ to the two Barracuda row formats COMPRESSED and DYNAMIC. */ /** Width of the DATA_DIR flag. This flag indicates that the tablespace is found in a remote location, not the default data directory. */ #define FSP_FLAGS_WIDTH_DATA_DIR 1 +/** Number of flag bits used to indicate the page compression and compression level */ +#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION 1 +#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL 4 +/** Number of flag bits used to indicate atomic writes for this tablespace */ +#define FSP_FLAGS_WIDTH_ATOMIC_WRITES 2 + /** Width of all the currently known tablespace flags */ #define FSP_FLAGS_WIDTH (FSP_FLAGS_WIDTH_POST_ANTELOPE \ + FSP_FLAGS_WIDTH_ZIP_SSIZE \ + FSP_FLAGS_WIDTH_ATOMIC_BLOBS \ + FSP_FLAGS_WIDTH_PAGE_SSIZE \ - + FSP_FLAGS_WIDTH_DATA_DIR) + + FSP_FLAGS_WIDTH_DATA_DIR \ + + FSP_FLAGS_WIDTH_PAGE_COMPRESSION \ + + FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL \ + + FSP_FLAGS_WIDTH_ATOMIC_WRITES) /** A mask of all the known/used bits in tablespace flags */ #define FSP_FLAGS_MASK (~(~0 << FSP_FLAGS_WIDTH)) @@ -71,10 +81,21 @@ is found in a remote location, not the default data directory. 
*/ /** Zero relative shift position of the ATOMIC_BLOBS field */ #define FSP_FLAGS_POS_ATOMIC_BLOBS (FSP_FLAGS_POS_ZIP_SSIZE \ + FSP_FLAGS_WIDTH_ZIP_SSIZE) -/** Zero relative shift position of the PAGE_SSIZE field */ -#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_BLOBS \ +/** Note that these need to be before the page size to be compatible with +dictionary */ +/** Zero relative shift position of the PAGE_COMPRESSION field */ +#define FSP_FLAGS_POS_PAGE_COMPRESSION (FSP_FLAGS_POS_ATOMIC_BLOBS \ + FSP_FLAGS_WIDTH_ATOMIC_BLOBS) -/** Zero relative shift position of the start of the UNUSED bits */ +/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */ +#define FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL (FSP_FLAGS_POS_PAGE_COMPRESSION \ + + FSP_FLAGS_WIDTH_PAGE_COMPRESSION) +/** Zero relative shift position of the ATOMIC_WRITES field */ +#define FSP_FLAGS_POS_ATOMIC_WRITES (FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL \ + + FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL) + /** Zero relative shift position of the PAGE_SSIZE field */ +#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_WRITES \ + + FSP_FLAGS_WIDTH_ATOMIC_WRITES) +/** Zero relative shift position of the start of the DATA DIR bits */ #define FSP_FLAGS_POS_DATA_DIR (FSP_FLAGS_POS_PAGE_SSIZE \ + FSP_FLAGS_WIDTH_PAGE_SSIZE) /** Zero relative shift position of the start of the UNUSED bits */ @@ -101,6 +122,19 @@ is found in a remote location, not the default data directory. 
*/ #define FSP_FLAGS_MASK_DATA_DIR \ ((~(~0 << FSP_FLAGS_WIDTH_DATA_DIR)) \ << FSP_FLAGS_POS_DATA_DIR) +/** Bit mask of the PAGE_COMPRESSION field */ +#define FSP_FLAGS_MASK_PAGE_COMPRESSION \ + ((~(~0 << FSP_FLAGS_WIDTH_PAGE_COMPRESSION)) \ + << FSP_FLAGS_POS_PAGE_COMPRESSION) +/** Bit mask of the PAGE_COMPRESSION_LEVEL field */ +#define FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL \ + ((~(~0 << FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL)) \ + << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL) +/** Bit mask of the ATOMIC_WRITES field */ +#define FSP_FLAGS_MASK_ATOMIC_WRITES \ + ((~(~0 << FSP_FLAGS_WIDTH_ATOMIC_WRITES)) \ + << FSP_FLAGS_POS_ATOMIC_WRITES) + /** Return the value of the POST_ANTELOPE field */ #define FSP_FLAGS_GET_POST_ANTELOPE(flags) \ @@ -125,12 +159,38 @@ is found in a remote location, not the default data directory. */ /** Return the contents of the UNUSED bits */ #define FSP_FLAGS_GET_UNUSED(flags) \ (flags >> FSP_FLAGS_POS_UNUSED) +/** Return the value of the PAGE_COMPRESSION field */ +#define FSP_FLAGS_GET_PAGE_COMPRESSION(flags) \ + ((flags & FSP_FLAGS_MASK_PAGE_COMPRESSION) \ + >> FSP_FLAGS_POS_PAGE_COMPRESSION) +/** Return the value of the PAGE_COMPRESSION_LEVEL field */ +#define FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags) \ + ((flags & FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL) \ + >> FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL) +/** Return the value of the ATOMIC_WRITES field */ +#define FSP_FLAGS_GET_ATOMIC_WRITES(flags) \ + ((flags & FSP_FLAGS_MASK_ATOMIC_WRITES) \ + >> FSP_FLAGS_POS_ATOMIC_WRITES) /** Set a PAGE_SSIZE into the correct bits in a given tablespace flags. */ #define FSP_FLAGS_SET_PAGE_SSIZE(flags, ssize) \ (flags | (ssize << FSP_FLAGS_POS_PAGE_SSIZE)) +/** Set a PAGE_COMPRESSION into the correct bits in a given +tablespace flags. */ +#define FSP_FLAGS_SET_PAGE_COMPRESSION(flags, compression) \ + (flags | (compression << FSP_FLAGS_POS_PAGE_COMPRESSION)) + +/** Set a PAGE_COMPRESSION_LEVEL into the correct bits in a given +tablespace flags. 
*/ +#define FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(flags, level) \ + (flags | (level << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL)) +/** Set a ATOMIC_WRITES into the correct bits in a given +tablespace flags. */ +#define FSP_FLAGS_SET_ATOMIC_WRITES(flags, atomics) \ + (flags | (atomics << FSP_FLAGS_POS_ATOMIC_WRITES)) + /* @} */ /* @defgroup Tablespace Header Constants (moved from fsp0fsp.c) @{ */ diff --git a/storage/xtradb/include/fsp0fsp.ic b/storage/xtradb/include/fsp0fsp.ic index 0d81e817cc9..ddcb87b0e57 100644 --- a/storage/xtradb/include/fsp0fsp.ic +++ b/storage/xtradb/include/fsp0fsp.ic @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -63,12 +64,17 @@ fsp_flags_is_valid( ulint atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags); ulint page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags); ulint unused = FSP_FLAGS_GET_UNUSED(flags); + ulint page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(flags); + ulint page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags); + ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); DBUG_EXECUTE_IF("fsp_flags_is_valid_failure", return(false);); /* fsp_flags is zero unless atomic_blobs is set. */ /* Make sure there are no bits that we do not know about. */ if (unused != 0 || flags == 1) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted unused %lu\n", + flags, unused); return(false); } else if (post_antelope) { /* The Antelope row formats REDUNDANT and COMPACT did @@ -76,6 +82,8 @@ fsp_flags_is_valid( 4-byte field is zero for Antelope row formats. 
*/ if (!atomic_blobs) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted atomic_blobs %lu\n", + flags, atomic_blobs); return(false); } } @@ -87,10 +95,14 @@ fsp_flags_is_valid( externally stored parts. */ if (post_antelope || zip_ssize != 0) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted zip_ssize %lu atomic_blobs %lu\n", + flags, zip_ssize, atomic_blobs); return(false); } } else if (!post_antelope || zip_ssize > PAGE_ZIP_SSIZE_MAX) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted zip_ssize %lu max %d\n", + flags, zip_ssize, PAGE_ZIP_SSIZE_MAX); return(false); } else if (page_ssize > UNIV_PAGE_SSIZE_MAX) { @@ -98,9 +110,13 @@ fsp_flags_is_valid( be zero for an original 16k page size. Validate the page shift size is within allowed range. */ + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_ssize %lu max %lu\n", + flags, page_ssize, UNIV_PAGE_SSIZE_MAX); return(false); } else if (UNIV_PAGE_SIZE != UNIV_PAGE_SIZE_ORIG && !page_ssize) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_ssize %lu max %lu:%d\n", + flags, page_ssize, UNIV_PAGE_SIZE, UNIV_PAGE_SIZE_ORIG); return(false); } @@ -108,6 +124,23 @@ fsp_flags_is_valid( # error "UNIV_FORMAT_MAX != UNIV_FORMAT_B, Add more validations." 
#endif + /* Page compression level requires page compression and atomic blobs + to be set */ + if (page_compression_level || page_compression) { + if (!page_compression || !atomic_blobs) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_compression %lu\n" + "InnoDB: Error: page_compression_level %lu atomic_blobs %lu\n", + flags, page_compression, page_compression_level, atomic_blobs); + return(false); + } + } + + if (atomic_writes > ATOMIC_WRITES_OFF) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted atomic_writes %lu\n", + flags, atomic_writes); + return (false); + } + /* The DATA_DIR field can be used for any row type so there is nothing here to validate. */ diff --git a/storage/xtradb/include/fsp0pagecompress.h b/storage/xtradb/include/fsp0pagecompress.h new file mode 100644 index 00000000000..5f943ee2b83 --- /dev/null +++ b/storage/xtradb/include/fsp0pagecompress.h @@ -0,0 +1,84 @@ +/***************************************************************************** + +Copyright (C) 2013, 2015, MariaDB Corporation. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/fsp0pagecompress.h +Helper functions for extracting/storing page compression and +atomic writes information to file space. + +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +#ifndef fsp0pagecompress_h +#define fsp0pagecompress_h + +/* Supported page compression methods */ + +#define PAGE_UNCOMPRESSED 0 +#define PAGE_ZLIB_ALGORITHM 1 +#define PAGE_LZ4_ALGORITHM 2 +#define PAGE_LZO_ALGORITHM 3 +#define PAGE_LZMA_ALGORITHM 4 +#define PAGE_BZIP2_ALGORITHM 5 +#define PAGE_SNAPPY_ALGORITHM 6 +#define PAGE_ALGORITHM_LAST PAGE_SNAPPY_ALGORITHM + +/**********************************************************************//** +Reads the page compression level from the first page of a tablespace. +@return page compression level, or 0 if uncompressed */ +UNIV_INTERN +ulint +fsp_header_get_compression_level( +/*=============================*/ + const page_t* page); /*!< in: first page of a tablespace */ + +/********************************************************************//** +Determine if the tablespace is page compressed from dict_table_t::flags. +@return TRUE if page compressed, FALSE if not compressed */ +UNIV_INLINE +ibool +fsp_flags_is_page_compressed( +/*=========================*/ + ulint flags); /*!< in: tablespace flags */ + +/********************************************************************//** +Extract the page compression level from tablespace flags. +A tablespace has only one physical page compression level +whether that page is compressed or not. 
+@return page compression level of the file-per-table tablespace, +or zero if the table is not compressed. */ +UNIV_INLINE +ulint +fsp_flags_get_page_compression_level( +/*=================================*/ + ulint flags); /*!< in: tablespace flags */ + +/********************************************************************//** +Extract the atomic writes setting from the tablespace flags. +@return atomic writes table option value */ +UNIV_INLINE +atomic_writes_t +fsp_flags_get_atomic_writes( +/*========================*/ + ulint flags); /*!< in: tablespace flags */ + +#ifndef UNIV_NONINL +#include "fsp0pagecompress.ic" +#endif + +#endif diff --git a/storage/xtradb/include/fsp0pagecompress.ic b/storage/xtradb/include/fsp0pagecompress.ic new file mode 100644 index 00000000000..4dde042e19e --- /dev/null +++ b/storage/xtradb/include/fsp0pagecompress.ic @@ -0,0 +1,197 @@ +/***************************************************************************** + +Copyright (C) 2013, 2014, SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/fsp0pagecompress.ic +Implementation for helper functions for extracting/storing page +compression and atomic writes information to file space.
+
+Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com
+***********************************************************************/
+
+/********************************************************************//**
+Read the PAGE_COMPRESSION field out of the given tablespace flags.
+@return TRUE if page compressed, FALSE if not page compressed */
+UNIV_INLINE
+ibool
+fsp_flags_is_page_compressed(
+/*=========================*/
+	ulint	flags)	/*!< in: tablespace flags */
+{
+	const ulint	page_compression
+		= FSP_FLAGS_GET_PAGE_COMPRESSION(flags);
+
+	return(page_compression);
+}
+
+/********************************************************************//**
+Read the PAGE_COMPRESSION_LEVEL field out of the given tablespace flags.
+@return page compression level or 0 if not compressed*/
+UNIV_INLINE
+ulint
+fsp_flags_get_page_compression_level(
+/*=================================*/
+	ulint	flags)	/*!< in: tablespace flags */
+{
+	const ulint	level
+		= FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags);
+
+	return(level);
+}
+
+/********************************************************************//**
+Determine the tablespace is using atomic writes from dict_table_t::flags.
+@return atomic writes table option value stored in the flags */
+UNIV_INLINE
+atomic_writes_t
+fsp_flags_get_atomic_writes(
+/*========================*/
+	ulint	flags)	/*!< in: tablespace flags */
+{
+	const ulint	atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags);
+
+	return((atomic_writes_t) atomic_writes);
+}
+
+/*******************************************************************//**
+Compare the FIL_PAGE_TYPE field of the page with FIL_PAGE_INDEX.
+@return true if page type index page, false if not */
+UNIV_INLINE
+ibool
+fil_page_is_index_page(
+/*===================*/
+	byte	*buf)	/*!< in: page */
+{
+	return(FIL_PAGE_INDEX == mach_read_from_2(buf + FIL_PAGE_TYPE));
+}
+
+/*******************************************************************//**
+Compare the FIL_PAGE_TYPE field of the page with FIL_PAGE_PAGE_COMPRESSED.
+@return true if page is page compressed, false if not */
+UNIV_INLINE
+ibool
+fil_page_is_compressed(
+/*===================*/
+	byte	*buf)	/*!< in: page */
+{
+	return(FIL_PAGE_PAGE_COMPRESSED
+	       == mach_read_from_2(buf + FIL_PAGE_TYPE));
+}
+
+/*******************************************************************//**
+Look up the flags of the space and extract the page compression level
+from them. The tablespace must be cached in the memory cache.
+@return page compression level; 0 if the flags are zero,
+ULINT_UNDEFINED if space not found */
+UNIV_INLINE
+ulint
+fil_space_get_page_compression_level(
+/*=================================*/
+	ulint	id)	/*!< in: space id */
+{
+	const ulint	flags = fil_space_get_flags(id);
+
+	if (flags == 0 || flags == ULINT_UNDEFINED) {
+		/* Nothing to decode: hand the value back unchanged. */
+		return(flags);
+	}
+
+	return(fsp_flags_get_page_compression_level(flags));
+}
+
+/*******************************************************************//**
+Extract the page compression from space.
+@return true if space is page compressed, false if space is not found
+or space is not page compressed.
*/
+UNIV_INLINE
+ibool
+fil_space_is_page_compressed(
+/*=========================*/
+	ulint	id)	/*!< in: space id */
+{
+	ulint	flags;
+
+	flags = fil_space_get_flags(id);
+
+	if (flags && flags != ULINT_UNDEFINED) {
+
+		return(fsp_flags_is_page_compressed(flags));
+	}
+
+	/* Do not return flags here: it is either 0 or ULINT_UNDEFINED,
+	and ULINT_UNDEFINED is non-zero, so callers testing the result
+	as a boolean would treat a missing space as page compressed. */
+	return(0);
+}
+
+/****************************************************************//**
+Get the name of the compression algorithm used for page
+compression. Every PAGE_*_ALGORITHM constant up to
+PAGE_ALGORITHM_LAST has a name.
+@return compression algorithm name or "UNKNOWN" if not known*/
+UNIV_INLINE
+const char*
+fil_get_compression_alg_name(
+/*=========================*/
+	ulint	comp_alg)	/*!<in: compression algorithm number */
+{
+	switch(comp_alg) {
+	case PAGE_UNCOMPRESSED:
+		return ("uncompressed");
+	case PAGE_ZLIB_ALGORITHM:
+		return ("ZLIB");
+	case PAGE_LZ4_ALGORITHM:
+		return ("LZ4");
+	case PAGE_LZO_ALGORITHM:
+		return ("LZO");
+	case PAGE_LZMA_ALGORITHM:
+		return ("LZMA");
+	case PAGE_BZIP2_ALGORITHM:
+		return ("BZIP2");
+	case PAGE_SNAPPY_ALGORITHM:
+		return ("SNAPPY");
+	default:
+		/* Unrecognized number: report it rather than assert. */
+		return("UNKNOWN");
+	}
+}
+
+/*******************************************************************//**
+Returns the atomic writes flag of the space, or false if the space
+is not using atomic writes. The tablespace must be cached in the memory cache.
+@return atomic writes table option value */ +UNIV_INLINE +atomic_writes_t +fil_space_get_atomic_writes( +/*========================*/ + ulint id) /*!< in: space id */ +{ + ulint flags; + + flags = fil_space_get_flags(id); + + if (flags && flags != ULINT_UNDEFINED) { + + return((atomic_writes_t)fsp_flags_get_atomic_writes(flags)); + } + + return((atomic_writes_t)0); +} + +/*******************************************************************//** +Find out wheather the page is page compressed with lzo method +@return true if page is page compressed with lzo method, false if not */ +UNIV_INLINE +ibool +fil_page_is_lzo_compressed( +/*=======================*/ + byte *buf) /*!< in: page */ +{ + return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED && + mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN) == PAGE_LZO_ALGORITHM); +} diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index 7b880b891bd..78af907c006 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -2,6 +2,7 @@ Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Percona Inc.. Those modifications are @@ -156,10 +157,9 @@ enum os_file_create_t { #define OS_FILE_INSUFFICIENT_RESOURCE 78 #define OS_FILE_AIO_INTERRUPTED 79 #define OS_FILE_OPERATION_ABORTED 80 - #define OS_FILE_ACCESS_VIOLATION 81 - -#define OS_FILE_ERROR_MAX 100 +#define OS_FILE_OPERATION_NOT_SUPPORTED 125 +#define OS_FILE_ERROR_MAX 200 /* @} */ /** Types for aio operations @{ */ @@ -305,43 +305,45 @@ os_file_write The wrapper functions have the prefix of "innodb_". 
*/ #ifdef UNIV_PFS_IO -# define os_file_create(key, name, create, purpose, type, success) \ +# define os_file_create(key, name, create, purpose, type, success, atomic_writes) \ pfs_os_file_create_func(key, name, create, purpose, type, \ - success, __FILE__, __LINE__) + success, atomic_writes, __FILE__, __LINE__) # define os_file_create_simple(key, name, create, access, success) \ pfs_os_file_create_simple_func(key, name, create, access, \ success, __FILE__, __LINE__) # define os_file_create_simple_no_error_handling( \ - key, name, create_mode, access, success) \ + key, name, create_mode, access, success, atomic_writes) \ pfs_os_file_create_simple_no_error_handling_func( \ - key, name, create_mode, access, success, __FILE__, __LINE__) + key, name, create_mode, access, success, atomic_writes, __FILE__, __LINE__) # define os_file_close(file) \ pfs_os_file_close_func(file, __FILE__, __LINE__) # define os_aio(type, mode, name, file, buf, offset, \ - n, message1, message2, space_id, trx) \ + n, message1, message2, space_id, \ + trx, page_compressed, page_compression_level, write_size) \ pfs_os_aio_func(type, mode, name, file, buf, offset, \ n, message1, message2, space_id, trx, \ + page_compressed, page_compression_level, write_size, \ __FILE__, __LINE__) -# define os_file_read(file, buf, offset, n) \ - pfs_os_file_read_func(file, buf, offset, n, NULL, \ +# define os_file_read(file, buf, offset, n, compressed) \ + pfs_os_file_read_func(file, buf, offset, n, NULL, compressed, \ __FILE__, __LINE__) -# define os_file_read_trx(file, buf, offset, n, trx) \ - pfs_os_file_read_func(file, buf, offset, n, trx, \ +# define os_file_read_trx(file, buf, offset, n, trx, compressed) \ + pfs_os_file_read_func(file, buf, offset, n, trx, compressed, \ __FILE__, __LINE__) -# define os_file_read_no_error_handling(file, buf, offset, n) \ - pfs_os_file_read_no_error_handling_func(file, buf, offset, n, \ +# define os_file_read_no_error_handling(file, buf, offset, n, compressed) \ + 
pfs_os_file_read_no_error_handling_func(file, buf, offset, n, compressed, \ __FILE__, __LINE__) -# define os_file_write(name, file, buf, offset, n) \ - pfs_os_file_write_func(name, file, buf, offset, \ - n, __FILE__, __LINE__) +# define os_file_write(name, file, buf, offset, n) \ + pfs_os_file_write_func(name, file, buf, offset, n, \ + __FILE__, __LINE__) # define os_file_flush(file) \ pfs_os_file_flush_func(file, __FILE__, __LINE__) @@ -358,32 +360,34 @@ The wrapper functions have the prefix of "innodb_". */ /* If UNIV_PFS_IO is not defined, these I/O APIs point to original un-instrumented file I/O APIs */ -# define os_file_create(key, name, create, purpose, type, success) \ - os_file_create_func(name, create, purpose, type, success) +# define os_file_create(key, name, create, purpose, type, success, atomic_writes) \ + os_file_create_func(name, create, purpose, type, success, atomic_writes) # define os_file_create_simple(key, name, create_mode, access, success) \ os_file_create_simple_func(name, create_mode, access, success) # define os_file_create_simple_no_error_handling( \ - key, name, create_mode, access, success) \ + key, name, create_mode, access, success, atomic_writes) \ os_file_create_simple_no_error_handling_func( \ - name, create_mode, access, success) + name, create_mode, access, success, atomic_writes) # define os_file_close(file) os_file_close_func(file) # define os_aio(type, mode, name, file, buf, offset, n, message1, \ - message2, space_id, trx) \ + message2, space_id, trx, \ + page_compressed, page_compression_level, write_size) \ os_aio_func(type, mode, name, file, buf, offset, n, \ - message1, message2, space_id, trx) + message1, message2, space_id, trx, \ + page_compressed, page_compression_level, write_size) -# define os_file_read(file, buf, offset, n) \ - os_file_read_func(file, buf, offset, n, NULL) +# define os_file_read(file, buf, offset, n, compressed) \ + os_file_read_func(file, buf, offset, n, NULL, compressed) -# define 
os_file_read_trx(file, buf, offset, n, trx) \ - os_file_read_func(file, buf, offset, n, trx) +# define os_file_read_trx(file, buf, offset, n, trx, compressed) \ + os_file_read_func(file, buf, offset, n, trx, compressed) -# define os_file_read_no_error_handling(file, buf, offset, n) \ - os_file_read_no_error_handling_func(file, buf, offset, n) +# define os_file_read_no_error_handling(file, buf, offset, n, compressed) \ + os_file_read_no_error_handling_func(file, buf, offset, n, compressed) # define os_file_write(name, file, buf, offset, n) \ os_file_write_func(name, file, buf, offset, n) @@ -526,7 +530,9 @@ os_file_create_simple_func( ulint create_mode,/*!< in: create mode */ ulint access_type,/*!< in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */ - ibool* success);/*!< out: TRUE if succeed, FALSE if error */ + ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes);/*!< in: atomic writes table option + value */ /****************************************************************//** NOTE! Use the corresponding macro os_file_create_simple_no_error_handling(), not directly this function! @@ -544,7 +550,9 @@ os_file_create_simple_no_error_handling_func( OS_FILE_READ_WRITE, or OS_FILE_READ_ALLOW_DELETE; the last option is used by a backup program reading the file */ - ibool* success)/*!< out: TRUE if succeed, FALSE if error */ + ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes)/*!< in: atomic writes table option + value */ __attribute__((nonnull, warn_unused_result)); /****************************************************************//** Tries to disable OS caching on an opened file descriptor. 
*/ @@ -578,7 +586,9 @@ os_file_create_func( async i/o or unbuffered i/o: look in the function source code for the exact rules */ ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */ - ibool* success)/*!< out: TRUE if succeed, FALSE if error */ + ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes) /*!< in: atomic writes table option + value */ __attribute__((nonnull, warn_unused_result)); /***********************************************************************//** Deletes a file. The file has to be closed before calling this. @@ -643,6 +653,8 @@ pfs_os_file_create_simple_func( ulint access_type,/*!< in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */ ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes,/*!< in: atomic writes table option + value */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ __attribute__((nonnull, warn_unused_result)); @@ -668,6 +680,8 @@ pfs_os_file_create_simple_no_error_handling_func( OS_FILE_READ_ALLOW_DELETE; the last option is used by a backup program reading the file */ ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes,/*!< in: atomic writes table option + value*/ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ __attribute__((nonnull, warn_unused_result)); @@ -696,6 +710,8 @@ pfs_os_file_create_func( function source code for the exact rules */ ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */ ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes,/*!< in: atomic writes table option + value */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ __attribute__((nonnull, warn_unused_result)); @@ -726,7 +742,9 @@ pfs_os_file_read_func( void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to 
read */ ulint n, /*!< in: number of bytes to read */ - trx_t* trx, + trx_t* trx, /*!< in: trx */ + ibool compressed, /*!< in: is this file space + compressed ? */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ @@ -745,6 +763,8 @@ pfs_os_file_read_no_error_handling_func( void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ + ibool compressed, /*!< in: is this file space + compressed ? */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ @@ -777,6 +797,15 @@ pfs_os_aio_func( OS_AIO_SYNC */ ulint space_id, trx_t* trx, + ibool page_compression, /*!< in: is page compression used + on this file space */ + ulint page_compression_level, /*!< page compression + level to be used */ + ulint* write_size,/*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ /*******************************************************************//** @@ -940,7 +969,9 @@ os_file_read_func( void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ - trx_t* trx); + trx_t* trx, /*!< in: trx */ + ibool compressed); /*!< in: is this file space + compressed ? */ /*******************************************************************//** Rewind file to its start, read at most size - 1 bytes from it to str, and NUL-terminate str. All errors are silently ignored. 
This function is @@ -965,7 +996,9 @@ os_file_read_no_error_handling_func( os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ - ulint n); /*!< in: number of bytes to read */ + ulint n, /*!< in: number of bytes to read */ + ibool compressed); /*!< in: is this file space + compressed ? */ /*******************************************************************//** NOTE! Use the corresponding macro os_file_write(), not directly this @@ -982,6 +1015,7 @@ os_file_write_func( const void* buf, /*!< in: buffer from which to write */ os_offset_t offset, /*!< in: file offset where to write */ ulint n); /*!< in: number of bytes to write */ + /*******************************************************************//** Check the existence and type of the given file. @return TRUE if call succeeded */ @@ -1149,7 +1183,17 @@ os_aio_func( aio operation); ignored if mode is OS_AIO_SYNC */ ulint space_id, - trx_t* trx); + trx_t* trx, + ibool page_compression, /*!< in: is page compression used + on this file space */ + ulint page_compression_level, /*!< page compression + level to be used */ + ulint* write_size);/*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ + /************************************************************************//** Wakes up all async i/o threads so that they know to exit themselves in shutdown. */ @@ -1211,6 +1255,7 @@ os_aio_windows_handle( void** message2, ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ ulint* space_id); + #endif /**********************************************************************//** @@ -1330,6 +1375,16 @@ os_file_handle_error_no_exit( any message to the log. */ +/***********************************************************************//** +Try to get number of bytes per sector from file system. 
+@return file block size */ +UNIV_INTERN +ulint +os_file_get_block_size( +/*===================*/ + os_file_t file, /*!< in: handle to a file */ + const char* name); /*!< in: file name */ + #ifndef UNIV_NONINL #include "os0file.ic" #endif diff --git a/storage/xtradb/include/os0file.ic b/storage/xtradb/include/os0file.ic index 25a1397147e..61300387e1b 100644 --- a/storage/xtradb/include/os0file.ic +++ b/storage/xtradb/include/os0file.ic @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -44,6 +45,8 @@ pfs_os_file_create_simple_func( ulint access_type,/*!< in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */ ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes,/*!< in: atomic writes table option + value */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -59,7 +62,7 @@ pfs_os_file_create_simple_func( name, src_file, src_line); file = os_file_create_simple_func(name, create_mode, - access_type, success); + access_type, success, atomic_writes); /* Regsiter the returning "file" value with the system */ register_pfs_file_open_end(locker, file); @@ -88,6 +91,8 @@ pfs_os_file_create_simple_no_error_handling_func( OS_FILE_READ_ALLOW_DELETE; the last option is used by a backup program reading the file */ ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes,/*!< in: atomic writes table option + value */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -103,7 +108,7 @@ pfs_os_file_create_simple_no_error_handling_func( name, src_file, src_line); file = 
os_file_create_simple_no_error_handling_func( - name, create_mode, access_type, success); + name, create_mode, access_type, success, atomic_writes); register_pfs_file_open_end(locker, file); @@ -134,6 +139,8 @@ pfs_os_file_create_func( function source code for the exact rules */ ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */ ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes, /*!< in: atomic writes table option + value */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -148,7 +155,8 @@ pfs_os_file_create_func( : PSI_FILE_OPEN), name, src_file, src_line); - file = os_file_create_func(name, create_mode, purpose, type, success); + file = os_file_create_func(name, create_mode, purpose, type, + success, atomic_writes); register_pfs_file_open_end(locker, file); @@ -212,6 +220,15 @@ pfs_os_aio_func( OS_AIO_SYNC */ ulint space_id, trx_t* trx, + ibool page_compression, /*!< in: is page compression used + on this file space */ + ulint page_compression_level, /*!< page compression + level to be used */ + ulint* write_size,/*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -227,7 +244,8 @@ pfs_os_aio_func( src_file, src_line); result = os_aio_func(type, mode, name, file, buf, offset, - n, message1, message2, space_id, trx); + n, message1, message2, space_id, trx, + page_compression, page_compression_level, write_size); register_pfs_file_io_end(locker, n); @@ -249,6 +267,8 @@ pfs_os_file_read_func( os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ trx_t* trx, + ibool compressed, /*!< in: is this file space + compressed ? 
*/ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -259,7 +279,7 @@ pfs_os_file_read_func( register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ, src_file, src_line); - result = os_file_read_func(file, buf, offset, n, trx); + result = os_file_read_func(file, buf, offset, n, trx, compressed); register_pfs_file_io_end(locker, n); @@ -282,6 +302,8 @@ pfs_os_file_read_no_error_handling_func( void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ + ibool compressed, /*!< in: is this file space + compressed ? */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -292,7 +314,7 @@ pfs_os_file_read_no_error_handling_func( register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ, src_file, src_line); - result = os_file_read_no_error_handling_func(file, buf, offset, n); + result = os_file_read_no_error_handling_func(file, buf, offset, n, compressed); register_pfs_file_io_end(locker, n); diff --git a/storage/xtradb/include/srv0mon.h b/storage/xtradb/include/srv0mon.h index 2d90f47eefe..3d9e16b19f9 100644 --- a/storage/xtradb/include/srv0mon.h +++ b/storage/xtradb/include/srv0mon.h @@ -2,6 +2,7 @@ Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -163,6 +164,8 @@ enum monitor_id_t { MONITOR_OVLD_BUF_POOL_PAGES_FREE, MONITOR_OVLD_PAGE_CREATED, MONITOR_OVLD_PAGES_WRITTEN, + MONITOR_OVLD_INDEX_PAGES_WRITTEN, + MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN, MONITOR_OVLD_PAGES_READ, MONITOR_OVLD_BYTE_READ, MONITOR_OVLD_BYTE_WRITTEN, @@ -305,6 +308,21 @@ enum monitor_id_t { MONITOR_PAD_INCREMENTS, MONITOR_PAD_DECREMENTS, + /* New monitor variables for page compression */ + MONITOR_OVLD_PAGE_COMPRESS_SAVED, + MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512, + MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024, + MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048, + MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096, + MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192, + MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384, + MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768, + MONITOR_OVLD_PAGES_PAGE_COMPRESSED, + MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP, + MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED, + MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED, + MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR, + /* Index related counters */ MONITOR_MODULE_INDEX, MONITOR_INDEX_SPLIT, diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index cabb047112c..ea7508f0c38 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -103,6 +103,37 @@ struct srv_stats_t { a disk page */ ulint_ctr_1_t buf_pool_reads; + /** Number of bytes saved by page compression */ + ulint_ctr_64_t page_compression_saved; + /** Number of 512Byte TRIM by page compression */ + ulint_ctr_64_t page_compression_trim_sect512; + /** Number of 1K TRIM by page compression */ + ulint_ctr_64_t page_compression_trim_sect1024; + /** Number of 2K TRIM by page compression */ + ulint_ctr_64_t page_compression_trim_sect2048; + /** Number of 4K TRIM by page compression */ + ulint_ctr_64_t page_compression_trim_sect4096; + /** Number of 8K TRIM by page compression 
*/ + ulint_ctr_64_t page_compression_trim_sect8192; + /** Number of 16K TRIM by page compression */ + ulint_ctr_64_t page_compression_trim_sect16384; + /** Number of 32K TRIM by page compression */ + ulint_ctr_64_t page_compression_trim_sect32768; + /* Number of index pages written */ + ulint_ctr_64_t index_pages_written; + /* Number of non index pages written */ + ulint_ctr_64_t non_index_pages_written; + /* Number of pages compressed with page compression */ + ulint_ctr_64_t pages_page_compressed; + /* Number of TRIM operations induced by page compression */ + ulint_ctr_64_t page_compressed_trim_op; + /* Number of TRIM operations saved by using actual write size knowledge */ + ulint_ctr_64_t page_compressed_trim_op_saved; + /* Number of pages decompressed with page compression */ + ulint_ctr_64_t pages_page_decompressed; + /* Number of page compression errors */ + ulint_ctr_64_t pages_page_compression_error; + /** Number of data read in total (in bytes) */ ulint_ctr_1_t data_read; @@ -257,6 +288,28 @@ extern ibool srv_use_native_conditions; #endif /* __WIN__ */ #endif /* !UNIV_HOTBACKUP */ +/* Use trim operation */ +extern my_bool srv_use_trim; + +/* Use posix fallocate */ +extern my_bool srv_use_posix_fallocate; + +/* Use atomic writes i.e disable doublewrite buffer */ +extern my_bool srv_use_atomic_writes; + +/* Compression algorithm*/ +extern ulong innodb_compression_algorithm; + +/* Number of flush threads */ +#define MTFLUSH_MAX_WORKER 64 +#define MTFLUSH_DEFAULT_WORKER 8 + +/* Number of threads used for multi-threaded flush */ +extern long srv_mtflush_threads; + +/* If this flag is TRUE, then we will use multi threaded flush. */ +extern my_bool srv_use_mtflush; + /** Server undo tablespaces directory, can be absolute path. 
*/ extern char* srv_undo_dir; @@ -432,10 +485,6 @@ extern my_bool srv_stats_sample_traditional; extern ibool srv_use_doublewrite_buf; extern ulong srv_doublewrite_batch_size; -extern ibool srv_use_atomic_writes; -#ifdef HAVE_POSIX_FALLOCATE -extern ibool srv_use_posix_fallocate; -#endif extern ulong srv_checksum_algorithm; extern ulong srv_log_arch_expire_sec; @@ -1089,6 +1138,39 @@ struct export_var_t{ ulint innodb_purge_view_trx_id_age; /*!< rw_max_trx_id - purged view's min trx_id */ #endif /* UNIV_DEBUG */ + + + ib_int64_t innodb_page_compression_saved;/*!< Number of bytes saved + by page compression */ + ib_int64_t innodb_page_compression_trim_sect512;/*!< Number of 512b TRIM + by page compression */ + ib_int64_t innodb_page_compression_trim_sect1024;/*!< Number of 1K TRIM + by page compression */ + ib_int64_t innodb_page_compression_trim_sect2048;/*!< Number of 2K TRIM + by page compression */ + ib_int64_t innodb_page_compression_trim_sect4096;/*!< Number of 4K byte TRIM + by page compression */ + ib_int64_t innodb_page_compression_trim_sect8192;/*!< Number of 8K TRIM + by page compression */ + ib_int64_t innodb_page_compression_trim_sect16384;/*!< Number of 16K TRIM + by page compression */ + ib_int64_t innodb_page_compression_trim_sect32768;/*!< Number of 32K TRIM + by page compression */ + ib_int64_t innodb_index_pages_written; /*!< Number of index pages + written */ + ib_int64_t innodb_non_index_pages_written; /*!< Number of non index pages + written */ + ib_int64_t innodb_pages_page_compressed;/*!< Number of pages + compressed by page compression */ + ib_int64_t innodb_page_compressed_trim_op;/*!< Number of TRIM operations + induced by page compression */ + ib_int64_t innodb_page_compressed_trim_op_saved;/*!< Number of TRIM operations + saved by page compression */ + ib_int64_t innodb_pages_page_decompressed;/*!< Number of pages + decompressed by page + compression */ + ib_int64_t innodb_pages_page_compression_error;/*!< Number of page + compression 
errors */ }; /** Thread slot in the thread table. */ diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 7d97deb71ef..6b0c33df44c 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -2,6 +2,7 @@ Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. +Copyright (c) 2013, 2015, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -350,6 +351,36 @@ typedef enum innodb_file_formats_enum innodb_file_formats_t; /** The 2-logarithm of UNIV_PAGE_SIZE: */ #define UNIV_PAGE_SIZE_SHIFT srv_page_size_shift +#ifdef HAVE_LZO +#define IF_LZO(A,B) A +#else +#define IF_LZO(A,B) B +#endif + +#ifdef HAVE_LZ4 +#define IF_LZ4(A,B) A +#else +#define IF_LZ4(A,B) B +#endif + +#ifdef HAVE_LZMA +#define IF_LZMA(A,B) A +#else +#define IF_LZMA(A,B) B +#endif + +#ifdef HAVE_BZIP2 +#define IF_BZIP2(A,B) A +#else +#define IF_BZIP2(A,B) B +#endif + +#ifdef HAVE_SNAPPY +#define IF_SNAPPY(A,B) A +#else +#define IF_SNAPPY(A,B) B +#endif + /** The universal page size of the database */ #define UNIV_PAGE_SIZE ((ulint) srv_page_size) diff --git a/storage/xtradb/include/ut0list.h b/storage/xtradb/include/ut0list.h index 29fc8669ce4..796a272db59 100644 --- a/storage/xtradb/include/ut0list.h +++ b/storage/xtradb/include/ut0list.h @@ -150,6 +150,15 @@ ib_list_is_empty( /* out: TRUE if empty else */ const ib_list_t* list); /* in: list */ +/******************************************************************** +Get number of items on list. +@return number of items on list */ +UNIV_INLINE +ulint +ib_list_len( +/*========*/ + const ib_list_t* list); /*!< in: list */ + /* List.
*/ struct ib_list_t { ib_list_node_t* first; /*!< first node */ diff --git a/storage/xtradb/include/ut0list.ic b/storage/xtradb/include/ut0list.ic index d9dcb2eac99..7a7f53adb2f 100644 --- a/storage/xtradb/include/ut0list.ic +++ b/storage/xtradb/include/ut0list.ic @@ -58,3 +58,23 @@ ib_list_is_empty( { return(!(list->first || list->last)); } + +/******************************************************************** +Get number of items on list. +@return number of items on list */ +UNIV_INLINE +ulint +ib_list_len( +/*========*/ + const ib_list_t* list) /*!< in: list */ +{ + ulint len = 0; + ib_list_node_t* node = list->first; + + while(node) { + len++; + node = node->next; + } + + return (len); +} diff --git a/storage/xtradb/include/ut0wqueue.h b/storage/xtradb/include/ut0wqueue.h index 33385ddf2d4..e6b9891aed1 100644 --- a/storage/xtradb/include/ut0wqueue.h +++ b/storage/xtradb/include/ut0wqueue.h @@ -95,6 +95,23 @@ ib_wqueue_timedwait( ib_wqueue_t* wq, /* in: work queue */ ib_time_t wait_in_usecs); /* in: wait time in micro seconds */ +/******************************************************************** +Return first item on work queue or NULL if queue is empty +@return work item or NULL */ +void* +ib_wqueue_nowait( +/*=============*/ + ib_wqueue_t* wq); /*!< in: work queue */ + + +/******************************************************************** +Get number of items on queue. +@return number of items on queue */ +ulint +ib_wqueue_len( +/*==========*/ + ib_wqueue_t* wq); /*!< in: work queue */ + /* Work queue.
*/ struct ib_wqueue_t { ib_mutex_t mutex; /*!< mutex protecting everything */ diff --git a/storage/xtradb/log/log0log.cc b/storage/xtradb/log/log0log.cc index 1ff00dbcb8c..903bdab02ce 100644 --- a/storage/xtradb/log/log0log.cc +++ b/storage/xtradb/log/log0log.cc @@ -1382,7 +1382,7 @@ log_group_file_header_flush( (ulint) (dest_offset / UNIV_PAGE_SIZE), (ulint) (dest_offset % UNIV_PAGE_SIZE), OS_FILE_LOG_BLOCK_SIZE, - buf, group); + buf, group, 0); srv_stats.os_log_pending_writes.dec(); } @@ -1510,7 +1510,7 @@ loop: fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0, (ulint) (next_offset / UNIV_PAGE_SIZE), (ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf, - group); + group, 0); srv_stats.os_log_pending_writes.dec(); @@ -2101,7 +2101,7 @@ log_group_checkpoint( write_offset / UNIV_PAGE_SIZE, write_offset % UNIV_PAGE_SIZE, OS_FILE_LOG_BLOCK_SIZE, - buf, ((byte*) group + 1)); + buf, ((byte*) group + 1), 0); ut_ad(((ulint) group & 0x1UL) == 0); } @@ -2181,7 +2181,7 @@ log_group_read_checkpoint_info( fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->space_id, 0, field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE, - OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL); + OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL, 0); } /******************************************************//** @@ -2564,7 +2564,7 @@ loop: fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0, (ulint) (source_offset / UNIV_PAGE_SIZE), (ulint) (source_offset % UNIV_PAGE_SIZE), - len, buf, (type == LOG_ARCHIVE) ? &log_archive_io : NULL); + len, buf, (type == LOG_ARCHIVE) ? 
&log_archive_io : NULL, 0); start_lsn += len; buf += len; @@ -2689,7 +2689,7 @@ log_group_archive_file_header_write( dest_offset / UNIV_PAGE_SIZE, dest_offset % UNIV_PAGE_SIZE, 2 * OS_FILE_LOG_BLOCK_SIZE, - buf, &log_archive_io); + buf, &log_archive_io, 0); } /******************************************************//** @@ -2726,7 +2726,7 @@ log_group_archive_completed_header_write( dest_offset % UNIV_PAGE_SIZE, OS_FILE_LOG_BLOCK_SIZE, buf + LOG_FILE_ARCH_COMPLETED, - &log_archive_io); + &log_archive_io, 0); } /******************************************************//** @@ -2789,12 +2789,12 @@ loop: file_handle = os_file_create(innodb_file_log_key, name, open_mode, OS_FILE_AIO, - OS_DATA_FILE, &ret); + OS_DATA_FILE, &ret, FALSE); if (!ret && (open_mode == OS_FILE_CREATE)) { file_handle = os_file_create( innodb_file_log_key, name, OS_FILE_OPEN, - OS_FILE_AIO, OS_DATA_FILE, &ret); + OS_FILE_AIO, OS_DATA_FILE, &ret, FALSE); } if (!ret) { @@ -2863,7 +2863,7 @@ loop: (ulint) (next_offset / UNIV_PAGE_SIZE), (ulint) (next_offset % UNIV_PAGE_SIZE), ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf, - &log_archive_io); + &log_archive_io, 0); start_lsn += len; next_offset += len; diff --git a/storage/xtradb/log/log0online.cc b/storage/xtradb/log/log0online.cc index 0b9b9aa3205..60ae4a50626 100644 --- a/storage/xtradb/log/log0online.cc +++ b/storage/xtradb/log/log0online.cc @@ -283,7 +283,7 @@ log_online_read_bitmap_page( ut_a(bitmap_file->offset % MODIFIED_PAGE_BLOCK_SIZE == 0); success = os_file_read(bitmap_file->file, page, bitmap_file->offset, - MODIFIED_PAGE_BLOCK_SIZE); + MODIFIED_PAGE_BLOCK_SIZE, FALSE); if (UNIV_UNLIKELY(!success)) { @@ -539,7 +539,7 @@ log_online_start_bitmap_file(void) log_bmp_sys->out.name, OS_FILE_CREATE, OS_FILE_READ_WRITE, - &success); + &success, FALSE); } if (UNIV_UNLIKELY(!success)) { @@ -699,7 +699,7 @@ log_online_read_init(void) log_bmp_sys->out.file = os_file_create_simple_no_error_handling (innodb_file_bmp_key, log_bmp_sys->out.name, 
OS_FILE_OPEN, - OS_FILE_READ_WRITE, &success); + OS_FILE_READ_WRITE, &success, FALSE); if (!success) { @@ -1104,7 +1104,7 @@ log_online_write_bitmap_page( success = os_file_write(log_bmp_sys->out.name, log_bmp_sys->out.file, block, log_bmp_sys->out.offset, - MODIFIED_PAGE_BLOCK_SIZE); + MODIFIED_PAGE_BLOCK_SIZE); if (UNIV_UNLIKELY(!success)) { /* The following call prints an error message */ @@ -1491,7 +1491,7 @@ log_online_open_bitmap_file_read_only( bitmap_file->name, OS_FILE_OPEN, OS_FILE_READ_ONLY, - &success); + &success, FALSE); if (UNIV_UNLIKELY(!success)) { /* Here and below assume that bitmap file names do not diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc index c7482e93c25..42c238810e8 100644 --- a/storage/xtradb/log/log0recv.cc +++ b/storage/xtradb/log/log0recv.cc @@ -2,6 +2,7 @@ Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -2149,7 +2150,7 @@ recv_apply_log_recs_for_backup(void) error = fil_io(OS_FILE_READ, true, recv_addr->space, zip_size, recv_addr->page_no, 0, zip_size, - block->page.zip.data, NULL); + block->page.zip.data, NULL, 0); if (error == DB_SUCCESS && !buf_zip_decompress(block, TRUE)) { exit(1); @@ -2159,7 +2160,7 @@ recv_apply_log_recs_for_backup(void) recv_addr->space, 0, recv_addr->page_no, 0, UNIV_PAGE_SIZE, - block->frame, NULL); + block->frame, NULL, 0); } if (error != DB_SUCCESS) { @@ -2188,13 +2189,13 @@ recv_apply_log_recs_for_backup(void) recv_addr->space, zip_size, recv_addr->page_no, 0, zip_size, - block->page.zip.data, NULL); + block->page.zip.data, NULL, 0); } else { error = fil_io(OS_FILE_WRITE, true, recv_addr->space, 0, recv_addr->page_no, 0, UNIV_PAGE_SIZE, - block->frame, NULL); + block->frame, NULL, 0); } 
skip_this_recv_addr: recv_addr = HASH_GET_NEXT(addr_hash, recv_addr); @@ -3159,7 +3160,7 @@ recv_recovery_from_checkpoint_start_func( fil_io(OS_FILE_READ | OS_FILE_LOG, true, max_cp_group->space_id, 0, 0, 0, LOG_FILE_HDR_SIZE, - log_hdr_buf, max_cp_group); + log_hdr_buf, max_cp_group, 0); if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, (byte*)"ibbackup", (sizeof "ibbackup") - 1)) { @@ -3190,7 +3191,7 @@ recv_recovery_from_checkpoint_start_func( fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, max_cp_group->space_id, 0, 0, 0, OS_FILE_LOG_BLOCK_SIZE, - log_hdr_buf, max_cp_group); + log_hdr_buf, max_cp_group, 0); } log_hdr_log_block_size @@ -3786,7 +3787,7 @@ try_open_again: file_handle = os_file_create(innodb_file_log_key, name, OS_FILE_OPEN, - OS_FILE_LOG, OS_FILE_AIO, &ret); + OS_FILE_LOG, OS_FILE_AIO, &ret, FALSE); if (ret == FALSE) { ask_again: @@ -3838,7 +3839,7 @@ ask_again: /* Read the archive file header */ fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->archive_space_id, 0, 0, 0, - LOG_FILE_HDR_SIZE, buf, NULL); + LOG_FILE_HDR_SIZE, buf, NULL, 0); /* Check if the archive file header is consistent */ @@ -3912,7 +3913,7 @@ ask_again: fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->archive_space_id, 0, read_offset / UNIV_PAGE_SIZE, - read_offset % UNIV_PAGE_SIZE, len, buf, NULL); + read_offset % UNIV_PAGE_SIZE, len, buf, NULL, 0); ret = recv_scan_log_recs( (buf_pool_get_n_pages() diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index 978f3dc7cc4..28f1b156224 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -2,6 +2,7 @@ Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. +Copyright (c) 2013, 2015, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Percona Inc.. 
Those modifications are @@ -42,10 +43,17 @@ Created 10/21/1995 Heikki Tuuri #include "srv0srv.h" #include "srv0start.h" #include "fil0fil.h" +#include "fil0pagecompress.h" #include "buf0buf.h" #include "btr0types.h" #include "trx0trx.h" #include "srv0mon.h" +#include "srv0srv.h" +#ifdef HAVE_POSIX_FALLOCATE +#include "unistd.h" +#include "fcntl.h" +#include "linux/falloc.h" +#endif #ifndef UNIV_HOTBACKUP # include "os0sync.h" # include "os0thread.h" @@ -73,6 +81,31 @@ Created 10/21/1995 Heikki Tuuri # endif #endif +#if defined(UNIV_LINUX) && defined(HAVE_SYS_STATVFS_H) +#include <sys/statvfs.h> +#endif + +#if defined(UNIV_LINUX) && defined(HAVE_LINUX_FALLOC_H) +#include <linux/falloc.h> +#endif + +#if defined(HAVE_FALLOCATE) +#ifndef FALLOC_FL_KEEP_SIZE +#define FALLOC_FL_KEEP_SIZE 0x01 +#endif +#ifndef FALLOC_FL_PUNCH_HOLE +#define FALLOC_FL_PUNCH_HOLE 0x02 +#endif +#endif + +#ifdef HAVE_LZO +#include "lzo/lzo1x.h" +#endif + +#ifdef HAVE_SNAPPY +#include "snappy-c.h" +#endif + /** Insert buffer segment id */ static const ulint IO_IBUF_SEGMENT = 0; @@ -196,11 +229,38 @@ struct os_aio_slot_t{ and which can be used to identify which pending aio operation was completed */ + ulint bitmap; + + byte* page_compression_page; /*!< Memory allocated for + page compressed page and + freed after the write + has been completed */ + + ibool page_compression; + ulint page_compression_level; + + ulint* write_size; /*!< Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ + + byte* page_buf; /*!< Actual page buffer for + page compressed pages, do not + free this */ + + ibool page_compress_success; + /*!< TRUE if page compression was + successfull, false if not */ + + ulint file_block_size;/*!< file block size */ + #ifdef LINUX_NATIVE_AIO struct iocb control; /* Linux control block for aio */ int n_bytes; /* bytes written/read. 
*/ int ret; /* AIO return code */ #endif /* WIN_ASYNC_IO */ + byte *lzo_mem; /* Temporal memory used by LZO */ }; /** The asynchronous i/o array structure */ @@ -301,6 +361,66 @@ UNIV_INTERN ulint os_n_pending_writes = 0; /** Number of pending read operations */ UNIV_INTERN ulint os_n_pending_reads = 0; +/** After first fallocate failure we will disable os_file_trim */ +UNIV_INTERN ibool os_fallocate_failed = FALSE; + +/**********************************************************************//** +Directly manipulate the allocated disk space by deallocating for the file referred to +by fd for the byte range starting at offset and continuing for len bytes. +Within the specified range, partial file system blocks are zeroed, and whole +file system blocks are removed from the file. After a successful call, +subsequent reads from this range will return zeroes. +@return true if success, false if error */ +UNIV_INTERN +ibool +os_file_trim( +/*=========*/ + os_aio_slot_t* slot); /*!< in: slot structure */ + +/**********************************************************************//** +Allocate memory for temporal buffer used for page compression. This +buffer is freed later. */ +UNIV_INTERN +void +os_slot_alloc_page_buf( +/*===================*/ + os_aio_slot_t* slot); /*!< in: slot structure */ + +#ifdef HAVE_LZO +/**********************************************************************//** +Allocate memory for temporal memory used for page compression when +LZO compression method is used */ +UNIV_INTERN +void +os_slot_alloc_lzo_mem( +/*===================*/ + os_aio_slot_t* slot); /*!< in: slot structure */ +#endif + +/****************************************************************//** +Does error handling when a file operation fails. 
+@return TRUE if we should retry the operation */ +ibool +os_file_handle_error_no_exit( +/*=========================*/ + const char* name, /*!< in: name of a file or NULL */ + const char* operation, /*!< in: operation */ + ibool on_error_silent,/*!< in: if TRUE then don't print + any message to the log. */ + const char* file, /*!< in: file name */ + const ulint line); /*!< in: line */ + +/****************************************************************//** +Tries to enable the atomic write feature, if available, for the specified file +handle. +@return TRUE if success */ +static __attribute__((warn_unused_result)) +ibool +os_file_set_atomic_writes( +/*======================*/ + const char* name, /*!< in: name of the file */ + os_file_t file); /*!< in: handle to the file */ + #ifdef UNIV_DEBUG # ifndef UNIV_HOTBACKUP /**********************************************************************//** @@ -537,6 +657,19 @@ os_file_get_last_error_low( "InnoDB: because of either a thread exit" " or an application request.\n" "InnoDB: Retry attempt is made.\n"); + } else if (err == ECANCELED || err == ENOTTY) { + if (strerror(err) != NULL) { + fprintf(stderr, + "InnoDB: Error number %d" + " means '%s'.\n", + err, strerror(err)); + } + + if(srv_use_atomic_writes) { + fprintf(stderr, + "InnoDB: Error trying to enable atomic writes on " + "non-supported destination!\n"); + } } else { fprintf(stderr, "InnoDB: Some operating system error numbers" @@ -601,6 +734,20 @@ os_file_get_last_error_low( "InnoDB: The error means mysqld does not have" " the access rights to\n" "InnoDB: the directory.\n"); + } else if (err == ECANCELED || err == ENOTTY) { + if (strerror(err) != NULL) { + fprintf(stderr, + "InnoDB: Error number %d" + " means '%s'.\n", + err, strerror(err)); + } + + + if(srv_use_atomic_writes) { + fprintf(stderr, + "InnoDB: Error trying to enable atomic writes on " + "non-supported destination!\n"); + } } else { if (strerror(err) != NULL) { fprintf(stderr, @@ -639,6 +786,9 @@ 
os_file_get_last_error_low( return(OS_FILE_AIO_RESOURCES_RESERVED); } break; + case ECANCELED: + case ENOTTY: + return(OS_FILE_OPERATION_NOT_SUPPORTED); case EINTR: if (srv_use_native_aio) { return(OS_FILE_AIO_INTERRUPTED); @@ -672,7 +822,6 @@ Does error handling when a file operation fails. Conditionally exits (calling exit(3)) based on should_exit value and the error type, if should_exit is TRUE then on_error_silent is ignored. @return TRUE if we should retry the operation */ -static ibool os_file_handle_error_cond_exit( /*===========================*/ @@ -680,9 +829,11 @@ os_file_handle_error_cond_exit( const char* operation, /*!< in: operation */ ibool should_exit, /*!< in: call exit(3) if unknown error and this parameter is TRUE */ - ibool on_error_silent)/*!< in: if TRUE then don't print + ibool on_error_silent,/*!< in: if TRUE then don't print any message to the log iff it is an unknown non-fatal error */ + const char* file, /*!< in: file name */ + const ulint line) /*!< in: line */ { ulint err; @@ -714,6 +865,9 @@ os_file_handle_error_cond_exit( os_has_said_disk_full = TRUE; + fprintf(stderr, + " InnoDB: at file %s and at line %ld\n", file, line); + fflush(stderr); ut_error; @@ -747,6 +901,9 @@ os_file_handle_error_cond_exit( is better to ignore on_error_silent and print an error message to the log. */ + fprintf(stderr, + " InnoDB: at file %s and at line %ld\n", file, line); + if (should_exit || !on_error_silent) { ib_logf(IB_LOG_LEVEL_ERROR, "File %s: '%s' returned OS " "error " ULINTPF ".%s", name ? 
name : "(unknown)", @@ -770,10 +927,12 @@ ibool os_file_handle_error( /*=================*/ const char* name, /*!< in: name of a file or NULL */ - const char* operation) /*!< in: operation */ + const char* operation, /*!< in: operation */ + const char* file, /*!< in: file name */ + const ulint line) /*!< in: line */ { /* exit in case of unknown error */ - return(os_file_handle_error_cond_exit(name, operation, TRUE, FALSE)); + return(os_file_handle_error_cond_exit(name, operation, TRUE, FALSE, file, line)); } /****************************************************************//** @@ -784,12 +943,14 @@ os_file_handle_error_no_exit( /*=========================*/ const char* name, /*!< in: name of a file or NULL */ const char* operation, /*!< in: operation */ - ibool on_error_silent)/*!< in: if TRUE then don't print + ibool on_error_silent,/*!< in: if TRUE then don't print any message to the log. */ + const char* file, /*!< in: file name */ + const ulint line) /*!< in: line */ { /* don't exit in case of unknown error */ return(os_file_handle_error_cond_exit( - name, operation, FALSE, on_error_silent)); + name, operation, FALSE, on_error_silent, file, line)); } #undef USE_FILE_LOCK @@ -932,7 +1093,7 @@ os_file_opendir( if (dir == INVALID_HANDLE_VALUE) { if (error_is_fatal) { - os_file_handle_error(dirname, "opendir"); + os_file_handle_error(dirname, "opendir", __FILE__, __LINE__); } return(NULL); @@ -943,7 +1104,7 @@ os_file_opendir( dir = opendir(dirname); if (dir == NULL && error_is_fatal) { - os_file_handle_error(dirname, "opendir"); + os_file_handle_error(dirname, "opendir", __FILE__, __LINE__); } return(dir); @@ -965,7 +1126,7 @@ os_file_closedir( ret = FindClose(dir); if (!ret) { - os_file_handle_error_no_exit(NULL, "closedir", FALSE); + os_file_handle_error_no_exit(NULL, "closedir", FALSE, __FILE__, __LINE__); return(-1); } @@ -977,7 +1138,7 @@ os_file_closedir( ret = closedir(dir); if (ret) { - os_file_handle_error_no_exit(NULL, "closedir", FALSE); + 
os_file_handle_error_no_exit(NULL, "closedir", FALSE, __FILE__, __LINE__); } return(ret); @@ -1049,7 +1210,7 @@ next_file: return(1); } else { - os_file_handle_error_no_exit(NULL, "readdir_next_file", FALSE); + os_file_handle_error_no_exit(NULL, "readdir_next_file", FALSE, __FILE__, __LINE__); return(-1); } #else @@ -1135,7 +1296,7 @@ next_file: goto next_file; } - os_file_handle_error_no_exit(full_path, "stat", FALSE); + os_file_handle_error_no_exit(full_path, "stat", FALSE, __FILE__, __LINE__); ut_free(full_path); @@ -1186,7 +1347,7 @@ os_file_create_directory( && !fail_if_exists))) { os_file_handle_error_no_exit( - pathname, "CreateDirectory", FALSE); + pathname, "CreateDirectory", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -1199,7 +1360,7 @@ os_file_create_directory( if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) { /* failure */ - os_file_handle_error_no_exit(pathname, "mkdir", FALSE); + os_file_handle_error_no_exit(pathname, "mkdir", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -1309,7 +1470,7 @@ os_file_create_simple_func( retry = os_file_handle_error( name, create_mode == OS_FILE_OPEN ? - "open" : "create"); + "open" : "create", __FILE__, __LINE__); } else { *success = TRUE; @@ -1377,7 +1538,7 @@ os_file_create_simple_func( retry = os_file_handle_error( name, create_mode == OS_FILE_OPEN - ? "open" : "create"); + ? "open" : "create", __FILE__, __LINE__); } else { *success = TRUE; retry = false; @@ -1419,9 +1580,12 @@ os_file_create_simple_no_error_handling_func( OS_FILE_READ_WRITE, or OS_FILE_READ_ALLOW_DELETE; the last option is used by a backup program reading the file */ - ibool* success)/*!< out: TRUE if succeed, FALSE if error */ + ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes) /*! 
in: atomic writes table option + value */ { os_file_t file; + atomic_writes_t awrites = (atomic_writes_t) atomic_writes; *success = FALSE; #ifdef __WIN__ @@ -1482,6 +1646,23 @@ os_file_create_simple_no_error_handling_func( attributes, NULL); // No template file + /* If we have proper file handle and atomic writes should be used, + try to set atomic writes and if that fails when creating a new + table, produce a error. If atomic writes are used on existing + file, ignore error and use traditional writes for that file */ + if (file != INVALID_HANDLE_VALUE + && (awrites == ATOMIC_WRITES_ON || + (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) + && !os_file_set_atomic_writes(name, file)) { + if (create_mode == OS_FILE_CREATE) { + fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); + CloseHandle(file); + os_file_delete_if_exists_func(name); + *success = FALSE; + file = INVALID_HANDLE_VALUE; + } + } + *success = (file != INVALID_HANDLE_VALUE); #else /* __WIN__ */ int create_flag; @@ -1542,6 +1723,23 @@ os_file_create_simple_no_error_handling_func( } #endif /* USE_FILE_LOCK */ + /* If we have proper file handle and atomic writes should be used, + try to set atomic writes and if that fails when creating a new + table, produce a error. 
If atomic writes are used on existing + file, ignore error and use traditional writes for that file */ + if (file != -1 + && (awrites == ATOMIC_WRITES_ON || + (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) + && !os_file_set_atomic_writes(name, file)) { + if (create_mode == OS_FILE_CREATE) { + fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); + close(file); + os_file_delete_if_exists_func(name); + *success = FALSE; + file = -1; + } + } + #endif /* __WIN__ */ return(file); @@ -1625,15 +1823,16 @@ os_file_set_atomic_writes( if (ioctl(file, DFS_IOCTL_ATOMIC_WRITE_SET, &atomic_option)) { - os_file_handle_error_no_exit(name, "ioctl", FALSE); + fprintf(stderr, "InnoDB: Warning:Trying to enable atomic writes on " + "file %s on non-supported platform!\n", name); + os_file_handle_error_no_exit(name, "ioctl(DFS_IOCTL_ATOMIC_WRITE_SET)", FALSE, __FILE__, __LINE__); return(FALSE); } return(TRUE); #else - ib_logf(IB_LOG_LEVEL_ERROR, - "trying to enable atomic writes on non-supported platform! " - "Please restart with innodb_use_atomic_writes disabled.\n"); + fprintf(stderr, "InnoDB: Error: trying to enable atomic writes on " + "file %s on non-supported platform!\n", name); return(FALSE); #endif } @@ -1659,12 +1858,15 @@ os_file_create_func( async i/o or unbuffered i/o: look in the function source code for the exact rules */ ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */ - ibool* success)/*!< out: TRUE if succeed, FALSE if error */ + ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes) /*! 
in: atomic writes table option + value */ { os_file_t file; ibool retry; ibool on_error_no_exit; ibool on_error_silent; + atomic_writes_t awrites = (atomic_writes_t) atomic_writes; #ifdef __WIN__ DBUG_EXECUTE_IF( @@ -1807,9 +2009,9 @@ os_file_create_func( if (on_error_no_exit) { retry = os_file_handle_error_no_exit( - name, operation, on_error_silent); + name, operation, on_error_silent, __FILE__, __LINE__); } else { - retry = os_file_handle_error(name, operation); + retry = os_file_handle_error(name, operation, __FILE__, __LINE__); } } else { *success = TRUE; @@ -1821,11 +2023,21 @@ os_file_create_func( } while (retry); - if (srv_use_atomic_writes && type == OS_DATA_FILE && - !os_file_set_atomic_writes(name, file)) { - CloseHandle(file); + /* If we have proper file handle and atomic writes should be used, + try to set atomic writes and if that fails when creating a new + table, produce a error. If atomic writes are used on existing + file, ignore error and use traditional writes for that file */ + if (file != INVALID_HANDLE_VALUE && type == OS_DATA_FILE + && (awrites == ATOMIC_WRITES_ON || + (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) + && !os_file_set_atomic_writes(name, file)) { + if (create_mode == OS_FILE_CREATE) { + fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); + CloseHandle(file); + os_file_delete_if_exists_func(name); *success = FALSE; file = INVALID_HANDLE_VALUE; + } } #else /* __WIN__ */ @@ -1902,9 +2114,9 @@ os_file_create_func( if (on_error_no_exit) { retry = os_file_handle_error_no_exit( - name, operation, on_error_silent); + name, operation, on_error_silent, __FILE__, __LINE__); } else { - retry = os_file_handle_error(name, operation); + retry = os_file_handle_error(name, operation, __FILE__, __LINE__); } } else { *success = TRUE; @@ -1958,14 +2170,24 @@ os_file_create_func( } #endif /* USE_FILE_LOCK */ - if (srv_use_atomic_writes && type == OS_DATA_FILE - && file != -1 && !os_file_set_atomic_writes(name, 
file)) { - - *success = FALSE; - close(file); - file = -1; + /* If we have proper file handle and atomic writes should be used, + try to set atomic writes and if that fails when creating a new + table, produce a error. If atomic writes are used on existing + file, ignore error and use traditional writes for that file */ + if (file != -1 && type == OS_DATA_FILE + && (awrites == ATOMIC_WRITES_ON || + (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) + && !os_file_set_atomic_writes(name, file)) { + if (create_mode == OS_FILE_CREATE) { + fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); + close(file); + os_file_delete_if_exists_func(name); + *success = FALSE; + file = -1; + } } + #endif /* __WIN__ */ return(file); @@ -2024,7 +2246,7 @@ loop: ret = unlink(name); if (ret != 0 && errno != ENOENT) { - os_file_handle_error_no_exit(name, "delete", FALSE); + os_file_handle_error_no_exit(name, "delete", FALSE, __FILE__, __LINE__); return(false); } @@ -2088,7 +2310,7 @@ loop: ret = unlink(name); if (ret != 0) { - os_file_handle_error_no_exit(name, "delete", FALSE); + os_file_handle_error_no_exit(name, "delete", FALSE, __FILE__, __LINE__); return(false); } @@ -2132,7 +2354,7 @@ os_file_rename_func( return(TRUE); } - os_file_handle_error_no_exit(oldpath, "rename", FALSE); + os_file_handle_error_no_exit(oldpath, "rename", FALSE, __FILE__, __LINE__); return(FALSE); #else @@ -2141,7 +2363,7 @@ os_file_rename_func( ret = rename(oldpath, newpath); if (ret != 0) { - os_file_handle_error_no_exit(oldpath, "rename", FALSE); + os_file_handle_error_no_exit(oldpath, "rename", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -2170,7 +2392,7 @@ os_file_close_func( return(TRUE); } - os_file_handle_error(NULL, "close"); + os_file_handle_error(NULL, "close", __FILE__, __LINE__); return(FALSE); #else @@ -2179,7 +2401,7 @@ os_file_close_func( ret = close(file); if (ret == -1) { - os_file_handle_error(NULL, "close"); + os_file_handle_error(NULL, "close", __FILE__, 
__LINE__); return(FALSE); } @@ -2269,6 +2491,12 @@ os_file_set_size( current_size = 0; +#ifdef UNIV_DEBUG + fprintf(stderr, "InnoDB: Note: File %s current_size %lu extended_size %lu\n", + name, os_file_get_size(file), size); +#endif + + #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { @@ -2279,7 +2507,7 @@ os_file_set_size( INT64PF ", desired size " INT64PF "\n", name, current_size, size); os_file_handle_error_no_exit (name, "posix_fallocate", - FALSE); + FALSE, __FILE__, __LINE__); return(FALSE); } return(TRUE); @@ -2312,6 +2540,7 @@ os_file_set_size( } ret = os_file_write(name, file, buf, current_size, n_bytes); + if (!ret) { ut_free(buf2); goto error_handling; @@ -2466,7 +2695,7 @@ os_file_flush_func( return(TRUE); } - os_file_handle_error(NULL, "flush"); + os_file_handle_error(NULL, "flush", __FILE__, __LINE__); /* It is a fatal error if a file flush does not succeed, because then the database can get corrupt on disk */ @@ -2520,7 +2749,7 @@ os_file_flush_func( ib_logf(IB_LOG_LEVEL_ERROR, "The OS said file flush did not succeed"); - os_file_handle_error(NULL, "flush"); + os_file_handle_error(NULL, "flush", __FILE__, __LINE__); /* It is a fatal error if a file flush does not succeed, because then the database can get corrupt on disk */ @@ -2834,7 +3063,9 @@ os_file_read_func( void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ - trx_t* trx) + trx_t* trx, + ibool compressed) /*!< in: is this file space + compressed ? */ { #ifdef __WIN__ BOOL ret; @@ -2876,6 +3107,14 @@ try_again: os_mutex_exit(os_file_count_mutex); if (ret && len == n) { + /* Note that InnoDB writes files that are not formated + as file spaces and they do not have FIL_PAGE_TYPE + field, thus we must use here information is the actual + file space compressed. 
*/ + if (fil_page_is_compressed((byte *)buf)) { + fil_decompress_page(NULL, (byte *)buf, len, NULL); + } + return(TRUE); } #else /* __WIN__ */ @@ -2888,6 +3127,14 @@ try_again: ret = os_file_pread(file, buf, n, offset, trx); if ((ulint) ret == n) { + /* Note that InnoDB writes files that are not formated + as file spaces and they do not have FIL_PAGE_TYPE + field, thus we must use here information is the actual + file space compressed. */ + if (fil_page_is_compressed((byte *)buf)) { + fil_decompress_page(NULL, (byte *)buf, n, NULL); + } + return(TRUE); } else if (ret == -1) { ib_logf(IB_LOG_LEVEL_ERROR, @@ -2901,7 +3148,7 @@ try_again: n, offset, (lint) ret); } #endif /* __WIN__ */ - retry = os_file_handle_error(NULL, "read"); + retry = os_file_handle_error(NULL, "read", __FILE__, __LINE__); if (retry) { goto try_again; @@ -2936,7 +3183,9 @@ os_file_read_no_error_handling_func( os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ - ulint n) /*!< in: number of bytes to read */ + ulint n, /*!< in: number of bytes to read */ + ibool compressed) /*!< in: is this file space + compressed ? */ { #ifdef __WIN__ BOOL ret; @@ -2980,6 +3229,15 @@ try_again: os_mutex_exit(os_file_count_mutex); if (ret && len == n) { + + /* Note that InnoDB writes files that are not formated + as file spaces and they do not have FIL_PAGE_TYPE + field, thus we must use here information is the actual + file space compressed. */ + if (fil_page_is_compressed((byte *)buf)) { + fil_decompress_page(NULL, (byte *)buf, n, NULL); + } + return(TRUE); } #else /* __WIN__ */ @@ -2992,6 +3250,14 @@ try_again: ret = os_file_pread(file, buf, n, offset, NULL); if ((ulint) ret == n) { + /* Note that InnoDB writes files that are not formated + as file spaces and they do not have FIL_PAGE_TYPE + field, thus we must use here information is the actual + file space compressed. 
*/ + if (fil_page_is_compressed((byte *)buf)) { + fil_decompress_page(NULL, (byte *)buf, n, NULL); + } + return(TRUE); } else if (ret == -1) { ib_logf(IB_LOG_LEVEL_ERROR, @@ -3005,7 +3271,7 @@ try_again: n, offset, (lint) ret); } #endif /* __WIN__ */ - retry = os_file_handle_error_no_exit(NULL, "read", FALSE); + retry = os_file_handle_error_no_exit(NULL, "read", FALSE, __FILE__, __LINE__); if (retry) { goto try_again; @@ -3071,6 +3337,7 @@ os_file_write_func( ut_ad(buf); ut_ad(n > 0); + retry: os_mutex_enter(os_file_count_mutex); @@ -3244,7 +3511,7 @@ os_file_status( } else if (ret) { /* file exists, but stat call failed */ - os_file_handle_error_no_exit(path, "stat", FALSE); + os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -3272,7 +3539,7 @@ os_file_status( } else if (ret) { /* file exists, but stat call failed */ - os_file_handle_error_no_exit(path, "stat", FALSE); + os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -3321,7 +3588,7 @@ os_file_get_status( } else if (ret) { /* file exists, but stat call failed */ - os_file_handle_error_no_exit(path, "stat", FALSE); + os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__); return(DB_FAIL); @@ -3374,7 +3641,7 @@ os_file_get_status( } else if (ret) { /* file exists, but stat call failed */ - os_file_handle_error_no_exit(path, "stat", FALSE); + os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__); return(DB_FAIL); @@ -3938,7 +4205,7 @@ os_aio_array_create( array->slots = static_cast<os_aio_slot_t*>( ut_malloc(n * sizeof(*array->slots))); - memset(array->slots, 0x0, sizeof(n * sizeof(*array->slots))); + memset(array->slots, 0x0, n * sizeof(*array->slots)); #if defined(LINUX_NATIVE_AIO) array->aio_ctx = NULL; @@ -4013,6 +4280,8 @@ os_aio_array_free( /*==============*/ os_aio_array_t*& array) /*!< in, own: array to free */ { + ulint i; + os_mutex_free(array->mutex); os_event_free(array->not_full); 
os_event_free(array->is_empty); @@ -4024,6 +4293,19 @@ os_aio_array_free( } #endif /* LINUX_NATIVE_AIO */ + for (i = 0; i < array->n_slots; i++) { + os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); + if (slot->page_compression_page) { + ut_free(slot->page_compression_page); + slot->page_compression_page = NULL; + } + + if (slot->lzo_mem) { + ut_free(slot->lzo_mem); + slot->lzo_mem = NULL; + } + } + ut_free(array->slots); ut_free(array); @@ -4368,7 +4650,16 @@ os_aio_array_reserve_slot( to write */ os_offset_t offset, /*!< in: file offset */ ulint len, /*!< in: length of the block to read or write */ - ulint space_id) + ulint space_id, + ibool page_compression, /*!< in: is page compression used + on this file space */ + ulint page_compression_level, /*!< page compression + level to be used */ + ulint* write_size)/*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ { os_aio_slot_t* slot = NULL; #ifdef WIN_ASYNC_IO @@ -4460,6 +4751,59 @@ found: slot->io_already_done = FALSE; slot->space_id = space_id; + slot->page_compress_success = FALSE; + slot->write_size = write_size; + slot->page_compression_level = page_compression_level; + slot->page_compression = page_compression; + + if (message1) { + slot->file_block_size = fil_node_get_block_size(message1); + } + + /* If the space is page compressed and this is write operation + then we compress the page */ + if (message1 && type == OS_FILE_WRITE && page_compression ) { + ulint real_len = len; + byte* tmp = NULL; + + /* Release the array mutex while compressing */ + os_mutex_exit(array->mutex); + + // We allocate memory for page compressed buffer if and only + // if it is not yet allocated. 
+ os_slot_alloc_page_buf(slot); + +#ifdef HAVE_LZO + if (innodb_compression_algorithm == 3) { + os_slot_alloc_lzo_mem(slot); + } +#endif + + /* Call page compression */ + tmp = fil_compress_page(fil_node_get_space_id(slot->message1), + (byte *)buf, + slot->page_buf, + len, + page_compression_level, + fil_node_get_block_size(slot->message1), + &real_len, + slot->lzo_mem + ); + + /* If compression succeeded, set up the length and buffer */ + if (tmp != buf) { + len = real_len; + buf = slot->page_buf; + slot->len = real_len; + slot->page_compress_success = TRUE; + } else { + slot->page_compress_success = FALSE; + } + + /* Take array mutex back */ + os_mutex_enter(array->mutex); + } + #ifdef WIN_ASYNC_IO control = &slot->control; control->Offset = (DWORD) offset & 0xFFFFFFFF; @@ -4735,7 +5079,16 @@ os_aio_func( aio operation); ignored if mode is OS_AIO_SYNC */ ulint space_id, - trx_t* trx) + trx_t* trx, + ibool page_compression, /*!< in: is page compression used + on this file space */ + ulint page_compression_level, /*!< page compression + level to be used */ + ulint* write_size)/*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. 
*/ { os_aio_array_t* array; os_aio_slot_t* slot; @@ -4766,7 +5119,8 @@ os_aio_func( no need to use an i/o-handler thread */ if (type == OS_FILE_READ) { - ret = os_file_read_func(file, buf, offset, n, trx); + ret = os_file_read_func(file, buf, offset, n, trx, page_compression); + } else { ut_ad(!srv_read_only_mode); ut_a(type == OS_FILE_WRITE); @@ -4777,14 +5131,10 @@ os_aio_func( os_has_said_disk_full = FALSE; ret = 0; errno = 28;); if (!ret) { - os_file_handle_error_cond_exit(name, "os_file_write_func", TRUE, FALSE); + os_file_handle_error_cond_exit(name, "os_file_write_func", TRUE, FALSE, __FILE__, __LINE__); } } - if (!ret) { - fprintf(stderr, "FAIL"); - } - return ret; } @@ -4835,8 +5185,11 @@ try_again: trx->io_reads++; trx->io_read += n; } + slot = os_aio_array_reserve_slot(type, array, message1, message2, file, - name, buf, offset, n, space_id); + name, buf, offset, n, space_id, + page_compression, page_compression_level, write_size); + if (type == OS_FILE_READ) { if (srv_use_native_aio) { os_n_file_reads++; @@ -4894,7 +5247,7 @@ err_exit: os_aio_array_free_slot(array, slot); if (os_file_handle_error( - name,type == OS_FILE_READ ? "aio read" : "aio write")) { + name,type == OS_FILE_READ ? 
"aio read" : "aio write", __FILE__, __LINE__)) { goto try_again; } @@ -4994,7 +5347,7 @@ os_aio_windows_handle( if (ret && len == slot->len) { ret_val = TRUE; - } else if (os_file_handle_error(slot->name, "Windows aio")) { + } else if (os_file_handle_error(slot->name, "Windows aio", __FILE__, __LINE__)) { retry = TRUE; } else { @@ -5022,12 +5375,18 @@ os_aio_windows_handle( switch (slot->type) { case OS_FILE_WRITE: - ret_val = os_file_write(slot->name, slot->file, slot->buf, - slot->control.Offset, slot->control.OffsetHigh, slot->len); + if (slot->message1 && slot->page_compression && slot->page_compress_success && slot->page_buf) { + ret_val = os_file_write(slot->name, slot->file, slot->page_buf, + slot->offset, slot->len); + } else { + + ret_val = os_file_write(slot->name, slot->file, slot->buf, + slot->offset, slot->len); + } break; case OS_FILE_READ: - ret_val = os_file_read(slot->file, slot->buf, - slot->control.Offset, slot->control.OffsetHigh, slot->len); + ret_val = os_file_read(slot->file, slot->buf, + slot->offset, slot->len, slot->page_compression); break; default: ut_error; @@ -5052,6 +5411,27 @@ os_aio_windows_handle( ret_val = ret && len == slot->len; } + if (slot->type == OS_FILE_READ) { + if (fil_page_is_compressed(slot->buf)) { + os_slot_alloc_page_buf(slot); + +#ifdef HAVE_LZO + if (fil_page_is_lzo_compressed(slot->buf)) { + os_slot_alloc_lzo_mem(slot); + } +#endif + fil_decompress_page(slot->page_buf, slot->buf, slot->len, slot->write_size); + } + } else { + /* OS_FILE_WRITE */ + if (slot->page_compress_success && fil_page_is_compressed(slot->page_buf)) { + if (srv_use_trim && os_fallocate_failed == FALSE) { + // Deallocate unused blocks from file system + os_file_trim(slot); + } + } + } + os_aio_array_free_slot((os_aio_array_t *)slot->arr, slot); return(ret_val); @@ -5141,6 +5521,33 @@ retry: /* We have not overstepped to next segment. 
*/ ut_a(slot->pos < end_pos); + if (slot->type == OS_FILE_READ) { + /* If the table is page compressed and this is read, + we decompress before we annouce the read is + complete. For writes, we free the compressed page. */ + if (fil_page_is_compressed(slot->buf)) { + // We allocate memory for page compressed buffer if and only + // if it is not yet allocated. + os_slot_alloc_page_buf(slot); +#ifdef HAVE_LZO + if (fil_page_is_lzo_compressed(slot->buf)) { + os_slot_alloc_lzo_mem(slot); + } +#endif + fil_decompress_page(slot->page_buf, slot->buf, slot->len, slot->write_size); + } + } else { + /* OS_FILE_WRITE */ + if (slot->page_compress_success && + fil_page_is_compressed(slot->page_buf)) { + ut_ad(slot->page_compression_page); + if (srv_use_trim && os_fallocate_failed == FALSE) { + // Deallocate unused blocks from file system + os_file_trim(slot); + } + } + } + /* Mark this request as completed. The error handling will be done in the calling function. */ os_mutex_enter(array->mutex); @@ -5322,6 +5729,13 @@ found: } else { errno = -slot->ret; + if (slot->ret == 0) { + fprintf(stderr, + "InnoDB: Number of bytes after aio %d requested %lu\n" + "InnoDB: from file %s\n", + slot->n_bytes, slot->len, slot->name); + } + /* os_file_handle_error does tell us if we should retry this IO. As it stands now, we don't do this retry when reaping requests from a different context than @@ -5329,7 +5743,7 @@ found: windows and linux native AIO. We should probably look into this to transparently re-submit the IO. 
*/ - os_file_handle_error(slot->name, "Linux aio"); + os_file_handle_error(slot->name, "Linux aio", __FILE__, __LINE__); ret = FALSE; } @@ -5612,13 +6026,14 @@ consecutive_loop: os_has_said_disk_full = FALSE; ret = 0; errno = 28;); if (!ret) { - os_file_handle_error_cond_exit(aio_slot->name, "os_file_write_func", TRUE, FALSE); + os_file_handle_error_cond_exit(aio_slot->name, "os_file_write_func", TRUE, FALSE, __FILE__, __LINE__); } } else { ret = os_file_read( aio_slot->file, combined_buf, - aio_slot->offset, total_len); + aio_slot->offset, total_len, + aio_slot->page_compression); } srv_set_io_thread_op_info(global_segment, "file i/o done"); @@ -6010,3 +6425,289 @@ os_aio_all_slots_free(void) #endif /* UNIV_DEBUG */ #endif /* !UNIV_HOTBACKUP */ + +#ifdef _WIN32 +#include <winioctl.h> +#ifndef FSCTL_FILE_LEVEL_TRIM +#define FSCTL_FILE_LEVEL_TRIM CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 130, METHOD_BUFFERED, FILE_WRITE_DATA) +typedef struct _FILE_LEVEL_TRIM_RANGE { + DWORDLONG Offset; + DWORDLONG Length; +} FILE_LEVEL_TRIM_RANGE, *PFILE_LEVEL_TRIM_RANGE; + +typedef struct _FILE_LEVEL_TRIM { + DWORD Key; + DWORD NumRanges; + FILE_LEVEL_TRIM_RANGE Ranges[1]; +} FILE_LEVEL_TRIM, *PFILE_LEVEL_TRIM; +#endif +#endif + +/**********************************************************************//** +Directly manipulate the allocated disk space by deallocating for the file referred to +by fd for the byte range starting at offset and continuing for len bytes. +Within the specified range, partial file system blocks are zeroed, and whole +file system blocks are removed from the file. After a successful call, +subsequent reads from this range will return zeroes. 
+@return true if success, false if error */ +UNIV_INTERN +ibool +os_file_trim( +/*=========*/ + os_aio_slot_t* slot) /*!< in: slot structure */ +{ + size_t len = slot->len; + size_t trim_len = UNIV_PAGE_SIZE - len; + os_offset_t off = slot->offset + len; + size_t bsize = slot->file_block_size; + + // len here should be alligned to sector size + ut_ad((trim_len % bsize) == 0); + ut_ad((len % bsize) == 0); + ut_ad(bsize != 0); + ut_ad((off % bsize) == 0); + +#ifdef UNIV_DEBUG + fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu off %lu block_size %lu\n", + *slot->write_size, trim_len, len, off, bsize); +#endif + + // Nothing to do if trim length is zero or if actual write + // size is initialized and it is smaller than current write size. + // In first write if we trim we set write_size to actual bytes + // written and rest of the page is trimmed. In following writes + // there is no need to trim again if write_size only increases + // because rest of the page is already trimmed. If actual write + // size decreases we need to trim again. 
+ if (trim_len == 0 || + (slot->write_size && + *slot->write_size > 0 && + len >= *slot->write_size)) { + +#ifdef UNIV_PAGECOMPRESS_DEBUG + fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu\n", + *slot->write_size, trim_len, len); +#endif + + if (*slot->write_size > 0 && len >= *slot->write_size) { + srv_stats.page_compressed_trim_op_saved.inc(); + } + + *slot->write_size = len; + + return (TRUE); + } + +#ifdef __linux__ +#if defined(HAVE_FALLOCATE) + int ret = fallocate(slot->file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len); + + if (ret) { + /* After first failure do not try to trim again */ + os_fallocate_failed = TRUE; + srv_use_trim = FALSE; + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: [Warning] fallocate call failed with error code %d.\n" + " InnoDB: start: %lu len: %lu payload: %lu\n" + " InnoDB: Disabling fallocate for now.\n", ret, off, trim_len, len); + + os_file_handle_error_no_exit(slot->name, + " fallocate(FALLOC_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE) ", + FALSE, __FILE__, __LINE__); + + if (slot->write_size) { + *slot->write_size = 0; + } + + return (FALSE); + } else { + if (slot->write_size) { + *slot->write_size = len; + } + } +#else + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: [Warning] fallocate not supported on this installation." + " InnoDB: Disabling fallocate for now."); + os_fallocate_failed = TRUE; + srv_use_trim = FALSE; + if (slot->write_size) { + *slot->write_size = 0; + } + +#endif /* HAVE_FALLOCATE ... 
*/ + +#elif defined(_WIN32) + FILE_LEVEL_TRIM flt; + flt.Key = 0; + flt.NumRanges = 1; + flt.Ranges[0].Offset = off; + flt.Ranges[0].Length = trim_len; + + BOOL ret = DeviceIoControl(slot->file, FSCTL_FILE_LEVEL_TRIM, + &flt, sizeof(flt), NULL, NULL, NULL, NULL); + + if (!ret) { + /* After first failure do not try to trim again */ + os_fallocate_failed = TRUE; + srv_use_trim = FALSE; + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: [Warning] fallocate call failed with error.\n" + " InnoDB: start: %lx len: %du payload: %lu\n" + " InnoDB: Disabling fallocate for now.\n", (slot->offset+len), trim_len, len); + + os_file_handle_error_no_exit(slot->name, + " DeviceIOControl(FSCTL_FILE_LEVEL_TRIM) ", + FALSE, __FILE__, __LINE__); + + if (slot->write_size) { + *slot->write_size = 0; + } + return (FALSE); + } else { + if (slot->write_size) { + *slot->write_size = len; + } + } +#endif + + switch(bsize) { + case 512: + srv_stats.page_compression_trim_sect512.add((trim_len / bsize)); + break; + case 1024: + srv_stats.page_compression_trim_sect1024.add((trim_len / bsize)); + break; + case 2948: + srv_stats.page_compression_trim_sect2048.add((trim_len / bsize)); + break; + case 4096: + srv_stats.page_compression_trim_sect4096.add((trim_len / bsize)); + break; + case 8192: + srv_stats.page_compression_trim_sect8192.add((trim_len / bsize)); + break; + case 16384: + srv_stats.page_compression_trim_sect16384.add((trim_len / bsize)); + break; + case 32768: + srv_stats.page_compression_trim_sect32768.add((trim_len / bsize)); + break; + default: + break; + } + + srv_stats.page_compressed_trim_op.inc(); + + return (TRUE); + +} + +/**********************************************************************//** +Allocate memory for temporal buffer used for page compression. This +buffer is freed later. 
*/ +UNIV_INTERN +void +os_slot_alloc_page_buf( +/*===================*/ + os_aio_slot_t* slot) /*!< in: slot structure */ +{ + byte* cbuf2; + byte* cbuf; + ulint asize = UNIV_PAGE_SIZE; + + ut_a(slot != NULL); + if (slot->page_compression_page == NULL) { + /* We allocate extra to avoid memory overwrite on compression */ +#ifdef HAVE_SNAPPY + asize += snappy_max_compressed_length(asize) - UNIV_PAGE_SIZE; +#endif + cbuf2 = static_cast<byte *>(ut_malloc(asize*2)); + cbuf = static_cast<byte *>(ut_align(cbuf2, UNIV_PAGE_SIZE)); + slot->page_compression_page = static_cast<byte *>(cbuf2); + slot->page_buf = static_cast<byte *>(cbuf); + memset(slot->page_compression_page, 0, asize*2); + ut_a(slot->page_buf != NULL); + } +} + +#ifdef HAVE_LZO +/**********************************************************************//** +Allocate memory for temporal memory used for page compression when +LZO compression method is used */ +UNIV_INTERN +void +os_slot_alloc_lzo_mem( +/*===================*/ + os_aio_slot_t* slot) /*!< in: slot structure */ +{ + ut_a(slot != NULL); + if(slot->lzo_mem == NULL) { + slot->lzo_mem = static_cast<byte *>(ut_malloc(LZO1X_1_15_MEM_COMPRESS)); + memset(slot->lzo_mem, 0, LZO1X_1_15_MEM_COMPRESS); + ut_a(slot->lzo_mem != NULL); + } +} +#endif + +/***********************************************************************//** +Try to get number of bytes per sector from file system. 
+@return file block size */ +UNIV_INTERN +ulint +os_file_get_block_size( +/*===================*/ + os_file_t file, /*!< in: handle to a file */ + const char* name) /*!< in: file name */ +{ + ulint fblock_size = 512; + +#if defined(UNIV_LINUX) && defined(HAVE_SYS_STATVFS_H) + struct statvfs fstat; + int err; + + err = fstatvfs(file, &fstat); + + if (err != 0) { + fprintf(stderr, "InnoDB: Warning: fstatvfs() failed on file %s\n", name); + os_file_handle_error_no_exit(name, "fstatvfs()", FALSE, __FILE__, __LINE__); + } else { + fblock_size = fstat.f_bsize; + } +#endif /* UNIV_LINUX */ +#ifdef __WIN__ + { + DWORD SectorsPerCluster = 0; + DWORD BytesPerSector = 0; + DWORD NumberOfFreeClusters = 0; + DWORD TotalNumberOfClusters = 0; + + if (GetFreeSpace((LPCTSTR)name, &SectorsPerCluster, &BytesPerSector, &NumberOfFreeClusters, &TotalNumberOfClusters)) { + fblock_size = BytesPerSector; + } else { + fprintf(stderr, "InnoDB: Warning: GetFreeSpace() failed on file %s\n", name); + os_file_handle_error_no_exit(name, "GetFreeSpace()", FALSE, __FILE__, __LINE__); + } + } +#endif /* __WIN__*/ + + if (fblock_size > UNIV_PAGE_SIZE/2 || fblock_size < 512) { + fprintf(stderr, "InnoDB: Note: File system for file %s has " + "file block size %lu not supported for page_size %lu\n", + name, fblock_size, UNIV_PAGE_SIZE); + + if (fblock_size < 512) { + fblock_size = 512; + } else { + fblock_size = UNIV_PAGE_SIZE/2; + } + + fprintf(stderr, "InnoDB: Note: Using file block size %ld for file %s\n", + fblock_size, name); + } + + return fblock_size; +} diff --git a/storage/xtradb/row/row0log.cc b/storage/xtradb/row/row0log.cc index 4a50be3d098..bba8c32b752 100644 --- a/storage/xtradb/row/row0log.cc +++ b/storage/xtradb/row/row0log.cc @@ -2565,7 +2565,7 @@ all_done: success = os_file_read_no_error_handling( OS_FILE_FROM_FD(index->online_log->fd), index->online_log->head.block, ofs, - srv_sort_buf_size); + srv_sort_buf_size, FALSE); if (!success) { fprintf(stderr, "InnoDB: unable to read temporary 
file" @@ -3393,7 +3393,7 @@ all_done: success = os_file_read_no_error_handling( OS_FILE_FROM_FD(index->online_log->fd), index->online_log->head.block, ofs, - srv_sort_buf_size); + srv_sort_buf_size, FALSE); if (!success) { fprintf(stderr, "InnoDB: unable to read temporary file" diff --git a/storage/xtradb/row/row0merge.cc b/storage/xtradb/row/row0merge.cc index 0a5eb4374f1..58c700f08e5 100644 --- a/storage/xtradb/row/row0merge.cc +++ b/storage/xtradb/row/row0merge.cc @@ -872,7 +872,8 @@ row_merge_read( #endif /* UNIV_DEBUG */ success = os_file_read_no_error_handling(OS_FILE_FROM_FD(fd), buf, - ofs, srv_sort_buf_size); + ofs, srv_sort_buf_size, FALSE); + #ifdef POSIX_FADV_DONTNEED /* Each block is read exactly once. Free up the file cache. */ posix_fadvise(fd, ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED); diff --git a/storage/xtradb/srv/srv0mon.cc b/storage/xtradb/srv/srv0mon.cc index a0dd32c203f..5e15dd15db2 100644 --- a/storage/xtradb/srv/srv0mon.cc +++ b/storage/xtradb/srv/srv0mon.cc @@ -2,6 +2,7 @@ Copyright (c) 2010, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. 
+Copyright (c) 2013, 2014, MariaDB Corporation This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -290,6 +291,18 @@ static monitor_info_t innodb_counter_info[] = MONITOR_EXISTING | MONITOR_DEFAULT_ON), MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_WRITTEN}, + {"buffer_index_pages_written", "buffer", + "Number of index pages written (innodb_index_pages_written)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_INDEX_PAGES_WRITTEN}, + + {"buffer_non_index_pages_written", "buffer", + "Number of non index pages written (innodb_non_index_pages_written)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN}, + {"buffer_pages_read", "buffer", "Number of pages read (innodb_pages_read)", static_cast<monitor_type_t>( @@ -879,6 +892,71 @@ static monitor_info_t innodb_counter_info[] = MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_PAD_DECREMENTS}, + {"compress_saved", "compression", + "Number of bytes saved by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_SAVED}, + + {"compress_trim_sect512", "compression", + "Number of sect-512 TRIMed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512}, + + {"compress_trim_sect1024", "compression", + "Number of sect-1024 TRIMed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024}, + + {"compress_trim_sect2048", "compression", + "Number of sect-2048 TRIMed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048}, + + {"compress_trim_sect4096", "compression", + "Number of sect-4K TRIMed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096}, + + 
{"compress_trim_sect8192", "compression", + "Number of sect-8K TRIMed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192}, + + {"compress_trim_sect16384", "compression", + "Number of sect-16K TRIMed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384}, + + {"compress_trim_sect32768", "compression", + "Number of sect-32K TRIMed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768}, + + {"compress_pages_page_compressed", "compression", + "Number of pages compressed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSED}, + + {"compress_page_compressed_trim_op", "compression", + "Number of TRIM operation performed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP}, + + {"compress_page_compressed_trim_op_saved", "compression", + "Number of TRIM operation saved by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED}, + + {"compress_pages_page_decompressed", "compression", + "Number of pages decompressed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED}, + + {"compress_pages_page_compression_error", "compression", + "Number of page compression errors", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR}, + /* ========== Counters for Index ========== */ {"module_index", "index", "Index Manager", MONITOR_MODULE, @@ -1573,6 +1651,16 @@ srv_mon_process_existing_counter( value = stat.n_pages_written; break; + /* innodb_index_pages_written, the number of index pages written */ + case MONITOR_OVLD_INDEX_PAGES_WRITTEN: + value = srv_stats.index_pages_written; + break; + + /* innodb_non_index_pages_written, the number of non index pages written */ + case MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN: + value = 
srv_stats.non_index_pages_written; + break; + /* innodb_pages_read */ case MONITOR_OVLD_PAGES_READ: buf_get_total_stat(&stat); @@ -1834,6 +1922,46 @@ srv_mon_process_existing_counter( value = btr_cur_n_non_sea; break; + case MONITOR_OVLD_PAGE_COMPRESS_SAVED: + value = srv_stats.page_compression_saved; + break; + case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512: + value = srv_stats.page_compression_trim_sect512; + break; + case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024: + value = srv_stats.page_compression_trim_sect1024; + break; + case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048: + value = srv_stats.page_compression_trim_sect2048; + break; + case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096: + value = srv_stats.page_compression_trim_sect4096; + break; + case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192: + value = srv_stats.page_compression_trim_sect8192; + break; + case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384: + value = srv_stats.page_compression_trim_sect16384; + break; + case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768: + value = srv_stats.page_compression_trim_sect32768; + break; + case MONITOR_OVLD_PAGES_PAGE_COMPRESSED: + value = srv_stats.pages_page_compressed; + break; + case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP: + value = srv_stats.page_compressed_trim_op; + break; + case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED: + value = srv_stats.page_compressed_trim_op_saved; + break; + case MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED: + value = srv_stats.pages_page_decompressed; + break; + case MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR: + value = srv_stats.pages_page_compression_error; + break; + default: ut_error; } diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index 303985cdae2..92238cee405 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -73,6 +73,7 @@ Created 10/8/1995 Heikki Tuuri #include "mysql/plugin.h" #include "mysql/service_thd_wait.h" +#include "fil0pagecompress.h" /* prototypes of new functions added to ha_innodb.cc 
for kill_idle_transaction */ ibool innobase_thd_is_idle(const void* thd); @@ -161,6 +162,23 @@ use simulated aio we build below with threads. Currently we support native aio on windows and linux */ UNIV_INTERN my_bool srv_use_native_aio = TRUE; +/* Default compression level if page compression is used and no compression +level is set for the table */ +UNIV_INTERN long srv_compress_zlib_level = 6; +/* If this flag is TRUE, then we will apply fallocate(PUNCH_HOLE) +to the pages */ +UNIV_INTERN my_bool srv_use_trim = FALSE; +/* If this flag is TRUE, then we will use posix fallocate for file extension */ +UNIV_INTERN my_bool srv_use_posix_fallocate = FALSE; +/* If this flag is TRUE, then we disable doublewrite buffer */ +UNIV_INTERN my_bool srv_use_atomic_writes = FALSE; +/* Algorithm used for page compression; defaults to PAGE_ZLIB_ALGORITHM */ +UNIV_INTERN ulong innodb_compression_algorithm = PAGE_ZLIB_ALGORITHM; +/* Number of threads used for multi-threaded flush */ +UNIV_INTERN long srv_mtflush_threads = MTFLUSH_DEFAULT_WORKER; +/* If this flag is TRUE, then we will use multi threaded flush. */ +UNIV_INTERN my_bool srv_use_mtflush = FALSE; + #ifdef __WIN__ /* Windows native condition variables. We use runtime loading / function pointers, because they are not available on Windows Server 2003 and @@ -466,10 +484,6 @@ pages default true. */ UNIV_INTERN my_bool srv_stats_sample_traditional = TRUE; UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE; -UNIV_INTERN ibool srv_use_atomic_writes = FALSE; -#ifdef HAVE_POSIX_FALLOCATE -UNIV_INTERN ibool srv_use_posix_fallocate = FALSE; -#endif /** doublewrite buffer is 1MB is size i.e.: it can hold 128 16K pages. 
The following parameter is the size of the buffer that is used for @@ -514,6 +528,16 @@ static ulint srv_n_system_rows_read_old = 0; UNIV_INTERN ulint srv_truncated_status_writes = 0; UNIV_INTERN ulint srv_available_undo_logs = 0; +UNIV_INTERN ib_uint64_t srv_page_compression_saved = 0; +UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect512 = 0; +UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect4096 = 0; +UNIV_INTERN ib_uint64_t srv_index_pages_written = 0; +UNIV_INTERN ib_uint64_t srv_non_index_pages_written = 0; +UNIV_INTERN ib_uint64_t srv_pages_page_compressed = 0; +UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op = 0; +UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op_saved = 0; +UNIV_INTERN ib_uint64_t srv_index_page_decompressed = 0; + /* Ensure status variables are on separate cache lines */ #ifdef __powerpc__ @@ -1899,6 +1923,16 @@ srv_export_innodb_status(void) export_vars.innodb_descriptors_memory = os_atomic_increment_ulint(&srv_descriptors_memory, 0); + export_vars.innodb_page_compression_saved = srv_stats.page_compression_saved; + export_vars.innodb_page_compression_trim_sect512 = srv_stats.page_compression_trim_sect512; + export_vars.innodb_page_compression_trim_sect4096 = srv_stats.page_compression_trim_sect4096; + export_vars.innodb_index_pages_written = srv_stats.index_pages_written; + export_vars.innodb_non_index_pages_written = srv_stats.non_index_pages_written; + export_vars.innodb_pages_page_compressed = srv_stats.pages_page_compressed; + export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op; + export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved; + export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed; + #ifdef UNIV_DEBUG rw_lock_s_lock(&purge_sys->latch); trx_id_t done_trx_no = purge_sys->done.trx_no; diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index 73866520ef0..248f1e4db89 100644 --- 
a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -3,6 +3,7 @@ Copyright (c) 1996, 2014, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2009, Percona Inc. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -67,11 +68,14 @@ Created 2/16/1996 Heikki Tuuri #include "ibuf0ibuf.h" #include "srv0start.h" #include "srv0srv.h" +#include "buf0flu.h" + #ifndef UNIV_HOTBACKUP # include "trx0rseg.h" # include "os0proc.h" # include "sync0sync.h" # include "buf0flu.h" +# include "buf0mtflu.h" # include "buf0rea.h" # include "dict0boot.h" # include "dict0load.h" @@ -141,11 +145,13 @@ SRV_MAX_N_IO_THREADS + 6: srv_purge_coordinator_thread SRV_MAX_N_IO_THREADS + 7: srv_worker_thread ... SRV_MAX_N_IO_THREADS + 7 + srv_n_purge_threads - 1: srv_worker_thread */ -static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 7 - + SRV_MAX_N_PURGE_THREADS]; +static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 7 + SRV_MAX_N_PURGE_THREADS + MTFLUSH_MAX_WORKER]; /** Thead handles */ -static os_thread_t thread_handles[SRV_MAX_N_IO_THREADS + 7 + SRV_MAX_N_PURGE_THREADS]; +static os_thread_t thread_handles[SRV_MAX_N_IO_THREADS + 7 + SRV_MAX_N_PURGE_THREADS+ MTFLUSH_MAX_WORKER]; +/* Thread context data for multi-threaded flush */ +void *mtflush_ctx=NULL; + static os_thread_t buf_flush_page_cleaner_thread_handle; static os_thread_t buf_dump_thread_handle; static os_thread_t dict_stats_thread_handle; @@ -570,7 +576,7 @@ create_log_file( *file = os_file_create( innodb_file_log_key, name, OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL, - OS_LOG_FILE, &ret); + OS_LOG_FILE, &ret, FALSE); if (!ret) { ib_logf(IB_LOG_LEVEL_ERROR, "Cannot create %s", name); @@ -787,7 +793,7 @@ open_log_file( *file = os_file_create(innodb_file_log_key, name, OS_FILE_OPEN, 
OS_FILE_AIO, - OS_LOG_FILE, &ret); + OS_LOG_FILE, &ret, FALSE); if (!ret) { ib_logf(IB_LOG_LEVEL_ERROR, "Unable to open '%s'", name); return(DB_ERROR); @@ -878,7 +884,7 @@ open_or_create_data_files( files[i] = os_file_create( innodb_file_data_key, name, OS_FILE_CREATE, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); if (srv_read_only_mode) { @@ -921,7 +927,7 @@ open_or_create_data_files( files[i] = os_file_create( innodb_file_data_key, name, OS_FILE_OPEN_RAW, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); if (!ret) { ib_logf(IB_LOG_LEVEL_ERROR, @@ -954,17 +960,17 @@ open_or_create_data_files( files[i] = os_file_create( innodb_file_data_key, name, OS_FILE_OPEN_RAW, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); } else if (i == 0) { files[i] = os_file_create( innodb_file_data_key, name, OS_FILE_OPEN_RETRY, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); } else { files[i] = os_file_create( innodb_file_data_key, name, OS_FILE_OPEN, OS_FILE_NORMAL, - OS_DATA_FILE, &ret); + OS_DATA_FILE, &ret, FALSE); } if (!ret) { @@ -1046,7 +1052,7 @@ skip_size_check: check_first_page: check_msg = fil_read_first_page( files[i], one_opened, &flags, &space, - min_flushed_lsn, max_flushed_lsn); + min_flushed_lsn, max_flushed_lsn, ULINT_UNDEFINED); if (check_msg) { @@ -1181,7 +1187,7 @@ srv_undo_tablespace_create( innodb_file_data_key, name, srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); if (srv_read_only_mode && ret) { ib_logf(IB_LOG_LEVEL_INFO, @@ -1268,7 +1274,8 @@ srv_undo_tablespace_open( | OS_FILE_ON_ERROR_SILENT, OS_FILE_NORMAL, OS_DATA_FILE, - &ret); + &ret, + FALSE); /* If the file open was successful then load the tablespace. 
*/ @@ -2825,6 +2832,24 @@ files_checked: } if (!srv_read_only_mode) { + if (srv_use_mtflush) { + /* Start multi-threaded flush threads */ + mtflush_ctx = buf_mtflu_handler_init( + srv_mtflush_threads, + srv_buf_pool_instances); + + /* Set up the thread ids */ + buf_mtflu_set_thread_ids( + srv_mtflush_threads, + mtflush_ctx, + (thread_ids + 6 + SRV_MAX_N_PURGE_THREADS)); +#if UNIV_DEBUG + fprintf(stderr, "InnoDB: Note: %s:%d buf-pool-instances:%lu mtflush_threads %lu\n", + __FILE__, __LINE__, srv_buf_pool_instances, srv_mtflush_threads); +#endif + } + + buf_flush_page_cleaner_thread_handle = os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL); buf_flush_page_cleaner_thread_started = true; } @@ -3102,6 +3127,13 @@ innobase_shutdown_for_mysql(void) logs_empty_and_mark_files_at_shutdown() and should have already quit or is quitting right now. */ + + if (srv_use_mtflush) { + /* g. Exit the multi threaded flush threads */ + + buf_mtflu_io_thread_exit(); + } + os_mutex_enter(os_sync_mutex); if (os_thread_count == 0) { diff --git a/storage/xtradb/ut/ut0wqueue.cc b/storage/xtradb/ut/ut0wqueue.cc index d1ba36b3b00..1607e535a94 100644 --- a/storage/xtradb/ut/ut0wqueue.cc +++ b/storage/xtradb/ut/ut0wqueue.cc @@ -162,6 +162,38 @@ ib_wqueue_timedwait( } /******************************************************************** +Return first item on work queue or NULL if queue is empty +@return work item or NULL */ +void* +ib_wqueue_nowait( +/*=============*/ + ib_wqueue_t* wq) /*!< in: work queue */ +{ + ib_list_node_t* node = NULL; + + mutex_enter(&wq->mutex); + + if(!ib_list_is_empty(wq->items)) { + node = ib_list_get_first(wq->items); + + if (node) { + ib_list_remove(wq->items, node); + + } + } + + /* We must reset the event when the list + gets emptied. */ + if(ib_list_is_empty(wq->items)) { + os_event_reset(wq->event); + } + + mutex_exit(&wq->mutex); + + return (node ? 
node->data : NULL); +} + +/******************************************************************** Check if queue is empty. */ ibool @@ -173,3 +205,20 @@ ib_wqueue_is_empty( { return(ib_list_is_empty(wq->items)); } + +/******************************************************************** +Get number of items on queue. +@return number of items on queue */ +ulint +ib_wqueue_len( +/*==========*/ + ib_wqueue_t* wq) /*!< in: work queue */ +{ + ulint len = 0; + + mutex_enter(&wq->mutex); + len = ib_list_len(wq->items); + mutex_exit(&wq->mutex); + + return(len); +} |