summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2021-03-18 14:43:08 +0200
committerMarko Mäkelä <marko.makela@mariadb.com>2021-03-18 14:43:08 +0200
commit126725421e56293d7c8b816e066271606b59dcd5 (patch)
tree4210f684a907d615da1c34b280218449d1118141
parent39c015b77e0b5adced487ef634f87b39b9b4abc0 (diff)
downloadmariadb-git-126725421e56293d7c8b816e066271606b59dcd5.tar.gz
MDEV-25121: innodb_flush_method=O_DIRECT fails on compressed tables
Tests with 4096-byte sector size confirm that it is safe to use O_DIRECT with page_compressed tables. That had been disabled on Linux, in an attempt to fix MDEV-21584 which had been filed for the O_DIRECT problems earlier. The fil_node_t::block_size was being set mostly correctly until commit 10dd290b4b8b8b235c8cf42e100f0a4415629e79 (MDEV-17380) introduced a regression in MariaDB Server 10.4.4. fil_node_open_file(): Only avoid setting O_DIRECT on ROW_FORMAT=COMPRESSED tables that use KEY_BLOCK_SIZE=1 or 2 (1024 or 2048 bytes). fil_ibd_create(): Avoid setting O_DIRECT on ROW_FORMAT=COMPRESSED tables that use KEY_BLOCK_SIZE=1 or 2 (1024 or 2048 bytes). fil_node_t::find_metadata(): Require fstat() to be always invoked outside Microsoft Windows, so that fil_node_t::block_size can be set. fil_node_t::read_page0(): Rely on find_metadata() to assign block_size. Thanks to Vladislav Vaintroub for testing this on Microsoft Windows using an old-fashioned rotational hard disk with 4KiB sector size. Reviewed by: Vladislav Vaintroub This is a port of commit 00f620b27e960c4b96a8392b27742fd5e41a69e3 and commit 6505662c23ba81331d91f65c18e06a759d6f148f from 10.2.
-rw-r--r--storage/innobase/fil/fil0fil.cc41
-rw-r--r--storage/innobase/include/fil0fil.h2
-rw-r--r--storage/innobase/os/os0file.cc29
3 files changed, 41 insertions, 31 deletions
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index a8b64f836cc..40a0744dd3d 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -488,12 +488,16 @@ static bool fil_node_open_file(fil_node_t* node)
const bool first_time_open = node->size == 0;
- bool o_direct_possible = !FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags);
- if (const ulint ssize = FSP_FLAGS_GET_ZIP_SSIZE(space->flags)) {
- compile_time_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096);
- if (ssize < 3) {
- o_direct_possible = false;
- }
+ ulint type;
+ static_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096,
+ "compatibility");
+ switch (FSP_FLAGS_GET_ZIP_SSIZE(space->flags)) {
+ case 1:
+ case 2:
+ type = OS_DATA_FILE_NO_O_DIRECT;
+ break;
+ default:
+ type = OS_DATA_FILE;
}
if (first_time_open
@@ -514,9 +518,7 @@ retry:
? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT
: OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT,
OS_FILE_AIO,
- o_direct_possible
- ? OS_DATA_FILE
- : OS_DATA_FILE_NO_O_DIRECT,
+ type,
read_only_mode,
&success);
@@ -556,9 +558,7 @@ fail:
? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT
: OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT,
OS_FILE_AIO,
- o_direct_possible
- ? OS_DATA_FILE
- : OS_DATA_FILE_NO_O_DIRECT,
+ type,
read_only_mode,
&success);
}
@@ -2904,13 +2904,22 @@ fil_ibd_create(
return NULL;
}
+ ulint type;
+ static_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096,
+ "compatibility");
+ switch (FSP_FLAGS_GET_ZIP_SSIZE(flags)) {
+ case 1:
+ case 2:
+ type = OS_DATA_FILE_NO_O_DIRECT;
+ break;
+ default:
+ type = OS_DATA_FILE;
+ }
+
file = os_file_create(
innodb_data_file_key, path,
OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT,
- OS_FILE_NORMAL,
- OS_DATA_FILE,
- srv_read_only_mode,
- &success);
+ OS_FILE_NORMAL, type, srv_read_only_mode, &success);
if (!success) {
/* The following call will print an error message */
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 3001817a78c..873fcd67a3a 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -637,7 +637,7 @@ struct fil_node_t {
/** Determine some file metadata when creating or reading the file.
@param file the file that is being created, or OS_FILE_CLOSED */
void find_metadata(os_file_t file = OS_FILE_CLOSED
-#ifdef UNIV_LINUX
+#ifndef _WIN32
, struct stat* statbuf = NULL
#endif
);
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index f96ff6b5171..bf38252578b 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -2,7 +2,7 @@
Copyright (c) 1995, 2019, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
-Copyright (c) 2013, 2020, MariaDB Corporation.
+Copyright (c) 2013, 2021, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
@@ -4137,7 +4137,9 @@ os_file_create_func(
case SRV_ALL_O_DIRECT_FSYNC:
/*Traditional Windows behavior, no buffering for any files.*/
- attributes |= FILE_FLAG_NO_BUFFERING;
+ if (type != OS_DATA_FILE_NO_O_DIRECT) {
+ attributes |= FILE_FLAG_NO_BUFFERING;
+ }
break;
case SRV_FSYNC:
@@ -7707,7 +7709,7 @@ static bool is_file_on_ssd(char *file_path)
/** Determine some file metadata when creating or reading the file.
@param file the file that is being created, or OS_FILE_CLOSED */
void fil_node_t::find_metadata(os_file_t file
-#ifdef UNIV_LINUX
+#ifndef _WIN32
, struct stat* statbuf
#endif
)
@@ -7747,18 +7749,18 @@ void fil_node_t::find_metadata(os_file_t file
block_size = 512;
}
#else
- on_ssd = space->atomic_write_supported;
-# ifdef UNIV_LINUX
- if (!on_ssd) {
- struct stat sbuf;
- if (!statbuf && !fstat(file, &sbuf)) {
- statbuf = &sbuf;
- }
- if (statbuf && fil_system.is_ssd(statbuf->st_dev)) {
- on_ssd = true;
- }
+ struct stat sbuf;
+ if (!statbuf && !fstat(file, &sbuf)) {
+ statbuf = &sbuf;
}
+ if (statbuf) {
+ block_size = statbuf->st_blksize;
+ }
+ on_ssd = space->atomic_write_supported
+# ifdef UNIV_LINUX
+ || (statbuf && fil_system.is_ssd(statbuf->st_dev))
# endif
+ ;
#endif
if (!space->atomic_write_supported) {
space->atomic_write_supported = atomic_write
@@ -7794,7 +7796,6 @@ bool fil_node_t::read_page0(bool first)
if (fstat(handle, &statbuf)) {
return false;
}
- block_size = statbuf.st_blksize;
os_offset_t size_bytes = statbuf.st_size;
#else
os_offset_t size_bytes = os_file_get_size(handle);