From 30a8764b924a8d90cdd1ad7ad356e6a32b1b29ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?=
Date: Thu, 2 Nov 2017 16:18:41 +0200
Subject: MDEV-14244 MariaDB fails to run with O_DIRECT

os_file_set_size(): If posix_fallocate() returns EINVAL, fall back to
writing zero bytes to the file. Also, remove some error log output, and
make it possible for a server shutdown to interrupt the fall-back code.

MariaDB used to ignore any possible return value from posix_fallocate()
ever since innodb_use_fallocate was introduced in MDEV-4338. If EINVAL
was returned, the file would not be extended.

Starting with MDEV-11520, MariaDB would treat EINVAL as a hard error.

Why is the EINVAL returned? The GNU posix_fallocate() function would
first try the fallocate() system call, which would return -EOPNOTSUPP
for many file systems (notably, not ext4). Then, it would fall back to
extending the file one block at a time by invoking
pwrite(fd, "", 1, offset) where offset is 1 less than a multiple of the
file block size. This would fail with EINVAL if the file is in O_DIRECT
mode, because O_DIRECT requires aligned operation.
--- storage/innobase/os/os0file.cc | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'storage/innobase') diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 2a13746516f..7edf69a4571 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -2394,15 +2394,22 @@ os_file_set_size( } while (err == EINTR && srv_shutdown_state == SRV_SHUTDOWN_NONE); - if (err) { + switch (err) { + case 0: + return true; + default: ib_logf(IB_LOG_LEVEL_ERROR, "preallocating " INT64PF " bytes for" "file %s failed with error %d", size, name, err); + /* fall through */ + case EINTR: + errno = err; + return false; + case EINVAL: + /* fall back to the code below */ + break; } - /* Set errno because posix_fallocate() does not do it.*/ - errno = err; - return(!err); } # endif @@ -2444,11 +2451,12 @@ os_file_set_size( } current_size += n_bytes; - } while (current_size < size); + } while (current_size < size + && srv_shutdown_state == SRV_SHUTDOWN_NONE); free(buf2); - return(ret && os_file_flush(file)); + return(ret && current_size >= size && os_file_flush(file)); #endif } -- cgit v1.2.1