summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marko Mäkelä <marko.makela@mariadb.com> 2017-11-02 16:18:41 +0200
committer: Marko Mäkelä <marko.makela@mariadb.com> 2017-11-06 08:53:50 +0200
commit: 30a8764b924a8d90cdd1ad7ad356e6a32b1b29ff (patch)
tree: 3282e091ad061d43c0159ef81d980a246a8dd0da
parent: 6ceb49a941f5b5aa93364ef1f4b57dc1d780cf25 (diff)
download: mariadb-git-30a8764b924a8d90cdd1ad7ad356e6a32b1b29ff.tar.gz
MDEV-14244 MariaDB fails to run with O_DIRECT
os_file_set_size(): If posix_fallocate() returns EINVAL, fall back to extending the file by writing zero-filled data to it. Also, remove some error log output, and make it possible for a server shutdown to interrupt the fall-back code.

MariaDB used to ignore the return value of posix_fallocate() ever since innodb_use_fallocate was introduced in MDEV-4338. If EINVAL was returned, the file would not be extended. Starting with MDEV-11520, MariaDB would treat EINVAL as a hard error.

Why is EINVAL returned? The GNU posix_fallocate() function first tries the fallocate() system call, which returns -EOPNOTSUPP for many file systems (notably, not ext4). It then falls back to extending the file one block at a time by invoking pwrite(fd, "", 1, offset), where offset is 1 less than a multiple of the file block size. This fails with EINVAL if the file is in O_DIRECT mode, because O_DIRECT requires aligned operations.
-rw-r--r-- storage/innobase/os/os0file.cc 20
-rw-r--r-- storage/xtradb/os/os0file.cc 20
2 files changed, 28 insertions, 12 deletions
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index 2a13746516f..7edf69a4571 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -2394,15 +2394,22 @@ os_file_set_size(
} while (err == EINTR
&& srv_shutdown_state == SRV_SHUTDOWN_NONE);
- if (err) {
+ switch (err) {
+ case 0:
+ return true;
+ default:
ib_logf(IB_LOG_LEVEL_ERROR,
"preallocating " INT64PF " bytes for"
"file %s failed with error %d",
size, name, err);
+ /* fall through */
+ case EINTR:
+ errno = err;
+ return false;
+ case EINVAL:
+ /* fall back to the code below */
+ break;
}
- /* Set errno because posix_fallocate() does not do it.*/
- errno = err;
- return(!err);
}
# endif
@@ -2444,11 +2451,12 @@ os_file_set_size(
}
current_size += n_bytes;
- } while (current_size < size);
+ } while (current_size < size
+ && srv_shutdown_state == SRV_SHUTDOWN_NONE);
free(buf2);
- return(ret && os_file_flush(file));
+ return(ret && current_size >= size && os_file_flush(file));
#endif
}
diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc
index 183f65bcbd8..69b8a9da671 100644
--- a/storage/xtradb/os/os0file.cc
+++ b/storage/xtradb/os/os0file.cc
@@ -2629,15 +2629,22 @@ os_file_set_size(
} while (err == EINTR
&& srv_shutdown_state == SRV_SHUTDOWN_NONE);
- if (err) {
+ switch (err) {
+ case 0:
+ return true;
+ default:
ib_logf(IB_LOG_LEVEL_ERROR,
"preallocating " INT64PF " bytes for"
"file %s failed with error %d",
size, name, err);
+ /* fall through */
+ case EINTR:
+ errno = err;
+ return false;
+ case EINVAL:
+ /* fall back to the code below */
+ break;
}
- /* Set errno because posix_fallocate() does not do it.*/
- errno = err;
- return(!err);
}
# endif
@@ -2679,11 +2686,12 @@ os_file_set_size(
}
current_size += n_bytes;
- } while (current_size < size);
+ } while (current_size < size
+ && srv_shutdown_state == SRV_SHUTDOWN_NONE);
free(buf2);
- return(ret && os_file_flush(file));
+ return(ret && current_size >= size && os_file_flush(file));
#endif
}