summaryrefslogtreecommitdiff
path: root/storage/xtradb
diff options
context:
space:
mode:
authorVladislav Vaintroub <wlad@montyprogram.com>2013-04-06 00:35:45 +0200
committerVladislav Vaintroub <wlad@montyprogram.com>2013-04-06 00:35:45 +0200
commit0aa607a01aebc0c98e9f38752af8b1acd1fc3bc2 (patch)
tree15d8d6e34e4c17faeb0125dfb14b0396d0e5d0e6 /storage/xtradb
parent5d4ba5589d7d416af580364a5b75c9fc9ac5e1d9 (diff)
downloadmariadb-git-0aa607a01aebc0c98e9f38752af8b1acd1fc3bc2.tar.gz
MDEV-4338 - Support FusionIO/directFS atomic writes
Diffstat (limited to 'storage/xtradb')
-rw-r--r--storage/xtradb/fil/fil0fil.c22
-rw-r--r--storage/xtradb/handler/ha_innodb.cc50
-rw-r--r--storage/xtradb/include/srv0srv.h5
-rw-r--r--storage/xtradb/os/os0file.c72
-rw-r--r--storage/xtradb/srv/srv0srv.c4
5 files changed, 153 insertions, 0 deletions
diff --git a/storage/xtradb/fil/fil0fil.c b/storage/xtradb/fil/fil0fil.c
index af8353cdf45..397c4de4b6e 100644
--- a/storage/xtradb/fil/fil0fil.c
+++ b/storage/xtradb/fil/fil0fil.c
@@ -4865,6 +4865,24 @@ fil_extend_space_to_desired_size(
start_page_no = space->size;
file_start_page_no = space->size - node->size;
+#ifdef HAVE_POSIX_FALLOCATE
+ if (srv_use_posix_fallocate) {
+ offset_high = size_after_extend * page_size / (4ULL*1024*1024*1024);
+ offset_low = size_after_extend * page_size % (4ULL*1024*1024*1024);
+
+ mutex_exit(&fil_system->mutex);
+ success = os_file_set_size(node->name, node->handle,
+ offset_low, offset_high);
+ mutex_enter(&fil_system->mutex);
+ if (success) {
+ node->size += (size_after_extend - start_page_no);
+ space->size += (size_after_extend - start_page_no);
+ os_has_said_disk_full = FALSE;
+ }
+ goto complete_io;
+ }
+#endif
+
/* Extend at most 64 pages at a time */
buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
buf2 = mem_alloc(buf_size + page_size);
@@ -4921,6 +4939,10 @@ fil_extend_space_to_desired_size(
mem_free(buf2);
+#ifdef HAVE_POSIX_FALLOCATE
+complete_io:
+#endif
+
fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
*actual_size = space->size;
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index 19ead6a5b29..c194dc38889 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -185,6 +185,8 @@ static my_bool innobase_file_format_check = TRUE;
static my_bool innobase_log_archive = FALSE;
static char* innobase_log_arch_dir = NULL;
#endif /* UNIV_LOG_ARCHIVE */
+static my_bool innobase_use_atomic_writes = FALSE;
+static my_bool innobase_use_fallocate = TRUE;
static my_bool innobase_use_doublewrite = TRUE;
static my_bool innobase_use_checksums = TRUE;
static my_bool innobase_fast_checksum = FALSE;
@@ -3057,6 +3059,38 @@ innobase_change_buffering_inited_ok:
srv_kill_idle_transaction = 0;
#endif
+#ifdef HAVE_POSIX_FALLOCATE
+ srv_use_posix_fallocate = (ibool) innobase_use_fallocate;
+#endif
+ srv_use_atomic_writes = (ibool) innobase_use_atomic_writes;
+ if (innobase_use_atomic_writes) {
+ fprintf(stderr, "InnoDB: using atomic writes.\n");
+
+ /* Force doublewrite buffer off, atomic writes replace it. */
+ if (srv_use_doublewrite_buf) {
+ fprintf(stderr, "InnoDB: Switching off doublewrite buffer "
+ "because of atomic writes.\n");
+ innobase_use_doublewrite = srv_use_doublewrite_buf = FALSE;
+ }
+
+ /* Force O_DIRECT on Unixes (on Windows writes are always unbuffered)*/
+#ifndef _WIN32
+ if(!innobase_file_flush_method ||
+ !strstr(innobase_file_flush_method, "O_DIRECT")) {
+ innobase_file_flush_method =
+ srv_file_flush_method_str = (char*)"O_DIRECT";
+ fprintf(stderr, "InnoDB: using O_DIRECT due to atomic writes.\n");
+ }
+#endif
+#ifdef HAVE_POSIX_FALLOCATE
+ /* Due to a bug in directFS, using atomics needs
+ * posix_fallocate to extend the file
+ * pwrite() past end of the file won't work
+ */
+ srv_use_posix_fallocate = TRUE;
+#endif
+ }
+
#ifdef HAVE_PSI_INTERFACE
/* Register keys with MySQL performance schema */
if (PSI_server) {
@@ -12615,6 +12649,20 @@ static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
"Disable with --skip-innodb-doublewrite.",
NULL, NULL, TRUE);
+static MYSQL_SYSVAR_BOOL(use_atomic_writes, innobase_use_atomic_writes,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Prevent partial page writes, via atomic writes."
+ "The option is used to prevent partial writes in case of a crash/poweroff, "
+ "as faster alternative to doublewrite buffer."
+ "Currently this option works only "
+ "on Linux only with FusionIO device, and directFS filesystem.",
+ NULL, NULL, FALSE);
+
+static MYSQL_SYSVAR_BOOL(use_fallocate, innobase_use_fallocate,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Preallocate files fast, using operating system functionality. On POSIX systems, posix_fallocate system call is used.",
+ NULL, NULL, TRUE);
+
static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
PLUGIN_VAR_RQCMDARG,
"Number of IOPs the server can do. Tunes the background IO rate",
@@ -13259,6 +13307,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(doublewrite_file),
MYSQL_SYSVAR(data_home_dir),
MYSQL_SYSVAR(doublewrite),
+ MYSQL_SYSVAR(use_atomic_writes),
+ MYSQL_SYSVAR(use_fallocate),
MYSQL_SYSVAR(recovery_stats),
MYSQL_SYSVAR(fast_shutdown),
MYSQL_SYSVAR(file_io_threads),
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
index a64e5ee607b..586c1e73879 100644
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@@ -249,6 +249,11 @@ extern ulong srv_sys_stats_root_page;
#endif
extern ibool srv_use_doublewrite_buf;
+extern ibool srv_use_atomic_writes;
+#ifdef HAVE_POSIX_FALLOCATE
+extern ibool srv_use_posix_fallocate;
+#endif
+
extern ibool srv_use_checksums;
extern ibool srv_fast_checksum;
diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c
index f4a6c33655f..8f1b3e46bb2 100644
--- a/storage/xtradb/os/os0file.c
+++ b/storage/xtradb/os/os0file.c
@@ -1454,6 +1454,43 @@ os_file_set_nocache(
#endif
}
+
+#ifdef __linux__
+#include <sys/ioctl.h>
+#ifndef DFS_IOCTL_ATOMIC_WRITE_SET
+#define DFS_IOCTL_ATOMIC_WRITE_SET _IOW(0x95, 2, uint)
+#endif
+static int os_file_set_atomic_writes(os_file_t file, const char *name)
+{
+ static int first_time = 1;
+ int atomic_option = 1;
+
+ int ret = ioctl (file, DFS_IOCTL_ATOMIC_WRITE_SET, &atomic_option);
+
+ if (ret) {
+ fprintf(stderr,
+ "InnoDB : can't use atomic write on %s, errno %d\n",
+ name, errno);
+ return ret;
+ }
+ return ret;
+}
+#else
+static int os_file_set_atomic_writes(os_file_t file, const char *name)
+{
+ fprintf(stderr,
+ "InnoDB : can't use atomic writes on %s - not implemented on this platform."
+ "innodb_use_atomic_writes needs to be 0.\n",
+ name);
+#ifdef _WIN32
+ SetLastError(ERROR_INVALID_FUNCTION);
+#else
+ errno = EINVAL;
+#endif
+ return -1;
+}
+#endif
+
/****************************************************************//**
NOTE! Use the corresponding macro os_file_create(), not directly
this function!
@@ -1618,6 +1655,13 @@ try_again:
}
}
+ if (srv_use_atomic_writes && type == OS_DATA_FILE &&
+ os_file_set_atomic_writes(file, name)) {
+ CloseHandle(file);
+ *success = FALSE;
+ file = INVALID_HANDLE_VALUE;
+ }
+
return(file);
#else /* __WIN__ */
os_file_t file;
@@ -1737,6 +1781,12 @@ try_again:
file = -1;
}
#endif /* USE_FILE_LOCK */
+ if (srv_use_atomic_writes && type == OS_DATA_FILE
+ && os_file_set_atomic_writes(file, name)) {
+ close(file);
+ *success = FALSE;
+ file = -1;
+ }
return(file);
#endif /* __WIN__ */
@@ -2081,6 +2131,28 @@ os_file_set_size(
current_size = 0;
desired_size = (ib_int64_t)size + (((ib_int64_t)size_high) << 32);
+#ifdef HAVE_POSIX_FALLOCATE
+ if (srv_use_posix_fallocate) {
+ if (posix_fallocate(file, current_size, desired_size) == -1) {
+ fprintf(stderr,
+ "InnoDB: Error: preallocating data for"
+ " file %s failed at\n"
+ "InnoDB: offset 0 size %lld %lld. Operating system"
+ " error number %llu.\n"
+ "InnoDB: Check that the disk is not full"
+ " or a disk quota exceeded.\n"
+ "InnoDB: Some operating system error numbers"
+ " are described at\n"
+ "InnoDB: "
+ REFMAN "operating-system-error-codes.html\n",
+ name, (long long)size_high, (long long)size, errno);
+
+ return (FALSE);
+ }
+ return (TRUE);
+ }
+#endif
+
/* Write up to 1 megabyte at a time. */
buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
* UNIV_PAGE_SIZE;
diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c
index 1de5523f62c..6edfbaa7755 100644
--- a/storage/xtradb/srv/srv0srv.c
+++ b/storage/xtradb/srv/srv0srv.c
@@ -409,6 +409,10 @@ UNIV_INTERN ulong srv_sys_stats_root_page = 0;
#endif
UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
+UNIV_INTERN ibool srv_use_atomic_writes = FALSE;
+#ifdef HAVE_POSIX_FALLOCATE
+UNIV_INTERN ibool srv_use_posix_fallocate = TRUE;
+#endif
UNIV_INTERN ibool srv_use_checksums = TRUE;
UNIV_INTERN ibool srv_fast_checksum = FALSE;