summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorunknown <guilhem@gbichot3.local>2006-11-21 22:22:59 +0100
committerunknown <guilhem@gbichot3.local>2006-11-21 22:22:59 +0100
commita41ac15b960aee306e3464835b05a835fd98771d (patch)
treebac3eccdcb9906c3a529aa064ac3b9dcb8841edf
parent3becab22e9dd774d58983e553b6fbbfb9960f852 (diff)
downloadmariadb-git-a41ac15b960aee306e3464835b05a835fd98771d.tar.gz
Maria - various fixes around durability of files:
1) on Mac OS X >=10.3, fcntl() is recommended over fsync (from the man page: "[With fsync()] the disk drive may also re-order the data so that later writes may be present while earlier writes are not. Applications such as databases that require a strict ordering of writes should use F_FULLFSYNC to ensure their data is written in the order they expect"). I have seen two other pieces of software changing from fsync to F_FULLFSYNC on Mac OS X. 2) to make a file creation/deletion/renaming durable on Linux (at least ext2 as I have tested) (see "man fsync"), a fsync() on the directory is needed: new functions to do that, and a flag MY_SYNC_DIR to do it in my_create/my_delete/my_rename. 3) now using this directory syncing when creating the frm if opt_sync_frm, and for Maria's control file when it is created. include/my_sys.h: new flag to my_create/my_delete/my_rename, which asks to sync the directory after the operation is done (currently does nothing except on Linux) libmysql/CMakeLists.txt: my_create() now depends on my_sync() so my_sync is needed for libmysql libmysql/Makefile.shared: my_create() now depends on my_sync() so my_sync is needed for libmysql mysys/my_create.c: my_create() can now sync the directory if asked for mysys/my_delete.c: my_delete() can now sync the directory if asked for mysys/my_open.c: it was a bug that my_close() is done on fd but a positive fd would still be returned, by my_register_filename(). mysys/my_rename.c: my_rename() can now sync the two directories (the one of "from" and the one of "to") if asked for. mysys/my_sync.c: On recent Mac OS X, fcntl(F_FULLFSYNC) is recommended over fsync() (see "man fsync" on Mac OS X 10.3). my_sync_dir(): to sync a directory after a file creation/deletion/renaming; can be called directly or via MY_SYNC_DIR in my_create/my_delete/my_rename(). No-op except on Linux (see "man fsync" on Linux). 
my_sync_dir_by_file(): same as above, just more practical when the caller has a file name but no directory name ready. Should the #warning even be a #error? I mean do we want to release binaries which don't guarantee any durability? sql/log.cc: a TODO for the future. sql/unireg.cc: If we sync the frm it makes sense to also sync its creation in the directory. storage/maria/ma_control_file.c: control file is vital, try to make it to disk
-rw-r--r--include/my_sys.h3
-rw-r--r--libmysql/CMakeLists.txt1
-rw-r--r--libmysql/Makefile.shared2
-rw-r--r--mysys/my_create.c3
-rw-r--r--mysys/my_delete.c2
-rw-r--r--mysys/my_open.c1
-rw-r--r--mysys/my_rename.c5
-rw-r--r--mysys/my_sync.c74
-rw-r--r--sql/log.cc5
-rw-r--r--sql/unireg.cc9
-rw-r--r--storage/maria/ma_control_file.c11
11 files changed, 104 insertions, 12 deletions
diff --git a/include/my_sys.h b/include/my_sys.h
index fc83c583201..3c5b5e6cbf4 100644
--- a/include/my_sys.h
+++ b/include/my_sys.h
@@ -55,6 +55,7 @@ extern int NEAR my_errno; /* Last error in mysys */
#define MY_WME 16 /* Write message on error */
#define MY_WAIT_IF_FULL 32 /* Wait and try again if disk full error */
#define MY_IGNORE_BADFD 32 /* my_sync: ignore 'bad descriptor' errors */
+#define MY_SYNC_DIR 1024 /* my_create/delete/rename: sync directory */
#define MY_RAID 64 /* Support for RAID */
#define MY_FULL_IO 512 /* For my_read - loop intil I/O is complete */
#define MY_DONT_CHECK_FILESIZE 128 /* Option to init_io_cache() */
@@ -622,6 +623,8 @@ extern FILE *my_fdopen(File Filedes,const char *name, int Flags,myf MyFlags);
extern int my_fclose(FILE *fd,myf MyFlags);
extern int my_chsize(File fd,my_off_t newlength, int filler, myf MyFlags);
extern int my_sync(File fd, myf my_flags);
+extern void my_sync_dir(const char *dir_name, myf my_flags);
+extern void my_sync_dir_by_file(const char *file_name, myf my_flags);
extern int my_error _VARARGS((int nr,myf MyFlags, ...));
extern int my_printf_error _VARARGS((uint my_err, const char *format,
myf MyFlags, ...))
diff --git a/libmysql/CMakeLists.txt b/libmysql/CMakeLists.txt
index d12b6ca6c10..3c9ed53a866 100644
--- a/libmysql/CMakeLists.txt
+++ b/libmysql/CMakeLists.txt
@@ -37,6 +37,7 @@ ADD_LIBRARY(libmysql SHARED dll.c libmysql.def
../mysys/my_open.c ../mysys/my_pread.c ../mysys/my_pthread.c ../mysys/my_read.c
../mysys/my_realloc.c ../mysys/my_rename.c ../mysys/my_seek.c
../mysys/my_static.c ../strings/my_strtoll10.c ../mysys/my_symlink.c
+ ../mysys/my_sync.c
../mysys/my_symlink2.c ../mysys/my_thr_init.c ../sql-common/my_time.c
../strings/my_vsnprintf.c ../mysys/my_wincond.c ../mysys/my_winthread.c
../mysys/my_write.c ../sql/net_serv.cc ../sql-common/pack.c ../sql/password.c
diff --git a/libmysql/Makefile.shared b/libmysql/Makefile.shared
index c2d98a81042..6326e255559 100644
--- a/libmysql/Makefile.shared
+++ b/libmysql/Makefile.shared
@@ -68,7 +68,7 @@ mysysobjects1 = my_init.lo my_static.lo my_malloc.lo my_realloc.lo \
mf_iocache2.lo my_seek.lo my_sleep.lo \
my_pread.lo mf_cache.lo md5.lo sha1.lo \
my_getopt.lo my_gethostbyname.lo my_port.lo \
- my_rename.lo my_chsize.lo
+ my_rename.lo my_chsize.lo my_sync.lo
sqlobjects = net.lo
sql_cmn_objects = pack.lo client.lo my_time.lo
diff --git a/mysys/my_create.c b/mysys/my_create.c
index e1e32b50842..bb3801691a5 100644
--- a/mysys/my_create.c
+++ b/mysys/my_create.c
@@ -53,6 +53,9 @@ File my_create(const char *FileName, int CreateFlags, int access_flags,
fd = open(FileName, access_flags);
#endif
+ if ((MyFlags & MY_SYNC_DIR) && (fd >=0))
+ my_sync_dir_by_file(FileName, MyFlags);
+
DBUG_RETURN(my_register_filename(fd, FileName, FILE_BY_CREATE,
EE_CANTCREATEFILE, MyFlags));
} /* my_create */
diff --git a/mysys/my_delete.c b/mysys/my_delete.c
index de2a9814a56..6d90caa48ed 100644
--- a/mysys/my_delete.c
+++ b/mysys/my_delete.c
@@ -30,6 +30,8 @@ int my_delete(const char *name, myf MyFlags)
my_error(EE_DELETE,MYF(ME_BELL+ME_WAITTANG+(MyFlags & ME_NOINPUT)),
name,errno);
}
+ else if (MyFlags & MY_SYNC_DIR)
+ my_sync_dir_by_file(name, MyFlags);
DBUG_RETURN(err);
} /* my_delete */
diff --git a/mysys/my_open.c b/mysys/my_open.c
index ab2f7c9ff27..344e9c0a43b 100644
--- a/mysys/my_open.c
+++ b/mysys/my_open.c
@@ -162,6 +162,7 @@ File my_register_filename(File fd, const char *FileName, enum file_type
}
pthread_mutex_unlock(&THR_LOCK_open);
(void) my_close(fd, MyFlags);
+ fd= -1;
my_errno=ENOMEM;
}
else
diff --git a/mysys/my_rename.c b/mysys/my_rename.c
index 9c27238cc72..2c9ace6223a 100644
--- a/mysys/my_rename.c
+++ b/mysys/my_rename.c
@@ -61,5 +61,10 @@ int my_rename(const char *from, const char *to, myf MyFlags)
if (MyFlags & (MY_FAE+MY_WME))
my_error(EE_LINK, MYF(ME_BELL+ME_WAITTANG),from,to,my_errno);
}
+ else if (MyFlags & MY_SYNC_DIR)
+ {
+ my_sync_dir_by_file(from, MyFlags);
+ my_sync_dir_by_file(to, MyFlags);
+ }
DBUG_RETURN(error);
} /* my_rename */
diff --git a/mysys/my_sync.c b/mysys/my_sync.c
index c557324b52c..eaa26ef07a7 100644
--- a/mysys/my_sync.c
+++ b/mysys/my_sync.c
@@ -49,6 +49,12 @@ int my_sync(File fd, myf my_flags)
do
{
+#if defined(F_FULLFSYNC)
+ /* Recent Mac OS X versions insist this call is safer than fsync() */
+ if (!(res= fcntl(fd, F_FULLFSYNC, 0)))
+ break; /* ok */
+ /* Some fs don't support F_FULLFSYNC and fail above, fallback: */
+#endif
#if defined(HAVE_FDATASYNC)
res= fdatasync(fd);
#elif defined(HAVE_FSYNC)
@@ -56,6 +62,7 @@ int my_sync(File fd, myf my_flags)
#elif defined(__WIN__)
res= _commit(fd);
#else
+#warning Cannot find a way to sync a file, durability in danger
res= 0; /* No sync (strange OS) */
#endif
} while (res == -1 && errno == EINTR);
@@ -74,3 +81,70 @@ int my_sync(File fd, myf my_flags)
DBUG_RETURN(res);
} /* my_sync */
+
+/*
+  Force directory information to disk. Only Linux is known to need this to
+  make a file creation/deletion/renaming in (from, to) this directory
+  durable; the body is a no-op unless TARGET_OS_LINUX is defined.
+
+  SYNOPSIS
+    my_sync_dir()
+    dir_name             the name of the directory
+    my_flags             only printed to the debug trace
+
+  RETURN
+    nothing (failures are ignored, only traced in the debug log).
+*/
+void my_sync_dir(const char *dir_name, myf my_flags __attribute__((unused)))
+{
+#ifdef TARGET_OS_LINUX
+  File dir_fd;
+  int error= 0;                 /* locals must precede DBUG_ENTER (C89) */
+  DBUG_ENTER("my_sync_dir");
+  DBUG_PRINT("my",("Dir: '%s' my_flags: %d", dir_name, my_flags));
+  /*
+    Syncing a dir does not work on all filesystems (e.g. tmpfs->EINVAL) :
+    ignore errors. But print them to the debug log.
+  */
+  if (((dir_fd= my_open(dir_name, O_RDONLY, MYF(0))) >= 0))
+  {
+    if (my_sync(dir_fd, MYF(0)))
+    {
+      error= errno;
+      DBUG_PRINT("info",("my_sync failed errno: %d", error));
+    }
+    my_close(dir_fd, MYF(0));
+  }
+  else
+  {
+    error= errno;
+    DBUG_PRINT("info",("my_open failed errno: %d", error));
+  }
+  DBUG_VOID_RETURN;
+#endif
+}
+
+
+/*
+  Force directory information to disk. Only Linux is known to need this to
+  make a file creation/deletion/renaming in (from, to) this directory
+  durable; the body is a no-op unless TARGET_OS_LINUX is defined.
+
+  SYNOPSIS
+    my_sync_dir_by_file()
+    file_name            the name of a file in the directory
+    my_flags             passed through to my_sync_dir()
+
+  RETURN
+    nothing (the sync may fail sometimes).
+*/
+void my_sync_dir_by_file(const char *file_name,
+                         myf my_flags __attribute__((unused)))
+{
+#ifdef TARGET_OS_LINUX
+  char dir_name[FN_REFLEN];
+  dirname_part(dir_name, file_name);
+  my_sync_dir(dir_name, my_flags);  /* void function: no 'return <expr>' */
+#endif
+}
+
diff --git a/sql/log.cc b/sql/log.cc
index b63ec563baf..cbba6ec1ddf 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -2102,6 +2102,11 @@ bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)),
0, MYF(MY_WME | MY_WAIT_IF_FULL)))
{
+ /*
+ TODO: all operations creating/deleting the index file or a log, should
+ call my_sync_dir() or my_sync_dir_by_file() to be durable.
+ TODO: file creation should be done with my_create() not my_open().
+ */
if (index_file_nr >= 0)
my_close(index_file_nr,MYF(0));
return TRUE;
diff --git a/sql/unireg.cc b/sql/unireg.cc
index 2ea572c782c..5c2997c1483 100644
--- a/sql/unireg.cc
+++ b/sql/unireg.cc
@@ -285,9 +285,12 @@ bool mysql_create_frm(THD *thd, const char *file_name,
my_free((gptr) screen_buff,MYF(0));
my_free((gptr) keybuff, MYF(0));
- if (opt_sync_frm && !(create_info->options & HA_LEX_CREATE_TMP_TABLE) &&
- my_sync(file, MYF(MY_WME)))
- goto err2;
+ if (opt_sync_frm && !(create_info->options & HA_LEX_CREATE_TMP_TABLE))
+ {
+ if (my_sync(file, MYF(MY_WME)))
+ goto err2;
+ my_sync_dir_by_file(file_name, MYF(0));
+ }
if (my_close(file,MYF(MY_WME)))
goto err3;
diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c
index 5090fac4182..47583466cd7 100644
--- a/storage/maria/ma_control_file.c
+++ b/storage/maria/ma_control_file.c
@@ -134,16 +134,11 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open()
if (create_file)
{
- if ((control_file_fd= my_create(name, 0, open_flags, MYF(0))) < 0)
+ if ((control_file_fd= my_create(name, 0,
+ open_flags, MYF(MY_SYNC_DIR))) < 0)
DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR);
- /*
- TODO: from "man fsync" on Linux:
- "fsync does not necessarily ensure that the entry in the directory
- containing the file has also reached disk. For that an explicit
- fsync on the file descriptor of the directory is also needed."
- So if we just created the file we should sync the directory.
- Maybe there should be a flag of my_create() to do this.
+ /*
To be safer we should make sure that there are no logs or data/index
files around (indeed it could be that the control file alone was deleted
or not restored, and we should not go on with life at this point).