diff options
author | unknown <guilhem@gbichot3.local> | 2006-11-21 22:22:59 +0100 |
---|---|---|
committer | unknown <guilhem@gbichot3.local> | 2006-11-21 22:22:59 +0100 |
commit | a41ac15b960aee306e3464835b05a835fd98771d (patch) | |
tree | bac3eccdcb9906c3a529aa064ac3b9dcb8841edf | |
parent | 3becab22e9dd774d58983e553b6fbbfb9960f852 (diff) | |
download | mariadb-git-a41ac15b960aee306e3464835b05a835fd98771d.tar.gz |
Maria - various fixes around durability of files:
1) on Mac OS X >=10.3, fcntl() is recommended over fsync (from the
man page: "[With fsync()] the disk drive may also re-order the data
so that later writes may be present while earlier writes are not.
Applications such as databases that require a strict ordering of writes
should use F_FULLFSYNC to ensure their data is written in the order
they expect"). I have seen two other pieces of software changing from
fsync to F_FULLFSYNC on Mac OS X.
2) to make a file creation/deletion/renaming durable on Linux (at least
ext2 as I have tested) (see "man fsync"), a fsync() on the directory
is needed: new functions to do that, and a flag MY_SYNC_DIR to do
it in my_create/my_delete/my_rename.
3) now using this directory syncing when creating the frm if
opt_sync_frm, and for Maria's control file when it is created.
include/my_sys.h:
new flag to my_create/my_delete/my_rename, which asks to sync the
directory after the operation is done (currently does nothing except
on Linux)
libmysql/CMakeLists.txt:
my_create() now depends on my_sync() so my_sync is needed for libmysql
libmysql/Makefile.shared:
my_create() now depends on my_sync() so my_sync is needed for libmysql
mysys/my_create.c:
my_create() can now sync the directory if asked for
mysys/my_delete.c:
my_delete() can now sync the directory if asked for
mysys/my_open.c:
it was a bug that my_close() is done on fd but a positive fd would
still be returned, by my_register_filename().
mysys/my_rename.c:
my_rename() can now sync the two directories (the one of "from" and
the one of "to") if asked for.
mysys/my_sync.c:
On recent Mac OS X, fcntl(F_FULLFSYNC) is recommended over fsync()
(see "man fsync" on Mac OS X 10.3).
my_sync_dir(): to sync a directory after a file creation/deletion/
renaming; can be called directly or via MY_SYNC_DIR in my_create/
my_delete/my_rename(). No-op except on Linux (see "man fsync" on Linux).
my_sync_dir_by_file(): same as above, just more practical when the
caller has a file name but no directory name ready.
Should the #warning even be a #error? I mean do we want to release
binaries which don't guarantee any durability?
sql/log.cc:
a TODO for the future.
sql/unireg.cc:
If we sync the frm it makes sense to also sync its creation in the
directory.
storage/maria/ma_control_file.c:
control file is vital, try to make it to disk
-rw-r--r-- | include/my_sys.h | 3 | ||||
-rw-r--r-- | libmysql/CMakeLists.txt | 1 | ||||
-rw-r--r-- | libmysql/Makefile.shared | 2 | ||||
-rw-r--r-- | mysys/my_create.c | 3 | ||||
-rw-r--r-- | mysys/my_delete.c | 2 | ||||
-rw-r--r-- | mysys/my_open.c | 1 | ||||
-rw-r--r-- | mysys/my_rename.c | 5 | ||||
-rw-r--r-- | mysys/my_sync.c | 74 | ||||
-rw-r--r-- | sql/log.cc | 5 | ||||
-rw-r--r-- | sql/unireg.cc | 9 | ||||
-rw-r--r-- | storage/maria/ma_control_file.c | 11 |
11 files changed, 104 insertions, 12 deletions
diff --git a/include/my_sys.h b/include/my_sys.h index fc83c583201..3c5b5e6cbf4 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -55,6 +55,7 @@ extern int NEAR my_errno; /* Last error in mysys */ #define MY_WME 16 /* Write message on error */ #define MY_WAIT_IF_FULL 32 /* Wait and try again if disk full error */ #define MY_IGNORE_BADFD 32 /* my_sync: ignore 'bad descriptor' errors */ +#define MY_SYNC_DIR 1024 /* my_create/delete/rename: sync directory */ #define MY_RAID 64 /* Support for RAID */ #define MY_FULL_IO 512 /* For my_read - loop intil I/O is complete */ #define MY_DONT_CHECK_FILESIZE 128 /* Option to init_io_cache() */ @@ -622,6 +623,8 @@ extern FILE *my_fdopen(File Filedes,const char *name, int Flags,myf MyFlags); extern int my_fclose(FILE *fd,myf MyFlags); extern int my_chsize(File fd,my_off_t newlength, int filler, myf MyFlags); extern int my_sync(File fd, myf my_flags); +extern void my_sync_dir(const char *dir_name, myf my_flags); +extern void my_sync_dir_by_file(const char *file_name, myf my_flags); extern int my_error _VARARGS((int nr,myf MyFlags, ...)); extern int my_printf_error _VARARGS((uint my_err, const char *format, myf MyFlags, ...)) diff --git a/libmysql/CMakeLists.txt b/libmysql/CMakeLists.txt index d12b6ca6c10..3c9ed53a866 100644 --- a/libmysql/CMakeLists.txt +++ b/libmysql/CMakeLists.txt @@ -37,6 +37,7 @@ ADD_LIBRARY(libmysql SHARED dll.c libmysql.def ../mysys/my_open.c ../mysys/my_pread.c ../mysys/my_pthread.c ../mysys/my_read.c ../mysys/my_realloc.c ../mysys/my_rename.c ../mysys/my_seek.c ../mysys/my_static.c ../strings/my_strtoll10.c ../mysys/my_symlink.c + ../mysys/my_sync.c ../mysys/my_symlink2.c ../mysys/my_thr_init.c ../sql-common/my_time.c ../strings/my_vsnprintf.c ../mysys/my_wincond.c ../mysys/my_winthread.c ../mysys/my_write.c ../sql/net_serv.cc ../sql-common/pack.c ../sql/password.c diff --git a/libmysql/Makefile.shared b/libmysql/Makefile.shared index c2d98a81042..6326e255559 100644 --- a/libmysql/Makefile.shared +++ 
b/libmysql/Makefile.shared @@ -68,7 +68,7 @@ mysysobjects1 = my_init.lo my_static.lo my_malloc.lo my_realloc.lo \ mf_iocache2.lo my_seek.lo my_sleep.lo \ my_pread.lo mf_cache.lo md5.lo sha1.lo \ my_getopt.lo my_gethostbyname.lo my_port.lo \ - my_rename.lo my_chsize.lo + my_rename.lo my_chsize.lo my_sync.lo sqlobjects = net.lo sql_cmn_objects = pack.lo client.lo my_time.lo diff --git a/mysys/my_create.c b/mysys/my_create.c index e1e32b50842..bb3801691a5 100644 --- a/mysys/my_create.c +++ b/mysys/my_create.c @@ -53,6 +53,9 @@ File my_create(const char *FileName, int CreateFlags, int access_flags, fd = open(FileName, access_flags); #endif + if ((MyFlags & MY_SYNC_DIR) && (fd >=0)) + my_sync_dir_by_file(FileName, MyFlags); + DBUG_RETURN(my_register_filename(fd, FileName, FILE_BY_CREATE, EE_CANTCREATEFILE, MyFlags)); } /* my_create */ diff --git a/mysys/my_delete.c b/mysys/my_delete.c index de2a9814a56..6d90caa48ed 100644 --- a/mysys/my_delete.c +++ b/mysys/my_delete.c @@ -30,6 +30,8 @@ int my_delete(const char *name, myf MyFlags) my_error(EE_DELETE,MYF(ME_BELL+ME_WAITTANG+(MyFlags & ME_NOINPUT)), name,errno); } + else if (MyFlags & MY_SYNC_DIR) + my_sync_dir_by_file(name, MyFlags); DBUG_RETURN(err); } /* my_delete */ diff --git a/mysys/my_open.c b/mysys/my_open.c index ab2f7c9ff27..344e9c0a43b 100644 --- a/mysys/my_open.c +++ b/mysys/my_open.c @@ -162,6 +162,7 @@ File my_register_filename(File fd, const char *FileName, enum file_type } pthread_mutex_unlock(&THR_LOCK_open); (void) my_close(fd, MyFlags); + fd= -1; my_errno=ENOMEM; } else diff --git a/mysys/my_rename.c b/mysys/my_rename.c index 9c27238cc72..2c9ace6223a 100644 --- a/mysys/my_rename.c +++ b/mysys/my_rename.c @@ -61,5 +61,10 @@ int my_rename(const char *from, const char *to, myf MyFlags) if (MyFlags & (MY_FAE+MY_WME)) my_error(EE_LINK, MYF(ME_BELL+ME_WAITTANG),from,to,my_errno); } + else if (MyFlags & MY_SYNC_DIR) + { + my_sync_dir_by_file(from, MyFlags); + my_sync_dir_by_file(to, MyFlags); + } 
DBUG_RETURN(error); } /* my_rename */ diff --git a/mysys/my_sync.c b/mysys/my_sync.c index c557324b52c..eaa26ef07a7 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -49,6 +49,12 @@ int my_sync(File fd, myf my_flags) do { +#if defined(F_FULLFSYNC) + /* Recent Mac OS X versions insist this call is safer than fsync() */ + if (!(res= fcntl(fd, F_FULLFSYNC, 0))) + break; /* ok */ + /* Some fs don't support F_FULLFSYNC and fail above, fallback: */ +#endif #if defined(HAVE_FDATASYNC) res= fdatasync(fd); #elif defined(HAVE_FSYNC) @@ -56,6 +62,7 @@ int my_sync(File fd, myf my_flags) #elif defined(__WIN__) res= _commit(fd); #else +#warning Cannot find a way to sync a file, durability in danger res= 0; /* No sync (strange OS) */ #endif } while (res == -1 && errno == EINTR); @@ -74,3 +81,70 @@ int my_sync(File fd, myf my_flags) DBUG_RETURN(res); } /* my_sync */ + +/* + Force directory information to disk. Only Linux is known to need this to + make sure a file creation/deletion/renaming in(from,to) this directory + durable. + + SYNOPSIS + my_sync_dir() + dir_name the name of the directory + my_flags unused + + RETURN + nothing (the sync may fail sometimes). +*/ +void my_sync_dir(const char *dir_name, myf my_flags __attribute__((unused))) +{ +#ifdef TARGET_OS_LINUX + DBUG_ENTER("my_sync_dir"); + DBUG_PRINT("my",("Dir: '%s' my_flags: %d", dir_name, my_flags)); + File dir_fd; + int error= 0; + /* + Syncing a dir does not work on all filesystems (e.g. tmpfs->EINVAL) : + ignore errors. But print them to the debug log. + */ + if (((dir_fd= my_open(dir_name, O_RDONLY, MYF(0))) >= 0)) + { + if (my_sync(dir_fd, MYF(0))) + { + error= errno; + DBUG_PRINT("info",("my_sync failed errno: %d", error)); + } + my_close(dir_fd, MYF(0)); + } + else + { + error= errno; + DBUG_PRINT("info",("my_open failed errno: %d", error)); + } + DBUG_VOID_RETURN; +#endif +} + + +/* + Force directory information to disk. 
Only Linux is known to need this to + make sure a file creation/deletion/renaming in(from,to) this directory + durable. + + SYNOPSIS + my_sync_dir_by_file() + file_name the name of a file in the directory + my_flags unused + + RETURN + nothing (the sync may fail sometimes). +*/ +void my_sync_dir_by_file(const char *file_name, + myf my_flags __attribute__((unused))) +{ +#ifdef TARGET_OS_LINUX + char dir_name[FN_REFLEN]; + dirname_part(dir_name, file_name); + return my_sync_dir(dir_name, my_flags); +#endif +} + diff --git a/sql/log.cc b/sql/log.cc index b63ec563baf..cbba6ec1ddf 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -2102,6 +2102,11 @@ bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg, my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)), 0, MYF(MY_WME | MY_WAIT_IF_FULL))) { + /* + TODO: all operations creating/deleting the index file or a log, should + call my_sync_dir() or my_sync_dir_by_file() to be durable. + TODO: file creation should be done with my_create() not my_open(). 
+ */ if (index_file_nr >= 0) my_close(index_file_nr,MYF(0)); return TRUE; diff --git a/sql/unireg.cc b/sql/unireg.cc index 2ea572c782c..5c2997c1483 100644 --- a/sql/unireg.cc +++ b/sql/unireg.cc @@ -285,9 +285,12 @@ bool mysql_create_frm(THD *thd, const char *file_name, my_free((gptr) screen_buff,MYF(0)); my_free((gptr) keybuff, MYF(0)); - if (opt_sync_frm && !(create_info->options & HA_LEX_CREATE_TMP_TABLE) && - my_sync(file, MYF(MY_WME))) - goto err2; + if (opt_sync_frm && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)) + { + if (my_sync(file, MYF(MY_WME))) + goto err2; + my_sync_dir_by_file(file_name, MYF(0)); + } if (my_close(file,MYF(MY_WME))) goto err3; diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c index 5090fac4182..47583466cd7 100644 --- a/storage/maria/ma_control_file.c +++ b/storage/maria/ma_control_file.c @@ -134,16 +134,11 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open() if (create_file) { - if ((control_file_fd= my_create(name, 0, open_flags, MYF(0))) < 0) + if ((control_file_fd= my_create(name, 0, + open_flags, MYF(MY_SYNC_DIR))) < 0) DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR); - /* - TODO: from "man fsync" on Linux: - "fsync does not necessarily ensure that the entry in the directory - containing the file has also reached disk. For that an explicit - fsync on the file descriptor of the directory is also needed." - So if we just created the file we should sync the directory. - Maybe there should be a flag of my_create() to do this. + /* To be safer we should make sure that there are no logs or data/index files around (indeed it could be that the control file alone was deleted or not restored, and we should not go on with life at this point). |