diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/os_posix/os_fs.c')
-rw-r--r-- | src/third_party/wiredtiger/src/os_posix/os_fs.c | 78 |
1 file changed, 62 insertions, 16 deletions
diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c index bc8cbf67025..3c90183caf2 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_fs.c +++ b/src/third_party/wiredtiger/src/os_posix/os_fs.c @@ -31,6 +31,23 @@ /* * __posix_sync -- * Underlying support function to flush a file descriptor. + * + * Fsync calls (or fsync-style calls, for example, fdatasync) are not retried + * on failure, and failure halts the system. + * + * Excerpted from the LWN.net article https://lwn.net/Articles/752063/: + * In short, PostgreSQL assumes that a successful call to fsync() indicates + * that all data written since the last successful call made it safely to + * persistent storage. But that is not what the kernel actually does. When + * a buffered I/O write fails due to a hardware-level error, filesystems + * will respond differently, but that behavior usually includes discarding + * the data in the affected pages and marking them as being clean. So a read + * of the blocks that were just written will likely return something other + * than the data that was written. + * + * Given the shared history of UNIX filesystems, and the difficulty of knowing + * what specific error will be returned under specific circumstances, we don't + * retry fsync-style calls and panic if a flush operation fails. */ static int __posix_sync( @@ -52,25 +69,49 @@ __posix_sync( * OS X F_FULLFSYNC fcntl documentation: * "This is currently implemented on HFS, MS-DOS (FAT), and Universal * Disk Format (UDF) file systems." + * + * See comment in __posix_sync(): sync cannot be retried or fail. */ - WT_SYSCALL_RETRY(fcntl(fd, F_FULLFSYNC, 0) == -1 ? -1 : 0, ret); - if (ret == 0) - return (0); - /* - * Assume F_FULLFSYNC failed because the file system doesn't support it - * and fallback to fsync. 
- */ + static enum { FF_NOTSET, FF_IGNORE, FF_OK } ff_status = FF_NOTSET; + switch (ff_status) { + case FF_NOTSET: + WT_SYSCALL(fcntl(fd, F_FULLFSYNC, 0) == -1 ? -1 : 0, ret); + if (ret == 0) { + ff_status = FF_OK; + return (0); + } + + /* + * If the first F_FULLFSYNC fails, assume the file system + * doesn't support it and fallback to fdatasync or fsync. + */ + ff_status = FF_IGNORE; + __wt_err(session, ret, + "fcntl(F_FULLFSYNC) failed, falling back to fdatasync " + "or fsync"); + break; + case FF_IGNORE: + break; + case FF_OK: + WT_SYSCALL(fcntl(fd, F_FULLFSYNC, 0) == -1 ? -1 : 0, ret); + if (ret == 0) + return (0); + WT_PANIC_RET(session, + ret, "%s: %s: fcntl(F_FULLFSYNC)", name, func); + } #endif #if defined(HAVE_FDATASYNC) - WT_SYSCALL_RETRY(fdatasync(fd), ret); + /* See comment in __posix_sync(): sync cannot be retried or fail. */ + WT_SYSCALL(fdatasync(fd), ret); if (ret == 0) return (0); - WT_RET_MSG(session, ret, "%s: %s: fdatasync", name, func); + WT_PANIC_RET(session, ret, "%s: %s: fdatasync", name, func); #else - WT_SYSCALL_RETRY(fsync(fd), ret); + /* See comment in __posix_sync(): sync cannot be retried or fail. */ + WT_SYSCALL(fsync(fd), ret); if (ret == 0) return (0); - WT_RET_MSG(session, ret, "%s: %s: fsync", name, func); + WT_PANIC_RET(session, ret, "%s: %s: fsync", name, func); #endif } @@ -108,12 +149,15 @@ __posix_directory_sync(WT_SESSION_IMPL *session, const char *path) WT_SYSCALL(close(fd), tret); if (tret != 0) { __wt_err(session, tret, "%s: directory-sync: close", dir); - if (ret == 0) - ret = tret; + WT_TRET(tret); } err: __wt_scr_free(session, &tmp); - return (ret); + if (ret == 0) + return (ret); + + /* See comment in __posix_sync(): sync cannot be retried or fail. 
*/ + WT_PANIC_RET(session, ret, "%s: directory-sync", path); } #endif @@ -460,11 +504,13 @@ __posix_file_sync_nowait(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session) session = (WT_SESSION_IMPL *)wt_session; pfh = (WT_FILE_HANDLE_POSIX *)file_handle; - WT_SYSCALL_RETRY(sync_file_range(pfh->fd, + /* See comment in __posix_sync(): sync cannot be retried or fail. */ + WT_SYSCALL(sync_file_range(pfh->fd, (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret); if (ret == 0) return (0); - WT_RET_MSG(session, ret, + + WT_PANIC_RET(session, ret, "%s: handle-sync-nowait: sync_file_range", file_handle->name); } #endif |