summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorcostan <costan@google.com>2019-01-09 14:53:09 -0800
committerVictor Costan <pwnall@chromium.org>2019-01-09 14:58:22 -0800
commit296de8d5b8e4e57bd1e46c981114dfbe58a8c4fa (patch)
treec524462ed37d9470b42334c084d9edf1cdef5c82
parentb70493ca8586285b49e9888e2b528f71806bdc6e (diff)
downloadleveldb-296de8d5b8e4e57bd1e46c981114dfbe58a8c4fa.tar.gz
leveldb: Fix PosixWritableFile::Sync() on Apple systems.
Apple doesn't follow POSIX specifications for fsync(). Instead, fsync() guarantees to flush the buffer cache to the device, which means the data will survive kernel panics, but may not survive power outages. Applications that need stronger guarantees (like databases) need to use fcntl(F_FULLFSYNC). This CL switches PosixWritableFile::Sync() to get the stronger guarantees on Apple systems. The improved implementation follows the same principles as SQLite [1] and node.js [2]. Research for the fcntl() to fsync() fallback strategy: Apple's released source code at https://opensource.apple.com/ shows at least three different error codes being returned when a filesystem does not support F_FULLFSYNC. fcntl() is implemented in xnu-4903.221.2 in bsd/kern/kern_descrip.c, where it delegates to fcntl_nocancel(). The documentation for fcntl_nocancel() mentions error codes for some operations, but does not include F_FULLFSYNC. The F_FULLSYNC branch in fcntl_nocancel() calls VNOP_IOCTL(_, F_FULLSYNC, NULL, 0, _), whose return value sets the error code. VNOP_IOCTL() is implemented in bsd/vfs/kpi_vfs.c and calls the ioctl function in the vnode's operation vector. The per-filesystem function names follow the pattern _vnop_ioctl() for all the instances in opensource code: {hfs,msdosfs,nfs,ntfs,smbfs,webdav,zfs}_vnop_ioctl(). hfs-407.30.1, msdosfs-229.200.3, and nfs in xnu-4903.221.2 handle F_FULLFSYNC. ntfs-94.200.1 and smb-759.40.1 do not handle F_FULLFSYNC, and the default branch returns ENOSUP. webdav-380.200.1 also does not handle F_FULLFSYNC, but the default branch returns EINVAL. zfs-59 also does not handle F_FULLSYNC, and its default branch returns ENOTTY. From a different angle, Apple's ntfs-94.200.1 includes utility code that uses fcntl(F_FULLFSYNC) and falls back to fsync() just like we do, supporting the hypothesis that there is no good way to detect lack of F_FULLFSYNC support. Also, Apple's fcntl() man page [3] does not mention a way to detect lack of F_FULLFSYNC support. [1] https://www.sqlite.org/src/doc/trunk/src/os_unix.c [2] https://github.com/libuv/libuv/blob/master/src/unix/fs.c [3] https://developer.apple.com/library/archive/documentatiVon/System/Conceptual/ManPages_iPhoneOS/man2/fcntl.2.html Tested: https://travis-ci.org/pwnall/leveldb/builds/477318498 TAP global presubmit ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=228593729
-rw-r--r--CMakeLists.txt1
-rw-r--r--port/port_config.h.in5
-rw-r--r--util/env_posix.cc46
3 files changed, 40 insertions, 12 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 57a0c74..f6a7c0a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -37,6 +37,7 @@ include(CheckCXXSymbolExists)
# versions of do not expose fdatasync() in <unistd.h> in standard C mode
# (-std=c11), but do expose the function in standard C++ mode (-std=c++11).
check_cxx_symbol_exists(fdatasync "unistd.h" HAVE_FDATASYNC)
+check_cxx_symbol_exists(F_FULLFSYNC "fcntl.h" HAVE_FULLFSYNC)
include(CheckCXXSourceCompiles)
diff --git a/port/port_config.h.in b/port/port_config.h.in
index 645c78c..d6a6d01 100644
--- a/port/port_config.h.in
+++ b/port/port_config.h.in
@@ -10,6 +10,11 @@
#cmakedefine01 HAVE_FDATASYNC
#endif // !defined(HAVE_FDATASYNC)
+// Define to 1 if you have a definition for F_FULLFSYNC in <fcntl.h>.
+#if !defined(HAVE_FULLFSYNC)
+#cmakedefine01 HAVE_FULLFSYNC
+#endif // !defined(HAVE_FULLFSYNC)
+
// Define to 1 if you have Google CRC32C.
#if !defined(HAVE_CRC32C)
#cmakedefine01 HAVE_CRC32C
diff --git a/util/env_posix.cc b/util/env_posix.cc
index 76bb648..362adb3 100644
--- a/util/env_posix.cc
+++ b/util/env_posix.cc
@@ -35,12 +35,6 @@
#include "util/posix_logger.h"
#include "util/env_posix_test_helper.h"
-// HAVE_FDATASYNC is defined in the auto-generated port_config.h, which is
-// included by port_stdcxx.h.
-#if !HAVE_FDATASYNC
-#define fdatasync fsync
-#endif // !HAVE_FDATASYNC
-
namespace leveldb {
namespace {
@@ -314,10 +308,11 @@ class PosixWritableFile final : public WritableFile {
}
status = FlushBuffer();
- if (status.ok() && ::fdatasync(fd_) != 0) {
- status = PosixError(filename_, errno);
+ if (!status.ok()) {
+ return status;
}
- return status;
+
+ return SyncFd(fd_, filename_);
}
private:
@@ -352,14 +347,41 @@ class PosixWritableFile final : public WritableFile {
if (fd < 0) {
status = PosixError(dirname_, errno);
} else {
- if (::fsync(fd) < 0) {
- status = PosixError(dirname_, errno);
- }
+ status = SyncFd(fd, dirname_);
::close(fd);
}
return status;
}
+ // Ensures that all the caches associated with the given file descriptor's
+ // data are flushed all the way to durable media, and can withstand power
+ // failures.
+ //
+ // The path argument is only used to populate the description string in the
+ // returned Status if an error occurs.
+ static Status SyncFd(int fd, const std::string& fd_path) {
+#if HAVE_FULLFSYNC
+ // On macOS and iOS, fsync() doesn't guarantee durability past power
+ // failures. fcntl(F_FULLFSYNC) is required for that purpose. Some
+ // filesystems don't support fcntl(F_FULLFSYNC), and require a fallback to
+ // fsync().
+ if (::fcntl(fd, F_FULLFSYNC) == 0) {
+ return Status::OK();
+ }
+#endif // HAVE_FULLFSYNC
+
+#if HAVE_FDATASYNC
+ bool sync_success = ::fdatasync(fd) == 0;
+#else
+ bool sync_success = ::fsync(fd) == 0;
+#endif // HAVE_FDATASYNC
+
+ if (sync_success) {
+ return Status::OK();
+ }
+ return PosixError(fd_path, errno);
+ }
+
// Returns the directory name in a path pointing to a file.
//
// Returns "." if the path does not contain any directory separator.