diff options
author | Sage Weil <sage@inktank.com> | 2013-01-03 17:15:07 -0800 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-01-03 21:02:35 -0800 |
commit | 39a734fbf34ccd121f17023bcec814e61c8bdaab (patch) | |
tree | 9899bdcf217fec74028cc0a7f4e1a349805acac5 | |
parent | 49416619d733572368e5d2ba7f2b34150c754b23 (diff) | |
download | ceph-39a734fbf34ccd121f17023bcec814e61c8bdaab.tar.gz |
os/FileStore: fix non-btrfs op_seq commit order
The op_seq file is the starting point for journal replay. In stable btrfs
commit mode, which uses a snapshot as its reference point, we should write this
file before we take the snapshot. We normally ignore current/ contents anyway.
On non-btrfs file systems, however, we should only write this file *after*
we do a full sync, and we should then fsync(2) it before we continue
(and potentially trim anything from the journal).
This fixes a serious bug that could cause data loss and corruption after
a power loss event. For a 'kill -9' or crash, however, there was little
risk, since the writes were still captured by the host's cache.
Fixes: #3721
Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Samuel Just <sam.just@inktank.com>
(cherry picked from commit 28d59d374b28629a230d36b93e60a8474c902aa5)
-rw-r--r-- | src/os/FileStore.cc | 21 |
1 files changed, 16 insertions, 5 deletions
diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc index 4ad69bbaa62..9ab0e74b9c0 100644 --- a/src/os/FileStore.cc +++ b/src/os/FileStore.cc @@ -3660,11 +3660,6 @@ void FileStore::sync_entry() sync_epoch++; dout(15) << "sync_entry committing " << cp << " sync_epoch " << sync_epoch << dendl; - int err = write_op_seq(op_fd, cp); - if (err < 0) { - derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl; - assert(0); - } stringstream errstream; if (g_conf->filestore_debug_omap_check && !object_map->check(errstream)) { derr << errstream.str() << dendl; @@ -3672,6 +3667,11 @@ void FileStore::sync_entry() } if (btrfs_stable_commits) { + int err = write_op_seq(op_fd, cp); + if (err < 0) { + derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl; + assert(0 == "error during write_op_seq"); + } if (btrfs_snap_create_v2) { // be smart! @@ -3740,6 +3740,17 @@ void FileStore::sync_entry() dout(15) << "sync_entry doing a full sync (syncfs(2) if possible)" << dendl; sync_filesystem(basedir_fd); } + + int err = write_op_seq(op_fd, cp); + if (err < 0) { + derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl; + assert(0 == "error during write_op_seq"); + } + err = ::fsync(op_fd); + if (err < 0) { + derr << "Error during fsync of op_seq: " << cpp_strerror(err) << dendl; + assert(0 == "error during fsync of op_seq"); + } } utime_t done = ceph_clock_now(g_ceph_context); |