summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sage Weil <sage@inktank.com> 2013-01-03 17:15:07 -0800
committer Sage Weil <sage@inktank.com> 2013-01-03 21:02:35 -0800
commit 39a734fbf34ccd121f17023bcec814e61c8bdaab (patch)
tree 9899bdcf217fec74028cc0a7f4e1a349805acac5
parent 49416619d733572368e5d2ba7f2b34150c754b23 (diff)
download ceph-39a734fbf34ccd121f17023bcec814e61c8bdaab.tar.gz
os/FileStore: fix non-btrfs op_seq commit order
The op_seq file is the starting point for journal replay. For stable btrfs commit mode, which is using a snapshot as a reference, we should write this file before we take the snap. We normally ignore current/ contents anyway.

On non-btrfs file systems, however, we should only write this file *after* we do a full sync, and we should then fsync(2) it before we continue (and potentially trim anything from the journal).

This fixes a serious bug that could cause data loss and corruption after a power loss event. For a 'kill -9' or crash, however, there was little risk, since the writes were still captured by the host's cache.

Fixes: #3721
Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Samuel Just <sam.just@inktank.com>
(cherry picked from commit 28d59d374b28629a230d36b93e60a8474c902aa5)
-rw-r--r-- src/os/FileStore.cc 21
1 files changed, 16 insertions, 5 deletions
diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc
index 4ad69bbaa62..9ab0e74b9c0 100644
--- a/src/os/FileStore.cc
+++ b/src/os/FileStore.cc
@@ -3660,11 +3660,6 @@ void FileStore::sync_entry()
sync_epoch++;
dout(15) << "sync_entry committing " << cp << " sync_epoch " << sync_epoch << dendl;
- int err = write_op_seq(op_fd, cp);
- if (err < 0) {
- derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl;
- assert(0);
- }
stringstream errstream;
if (g_conf->filestore_debug_omap_check && !object_map->check(errstream)) {
derr << errstream.str() << dendl;
@@ -3672,6 +3667,11 @@ void FileStore::sync_entry()
}
if (btrfs_stable_commits) {
+ int err = write_op_seq(op_fd, cp);
+ if (err < 0) {
+ derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl;
+ assert(0 == "error during write_op_seq");
+ }
if (btrfs_snap_create_v2) {
// be smart!
@@ -3740,6 +3740,17 @@ void FileStore::sync_entry()
dout(15) << "sync_entry doing a full sync (syncfs(2) if possible)" << dendl;
sync_filesystem(basedir_fd);
}
+
+ int err = write_op_seq(op_fd, cp);
+ if (err < 0) {
+ derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl;
+ assert(0 == "error during write_op_seq");
+ }
+ err = ::fsync(op_fd);
+ if (err < 0) {
+ derr << "Error during fsync of op_seq: " << cpp_strerror(err) << dendl;
+ assert(0 == "error during fsync of op_seq");
+ }
}
utime_t done = ceph_clock_now(g_ceph_context);