summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSamuel Just <sam.just@inktank.com>2013-07-18 10:12:17 -0700
committerSamuel Just <sam.just@inktank.com>2013-07-18 13:24:02 -0700
commitf3f92fe21061e21c8b259df5ef283a61782a44db (patch)
tree4bfcfc5842d2e8e007eb93247bc750a5b6f3a010
parent723d691f7a1f53888618dfc311868d1988f61f56 (diff)
downloadceph-f3f92fe21061e21c8b259df5ef283a61782a44db.tar.gz
FileStore: add global replay guard for split, collection_rename
In the event of a split or collection rename, we need to ensure that we don't replay any operations on objects within those collections prior to that point. Thus, we mark a global replay guard on the collection after doing a syncfs and make sure to check that in _check_replay_guard() for all object operations. Fixes: #5154 Signed-off-by: Samuel Just <sam.just@inktank.com> Reviewed-by: Sage Weil <sage@inktank.com>
-rw-r--r--src/os/FileStore.cc83
-rw-r--r--src/os/FileStore.h3
2 files changed, 85 insertions, 1 deletions
diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc
index 5afeba070d8..1363eff27d1 100644
--- a/src/os/FileStore.cc
+++ b/src/os/FileStore.cc
@@ -99,6 +99,7 @@ static const __SWORD_TYPE XFS_SUPER_MAGIC(0x58465342);
#define CLUSTER_SNAP_ITEM "clustersnap_%s"
#define REPLAY_GUARD_XATTR "user.cephos.seq"
+#define GLOBAL_REPLAY_GUARD_XATTR "user.cephos.gseq"
/*
* long file names will have the following format:
@@ -2182,6 +2183,78 @@ int FileStore::_do_transactions(
return r;
}
+/**
+ * Stamp a collection-wide ("global") replay guard on collection cid.
+ *
+ * Syncs the filesystem, then stores the encoded sequencer position spos
+ * in the GLOBAL_REPLAY_GUARD_XATTR xattr of the path get_cdir() produces
+ * for cid, and fsyncs that descriptor.  Does nothing when
+ * btrfs_stable_commits is set.  Failure to open the path or to set the
+ * xattr is treated as fatal (assert).
+ *
+ * @param cid   collection to mark
+ * @param spos  sequencer position recorded as the guard value
+ */
+void FileStore::_set_global_replay_guard(coll_t cid,
+                                         const SequencerPosition &spos)
+{
+  if (btrfs_stable_commits)
+    return;
+
+  // flush everything issued so far before the guard is written
+  sync_filesystem(basedir_fd);
+
+  char coll_path[PATH_MAX];
+  get_cdir(cid, coll_path, sizeof(coll_path));
+  const int dir_fd = ::open(coll_path, O_RDONLY);
+  if (dir_fd < 0) {
+    // capture errno before logging can disturb it
+    const int open_errno = errno;
+    derr << __func__ << ": " << cid << " error " << cpp_strerror(open_errno) << dendl;
+    assert(0 == "_set_global_replay_guard failed");
+  }
+
+  // NOTE(review): presumably a fault-injection hook for testing -- confirm
+  _inject_failure();
+
+  // persist the position at which the sync above happened
+  bufferlist encoded;
+  ::encode(spos, encoded);
+  const int rc = chain_fsetxattr(dir_fd, GLOBAL_REPLAY_GUARD_XATTR,
+                                 encoded.c_str(), encoded.length());
+  if (rc < 0) {
+    derr << __func__ << ": fsetxattr " << GLOBAL_REPLAY_GUARD_XATTR
+         << " got " << cpp_strerror(rc) << dendl;
+    assert(0 == "fsetxattr failed");
+  }
+
+  // the guard only protects us once the xattr itself is on disk
+  ::fsync(dir_fd);
+
+  _inject_failure();
+
+  TEMP_FAILURE_RETRY(::close(dir_fd));
+  dout(10) << __func__ << ": " << spos << " done" << dendl;
+}
+
+/**
+ * Compare spos against the collection-wide replay guard stored on cid.
+ *
+ * Reads the GLOBAL_REPLAY_GUARD_XATTR xattr from the path get_cdir()
+ * produces for cid and decodes it as a SequencerPosition.
+ *
+ * @param cid   collection whose guard is consulted
+ * @param spos  position of the operation being considered
+ * @return 1 when the operation may proceed: not replaying,
+ *         btrfs_stable_commits set, the collection path cannot be opened,
+ *         the xattr cannot be read, or spos >= the stored position.
+ *         -1 when spos is older than the stored guard position.
+ */
+int FileStore::_check_global_replay_guard(coll_t cid,
+                                          const SequencerPosition& spos)
+{
+  if (!replaying || btrfs_stable_commits)
+    return 1;
+
+  char fn[PATH_MAX];
+  get_cdir(cid, fn, sizeof(fn));
+  int fd = ::open(fn, O_RDONLY);
+  if (fd < 0) {
+    dout(10) << __func__ << ": " << cid << " dne" << dendl;
+    return 1; // if collection does not exist, there is no guard, and we can replay.
+  }
+
+  char buf[100];
+  int r = chain_fgetxattr(fd, GLOBAL_REPLAY_GUARD_XATTR, buf, sizeof(buf));
+  // The descriptor is needed only for the xattr read.  Close it here so
+  // that the "no xattr" early return below does not leak it; this check
+  // runs for every object operation during replay.
+  TEMP_FAILURE_RETRY(::close(fd));
+  if (r < 0) {
+    dout(20) << __func__ << " no xattr" << dendl;
+    assert(!m_filestore_fail_eio || r != -EIO);
+    return 1; // no xattr
+  }
+  bufferlist bl;
+  bl.append(buf, r);
+
+  SequencerPosition opos;
+  bufferlist::iterator p = bl.begin();
+  ::decode(opos, p);
+
+  // ops at or after the guard position are allowed; older ones are skipped
+  return spos >= opos ? 1 : -1;
+}
+
+
void FileStore::_set_replay_guard(coll_t cid,
const SequencerPosition &spos,
bool in_progress=false)
@@ -2287,8 +2360,12 @@ int FileStore::_check_replay_guard(coll_t cid, hobject_t oid, const SequencerPos
if (!replaying || btrfs_stable_commits)
return 1;
+ int r = _check_global_replay_guard(cid, spos);
+ if (r < 0)
+ return r;
+
FDRef fd;
- int r = lfn_open(cid, oid, false, &fd);
+ r = lfn_open(cid, oid, false, &fd);
if (r < 0) {
dout(10) << "_check_replay_guard " << cid << " " << oid << " dne" << dendl;
return 1; // if file does not exist, there is no guard, and we can replay.
@@ -4224,6 +4301,9 @@ int FileStore::_collection_rename(const coll_t &cid, const coll_t &ncid,
get_cdir(cid, old_coll, sizeof(old_coll));
get_cdir(ncid, new_coll, sizeof(new_coll));
+ _set_global_replay_guard(cid, spos);
+ _set_replay_guard(cid, spos);
+
if (_check_replay_guard(cid, spos) < 0) {
return 0;
}
@@ -4747,6 +4827,7 @@ int FileStore::_split_collection(coll_t cid,
if (srccmp < 0)
return 0;
+ _set_global_replay_guard(cid, spos);
_set_replay_guard(cid, spos, true);
_set_replay_guard(dest, spos, true);
diff --git a/src/os/FileStore.h b/src/os/FileStore.h
index 5a2a0b88566..86d267dddf1 100644
--- a/src/os/FileStore.h
+++ b/src/os/FileStore.h
@@ -324,6 +324,8 @@ public:
void _set_replay_guard(coll_t cid,
const SequencerPosition& spos,
bool in_progress);
+ void _set_global_replay_guard(coll_t cid,
+ const SequencerPosition &spos);
/// close a replay guard opened with in_progress=true
void _close_replay_guard(int fd, const SequencerPosition& spos);
@@ -348,6 +350,7 @@ public:
int _check_replay_guard(int fd, const SequencerPosition& spos);
int _check_replay_guard(coll_t cid, const SequencerPosition& spos);
int _check_replay_guard(coll_t cid, hobject_t oid, const SequencerPosition& pos);
+ int _check_global_replay_guard(coll_t cid, const SequencerPosition& spos);
// ------------------
// objects