diff options
author | Samuel Just <sam.just@inktank.com> | 2013-07-18 10:12:17 -0700 |
---|---|---|
committer | Samuel Just <sam.just@inktank.com> | 2013-07-18 13:24:02 -0700 |
commit | f3f92fe21061e21c8b259df5ef283a61782a44db (patch) | |
tree | 4bfcfc5842d2e8e007eb93247bc750a5b6f3a010 | |
parent | 723d691f7a1f53888618dfc311868d1988f61f56 (diff) | |
download | ceph-f3f92fe21061e21c8b259df5ef283a61782a44db.tar.gz |
FileStore: add global replay guard for split, collection_rename
In the event of a split or collection rename, we need to ensure that
we don't replay any operations on objects within those collections
prior to that point. Thus, we mark a global replay guard on the
collection after doing a syncfs and make sure to check that in
_check_replay_guard() for all object operations.
Fixes: #5154
Signed-off-by: Samuel Just <sam.just@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
-rw-r--r-- | src/os/FileStore.cc | 83 | ||||
-rw-r--r-- | src/os/FileStore.h | 3 |
2 files changed, 85 insertions, 1 deletion
diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc index 5afeba070d8..1363eff27d1 100644 --- a/src/os/FileStore.cc +++ b/src/os/FileStore.cc @@ -99,6 +99,7 @@ static const __SWORD_TYPE XFS_SUPER_MAGIC(0x58465342); #define CLUSTER_SNAP_ITEM "clustersnap_%s" #define REPLAY_GUARD_XATTR "user.cephos.seq" +#define GLOBAL_REPLAY_GUARD_XATTR "user.cephos.gseq" /* * long file names will have the following format: @@ -2182,6 +2183,78 @@ int FileStore::_do_transactions( return r; } +void FileStore::_set_global_replay_guard(coll_t cid, + const SequencerPosition &spos) +{ + if (btrfs_stable_commits) + return; + + // sync all previous operations on this sequencer + sync_filesystem(basedir_fd); + + char fn[PATH_MAX]; + get_cdir(cid, fn, sizeof(fn)); + int fd = ::open(fn, O_RDONLY); + if (fd < 0) { + int err = errno; + derr << __func__ << ": " << cid << " error " << cpp_strerror(err) << dendl; + assert(0 == "_set_global_replay_guard failed"); + } + + _inject_failure(); + + // then record that we did it + bufferlist v; + ::encode(spos, v); + int r = chain_fsetxattr(fd, GLOBAL_REPLAY_GUARD_XATTR, v.c_str(), v.length()); + if (r < 0) { + derr << __func__ << ": fsetxattr " << GLOBAL_REPLAY_GUARD_XATTR + << " got " << cpp_strerror(r) << dendl; + assert(0 == "fsetxattr failed"); + } + + // and make sure our xattr is durable. + ::fsync(fd); + + _inject_failure(); + + TEMP_FAILURE_RETRY(::close(fd)); + dout(10) << __func__ << ": " << spos << " done" << dendl; +} + +int FileStore::_check_global_replay_guard(coll_t cid, + const SequencerPosition& spos) +{ + if (!replaying || btrfs_stable_commits) + return 1; + + char fn[PATH_MAX]; + get_cdir(cid, fn, sizeof(fn)); + int fd = ::open(fn, O_RDONLY); + if (fd < 0) { + dout(10) << __func__ << ": " << cid << " dne" << dendl; + return 1; // if collection does not exist, there is no guard, and we can replay. 
+ } + + char buf[100]; + int r = chain_fgetxattr(fd, GLOBAL_REPLAY_GUARD_XATTR, buf, sizeof(buf)); + if (r < 0) { + dout(20) << __func__ << " no xattr" << dendl; + assert(!m_filestore_fail_eio || r != -EIO); + return 1; // no xattr + } + bufferlist bl; + bl.append(buf, r); + + SequencerPosition opos; + bufferlist::iterator p = bl.begin(); + ::decode(opos, p); + + TEMP_FAILURE_RETRY(::close(fd)); + return spos >= opos ? 1 : -1; +} + + void FileStore::_set_replay_guard(coll_t cid, const SequencerPosition &spos, bool in_progress=false) @@ -2287,8 +2360,12 @@ int FileStore::_check_replay_guard(coll_t cid, hobject_t oid, const SequencerPos if (!replaying || btrfs_stable_commits) return 1; + int r = _check_global_replay_guard(cid, spos); + if (r < 0) + return r; + FDRef fd; - int r = lfn_open(cid, oid, false, &fd); + r = lfn_open(cid, oid, false, &fd); if (r < 0) { dout(10) << "_check_replay_guard " << cid << " " << oid << " dne" << dendl; return 1; // if file does not exist, there is no guard, and we can replay. 
@@ -4224,6 +4301,9 @@ int FileStore::_collection_rename(const coll_t &cid, const coll_t &ncid, get_cdir(cid, old_coll, sizeof(old_coll)); get_cdir(ncid, new_coll, sizeof(new_coll)); + _set_global_replay_guard(cid, spos); + _set_replay_guard(cid, spos); + if (_check_replay_guard(cid, spos) < 0) { return 0; } @@ -4747,6 +4827,7 @@ int FileStore::_split_collection(coll_t cid, if (srccmp < 0) return 0; + _set_global_replay_guard(cid, spos); _set_replay_guard(cid, spos, true); _set_replay_guard(dest, spos, true); diff --git a/src/os/FileStore.h b/src/os/FileStore.h index 5a2a0b88566..86d267dddf1 100644 --- a/src/os/FileStore.h +++ b/src/os/FileStore.h @@ -324,6 +324,8 @@ public: void _set_replay_guard(coll_t cid, const SequencerPosition& spos, bool in_progress); + void _set_global_replay_guard(coll_t cid, + const SequencerPosition &spos); /// close a replay guard opened with in_progress=true void _close_replay_guard(int fd, const SequencerPosition& spos); @@ -348,6 +350,7 @@ public: int _check_replay_guard(int fd, const SequencerPosition& spos); int _check_replay_guard(coll_t cid, const SequencerPosition& spos); int _check_replay_guard(coll_t cid, hobject_t oid, const SequencerPosition& pos); + int _check_global_replay_guard(coll_t cid, const SequencerPosition& spos); // ------------------ // objects |