summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYan, Zheng <zheng.z.yan@intel.com>2012-11-19 10:43:47 +0800
committerSage Weil <sage@inktank.com>2012-12-01 12:52:23 -0800
commit1203cd21104d131e459891392ac1fbf52bd0160d (patch)
tree49f469fa3407fa5fc2e346cc1f944cc7ffe4c7f4
parent587aec1706b31a3074e35e9799a2a8fefd14c96d (diff)
downloadceph-1203cd21104d131e459891392ac1fbf52bd0160d.tar.gz
mds: allow open_remote_ino() to open xlocked dentry
discover_ino() has a parameter want_xlocked. The parameter indicates if remote discover handler can proceed when xlocked dentry is encountered. open_remote_ino() uses discover_ino() to find non-auth inode, but always set 'want_xlocked' to false. This may cause dead lock in some corner cases. For example: we rename a inode's primary dentry to one of its remote dentry and send slave request to one witness MDS. but before the slave request reaches the witness MDS, the inode is trimmed from the witness MDS' cache. Then when the slave request arrives, open_remote_ino() will be called during traversing the destpath. open_remote_ino() calls discover_ino() with 'want_xlocled=false' to find the inode. discover_ino() sends MDiscover message to the inode's authority MDS. The handler of MDiscover message finds the inode's primary dentry is xlocked and it sleeps. The fix is add a parameter 'want_xlocked' to open_remote_ino() and make open_remote_ino() pass the parameter to discover_ino(). Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
-rw-r--r--src/mds/MDCache.cc51
-rw-r--r--src/mds/MDCache.h8
-rw-r--r--src/mds/Server.cc7
3 files changed, 39 insertions, 27 deletions
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index 55c4b54e218..d45a8143730 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -6731,7 +6731,8 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, Context *fin, // wh
} else {
dout(7) << "remote link to " << dnl->get_remote_ino() << ", which i don't have" << dendl;
assert(mdr); // we shouldn't hit non-primary dentries doing a non-mdr traversal!
- open_remote_ino(dnl->get_remote_ino(), _get_waiter(mdr, req, fin));
+ open_remote_ino(dnl->get_remote_ino(), _get_waiter(mdr, req, fin),
+ (null_okay && depth == path.depth() - 1));
if (mds->logger) mds->logger->inc(l_mds_trino);
return 1;
}
@@ -7017,12 +7018,13 @@ CInode *MDCache::get_dentry_inode(CDentry *dn, MDRequest *mdr, bool projected)
class C_MDC_RetryOpenRemoteIno : public Context {
MDCache *mdcache;
inodeno_t ino;
+ bool want_xlocked;
Context *onfinish;
public:
- C_MDC_RetryOpenRemoteIno(MDCache *mdc, inodeno_t i, Context *c) :
- mdcache(mdc), ino(i), onfinish(c) {}
+ C_MDC_RetryOpenRemoteIno(MDCache *mdc, inodeno_t i, Context *c, bool wx) :
+ mdcache(mdc), ino(i), want_xlocked(wx), onfinish(c) {}
void finish(int r) {
- mdcache->open_remote_ino(ino, onfinish);
+ mdcache->open_remote_ino(ino, onfinish, want_xlocked);
}
};
@@ -7032,19 +7034,20 @@ class C_MDC_OpenRemoteIno : public Context {
inodeno_t ino;
inodeno_t hadino;
version_t hadv;
+ bool want_xlocked;
Context *onfinish;
public:
vector<Anchor> anchortrace;
- C_MDC_OpenRemoteIno(MDCache *mdc, inodeno_t i, inodeno_t hi, version_t hv, Context *c) :
- mdcache(mdc), ino(i), hadino(hi), hadv(hv), onfinish(c) {}
- C_MDC_OpenRemoteIno(MDCache *mdc, inodeno_t i, vector<Anchor>& at, Context *c) :
- mdcache(mdc), ino(i), hadino(0), hadv(0), onfinish(c), anchortrace(at) {}
+ C_MDC_OpenRemoteIno(MDCache *mdc, inodeno_t i, bool wx, inodeno_t hi, version_t hv, Context *c) :
+ mdcache(mdc), ino(i), hadino(hi), hadv(hv), want_xlocked(wx), onfinish(c) {}
+ C_MDC_OpenRemoteIno(MDCache *mdc, inodeno_t i, bool wx, vector<Anchor>& at, Context *c) :
+ mdcache(mdc), ino(i), hadino(0), hadv(0), want_xlocked(wx), onfinish(c), anchortrace(at) {}
void finish(int r) {
assert(r == 0);
if (r == 0)
- mdcache->open_remote_ino_2(ino, anchortrace, hadino, hadv, onfinish);
+ mdcache->open_remote_ino_2(ino, anchortrace, want_xlocked, hadino, hadv, onfinish);
else {
onfinish->finish(r);
delete onfinish;
@@ -7052,18 +7055,18 @@ public:
}
};
-void MDCache::open_remote_ino(inodeno_t ino, Context *onfinish, inodeno_t hadino, version_t hadv)
+void MDCache::open_remote_ino(inodeno_t ino, Context *onfinish, bool want_xlocked,
+ inodeno_t hadino, version_t hadv)
{
- dout(7) << "open_remote_ino on " << ino << dendl;
+ dout(7) << "open_remote_ino on " << ino << (want_xlocked ? " want_xlocked":"") << dendl;
- C_MDC_OpenRemoteIno *c = new C_MDC_OpenRemoteIno(this, ino, hadino, hadv, onfinish);
+ C_MDC_OpenRemoteIno *c = new C_MDC_OpenRemoteIno(this, ino, want_xlocked,
+ hadino, hadv, onfinish);
mds->anchorclient->lookup(ino, c->anchortrace, c);
}
-void MDCache::open_remote_ino_2(inodeno_t ino,
- vector<Anchor>& anchortrace,
- inodeno_t hadino, version_t hadv,
- Context *onfinish)
+void MDCache::open_remote_ino_2(inodeno_t ino, vector<Anchor>& anchortrace, bool want_xlocked,
+ inodeno_t hadino, version_t hadv, Context *onfinish)
{
dout(7) << "open_remote_ino_2 on " << ino
<< ", trace depth is " << anchortrace.size() << dendl;
@@ -7106,7 +7109,7 @@ void MDCache::open_remote_ino_2(inodeno_t ino,
if (!in->dirfragtree.contains(frag)) {
dout(10) << "frag " << frag << " not valid, requerying anchortable" << dendl;
- open_remote_ino(ino, onfinish);
+ open_remote_ino(ino, onfinish, want_xlocked);
return;
}
@@ -7116,7 +7119,7 @@ void MDCache::open_remote_ino_2(inodeno_t ino,
dout(10) << "opening remote dirfrag " << frag << " under " << *in << dendl;
/* we re-query the anchortable just to avoid a fragtree update race */
open_remote_dirfrag(in, frag,
- new C_MDC_RetryOpenRemoteIno(this, ino, onfinish));
+ new C_MDC_RetryOpenRemoteIno(this, ino, onfinish, want_xlocked));
return;
}
@@ -7124,7 +7127,7 @@ void MDCache::open_remote_ino_2(inodeno_t ino,
if (in->is_frozen_dir()) {
dout(7) << "traverse: " << *in << " is frozen_dir, waiting" << dendl;
in->parent->dir->add_waiter(CDir::WAIT_UNFREEZE,
- new C_MDC_RetryOpenRemoteIno(this, ino, onfinish));
+ new C_MDC_RetryOpenRemoteIno(this, ino, onfinish, want_xlocked));
return;
}
dir = in->get_or_open_dirfrag(this, frag);
@@ -7146,20 +7149,22 @@ void MDCache::open_remote_ino_2(inodeno_t ino,
<< " in complete dir " << *dir
<< ", requerying anchortable"
<< dendl;
- open_remote_ino(ino, onfinish, anchortrace[i].ino, anchortrace[i].updated);
+ open_remote_ino(ino, onfinish, want_xlocked,
+ anchortrace[i].ino, anchortrace[i].updated);
}
} else {
dout(10) << "need ino " << anchortrace[i].ino
<< ", fetching incomplete dir " << *dir
<< dendl;
- dir->fetch(new C_MDC_OpenRemoteIno(this, ino, anchortrace, onfinish));
+ dir->fetch(new C_MDC_OpenRemoteIno(this, ino, want_xlocked, anchortrace, onfinish));
}
} else {
// hmm, discover.
dout(10) << "have remote dirfrag " << *dir << ", discovering "
<< anchortrace[i].ino << dendl;
- discover_ino(dir, anchortrace[i].ino,
- new C_MDC_RetryOpenRemoteIno(this, ino, onfinish));
+ discover_ino(dir, anchortrace[i].ino,
+ new C_MDC_RetryOpenRemoteIno(this, ino, onfinish, want_xlocked),
+ (want_xlocked && i == anchortrace.size() - 1));
}
}
diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
index 64290aa97b9..31c7467bf41 100644
--- a/src/mds/MDCache.h
+++ b/src/mds/MDCache.h
@@ -701,11 +701,11 @@ public:
void open_remote_dirfrag(CInode *diri, frag_t fg, Context *fin);
CInode *get_dentry_inode(CDentry *dn, MDRequest *mdr, bool projected=false);
- void open_remote_ino(inodeno_t ino, Context *fin, inodeno_t hadino=0, version_t hadv=0);
+ void open_remote_ino(inodeno_t ino, Context *fin, bool want_xlocked=false,
+ inodeno_t hadino=0, version_t hadv=0);
void open_remote_ino_2(inodeno_t ino,
- vector<Anchor>& anchortrace,
- inodeno_t hadino, version_t hadv,
- Context *onfinish);
+ vector<Anchor>& anchortrace, bool want_xlocked,
+ inodeno_t hadino, version_t hadv, Context *onfinish);
void open_remote_dentry(CDentry *dn, bool projected, Context *fin);
void _open_remote_dentry_finish(int r, CDentry *dn, bool projected, Context *fin);
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index 83b89308dd9..1545fef73c5 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -5216,6 +5216,13 @@ void Server::handle_client_rename(MDRequest *mdr)
if (desttrace[i]->is_auth() && desttrace[i]->is_projected())
xlocks.insert(&desttrace[i]->versionlock);
}
+ // xlock srci and oldin's primary dentries, so witnesses can call
+ // open_remote_ino() with 'want_locked=true' when the srcdn or destdn
+ // is traversed.
+ if (srcdnl->is_remote())
+ xlocks.insert(&srci->get_projected_parent_dn()->lock);
+ if (destdnl->is_remote())
+ xlocks.insert(&oldin->get_projected_parent_dn()->lock);
}
// we need to update srci's ctime. xlock its least contended lock to do that...