summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSamuel Just <sam.just@inktank.com>2013-07-19 15:56:52 -0700
committerSamuel Just <sam.just@inktank.com>2013-07-22 10:31:31 -0700
commitd28c18da9dd905316a30267305f0a5d656e65ec4 (patch)
tree59aefd1d3fb345bbead0a311ac5dec4fb576e511
parent6c4cd22e608efc593228c02529077667ec01ec16 (diff)
downloadceph-d28c18da9dd905316a30267305f0a5d656e65ec4.tar.gz
OSD::RemoveWQ: do not apply_transaction while blocking _try_resurrect_pg
Some callbacks take the osd lock, so we need to avoid blocking an osd lock holding thread while waiting on a filestore callback. Instead, just queue the transaction, and allow _try_resurrect_pg to cancel us while we are waiting for the transaction to go through (CLEARING_WAITING). Fixes: #5672 Signed-off-by: Samuel Just <sam.just@inktank.com> Reviewed-by: Sage Weil <sage@inktank.com>
-rw-r--r--src/osd/OSD.cc21
-rw-r--r--src/osd/OSD.h40
2 files changed, 42 insertions, 19 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 1ee4c09a63e..8cc9e31459d 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -3267,21 +3267,30 @@ bool remove_dir(
}
t->remove(coll, *i);
if (num >= g_conf->osd_target_transaction_size) {
- store->apply_transaction(osr, *t);
+ C_SaferCond waiter;
+ store->queue_transaction(osr, t, &waiter);
+ bool cont = dstate->pause_clearing();
+ waiter.wait();
+ if (cont)
+ cont = dstate->resume_clearing();
delete t;
- if (!dstate->check_canceled()) {
- // canceled!
+ if (!cont)
return false;
- }
t = new ObjectStore::Transaction;
num = 0;
}
}
olist.clear();
}
- store->apply_transaction(osr, *t);
+
+ C_SaferCond waiter;
+ store->queue_transaction(osr, t, &waiter);
+ bool cont = dstate->pause_clearing();
+ waiter.wait();
+ if (cont)
+ cont = dstate->resume_clearing();
delete t;
- return true;
+ return cont;
}
void OSD::RemoveWQ::_process(pair<PGRef, DeletingStateRef> item)
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 238c5b43594..04ad4dcd7d7 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -148,6 +148,7 @@ class DeletingState {
enum {
QUEUED,
CLEARING_DIR,
+ CLEARING_WAITING,
DELETING_DIR,
DELETED_DIR,
CANCELED,
@@ -160,8 +161,23 @@ public:
lock("DeletingState::lock"), status(QUEUED), stop_deleting(false),
pgid(in.first), old_pg_state(in.second) {}
- /// check whether removal was canceled
- bool check_canceled() {
+ /// transition status to clearing
+ bool start_clearing() {
+ Mutex::Locker l(lock);
+ assert(
+ status == QUEUED ||
+ status == DELETED_DIR);
+ if (stop_deleting) {
+ status = CANCELED;
+ cond.Signal();
+ return false;
+ }
+ status = CLEARING_DIR;
+ return true;
+ } ///< @return false if we should cancel deletion
+
+ /// transition status to CLEARING_WAITING
+ bool pause_clearing() {
Mutex::Locker l(lock);
assert(status == CLEARING_DIR);
if (stop_deleting) {
@@ -169,15 +185,14 @@ public:
cond.Signal();
return false;
}
+ status = CLEARING_WAITING;
return true;
- } ///< @return false if canceled, true if we should continue
+ } ///< @return false if we should cancel deletion
- /// transition status to clearing
- bool start_clearing() {
+ /// transition status to CLEARING_DIR
+ bool resume_clearing() {
Mutex::Locker l(lock);
- assert(
- status == QUEUED ||
- status == DELETED_DIR);
+ assert(status == CLEARING_WAITING);
if (stop_deleting) {
status = CANCELED;
cond.Signal();
@@ -215,11 +230,10 @@ public:
/**
* If we are in DELETING_DIR or CLEARING_DIR, there are in progress
* operations we have to wait for before continuing on. States
- * DELETED_DIR, QUEUED, and CANCELED either check for stop_deleting
- * prior to performing any operations or signify the end of the
- * deleting process. We don't want to wait to leave the QUEUED
- * state, because this might block the caller behind an entire pg
- * removal.
+ * CLEARING_WAITING and QUEUED indicate that the remover will check
+ * stop_deleting before queueing any further operations. CANCELED
+ * indicates that the remover has already halted. DELETED_DIR
+ * indicates that the deletion has been fully queueud.
*/
while (status == DELETING_DIR || status == CLEARING_DIR)
cond.Wait(lock);