summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sage Weil <sage@inktank.com>, 2013-01-15 19:27:13 -0800
committer: Sage Weil <sage@inktank.com>, 2013-01-15 19:27:13 -0800
commit: 63e33c8ad9df5fc43b5695ebc839a26a3c88487c (patch)
tree: e1349b8cb35c4bf0f2bba86b69d8009a29b48b70
parent: 27ad74b9efe17ec90864aa32d07ae047bad9f366 (diff)
download: ceph-63e33c8ad9df5fc43b5695ebc839a26a3c88487c.tar.gz
osd: send forced scrub/repair through scrub scheduling
A forced scrub or repair request now marks the PG for immediate scrub or repair instead of queueing it directly. Adjust the sched_scrub() code so that we handle these PGs even when scrub_should_schedule() returns false (e.g., because the load is high).

When we explicitly request a scrub or repair, we then go through the normal scrub reservation process to avoid unduly impacting cluster performance.

This is particularly helpful on argonaut, where the scrub finalization step blocks writes to the PG, and overlapping scrubs can exacerbate the problem.

Signed-off-by: Sage Weil <sage@inktank.com>
-rw-r--r-- src/osd/OSD.cc 32
-rw-r--r-- src/osd/PG.cc 15
-rw-r--r-- src/osd/PG.h 2
-rw-r--r-- src/osd/ReplicatedPG.cc 5
4 files changed, 34 insertions, 20 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index f2801534c25..4caaf46638b 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -1949,9 +1949,7 @@ void OSD::tick()
// periodically kick recovery work queue
recovery_tp.wake();
- if (scrub_should_schedule()) {
- sched_scrub();
- }
+ sched_scrub();
map_lock.get_read();
@@ -3090,11 +3088,11 @@ void OSD::handle_scrub(MOSDScrub *m)
PG *pg = p->second;
pg->lock();
if (pg->is_primary()) {
- if (m->repair)
- pg->state_set(PG_STATE_REPAIR);
- if (pg->queue_scrub()) {
- dout(10) << "queueing " << *pg << " for scrub" << dendl;
- }
+ pg->unreg_scrub();
+ pg->must_scrub = true;
+ pg->must_repair = m->repair;
+ pg->reg_scrub();
+ dout(10) << "marking " << *pg << " for scrub" << dendl;
}
pg->unlock();
}
@@ -3106,11 +3104,11 @@ void OSD::handle_scrub(MOSDScrub *m)
PG *pg = pg_map[*p];
pg->lock();
if (pg->is_primary()) {
- if (m->repair)
- pg->state_set(PG_STATE_REPAIR);
- if (pg->queue_scrub()) {
- dout(10) << "queueing " << *pg << " for scrub" << dendl;
- }
+ pg->unreg_scrub();
+ pg->must_scrub = true;
+ pg->must_repair = m->repair;
+ pg->reg_scrub();
+ dout(10) << "marking " << *pg << " for scrub" << dendl;
}
pg->unlock();
}
@@ -3157,7 +3155,9 @@ void OSD::sched_scrub()
{
assert(osd_lock.is_locked());
- dout(20) << "sched_scrub" << dendl;
+ bool should = scrub_should_schedule();
+
+ dout(20) << "sched_scrub should=" << (int)should << dendl;
pair<utime_t,pg_t> pos;
utime_t max = ceph_clock_now(g_ceph_context);
@@ -3184,7 +3184,9 @@ void OSD::sched_scrub()
sched_scrub_lock.Unlock();
PG *pg = _lookup_lock_pg(pgid);
if (pg) {
- if (pg->is_active() && !pg->sched_scrub()) {
+ if (pg->is_active() &&
+ (should || pg->must_scrub) &&
+ !pg->sched_scrub()) {
pg->unlock();
sched_scrub_lock.Lock();
break;
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 28293b3c1e3..c9d2a65fa45 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -1584,6 +1584,7 @@ bool PG::queue_scrub()
if (is_scrubbing()) {
return false;
}
+ must_scrub = false;
state_set(PG_STATE_SCRUBBING);
osd->scrub_wq.queue(this);
return true;
@@ -2620,7 +2621,11 @@ bool PG::sched_scrub()
void PG::reg_scrub()
{
+ if (must_scrub) {
+ scrub_reg_stamp = utime_t();
+ } else {
scrub_reg_stamp = info.history.last_scrub_stamp;
+ }
osd->reg_last_pg_scrub(info.pgid, scrub_reg_stamp);
}
@@ -3037,7 +3042,6 @@ void PG::scrub()
if (!is_primary() || !is_active() || !is_clean() || !is_scrubbing()) {
dout(10) << "scrub -- not primary or active or not clean" << dendl;
- state_clear(PG_STATE_REPAIR);
state_clear(PG_STATE_SCRUBBING);
clear_scrub_reserved();
unlock();
@@ -3145,7 +3149,6 @@ void PG::scrub_clear_state()
{
assert(_lock.is_locked());
state_clear(PG_STATE_SCRUBBING);
- state_clear(PG_STATE_REPAIR);
update_stats();
// active -> nothing.
@@ -3153,6 +3156,9 @@ void PG::scrub_clear_state()
osd->requeue_ops(this, waiting_for_active);
+ must_scrub = false;
+ must_repair = false;
+
finalizing_scrub = false;
scrub_block_writes = false;
scrub_active = false;
@@ -3318,7 +3324,7 @@ void PG::scrub_finalize() {
dout(10) << "scrub_finalize has maps, analyzing" << dendl;
int errors = 0, fixed = 0;
- bool repair = state_test(PG_STATE_REPAIR);
+ bool repair = must_repair;
const char *mode = repair ? "repair":"scrub";
if (acting.size() > 1) {
dout(10) << "scrub comparing replica scrub maps" << dendl;
@@ -3716,6 +3722,9 @@ void PG::start_peering_interval(const OSDMapRef lastmap,
state_clear(PG_STATE_DOWN);
state_clear(PG_STATE_RECOVERING);
+ must_scrub = false;
+ must_repair = false;
+
peer_missing.clear();
peer_purged.clear();
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 3c4353dab51..1c680bfea2d 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -747,6 +747,7 @@ public:
epoch_t scrub_epoch_start;
ScrubMap primary_scrubmap;
MOSDRepScrub *active_rep_scrub;
+ bool must_scrub, must_repair;
utime_t scrub_reg_stamp;
void repair_object(const hobject_t& soid, ScrubMap::object *po, int bad_peer, int ok_peer);
@@ -1269,6 +1270,7 @@ public:
scrub_reserved(false), scrub_reserve_failed(false),
scrub_waiting_on(0),
active_rep_scrub(0),
+ must_scrub(false), must_repair(false),
recovery_state(this)
{
pool->get();
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index d6ff0d2917f..92df99778c6 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -5738,7 +5738,8 @@ void ReplicatedPG::on_change()
scrub_clear_state();
} else if (is_scrubbing()) {
state_clear(PG_STATE_SCRUBBING);
- state_clear(PG_STATE_REPAIR);
+ must_scrub = false;
+ must_repair = false;
}
context_registry_on_change();
@@ -6461,7 +6462,7 @@ int ReplicatedPG::_scrub(ScrubMap& scrubmap, int& errors, int& fixed)
dout(10) << "_scrub" << dendl;
coll_t c(info.pgid);
- bool repair = state_test(PG_STATE_REPAIR);
+ bool repair = must_repair;
const char *mode = repair ? "repair":"scrub";
// traverse in reverse order.