From 63e33c8ad9df5fc43b5695ebc839a26a3c88487c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 15 Jan 2013 19:27:13 -0800 Subject: osd: send forced scrub/repair through scrub scheduling This marks a PG for immediate scrub or repair. Adjust the sched_scrub() code so that we handle these PGs even when scrub_should_schedule() returns false (e.g., because the load is high). When we explicitly request a scrub or repair, we then go through the normal scrub reservation process to avoid unduly impacting cluster performance. This is particularly helpful on argonaut, where the final scrub finalization step blocks writes to the PG, and overlapping scrubs can exacerbate the problem. Signed-off-by: Sage Weil --- src/osd/OSD.cc | 32 +++++++++++++++++--------------- src/osd/PG.cc | 15 ++++++++++++--- src/osd/PG.h | 2 ++ src/osd/ReplicatedPG.cc | 5 +++-- 4 files changed, 34 insertions(+), 20 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index f2801534c25..4caaf46638b 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1949,9 +1949,7 @@ void OSD::tick() // periodically kick recovery work queue recovery_tp.wake(); - if (scrub_should_schedule()) { - sched_scrub(); - } + sched_scrub(); map_lock.get_read(); @@ -3090,11 +3088,11 @@ void OSD::handle_scrub(MOSDScrub *m) PG *pg = p->second; pg->lock(); if (pg->is_primary()) { - if (m->repair) - pg->state_set(PG_STATE_REPAIR); - if (pg->queue_scrub()) { - dout(10) << "queueing " << *pg << " for scrub" << dendl; - } + pg->unreg_scrub(); + pg->must_scrub = true; + pg->must_repair = m->repair; + pg->reg_scrub(); + dout(10) << "marking " << *pg << " for scrub" << dendl; } pg->unlock(); } @@ -3106,11 +3104,11 @@ void OSD::handle_scrub(MOSDScrub *m) PG *pg = pg_map[*p]; pg->lock(); if (pg->is_primary()) { - if (m->repair) - pg->state_set(PG_STATE_REPAIR); - if (pg->queue_scrub()) { - dout(10) << "queueing " << *pg << " for scrub" << dendl; - } + pg->unreg_scrub(); + pg->must_scrub = true; + pg->must_repair = m->repair; + pg->reg_scrub(); 
+ dout(10) << "marking " << *pg << " for scrub" << dendl; } pg->unlock(); } @@ -3157,7 +3155,9 @@ void OSD::sched_scrub() { assert(osd_lock.is_locked()); - dout(20) << "sched_scrub" << dendl; + bool should = scrub_should_schedule(); + + dout(20) << "sched_scrub should=" << (int)should << dendl; pair pos; utime_t max = ceph_clock_now(g_ceph_context); @@ -3184,7 +3184,9 @@ void OSD::sched_scrub() sched_scrub_lock.Unlock(); PG *pg = _lookup_lock_pg(pgid); if (pg) { - if (pg->is_active() && !pg->sched_scrub()) { + if (pg->is_active() && + (should || pg->must_scrub) && + !pg->sched_scrub()) { pg->unlock(); sched_scrub_lock.Lock(); break; diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 28293b3c1e3..c9d2a65fa45 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1584,6 +1584,7 @@ bool PG::queue_scrub() if (is_scrubbing()) { return false; } + must_scrub = false; state_set(PG_STATE_SCRUBBING); osd->scrub_wq.queue(this); return true; @@ -2620,7 +2621,11 @@ bool PG::sched_scrub() void PG::reg_scrub() { + if (must_scrub) { + scrub_reg_stamp = utime_t(); + } else { scrub_reg_stamp = info.history.last_scrub_stamp; + } osd->reg_last_pg_scrub(info.pgid, scrub_reg_stamp); } @@ -3037,7 +3042,6 @@ void PG::scrub() if (!is_primary() || !is_active() || !is_clean() || !is_scrubbing()) { dout(10) << "scrub -- not primary or active or not clean" << dendl; - state_clear(PG_STATE_REPAIR); state_clear(PG_STATE_SCRUBBING); clear_scrub_reserved(); unlock(); @@ -3145,7 +3149,6 @@ void PG::scrub_clear_state() { assert(_lock.is_locked()); state_clear(PG_STATE_SCRUBBING); - state_clear(PG_STATE_REPAIR); update_stats(); // active -> nothing. 
@@ -3153,6 +3156,9 @@ void PG::scrub_clear_state() osd->requeue_ops(this, waiting_for_active); + must_scrub = false; + must_repair = false; + finalizing_scrub = false; scrub_block_writes = false; scrub_active = false; @@ -3318,7 +3324,7 @@ void PG::scrub_finalize() { dout(10) << "scrub_finalize has maps, analyzing" << dendl; int errors = 0, fixed = 0; - bool repair = state_test(PG_STATE_REPAIR); + bool repair = must_repair; const char *mode = repair ? "repair":"scrub"; if (acting.size() > 1) { dout(10) << "scrub comparing replica scrub maps" << dendl; @@ -3716,6 +3722,9 @@ void PG::start_peering_interval(const OSDMapRef lastmap, state_clear(PG_STATE_DOWN); state_clear(PG_STATE_RECOVERING); + must_scrub = false; + must_repair = false; + peer_missing.clear(); peer_purged.clear(); diff --git a/src/osd/PG.h b/src/osd/PG.h index 3c4353dab51..1c680bfea2d 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -747,6 +747,7 @@ public: epoch_t scrub_epoch_start; ScrubMap primary_scrubmap; MOSDRepScrub *active_rep_scrub; + bool must_scrub, must_repair; utime_t scrub_reg_stamp; void repair_object(const hobject_t& soid, ScrubMap::object *po, int bad_peer, int ok_peer); @@ -1269,6 +1270,7 @@ public: scrub_reserved(false), scrub_reserve_failed(false), scrub_waiting_on(0), active_rep_scrub(0), + must_scrub(false), must_repair(false), recovery_state(this) { pool->get(); diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index d6ff0d2917f..92df99778c6 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -5738,7 +5738,8 @@ void ReplicatedPG::on_change() scrub_clear_state(); } else if (is_scrubbing()) { state_clear(PG_STATE_SCRUBBING); - state_clear(PG_STATE_REPAIR); + must_scrub = false; + must_repair = false; } context_registry_on_change(); @@ -6461,7 +6462,7 @@ int ReplicatedPG::_scrub(ScrubMap& scrubmap, int& errors, int& fixed) dout(10) << "_scrub" << dendl; coll_t c(info.pgid); - bool repair = state_test(PG_STATE_REPAIR); + bool repair = must_repair; const 
char *mode = repair ? "repair":"scrub"; // traverse in reverse order. -- cgit v1.2.1