From 213e3559dd260a2e19324f2a671c808261249f96 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 6 Jan 2013 20:43:21 -0800 Subject: osd: fix race in do_recovery() Verify that the PG is still RECOVERING or BACKFILL when we take the pg lock in the recovery thread. This prevents a crash from an invalid state machine event when the recovery queue races with a PG state change (e.g., due to peering). Signed-off-by: Sage Weil Reviewed-by: Samuel Just --- src/osd/OSD.cc | 8 ++++++++ src/osd/PG.cc | 1 + 2 files changed, 9 insertions(+) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index a6e26e7f536..f57f2264f74 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -5217,6 +5217,13 @@ void OSD::do_recovery(PG *pg) } else { pg->lock(); + + if (!pg->state_test(PG_STATE_RECOVERING) && + !pg->state_test(PG_STATE_BACKFILL)) { + dout(10) << "do_recovery not recovering|backfill on " << *pg << dendl; + pg->unlock(); + goto out; + } dout(10) << "do_recovery starting " << max << " (" << recovery_ops_active << "/" << g_conf->osd_recovery_max_active << " rops) on " @@ -5269,6 +5276,7 @@ void OSD::do_recovery(PG *pg) } pg->unlock(); } + out: pg->put(); } diff --git a/src/osd/PG.cc b/src/osd/PG.cc index a80d95dcbae..2f38dac426e 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2919,6 +2919,7 @@ void PG::repair_object(const hobject_t& soid, ScrubMap::object *po, int bad_peer log.last_requested = 0; } + state_set(PG_STATE_RECOVERING); osd->queue_for_recovery(this); } -- cgit v1.2.1