diff options
author | Sage Weil <sage@inktank.com> | 2013-01-06 20:43:21 -0800 |
---|---|---|
committer | Samuel Just <sam.just@inktank.com> | 2013-01-07 10:39:21 -0800 |
commit | 213e3559dd260a2e19324f2a671c808261249f96 (patch) | |
tree | 2f759830d281056c56effe2e56ec7047093aab3d | |
parent | e410d1a066b906cad3103a5bbfa5b4509be9ac37 (diff) | |
download | ceph-213e3559dd260a2e19324f2a671c808261249f96.tar.gz |
osd: fix race in do_recovery()
Verify that the PG is still RECOVERING or BACKFILL when we take the pg
lock in the recovery thread. This prevents a crash from an invalid
state machine event when the recovery queue races with a PG state change
(e.g., due to peering).
Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Samuel Just <sam.just@inktank.com>
-rw-r--r-- | src/osd/OSD.cc | 8 | ||||
-rw-r--r-- | src/osd/PG.cc | 1 |
2 files changed, 9 insertions, 0 deletions
```diff
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index a6e26e7f536..f57f2264f74 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -5217,6 +5217,13 @@ void OSD::do_recovery(PG *pg)
   } else {
     pg->lock();
+
+    if (!pg->state_test(PG_STATE_RECOVERING) &&
+        !pg->state_test(PG_STATE_BACKFILL)) {
+      dout(10) << "do_recovery not recovering|backfill on " << *pg << dendl;
+      pg->unlock();
+      goto out;
+    }
     dout(10) << "do_recovery starting " << max
              << " (" << recovery_ops_active << "/" << g_conf->osd_recovery_max_active << " rops) on "
@@ -5269,6 +5276,7 @@ void OSD::do_recovery(PG *pg)
     }
     pg->unlock();
   }
+ out:
   pg->put();
 }
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index a80d95dcbae..2f38dac426e 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -2919,6 +2919,7 @@ void PG::repair_object(const hobject_t& soid, ScrubMap::object *po, int bad_peer
     log.last_requested = 0;
   }
+  state_set(PG_STATE_RECOVERING);
   osd->queue_for_recovery(this);
 }
```