diff options
author | Samuel Just <sam.just@inktank.com> | 2013-03-22 14:15:31 -0700 |
---|---|---|
committer | Samuel Just <sam.just@inktank.com> | 2013-03-22 14:15:33 -0700 |
commit | 16063db20e24d443aa38a04aae0f470df0d7693c (patch) | |
tree | 498d3519979810289be9dc1ce2cbd932be008043 | |
parent | 0d3f065c23d36b402b943f0f44f5ac879e1e7900 (diff) | |
parent | 000310fd10d1b6727807e98056140c2dc0f314fc (diff) | |
download | ceph-16063db20e24d443aa38a04aae0f470df0d7693c.tar.gz |
Merge remote-tracking branch 'upstream/wip_4435'
Fixes: #4435
Reviewed-by: David Zafman <david.zafman@inktank.com>
-rw-r--r-- | src/common/config_opts.h | 1 |
-rw-r--r-- | src/osd/OSD.cc | 15 |
-rw-r--r-- | src/osd/OSD.h | 3 |
-rw-r--r-- | src/osd/PG.cc | 86 |
-rw-r--r-- | src/osd/PG.h | 29 |
-rw-r--r-- | src/osd/ReplicatedPG.cc | 13 |
6 files changed, 108 insertions, 39 deletions
diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 62d2d4cc010..0d9e05a9233 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -419,6 +419,7 @@ OPTION(osd_debug_drop_pg_create_duration, OPT_INT, 1) OPTION(osd_debug_drop_op_probability, OPT_DOUBLE, 0) // probability of stalling/dropping a client op OPTION(osd_debug_op_order, OPT_BOOL, false) OPTION(osd_debug_verify_snaps_on_info, OPT_BOOL, false) +OPTION(osd_debug_skip_full_check_in_backfill_reservation, OPT_BOOL, false) OPTION(osd_op_history_size, OPT_U32, 20) // Max number of completed ops to track OPTION(osd_op_history_duration, OPT_U32, 600) // Oldest completed op to track OPTION(osd_target_transaction_size, OPT_INT, 30) // to adjust various transactions that batch smaller items diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 978c24056f5..c32f36df728 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -186,6 +186,7 @@ OSDService::OSDService(OSD *osd) : full_status_lock("OSDService::full_status_lock"), cur_state(NONE), last_msg(0), + cur_ratio(0), is_stopping_lock("OSDService::is_stopping_lock"), state(NOT_STOPPING) {} @@ -1997,6 +1998,7 @@ void OSDService::check_nearfull_warning(const osd_stat_t &osd_stat) float ratio = ((float)osd_stat.kb_used) / ((float)osd_stat.kb); float nearfull_ratio = get_nearfull_ratio(); float full_ratio = get_full_ratio(); + cur_ratio = ratio; if (full_ratio > 0 && ratio > full_ratio) { new_state = FULL; @@ -2027,6 +2029,19 @@ bool OSDService::check_failsafe_full() return false; } +bool OSDService::too_full_for_backfill(double *_ratio, double *_max_ratio) +{ + Mutex::Locker l(full_status_lock); + double max_ratio; + max_ratio = g_conf->osd_backfill_full_ratio; + if (_ratio) + *_ratio = cur_ratio; + if (_max_ratio) + *_max_ratio = max_ratio; + return cur_ratio >= max_ratio; +} + + void OSD::update_osd_stat() { // fill in osd stats too diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 148b761f532..5166ae74aa4 100644 --- a/src/osd/OSD.h +++ 
b/src/osd/OSD.h @@ -401,10 +401,13 @@ public: Mutex full_status_lock; enum s_names { NONE, NEAR, FULL } cur_state; time_t last_msg; + double cur_ratio; float get_full_ratio(); float get_nearfull_ratio(); void check_nearfull_warning(const osd_stat_t &stat); bool check_failsafe_full(); + bool too_full_for_backfill(double *ratio, double *max_ratio); + // -- stopping -- Mutex is_stopping_lock; diff --git a/src/osd/PG.cc b/src/osd/PG.cc index c0d6cb4cf86..d54faed8862 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -3294,6 +3294,26 @@ void PG::sub_op_scrub_stop(OpRequestRef op) osd->send_message_osd_cluster(reply, m->get_connection()); } +void PG::reject_reservation() +{ + osd->send_message_osd_cluster( + acting[0], + new MBackfillReserve( + MBackfillReserve::REJECT, + info.pgid, + get_osdmap()->get_epoch()), + get_osdmap()->get_epoch()); +} + +void PG::schedule_backfill_full_retry() +{ + Mutex::Locker lock(osd->backfill_request_lock); + osd->backfill_request_timer.add_event_after( + g_conf->osd_backfill_retry_interval, + new QueuePeeringEvt<RequestBackfill>( + this, get_osdmap()->get_epoch(), + RequestBackfill())); +} void PG::clear_scrub_reserved() { @@ -5894,6 +5914,19 @@ PG::RecoveryState::Backfilling::Backfilling(my_context ctx) pg->state_set(PG_STATE_BACKFILL); } +boost::statechart::result +PG::RecoveryState::Backfilling::react(const RemoteReservationRejected &) +{ + PG *pg = context< RecoveryMachine >().pg; + pg->osd->local_reserver.cancel_reservation(pg->info.pgid); + pg->state_set(PG_STATE_BACKFILL_TOOFULL); + + pg->osd->recovery_wq.dequeue(pg); + + pg->schedule_backfill_full_retry(); + return transit<NotBackfilling>(); +} + void PG::RecoveryState::Backfilling::exit() { context< RecoveryMachine >().log_exit(state_name, enter_time); @@ -5903,24 +5936,6 @@ void PG::RecoveryState::Backfilling::exit() pg->state_clear(PG_STATE_BACKFILL); } -template <class EVT> -struct QueuePeeringEvt : Context { - boost::intrusive_ptr<PG> pg; - epoch_t epoch; - EVT evt; - 
QueuePeeringEvt(PG *pg, epoch_t epoch, EVT evt) : - pg(pg), epoch(epoch), evt(evt) {} - void finish(int r) { - pg->lock(); - pg->queue_peering_event(PG::CephPeeringEvtRef( - new PG::CephPeeringEvt( - epoch, - epoch, - evt))); - pg->unlock(); - } -}; - /*--WaitRemoteBackfillReserved--*/ PG::RecoveryState::WaitRemoteBackfillReserved::WaitRemoteBackfillReserved(my_context ctx) @@ -5967,12 +5982,7 @@ PG::RecoveryState::WaitRemoteBackfillReserved::react(const RemoteReservationReje pg->state_clear(PG_STATE_BACKFILL_WAIT); pg->state_set(PG_STATE_BACKFILL_TOOFULL); - Mutex::Locker lock(pg->osd->backfill_request_lock); - pg->osd->backfill_request_timer.add_event_after( - g_conf->osd_backfill_retry_interval, - new QueuePeeringEvt<RequestBackfill>( - pg, pg->get_osdmap()->get_epoch(), - RequestBackfill())); + pg->schedule_backfill_full_retry(); return transit<NotBackfilling>(); } @@ -6065,12 +6075,12 @@ PG::RecoveryState::RepWaitBackfillReserved::RepWaitBackfillReserved(my_context c context< RecoveryMachine >().log_enter(state_name); PG *pg = context< RecoveryMachine >().pg; - int64_t kb = pg->osd->osd->osd_stat.kb, - kb_used = pg->osd->osd->osd_stat.kb_used; - int64_t max = kb * g_conf->osd_backfill_full_ratio; - if (kb_used >= max) { - dout(10) << "backfill reservation rejected: kb used >= max: " - << kb_used << " >= " << max << dendl; + double ratio, max_ratio; + if (pg->osd->too_full_for_backfill(&ratio, &max_ratio) && + !g_conf->osd_debug_skip_full_check_in_backfill_reservation) { + dout(10) << "backfill reservation rejected: full ratio is " + << ratio << ", which is greater than max allowed ratio " + << max_ratio << dendl; post_event(RemoteReservationRejected()); } else { pg->osd->remote_reserver.request_reservation( @@ -6104,13 +6114,7 @@ boost::statechart::result PG::RecoveryState::RepWaitBackfillReserved::react(const RemoteReservationRejected &evt) { PG *pg = context< RecoveryMachine >().pg; - pg->osd->send_message_osd_cluster( - pg->acting[0], - new 
MBackfillReserve( - MBackfillReserve::REJECT, - pg->info.pgid, - pg->get_osdmap()->get_epoch()), - pg->get_osdmap()->get_epoch()); + pg->reject_reservation(); return transit<RepNotRecovering>(); } @@ -6122,6 +6126,14 @@ PG::RecoveryState::RepRecovering::RepRecovering(my_context ctx) context< RecoveryMachine >().log_enter(state_name); } +boost::statechart::result +PG::RecoveryState::RepRecovering::react(const BackfillTooFull &) +{ + PG *pg = context< RecoveryMachine >().pg; + pg->reject_reservation(); + return transit<RepNotRecovering>(); +} + void PG::RecoveryState::RepRecovering::exit() { context< RecoveryMachine >().log_exit(state_name, enter_time); diff --git a/src/osd/PG.h b/src/osd/PG.h index b24c74f8bf5..5cc19229fb6 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1050,9 +1050,29 @@ public: void sub_op_scrub_unreserve(OpRequestRef op); void sub_op_scrub_stop(OpRequestRef op); + void reject_reservation(); + void schedule_backfill_full_retry(); // -- recovery state -- + template <class EVT> + struct QueuePeeringEvt : Context { + boost::intrusive_ptr<PG> pg; + epoch_t epoch; + EVT evt; + QueuePeeringEvt(PG *pg, epoch_t epoch, EVT evt) : + pg(pg), epoch(epoch), evt(evt) {} + void finish(int r) { + pg->lock(); + pg->queue_peering_event(PG::CephPeeringEvtRef( + new PG::CephPeeringEvt( + epoch, + epoch, + evt))); + pg->unlock(); + } + }; + class CephPeeringEvt { epoch_t epoch_sent; epoch_t epoch_requested; @@ -1177,6 +1197,7 @@ public: TrivialEvent(RequestBackfill) TrivialEvent(RequestRecovery) TrivialEvent(RecoveryDone) + TrivialEvent(BackfillTooFull) TrivialEvent(AllReplicasRecovered) TrivialEvent(DoRecovery) @@ -1471,9 +1492,11 @@ public: struct Backfilling : boost::statechart::state< Backfilling, Active >, NamedState { typedef boost::mpl::list< - boost::statechart::transition< Backfilled, Recovered > + boost::statechart::transition< Backfilled, Recovered >, + boost::statechart::custom_reaction< RemoteReservationRejected > > reactions; Backfilling(my_context 
ctx); + boost::statechart::result react(const RemoteReservationRejected& evt); void exit(); }; @@ -1527,9 +1550,11 @@ public: struct RepRecovering : boost::statechart::state< RepRecovering, ReplicaActive >, NamedState { typedef boost::mpl::list< - boost::statechart::transition< RecoveryDone, RepNotRecovering > + boost::statechart::transition< RecoveryDone, RepNotRecovering >, + boost::statechart::custom_reaction< BackfillTooFull > > reactions; RepRecovering(my_context ctx); + boost::statechart::result react(const BackfillTooFull &evt); void exit(); }; diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 50318fc5869..dd836ec89cb 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -1186,6 +1186,19 @@ void ReplicatedPG::do_scan(OpRequestRef op) switch (m->op) { case MOSDPGScan::OP_SCAN_GET_DIGEST: { + double ratio, full_ratio; + if (osd->too_full_for_backfill(&ratio, &full_ratio)) { + dout(1) << __func__ << ": Canceling backfill, current usage is " + << ratio << ", which exceeds " << full_ratio << dendl; + queue_peering_event( + CephPeeringEvtRef( + new CephPeeringEvt( + get_osdmap()->get_epoch(), + get_osdmap()->get_epoch(), + BackfillTooFull()))); + return; + } + BackfillInterval bi; osr->flush(); scan_range(m->begin, g_conf->osd_backfill_scan_min, g_conf->osd_backfill_scan_max, &bi); |