summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Samuel Just <sam.just@inktank.com> 2013-03-22 14:15:31 -0700
committer Samuel Just <sam.just@inktank.com> 2013-03-22 14:15:33 -0700
commit 16063db20e24d443aa38a04aae0f470df0d7693c (patch)
tree 498d3519979810289be9dc1ce2cbd932be008043
parent 0d3f065c23d36b402b943f0f44f5ac879e1e7900 (diff)
parent 000310fd10d1b6727807e98056140c2dc0f314fc (diff)
download ceph-16063db20e24d443aa38a04aae0f470df0d7693c.tar.gz
Merge remote-tracking branch 'upstream/wip_4435'
Fixes: #4435
Reviewed-by: David Zafman <david.zafman@inktank.com>
-rw-r--r-- src/common/config_opts.h 1
-rw-r--r-- src/osd/OSD.cc 15
-rw-r--r-- src/osd/OSD.h 3
-rw-r--r-- src/osd/PG.cc 86
-rw-r--r-- src/osd/PG.h 29
-rw-r--r-- src/osd/ReplicatedPG.cc 13
6 files changed, 108 insertions, 39 deletions
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index 62d2d4cc010..0d9e05a9233 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -419,6 +419,7 @@ OPTION(osd_debug_drop_pg_create_duration, OPT_INT, 1)
OPTION(osd_debug_drop_op_probability, OPT_DOUBLE, 0) // probability of stalling/dropping a client op
OPTION(osd_debug_op_order, OPT_BOOL, false)
OPTION(osd_debug_verify_snaps_on_info, OPT_BOOL, false)
+OPTION(osd_debug_skip_full_check_in_backfill_reservation, OPT_BOOL, false) // debug: when true, RepWaitBackfillReserved does not reject a reservation even if too_full_for_backfill() is true
OPTION(osd_op_history_size, OPT_U32, 20) // Max number of completed ops to track
OPTION(osd_op_history_duration, OPT_U32, 600) // Oldest completed op to track
OPTION(osd_target_transaction_size, OPT_INT, 30) // to adjust various transactions that batch smaller items
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 978c24056f5..c32f36df728 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -186,6 +186,7 @@ OSDService::OSDService(OSD *osd) :
full_status_lock("OSDService::full_status_lock"),
cur_state(NONE),
last_msg(0),
+ cur_ratio(0),
is_stopping_lock("OSDService::is_stopping_lock"),
state(NOT_STOPPING)
{}
@@ -1997,6 +1998,7 @@ void OSDService::check_nearfull_warning(const osd_stat_t &osd_stat)
float ratio = ((float)osd_stat.kb_used) / ((float)osd_stat.kb);
float nearfull_ratio = get_nearfull_ratio();
float full_ratio = get_full_ratio();
+ cur_ratio = ratio;
if (full_ratio > 0 && ratio > full_ratio) {
new_state = FULL;
@@ -2027,6 +2029,19 @@ bool OSDService::check_failsafe_full()
return false;
}
+bool OSDService::too_full_for_backfill(double *_ratio, double *_max_ratio)  // true when cur_ratio >= osd_backfill_full_ratio; both out-params may be NULL
+{
+ Mutex::Locker l(full_status_lock);  // full_status_lock is taken before cur_ratio is read
+ double max_ratio;
+ max_ratio = g_conf->osd_backfill_full_ratio;  // configured limit, read on every call
+ if (_ratio)
+ *_ratio = cur_ratio;  // usage ratio last stored by check_nearfull_warning() (0 until then)
+ if (_max_ratio)
+ *_max_ratio = max_ratio;  // hand the limit back so callers can log it
+ return cur_ratio >= max_ratio;  // ">=": usage exactly at the limit also counts as too full
+}
+
+
void OSD::update_osd_stat()
{
// fill in osd stats too
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 148b761f532..5166ae74aa4 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -401,10 +401,13 @@ public:
Mutex full_status_lock;
enum s_names { NONE, NEAR, FULL } cur_state;
time_t last_msg;
+ double cur_ratio;
float get_full_ratio();
float get_nearfull_ratio();
void check_nearfull_warning(const osd_stat_t &stat);
bool check_failsafe_full();
+ bool too_full_for_backfill(double *ratio, double *max_ratio);
+
// -- stopping --
Mutex is_stopping_lock;
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index c0d6cb4cf86..d54faed8862 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -3294,6 +3294,26 @@ void PG::sub_op_scrub_stop(OpRequestRef op)
osd->send_message_osd_cluster(reply, m->get_connection());
}
+void PG::reject_reservation()  // sends an MBackfillReserve::REJECT for this PG to acting[0]
+{
+ osd->send_message_osd_cluster(
+ acting[0],  // destination osd: first entry of acting -- presumably the primary; TODO confirm
+ new MBackfillReserve(
+ MBackfillReserve::REJECT,
+ info.pgid,  // identifies which PG's reservation is being rejected
+ get_osdmap()->get_epoch()),  // epoch carried inside the message
+ get_osdmap()->get_epoch());  // same current-map epoch, passed to send_message_osd_cluster itself
+}
+
+void PG::schedule_backfill_full_retry()  // arms a timer event that queues RequestBackfill for this PG after osd_backfill_retry_interval
+{
+ Mutex::Locker lock(osd->backfill_request_lock);  // backfill_request_lock is taken before backfill_request_timer is touched
+ osd->backfill_request_timer.add_event_after(
+ g_conf->osd_backfill_retry_interval,  // delay before the event fires
+ new QueuePeeringEvt<RequestBackfill>(  // Context whose finish() locks the PG and queues the peering event
+ this, get_osdmap()->get_epoch(),  // event is stamped with the osdmap epoch at scheduling time
+ RequestBackfill()));
+}
void PG::clear_scrub_reserved()
{
@@ -5894,6 +5914,19 @@ PG::RecoveryState::Backfilling::Backfilling(my_context ctx)
pg->state_set(PG_STATE_BACKFILL);
}
+boost::statechart::result
+PG::RecoveryState::Backfilling::react(const RemoteReservationRejected &)  // handles a RemoteReservationRejected received while in Backfilling
+{
+ PG *pg = context< RecoveryMachine >().pg;
+ pg->osd->local_reserver.cancel_reservation(pg->info.pgid);  // drop this PG's entry in the local reserver
+ pg->state_set(PG_STATE_BACKFILL_TOOFULL);  // same state bit WaitRemoteBackfillReserved sets on rejection
+
+ pg->osd->recovery_wq.dequeue(pg);  // take the PG off the recovery work queue
+
+ pg->schedule_backfill_full_retry();  // timer re-queues RequestBackfill after osd_backfill_retry_interval
+ return transit<NotBackfilling>();  // leaving Backfilling runs exit(), which clears PG_STATE_BACKFILL
+}
+
void PG::RecoveryState::Backfilling::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
@@ -5903,24 +5936,6 @@ void PG::RecoveryState::Backfilling::exit()
pg->state_clear(PG_STATE_BACKFILL);
}
-template <class EVT>
-struct QueuePeeringEvt : Context {
- boost::intrusive_ptr<PG> pg;
- epoch_t epoch;
- EVT evt;
- QueuePeeringEvt(PG *pg, epoch_t epoch, EVT evt) :
- pg(pg), epoch(epoch), evt(evt) {}
- void finish(int r) {
- pg->lock();
- pg->queue_peering_event(PG::CephPeeringEvtRef(
- new PG::CephPeeringEvt(
- epoch,
- epoch,
- evt)));
- pg->unlock();
- }
-};
-
/*--WaitRemoteBackfillReserved--*/
PG::RecoveryState::WaitRemoteBackfillReserved::WaitRemoteBackfillReserved(my_context ctx)
@@ -5967,12 +5982,7 @@ PG::RecoveryState::WaitRemoteBackfillReserved::react(const RemoteReservationReje
pg->state_clear(PG_STATE_BACKFILL_WAIT);
pg->state_set(PG_STATE_BACKFILL_TOOFULL);
- Mutex::Locker lock(pg->osd->backfill_request_lock);
- pg->osd->backfill_request_timer.add_event_after(
- g_conf->osd_backfill_retry_interval,
- new QueuePeeringEvt<RequestBackfill>(
- pg, pg->get_osdmap()->get_epoch(),
- RequestBackfill()));
+ pg->schedule_backfill_full_retry();
return transit<NotBackfilling>();
}
@@ -6065,12 +6075,12 @@ PG::RecoveryState::RepWaitBackfillReserved::RepWaitBackfillReserved(my_context c
context< RecoveryMachine >().log_enter(state_name);
PG *pg = context< RecoveryMachine >().pg;
- int64_t kb = pg->osd->osd->osd_stat.kb,
- kb_used = pg->osd->osd->osd_stat.kb_used;
- int64_t max = kb * g_conf->osd_backfill_full_ratio;
- if (kb_used >= max) {
- dout(10) << "backfill reservation rejected: kb used >= max: "
- << kb_used << " >= " << max << dendl;
+ double ratio, max_ratio;
+ if (pg->osd->too_full_for_backfill(&ratio, &max_ratio) &&
+ !g_conf->osd_debug_skip_full_check_in_backfill_reservation) {
+ dout(10) << "backfill reservation rejected: full ratio is "
+ << ratio << ", which is greater than max allowed ratio "
+ << max_ratio << dendl;
post_event(RemoteReservationRejected());
} else {
pg->osd->remote_reserver.request_reservation(
@@ -6104,13 +6114,7 @@ boost::statechart::result
PG::RecoveryState::RepWaitBackfillReserved::react(const RemoteReservationRejected &evt)
{
PG *pg = context< RecoveryMachine >().pg;
- pg->osd->send_message_osd_cluster(
- pg->acting[0],
- new MBackfillReserve(
- MBackfillReserve::REJECT,
- pg->info.pgid,
- pg->get_osdmap()->get_epoch()),
- pg->get_osdmap()->get_epoch());
+ pg->reject_reservation();
return transit<RepNotRecovering>();
}
@@ -6122,6 +6126,14 @@ PG::RecoveryState::RepRecovering::RepRecovering(my_context ctx)
context< RecoveryMachine >().log_enter(state_name);
}
+boost::statechart::result
+PG::RecoveryState::RepRecovering::react(const BackfillTooFull &)  // handles BackfillTooFull, which ReplicatedPG::do_scan queues when too_full_for_backfill() is true
+{
+ PG *pg = context< RecoveryMachine >().pg;
+ pg->reject_reservation();  // sends MBackfillReserve::REJECT to acting[0]
+ return transit<RepNotRecovering>();  // same target state as the RecoveryDone transition
+}
+
void PG::RecoveryState::RepRecovering::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
diff --git a/src/osd/PG.h b/src/osd/PG.h
index b24c74f8bf5..5cc19229fb6 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -1050,9 +1050,29 @@ public:
void sub_op_scrub_unreserve(OpRequestRef op);
void sub_op_scrub_stop(OpRequestRef op);
+ void reject_reservation();
+ void schedule_backfill_full_retry();
// -- recovery state --
+ template <class EVT>
+ struct QueuePeeringEvt : Context {  // Context whose finish() queues a peering event carrying evt on pg; moved here from PG.cc, used by PG::schedule_backfill_full_retry
+ boost::intrusive_ptr<PG> pg;  // intrusive_ptr to the target PG -- presumably keeps it alive until finish() runs; TODO confirm
+ epoch_t epoch;  // epoch the queued event is stamped with
+ EVT evt;  // event payload, stored by value
+ QueuePeeringEvt(PG *pg, epoch_t epoch, EVT evt) :
+ pg(pg), epoch(epoch), evt(evt) {}
+ void finish(int r) {  // r is not used
+ pg->lock();  // queue_peering_event is called between pg->lock() and pg->unlock()
+ pg->queue_peering_event(PG::CephPeeringEvtRef(
+ new PG::CephPeeringEvt(
+ epoch,  // the same epoch fills both epoch arguments
+ epoch,  // (CephPeeringEvt has epoch_sent and epoch_requested members)
+ evt)));
+ pg->unlock();
+ }
+ };
+
class CephPeeringEvt {
epoch_t epoch_sent;
epoch_t epoch_requested;
@@ -1177,6 +1197,7 @@ public:
TrivialEvent(RequestBackfill)
TrivialEvent(RequestRecovery)
TrivialEvent(RecoveryDone)
+ TrivialEvent(BackfillTooFull)
TrivialEvent(AllReplicasRecovered)
TrivialEvent(DoRecovery)
@@ -1471,9 +1492,11 @@ public:
struct Backfilling : boost::statechart::state< Backfilling, Active >, NamedState {
typedef boost::mpl::list<
- boost::statechart::transition< Backfilled, Recovered >
+ boost::statechart::transition< Backfilled, Recovered >,
+ boost::statechart::custom_reaction< RemoteReservationRejected >  // new: RemoteReservationRejected is dispatched to react() below
> reactions;
Backfilling(my_context ctx);
+ boost::statechart::result react(const RemoteReservationRejected& evt);  // defined in PG.cc: cancels the local reservation, schedules a backfill retry, transits to NotBackfilling
void exit();
};
@@ -1527,9 +1550,11 @@ public:
struct RepRecovering : boost::statechart::state< RepRecovering, ReplicaActive >, NamedState {
typedef boost::mpl::list<
- boost::statechart::transition< RecoveryDone, RepNotRecovering >
+ boost::statechart::transition< RecoveryDone, RepNotRecovering >,
+ boost::statechart::custom_reaction< BackfillTooFull >  // new: BackfillTooFull is dispatched to react() below
> reactions;
RepRecovering(my_context ctx);
+ boost::statechart::result react(const BackfillTooFull &evt);  // defined in PG.cc: calls reject_reservation() and transits to RepNotRecovering
void exit();
};
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 50318fc5869..dd836ec89cb 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -1186,6 +1186,19 @@ void ReplicatedPG::do_scan(OpRequestRef op)
switch (m->op) {
case MOSDPGScan::OP_SCAN_GET_DIGEST:
{
+ double ratio, full_ratio;
+ if (osd->too_full_for_backfill(&ratio, &full_ratio)) {
+ dout(1) << __func__ << ": Canceling backfill, current usage is "
+ << ratio << ", which exceeds " << full_ratio << dendl;
+ queue_peering_event(
+ CephPeeringEvtRef(
+ new CephPeeringEvt(
+ get_osdmap()->get_epoch(),
+ get_osdmap()->get_epoch(),
+ BackfillTooFull())));
+ return;
+ }
+
BackfillInterval bi;
osr->flush();
scan_range(m->begin, g_conf->osd_backfill_scan_min, g_conf->osd_backfill_scan_max, &bi);