summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Zafman <david.zafman@inktank.com>2013-04-30 10:52:23 -0700
committerDavid Zafman <david.zafman@inktank.com>2013-04-30 10:55:12 -0700
commit53a2c64ff1807c8d863dcd8c05571f4b3ed9c608 (patch)
tree6ea3eeceb31a3359243046c6548e622605bb5f89
parent0acede3bff88bd3e4901d1d704b0cc933b61c0fd (diff)
parent1c15636b223f0137ad72f3029750759314384a05 (diff)
downloadceph-53a2c64ff1807c8d863dcd8c05571f4b3ed9c608.tar.gz
Merge branch 'wip-2209' into next
Reviewed-by: Samuel Just <sam.just@inktank.com>
-rw-r--r--src/osd/OSD.cc26
-rw-r--r--src/osd/PG.cc74
-rw-r--r--src/osd/PG.h15
-rw-r--r--src/osd/ReplicatedPG.cc41
4 files changed, 95 insertions, 61 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 85edf66f3b4..aa662b59b1a 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -3014,14 +3014,14 @@ void OSD::send_pg_stats(const utime_t &now)
pg->put("pg_stat_queue");
continue;
}
- pg->pg_stats_lock.Lock();
- if (pg->pg_stats_valid) {
- m->pg_stat[pg->info.pgid] = pg->pg_stats_stable;
- dout(25) << " sending " << pg->info.pgid << " " << pg->pg_stats_stable.reported << dendl;
+ pg->pg_stats_publish_lock.Lock();
+ if (pg->pg_stats_publish_valid) {
+ m->pg_stat[pg->info.pgid] = pg->pg_stats_publish;
+ dout(25) << " sending " << pg->info.pgid << " " << pg->pg_stats_publish.reported << dendl;
} else {
- dout(25) << " NOT sending " << pg->info.pgid << " " << pg->pg_stats_stable.reported << ", not valid" << dendl;
+ dout(25) << " NOT sending " << pg->info.pgid << " " << pg->pg_stats_publish.reported << ", not valid" << dendl;
}
- pg->pg_stats_lock.Unlock();
+ pg->pg_stats_publish_lock.Unlock();
}
if (!outstanding_pg_stats) {
@@ -3060,18 +3060,18 @@ void OSD::handle_pg_stats_ack(MPGStatsAck *ack)
if (ack->pg_stat.count(pg->info.pgid)) {
eversion_t acked = ack->pg_stat[pg->info.pgid];
- pg->pg_stats_lock.Lock();
- if (acked == pg->pg_stats_stable.reported) {
- dout(25) << " ack on " << pg->info.pgid << " " << pg->pg_stats_stable.reported << dendl;
+ pg->pg_stats_publish_lock.Lock();
+ if (acked == pg->pg_stats_publish.reported) {
+ dout(25) << " ack on " << pg->info.pgid << " " << pg->pg_stats_publish.reported << dendl;
pg->stat_queue_item.remove_myself();
pg->put("pg_stat_queue");
} else {
- dout(25) << " still pending " << pg->info.pgid << " " << pg->pg_stats_stable.reported
+ dout(25) << " still pending " << pg->info.pgid << " " << pg->pg_stats_publish.reported
<< " > acked " << acked << dendl;
}
- pg->pg_stats_lock.Unlock();
+ pg->pg_stats_publish_lock.Unlock();
} else {
- dout(30) << " still pending " << pg->info.pgid << " " << pg->pg_stats_stable.reported << dendl;
+ dout(30) << " still pending " << pg->info.pgid << " " << pg->pg_stats_publish.reported << dendl;
}
}
@@ -5153,7 +5153,7 @@ void OSD::handle_pg_create(OpRequestRef op)
wake_pg_waiters(pg->info.pgid);
pg->handle_create(&rctx);
pg->write_if_dirty(*rctx.transaction);
- pg->update_stats();
+ pg->publish_stats_to_osd();
pg->unlock();
num_created++;
}
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 70584e44591..ae88be652da 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -163,8 +163,8 @@ PG::PG(OSDService *o, OSDMapRef curmap,
backfill_reserved(0),
backfill_reserving(0),
flushed(false),
- pg_stats_lock("PG::pg_stats_lock"),
- pg_stats_valid(false),
+ pg_stats_publish_lock("PG::pg_stats_publish_lock"),
+ pg_stats_publish_valid(false),
osr(osd->osr_registry.lookup_or_create(p, (stringify(p)))),
finish_sync_event(NULL),
scrub_after_recovery(false),
@@ -790,7 +790,7 @@ bool PG::search_for_missing(const pg_info_t &oinfo, const pg_missing_t *omissing
found_missing = true;
}
if (stats_updated) {
- update_stats();
+ publish_stats_to_osd();
}
dout(20) << "search_for_missing missing " << missing.missing << dendl;
@@ -1705,7 +1705,7 @@ void PG::activate(ObjectStore::Transaction& t,
AllReplicasRecovered())));
}
- update_stats();
+ publish_stats_to_osd();
}
// we need to flush this all out before doing anything else..
@@ -1848,7 +1848,7 @@ void PG::replay_queued_ops()
requeue_ops(replay);
requeue_ops(waiting_for_active);
- update_stats();
+ publish_stats_to_osd();
}
void PG::_activate_committed(epoch_t e)
@@ -1899,7 +1899,7 @@ void PG::all_activated_and_committed()
info.history.last_epoch_started = info.last_epoch_started;
share_pg_info();
- update_stats();
+ publish_stats_to_osd();
queue_peering_event(
CephPeeringEvtRef(
@@ -1991,7 +1991,7 @@ void PG::_finish_recovery(Context *c)
finish_sync_event = 0;
purge_strays();
- update_stats();
+ publish_stats_to_osd();
if (scrub_after_recovery) {
dout(10) << "_finish_recovery requeueing for scrub" << dendl;
@@ -2299,9 +2299,9 @@ void PG::update_heartbeat_peers()
osd->need_heartbeat_peer_update();
}
-void PG::update_stats()
+void PG::publish_stats_to_osd()
{
- pg_stats_lock.Lock();
+ pg_stats_publish_lock.Lock();
if (is_primary()) {
// update our stat summary
info.stats.reported.inc(info.history.same_primary_since);
@@ -2339,17 +2339,18 @@ void PG::update_stats()
info.stats.log_start = log.tail;
info.stats.ondisk_log_start = log.tail;
- pg_stats_valid = true;
- pg_stats_stable = info.stats;
+ pg_stats_publish_valid = true;
+ pg_stats_publish = info.stats;
+ pg_stats_publish.stats.add(unstable_stats);
// calc copies, degraded
unsigned target = MAX(get_osdmap()->get_pg_size(info.pgid), acting.size());
- pg_stats_stable.stats.calc_copies(target);
- pg_stats_stable.stats.sum.num_objects_degraded = 0;
+ pg_stats_publish.stats.calc_copies(target);
+ pg_stats_publish.stats.sum.num_objects_degraded = 0;
if ((is_degraded() || !is_clean()) && is_active()) {
// NOTE: we only generate copies, degraded, unfound values for
// the summation, not individual stat categories.
- uint64_t num_objects = pg_stats_stable.stats.sum.num_objects;
+ uint64_t num_objects = pg_stats_publish.stats.sum.num_objects;
uint64_t degraded = 0;
@@ -2358,7 +2359,7 @@ void PG::update_stats()
degraded += (target - acting.size()) * num_objects;
// missing on primary
- pg_stats_stable.stats.sum.num_objects_missing_on_primary = missing.num_missing();
+ pg_stats_publish.stats.sum.num_objects_missing_on_primary = missing.num_missing();
degraded += missing.num_missing();
for (unsigned i=1; i<acting.size(); i++) {
@@ -2370,27 +2371,27 @@ void PG::update_stats()
// not yet backfilled
degraded += num_objects - peer_info[acting[i]].stats.stats.sum.num_objects;
}
- pg_stats_stable.stats.sum.num_objects_degraded = degraded;
- pg_stats_stable.stats.sum.num_objects_unfound = get_num_unfound();
+ pg_stats_publish.stats.sum.num_objects_degraded = degraded;
+ pg_stats_publish.stats.sum.num_objects_unfound = get_num_unfound();
}
- dout(15) << "update_stats " << pg_stats_stable.reported << dendl;
+ dout(15) << "publish_stats_to_osd " << pg_stats_publish.reported << dendl;
} else {
- pg_stats_valid = false;
- dout(15) << "update_stats -- not primary" << dendl;
+ pg_stats_publish_valid = false;
+ dout(15) << "publish_stats_to_osd -- not primary" << dendl;
}
- pg_stats_lock.Unlock();
+ pg_stats_publish_lock.Unlock();
if (is_primary())
osd->pg_stat_queue_enqueue(this);
}
-void PG::clear_stats()
+void PG::clear_publish_stats()
{
dout(15) << "clear_stats" << dendl;
- pg_stats_lock.Lock();
- pg_stats_valid = false;
- pg_stats_lock.Unlock();
+ pg_stats_publish_lock.Lock();
+ pg_stats_publish_valid = false;
+ pg_stats_publish_lock.Unlock();
osd->pg_stat_queue_dequeue(this);
}
@@ -2620,6 +2621,9 @@ int PG::_write_info(ObjectStore::Transaction& t, epoch_t epoch,
void PG::write_info(ObjectStore::Transaction& t)
{
+ info.stats.stats.add(unstable_stats);
+ unstable_stats.clear();
+
int ret = _write_info(t, get_osdmap()->get_epoch(), info, coll,
past_intervals, snap_collections, osd->infos_oid,
info_struct_v, dirty_big_info);
@@ -3817,7 +3821,7 @@ void PG::scrub()
state_clear(PG_STATE_SCRUBBING);
state_clear(PG_STATE_REPAIR);
state_clear(PG_STATE_DEEP_SCRUB);
- update_stats();
+ publish_stats_to_osd();
unlock();
return;
}
@@ -3903,7 +3907,7 @@ void PG::classic_scrub()
scrubber.active = true;
scrubber.classic = true;
- update_stats();
+ publish_stats_to_osd();
scrubber.received_maps.clear();
scrubber.epoch_start = info.history.same_interval_since;
@@ -4078,7 +4082,7 @@ void PG::chunky_scrub() {
case PG::Scrubber::INACTIVE:
dout(10) << "scrub start" << dendl;
- update_stats();
+ publish_stats_to_osd();
scrubber.epoch_start = info.history.same_interval_since;
scrubber.active = true;
@@ -4256,7 +4260,7 @@ void PG::scrub_clear_state()
state_clear(PG_STATE_SCRUBBING);
state_clear(PG_STATE_REPAIR);
state_clear(PG_STATE_DEEP_SCRUB);
- update_stats();
+ publish_stats_to_osd();
// active -> nothing.
if (scrubber.active)
@@ -5093,7 +5097,7 @@ void PG::start_peering_interval(const OSDMapRef lastmap,
// old primary?
if (oldrole == 0) {
state_clear(PG_STATE_CLEAN);
- clear_stats();
+ clear_publish_stats();
// take replay queue waiters
list<OpRequestRef> ls;
@@ -6048,7 +6052,7 @@ boost::statechart::result PG::RecoveryState::Primary::react(const ActMap&)
{
dout(7) << "handle ActMap primary" << dendl;
PG *pg = context< RecoveryMachine >().pg;
- pg->update_stats();
+ pg->publish_stats_to_osd();
pg->take_waiters();
return discard_event();
}
@@ -6568,7 +6572,7 @@ PG::RecoveryState::Clean::Clean(my_context ctx)
pg->mark_clean();
pg->share_pg_info();
- pg->update_stats();
+ pg->publish_stats_to_osd();
}
@@ -6636,7 +6640,7 @@ boost::statechart::result PG::RecoveryState::Active::react(const AdvMap& advmap)
pg->state_clear(PG_STATE_DEGRADED);
else
pg->state_set(PG_STATE_DEGRADED);
- pg->update_stats(); // degraded may have changed
+ pg->publish_stats_to_osd(); // degraded may have changed
}
return forward_event();
}
@@ -7015,7 +7019,7 @@ PG::RecoveryState::GetInfo::GetInfo(my_context ctx)
if (!prior_set.get())
pg->build_prior(prior_set);
- pg->update_stats();
+ pg->publish_stats_to_osd();
get_infos();
if (peer_info_requested.empty() && !prior_set->pg_down) {
@@ -7348,7 +7352,7 @@ PG::RecoveryState::Incomplete::Incomplete(my_context ctx)
pg->state_clear(PG_STATE_PEERING);
pg->state_set(PG_STATE_INCOMPLETE);
- pg->update_stats();
+ pg->publish_stats_to_osd();
}
boost::statechart::result PG::RecoveryState::Incomplete::react(const AdvMap &advmap) {
diff --git a/src/osd/PG.h b/src/osd/PG.h
index bdd276d0bc7..9f10d8dfbd0 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -716,16 +716,19 @@ protected:
void requeue_object_waiters(map<hobject_t, list<OpRequestRef> >& m);
void requeue_ops(list<OpRequestRef> &l);
- // stats
- Mutex pg_stats_lock;
- bool pg_stats_valid;
- pg_stat_t pg_stats_stable;
+ // stats that persist lazily
+ object_stat_collection_t unstable_stats;
+
+ // publish stats
+ Mutex pg_stats_publish_lock;
+ bool pg_stats_publish_valid;
+ pg_stat_t pg_stats_publish;
// for ordering writes
std::tr1::shared_ptr<ObjectStore::Sequencer> osr;
- void update_stats();
- void clear_stats();
+ void publish_stats_to_osd();
+ void clear_publish_stats();
public:
void clear_primary_state();
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index fb24375dca6..708e4153ca8 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -1039,7 +1039,7 @@ void ReplicatedPG::do_op(OpRequestRef op)
if (ctx->op_t.empty() || result < 0) {
if (result >= 0) {
log_op_stats(ctx);
- update_stats();
+ publish_stats_to_osd();
}
MOSDOpReply *reply = ctx->reply;
@@ -2197,9 +2197,10 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
if (r >= 0) {
op.xattr.value_len = r;
result = 0;
+ ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(r, 10);
+ ctx->delta_stats.num_rd++;
} else
result = r;
- ctx->delta_stats.num_rd++;
}
break;
@@ -2217,6 +2218,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
bufferlist bl;
::encode(newattrs, bl);
+ ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(bl.length(), 10);
+ ctx->delta_stats.num_rd++;
osd_op.outdata.claim_append(bl);
}
break;
@@ -2237,6 +2240,9 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
if (result < 0 && result != -EEXIST && result != -ENODATA)
break;
+ ctx->delta_stats.num_rd++;
+ ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(xattr.length(), 10);
+
switch (op.xattr.cmp_mode) {
case CEPH_OSD_CMPXATTR_MODE_STRING:
{
@@ -2281,7 +2287,6 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
dout(10) << "comparison returned true" << dendl;
- ctx->delta_stats.num_rd++;
}
break;
@@ -2848,6 +2853,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
out_set.insert(iter->first);
}
::encode(out_set, osd_op.outdata);
+ ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10);
+ ctx->delta_stats.num_rd++;
break;
}
dout(10) << "failed, reading from omap" << dendl;
@@ -2867,6 +2874,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
}
::encode(out_set, osd_op.outdata);
+ ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10);
+ ctx->delta_stats.num_rd++;
}
break;
case CEPH_OSD_OP_OMAPGETVALS:
@@ -2901,6 +2910,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
out_set.insert(*iter);
}
::encode(out_set, osd_op.outdata);
+ ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10);
+ ctx->delta_stats.num_rd++;
break;
}
// No valid tmap, use omap
@@ -2923,6 +2934,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
}
::encode(out_set, osd_op.outdata);
+ ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10);
+ ctx->delta_stats.num_rd++;
}
break;
case CEPH_OSD_OP_OMAPGETHEADER:
@@ -2941,6 +2954,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
dout(10) << "failed, reading from omap" << dendl;
}
osd->store->omap_get_header(coll, soid, &osd_op.outdata);
+ ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10);
+ ctx->delta_stats.num_rd++;
}
break;
case CEPH_OSD_OP_OMAPGETVALSBYKEYS:
@@ -2969,6 +2984,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
}
::encode(out, osd_op.outdata);
+ ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10);
+ ctx->delta_stats.num_rd++;
break;
}
// No valid tmap, use omap
@@ -2976,6 +2993,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
osd->store->omap_get_values(coll, soid, keys_to_get, &out);
::encode(out, osd_op.outdata);
+ ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10);
+ ctx->delta_stats.num_rd++;
}
break;
case CEPH_OSD_OP_OMAP_CMP:
@@ -3004,6 +3023,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
result = r;
break;
}
+ //Should set num_rd_kb based on encode length of map
+ ctx->delta_stats.num_rd++;
r = 0;
bufferlist empty;
@@ -3066,6 +3087,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
dout(20) << "\t" << i->first << dendl;
}
t.omap_setkeys(coll, soid, to_set);
+ ctx->delta_stats.num_wr++;
}
break;
case CEPH_OSD_OP_OMAPSETHEADER:
@@ -3079,6 +3101,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
t.touch(coll, soid);
t.omap_setheader(coll, soid, osd_op.indata);
+ ctx->delta_stats.num_wr++;
}
break;
case CEPH_OSD_OP_OMAPCLEAR:
@@ -3092,6 +3115,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
t.touch(coll, soid);
t.omap_clear(coll, soid);
+ ctx->delta_stats.num_wr++;
}
break;
case CEPH_OSD_OP_OMAPRMKEYS:
@@ -3113,6 +3137,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
goto fail;
}
t.omap_rmkeys(coll, soid, to_rm);
+ ctx->delta_stats.num_wr++;
}
break;
default:
@@ -3575,6 +3600,7 @@ int ReplicatedPG::prepare_transaction(OpContext *ctx)
// read-op? done?
if (ctx->op_t.empty() && !ctx->modify) {
ctx->reply_version = ctx->obs->oi.user_version;
+ unstable_stats.add(ctx->delta_stats, ctx->obc->obs.oi.category);
return result;
}
@@ -3879,7 +3905,7 @@ void ReplicatedPG::eval_repop(RepGather *repop)
if (repop->waitfor_disk.empty()) {
log_op_stats(repop->ctx);
- update_stats();
+ publish_stats_to_osd();
// send dup commits, in order
if (waiting_for_ondisk.count(repop->v)) {
@@ -5454,7 +5480,7 @@ void ReplicatedPG::handle_pull_response(OpRequestRef op)
if (complete) {
pulling.erase(hoid);
pull_from_peer[m->get_source().num()].erase(hoid);
- update_stats();
+ publish_stats_to_osd();
if (waiting_for_missing_object.count(hoid)) {
dout(20) << " kicking waiters on " << hoid << dendl;
requeue_ops(waiting_for_missing_object[hoid]);
@@ -5701,7 +5727,7 @@ void ReplicatedPG::sub_op_push_reply(OpRequestRef op)
pushing[soid].erase(peer);
pi = NULL;
- update_stats();
+ publish_stats_to_osd();
if (pushing[soid].empty()) {
pushing.erase(soid);
@@ -6335,6 +6361,7 @@ void ReplicatedPG::on_change()
snap_trimmer_machine.process_event(Reset());
debug_op_order.clear();
+ unstable_stats.clear();
}
void ReplicatedPG::on_role_change()
@@ -7277,7 +7304,7 @@ void ReplicatedPG::_scrub_finish()
if (repair) {
++scrubber.fixed;
info.stats.stats = scrub_cstat;
- update_stats();
+ publish_stats_to_osd();
share_pg_info();
}
}