diff options
author | David Zafman <david.zafman@inktank.com> | 2013-04-30 10:52:23 -0700 |
---|---|---|
committer | David Zafman <david.zafman@inktank.com> | 2013-04-30 10:55:12 -0700 |
commit | 53a2c64ff1807c8d863dcd8c05571f4b3ed9c608 (patch) | |
tree | 6ea3eeceb31a3359243046c6548e622605bb5f89 | |
parent | 0acede3bff88bd3e4901d1d704b0cc933b61c0fd (diff) | |
parent | 1c15636b223f0137ad72f3029750759314384a05 (diff) | |
download | ceph-53a2c64ff1807c8d863dcd8c05571f4b3ed9c608.tar.gz |
Merge branch 'wip-2209' into next
Reviewed-by: Samuel Just <sam.just@inktank.com>
-rw-r--r-- | src/osd/OSD.cc | 26 | ||||
-rw-r--r-- | src/osd/PG.cc | 74 | ||||
-rw-r--r-- | src/osd/PG.h | 15 | ||||
-rw-r--r-- | src/osd/ReplicatedPG.cc | 41 |
4 files changed, 95 insertions, 61 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 85edf66f3b4..aa662b59b1a 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3014,14 +3014,14 @@ void OSD::send_pg_stats(const utime_t &now) pg->put("pg_stat_queue"); continue; } - pg->pg_stats_lock.Lock(); - if (pg->pg_stats_valid) { - m->pg_stat[pg->info.pgid] = pg->pg_stats_stable; - dout(25) << " sending " << pg->info.pgid << " " << pg->pg_stats_stable.reported << dendl; + pg->pg_stats_publish_lock.Lock(); + if (pg->pg_stats_publish_valid) { + m->pg_stat[pg->info.pgid] = pg->pg_stats_publish; + dout(25) << " sending " << pg->info.pgid << " " << pg->pg_stats_publish.reported << dendl; } else { - dout(25) << " NOT sending " << pg->info.pgid << " " << pg->pg_stats_stable.reported << ", not valid" << dendl; + dout(25) << " NOT sending " << pg->info.pgid << " " << pg->pg_stats_publish.reported << ", not valid" << dendl; } - pg->pg_stats_lock.Unlock(); + pg->pg_stats_publish_lock.Unlock(); } if (!outstanding_pg_stats) { @@ -3060,18 +3060,18 @@ void OSD::handle_pg_stats_ack(MPGStatsAck *ack) if (ack->pg_stat.count(pg->info.pgid)) { eversion_t acked = ack->pg_stat[pg->info.pgid]; - pg->pg_stats_lock.Lock(); - if (acked == pg->pg_stats_stable.reported) { - dout(25) << " ack on " << pg->info.pgid << " " << pg->pg_stats_stable.reported << dendl; + pg->pg_stats_publish_lock.Lock(); + if (acked == pg->pg_stats_publish.reported) { + dout(25) << " ack on " << pg->info.pgid << " " << pg->pg_stats_publish.reported << dendl; pg->stat_queue_item.remove_myself(); pg->put("pg_stat_queue"); } else { - dout(25) << " still pending " << pg->info.pgid << " " << pg->pg_stats_stable.reported + dout(25) << " still pending " << pg->info.pgid << " " << pg->pg_stats_publish.reported << " > acked " << acked << dendl; } - pg->pg_stats_lock.Unlock(); + pg->pg_stats_publish_lock.Unlock(); } else { - dout(30) << " still pending " << pg->info.pgid << " " << pg->pg_stats_stable.reported << dendl; + dout(30) << " still pending " << pg->info.pgid << " " << pg->pg_stats_publish.reported << dendl; } } @@ -5153,7 +5153,7 @@ void OSD::handle_pg_create(OpRequestRef op) wake_pg_waiters(pg->info.pgid); pg->handle_create(&rctx); pg->write_if_dirty(*rctx.transaction); - pg->update_stats(); + pg->publish_stats_to_osd(); pg->unlock(); num_created++; } diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 70584e44591..ae88be652da 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -163,8 +163,8 @@ PG::PG(OSDService *o, OSDMapRef curmap, backfill_reserved(0), backfill_reserving(0), flushed(false), - pg_stats_lock("PG::pg_stats_lock"), - pg_stats_valid(false), + pg_stats_publish_lock("PG::pg_stats_publish_lock"), + pg_stats_publish_valid(false), osr(osd->osr_registry.lookup_or_create(p, (stringify(p)))), finish_sync_event(NULL), scrub_after_recovery(false), @@ -790,7 +790,7 @@ bool PG::search_for_missing(const pg_info_t &oinfo, const pg_missing_t *omissing found_missing = true; } if (stats_updated) { - update_stats(); + publish_stats_to_osd(); } dout(20) << "search_for_missing missing " << missing.missing << dendl; @@ -1705,7 +1705,7 @@ void PG::activate(ObjectStore::Transaction& t, AllReplicasRecovered()))); } - update_stats(); + publish_stats_to_osd(); } // we need to flush this all out before doing anything else.. @@ -1848,7 +1848,7 @@ void PG::replay_queued_ops() requeue_ops(replay); requeue_ops(waiting_for_active); - update_stats(); + publish_stats_to_osd(); } void PG::_activate_committed(epoch_t e) @@ -1899,7 +1899,7 @@ void PG::all_activated_and_committed() info.history.last_epoch_started = info.last_epoch_started; share_pg_info(); - update_stats(); + publish_stats_to_osd(); queue_peering_event( CephPeeringEvtRef( @@ -1991,7 +1991,7 @@ void PG::_finish_recovery(Context *c) finish_sync_event = 0; purge_strays(); - update_stats(); + publish_stats_to_osd(); if (scrub_after_recovery) { dout(10) << "_finish_recovery requeueing for scrub" << dendl; @@ -2299,9 +2299,9 @@ void PG::update_heartbeat_peers() osd->need_heartbeat_peer_update(); } -void PG::update_stats() +void PG::publish_stats_to_osd() { - pg_stats_lock.Lock(); + pg_stats_publish_lock.Lock(); if (is_primary()) { // update our stat summary info.stats.reported.inc(info.history.same_primary_since); @@ -2339,17 +2339,18 @@ void PG::update_stats() info.stats.log_start = log.tail; info.stats.ondisk_log_start = log.tail; - pg_stats_valid = true; - pg_stats_stable = info.stats; + pg_stats_publish_valid = true; + pg_stats_publish = info.stats; + pg_stats_publish.stats.add(unstable_stats); // calc copies, degraded unsigned target = MAX(get_osdmap()->get_pg_size(info.pgid), acting.size()); - pg_stats_stable.stats.calc_copies(target); - pg_stats_stable.stats.sum.num_objects_degraded = 0; + pg_stats_publish.stats.calc_copies(target); + pg_stats_publish.stats.sum.num_objects_degraded = 0; if ((is_degraded() || !is_clean()) && is_active()) { // NOTE: we only generate copies, degraded, unfound values for // the summation, not individual stat categories. - uint64_t num_objects = pg_stats_stable.stats.sum.num_objects; + uint64_t num_objects = pg_stats_publish.stats.sum.num_objects; uint64_t degraded = 0; @@ -2358,7 +2359,7 @@ void PG::update_stats() degraded += (target - acting.size()) * num_objects; // missing on primary - pg_stats_stable.stats.sum.num_objects_missing_on_primary = missing.num_missing(); + pg_stats_publish.stats.sum.num_objects_missing_on_primary = missing.num_missing(); degraded += missing.num_missing(); for (unsigned i=1; i<acting.size(); i++) { @@ -2370,27 +2371,27 @@ void PG::update_stats() // not yet backfilled degraded += num_objects - peer_info[acting[i]].stats.stats.sum.num_objects; } - pg_stats_stable.stats.sum.num_objects_degraded = degraded; - pg_stats_stable.stats.sum.num_objects_unfound = get_num_unfound(); + pg_stats_publish.stats.sum.num_objects_degraded = degraded; + pg_stats_publish.stats.sum.num_objects_unfound = get_num_unfound(); } - dout(15) << "update_stats " << pg_stats_stable.reported << dendl; + dout(15) << "publish_stats_to_osd " << pg_stats_publish.reported << dendl; } else { - pg_stats_valid = false; - dout(15) << "update_stats -- not primary" << dendl; + pg_stats_publish_valid = false; + dout(15) << "publish_stats_to_osd -- not primary" << dendl; } - pg_stats_lock.Unlock(); + pg_stats_publish_lock.Unlock(); if (is_primary()) osd->pg_stat_queue_enqueue(this); } -void PG::clear_stats() +void PG::clear_publish_stats() { dout(15) << "clear_stats" << dendl; - pg_stats_lock.Lock(); - pg_stats_valid = false; - pg_stats_lock.Unlock(); + pg_stats_publish_lock.Lock(); + pg_stats_publish_valid = false; + pg_stats_publish_lock.Unlock(); osd->pg_stat_queue_dequeue(this); } @@ -2620,6 +2621,9 @@ int PG::_write_info(ObjectStore::Transaction& t, epoch_t epoch, void PG::write_info(ObjectStore::Transaction& t) { + info.stats.stats.add(unstable_stats); + unstable_stats.clear(); + int ret = _write_info(t, get_osdmap()->get_epoch(), info, coll, past_intervals, snap_collections, osd->infos_oid, info_struct_v, dirty_big_info); @@ -3817,7 +3821,7 @@ void PG::scrub() state_clear(PG_STATE_SCRUBBING); state_clear(PG_STATE_REPAIR); state_clear(PG_STATE_DEEP_SCRUB); - update_stats(); + publish_stats_to_osd(); unlock(); return; } @@ -3903,7 +3907,7 @@ void PG::classic_scrub() scrubber.active = true; scrubber.classic = true; - update_stats(); + publish_stats_to_osd(); scrubber.received_maps.clear(); scrubber.epoch_start = info.history.same_interval_since; @@ -4078,7 +4082,7 @@ void PG::chunky_scrub() { case PG::Scrubber::INACTIVE: dout(10) << "scrub start" << dendl; - update_stats(); + publish_stats_to_osd(); scrubber.epoch_start = info.history.same_interval_since; scrubber.active = true; @@ -4256,7 +4260,7 @@ void PG::scrub_clear_state() state_clear(PG_STATE_SCRUBBING); state_clear(PG_STATE_REPAIR); state_clear(PG_STATE_DEEP_SCRUB); - update_stats(); + publish_stats_to_osd(); // active -> nothing. if (scrubber.active) @@ -5093,7 +5097,7 @@ void PG::start_peering_interval(const OSDMapRef lastmap, // old primary? if (oldrole == 0) { state_clear(PG_STATE_CLEAN); - clear_stats(); + clear_publish_stats(); // take replay queue waiters list<OpRequestRef> ls; @@ -6048,7 +6052,7 @@ boost::statechart::result PG::RecoveryState::Primary::react(const ActMap&) { dout(7) << "handle ActMap primary" << dendl; PG *pg = context< RecoveryMachine >().pg; - pg->update_stats(); + pg->publish_stats_to_osd(); pg->take_waiters(); return discard_event(); } @@ -6568,7 +6572,7 @@ PG::RecoveryState::Clean::Clean(my_context ctx) pg->mark_clean(); pg->share_pg_info(); - pg->update_stats(); + pg->publish_stats_to_osd(); } @@ -6636,7 +6640,7 @@ boost::statechart::result PG::RecoveryState::Active::react(const AdvMap& advmap) pg->state_clear(PG_STATE_DEGRADED); else pg->state_set(PG_STATE_DEGRADED); - pg->update_stats(); // degraded may have changed + pg->publish_stats_to_osd(); // degraded may have changed } return forward_event(); } @@ -7015,7 +7019,7 @@ PG::RecoveryState::GetInfo::GetInfo(my_context ctx) if (!prior_set.get()) pg->build_prior(prior_set); - pg->update_stats(); + pg->publish_stats_to_osd(); get_infos(); if (peer_info_requested.empty() && !prior_set->pg_down) { @@ -7348,7 +7352,7 @@ PG::RecoveryState::Incomplete::Incomplete(my_context ctx) pg->state_clear(PG_STATE_PEERING); pg->state_set(PG_STATE_INCOMPLETE); - pg->update_stats(); + pg->publish_stats_to_osd(); } boost::statechart::result PG::RecoveryState::Incomplete::react(const AdvMap &advmap) { diff --git a/src/osd/PG.h b/src/osd/PG.h index bdd276d0bc7..9f10d8dfbd0 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -716,16 +716,19 @@ protected: void requeue_object_waiters(map<hobject_t, list<OpRequestRef> >& m); void requeue_ops(list<OpRequestRef> &l); - // stats - Mutex pg_stats_lock; - bool pg_stats_valid; - pg_stat_t pg_stats_stable; + // stats that persist lazily + object_stat_collection_t unstable_stats; + + // publish stats + Mutex pg_stats_publish_lock; + bool pg_stats_publish_valid; + pg_stat_t pg_stats_publish; // for ordering writes std::tr1::shared_ptr<ObjectStore::Sequencer> osr; - void update_stats(); - void clear_stats(); + void publish_stats_to_osd(); + void clear_publish_stats(); public: void clear_primary_state(); diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index fb24375dca6..708e4153ca8 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -1039,7 +1039,7 @@ void ReplicatedPG::do_op(OpRequestRef op) if (ctx->op_t.empty() || result < 0) { if (result >= 0) { log_op_stats(ctx); - update_stats(); + publish_stats_to_osd(); } MOSDOpReply *reply = ctx->reply; @@ -2197,9 +2197,10 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) if (r >= 0) { op.xattr.value_len = r; result = 0; + ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(r, 10); + ctx->delta_stats.num_rd++; } else result = r; - ctx->delta_stats.num_rd++; } break; @@ -2217,6 +2218,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) bufferlist bl; ::encode(newattrs, bl); + ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(bl.length(), 10); + ctx->delta_stats.num_rd++; osd_op.outdata.claim_append(bl); } break; @@ -2237,6 +2240,9 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) if (result < 0 && result != -EEXIST && result != -ENODATA) break; + ctx->delta_stats.num_rd++; + ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(xattr.length(), 10); + switch (op.xattr.cmp_mode) { case CEPH_OSD_CMPXATTR_MODE_STRING: { @@ -2281,7 +2287,6 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) } dout(10) << "comparison returned true" << dendl; - ctx->delta_stats.num_rd++; } break; @@ -2848,6 +2853,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) out_set.insert(iter->first); } ::encode(out_set, osd_op.outdata); + ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10); + ctx->delta_stats.num_rd++; break; } dout(10) << "failed, reading from omap" << dendl; @@ -2867,6 +2874,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) } } ::encode(out_set, osd_op.outdata); + ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10); + ctx->delta_stats.num_rd++; } break; case CEPH_OSD_OP_OMAPGETVALS: @@ -2901,6 +2910,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) out_set.insert(*iter); } ::encode(out_set, osd_op.outdata); + ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10); + ctx->delta_stats.num_rd++; break; } // No valid tmap, use omap @@ -2923,6 +2934,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) } } ::encode(out_set, osd_op.outdata); + ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10); + ctx->delta_stats.num_rd++; } break; case CEPH_OSD_OP_OMAPGETHEADER: @@ -2941,6 +2954,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) dout(10) << "failed, reading from omap" << dendl; } osd->store->omap_get_header(coll, soid, &osd_op.outdata); + ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10); + ctx->delta_stats.num_rd++; } break; case CEPH_OSD_OP_OMAPGETVALSBYKEYS: @@ -2969,6 +2984,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) } } ::encode(out, osd_op.outdata); + ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10); + ctx->delta_stats.num_rd++; break; } // No valid tmap, use omap @@ -2976,6 +2993,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) } osd->store->omap_get_values(coll, soid, keys_to_get, &out); ::encode(out, osd_op.outdata); + ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10); + ctx->delta_stats.num_rd++; } break; case CEPH_OSD_OP_OMAP_CMP: @@ -3004,6 +3023,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) result = r; break; } + //Should set num_rd_kb based on encode length of map + ctx->delta_stats.num_rd++; r = 0; bufferlist empty; @@ -3066,6 +3087,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) dout(20) << "\t" << i->first << dendl; } t.omap_setkeys(coll, soid, to_set); + ctx->delta_stats.num_wr++; } break; case CEPH_OSD_OP_OMAPSETHEADER: @@ -3079,6 +3101,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) } t.touch(coll, soid); t.omap_setheader(coll, soid, osd_op.indata); + ctx->delta_stats.num_wr++; } break; case CEPH_OSD_OP_OMAPCLEAR: @@ -3092,6 +3115,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) } t.touch(coll, soid); t.omap_clear(coll, soid); + ctx->delta_stats.num_wr++; } break; case CEPH_OSD_OP_OMAPRMKEYS: @@ -3113,6 +3137,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) goto fail; } t.omap_rmkeys(coll, soid, to_rm); + ctx->delta_stats.num_wr++; } break; default: @@ -3575,6 +3600,7 @@ int ReplicatedPG::prepare_transaction(OpContext *ctx) // read-op? done? if (ctx->op_t.empty() && !ctx->modify) { ctx->reply_version = ctx->obs->oi.user_version; + unstable_stats.add(ctx->delta_stats, ctx->obc->obs.oi.category); return result; } @@ -3879,7 +3905,7 @@ void ReplicatedPG::eval_repop(RepGather *repop) if (repop->waitfor_disk.empty()) { log_op_stats(repop->ctx); - update_stats(); + publish_stats_to_osd(); // send dup commits, in order if (waiting_for_ondisk.count(repop->v)) { @@ -5454,7 +5480,7 @@ void ReplicatedPG::handle_pull_response(OpRequestRef op) if (complete) { pulling.erase(hoid); pull_from_peer[m->get_source().num()].erase(hoid); - update_stats(); + publish_stats_to_osd(); if (waiting_for_missing_object.count(hoid)) { dout(20) << " kicking waiters on " << hoid << dendl; requeue_ops(waiting_for_missing_object[hoid]); @@ -5701,7 +5727,7 @@ void ReplicatedPG::sub_op_push_reply(OpRequestRef op) pushing[soid].erase(peer); pi = NULL; - update_stats(); + publish_stats_to_osd(); if (pushing[soid].empty()) { pushing.erase(soid); @@ -6335,6 +6361,7 @@ void ReplicatedPG::on_change() snap_trimmer_machine.process_event(Reset()); debug_op_order.clear(); + unstable_stats.clear(); } void ReplicatedPG::on_role_change() @@ -7277,7 +7304,7 @@ void ReplicatedPG::_scrub_finish() if (repair) { ++scrubber.fixed; info.stats.stats = scrub_cstat; - update_stats(); + publish_stats_to_osd(); share_pg_info(); } } |