diff options
author | Sage Weil <sage@inktank.com> | 2013-07-17 14:21:40 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-07-17 18:21:12 -0700 |
commit | 82722efaea2a61ec854653084638ae3543e38dec (patch) | |
tree | 7b88a64486a94f297af81f5ec554a61eeae1e374 | |
parent | 2e216b547481bb52f44eb99102b2ae9ef07efa4d (diff) | |
download | ceph-82722efaea2a61ec854653084638ae3543e38dec.tar.gz |
osd: include op queue age histogram in osd_stat_t
This includes a simple power-of-2 histogram of op ages in the op queue
inside osd_stat_t. This can be used for a coarse view of overall cluster
performance (it will get summed by the mon), to identify specific outlier
osds who have a higher latency than the others, or to identify stuck ops.
Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Samuel Just <sam.just@inktank.com>
-rw-r--r-- | src/osd/OSD.cc | 2 | ||||
-rw-r--r-- | src/osd/OpRequest.cc | 29 | ||||
-rw-r--r-- | src/osd/OpRequest.h | 2 | ||||
-rw-r--r-- | src/osd/osd_types.cc | 44 | ||||
-rw-r--r-- | src/osd/osd_types.h | 67 | ||||
-rw-r--r-- | src/test/encoding/types.h | 1 |
6 files changed, 142 insertions, 3 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 5b8ef307dd1..9010f542009 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -2502,6 +2502,8 @@ void OSD::update_osd_stat() service.check_nearfull_warning(osd_stat); + op_tracker.get_age_ms_histogram(&osd_stat.op_queue_age_hist); + dout(20) << "update_osd_stat " << osd_stat << dendl; } diff --git a/src/osd/OpRequest.cc b/src/osd/OpRequest.cc index ea9beaacda3..3b8a8714d92 100644 --- a/src/osd/OpRequest.cc +++ b/src/osd/OpRequest.cc @@ -183,6 +183,35 @@ bool OpTracker::check_ops_in_flight(std::vector<string> &warning_vector) return warning_vector.size(); } +void OpTracker::get_age_ms_histogram(pow2_hist_t *h) +{ + Mutex::Locker locker(ops_in_flight_lock); + + h->clear(); + + utime_t now = ceph_clock_now(NULL); + unsigned bin = 30; + uint32_t lb = 1 << (bin-1); // lower bound for this bin + int count = 0; + for (xlist<OpRequest*>::iterator i = ops_in_flight.begin(); !i.end(); ++i) { + utime_t age = now - (*i)->received_time; + uint32_t ms = (long)(age * 1000.0); + if (ms >= lb) { + count++; + continue; + } + if (count) + h->set(bin, count); + while (lb > ms) { + bin--; + lb >>= 1; + } + count = 1; + } + if (count) + h->set(bin, count); +} + void OpRequest::dump(utime_t now, Formatter *f) const { Message *m = request; diff --git a/src/osd/OpRequest.h b/src/osd/OpRequest.h index ca419f34ff8..47b050b8538 100644 --- a/src/osd/OpRequest.h +++ b/src/osd/OpRequest.h @@ -64,6 +64,8 @@ public: void register_inflight_op(xlist<OpRequest*>::item *i); void unregister_inflight_op(OpRequest *i); + void get_age_ms_histogram(pow2_hist_t *h); + /** * Look for Ops which are too old, and insert warning * strings for each Op that is too old. diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 08f720edc4c..fa3b7ecc45d 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -102,6 +102,40 @@ void object_locator_t::generate_test_instances(list<object_locator_t*>& o) } +// -- pow2_hist_t -- +void pow2_hist_t::dump(Formatter *f) const +{ + f->open_array_section("histogram"); + for (vector<int>::const_iterator p = h.begin(); p != h.end(); ++p) + f->dump_int("count", *p); + f->close_section(); + f->dump_int("upper_bound", upper_bound()); +} + +void pow2_hist_t::encode(bufferlist& bl) const +{ + ENCODE_START(1, 1, bl); + ::encode(h, bl); + ENCODE_FINISH(bl); +} + +void pow2_hist_t::decode(bufferlist::iterator& p) +{ + DECODE_START(1, p); + ::decode(h, p); + DECODE_FINISH(p); +} + +void pow2_hist_t::generate_test_instances(list<pow2_hist_t*>& ls) +{ + ls.push_back(new pow2_hist_t); + ls.push_back(new pow2_hist_t); + ls.back()->h.push_back(1); + ls.back()->h.push_back(3); + ls.back()->h.push_back(0); + ls.back()->h.push_back(2); +} + // -- osd_stat_t -- void osd_stat_t::dump(Formatter *f) const { @@ -118,11 +152,14 @@ void osd_stat_t::dump(Formatter *f) const f->close_section(); f->dump_int("snap_trim_queue_len", snap_trim_queue_len); f->dump_int("num_snap_trimming", num_snap_trimming); + f->open_object_section("op_queue_age_hist"); + op_queue_age_hist.dump(f); + f->close_section(); } void osd_stat_t::encode(bufferlist &bl) const { - ENCODE_START(2, 2, bl); + ENCODE_START(3, 2, bl); ::encode(kb, bl); ::encode(kb_used, bl); ::encode(kb_avail, bl); @@ -130,12 +167,13 @@ void osd_stat_t::encode(bufferlist &bl) const ::encode(num_snap_trimming, bl); ::encode(hb_in, bl); ::encode(hb_out, bl); + ::encode(op_queue_age_hist, bl); ENCODE_FINISH(bl); } void osd_stat_t::decode(bufferlist::iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); + DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl); ::decode(kb, bl); ::decode(kb_used, bl); ::decode(kb_avail, bl); @@ -143,6 +181,8 @@ void osd_stat_t::decode(bufferlist::iterator &bl) ::decode(num_snap_trimming, bl); ::decode(hb_in, bl); ::decode(hb_out, bl); + if (struct_v >= 3) + ::decode(op_queue_age_hist, bl); DECODE_FINISH(bl); } diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index e3f46f9d601..ff6d70fa69a 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -509,6 +509,65 @@ inline ostream& operator<<(ostream& out, const eversion_t e) { } +/** + * power of 2 histogram + */ +struct pow2_hist_t { + /** + * histogram + * + * bin size is 2^index + * value is count of elements that are <= the current bin but > the previous bin. + */ + vector<int32_t> h; + +private: + /// expand to at least another's size + void _expand_to(unsigned s) { + if (s > h.size()) + h.resize(s, 0); + } + /// drop useless trailing 0's + void _contract() { + unsigned p = h.size(); + while (p > 0 && h[p-1] == 0) + --p; + h.resize(p); + } + +public: + void clear() { + h.clear(); + } + void set(int bin, int32_t v) { + _expand_to(bin + 1); + h[bin] = v; + _contract(); + } + + void add(const pow2_hist_t& o) { + _expand_to(o.h.size()); + for (unsigned p = 0; p < o.h.size(); ++p) + h[p] += o.h[p]; + _contract(); + } + void sub(const pow2_hist_t& o) { + _expand_to(o.h.size()); + for (unsigned p = 0; p < o.h.size(); ++p) + h[p] -= o.h[p]; + _contract(); + } + + int32_t upper_bound() const { + return 1 << h.size(); + } + + void dump(Formatter *f) const; + void encode(bufferlist &bl) const; + void decode(bufferlist::iterator &bl); + static void generate_test_instances(std::list<pow2_hist_t*>& o); +}; +WRITE_CLASS_ENCODER(pow2_hist_t) /** osd_stat * aggregate stats for an osd @@ -518,6 +577,8 @@ struct osd_stat_t { vector<int> hb_in, hb_out; int32_t snap_trim_queue_len, num_snap_trimming; + pow2_hist_t op_queue_age_hist; + osd_stat_t() : kb(0), kb_used(0), kb_avail(0), snap_trim_queue_len(0), num_snap_trimming(0) {} @@ -527,6 +588,7 @@ struct osd_stat_t { kb_avail += o.kb_avail; snap_trim_queue_len += o.snap_trim_queue_len; num_snap_trimming += o.num_snap_trimming; + op_queue_age_hist.add(o.op_queue_age_hist); } void sub(const osd_stat_t& o) { kb -= o.kb; @@ -534,6 +596,7 @@ struct osd_stat_t { kb_avail -= o.kb_avail; snap_trim_queue_len -= o.snap_trim_queue_len; num_snap_trimming -= o.num_snap_trimming; + op_queue_age_hist.sub(o.op_queue_age_hist); } void dump(Formatter *f) const; @@ -562,7 +625,9 @@ inline ostream& operator<<(ostream& out, const osd_stat_t& s) { return out << "osd_stat(" << kb_t(s.kb_used) << " used, " << kb_t(s.kb_avail) << " avail, " << kb_t(s.kb) << " total, " - << "peers " << s.hb_in << "/" << s.hb_out << ")"; + << "peers " << s.hb_in << "/" << s.hb_out + << " op hist " << s.op_queue_age_hist.h + << ")"; } diff --git a/src/test/encoding/types.h b/src/test/encoding/types.h index fd10d28f119..e0bc0a149a8 100644 --- a/src/test/encoding/types.h +++ b/src/test/encoding/types.h @@ -34,6 +34,7 @@ TYPE(osd_reqid_t) TYPE(object_locator_t) TYPE(pg_t) TYPE(coll_t) +TYPE(pow2_hist_t) TYPE(osd_stat_t) TYPE(OSDSuperblock) TYPE_FEATUREFUL(pool_snap_info_t) |