summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-07-17 14:21:40 -0700
committerSage Weil <sage@inktank.com>2013-07-17 18:21:12 -0700
commit82722efaea2a61ec854653084638ae3543e38dec (patch)
tree7b88a64486a94f297af81f5ec554a61eeae1e374
parent2e216b547481bb52f44eb99102b2ae9ef07efa4d (diff)
downloadceph-82722efaea2a61ec854653084638ae3543e38dec.tar.gz
osd: include op queue age histogram in osd_stat_t
This includes a simple power-of-2 histogram of op ages in the op queue inside osd_stat_t. This can be used for a coarse view of overall cluster performance (it will get summed by the mon), to identify specific outlier osds who have a higher latency than the others, or to identify stuck ops. Signed-off-by: Sage Weil <sage@inktank.com> Reviewed-by: Samuel Just <sam.just@inktank.com>
-rw-r--r--src/osd/OSD.cc2
-rw-r--r--src/osd/OpRequest.cc29
-rw-r--r--src/osd/OpRequest.h2
-rw-r--r--src/osd/osd_types.cc44
-rw-r--r--src/osd/osd_types.h67
-rw-r--r--src/test/encoding/types.h1
6 files changed, 142 insertions, 3 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 5b8ef307dd1..9010f542009 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -2502,6 +2502,8 @@ void OSD::update_osd_stat()
service.check_nearfull_warning(osd_stat);
+ op_tracker.get_age_ms_histogram(&osd_stat.op_queue_age_hist);
+
dout(20) << "update_osd_stat " << osd_stat << dendl;
}
diff --git a/src/osd/OpRequest.cc b/src/osd/OpRequest.cc
index ea9beaacda3..3b8a8714d92 100644
--- a/src/osd/OpRequest.cc
+++ b/src/osd/OpRequest.cc
@@ -183,6 +183,35 @@ bool OpTracker::check_ops_in_flight(std::vector<string> &warning_vector)
return warning_vector.size();
}
+void OpTracker::get_age_ms_histogram(pow2_hist_t *h)
+{
+ Mutex::Locker locker(ops_in_flight_lock);
+
+ h->clear();
+
+ utime_t now = ceph_clock_now(NULL);
+ unsigned bin = 30;
+ uint32_t lb = 1 << (bin-1); // lower bound for this bin
+ int count = 0;
+ for (xlist<OpRequest*>::iterator i = ops_in_flight.begin(); !i.end(); ++i) {
+ utime_t age = now - (*i)->received_time;
+ uint32_t ms = (long)(age * 1000.0);
+ if (ms >= lb) {
+ count++;
+ continue;
+ }
+ if (count)
+ h->set(bin, count);
+ while (lb > ms) {
+ bin--;
+ lb >>= 1;
+ }
+ count = 1;
+ }
+ if (count)
+ h->set(bin, count);
+}
+
void OpRequest::dump(utime_t now, Formatter *f) const
{
Message *m = request;
diff --git a/src/osd/OpRequest.h b/src/osd/OpRequest.h
index ca419f34ff8..47b050b8538 100644
--- a/src/osd/OpRequest.h
+++ b/src/osd/OpRequest.h
@@ -64,6 +64,8 @@ public:
void register_inflight_op(xlist<OpRequest*>::item *i);
void unregister_inflight_op(OpRequest *i);
+ void get_age_ms_histogram(pow2_hist_t *h);
+
/**
* Look for Ops which are too old, and insert warning
* strings for each Op that is too old.
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 08f720edc4c..fa3b7ecc45d 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -102,6 +102,40 @@ void object_locator_t::generate_test_instances(list<object_locator_t*>& o)
}
+// -- pow2_hist_t --
+void pow2_hist_t::dump(Formatter *f) const
+{
+ f->open_array_section("histogram");
+ for (vector<int>::const_iterator p = h.begin(); p != h.end(); ++p)
+ f->dump_int("count", *p);
+ f->close_section();
+ f->dump_int("upper_bound", upper_bound());
+}
+
+void pow2_hist_t::encode(bufferlist& bl) const
+{
+ ENCODE_START(1, 1, bl);
+ ::encode(h, bl);
+ ENCODE_FINISH(bl);
+}
+
+void pow2_hist_t::decode(bufferlist::iterator& p)
+{
+ DECODE_START(1, p);
+ ::decode(h, p);
+ DECODE_FINISH(p);
+}
+
+void pow2_hist_t::generate_test_instances(list<pow2_hist_t*>& ls)
+{
+ ls.push_back(new pow2_hist_t);
+ ls.push_back(new pow2_hist_t);
+ ls.back()->h.push_back(1);
+ ls.back()->h.push_back(3);
+ ls.back()->h.push_back(0);
+ ls.back()->h.push_back(2);
+}
+
// -- osd_stat_t --
void osd_stat_t::dump(Formatter *f) const
{
@@ -118,11 +152,14 @@ void osd_stat_t::dump(Formatter *f) const
f->close_section();
f->dump_int("snap_trim_queue_len", snap_trim_queue_len);
f->dump_int("num_snap_trimming", num_snap_trimming);
+ f->open_object_section("op_queue_age_hist");
+ op_queue_age_hist.dump(f);
+ f->close_section();
}
void osd_stat_t::encode(bufferlist &bl) const
{
- ENCODE_START(2, 2, bl);
+ ENCODE_START(3, 2, bl);
::encode(kb, bl);
::encode(kb_used, bl);
::encode(kb_avail, bl);
@@ -130,12 +167,13 @@ void osd_stat_t::encode(bufferlist &bl) const
::encode(num_snap_trimming, bl);
::encode(hb_in, bl);
::encode(hb_out, bl);
+ ::encode(op_queue_age_hist, bl);
ENCODE_FINISH(bl);
}
void osd_stat_t::decode(bufferlist::iterator &bl)
{
- DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
::decode(kb, bl);
::decode(kb_used, bl);
::decode(kb_avail, bl);
@@ -143,6 +181,8 @@ void osd_stat_t::decode(bufferlist::iterator &bl)
::decode(num_snap_trimming, bl);
::decode(hb_in, bl);
::decode(hb_out, bl);
+ if (struct_v >= 3)
+ ::decode(op_queue_age_hist, bl);
DECODE_FINISH(bl);
}
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index e3f46f9d601..ff6d70fa69a 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -509,6 +509,65 @@ inline ostream& operator<<(ostream& out, const eversion_t e) {
}
+/**
+ * power of 2 histogram
+ */
+struct pow2_hist_t {
+ /**
+ * histogram
+ *
+ * bin size is 2^index
+ * value is count of elements that are <= the current bin but > the previous bin.
+ */
+ vector<int32_t> h;
+
+private:
+ /// expand to at least another's size
+ void _expand_to(unsigned s) {
+ if (s > h.size())
+ h.resize(s, 0);
+ }
+ /// drop useless trailing 0's
+ void _contract() {
+ unsigned p = h.size();
+ while (p > 0 && h[p-1] == 0)
+ --p;
+ h.resize(p);
+ }
+
+public:
+ void clear() {
+ h.clear();
+ }
+ void set(int bin, int32_t v) {
+ _expand_to(bin + 1);
+ h[bin] = v;
+ _contract();
+ }
+
+ void add(const pow2_hist_t& o) {
+ _expand_to(o.h.size());
+ for (unsigned p = 0; p < o.h.size(); ++p)
+ h[p] += o.h[p];
+ _contract();
+ }
+ void sub(const pow2_hist_t& o) {
+ _expand_to(o.h.size());
+ for (unsigned p = 0; p < o.h.size(); ++p)
+ h[p] -= o.h[p];
+ _contract();
+ }
+
+ int32_t upper_bound() const {
+ return 1 << h.size();
+ }
+
+ void dump(Formatter *f) const;
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::iterator &bl);
+ static void generate_test_instances(std::list<pow2_hist_t*>& o);
+};
+WRITE_CLASS_ENCODER(pow2_hist_t)
/** osd_stat
* aggregate stats for an osd
@@ -518,6 +577,8 @@ struct osd_stat_t {
vector<int> hb_in, hb_out;
int32_t snap_trim_queue_len, num_snap_trimming;
+ pow2_hist_t op_queue_age_hist;
+
osd_stat_t() : kb(0), kb_used(0), kb_avail(0),
snap_trim_queue_len(0), num_snap_trimming(0) {}
@@ -527,6 +588,7 @@ struct osd_stat_t {
kb_avail += o.kb_avail;
snap_trim_queue_len += o.snap_trim_queue_len;
num_snap_trimming += o.num_snap_trimming;
+ op_queue_age_hist.add(o.op_queue_age_hist);
}
void sub(const osd_stat_t& o) {
kb -= o.kb;
@@ -534,6 +596,7 @@ struct osd_stat_t {
kb_avail -= o.kb_avail;
snap_trim_queue_len -= o.snap_trim_queue_len;
num_snap_trimming -= o.num_snap_trimming;
+ op_queue_age_hist.sub(o.op_queue_age_hist);
}
void dump(Formatter *f) const;
@@ -562,7 +625,9 @@ inline ostream& operator<<(ostream& out, const osd_stat_t& s) {
return out << "osd_stat(" << kb_t(s.kb_used) << " used, "
<< kb_t(s.kb_avail) << " avail, "
<< kb_t(s.kb) << " total, "
- << "peers " << s.hb_in << "/" << s.hb_out << ")";
+ << "peers " << s.hb_in << "/" << s.hb_out
+ << " op hist " << s.op_queue_age_hist.h
+ << ")";
}
diff --git a/src/test/encoding/types.h b/src/test/encoding/types.h
index fd10d28f119..e0bc0a149a8 100644
--- a/src/test/encoding/types.h
+++ b/src/test/encoding/types.h
@@ -34,6 +34,7 @@ TYPE(osd_reqid_t)
TYPE(object_locator_t)
TYPE(pg_t)
TYPE(coll_t)
+TYPE(pow2_hist_t)
TYPE(osd_stat_t)
TYPE(OSDSuperblock)
TYPE_FEATUREFUL(pool_snap_info_t)