summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-01-21 16:36:36 -0800
committerSage Weil <sage@inktank.com>2013-01-22 14:47:42 -0800
commita1137eb3e168c2d00f93789e4d565c1584790df0 (patch)
tree26f456cee6ee84c270410ea91cb471080d22f114
parent24d0d7eb0165c8b8f923f2d8896b156bfb5e0e60 (diff)
downloadceph-a1137eb3e168c2d00f93789e4d565c1584790df0.tar.gz
osd: make last state for slow requests more informative
Report on the last event string, and pass in important context for the op event list, including: - which peers were sent sub ops and we are waiting for - which pg queue we are delayed by Signed-off-by: Sage Weil <sage@inktank.com>
-rw-r--r--src/osd/OSD.cc4
-rw-r--r--src/osd/OpRequest.cc3
-rw-r--r--src/osd/OpRequest.h14
-rw-r--r--src/osd/PG.cc2
-rw-r--r--src/osd/ReplicatedPG.cc17
5 files changed, 26 insertions, 14 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index b28cace35b4..ecc23cdcde2 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -3682,7 +3682,7 @@ void OSD::wait_for_new_map(OpRequestRef op)
}
waiting_for_osdmap.push_back(op);
- op->mark_delayed();
+ op->mark_delayed("wait for new map");
}
@@ -5910,7 +5910,7 @@ void OSD::handle_op(OpRequestRef op)
if (osdmap->get_pg_acting_role(pgid, whoami) >= 0) {
dout(7) << "we are valid target for op, waiting" << dendl;
waiting_for_pg[pgid].push_back(op);
- op->mark_delayed();
+ op->mark_delayed("waiting for pg to exist locally");
return;
}
diff --git a/src/osd/OpRequest.cc b/src/osd/OpRequest.cc
index 436e2de4176..b3d95367ec5 100644
--- a/src/osd/OpRequest.cc
+++ b/src/osd/OpRequest.cc
@@ -153,7 +153,8 @@ bool OpTracker::check_ops_in_flight(std::vector<string> &warning_vector)
utime_t age = now - (*i)->received_time;
stringstream ss;
ss << "slow request " << age << " seconds old, received at " << (*i)->received_time
- << ": " << *((*i)->request) << " currently " << (*i)->state_string();
+ << ": " << *((*i)->request) << " currently "
+ << ((*i)->current.size() ? (*i)->current : (*i)->state_string());
warning_vector.push_back(ss.str());
// only those that have been shown will backoff
diff --git a/src/osd/OpRequest.h b/src/osd/OpRequest.h
index 93277789ddf..432bddb1672 100644
--- a/src/osd/OpRequest.h
+++ b/src/osd/OpRequest.h
@@ -128,6 +128,7 @@ struct OpRequest : public TrackedOp {
private:
list<pair<utime_t, string> > events;
+ string current;
Mutex lock;
OpTracker *tracker;
osd_reqid_t reqid;
@@ -186,30 +187,37 @@ public:
void mark_queued_for_pg() {
mark_event("queued_for_pg");
+ current = "queued for pg";
hit_flag_points |= flag_queued_for_pg;
latest_flag_point = flag_queued_for_pg;
}
void mark_reached_pg() {
mark_event("reached_pg");
+ current = "reached pg";
hit_flag_points |= flag_reached_pg;
latest_flag_point = flag_reached_pg;
}
- void mark_delayed() {
+ void mark_delayed(string s) {
+ mark_event(s);
+ current = s;
hit_flag_points |= flag_delayed;
latest_flag_point = flag_delayed;
}
void mark_started() {
mark_event("started");
+ current = "started";
hit_flag_points |= flag_started;
latest_flag_point = flag_started;
}
- void mark_sub_op_sent() {
- mark_event("sub_op_sent");
+ void mark_sub_op_sent(string s) {
+ mark_event(s);
+ current = s;
hit_flag_points |= flag_sub_op_sent;
latest_flag_point = flag_sub_op_sent;
}
void mark_commit_sent() {
mark_event("commit_sent");
+ current = "commit sent";
hit_flag_points |= flag_commit_sent;
latest_flag_point = flag_commit_sent;
}
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 32d5830cd21..264e2c8bdc1 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -4927,7 +4927,7 @@ bool PG::can_discard_op(OpRequestRef op)
dout(7) << " queueing replay at " << m->get_version()
<< " for " << *m << dendl;
replay_queue[m->get_version()] = op;
- op->mark_delayed();
+ op->mark_delayed("waiting for replay");
return true;
}
}
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index aaa11f1188f..09d02da7f95 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -122,7 +122,7 @@ void ReplicatedPG::wait_for_missing_object(const hobject_t& soid, OpRequestRef o
pull(soid, v, g_conf->osd_client_op_priority);
}
waiting_for_missing_object[soid].push_back(op);
- op->mark_delayed();
+ op->mark_delayed("waiting for missing object");
}
void ReplicatedPG::wait_for_all_missing(OpRequestRef op)
@@ -178,7 +178,7 @@ void ReplicatedPG::wait_for_degraded_object(const hobject_t& soid, OpRequestRef
recover_object_replicas(soid, v, g_conf->osd_client_op_priority);
}
waiting_for_degraded_object[soid].push_back(op);
- op->mark_delayed();
+ op->mark_delayed("waiting for degraded object");
}
void ReplicatedPG::wait_for_backfill_pos(OpRequestRef op)
@@ -631,7 +631,7 @@ void ReplicatedPG::do_op(OpRequestRef op)
if (op->may_write() && scrubber.write_blocked_by_scrub(head)) {
dout(20) << __func__ << ": waiting for scrub" << dendl;
waiting_for_active.push_back(op);
- op->mark_delayed();
+ op->mark_delayed("waiting for scrub");
return;
}
@@ -734,7 +734,7 @@ void ReplicatedPG::do_op(OpRequestRef op)
if (!ok) {
dout(10) << "do_op waiting on mode " << mode << dendl;
mode.waiting.push_back(op);
- op->mark_delayed();
+ op->mark_delayed("waiting on pg mode");
return;
}
@@ -876,7 +876,7 @@ void ReplicatedPG::do_op(OpRequestRef op)
}
dout(10) << " waiting for " << oldv << " to commit" << dendl;
waiting_for_ondisk[oldv].push_back(op); // always queue ondisk waiters, so that we can requeue if needed
- op->mark_delayed();
+ op->mark_delayed("waiting for ondisk");
}
return;
}
@@ -3963,9 +3963,12 @@ void ReplicatedPG::issue_repop(RepGather *repop, utime_t now,
int acks_wanted = CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK;
+ if (ctx->op && acting.size() > 1) {
+ ostringstream ss;
+ ss << "waiting for subops from " << vector<int>(acting.begin() + 1, acting.end());
+ ctx->op->mark_sub_op_sent(ss.str());
+ }
for (unsigned i=1; i<acting.size(); i++) {
- if (ctx->op)
- ctx->op->mark_sub_op_sent();
int peer = acting[i];
pg_info_t &pinfo = peer_info[peer];