summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSamuel Just <sam.just@inktank.com>2012-07-13 13:45:24 -0700
committerSamuel Just <sam.just@inktank.com>2012-07-13 16:09:53 -0700
commit117b28680e56e10aac6c6489ba77963e7e8abd18 (patch)
tree2c3892f705856128b831cee38ab14df4824c3cb9
parentce20e020219eb482aec7c7e2d2b80820ccea8755 (diff)
downloadceph-117b28680e56e10aac6c6489ba77963e7e8abd18.tar.gz
OSD: add config options to fake missed pings
In order to test monitor and osd failure detection and false-positive correction, this patch adds the following options: 1. osd_debug_drop_ping_probability: probability of dropping a string of pings from a client upon ping receipt. 2. osd_debug_drop_ping_duration: number of pings to drop in a row. This should help with replicating some wrongly-marked-down thrashing cases. Signed-off-by: Samuel Just <sam.just@inktank.com>
-rw-r--r--src/common/config_opts.h2
-rw-r--r--src/osd/OSD.cc21
-rw-r--r--src/osd/OSD.h1
3 files changed, 24 insertions, 0 deletions
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index c5f182e0dbc..a7e2a11e0e3 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -336,6 +336,8 @@ OPTION(osd_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op
OPTION(osd_command_max_records, OPT_INT, 256)
OPTION(osd_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go
OPTION(osd_verify_sparse_read_holes, OPT_BOOL, false) // read fiemap-reported holes and verify they are zeros
+OPTION(osd_debug_drop_ping_probability, OPT_DOUBLE, 0)
+OPTION(osd_debug_drop_ping_duration, OPT_INT, 0)
OPTION(filestore, OPT_BOOL, false)
OPTION(filestore_debug_omap_check, OPT_BOOL, 0) // Expensive debugging check on sync
// Use omap for xattrs for attrs over
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 28efcc4d771..44b155f2fdb 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -1671,6 +1671,27 @@ void OSD::handle_osd_ping(MOSDPing *m)
case MOSDPing::PING:
{
+ if (g_conf->osd_debug_drop_ping_probability > 0) {
+ if (debug_heartbeat_drops_remaining.count(from)) {
+ if (debug_heartbeat_drops_remaining[from] == 0) {
+ debug_heartbeat_drops_remaining.erase(from);
+ } else {
+ debug_heartbeat_drops_remaining[from]--;
+ dout(5) << "Dropping heartbeat from " << from
+ << ", " << debug_heartbeat_drops_remaining[from]
+ << " remaining to drop" << dendl;
+ break;
+ }
+ } else if (g_conf->osd_debug_drop_ping_probability >
+ ((((double)(rand()%100))/100.0))) {
+ debug_heartbeat_drops_remaining[from] =
+ g_conf->osd_debug_drop_ping_duration;
+ dout(5) << "Dropping heartbeat from " << from
+ << ", " << debug_heartbeat_drops_remaining[from]
+ << " remaining to drop" << dendl;
+ break;
+ }
+ }
Message *r = new MOSDPing(monc->get_fsid(),
locked ? osdmap->get_epoch():0,
MOSDPing::PING_REPLY,
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index bfbecca3cf1..86a02cd61b8 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -453,6 +453,7 @@ private:
epoch_t epoch; ///< most recent epoch we wanted this peer
};
Mutex heartbeat_lock;
+ map<int, int> debug_heartbeat_drops_remaining;
Cond heartbeat_cond;
bool heartbeat_stop;
bool heartbeat_need_update; ///< true if we need to refresh our heartbeat peers