diff options
author | Samuel Just <sam.just@inktank.com> | 2012-07-13 13:45:24 -0700 |
---|---|---|
committer | Samuel Just <sam.just@inktank.com> | 2012-07-13 16:09:53 -0700 |
commit | 117b28680e56e10aac6c6489ba77963e7e8abd18 (patch) | |
tree | 2c3892f705856128b831cee38ab14df4824c3cb9 | |
parent | ce20e020219eb482aec7c7e2d2b80820ccea8755 (diff) | |
download | ceph-117b28680e56e10aac6c6489ba77963e7e8abd18.tar.gz |
OSD: add config options to fake missed pings
In order to test monitor and osd failure detection and false
positive correction, this patch adds the following options:
1. osd_debug_drop_ping_probability: probability of dropping
a string of pings from a client upon ping receipt.
2. osd_debug_drop_ping_duration: number of pings to drop in
a row.
This should help with replicating some wrongly-marked-down
thrashing cases.
Signed-off-by: Samuel Just <sam.just@inktank.com>
-rw-r--r-- | src/common/config_opts.h | 2 | ||||
-rw-r--r-- | src/osd/OSD.cc | 21 | ||||
-rw-r--r-- | src/osd/OSD.h | 1 |
3 files changed, 24 insertions, 0 deletions
diff --git a/src/common/config_opts.h b/src/common/config_opts.h index c5f182e0dbc..a7e2a11e0e3 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -336,6 +336,8 @@ OPTION(osd_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op OPTION(osd_command_max_records, OPT_INT, 256) OPTION(osd_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go OPTION(osd_verify_sparse_read_holes, OPT_BOOL, false) // read fiemap-reported holes and verify they are zeros +OPTION(osd_debug_drop_ping_probability, OPT_DOUBLE, 0) +OPTION(osd_debug_drop_ping_duration, OPT_INT, 0) OPTION(filestore, OPT_BOOL, false) OPTION(filestore_debug_omap_check, OPT_BOOL, 0) // Expensive debugging check on sync // Use omap for xattrs for attrs over diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 28efcc4d771..44b155f2fdb 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1671,6 +1671,27 @@ void OSD::handle_osd_ping(MOSDPing *m) case MOSDPing::PING: { + if (g_conf->osd_debug_drop_ping_probability > 0) { + if (debug_heartbeat_drops_remaining.count(from)) { + if (debug_heartbeat_drops_remaining[from] == 0) { + debug_heartbeat_drops_remaining.erase(from); + } else { + debug_heartbeat_drops_remaining[from]--; + dout(5) << "Dropping heartbeat from " << from + << ", " << debug_heartbeat_drops_remaining[from] + << " remaining to drop" << dendl; + break; + } + } else if (g_conf->osd_debug_drop_ping_probability > + ((((double)(rand()%100))/100.0))) { + debug_heartbeat_drops_remaining[from] = + g_conf->osd_debug_drop_ping_duration; + dout(5) << "Dropping heartbeat from " << from + << ", " << debug_heartbeat_drops_remaining[from] + << " remaining to drop" << dendl; + break; + } + } Message *r = new MOSDPing(monc->get_fsid(), locked ? 
osdmap->get_epoch():0, MOSDPing::PING_REPLY, diff --git a/src/osd/OSD.h b/src/osd/OSD.h index bfbecca3cf1..86a02cd61b8 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -453,6 +453,7 @@ private: epoch_t epoch; ///< most recent epoch we wanted this peer }; Mutex heartbeat_lock; + map<int, int> debug_heartbeat_drops_remaining; Cond heartbeat_cond; bool heartbeat_stop; bool heartbeat_need_update; ///< true if we need to refresh our heartbeat peers |