summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Benjamin Anderson <b@banjiewen.net> 2014-06-17 14:26:25 -0700
committer Benjamin Anderson <b@banjiewen.net> 2014-06-17 14:26:25 -0700
commit fcbc55cd86f6fdfd83ef6b1e1ddce5568abdf35e (patch)
tree 4ba1eec7f7ae57d2ce8ea635d6a23d0c73071145
parent e29d1ed76feaadc64e65b0a5e67ed8ffe64ec681 (diff)
download couchdb-fcbc55cd86f6fdfd83ef6b1e1ddce5568abdf35e.tar.gz
Use warning level for non-critical cases
n=2 or n>N cases are not "critical" - that is, they don't require immediate operator intervention. Custodian should send alerts that reflect the true urgency of the situation in order to reduce alert fatigue. BugzID: 31759
-rw-r--r-- src/custodian/src/custodian_server.erl 18
1 files changed, 11 insertions, 7 deletions
diff --git a/src/custodian/src/custodian_server.erl b/src/custodian/src/custodian_server.erl
index 399d15aac..1c24cb27f 100644
--- a/src/custodian/src/custodian_server.erl
+++ b/src/custodian/src/custodian_server.erl
@@ -128,20 +128,24 @@ check_shards() ->
[send_sensu_event(Item) || Item <- custodian:summary()].
send_sensu_event({_, Count} = Item) ->
- if Count > 0 -> twig:log(crit, "~s", [describe(Item)]); true -> ok end,
+ Level = case Count of
+ 0 ->
+ "--ok";
+ 1 ->
+ twig:log(crit, "~s", [describe(Item)]),
+ "--critical";
+ _ ->
+ twig:log(warn, "~s", [describe(Item)]),
+ "--warning"
+ end,
Cmd = lists:concat(["send-sensu-event --standalone ",
- level(Item),
+ Level,
" --output=\"",
describe(Item),
"\" ",
check_name(Item)]),
os:cmd(Cmd).
-level({_, 0}) ->
- "--ok";
-level(_) ->
- "--critical".
-
describe({{safe, N}, Count}) ->
lists:concat([Count, " ", shards(Count), " in cluster with only ", N,
" ", copies(N), " on nodes that are currently up"]);