diff options
author | Robert Newson <robert.newson@cloudant.com> | 2013-11-11 16:04:08 +0000 |
---|---|---|
committer | Robert Newson <robert.newson@cloudant.com> | 2013-11-20 12:51:48 +0000 |
commit | 1c1b48e19d0eb36691d98f2194c19fe898cb55db (patch) | |
tree | 81ed80923d6f7034d7c1a06aeab93e25f29ce233 | |
parent | 8b01c3af6ae6761b472547e43fb65d6a042bf080 (diff) | |
download | couchdb-1c1b48e19d0eb36691d98f2194c19fe898cb55db.tar.gz |
Send sensu events for everything
-rw-r--r-- | src/custodian/src/custodian_db_checker.erl | 22 | ||||
-rw-r--r-- | src/custodian/src/custodian_server.erl | 66 |
2 files changed, 48 insertions, 40 deletions
```diff
diff --git a/src/custodian/src/custodian_db_checker.erl b/src/custodian/src/custodian_db_checker.erl
index 02bd0d486..2dd770674 100644
--- a/src/custodian/src/custodian_db_checker.erl
+++ b/src/custodian/src/custodian_db_checker.erl
@@ -151,20 +151,16 @@ get_bacon_db() ->
 send_missing_db_alert(DbName) ->
     twig:log(notice, "Missing system database ~s", [DbName]),
     Command = [
-        "send_snmptrap",
-        "--trap",
-        "CLOUDANT-DBCORE-MIB::cloudantDbcoreMissingDbEvent",
-        "-o",
-        "'cloudantDbcoreDbName:STRING:" ++ binary_to_list(DbName) ++ "'"
-    ],
-    os:cmd(string:join(Command, " ")).
-
+        "send-sensu-event --standalone --critical",
+        " --output=\"Missing system database ",
+        binary_to_list(DbName),
+        "\" --handler=default custodian-missing-db-check"],
+    os:cmd(lists:concat(Command)).
 
 clear_missing_dbs_alert() ->
     twig:log(notice, "All system databases exist.", []),
     Command = [
-        "send_snmptrap",
-        "--trap",
-        "CLOUDANT-DBCORE-MIB::cloudantDbcoreAllDbsAvailableEvent"
-    ],
-    os:cmd(string:join(Command, " ")).
+        "send-sensu-event --standalone --ok",
+        " --output=\"All system databases exist\"",
+        " --handler=default custodian-missing-db-check"],
+    os:cmd(lists:concat(Command)).
diff --git a/src/custodian/src/custodian_server.erl b/src/custodian/src/custodian_server.erl
index 61885bb79..188d3713f 100644
--- a/src/custodian/src/custodian_server.erl
+++ b/src/custodian/src/custodian_server.erl
@@ -106,31 +106,43 @@ handle_db_event(_DbName, _Event, _St) ->
     {ok, nil}.
 
 check_shards() ->
-    Summary = custodian:summary(),
-    send_conflicted_alert(proplists:get_value(conflicted, Summary)),
-    send_unavailable_alert(proplists:get_value(unavailable, Summary)),
-    send_one_copy_alert(proplists:get_value(one_copy, Summary)),
-    send_impaired_alert(proplists:get_value(impaired, Summary)).
-
-%% specific alert functions
-send_conflicted_alert(Count) ->
-    send_snmp_alert(Count, "partition tables conflicted", "NoPartitionTablesConflictedEvent", "PartitionTablesConflictedEvent").
-
-send_impaired_alert(Count) ->
-    send_snmp_alert(Count, "shards impaired", "AllShardsUnimpairedEvent", "ShardsImpairedEvent").
-
-send_unavailable_alert(Count) ->
-    send_snmp_alert(Count, "unavailable shards", "AllShardsAvailableEvent", "ShardsUnavailableEvent").
-
-send_one_copy_alert(Count) ->
-    send_snmp_alert(Count, "shards with only one copy", "AllShardsMultipleCopiesEvent", "ShardsOneCopyEvent").
-
-%% generic SNMP alert functions
-send_snmp_alert(undefined, AlertType, ClearMib, _) ->
-    twig:log(notice, "No ~s in this cluster", [AlertType]),
-    Cmd = lists:concat(["send_snmptrap --trap CLOUDANT-DBCORE-MIB::cloudantDbcore", ClearMib]),
-    os:cmd(Cmd);
-send_snmp_alert(Count, AlertType, _, AlertMib) when is_integer(Count) ->
-    twig:log(crit, "~B ~s in this cluster", [Count, AlertType]),
-    Cmd = lists:concat(["send_snmptrap --trap CLOUDANT-DBCORE-MIB::cloudantDbcore", AlertMib," -o cloudantDbcoreShardCount:INTEGER:", Count]),
+    [send_sensu_event(Item) || Item <- custodian:summary()].
+
+send_sensu_event({_, Count} = Item) ->
+    if Count > 0 -> twig:log(crit, "~s", [describe(Item)]); true -> ok end,
+    Cmd = lists:concat(["send-sensu-event --standalone ",
+                        level(Item),
+                        " --output=\"",
+                        describe(Item),
+                        "\" ",
+                        check_name(Item)]),
     os:cmd(Cmd).
+
+level({_, 0}) ->
+    "--ok";
+level(_) ->
+    "--critical".
+
+describe({{safe, N}, Count}) ->
+    lists:concat([Count, " ", shards(Count), " in cluster with only ", N,
+                  " ", copies(N), " on nodes that are currently up"]);
+describe({{live, N}, Count}) ->
+    lists:concat([Count, " ", shards(Count), " in cluster with only ",
+                  N, " ", copies(N), " on nodes not in maintenance mode"]);
+describe({conflicted, Count}) ->
+    lists:concat([Count, " conflicted ", shards(Count), " in cluster"]).
+
+check_name({{Type, N}, _}) ->
+    lists:concat(["custodian-", N, "-", Type, "-shards-check"]);
+check_name({Type, _}) ->
+    lists:concat(["custodian-", Type, "-shards-check"]).
+
+shards(1) ->
+    "shard";
+shards(_) ->
+    "shards".
+
+copies(1) ->
+    "copy";
+copies(_) ->
+    "copies".
```