summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Wallace <mikewallace1979@googlemail.com> 2014-08-06 14:35:36 +0100
committer Jay Doane <jaydoane@apache.org> 2021-04-19 00:34:24 -0700
commit 90d0691f6ee26cfd2567c1a12fe0de21f46e383c (patch)
tree c33cb2ad4643e337a1a73cd717afbdf97fc23bf7
parent d937147f1fe7516236b5eeb914360aa9815b3dce (diff)
download couchdb-90d0691f6ee26cfd2567c1a12fe0de21f46e383c.tar.gz
Allow checks to be run across the cluster
Add a `--all-nodes` option (short name `-a`) which causes the specified checks to be run on all cluster nodes. This is achieved by running weatherreport_check:check via rpc:multicall/5.

This requires a number of supporting changes:

1. Log messages to the console now report the node that is the origin of a message, rather than the current node.
2. Checks now learn about expert mode from supplied options rather than the application environment. This is because remote nodes will not have the same environment as the escript.
3. Additional logging in checks is converted to additional messages which are returned to the caller.

BugzID: 33243
-rw-r--r--src/weatherreport/src/weatherreport.erl24
-rw-r--r--src/weatherreport/src/weatherreport_check.erl18
-rw-r--r--src/weatherreport/src/weatherreport_check_custodian.erl6
-rw-r--r--src/weatherreport/src/weatherreport_check_disk.erl6
-rw-r--r--src/weatherreport/src/weatherreport_check_ioq.erl6
-rw-r--r--src/weatherreport/src/weatherreport_check_mem3_sync.erl6
-rw-r--r--src/weatherreport/src/weatherreport_check_membership.erl6
-rw-r--r--src/weatherreport/src/weatherreport_check_memory_use.erl6
-rw-r--r--src/weatherreport/src/weatherreport_check_message_queues.erl29
-rw-r--r--src/weatherreport/src/weatherreport_check_nodes_connected.erl6
-rw-r--r--src/weatherreport/src/weatherreport_check_process_calls.erl42
-rw-r--r--src/weatherreport/src/weatherreport_check_search.erl6
-rw-r--r--src/weatherreport/src/weatherreport_config.erl4
-rw-r--r--src/weatherreport/src/weatherreport_node.erl55
-rw-r--r--src/weatherreport/src/weatherreport_runner.erl93
-rw-r--r--src/weatherreport/src/weatherreport_util.erl29
16 files changed, 245 insertions, 97 deletions
diff --git a/src/weatherreport/src/weatherreport.erl b/src/weatherreport/src/weatherreport.erl
index fe9a95e16..d15ba5680 100644
--- a/src/weatherreport/src/weatherreport.erl
+++ b/src/weatherreport/src/weatherreport.erl
@@ -60,7 +60,8 @@
{level, $d, "level", {atom, notice}, "Minimum message severity level (default: notice)"},
{expert, $e, "expert", undefined, "Perform more detailed diagnostics" },
{usage, $h, "help", undefined, "Display help/usage" },
- {list, $l, "list", undefined, "Describe available diagnostic tasks" }
+ {list, $l, "list", undefined, "Describe available diagnostic tasks" },
+ {all_nodes, $a, "all-nodes", undefined, "Run weatherreport on all cluster nodes" }
]).
-define(USAGE_OPTS, [ O || O <- ?OPTS,
@@ -109,21 +110,23 @@ run(InputChecks) ->
ShortNames = [{weatherreport_util:short_name(Mod), Mod} || Mod <- weatherreport_check:modules() ],
element(1, lists:foldr(fun validate_checks/2, {[], ShortNames}, InputChecks))
end,
- Messages = lists:foldl(
- fun(Mod, Acc) -> Acc ++ weatherreport_check:check(Mod) end,
- [],
- Checks
- ),
+ Messages = case application:get_env(weatherreport, all_nodes) of
+ {ok, true} ->
+ weatherreport_runner:run(Checks, all);
+ _ ->
+ weatherreport_runner:run(Checks)
+
+ end,
case Messages of
[] ->
io:format("No diagnostic messages to report.~n"),
halt(0);
_ ->
%% Print the most critical messages first
- FilteredMessages = lists:filter(fun({Level,_,_}) ->
+ FilteredMessages = lists:filter(fun({_,Level,_,_}) ->
weatherreport_util:should_log(Level)
end, Messages),
- SortedMessages = lists:sort(fun({ALevel, _, _}, {BLevel, _, _}) ->
+ SortedMessages = lists:sort(fun({_, ALevel, _, _}, {_, BLevel, _, _}) ->
twig_util:level(ALevel) =< twig_util:level(BLevel)
end, FilteredMessages),
case SortedMessages of
@@ -161,7 +164,10 @@ process_option({level, Level}, Result) ->
application:set_env(weatherreport, log_level, Level),
Result;
process_option(expert, Result) ->
- application:set_env(weatherreport, expert_mode, true),
+ application:set_env(weatherreport, expert, true),
+ Result;
+process_option(all_nodes, Result) ->
+ application:set_env(weatherreport, all_nodes, true),
Result;
process_option(list, usage) -> %% Help should have precedence over listing checks
usage;
diff --git a/src/weatherreport/src/weatherreport_check.erl b/src/weatherreport/src/weatherreport_check.erl
index 35c8ac7e9..f50616fb6 100644
--- a/src/weatherreport/src/weatherreport_check.erl
+++ b/src/weatherreport/src/weatherreport_check.erl
@@ -58,7 +58,7 @@
-module(weatherreport_check).
-export([behaviour_info/1]).
--export([check/1,
+-export([check/2,
modules/0,
print/1]).
@@ -67,18 +67,18 @@
behaviour_info(callbacks) ->
[{description, 0},
{valid, 0},
- {check, 0},
+ {check, 1},
{format, 1}];
behaviour_info(_) ->
undefined.
%% @doc Runs the diagnostic in the given module, if it is valid. Returns a
%% list of messages that will be printed later using print/1.
--spec check(Module::module()) -> [{atom(), module(), term()}].
-check(Module) ->
+-spec check(Module::module(), list()) -> [{atom(), module(), term()}].
+check(Module, Opts) ->
case Module:valid() of
true ->
- [ {Level, Module, Message} || {Level, Message} <- Module:check() ];
+ [ {Level, Module, Message} || {Level, Message} <- Module:check(Opts) ];
_ ->
[]
end.
@@ -97,11 +97,11 @@ modules() ->
%% module's format/1 function will be called to provide a
%% human-readable message. It should return an iolist() or a 2-tuple
%% consisting of a format string and a list of terms.
--spec print({Level::atom(), Module::module(), Data::term()}) -> ok.
-print({Level, Mod, Data}) ->
+-spec print({Node::atom(), Level::atom(), Module::module(), Data::term()}) -> ok.
+print({Node, Level, Mod, Data}) ->
case Mod:format(Data) of
{Format, Terms} ->
- weatherreport_util:log(Level, Format, Terms);
+ weatherreport_util:log(Node, Level, Format, Terms);
String ->
- weatherreport_util:log(Level, String)
+ weatherreport_util:log(Node, Level, String)
end.
diff --git a/src/weatherreport/src/weatherreport_check_custodian.erl b/src/weatherreport/src/weatherreport_check_custodian.erl
index bb1a3082f..168fa9a52 100644
--- a/src/weatherreport/src/weatherreport_check_custodian.erl
+++ b/src/weatherreport/src/weatherreport_check_custodian.erl
@@ -33,7 +33,7 @@
-export([description/0,
valid/0,
- check/0,
+ check/1,
format/1]).
-include_lib("eunit/include/eunit.hrl").
@@ -58,8 +58,8 @@ n_to_level(_) ->
report_to_message({DbName, ShardRange, {Type, N}}, NodeName) ->
{n_to_level(N), {Type, N, DbName, ShardRange, NodeName}}.
--spec check() -> [{atom(), term()}].
-check() ->
+-spec check(list()) -> [{atom(), term()}].
+check(_Opts) ->
NodeName = weatherreport_node:nodename(),
case weatherreport_node:local_command(custodian, report, []) of
[] ->
diff --git a/src/weatherreport/src/weatherreport_check_disk.erl b/src/weatherreport/src/weatherreport_check_disk.erl
index 8d14d2a76..cf05f5907 100644
--- a/src/weatherreport/src/weatherreport_check_disk.erl
+++ b/src/weatherreport/src/weatherreport_check_disk.erl
@@ -48,7 +48,7 @@
-export([description/0,
valid/0,
- check/0,
+ check/1,
format/1]).
-spec description() -> string().
@@ -59,8 +59,8 @@ description() ->
valid() ->
true.
--spec check() -> [{atom(), term()}].
-check() ->
+-spec check(list()) -> [{atom(), term()}].
+check(_Opts) ->
DataDirs = weatherreport_config:data_directories(),
%% Add additional disk checks in the function below
lists:flatmap(fun(Dir) ->
diff --git a/src/weatherreport/src/weatherreport_check_ioq.erl b/src/weatherreport/src/weatherreport_check_ioq.erl
index 3055c5e9c..610d4e2e1 100644
--- a/src/weatherreport/src/weatherreport_check_ioq.erl
+++ b/src/weatherreport/src/weatherreport_check_ioq.erl
@@ -28,7 +28,7 @@
-export([description/0,
valid/0,
- check/0,
+ check/1,
format/1]).
-define(THRESHOLD, 500).
@@ -61,8 +61,8 @@ sum_queues([{channels, {Channels}} | Rest], Acc) ->
sum_queues([{_Name, Value} | Rest], Acc) ->
sum_queues(Rest, Acc + Value).
--spec check() -> [{atom(), term()}].
-check() ->
+-spec check(list()) -> [{atom(), term()}].
+check(_Opts) ->
case weatherreport_node:local_command(ioq, get_disk_queues, []) of
Queues when is_list(Queues) ->
Total = sum_queues(Queues, 0),
diff --git a/src/weatherreport/src/weatherreport_check_mem3_sync.erl b/src/weatherreport/src/weatherreport_check_mem3_sync.erl
index edea5b2e7..e143dc21b 100644
--- a/src/weatherreport/src/weatherreport_check_mem3_sync.erl
+++ b/src/weatherreport/src/weatherreport_check_mem3_sync.erl
@@ -28,7 +28,7 @@
-export([description/0,
valid/0,
- check/0,
+ check/1,
format/1]).
-spec description() -> string().
@@ -39,8 +39,8 @@ description() ->
valid() ->
weatherreport_node:can_connect().
--spec check() -> [{atom(), term()}].
-check() ->
+-spec check(list()) -> [{atom(), term()}].
+check(_Opts) ->
NodeName = weatherreport_node:nodename(),
case weatherreport_node:local_command(erlang, whereis, [mem3_sync]) of
undefined ->
diff --git a/src/weatherreport/src/weatherreport_check_membership.erl b/src/weatherreport/src/weatherreport_check_membership.erl
index ffc8bfbc0..f60bf1b01 100644
--- a/src/weatherreport/src/weatherreport_check_membership.erl
+++ b/src/weatherreport/src/weatherreport_check_membership.erl
@@ -36,7 +36,7 @@
-export([description/0,
valid/0,
- check/0,
+ check/1,
format/1]).
-include_lib("eunit/include/eunit.hrl").
@@ -49,8 +49,8 @@ description() ->
valid() ->
weatherreport_node:can_connect().
--spec check() -> [{atom(), term()}].
-check() ->
+-spec check(list()) -> [{atom(), term()}].
+check(_Opts) ->
NodeName = weatherreport_node:nodename(),
Members = weatherreport_node:local_command(mem3, nodes, []),
case lists:member(NodeName, Members) of
diff --git a/src/weatherreport/src/weatherreport_check_memory_use.erl b/src/weatherreport/src/weatherreport_check_memory_use.erl
index c18ce8e13..0d93f9d1b 100644
--- a/src/weatherreport/src/weatherreport_check_memory_use.erl
+++ b/src/weatherreport/src/weatherreport_check_memory_use.erl
@@ -36,7 +36,7 @@
-export([description/0,
valid/0,
- check/0,
+ check/1,
format/1]).
-spec description() -> string().
@@ -47,8 +47,8 @@ description() ->
valid() ->
weatherreport_node:can_connect().
--spec check() -> [{atom(), term()}].
-check() ->
+-spec check(list()) -> [{atom(), term()}].
+check(_Opts) ->
Pid = weatherreport_node:pid(),
Output = weatherreport_util:run_command("ps -o pmem,rss -p " ++ Pid),
[_,_,Percent, RealSize| _] = string:tokens(Output, "/n \n"),
diff --git a/src/weatherreport/src/weatherreport_check_message_queues.erl b/src/weatherreport/src/weatherreport_check_message_queues.erl
index 9d9502801..3633ae3f1 100644
--- a/src/weatherreport/src/weatherreport_check_message_queues.erl
+++ b/src/weatherreport/src/weatherreport_check_message_queues.erl
@@ -28,7 +28,7 @@
-export([description/0,
valid/0,
- check/0,
+ check/1,
format/1]).
-define(THRESHOLD, 1000).
@@ -41,32 +41,33 @@ description() ->
valid() ->
weatherreport_node:can_connect().
-fold_processes([], Acc) ->
+fold_processes([], Acc, _Opts) ->
Acc;
-fold_processes([{Pid, MBoxSize, Info} | T], Acc) when MBoxSize < ?THRESHOLD ->
+fold_processes([{Pid, MBoxSize, Info} | T], Acc, Opts) when MBoxSize < ?THRESHOLD ->
Message = {info, {mbox_ok, {Pid, MBoxSize, Info}}},
- fold_processes(T, [Message | Acc]);
-fold_processes([{Pid, MBoxSize, Info} | T], Acc) ->
- case application:get_env(weatherreport, expert_mode) of
- {ok, true} ->
+ fold_processes(T, [Message | Acc], Opts);
+fold_processes([{Pid, MBoxSize, Info} | T], Acc, Opts) ->
+ Message = case proplists:get_value(expert, Opts) of
+ true ->
Pinfo = weatherreport_node:local_command(recon, info, [Pid]),
- weatherreport_util:log(warning, "Process info for ~w:~n~p", [Pid, Pinfo]);
+ {warning, {mbox_large, {Pid, MBoxSize, Info, Pinfo}}};
_ ->
- ok
+ {warning, {mbox_large, {Pid, MBoxSize, Info}}}
end,
- Message = {warning, {mbox_large, {Pid, MBoxSize, Info}}},
- fold_processes(T, [Message | Acc]).
+ fold_processes(T, [Message | Acc], Opts).
--spec check() -> [{atom(), term()}].
-check() ->
+-spec check(list()) -> [{atom(), term()}].
+check(Opts) ->
Processes = weatherreport_node:local_command(
recon,
proc_count,
[message_queue_len, 10]
),
- fold_processes(Processes, []).
+ fold_processes(Processes, [], Opts).
-spec format(term()) -> {io:format(), [term()]}.
+format({mbox_large, {Pid, MBoxSize, Info, Pinfo}}) ->
+ {"Process ~w has excessive mailbox size of ~w: ~w ~w", [Pid, MBoxSize, Info, Pinfo]};
format({mbox_large, {Pid, MBoxSize, Info}}) ->
{"Process ~w has excessive mailbox size of ~w: ~w", [Pid, MBoxSize, Info]};
format({mbox_ok, {Pid, MBoxSize, Info}}) ->
diff --git a/src/weatherreport/src/weatherreport_check_nodes_connected.erl b/src/weatherreport/src/weatherreport_check_nodes_connected.erl
index 0d38eb440..a68655666 100644
--- a/src/weatherreport/src/weatherreport_check_nodes_connected.erl
+++ b/src/weatherreport/src/weatherreport_check_nodes_connected.erl
@@ -34,7 +34,7 @@
-export([description/0,
valid/0,
- check/0,
+ check/1,
format/1]).
-spec description() -> string().
@@ -45,8 +45,8 @@ description() ->
valid() ->
weatherreport_node:can_connect().
--spec check() -> [{atom(), term()}].
-check() ->
+-spec check(list()) -> [{atom(), term()}].
+check(_Opts) ->
NodeName = weatherreport_node:nodename(),
ConnectedNodes = [NodeName | weatherreport_node:local_command(erlang, nodes, [])],
Members = weatherreport_node:local_command(mem3, nodes, []),
diff --git a/src/weatherreport/src/weatherreport_check_process_calls.erl b/src/weatherreport/src/weatherreport_check_process_calls.erl
index cb1747df9..de95d1a04 100644
--- a/src/weatherreport/src/weatherreport_check_process_calls.erl
+++ b/src/weatherreport/src/weatherreport_check_process_calls.erl
@@ -27,7 +27,7 @@
-export([description/0,
valid/0,
- check/0,
+ check/1,
format/1]).
-define(THRESHOLD, 1000).
@@ -40,37 +40,33 @@ description() ->
valid() ->
weatherreport_node:can_connect().
-fold_processes([], Acc, _Lim, _) ->
+fold_processes([], Acc, _Lim, _CallType, _Opts) ->
Acc;
-fold_processes(_, Acc, 0, _) ->
+fold_processes(_, Acc, 0, _CallType, _Opts) ->
Acc;
-fold_processes([{Count, {M, F, A}} | T], Acc, Lim, CallType) ->
+fold_processes([{Count, {M, F, A}} | T], Acc, Lim, CallType, Opts) ->
Level = case Count > ?THRESHOLD of
true ->
warning;
_ ->
info
end,
- case application:get_env(weatherreport, expert_mode) of
- {ok, true} ->
+ Message = case proplists:get_value(expert, Opts) of
+ true ->
PidFun = list_to_atom("find_by_" ++ CallType ++ "_call"),
Pids = weatherreport_node:local_command(recon, PidFun, [M, F]),
- lists:map(fun(Pid) ->
+ Pinfos = lists:map(fun(Pid) ->
Pinfo = weatherreport_node:local_command(recon, info, [Pid]),
- weatherreport_util:log(
- Level,
- "Process info for ~w:~n~p",
- [Pid, Pinfo]
- )
- end, lists:sublist(Pids, 10));
+ {Pid, Pinfo}
+ end, lists:sublist(Pids, 10)),
+ {Level, {process_count, {CallType, Count, M, F, A, Pinfos}}};
_ ->
- ok
+ {Level, {process_count, {CallType, Count, M, F, A}}}
end,
- Message = {Level, {process_count, {CallType, Count, M, F, A}}},
- fold_processes(T, [Message | Acc], Lim - 1, CallType).
+ fold_processes(T, [Message | Acc], Lim - 1, CallType, Opts).
--spec check() -> [{atom(), term()}].
-check() ->
+-spec check(list()) -> [{atom(), term()}].
+check(Opts) ->
CurrentCallCounts = weatherreport_node:local_command(
recon,
show_current_call_counts,
@@ -80,7 +76,8 @@ check() ->
CurrentCallCounts,
[],
10,
- "current"
+ "current",
+ Opts
),
FirstCallCounts = weatherreport_node:local_command(
recon,
@@ -91,9 +88,12 @@ check() ->
FirstCallCounts,
CurrentCallMessages,
10,
- "first"
+ "first",
+ Opts
)).
-spec format(term()) -> {io:format(), [term()]}.
format({process_count, {CallType, Count, M, F, A}}) ->
- {"~w processes with ~s call ~w:~w/~w", [Count, CallType, M, F, A]}.
+ {"~w processes with ~s call ~w:~w/~w", [Count, CallType, M, F, A]};
+format({process_count, {CallType, Count, M, F, A, Pinfos}}) ->
+ {"~w processes with ~s call ~w:~w/~w ~w", [Count, CallType, M, F, A, Pinfos]}.
diff --git a/src/weatherreport/src/weatherreport_check_search.erl b/src/weatherreport/src/weatherreport_check_search.erl
index 91d5bba6b..6237d0d4d 100644
--- a/src/weatherreport/src/weatherreport_check_search.erl
+++ b/src/weatherreport/src/weatherreport_check_search.erl
@@ -29,7 +29,7 @@
-export([description/0,
valid/0,
- check/0,
+ check/1,
format/1]).
-spec description() -> string().
@@ -40,8 +40,8 @@ description() ->
valid() ->
weatherreport_node:can_connect().
--spec check() -> [{atom(), term()}].
-check() ->
+-spec check(list()) -> [{atom(), term()}].
+check(_Opts) ->
SearchNode = 'clouseau@127.0.0.1',
case weatherreport_node:local_command(net_adm, ping, [SearchNode]) of
pong ->
diff --git a/src/weatherreport/src/weatherreport_config.erl b/src/weatherreport/src/weatherreport_config.erl
index 405b7f43c..e93da8359 100644
--- a/src/weatherreport/src/weatherreport_config.erl
+++ b/src/weatherreport/src/weatherreport_config.erl
@@ -135,9 +135,9 @@ load_app_config() ->
filename:join(Etc, "default.ini"),
filename:join(Etc, "local.ini")
],
- weatherreport_util:log(debug, "Reading config from files: ~p", [IniFiles]),
+ weatherreport_util:log(node(), debug, "Reading config from files: ~p", [IniFiles]),
config:start_link(IniFiles),
- weatherreport_util:log(debug, "Local node config: ~p~n", [config:all()]).
+ weatherreport_util:log(node(), debug, "Local node config: ~p~n", [config:all()]).
load_vm_args() ->
VmArgs = case init:get_argument(vm_args) of
diff --git a/src/weatherreport/src/weatherreport_node.erl b/src/weatherreport/src/weatherreport_node.erl
index 1b6a0e3d9..2cd0f0103 100644
--- a/src/weatherreport/src/weatherreport_node.erl
+++ b/src/weatherreport/src/weatherreport_node.erl
@@ -67,8 +67,24 @@ local_command(Module, Function, Args) ->
%% @see can_connect/0
-spec local_command(Module::atom(), Function::atom(), Args::[term()], Timeout::integer()) -> term().
local_command(Module, Function, Args, Timeout) ->
- weatherreport_util:log(debug, "Local RPC: ~p:~p(~p) [~p]", [Module, Function, Args, Timeout]),
- rpc:call(nodename(), Module, Function, Args, Timeout).
+ case is_cluster_node() of
+ true ->
+ weatherreport_util:log(
+ node(),
+ debug,
+ "Local function call: ~p:~p(~p)",
+ [Module, Function, Args]
+ ),
+ erlang:apply(Module, Function, Args);
+ _ ->
+ weatherreport_util:log(
+ node(),
+ debug,
+ "Local RPC: ~p:~p(~p) [~p]",
+ [Module, Function, Args, Timeout]
+ ),
+ rpc:call(nodename(), Module, Function, Args, Timeout)
+ end.
%% @doc Calls the given 0-arity module and function on all members of
%% the cluster.
@@ -109,10 +125,15 @@ pid() ->
%% already, and returns whether connection was successful.
-spec can_connect() -> true | false.
can_connect() ->
- case is_connected() of
+ case is_connected() or is_cluster_node() of
true -> true;
false ->
- weatherreport_util:log(debug, "Not connected to the local cluster node, trying to connect. alive:~p connect_failed:~p", [is_alive(), connect_failed()]),
+ weatherreport_util:log(
+ node(),
+ debug,
+ "Not connected to the local cluster node, trying to connect. alive:~p connect_failed:~p",
+ [is_alive(), connect_failed()]
+ ),
maybe_connect()
end.
@@ -128,7 +149,12 @@ can_connect_all() ->
end.
nodename() ->
- {_, Name} = weatherreport_config:node_name(),
+ Name = case weatherreport_config:node_name() of
+ undefined ->
+ atom_to_list(node());
+ {_, NodeName} ->
+ NodeName
+ end,
case string:tokens(Name, "@") of
[_Node, _Host] ->
list_to_atom(Name);
@@ -138,6 +164,9 @@ nodename() ->
end.
%% Private functions
+is_cluster_node() ->
+ nodename() =:= node().
+
is_connected() ->
is_alive() andalso connect_failed() =/= true.
@@ -156,11 +185,21 @@ try_connect() ->
case {net_kernel:hidden_connect_node(TargetNode), net_adm:ping(TargetNode)} of
{true, pong} ->
application:set_env(weatherreport, connect_failed, false),
- weatherreport_util:log(debug, "Connected to local cluster node ~p.", [TargetNode]),
+ weatherreport_util:log(
+ node(),
+ debug,
+ "Connected to local cluster node ~p.",
+ [TargetNode]
+ ),
true;
_ ->
application:set_env(weatherreport, connect_failed, true),
- weatherreport_util:log(warning, "Could not connect to the local cluster node ~p, some checks will not run.", [TargetNode]),
+ weatherreport_util:log(
+ node(),
+ warning,
+ "Could not connect to the local cluster node ~p, some checks will not run.",
+ [TargetNode]
+ ),
false
end.
@@ -172,7 +211,7 @@ connect_failed() ->
end.
start_net() ->
- weatherreport_util:log(debug, "Starting distributed Erlang."),
+ weatherreport_util:log(node(), debug, "Starting distributed Erlang."),
{Type, NodeName} = weatherreport_config:node_name(),
ThisNode = append_node_suffix(NodeName, "_diag"),
{ok, _} = net_kernel:start([ThisNode, Type]),
diff --git a/src/weatherreport/src/weatherreport_runner.erl b/src/weatherreport/src/weatherreport_runner.erl
new file mode 100644
index 000000000..7f4538eab
--- /dev/null
+++ b/src/weatherreport/src/weatherreport_runner.erl
@@ -0,0 +1,93 @@
+%% -------------------------------------------------------------------
+%%
+%% weatherreport - automated diagnostic tools for CouchDB
+%%
+%% Copyright (c) 2014 Cloudant
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+%% @doc <p>The <code>weatherreport_runner</code> module provides
+%% utility functions for running checks either on a single node or
+%% multiple nodes.
+
+-module(weatherreport_runner).
+
+-export([run/1, run/2, format/1]).
+
+%% @doc Run the supplied list of checks on the local node
+-spec run([Module::atom()]) -> [tuple()].
+run(Checks) ->
+ lists:flatten(lists:foldl(fun(Mod, Acc) ->
+ Node = weatherreport_node:nodename(),
+ MessagesWithNode = lists:map(fun({Level, Module, Message}) ->
+ {Node, Level, Module, Message}
+ end, weatherreport_check:check(Mod, get_check_options())),
+ [MessagesWithNode | Acc]
+ end, [], Checks)).
+
+%% @doc Run the supplied list of checks on the supplied list of cluster nodes
+-spec run([Module::atom()], [node()] | all) -> [tuple()].
+run(Checks, all) ->
+ weatherreport_node:can_connect(),
+ case weatherreport_node:local_command(mem3, nodes, []) of
+ ClusterNodes when is_list(ClusterNodes) ->
+ run(Checks, ClusterNodes);
+ Error ->
+ [{node(), critical, weatherreport_runner, {checks_failed, Error}}]
+ end;
+run(Checks, Nodes) ->
+ CheckOpts = get_check_options(),
+ lists:flatten(lists:foldl(fun(Mod, Acc) ->
+ {Resps, BadNodes} = rpc:multicall(
+ Nodes,
+ erlang,
+ apply,
+ [fun() -> {node(), weatherreport_check:check(Mod, CheckOpts)} end, []],
+ 5000
+ ),
+ lists:map(fun(Node) ->
+ weatherreport_util:log(
+ node(),
+ error,
+ io_lib:format("Could not run checks on cluster node ~w", [Node])
+ )
+ end, BadNodes),
+ TransformResponse = fun({Node, Messages}) ->
+ [{Node, Lvl, Module, Msg} || {Lvl, Module, Msg} <- Messages]
+ end,
+ ResponsesWithNode = [
+ TransformResponse({Node, Messages}) || {Node, Messages} <- Resps,
+ Node =/= bad_rpc
+ ],
+ [lists:concat(ResponsesWithNode) | Acc]
+ end, [], Checks)).
+
+%% @doc Part of the weatherreport_check behaviour. This means that any messages
+%% returned by this module can be handled via the existing message reporting
+%% code.
+format({checks_failed, Error}) ->
+ {"Could not run checks - received error: ~w", [Error]}.
+
+%% Private functions
+get_check_options() ->
+ Expert = case application:get_env(weatherreport, expert) of
+ {ok, true} ->
+ true;
+ _ ->
+ false
+ end,
+ [{expert, Expert}].
diff --git a/src/weatherreport/src/weatherreport_util.erl b/src/weatherreport/src/weatherreport_util.erl
index f61741ea2..37a437aee 100644
--- a/src/weatherreport/src/weatherreport_util.erl
+++ b/src/weatherreport/src/weatherreport_util.erl
@@ -30,7 +30,7 @@
-module(weatherreport_util).
-export([short_name/1,
run_command/1,
- log/2,log/3,
+ log/3,log/4,
binary_to_float/1,
should_log/1]).
@@ -46,14 +46,24 @@ short_name(Mod) when is_atom(Mod) ->
%% redirected to stdout so its output will be included.
-spec run_command(Command::iodata()) -> StdOut::iodata().
run_command(Command) ->
- weatherreport_util:log(debug, "Running shell command: ~s", [Command]),
+ weatherreport_util:log(
+ node(),
+ debug,
+ "Running shell command: ~s",
+ [Command]
+ ),
Port = erlang:open_port({spawn,Command},[exit_status, stderr_to_stdout]),
do_read(Port, []).
do_read(Port, Acc) ->
receive
{Port, {data, StdOut}} ->
- weatherreport_util:log(debug, "Shell command output: ~n~s~n",[StdOut]),
+ weatherreport_util:log(
+ node(),
+ debug,
+ "Shell command output: ~n~s~n",
+ [StdOut]
+ ),
do_read(Port, Acc ++ StdOut);
{Port, {exit_status, _}} ->
%%port_close(Port),
@@ -69,14 +79,13 @@ do_read(Port, Acc) ->
binary_to_float(Bin) ->
list_to_float(binary_to_list(Bin)).
-get_prefix(Level) ->
- {_, NodeName} = weatherreport_config:node_name(),
- io_lib:format("[~s] [~w]", [NodeName, Level]).
+get_prefix(Node, Level) ->
+ io_lib:format("[~w] [~w]", [Node, Level]).
-log(Level, Format, Terms) ->
+log(Node, Level, Format, Terms) ->
case should_log(Level) of
true ->
- Prefix = get_prefix(Level),
+ Prefix = get_prefix(Node, Level),
Message = io_lib:format(Format, Terms),
io:format("~s ~s~n", [Prefix, Message]);
false ->
@@ -84,10 +93,10 @@ log(Level, Format, Terms) ->
end,
twig:log(Level, Format, Terms).
-log(Level, String) ->
+log(Node, Level, String) ->
case should_log(Level) of
true ->
- Prefix = get_prefix(Level),
+ Prefix = get_prefix(Node, Level),
io:format("~s ~s~n", [Prefix, String]);
false ->
ok