summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Riccardo Brognara <brognara@us.ibm.com> 2014-08-22 17:53:19 -0400
committer Jay Doane <jaydoane@apache.org> 2021-04-19 00:35:19 -0700
commit d09b5026ff625ff517a99b97d6112279b3151d99 (patch)
tree 008bb298f67480acfa66842f8d80ddef44878717
parent b3272c7fa96cedac0b89b20ced1800789a68b08c (diff)
download couchdb-d09b5026ff625ff517a99b97d6112279b3151d99.tar.gz
Check mean node statistics over one second
Check the absolute statistics obtained by recon:node_stats/4 over a one second period. The values are sampled ten times and the mean is returned. For run_queue and process_count the mean is compared to hard-coded thresholds which determine whether a warning or info message is returned. For all other statistics an info message is always returned. BugzID: 32877
-rw-r--r-- src/weatherreport/src/weatherreport_check_node_stats.erl 66
1 files changed, 66 insertions, 0 deletions
diff --git a/src/weatherreport/src/weatherreport_check_node_stats.erl b/src/weatherreport/src/weatherreport_check_node_stats.erl
new file mode 100644
index 000000000..27b77cefd
--- /dev/null
+++ b/src/weatherreport/src/weatherreport_check_node_stats.erl
@@ -0,0 +1,66 @@
+%% -------------------------------------------------------------------
+%%
+%% weatherreport - automated diagnostic tools for CouchDB
+%%
+%% Copyright (c) 2014 Cloudant
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+%% @doc Diagnostic that checks various Erlang VM statistics that are
+%% useful for diagnostics. A warning message is returned if certain stats
+%% rise above pre-determined thresholds; otherwise an info message is returned.
+-module(weatherreport_check_node_stats).
+-behaviour(weatherreport_check).
+
+-export([description/0,
+ valid/0,
+ check/1,
+ format/1]).
+
+%% Number of samples requested from recon:node_stats/4 in check/1; also the
+%% divisor used there to turn the summed samples into a mean.
+-define(SAMPLES, 10).
+%% Mean run_queue value above which mean_to_message/1 returns a warning
+%% instead of an info message.
+-define(T_RUN_QUEUE, 40).
+%% Mean process_count value above which mean_to_message/1 returns a warning
+%% instead of an info message.
+-define(T_PROCESS_COUNT, 100000).
+
+%% @doc Return the one-line, human-readable summary of this diagnostic.
+-spec description() -> string().
+description() ->
+    Summary = "Check useful erlang statistics for diagnostics",
+    Summary.
+
+%% @doc Report whether this check may run. The decision is delegated
+%% entirely to weatherreport_node:can_connect/0.
+-spec valid() -> boolean().
+valid() ->
+    CanConnect = weatherreport_node:can_connect(),
+    CanConnect.
+
+%% @doc Fold callback handed to recon:node_stats/4 by check/1.
+%% Takes one sample, of which only the first tuple element (the absolute
+%% statistics) is used, and adds each value to the running total stored
+%% under the same key. A key with no running total yet (e.g. on the very
+%% first sample, when the accumulator is []) starts from 0. The result only
+%% contains the keys present in the current sample.
+-spec sum_absolute_stats({list(), list()}, list()) -> list().
+sum_absolute_stats({Absolutes, _Increments}, RunningTotals) ->
+    PreviousTotal = fun(Name) -> proplists:get_value(Name, RunningTotals, 0) end,
+    [{Name, Sample + PreviousTotal(Name)} || {Name, Sample} <- Absolutes].
+
+%% @doc Tag a {Statistic, Mean} pair with a message level.
+%% The pair itself is passed through unchanged as the message payload;
+%% only the level in front of it varies.
+-spec mean_to_message({atom(), integer()}) -> {atom(), {atom(), integer()}}.
+mean_to_message({Statistic, Mean} = Entry) ->
+    {mean_severity(Statistic, Mean), Entry}.
+
+%% Level for a statistic's mean: `warning' when run_queue or process_count
+%% is strictly above its threshold macro, `info' for everything else.
+-spec mean_severity(atom(), integer()) -> warning | info.
+mean_severity(run_queue, Mean) when Mean > ?T_RUN_QUEUE ->
+    warning;
+mean_severity(process_count, Mean) when Mean > ?T_PROCESS_COUNT ->
+    warning;
+mean_severity(_Statistic, _Mean) ->
+    info.
+
+%% @doc Run the diagnostic.
+%% Asks recon:node_stats/4 for ?SAMPLES samples with an interval argument of
+%% 100, summing the absolute statistics via sum_absolute_stats/2. Each sum is
+%% then divided by ?SAMPLES and rounded to the nearest integer, and the
+%% resulting mean is turned into a message by mean_to_message/1.
+%% The options argument is ignored.
+-spec check(list()) -> [{atom(), term()}].
+check(_Opts) ->
+    Totals = recon:node_stats(?SAMPLES, 100, fun sum_absolute_stats/2, []),
+    [mean_to_message({Name, erlang:round(Total / ?SAMPLES)}) || {Name, Total} <- Totals].
+
+%% @doc Turn a {Statistic, Value} message payload into a format string
+%% and its argument list; both values are rendered with ~w.
+-spec format(term()) -> {io:format(), [term()]}.
+format({Statistic, Value}) ->
+    Template = "Mean ~w over one second is ~w",
+    {Template, [Statistic, Value]}.