diff options
author | Micael Karlberg <bmk@erlang.org> | 2023-03-09 07:31:41 +0100 |
---|---|---|
committer | Micael Karlberg <bmk@erlang.org> | 2023-03-09 07:31:41 +0100 |
commit | 2454b14e82ce967a1737356bd7126ad56b1623dd (patch) | |
tree | 4a28094fdd2d0a9e2567fece8a776aee2978d87d | |
parent | 3e47020fe9440438901cdccf660fb9571e6cf0c6 (diff) | |
parent | cf296c924576b8f569bd51baf2889868eaf13289 (diff) | |
download | erlang-2454b14e82ce967a1737356bd7126ad56b1623dd.tar.gz |
Merge branch 'bmk/snmp/agent/20230127/minimize_error_reporting_during_failed_init/ERIERL-873/OTP-18422' into maint
-rw-r--r-- | lib/snmp/doc/src/snmp_app.xml | 38 | ||||
-rw-r--r-- | lib/snmp/doc/src/snmp_config.xml | 38 | ||||
-rw-r--r-- | lib/snmp/src/agent/snmpa_agent.erl | 73 | ||||
-rw-r--r-- | lib/snmp/src/agent/snmpa_agent_sup.erl | 12 | ||||
-rw-r--r-- | lib/snmp/src/agent/snmpa_net_if.erl | 38 | ||||
-rw-r--r-- | lib/snmp/src/agent/snmpa_supervisor.erl | 43 | ||||
-rw-r--r-- | lib/snmp/src/app/snmp_app.erl | 11 | ||||
-rw-r--r-- | lib/snmp/src/app/snmp_app_sup.erl | 16 |
8 files changed, 202 insertions, 67 deletions
diff --git a/lib/snmp/doc/src/snmp_app.xml b/lib/snmp/doc/src/snmp_app.xml index 0613de3158..1400e7bf32 100644 --- a/lib/snmp/doc/src/snmp_app.xml +++ b/lib/snmp/doc/src/snmp_app.xml @@ -4,7 +4,7 @@ <appref> <header> <copyright> - <year>1997</year><year>2021</year> + <year>1997</year><year>2023</year> <holder>Ericsson AB. All Rights Reserved.</holder> </copyright> <legalnotice> @@ -295,14 +295,15 @@ in the snmp_config file! <c><![CDATA[agent_net_if_options() = [agent_net_if_option()] <optional>]]></c></tag> <item> <p><c>agent_net_if_option() = - {bind_to, bind_to()} | - {sndbuf, sndbuf()} | - {recbuf, recbuf()} | - {no_reuse, no_reuse()} | - {req_limit, req_limit()} | - {filter, agent_net_if_filter_options()} | - {extra_sock_opts, extra_socket_options()} | - {inet_backend, inet | socket}</c></p> + {bind_to, bind_to()} | + {sndbuf, sndbuf()} | + {recbuf, recbuf()} | + {no_reuse, no_reuse()} | + {req_limit, req_limit()} | + {filter, agent_net_if_filter_options()} | + {open_err_filters, agent_net_if_open_err_filters()} | + {extra_sock_opts, extra_socket_options()} | + {inet_backend, inet | socket}</c></p> <p>These options are actually specific to the used module. The ones shown here are applicable to the default <c>agent_net_if_module()</c>.</p> @@ -343,6 +344,25 @@ in the snmp_config file! <p>Default is <c>snmpa_net_if_filter</c>.</p> </item> + <tag><marker id="agent_ni_open_err_filters"></marker> + <c><![CDATA[agent_net_if_open_err_filters() = [agent_net_if_open_err_filter()] <optional>]]></c></tag> + <item> + <p><c>agent_net_if_open_err_filter() = atom()</c></p> + <p>During agent initiation, the transports UDP sockets are opened. + If this operation fails, the net-if (and the agent) fails to start + (crash). This (filter) list contains error (reasons) that will + make net-if fail "nicely". + This (filter) list, is supposed to contain errors that can be + returned by + <seemfa marker="kernel:gen_udp#open/1">gen_udp:open/1,2</seemfa>. + The effect is that any error returned by + <seemfa marker="kernel:gen_udp#open/1">gen_udp:open</seemfa> + which *are* in this list, will be considered + "non-fatal" and will only result in an info message, rather than + an error message. Net If, and the agent, will still crash, + but will produce a less obnoxious message. </p> + </item> + <tag><marker id="agent_mibs"></marker> <c><![CDATA[agent_mibs() = [string()] <optional>]]></c></tag> <item> diff --git a/lib/snmp/doc/src/snmp_config.xml b/lib/snmp/doc/src/snmp_config.xml index 0b8d786a2d..99324ed400 100644 --- a/lib/snmp/doc/src/snmp_config.xml +++ b/lib/snmp/doc/src/snmp_config.xml @@ -4,7 +4,7 @@ <chapter> <header> <copyright> - <year>1997</year><year>2021</year> + <year>1997</year><year>2023</year> <holder>Ericsson AB. All Rights Reserved.</holder> </copyright> <legalnotice> @@ -281,14 +281,15 @@ <c><![CDATA[agent_net_if_options() = [agent_net_if_option()] <optional>]]></c></tag> <item> <p><c>agent_net_if_option() = - {bind_to, bind_to()} | - {sndbuf, sndbuf()} | - {recbuf, recbuf()} | - {no_reuse, no_reuse()} | - {req_limit, req_limit()} | - {filter, agent_net_if_filter_options()} | - {extra_sock_opts, extra_socket_options()} | - {inet_backend, inet | socket}</c></p> + {bind_to, bind_to()} | + {sndbuf, sndbuf()} | + {recbuf, recbuf()} | + {no_reuse, no_reuse()} | + {req_limit, req_limit()} | + {filter, agent_net_if_filter_options()} | + {extra_sock_opts, extra_socket_options()} | + {open_err_filters, agent_net_if_open_err_filters()} | + {inet_backend, inet | socket}</c></p> <p>These options are actually specific to the used module. The ones shown here are applicable to the default <c>agent_net_if_module()</c>.</p> @@ -329,6 +330,25 @@ <p>Default is <c>snmpa_net_if_filter</c>.</p> </item> + <tag><marker id="agent_ni_open_err_filters"></marker> + <c><![CDATA[agent_net_if_open_err_filters() = [agent_net_if_open_err_filter()] <optional>]]></c></tag> + <item> + <p><c>agent_net_if_open_err_filter() = atom()</c></p> + <p>During agent initiation, the transports UDP sockets are opened. + If this operation fails, the net-if (and the agent) fails to start + (crash). This (filter) list contains error (reasons) that will + make net-if fail "nicely". + This (filter) list, is supposed to contain errors that can be + returned by + <seemfa marker="kernel:gen_udp#open/1">gen_udp:open/1,2</seemfa>. + The effect is that any error returned by + <seemfa marker="kernel:gen_udp#open/1">gen_udp:open</seemfa> + which *are* in this list, will be considered + "non-fatal" and will only result in an info message, rather than + an error message. Net If, and the agent, will still crash, + but will produce a less obnoxious message. </p> + </item> + <tag><marker id="agent_mibs"></marker> <c><![CDATA[agent_mibs() = [string()] <optional>]]></c></tag> <item> diff --git a/lib/snmp/src/agent/snmpa_agent.erl b/lib/snmp/src/agent/snmpa_agent.erl index 736b2907fa..8c24440efc 100644 --- a/lib/snmp/src/agent/snmpa_agent.erl +++ b/lib/snmp/src/agent/snmpa_agent.erl @@ -340,10 +340,16 @@ init([Prio, Parent, Ref, Options]) -> "~n Options: ~p", [Prio, Parent, Ref, Options]), case (catch do_init(Prio, Parent, Ref, Options)) of {ok, State} -> - ?vdebug("started",[]), + ?vdebug("started"), {ok, State}; + {error, {net_if, info, Reason}} -> + info_msg("Failed starting agent: " + "~n Net If error: ~p", [Reason]), + %% {shutdown, Reason}; + exit(Reason); {error, Reason} -> - config_err("failed starting agent: ~n~p", [Reason]), + config_err("Failed starting agent: " + "~n ~p", [Reason]), {stop, Reason} end. @@ -410,15 +416,18 @@ start_note_store(Prio, Ref, Options) -> {ok, Pid} -> ?vdebug("start_note_store -> Pid: ~p", [Pid]), Pid; - {error, Reason} -> - ?vinfo("error starting note store: ~n~p",[Reason]), - throw({error, {note_store_error, Reason}}); - {'EXIT', Reason} -> - ?vinfo("exit starting note store: ~n~p",[Reason]), - throw({error, {note_store_exit, Reason}}); + {error, {Reason, _ChildSpec}} -> + ?vinfo("error starting note store: " + "~n ~p", [Reason]), + throw({error, {note_store, error, Reason}}); + {'EXIT', {Reason, _ChildSpec}} -> + ?vinfo("exit starting note store: " + "~n ~p", [Reason]), + throw({error, {note_store, exit, Reason}}); Error -> - ?vinfo("failed starting note store: ~n~p",[Error]), - throw({error, {note_store_failed, Error}}) + ?vinfo("failed starting note store: " + "~n ~p", [Error]), + throw({error, {note_store, failed, Error}}) end. @@ -437,18 +446,25 @@ start_net_if(none, Prio, Ref, Vsns, NoteStore, Options) -> case (catch snmpa_misc_sup:start_net_if(Prio, NoteStore, Ref, self(), Mod, NiOpts)) of - {ok, Pid} -> + {ok, Pid} -> ?vdebug("start_net_if -> Pid: ~p", [Pid]), {master_agent, Pid, Mod}; - {error, Reason} -> - ?vinfo("error starting net if: ~n~p",[Reason]), - throw({error, {net_if_error, Reason}}); + {error, {{Class, udp_open, PortNo, Reason}, _ChildSpec}} -> + ?vinfo("error starting net if: " + "~n ~p", [Reason]), + throw({error, {net_if, Class, {udp_open, PortNo, Reason}}}); + {error, {Reason, _ChildSpec}} -> + ?vinfo("error starting net if: " + "~n ~p", [Reason]), + throw({error, {net_if, error, Reason}}); {'EXIT', Reason} -> - ?vinfo("exit starting net if: ~n~p",[Reason]), - throw({error, {net_if_exit, Reason}}); + ?vinfo("exit starting net if: " + "~n ~p", [Reason]), + throw({error, {net_if, exit, Reason}}); Error -> - ?vinfo("failed starting net if: ~n~p",[Error]), - throw({error, {net_if_failed, Error}}) + ?vinfo("failed starting net if: " + "~n ~p", [Error]), + throw({error, {net_if, failed, Error}}) end; start_net_if(Parent, _Prio, _Ref, _Vsns, _NoteStore, _Options) when is_pid(Parent) -> @@ -470,15 +486,18 @@ start_mib_server(Prio, Ref, Mibs, Options) -> {ok, Pid} -> ?vdebug("start_mib_server -> Pid: ~p", [Pid]), Pid; - {error, Reason} -> - ?vinfo("error starting mib server: ~n~p",[Reason]), - throw({error, {mib_server_error, Reason}}); + {error, {Reason, _ChildSpec}} -> + ?vinfo("error starting mib server: " + "~n ~p", [Reason]), + throw({error, {mib_server, error, Reason}}); {'EXIT', Reason} -> - ?vinfo("exit starting mib server: ~n~p",[Reason]), - throw({error, {mib_server_exit, Reason}}); + ?vinfo("exit starting mib server: " + "~n ~p", [Reason]), + throw({error, {mib_server, exit, Reason}}); Error -> - ?vinfo("failed starting mib server: ~n~p",[Error]), - throw({error, {mib_server_failed, Error}}) + ?vinfo("failed starting mib server: " + "~n ~p", [Error]), + throw({error, {mib_server, failed, Error}}) end. @@ -3204,8 +3223,8 @@ get_stats_counters([Counter|Counters], Acc) -> %% --------------------------------------------------------------------- -%% info_msg(F, A) -> -%% ?snmpa_info(F, A). +info_msg(F, A) -> + ?snmpa_info(F, A). warning_msg(F, A) -> ?snmpa_warning(F, A). diff --git a/lib/snmp/src/agent/snmpa_agent_sup.erl b/lib/snmp/src/agent/snmpa_agent_sup.erl index 0a5116b2d0..b1eeaba988 100644 --- a/lib/snmp/src/agent/snmpa_agent_sup.erl +++ b/lib/snmp/src/agent/snmpa_agent_sup.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1997-2016. All Rights Reserved. +%% Copyright Ericsson AB 1997-2023. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -24,7 +24,9 @@ -behaviour(supervisor). %% External exports --export([start_link/0, start_link/1, start_subagent/3, stop_subagent/1]). +-export([start_link/0, start_link/1, + start_master_agent/1, + start_subagent/3, stop_subagent/1]). %% Internal exports -export([init/1]). @@ -40,8 +42,7 @@ %%%----------------------------------------------------------------- -%%% This is a supervisor for the mib processes. Each agent has one -%%% mib process. +%%% This is a supervisor for the agent processes (master and sub). %%%----------------------------------------------------------------- start_link() -> ?d("start_link -> entry", []), @@ -52,6 +53,9 @@ start_link(AgentSpec) -> "~n AgentSpec: ~p", [AgentSpec]), supervisor:start_link({local, ?SERVER}, ?MODULE, [[AgentSpec]]). +start_master_agent(MasterAgentSpec) -> + supervisor:start_child(snmpa_agent_sup, MasterAgentSpec). + start_subagent(ParentAgent, Subtree, Mibs) -> ?d("start_subagent -> entry with" "~n ParentAgent: ~p" diff --git a/lib/snmp/src/agent/snmpa_net_if.erl b/lib/snmp/src/agent/snmpa_net_if.erl index d37aa6c28a..72e3ac3e41 100644 --- a/lib/snmp/src/agent/snmpa_net_if.erl +++ b/lib/snmp/src/agent/snmpa_net_if.erl @@ -235,11 +235,21 @@ init(Prio, NoteStore, MasterAgent, Parent, Opts) -> end, erlang:raise(C, E, S) end; - {error, Reason} -> - config_err("failed starting net-if: ~n~p", [Reason]), - proc_lib:init_ack({error, Reason}); + {error, {udp_open, {open, PortNo, Reason}}} -> + OEFilters = get_open_err_filters(Opts), + Class = + case lists:member(Reason, OEFilters) of + false -> + error; + true -> + info + end, + proc_lib:init_ack({error, {Class, udp_open, PortNo, Reason}}); + {error, Reason} -> + %% config_err("failed starting net-if: ~n~p", [Reason]), + proc_lib:init_ack({error, Reason}); Error -> - config_err("failed starting net-if: ~n~p", [Error]), + %% config_err("failed starting net-if: ~n~p", [Error]), proc_lib:init_ack({error, Error}) end. @@ -256,7 +266,7 @@ do_init(Prio, NoteStore, MasterAgent, Parent, Opts) -> Vsns = get_vsns(Opts), ?vdebug("vsns: ~w",[Vsns]), - %% Flow control -- + %% -- Flow control -- Limit = get_req_limit(Opts), ?vdebug("Limit: ~w", [Limit]), FilterOpts = get_filter_opts(Opts), @@ -475,7 +485,7 @@ gen_udp_open(system, Opts) -> throw({udp_open, {port, PReason}}) end; {error, OReason} -> - throw({udp_open, {open, OReason}}) + throw({udp_open, {open, 0, OReason}}) end; %% This is for "future compat" since we cannot actually config '0'... gen_udp_open(IpPort, Opts) when (IpPort =:= 0) -> @@ -533,7 +543,7 @@ gen_udp_range_open(Min, Max, Opts) -> gen_udp_range_open(Min+1, Max, Opts); {error, Reason} -> ?vdebug("gen_udp_range_open(~w,~w) -> ~w", [Reason]), - throw({udp_open, {open, Reason}}) + throw({udp_open, {open, Min, Reason}}) catch C:E:S -> ?vinfo("gen_udp_range_open(~w,~w) -> failed open socket: " @@ -2101,6 +2111,16 @@ get_filter_opts(O) -> get_filter_module(O) -> snmp_misc:get_option(module, O, ?DEFAULT_FILTER_MODULE). +get_open_err_filters(O) -> + case snmp_misc:get_option(open_err_filters, O, []) of + Filters when is_list(Filters) -> + Filters; + Filter when is_atom(Filter) -> + [Filter]; + _ -> + [] + end. + get_recbuf(Opts, DefaultOpts) -> get_socket_opt(recbuf, Opts, DefaultOpts, use_default). @@ -2153,8 +2173,8 @@ info_msg(F,A) -> user_err(F, A) -> snmpa_error:user_err(F, A). -config_err(F, A) -> - snmpa_error:config_err(F, A). +%% config_err(F, A) -> +%% snmpa_error:config_err(F, A). %% ---------------------------------------------------------------- diff --git a/lib/snmp/src/agent/snmpa_supervisor.erl b/lib/snmp/src/agent/snmpa_supervisor.erl index 215851f23c..26c61579d5 100644 --- a/lib/snmp/src/agent/snmpa_supervisor.erl +++ b/lib/snmp/src/agent/snmpa_supervisor.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2022. All Rights Reserved. +%% Copyright Ericsson AB 1996-2023. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -29,6 +29,7 @@ %% Internal exports -export([init/1, config/2]). +-compile({no_auto_import,[erase/1]}). -define(SERVER, ?MODULE). @@ -41,6 +42,10 @@ %% Process structure %% ================= %% +%% "application" +%% | +%% app-sup +%% | %% ___________________ supervisor __________________ %% / | | \ \ %% ___misc_sup___ target_cache symbolic_store local_db agent_sup @@ -176,7 +181,23 @@ start_master_sup(Opts) -> do_start_master_sup(Opts) -> verify_mandatory([db_dir], Opts), - supervisor:start_link({local, ?SERVER}, ?MODULE, [master, Opts]). + case supervisor:start_link({local, ?SERVER}, ?MODULE, [master, Opts]) of + {ok, Pid} = OK -> + %% <HACKETI-HACK-HACK> + Key = master_agent_child_spec, + MasterAgentSpec = lookup(Key), + case snmpa_agent_sup:start_master_agent(MasterAgentSpec) of + {ok, MPid} when is_pid(MPid) -> + erase(Key), + OK; + {error, {Reason, _ChildSpec}} -> + stop(Pid, 0), + {error, Reason} + end; + %% </HACKETI-HACK-HACK> + Else -> + Else + end. verify_mandatory([], _) -> ok; @@ -503,9 +524,18 @@ init([AgentType, Opts]) -> worker_spec(snmpa_agent, [Prio, snmp_master_agent, none, Ref, AgentOpts], Restart, 15000), + %% <HACKETI-HACK-HACK> + %% The point is to make start failure more quiet + %% Often the failure happens in the master agent, + %% so we move the start of that out of this function + %% and into the 'do_start_master_sup' function. + %% At some point we should rewrite this. Maybe start all + %% children the same way (explicitly). + store(master_agent_child_spec, AgentSpec), AgentSupSpec = - sup_spec(snmpa_agent_sup, [AgentSpec], + sup_spec(snmpa_agent_sup, [], Restart, infinity), + %% </HACKETI-HACK-HACK> [ConfigSpec, AgentSupSpec]; _ -> ?vdebug("[sub agent] spec for the agent supervisor",[]), @@ -521,6 +551,13 @@ init([AgentType, Opts]) -> store(Key, Value) -> ets:insert(snmp_agent_table, {Key, Value}). +lookup(Key) -> + [{Key, Value}] = ets:lookup(snmp_agent_table, Key), + Value. + +erase(Key) -> + ets:delete(snmp_agent_table, Key). + get_mibs(Mibs, Vsns) -> MibDir = filename:join(code:priv_dir(snmp), "mibs"), StdMib = diff --git a/lib/snmp/src/app/snmp_app.erl b/lib/snmp/src/app/snmp_app.erl index 486b276383..7ba3c659f0 100644 --- a/lib/snmp/src/app/snmp_app.erl +++ b/lib/snmp/src/app/snmp_app.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2021. All Rights Reserved. +%% Copyright Ericsson AB 2003-2023. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -38,8 +38,13 @@ start(Type, []) -> %% First start the (new) central supervisor, {ok, Pid} = snmp_app_sup:start_link(), Entities = entities(), - ok = start_entities(Type, Entities), - {ok, Pid}. + case start_entities(Type, Entities) of + ok -> + {ok, Pid}; + Error -> + snmp_app_sup:stop(), + Error + end. entities() -> entities([agent, manager], []). diff --git a/lib/snmp/src/app/snmp_app_sup.erl b/lib/snmp/src/app/snmp_app_sup.erl index eb89cc5b6d..bb6faa18cb 100644 --- a/lib/snmp/src/app/snmp_app_sup.erl +++ b/lib/snmp/src/app/snmp_app_sup.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2016. All Rights Reserved. +%% Copyright Ericsson AB 2003-2023. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -64,7 +64,12 @@ start_agent(Type, Opts) -> "~n Type: ~p" "~n Opts: ~p", [Type, Opts]), Restart = get_restart(Opts, permanent), - start_sup_child(snmpa_supervisor, Restart, [Type, Opts]). + case start_sup_child(snmpa_supervisor, Restart, [Type, Opts]) of + {ok, Pid} = OK when is_pid(Pid) -> + OK; + {error, {Reason, _ChildSpec}} -> + {error, Reason} + end. start_manager(Type, Opts) -> @@ -72,7 +77,12 @@ start_manager(Type, Opts) -> "~n Type: ~p" "~n Opts: ~p", [Type, Opts]), Restart = get_restart(Opts, transient), - start_sup_child(snmpm_supervisor, Restart, [Type, Opts]). + case start_sup_child(snmpm_supervisor, Restart, [Type, Opts]) of + {ok, Pid} = OK when is_pid(Pid) -> + OK; + {error, {Reason, _ChildSpec}} -> + {error, Reason} + end. %%%------------------------------------------------------------------- |