diff options
| author | Michael Klishin <klishinm@vmware.com> | 2022-09-26 22:53:50 +0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-09-26 22:53:50 +0400 |
| commit | 88e27e26b6d85b1f897f6a931f91ac2511366267 (patch) | |
| tree | 18c9ccdc0a09a6fbb665cc21658a3deee0b1a79a | |
| parent | e6764f72f663a6237ebecdb82d12358e331afa52 (diff) | |
| parent | a57d5919c7e5baa2e2aefc6a67a597f1cca5dedb (diff) | |
| download | rabbitmq-server-git-88e27e26b6d85b1f897f6a931f91ac2511366267.tar.gz | |
Merge pull request #5831 from rabbitmq/lukebakken/disk-monitor-resilience
Make rabbit_disk_monitor more resilient
| -rw-r--r-- | deps/rabbit/src/rabbit_disk_monitor.erl | 50 |
1 file changed, 25 insertions, 25 deletions
diff --git a/deps/rabbit/src/rabbit_disk_monitor.erl b/deps/rabbit/src/rabbit_disk_monitor.erl index eb9eb65e4e..b4991e82c3 100644 --- a/deps/rabbit/src/rabbit_disk_monitor.erl +++ b/deps/rabbit/src/rabbit_disk_monitor.erl @@ -118,9 +118,6 @@ start_link(Args) -> gen_server:start_link({local, ?SERVER}, ?MODULE, [Args], []). init([Limit]) -> - process_flag(trap_exit, true), - process_flag(priority, low), - Dir = dir(), {ok, Retries} = application:get_env(rabbit, disk_monitor_failure_retries), {ok, Interval} = application:get_env(rabbit, disk_monitor_failure_retry_interval), @@ -151,7 +148,7 @@ init([Limit]) -> handle_call({set_disk_free_limit, _}, _From, #state{enabled = false} = State) -> rabbit_log:info("Cannot set disk free limit: " - "disabled disk free space monitoring", []), + "disabled disk free space monitoring", []), {reply, ok, State}; handle_call({set_disk_free_limit, Limit}, _From, State) -> @@ -188,9 +185,6 @@ handle_info(try_enable, #state{retries = Retries} = State) -> handle_info(update, State) -> {noreply, start_timer(internal_update(State))}; -handle_info({'EXIT', Port, Reason}, #state{port=Port}=State) -> - {stop, {port_died, Reason}, State#state{port=not_used}}; - handle_info(Info, State) -> rabbit_log:debug("~tp unhandled msg: ~tp", [?MODULE, Info]), {noreply, State}. @@ -421,24 +415,30 @@ interval(#state{limit = Limit, enable(#state{retries = 0} = State) -> rabbit_log:error("Free disk space monitor failed to start!"), State; -enable(#state{dir = Dir, - interval = Interval, - limit = Limit, - retries = Retries, - os = OS, - port = Port} = State) -> - case {catch get_disk_free(Dir, OS, Port), - vm_memory_monitor:get_total_memory()} of - {N1, N2} when is_integer(N1), is_integer(N2) -> - rabbit_log:info("Enabling free disk space monitoring", []), - start_timer(set_disk_limits(State, Limit)); - Err -> - rabbit_log:error("Free disk space monitor encountered an error " - "(e.g. 
failed to parse output from OS tools): ~tp, retries left: ~b", - [Err, Retries]), - erlang:send_after(Interval, self(), try_enable), - State#state{enabled = false} - end. +enable(#state{dir = Dir, os = OS, port = Port} = State) -> + enable_handle_disk_free(catch get_disk_free(Dir, OS, Port), State). + +enable_handle_disk_free(DiskFree, State) when is_integer(DiskFree) -> + enable_handle_total_memory(catch vm_memory_monitor:get_total_memory(), DiskFree, State); +enable_handle_disk_free(Error, #state{interval = Interval, retries = Retries} = State) -> + rabbit_log:warning("Free disk space monitor encountered an error " + "(e.g. failed to parse output from OS tools). " + "Retries left: ~b Error:~n~tp", + [Retries, Error]), + erlang:send_after(Interval, self(), try_enable), + State#state{enabled = false}. + +enable_handle_total_memory(TotalMemory, DiskFree, #state{limit = Limit} = State) when is_integer(TotalMemory) -> + rabbit_log:info("Enabling free disk space monitoring " + "(disk free space: ~b, total memory: ~b)", [DiskFree, TotalMemory]), + start_timer(set_disk_limits(State, Limit)); +enable_handle_total_memory(Error, _DiskFree, #state{interval = Interval, retries = Retries} = State) -> + rabbit_log:warning("Free disk space monitor encountered an error " + "retrieving total memory. " + "Retries left: ~b Error:~n~tp", + [Retries, Error]), + erlang:send_after(Interval, self(), try_enable), + State#state{enabled = false}. run_os_cmd(Cmd) -> Pid = self(), |
