diff options
author | Jan Lehnardt <jan@apache.org> | 2019-09-19 12:01:53 +0200 |
---|---|---|
committer | Jan Lehnardt <jan@apache.org> | 2019-09-20 10:49:16 +0200 |
commit | aa311b8cd7ac6e2c8cfe8757d2e39e020b153997 (patch) | |
tree | ccac7839113f089465fbcd4e14a2db844b68aee1 | |
parent | 9da6c7ae8658273d32da1a805d3549670f118d51 (diff) | |
download | couchdb-aa311b8cd7ac6e2c8cfe8757d2e39e020b153997.tar.gz |
feat: do not run stats aggregations on an interval
Similar to 448be7996999a706464d8f7429a56dc9e9c87c3a (hello 0.10.1),
`timer:{send,apply}_interval()` will apply functions / send messages
for every interval that elapsed while a machine was in sleep /
hibernation mode, which is common on desktop systems.
In a typical office scenario, a laptop system that sleeps over a
weekend will, when woken up on a Monday, issue thousands of function
calls that, together with other, unrelated wake-up activity, make
a machine top out its CPU for no good reason.
This change addresses that: instead of relying on an interval to
start a given task, we start the task once after a timeout on startup,
and then start a fresh timer after the task is done.
Unlike the 0.10-era patch, this one does not account for a system
waking up before the timeout. I’m happy to add that behaviour, if a
reviewer insists on it.
As a result, no matter how long the sleep period is, we only run
the desired function _once_ after we wake up again. In the never-
sleep scenario, the existing behaviour is retained.
This might impact metrics that have a time component, but I think
that’s a fair compromise, so I didn’t investigate that further.
-rw-r--r-- | src/couch_stats/src/couch_stats_aggregator.erl | 27 |
1 files changed, 20 insertions, 7 deletions
diff --git a/src/couch_stats/src/couch_stats_aggregator.erl b/src/couch_stats/src/couch_stats_aggregator.erl index 17bd6fc33..8aef3d02d 100644 --- a/src/couch_stats/src/couch_stats_aggregator.erl +++ b/src/couch_stats/src/couch_stats_aggregator.erl @@ -55,18 +55,19 @@ start_link() -> init([]) -> {ok, Descs} = reload_metrics(), - Interval = config:get_integer("stats", "interval", ?DEFAULT_INTERVAL), - {ok, CT} = timer:send_interval(Interval * 1000, self(), collect), - {ok, RT} = timer:send_interval(?RELOAD_INTERVAL * 1000, self(), reload), + {ok, CT} = timer:send_after(get_interval(collect), self(), collect), + {ok, RT} = timer:send_after(get_interval(reload), self(), reload), {ok, #st{descriptions=Descs, stats=[], collect_timer=CT, reload_timer=RT}}. handle_call(fetch, _from, #st{stats = Stats}=State) -> {reply, {ok, Stats}, State}; handle_call(flush, _From, State) -> {reply, ok, collect(State)}; -handle_call(reload, _from, State) -> +handle_call(reload, _from, #st{reload_timer=OldRT} = State) -> + timer:cancel(OldRT), {ok, Descriptions} = reload_metrics(), - {reply, ok, State#st{descriptions=Descriptions}}; + {ok, RT} = update_timer(reload), + {reply, ok, State#st{descriptions=Descriptions, reload_timer=RT}}; handle_call(Msg, _From, State) -> {stop, {unknown_call, Msg}, error, State}. @@ -140,11 +141,23 @@ load_metrics_for_application(AppName) -> end end. -collect(State) -> +collect(#st{collect_timer=OldCT} = State) -> + timer:cancel(OldCT), Stats = lists:map( fun({Name, Props}) -> {Name, [{value, couch_stats:sample(Name)}|Props]} end, State#st.descriptions ), - State#st{stats=Stats}. + {ok, CT} = update_timer(collect), + State#st{stats=Stats, collect_timer=CT}. + +update_timer(collect) -> + Interval = get_interval(collect), + timer:send_after(Interval, self(), collect); +update_timer(reload) -> + Interval = get_interval(reload), + timer:send_after(Interval, self(), reload). 
+ +get_interval(reload) -> 1000 * ?RELOAD_INTERVAL; +get_interval(collect) -> 1000 * config:get_integer("stats", "interval", ?DEFAULT_INTERVAL). |