summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Lehnardt <jan@apache.org>2019-09-19 12:01:53 +0200
committerJan Lehnardt <jan@apache.org>2019-09-20 10:49:16 +0200
commitaa311b8cd7ac6e2c8cfe8757d2e39e020b153997 (patch)
treeccac7839113f089465fbcd4e14a2db844b68aee1
parent9da6c7ae8658273d32da1a805d3549670f118d51 (diff)
downloadcouchdb-aa311b8cd7ac6e2c8cfe8757d2e39e020b153997.tar.gz
feat: do not run stats aggregations on an interval
Similar to 448be7996999a706464d8f7429a56dc9e9c87c3a (hello 0.10.1), `timer:{send,apply}_interval()` will apply functions / send messages for all intervals that match the time that a machine was in sleep / hibernation mode, which is common on desktop systems. In a typical office scenario, a laptop system that sleeps over a weekend will, when woken up on a Monday, issue thousands of function calls that, together with other, unrelated wake-up activity, make the machine top out its CPU for no good reason. The change addresses this as follows: instead of relying on an interval to start a given task, on startup we start the task once after a timeout, and then start a fresh timer after the task is done. Unlike the 0.10-era patch, this one does not account for a system waking up before the timeout. I’m happy to add that behaviour, if a reviewer insists on it. As a result, no matter how long the sleep period is, we only run the desired function _once_ after we wake up again. In the never-sleep scenario, the existing behaviour is retained. This might impact metrics that have a time component, but I think that’s a fair compromise, so I didn’t investigate that further.
-rw-r--r--src/couch_stats/src/couch_stats_aggregator.erl27
1 files changed, 20 insertions, 7 deletions
diff --git a/src/couch_stats/src/couch_stats_aggregator.erl b/src/couch_stats/src/couch_stats_aggregator.erl
index 17bd6fc33..8aef3d02d 100644
--- a/src/couch_stats/src/couch_stats_aggregator.erl
+++ b/src/couch_stats/src/couch_stats_aggregator.erl
@@ -55,18 +55,19 @@ start_link() ->
init([]) ->
{ok, Descs} = reload_metrics(),
- Interval = config:get_integer("stats", "interval", ?DEFAULT_INTERVAL),
- {ok, CT} = timer:send_interval(Interval * 1000, self(), collect),
- {ok, RT} = timer:send_interval(?RELOAD_INTERVAL * 1000, self(), reload),
+ {ok, CT} = timer:send_after(get_interval(collect), self(), collect),
+ {ok, RT} = timer:send_after(get_interval(reload), self(), reload),
{ok, #st{descriptions=Descs, stats=[], collect_timer=CT, reload_timer=RT}}.
handle_call(fetch, _from, #st{stats = Stats}=State) ->
{reply, {ok, Stats}, State};
handle_call(flush, _From, State) ->
{reply, ok, collect(State)};
-handle_call(reload, _from, State) ->
+handle_call(reload, _from, #st{reload_timer=OldRT} = State) ->
+ timer:cancel(OldRT),
{ok, Descriptions} = reload_metrics(),
- {reply, ok, State#st{descriptions=Descriptions}};
+ {ok, RT} = update_timer(reload),
+ {reply, ok, State#st{descriptions=Descriptions, reload_timer=RT}};
handle_call(Msg, _From, State) ->
{stop, {unknown_call, Msg}, error, State}.
@@ -140,11 +141,23 @@ load_metrics_for_application(AppName) ->
end
end.
-collect(State) ->
+collect(#st{collect_timer=OldCT} = State) ->
+ timer:cancel(OldCT),
Stats = lists:map(
fun({Name, Props}) ->
{Name, [{value, couch_stats:sample(Name)}|Props]}
end,
State#st.descriptions
),
- State#st{stats=Stats}.
+ {ok, CT} = update_timer(collect),
+ State#st{stats=Stats, collect_timer=CT}.
+
+update_timer(collect) ->
+ Interval = get_interval(collect),
+ timer:send_after(Interval, self(), collect);
+update_timer(reload) ->
+ Interval = get_interval(reload),
+ timer:send_after(Interval, self(), reload).
+
+get_interval(reload) -> 1000 * ?RELOAD_INTERVAL;
+get_interval(collect) -> 1000 * config:get_integer("stats", "interval", ?DEFAULT_INTERVAL).