summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@apache.org>2020-06-09 17:53:56 -0400
committerNick Vatamaniuc <nickva@users.noreply.github.com>2020-06-10 13:54:31 -0400
commit7a96434bc014bd1aa37dce60e9d28f26b9262ff5 (patch)
tree8056d93cdf36f3ac86d4490c3a0dd9eebd149fb4
parent895c3748aa5e65473574d68cfd821f29312f42f2 (diff)
downloadcouchdb-3.0.x.tar.gz
In replicator, when rescheduling, pick only pending jobs which are not runningarchive/3.0.x3.0.x
Previously, when pending jobs were picked in the `ets:foldl` traversal, both running and non-running jobs were considered and a large number of running jobs could displace pending jobs in the accumulator. In the worst case, no crashed jobs would be restarted during rescheduling.
-rw-r--r--src/couch_replicator/src/couch_replicator_scheduler.erl16
1 files changed, 16 insertions, 0 deletions
diff --git a/src/couch_replicator/src/couch_replicator_scheduler.erl b/src/couch_replicator/src/couch_replicator_scheduler.erl
index 53c040e8c..641443a7c 100644
--- a/src/couch_replicator/src/couch_replicator_scheduler.erl
+++ b/src/couch_replicator/src/couch_replicator_scheduler.erl
@@ -456,6 +456,9 @@ pending_jobs(Count) when is_integer(Count), Count > 0 ->
[Job || {_Started, Job} <- gb_sets:to_list(Set1)].
+pending_fold(#job{pid = Pid}, Acc) when is_pid(Pid) ->
+ Acc;
+
pending_fold(Job, {Set, Now, Count, HealthThreshold}) ->
Set1 = case {not_recently_crashed(Job, Now, HealthThreshold),
gb_sets:size(Set) >= Count} of
@@ -1051,6 +1054,7 @@ scheduler_test_() ->
[
t_pending_jobs_simple(),
t_pending_jobs_skip_crashed(),
+ t_pending_jobs_skip_running(),
t_one_job_starts(),
t_no_jobs_start_if_max_is_0(),
t_one_job_starts_if_max_is_1(),
@@ -1112,6 +1116,18 @@ t_pending_jobs_skip_crashed() ->
end).
+t_pending_jobs_skip_running() ->
+ ?_test(begin
+ Job1 = continuous(1),
+ Job2 = continuous_running(2),
+ Job3 = oneshot(3),
+ Job4 = oneshot_running(4),
+ Jobs = [Job1, Job2, Job3, Job4],
+ setup_jobs(Jobs),
+ ?assertEqual([Job1, Job3], pending_jobs(4))
+ end).
+
+
t_one_job_starts() ->
?_test(begin
setup_jobs([oneshot(1)]),