diff options
author | Nick Vatamaniuc <vatamane@apache.org> | 2020-06-09 17:53:56 -0400 |
---|---|---|
committer | Nick Vatamaniuc <vatamane@apache.org> | 2020-06-12 16:33:18 -0400 |
commit | 01de0ee96c97993147fc084e52cba4201cda7d7e (patch) | |
tree | fb54bccdcd46d3f3235b3e4bb4ddab4828a5df45 | |
parent | 70512e57649ac69312beffdc8243a2bf263f7611 (diff) | |
download | couchdb-backport-replicator-pending-jobs-fix-to-3.x.tar.gz |
In replicator, when rescheduling, pick only pending jobs which are not runningbackport-replicator-pending-jobs-fix-to-3.x
Previously, when pending jobs were picked in the `ets:foldl` traversal, both
running and non-running jobs were considered and a large number of running jobs
could displace pending jobs in the accumulator. In the worst case, no crashed
jobs would be restarted during rescheduling.
-rw-r--r-- | src/couch_replicator/src/couch_replicator_scheduler.erl | 16 |
1 files changed, 16 insertions, 0 deletions
diff --git a/src/couch_replicator/src/couch_replicator_scheduler.erl b/src/couch_replicator/src/couch_replicator_scheduler.erl index 53c040e8c..641443a7c 100644 --- a/src/couch_replicator/src/couch_replicator_scheduler.erl +++ b/src/couch_replicator/src/couch_replicator_scheduler.erl @@ -456,6 +456,9 @@ pending_jobs(Count) when is_integer(Count), Count > 0 -> [Job || {_Started, Job} <- gb_sets:to_list(Set1)]. +pending_fold(#job{pid = Pid}, Acc) when is_pid(Pid) -> + Acc; + pending_fold(Job, {Set, Now, Count, HealthThreshold}) -> Set1 = case {not_recently_crashed(Job, Now, HealthThreshold), gb_sets:size(Set) >= Count} of @@ -1051,6 +1054,7 @@ scheduler_test_() -> [ t_pending_jobs_simple(), t_pending_jobs_skip_crashed(), + t_pending_jobs_skip_running(), t_one_job_starts(), t_no_jobs_start_if_max_is_0(), t_one_job_starts_if_max_is_1(), @@ -1112,6 +1116,18 @@ t_pending_jobs_skip_crashed() -> end). +t_pending_jobs_skip_running() -> + ?_test(begin + Job1 = continuous(1), + Job2 = continuous_running(2), + Job3 = oneshot(3), + Job4 = oneshot_running(4), + Jobs = [Job1, Job2, Job3, Job4], + setup_jobs(Jobs), + ?assertEqual([Job1, Job3], pending_jobs(4)) + end). + + t_one_job_starts() -> ?_test(begin setup_jobs([oneshot(1)]), |