diff options
author | Nick Vatamaniuc <vatamane@apache.org> | 2020-06-09 17:53:56 -0400 |
---|---|---|
committer | Nick Vatamaniuc <nickva@users.noreply.github.com> | 2020-06-10 01:05:41 -0400 |
commit | a7803fb2023b72b684f8d2f1198363b9d6723400 (patch) | |
tree | ae274a76de4bc3c9327244776d7f0be6bb57f80e | |
parent | 887d740ede15a7e2aafee899e3adff8024a7f9ef (diff) | |
download | couchdb-a7803fb2023b72b684f8d2f1198363b9d6723400.tar.gz |
In replicator, when rescheduling, pick only pending jobs which are not running
Previously, when pending jobs were picked in the `ets:foldl` traversal, both
running and non-running jobs were considered and a large number of running jobs
could displace pending jobs in the accumulator. In the worst case, no crashed
jobs would be restarted during rescheduling.
-rw-r--r-- | src/couch_replicator/src/couch_replicator_scheduler.erl | 16 |
1 files changed, 16 insertions, 0 deletions
diff --git a/src/couch_replicator/src/couch_replicator_scheduler.erl b/src/couch_replicator/src/couch_replicator_scheduler.erl index 53c040e8c..641443a7c 100644 --- a/src/couch_replicator/src/couch_replicator_scheduler.erl +++ b/src/couch_replicator/src/couch_replicator_scheduler.erl @@ -456,6 +456,9 @@ pending_jobs(Count) when is_integer(Count), Count > 0 -> [Job || {_Started, Job} <- gb_sets:to_list(Set1)]. +pending_fold(#job{pid = Pid}, Acc) when is_pid(Pid) -> + Acc; + pending_fold(Job, {Set, Now, Count, HealthThreshold}) -> Set1 = case {not_recently_crashed(Job, Now, HealthThreshold), gb_sets:size(Set) >= Count} of @@ -1051,6 +1054,7 @@ scheduler_test_() -> [ t_pending_jobs_simple(), t_pending_jobs_skip_crashed(), + t_pending_jobs_skip_running(), t_one_job_starts(), t_no_jobs_start_if_max_is_0(), t_one_job_starts_if_max_is_1(), @@ -1112,6 +1116,18 @@ t_pending_jobs_skip_crashed() -> end). +t_pending_jobs_skip_running() -> + ?_test(begin + Job1 = continuous(1), + Job2 = continuous_running(2), + Job3 = oneshot(3), + Job4 = oneshot_running(4), + Jobs = [Job1, Job2, Job3, Job4], + setup_jobs(Jobs), + ?assertEqual([Job1, Job3], pending_jobs(4)) + end). + + t_one_job_starts() -> ?_test(begin setup_jobs([oneshot(1)]), |