diff options
author | Nick Vatamaniuc <vatamane@apache.org> | 2020-06-09 17:53:56 -0400 |
---|---|---|
committer | Nick Vatamaniuc <vatamane@apache.org> | 2020-06-10 12:02:13 -0400 |
commit | 5618bd516417b54836f7c6b5f73e23f3dc292c1e (patch) | |
tree | 8056d93cdf36f3ac86d4490c3a0dd9eebd149fb4 | |
parent | 895c3748aa5e65473574d68cfd821f29312f42f2 (diff) | |
download | couchdb-backport-replicator-pending-jobs-fix-to-3.0.x.tar.gz |
In replicator, when rescheduling, pick only pending jobs which are not runningbackport-replicator-pending-jobs-fix-to-3.0.x
Previously, when pending jobs were picked in the `ets:foldl` traversal, both
running and non-running jobs were considered and a large number of running jobs
could displace pending jobs in the accumulator. In the worst case, no crashed
jobs would be restarted during rescheduling.
-rw-r--r-- | src/couch_replicator/src/couch_replicator_scheduler.erl | 16 |
1 files changed, 16 insertions, 0 deletions
diff --git a/src/couch_replicator/src/couch_replicator_scheduler.erl b/src/couch_replicator/src/couch_replicator_scheduler.erl index 53c040e8c..641443a7c 100644 --- a/src/couch_replicator/src/couch_replicator_scheduler.erl +++ b/src/couch_replicator/src/couch_replicator_scheduler.erl @@ -456,6 +456,9 @@ pending_jobs(Count) when is_integer(Count), Count > 0 -> [Job || {_Started, Job} <- gb_sets:to_list(Set1)]. +pending_fold(#job{pid = Pid}, Acc) when is_pid(Pid) -> + Acc; + pending_fold(Job, {Set, Now, Count, HealthThreshold}) -> Set1 = case {not_recently_crashed(Job, Now, HealthThreshold), gb_sets:size(Set) >= Count} of @@ -1051,6 +1054,7 @@ scheduler_test_() -> [ t_pending_jobs_simple(), t_pending_jobs_skip_crashed(), + t_pending_jobs_skip_running(), t_one_job_starts(), t_no_jobs_start_if_max_is_0(), t_one_job_starts_if_max_is_1(), @@ -1112,6 +1116,18 @@ t_pending_jobs_skip_crashed() -> end). +t_pending_jobs_skip_running() -> + ?_test(begin + Job1 = continuous(1), + Job2 = continuous_running(2), + Job3 = oneshot(3), + Job4 = oneshot_running(4), + Jobs = [Job1, Job2, Job3, Job4], + setup_jobs(Jobs), + ?assertEqual([Job1, Job3], pending_jobs(4)) + end). + + t_one_job_starts() -> ?_test(begin setup_jobs([oneshot(1)]), |