summaryrefslogtreecommitdiff
path: root/rel/overlay
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@apache.org>2021-02-15 22:35:32 -0500
committerNick Vatamaniuc <nickva@users.noreply.github.com>2021-03-11 13:13:03 -0500
commitab38c2dd3f429ce8f347fa4f04e2815b810a8a19 (patch)
tree11d35dd32da4713ba178f04dd10bc01d77136b33 /rel/overlay
parent04e1d64f5f7f951a0dcda399354fbd8cfa934181 (diff)
downloadcouchdb-ab38c2dd3f429ce8f347fa4f04e2815b810a8a19.tar.gz
Fair Share Replication Scheduler Implementation
Fair share replication scheduler allows configuring job priorities per-replicator db. Previously jobs from all the replication dbs would be added to the scheduler and run in a round-robin order. This update makes it possible to specify the relative priority of jobs from different databases. For example, there could be low, high and default priority _replicator dbs. The original algorithm comes from the [A Fair Share Scheduler](https://proteusmaster.urcf.drexel.edu/urcfwiki/images/KayLauderFairShare.pdf "Fair Share Scheduler") paper by Judy Kay and Piers Lauder. A summary of how the algorithm works is included in the top level comment in the couch_replicator_share module. There is minimal modification to the main scheduler logic. Besides the share accounting logic each cycle, the other changes are: * Running and stopping candidates are now picked based on the priority first, and then on their last_started timestamp. * When jobs finish executing mid-cycle, their charges are accounted for. That holds for jobs which terminate normally, are removed by the user, or crash. Other interesting aspects are the interaction with the error back-off mechanism and how one-shot replications are treated: * The exponential error back-off mechanism is unaltered and takes precedence over the priority values. That means unhealthy jobs are rejected and "penalized" before the priority value is even looked at. * One-shot replications, once started, are not stopped during each scheduling cycle unless the operator manually adjusts the `max_jobs` parameter. That behavior is necessary to preserve the "snapshot" semantics and is retained in this update.
Diffstat (limited to 'rel/overlay')
-rw-r--r--rel/overlay/etc/default.ini26
1 files changed, 26 insertions, 0 deletions
diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index 973b1a1fe..6c992d7bd 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -482,6 +482,32 @@ ssl_certificate_max_depth = 3
; or 403 response this setting is not needed.
;session_refresh_interval_sec = 550
+; Usage coefficient decays historic fair share usage every scheduling
+; cycle. The value must be between 0.0 and 1.0. Lower values will
+; ensure historic usage decays quicker and higher values mean it will
+; be remembered longer.
+;usage_coeff = 0.5
+
+; Priority coefficient decays all the job priorities such that they slowly
+; drift towards the front of the run queue. This coefficient defines a maximum
+; time window over which this algorithm would operate. For example, if this
+; value is too small (0.1), after a few cycles quite a few jobs would end up at
+; priority 0, and would render this algorithm useless. The default value of
+; 0.98 is picked such that if a job ran for one scheduler cycle, then didn't
+; get to run for 7 hours, it would still have priority > 0. 7 hours was picked
+; as it was close enough to 8 hours which is the default maximum error backoff
+; interval.
+;priority_coeff = 0.98
+
+
+[replicator.shares]
+; Fair share configuration section. More shares result in a higher
+; chance that jobs from that db get to run. The default value is 100,
+; minimum is 1 and maximum is 1000. The configuration may be set even
+; if the database does not exist.
+;_replicator = 100
+
+
[log]
; Possible log levels:
; debug