Sentinel: example sentinel.conf updated.

author: antirez <antirez@gmail.com> 2013-11-21 17:07:00 +0100
committer: antirez <antirez@gmail.com> 2013-11-21 17:07:10 +0100
commit: f75fccafe7857c0e3d9b43f4874dab77ea8453e5 (patch)
tree: 8c4ea582e450e90c4e55abae1758d0ec8e3b4b83 /sentinel.conf
parent: 312ca4dacc923c17314134fd504d4098b4e853b7 (diff)
download: redis-f75fccafe7857c0e3d9b43f4874dab77ea8453e5.tar.gz
1 files changed, 27 insertions, 31 deletions
diff --git a/sentinel.conf b/sentinel.conf
index ac687b535..248e76c06 100644
--- a/sentinel.conf
+++ b/sentinel.conf
@@ -9,6 +9,10 @@ port 26379
 # Tells Sentinel to monitor this slave, and to consider it in O_DOWN
 # (Objectively Down) state only if at least <quorum> sentinels agree.
 #
+# Note that whatever the ODOWN quorum is, a Sentinel needs to be
+# elected by the majority of the known Sentinels in order to
+# start a failover, so no failover can be performed by a minority.
+#
 # Note: master name should not include special characters or spaces.
 # The valid charset is A-z 0-9 and the three characters ".-_".
 sentinel monitor mymaster 127.0.0.1 6379 2
@@ -42,11 +46,6 @@ sentinel monitor mymaster 127.0.0.1 6379 2
 # Default is 30 seconds.
 sentinel down-after-milliseconds mymaster 30000
 
-# sentinel can-failover <master-name> <yes|no>
-#
-# Specify if this Sentinel can start the failover for this master.
-sentinel can-failover mymaster yes
-
 # sentinel parallel-syncs <master-name> <numslaves>
 #
 # How many slaves we can reconfigure to point to the new slave simultaneously
@@ -57,19 +56,28 @@ sentinel parallel-syncs mymaster 1
 
 # sentinel failover-timeout <master-name> <milliseconds>
 #
-# Specifies the failover timeout in milliseconds. When this time has elapsed
-# without any progress in the failover process, it is considered concluded by
-# the sentinel even if not all the attached slaves were correctly configured
-# to replicate with the new master (however a "best effort" SLAVEOF command
-# is sent to all the slaves before).
+# Specifies the failover timeout in milliseconds. It is used in many ways:
+#
+# - The time needed to re-start a failover after a previous failover was
+#   already tried against the same master by a given Sentinel, is two
+#   times the failover timeout.
+#
+# - The time needed for a slave replicating to a wrong master according
+#   to a Sentinel's current configuration, to be forced to replicate
+#   with the right master, is exactly the failover timeout (counting since
+#   the moment a Sentinel detected the misconfiguration).
 #
-# Also when 25% of this time has elapsed without any advancement, and there
-# is a leader switch (the sentinel did not started the failover but is now
-# elected as leader), the sentinel will continue the failover doing a
-# "takeover".
+# - The time needed to cancel a failover that is already in progress but
+#   did not produce any configuration change (SLAVEOF NO ONE not yet
+#   acknowledged by the promoted slave).
 #
-# Default is 15 minutes.
-sentinel failover-timeout mymaster 900000
+# - The maximum time a failover in progress waits for all the slaves to be
+#   reconfigured as slaves of the new master. However even after this time
+#   the slaves will be reconfigured by the Sentinels anyway, but not with
+#   the exact parallel-syncs progression as specified.
+#
+# Default is 3 minutes.
+sentinel failover-timeout mymaster 180000
 
 # SCRIPTS EXECUTION
 #
@@ -114,32 +122,20 @@ sentinel failover-timeout mymaster 900000
 #
 # sentinel client-reconfig-script <master-name> <script-path>
 #
-# When the failover starts, ends, or is aborted, a script can be called in
+# When the master changed because of a failover, a script can be called in
 # order to perform application-specific tasks to notify the clients that the
 # configuration has changed and the master is at a different address.
 # 
-# The script is called in the following cases:
-#
-# Failover started (a slave is already promoted)
-# Failover finished (all the additional slaves already reconfigured)
-# Failover aborted (in that case the script was previously called when the
-#                   failover started, and now gets called again with swapped
-#                   addresses).
-#
 # The following arguments are passed to the script:
 #
 # <master-name> <role> <state> <from-ip> <from-port> <to-ip> <to-port>
 #
-# <state> is "start", "end" or "abort"
+# <state> is currently always "failover"
 # <role> is either "leader" or "observer"
 # 
 # The arguments from-ip, from-port, to-ip, to-port are used to communicate
 # the old address of the master and the new address of the elected slave
-# (now a master) in the case state is "start" or "end".
-#
-# For abort instead the "from" is the address of the promoted slave and
-# "to" is the address of the original master address, since the failover
-# was aborted.
+# (now a master).
 #
 # This script should be resistant to multiple invocations.
 #
author	antirez <antirez@gmail.com>	2013-11-21 17:07:00 +0100
committer	antirez <antirez@gmail.com>	2013-11-21 17:07:10 +0100
commit	f75fccafe7857c0e3d9b43f4874dab77ea8453e5 (patch)
tree	8c4ea582e450e90c4e55abae1758d0ec8e3b4b83 /sentinel.conf
parent	312ca4dacc923c17314134fd504d4098b4e853b7 (diff)
download	redis-f75fccafe7857c0e3d9b43f4874dab77ea8453e5.tar.gz