summaryrefslogtreecommitdiff
path: root/doc/book/src/cpp-broker/Active-Passive-Cluster.xml
blob: 461b75d3204ccd38e0d82f77d986d327409e4e0c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
<?xml version="1.0" encoding="utf-8"?>
<!--

Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.

-->

<section id="chapter-ha">
  <title>Active-Passive Messaging Clusters</title>

  <section id="ha-overview">
    <title>Overview</title>
    <para>

      The High Availability (HA) module provides
      <firstterm>active-passive</firstterm>, <firstterm>hot-standby</firstterm>
      messaging clusters to provide fault tolerant message delivery.
    </para>
    <para>
      In an active-passive cluster only one broker, known as the
      <firstterm>primary</firstterm>, is active and serving clients at a time. The other
      brokers are standing by as <firstterm>backups</firstterm>. Changes on the primary
      are replicated to all the backups so they are always up-to-date or "hot". Backup
      brokers reject client connection attempts, to enforce the requirement that clients
      only connect to the primary.
    </para>
    <para>
      If the primary fails, one of the backups is promoted to take over as the new
      primary. Clients fail-over to the new primary automatically. If there are multiple
      backups, the other backups also fail-over to become backups of the new primary.
    </para>
    <para>
      This approach relies on an external <firstterm>cluster resource manager</firstterm>
      to detect failures, choose the new primary and handle network partitions. <ulink
      url="https://fedorahosted.org/cluster/wiki/RGManager">rgmanager</ulink> is supported
      initially, but others may be supported in the future.
    </para>
    <section id="ha-at-least-once">
      <title>Avoiding message loss</title>
      <para>
	In order to avoid message loss, the primary broker <emphasis>delays
	acknowledgement</emphasis> of messages received from clients until the
	message has been replicated and acknowledged by all of the back-up
	brokers, or has been consumed from the primary queue.
      </para>
      <para>
	This ensures that all acknowledged messages are safe: they have either
	been consumed or backed up to all backup brokers.  Messages that are
	consumed <emphasis>before</emphasis> they are replicated do not need to
	be replicated. This reduces the work load when replicating a queue with
	active consumers.
      </para>
      <para>
	Clients keep <emphasis>unacknowledged</emphasis> messages in a buffer
	<footnote>
	  <para>
	    You can control the maximum number of messages in the buffer by setting the
	    client's <literal>capacity</literal>. For details of how to set the capacity
	    in client code see &#34;Using the Qpid Messaging API&#34; in
	    <citetitle>Programming in Apache Qpid</citetitle>.
	  </para>
	</footnote>
	until they are acknowledged by the primary. If the primary fails, clients will
	fail-over to the new primary and <emphasis>re-send</emphasis> all their
	unacknowledged messages.
	<footnote>
	  <para>
	  Clients must use "at-least-once" reliability to enable re-send of unacknowledged
	  messages. This is the default behaviour, no options need be set to enable it. For
	  details of client addressing options see &#34;Using the Qpid Messaging API&#34;
	  in <citetitle>Programming in Apache Qpid</citetitle>.
	  </para>
	</footnote>
      </para>
      <para>
	If the primary crashes, all the <emphasis>acknowledged</emphasis>
	messages will be available on the backup that takes over as the new
	primary. The <emphasis>unacknowledged</emphasis> messages will be
	re-sent by the clients.  Thus no messages are lost.
      </para>
      <para>
	Note that this means it is possible for messages to be
	<emphasis>duplicated</emphasis>. In the event of a failure it is possible for a
	message to be received by the backup that becomes the new primary
	<emphasis>and</emphasis> re-sent by the client.  The application must take steps
	to identify and eliminate duplicates.
      </para>
      <para>
	When a new primary is promoted after a fail-over it is initially in
	"recovering" mode. In this mode, it delays acknowledgement of messages
	on behalf of all the backups that were connected to the previous
	primary. This protects those messages against a failure of the new
	primary until the backups have a chance to connect and catch up.
      </para>
      <para>
	Not all messages need to be replicated to the back-up brokers. If a
	message is consumed and acknowledged by a regular client before it has
	been replicated to a backup, then it doesn't need to be replicated.
      </para>
      <variablelist id="ha-broker-states">
	<title>HA Broker States</title>
	<varlistentry>
	  <term>Stand-alone</term>
	  <listitem>
	    <para>
	      Broker is not part of a HA cluster.
	    </para>
	  </listitem>
	</varlistentry>
	<varlistentry>
	  <term>Joining</term>
	  <listitem>
	    <para>
	      Newly started broker, not yet connected to any existing primary.
	    </para>
	  </listitem>
	</varlistentry>
	<varlistentry>
	  <term>Catch-up</term>
	  <listitem>
	    <para>
	      A backup broker that is connected to the primary and downloading
	      existing state (queues, messages etc.)
	    </para>
	  </listitem>
	</varlistentry>
	<varlistentry>
	  <term>Ready</term>
	  <listitem>
	    <para>
	      A backup broker that is fully caught-up and ready to take over as
	      primary.
	    </para>
	  </listitem>
	</varlistentry>
	<varlistentry>
	  <term>Recovering</term>
	  <listitem>
	    <para>
	      Newly-promoted primary, waiting for backups to connect and catch up.
	      Clients can connect but they are stalled until the primary is active.
	    </para>
	  </listitem>
	</varlistentry>
	<varlistentry>
	  <term>Active</term>
	  <listitem>
	    <para>
	      The active primary broker with all backups connected and caught-up.
	    </para>
	  </listitem>
	</varlistentry>
      </variablelist>
    </section>
    <section id="limitations">
      <title>Limitations</title>
      <para>
	There are some known limitations in the current implementation. These
	will be fixed in future versions.
      </para>
      <itemizedlist>
	<listitem>
	  <para>
	    Transactional changes to queue state are not replicated atomically. If
	    the primary crashes during a transaction, it is possible that the
	    backup could contain only part of the changes introduced by a
	    transaction.
	  </para>
	</listitem>
	<listitem>
	  <para>
	    Configuration changes (creating or deleting queues, exchanges and
	    bindings) are replicated asynchronously. Management tools used to
	    make changes will consider the change complete when it is complete
	    on the primary, it may not yet be replicated to all the backups.
	  </para>
	</listitem>
	<listitem>
	  <para>
	    Federation links <emphasis>to</emphasis> the primary will fail over
	    correctly.  Federated links <emphasis>from</emphasis> the primary
	    will be lost in fail over, they will not be re-connected to the new
	    primary. It is possible to work around this by replacing the
	    <literal>qpidd-primary</literal> start up script with a script that
	    re-creates federation links when the primary is promoted.
	  </para>
	</listitem>
      </itemizedlist>
    </section>
  </section>

  <section id="ha-virtual-ip">
    <title>Virtual IP Addresses</title>
    <para>
      Some resource managers (including <command>rgmanager</command>) support
      <firstterm>virtual IP addresses</firstterm>. A virtual IP address is an IP
      address that can be relocated to any of the nodes in a cluster.  The
      resource manager associates this address with the primary node in the
      cluster, and relocates it to the new primary when there is a failure. This
      simplifies configuration as you can publish a single IP address rather
      than a list.
    </para>
    <para>
      A virtual IP address can be used by clients to connect to the primary. The
      following sections will explain how to configure virtual IP addresses for
      clients or brokers.
    </para>
  </section>

  <section id="ha-broker-config">
    <title>Configuring the Brokers</title>
    <para>
      The broker must load the <filename>ha</filename> module, it is loaded by
      default. The following broker options are available for the HA module.
    </para>
    <note>
      <para>
	Broker management is required for HA to operate, it is enabled by
	default. The option <literal>mgmt-enable</literal> must not be set to
	"no".
      </para>
    </note>
    <note>
      <para>
	Incorrect security settings are a common cause of problems when
	getting started, see <xref linkend="ha-security"/>.
      </para>
    </note>
    <table frame="all" id="ha-broker-options">
      <title>Broker Options for High Availability Messaging Cluster</title>
      <tgroup align="left" cols="2" colsep="1" rowsep="1">
	<colspec colname="c1"/>
	<colspec colname="c2"/>
	<thead>
	  <row>
	    <entry align="center" nameend="c2" namest="c1">
	      Options for High Availability Messaging Cluster
	    </entry>
	  </row>
	</thead>
	<tbody>
	  <row>
	    <entry>
	      <literal>ha-cluster <replaceable>yes|no</replaceable></literal>
	    </entry>
	    <entry>
	      Set to "yes" to have the broker join a cluster.
	    </entry>
	  </row>
	  <row>
	    <entry>
	      <literal>ha-queue-replication <replaceable>yes|no</replaceable></literal>
	    </entry>
	    <entry>
	      Enable replication of specific queues without joining a cluster, see <xref linkend="ha-queue-replication"/>.
	    </entry>
	  </row>
	  <row>
	    <entry>
	      <literal>ha-brokers-url <replaceable>URL</replaceable></literal>
	    </entry>
	    <entry>
	      <para>
		The URL
		<footnote id="ha-url-grammar">
		  <para>
		  The full format of the URL is given by this grammar:
		  <programlisting>
url = ["amqp:"][ user ["/" password] "@" ] addr ("," addr)*
addr = tcp_addr / rdma_addr / ssl_addr / ...
tcp_addr = ["tcp:"] host [":" port]
rdma_addr = "rdma:" host [":" port]
ssl_addr = "ssl:" host [":" port]
		  </programlisting>
		  </para>
		</footnote>
		used by cluster brokers to connect to each other. The URL should
		contain a comma separated list of the broker addresses, rather than a
		virtual IP address.
	      </para>
	    </entry>
	  </row>
	  <row>
	    <entry><literal>ha-public-url <replaceable>URL</replaceable></literal> </entry>
	    <entry>
	      <para>
		This option is only needed for backwards compatibility if you
		have been using the <literal>amq.failover</literal> exchange.
		This exchange is now obsolete, it is recommended to use a
		virtual IP address instead.
	      </para>
	      <para>
		If set, this URL is advertised by the
		<literal>amq.failover</literal> exchange and overrides the
		broker option <literal>known-hosts-url</literal>
	      </para>
	    </entry>
	  </row>
	  <row>
	    <entry><literal>ha-replicate </literal><replaceable>VALUE</replaceable></entry>
	    <entry>
	      <para>
		Specifies whether queues and exchanges are replicated by default.
		<replaceable>VALUE</replaceable> is one of: <literal>none</literal>,
		<literal>configuration</literal>, <literal>all</literal>.
		For details see <xref linkend="ha-replicate-values"/>.
	      </para>
	    </entry>
	  </row>
	  <row>
	    <entry>
	      <para><literal>ha-username <replaceable>USER</replaceable></literal></para>
	      <para><literal>ha-password <replaceable>PASS</replaceable></literal></para>
	      <para><literal>ha-mechanism <replaceable>MECHANISM</replaceable></literal></para>
	    </entry>
	    <entry>
	      Authentication settings used by HA brokers to connect to each other,
	      see <xref linkend="ha-security"/>
	    </entry>
	  </row>
	  <row>
	    <entry><literal>ha-backup-timeout <replaceable>SECONDS</replaceable></literal>
	    <footnote id="ha-seconds-spec">
	      <para>
		Values specified as <replaceable>SECONDS</replaceable> can be a
		fraction of a second, e.g. "0.1" for a tenth of a second.
		They can also have an explicit unit,
		e.g. 10s (seconds), 10ms (milliseconds), 10us (microseconds), 10ns (nanoseconds)
	      </para>
	    </footnote>
	    </entry>
	    <entry>
	      <para>
		Maximum time that a recovering primary will wait for an expected
		backup to connect and become ready.
	      </para>
	    </entry>
	  </row>
	  <row>
	    <entry>
	      <literal>link-maintenance-interval <replaceable>SECONDS</replaceable></literal>
	      <footnoteref linkend="ha-seconds-spec"/>
	    </entry>
	    <entry>
	      <para>
		HA uses federation links to connect from backup to primary.
		Backup brokers check the link to the primary on this interval
		and re-connect if need be. Default 2 seconds. Set lower for
		faster failover, e.g. 0.1 seconds.  Setting too low will result
		in excessive link-checking on the backups.
	      </para>
	    </entry>
	  </row>
	  <row>
	    <entry>
	      <literal>link-heartbeat-interval <replaceable>SECONDS</replaceable></literal>
	      <footnoteref linkend="ha-seconds-spec"/>
	    </entry>
	    <entry>
	      <para>
		HA uses federation links to connect from backup to primary.
		If no heart-beat is received for twice this interval the primary will consider that
		backup dead (e.g. if backup is hung or partitioned.)
		This interval is also used as the time-out for broker status checks,
		it may take up to this interval for rgmanager to detect a hung or partitioned broker.
		Clients sending messages may be held up during this time.
		Default 120 seconds: you will probably want to set this to a lower value e.g. 10.
		If set too low rgmanager may consider a slow broker to have failed and kill it.
	      </para>
	    </entry>
	  </row>
	</tbody>
      </tgroup>
    </table>
    <para>
      To configure a HA cluster you must set at least <literal>ha-cluster</literal> and
      <literal>ha-brokers-url</literal>.
    </para>
  </section>

  <section id="ha-rm">
    <title>The Cluster Resource Manager</title>
    <para>
      Broker fail-over is managed by a <firstterm>cluster resource
      manager</firstterm>.  An integration with <ulink
      url="https://fedorahosted.org/cluster/wiki/RGManager">rgmanager</ulink> is
      provided, but it is possible to integrate with other resource managers.
    </para>
    <para>
      The resource manager is responsible for starting the <command>qpidd</command> broker
      on each node in the cluster. The resource manager then <firstterm>promotes</firstterm>
      one of the brokers to be the primary. The other brokers connect to the primary as
      backups, using the URL provided in the <literal>ha-brokers-url</literal> configuration
      option.
    </para>
    <para>
      Once connected, the backup brokers synchronize their state with the
      primary.  When a backup is synchronized, or "hot", it is ready to take
      over if the primary fails.  Backup brokers continually receive updates
      from the primary in order to stay synchronized.
    </para>
    <para>
      If the primary fails, backup brokers go into fail-over mode. The resource
      manager must detect the failure and promote one of the backups to be the
      new primary.  The other backups connect to the new primary and synchronize
      their state with it.
    </para>
    <para>
      The resource manager is also responsible for protecting the cluster from
      <firstterm>split-brain</firstterm> conditions resulting from a network partition.  A
      network partition divides a cluster into two sub-groups which cannot see each other.
      Usually a <firstterm>quorum</firstterm> voting algorithm is used that disables nodes
      in the inquorate sub-group.
    </para>
  </section>

  <section id="ha-rm-config">
    <title>Configuring with <command>rgmanager</command> as resource manager</title>
    <para>
      This section assumes that you are already familiar with setting up and configuring
      clustered services using <command>cman</command> and
      <command>rgmanager</command>. It will show you how to configure an active-passive,
      hot-standby <command>qpidd</command> HA cluster with <command>rgmanager</command>.
    </para>
    <note>
      <para>
	Once all components are installed it is important to take the following step:
	<programlisting>
chkconfig rgmanager on
chkconfig cman on
chkconfig qpidd <emphasis>off</emphasis>
	</programlisting>
      </para>
      <para>
	The qpidd service must be <emphasis>off</emphasis> in
	<literal>chkconfig</literal> because <literal>rgmanager</literal> will
	start and stop <literal>qpidd</literal>.  If the normal system init
	process also attempts to start and stop qpidd it can cause rgmanager to
	lose track of qpidd processes. The symptom when this happens is that
	<literal>clustat</literal> shows a <literal>qpidd</literal> service to
	be stopped when in fact there is a <literal>qpidd</literal> process
	running. The <literal>qpidd</literal> log will show errors like this:
	<programlisting>
critical Unexpected error: Daemon startup failed: Cannot lock /var/lib/qpidd/lock: Resource temporarily unavailable
	</programlisting>
      </para>
    </note>
    <para>
      You must provide a <literal>cluster.conf</literal> file to configure
      <command>cman</command> and <command>rgmanager</command>.  Here is
      an example <literal>cluster.conf</literal> file for a cluster of 3 nodes named
      node1, node2 and node3. We will go through the configuration step-by-step.
    </para>
    <programlisting>
      <![CDATA[
<?xml version="1.0"?>
<!--
This is an example of a cluster.conf file to run qpidd HA under rgmanager.
This example assumes a 3 node cluster, with nodes named node1, node2 and node3.

NOTE: fencing is not shown, you must configure fencing appropriately for your cluster.
-->

<cluster name="qpid-test" config_version="18">
  <!-- The cluster has 3 nodes. Each has a unique nodeid and one vote
       for quorum. -->
  <clusternodes>
    <clusternode name="node1.example.com" nodeid="1"/>
    <clusternode name="node2.example.com" nodeid="2"/>
    <clusternode name="node3.example.com" nodeid="3"/>
  </clusternodes>

  <!-- Resource Manager configuration.

   status_poll_interval is the interval in seconds that the resource manager checks the status
   of managed services. This affects how quickly the manager will detect failed services.
   -->
  <rm status_poll_interval="1">
    <!--
	There is a failoverdomain for each node containing just that node.
	This lets us stipulate that the qpidd service should always run on each node.
    -->
    <failoverdomains>
      <failoverdomain name="node1-domain" restricted="1">
	<failoverdomainnode name="node1.example.com"/>
      </failoverdomain>
      <failoverdomain name="node2-domain" restricted="1">
	<failoverdomainnode name="node2.example.com"/>
      </failoverdomain>
      <failoverdomain name="node3-domain" restricted="1">
	<failoverdomainnode name="node3.example.com"/>
      </failoverdomain>
    </failoverdomains>

    <resources>
      <!-- This script starts a qpidd broker acting as a backup. -->
      <script file="/etc/init.d/qpidd" name="qpidd"/>

      <!-- This script promotes the qpidd broker on this node to primary. -->
      <script file="/etc/init.d/qpidd-primary" name="qpidd-primary"/>

      <!--
          This is a virtual IP address for client traffic.
	  monitor_link="yes" means monitor the health of the NIC used for the VIP.
	  sleeptime="0" means don't delay when failing over the VIP to a new address.
      -->
      <ip address="20.0.20.200" monitor_link="yes" sleeptime="0"/>
    </resources>

    <!-- There is a qpidd service on each node, it should be restarted if it fails. -->
    <service name="node1-qpidd-service" domain="node1-domain" recovery="restart">
      <script ref="qpidd"/>
    </service>
    <service name="node2-qpidd-service" domain="node2-domain" recovery="restart">
      <script ref="qpidd"/>
    </service>
    <service name="node3-qpidd-service" domain="node3-domain"  recovery="restart">
      <script ref="qpidd"/>
    </service>

    <!-- There should always be a single qpidd-primary service, it can run on any node. -->
    <service name="qpidd-primary-service" autostart="1" exclusive="0" recovery="relocate">
      <script ref="qpidd-primary"/>
      <!-- The primary has the IP addresses for brokers and clients to connect. -->
      <ip ref="20.0.20.200"/>
    </service>
  </rm>
</cluster>
      ]]>
    </programlisting>

    <para>
      There is a <literal>failoverdomain</literal> for each node containing just that
      one node.  This lets us stipulate that the qpidd service should always run on all
      nodes.
    </para>
    <para>
      The <literal>resources</literal> section defines the <command>qpidd</command>
      script used to start the <command>qpidd</command> service. It also defines the
      <command>qpidd-primary</command> script which does not
      actually start a new service, rather it promotes the existing
      <command>qpidd</command> broker to primary status.
    </para>
    <para>
      The <literal>resources</literal> section also defines a virtual IP
      address for clients: <literal>20.0.20.200</literal>.
    </para>
    <para>
      <filename>qpidd.conf</filename> should contain these  lines:
    </para>
    <programlisting>
ha-cluster=yes
ha-brokers-url=20.0.20.1,20.0.20.2,20.0.20.3
    </programlisting>
    <para>
      The brokers connect to each other directly via the addresses
      listed in <command>ha-brokers-url</command>. Note the client and broker
      addresses are on separate sub-nets, this is recommended but not required.
    </para>
    <para>
      The <literal>service</literal> section defines 3 <literal>qpidd</literal>
      services, one for each node. Each service is in a restricted fail-over
      domain containing just that node, and has the <literal>restart</literal>
      recovery policy. The effect of this is that rgmanager will run
      <command>qpidd</command> on each node, restarting if it fails.
    </para>
    <para>
      There is a single <literal>qpidd-primary-service</literal> using the
      <command>qpidd-primary</command> script which is not restricted to a
      domain and has the <literal>relocate</literal> recovery policy. This means
      rgmanager will start <command>qpidd-primary</command> on one of the nodes
      when the cluster starts and will relocate it to another node if the
      original node fails. Running the <literal>qpidd-primary</literal> script
      does not start a new broker process, it promotes the existing broker to
      become the primary.
    </para>

    <section id="ha-rm-shutdown-node">
      <title>Shutting down qpidd on a HA node</title>
      <para>
        As explained above both the per-node <literal>qpidd</literal> service
        and the re-locatable <literal>qpidd-primary</literal> service are
        implemented by the same <literal>qpidd</literal> daemon.
      </para>
      <para>
        As a result, stopping the <literal>qpidd</literal> service will not stop
        a <literal>qpidd</literal> daemon that is acting as primary, and
        stopping the <literal>qpidd-primary</literal> service will not stop a
        <literal>qpidd</literal> process that is acting as backup.
      </para>
      <para>
        To shut down a node that is acting as primary you need to shut down the
        <literal>qpidd</literal> service <emphasis>and</emphasis> relocate the
        primary:
      </para>
      <para>
	<programlisting>
clusvcadm -d somenode-qpidd-service
clusvcadm -r qpidd-primary-service
        </programlisting>
      </para>
      <para>
        This will shut down the <literal>qpidd</literal> daemon on that node and
        prevent the primary service from relocating back to the node
        because the qpidd service is no longer running there.
      </para>
    </section>
  </section>

  <section id="ha-broker-admin">
    <title>Broker Administration Tools</title>
    <para>
      Normally, clients are not allowed to connect to a backup broker. However
      management tools are allowed to connect to backup brokers. If you use
      these tools you <emphasis>must not</emphasis> add or remove messages from
      replicated queues, nor create or delete replicated queues or exchanges as
      this will disrupt the replication process and may cause message loss.
    </para>
    <para>
      <command>qpid-ha</command> allows you to view and change HA configuration settings.
    </para>
    <para>
      The tools <command>qpid-config</command>, <command>qpid-route</command> and
      <command>qpid-stat</command> will connect to a backup if you pass the flag <command>--ha-admin</command> on the
      command line.
    </para>
  </section>

  <section id="ha-replicate-values">
    <title>Controlling replication of queues and exchanges</title>
    <para>
      By default, queues and exchanges are not replicated automatically. You can change
      the default behaviour by setting the <literal>ha-replicate</literal> configuration
      option. It has one of the following values:
      <itemizedlist>
	<listitem>
	  <para>
	    <firstterm>all</firstterm>: Replicate everything automatically: queues,
	    exchanges, bindings and messages.
	  </para>
	</listitem>
	<listitem>
	  <para>
	    <firstterm>configuration</firstterm>: Replicate the existence of queues,
	    exchanges and bindings but don't replicate messages.
	  </para>
	</listitem>
	<listitem>
	  <para>
	    <firstterm>none</firstterm>: Don't replicate anything, this is the default.
	  </para>
	</listitem>
      </itemizedlist>
    </para>
    <para>
      You can over-ride the default for a particular queue or exchange by passing the
      argument <literal>qpid.replicate</literal> when creating the queue or exchange. It
      takes the same values as <literal>ha-replicate</literal>.
    </para>
    <para>
      Bindings are automatically replicated if the queue and exchange being bound both
      have replication <literal>all</literal> or <literal>configuration</literal>, they
      are not replicated otherwise.
    </para>
    <para>
      You can create replicated queues and exchanges with the
      <command>qpid-config</command> management tool like this:
    </para>
    <programlisting>
qpid-config add queue myqueue --replicate all
    </programlisting>
    <para>
      To create replicated queues and exchanges via the client API, add a
      <literal>node</literal> entry to the address like this:
    </para>
    <programlisting>
"myqueue;{create:always,node:{x-declare:{arguments:{'qpid.replicate':all}}}}"
    </programlisting>
    <para>
      There are some built-in exchanges created automatically by the broker, these
      exchanges are never replicated. The built-in exchanges are the default (nameless)
      exchange, the AMQP standard exchanges (<literal>amq.direct, amq.topic, amq.fanout</literal> and
      <literal>amq.match</literal>) and the management exchanges (<literal>qpid.management, qmf.default.direct</literal> and
      <literal>qmf.default.topic</literal>).
    </para>
    <para>
      Note that if you bind a replicated queue to one of these exchanges, the
      binding will <emphasis>not</emphasis> be replicated, so the queue will not
      have the binding after a fail-over.
    </para>
  </section>

  <section id="ha-failover">
    <title>Client Connection and Fail-over</title>
    <para>
      Clients can only connect to the primary broker. Backup brokers reject any
      connection attempt by a client. Clients rejected by a backup broker will
      automatically fail-over until they connect to the primary.
    </para>
    <para>
      Clients are configured with the URL for the cluster (details below for
      each type of client). There are two possibilities:
      <itemizedlist>
	<listitem>
	  <para>
	    The URL contains multiple addresses, one for each broker in the cluster.
	  </para>
	</listitem>
	<listitem>
	  <para>
	    The URL contains a single <firstterm>virtual IP address</firstterm>
	    that is assigned to the primary broker by the resource manager.
	    This is the recommended configuration.
	  </para>
	</listitem>
      </itemizedlist>
      In the first case, clients will repeatedly re-try each address in the URL
      until they successfully connect to the primary. In the second case the
      resource manager will assign the virtual IP address to the primary broker,
      so clients only need to re-try on a single address.
    </para>
    <para>
      When the primary broker fails, clients re-try all known cluster addresses
      until they connect to the new primary.  The client re-sends any messages
      that were previously sent but not acknowledged by the broker at the time
      of the failure.  Similarly messages that have been sent by the broker, but
      not acknowledged by the client, are re-queued.
    </para>
    <para>
      TCP can be slow to detect connection failures. A client can configure a
      connection to use a <firstterm>heartbeat</firstterm> to detect connection
      failure, and can specify a time interval for the heartbeat. If heartbeats
      are in use, failures will be detected no later than twice the heartbeat
      interval. The following sections explain how to enable heartbeat in each
      client.
    </para>
    <para>
      Note: the following sections explain how to configure clients with
      multiple addresses, but if you are using a virtual IP address you only need
      to configure that one address for clients, you don't need to list all the
      addresses.
    </para>
    <para>
      Suppose your cluster has 3 nodes: <literal>node1</literal>,
      <literal>node2</literal> and <literal>node3</literal> all using the
      default AMQP port, and you are not using a virtual IP address. To connect
      a client you need to specify the address(es) and set the
      <literal>reconnect</literal> property to <literal>true</literal>. The
      following sub-sections show how to connect each type of client.
    </para>
    <section id="ha-clients">
      <title>C++ clients</title>
      <para>
	With the C++ client, you specify multiple cluster addresses in a single URL
	<footnote>
	  <para>
	    The full grammar for the URL is:
	  </para>
	  <programlisting>
url = ["amqp:"][ user ["/" password] "@" ] addr ("," addr)*
addr = tcp_addr / rdma_addr / ssl_addr / ...
tcp_addr = ["tcp:"] host [":" port]
rdma_addr = "rdma:" host [":" port]
ssl_addr = "ssl:" host [":" port]
	  </programlisting>
	</footnote>
	You also need to specify the connection option
	<literal>reconnect</literal> to be true.  For example:
      </para>
      <programlisting>
qpid::messaging::Connection c("node1,node2,node3","{reconnect:true}");
      </programlisting>
      <para>
	Heartbeats are disabled by default. You can enable them by specifying a
	heartbeat interval (in seconds) for the connection via the
	<literal>heartbeat</literal> option. For example:
      </para>
      <programlisting>
qpid::messaging::Connection c("node1,node2,node3","{reconnect:true,heartbeat:10}");
      </programlisting>
    </section>
    <section id="ha-python-client">
      <title>Python clients</title>
      <para>
	With the python client, you specify <literal>reconnect=True</literal>
	and a list of <replaceable>host:port</replaceable> addresses as
	<literal>reconnect_urls</literal> when calling
	<literal>Connection.establish</literal> or
	<literal>Connection.open</literal>:
      </para>
      <programlisting>
connection = qpid.messaging.Connection.establish("node1", reconnect=True, reconnect_urls=["node1", "node2", "node3"])
      </programlisting>
      <para>
	Heartbeats are disabled by default. You can
	enable them by specifying a heartbeat interval (in seconds) for the
	connection via the &#39;heartbeat&#39; option. For example:
      </para>
      <programlisting>
connection = qpid.messaging.Connection.establish("node1", reconnect=True, reconnect_urls=["node1", "node2", "node3"], heartbeat=10)
      </programlisting>
    </section>
    <section id="ha-jms-client">
      <title>Java JMS Clients</title>
      <para>
	In Java JMS clients, client fail-over is handled automatically if it is
	enabled in the connection.  You can configure a connection to use
	fail-over using the <command>failover</command> property:
      </para>

      <screen>
	connectionfactory.qpidConnectionfactory = amqp://guest:guest@clientid/test?brokerlist=&#39;tcp://localhost:5672&#39;&amp;failover=&#39;failover_exchange&#39;
      </screen>
      <para>
	This property can take three values:
      </para>
      <variablelist>
	<title>Fail-over Modes</title>
	<varlistentry>
	  <term>failover_exchange</term>
	  <listitem>
	    <para>
	      If the connection fails, fail over to any other broker in the cluster.
	    </para>

	  </listitem>

	</varlistentry>
	<varlistentry>
	  <term>roundrobin</term>
	  <listitem>
	    <para>
	      If the connection fails, fail over to one of the brokers specified in the <command>brokerlist</command>.
	    </para>

	  </listitem>

	</varlistentry>
	<varlistentry>
	  <term>singlebroker</term>
	  <listitem>
	    <para>
	      Fail-over is not supported; the connection is to a single broker only.
	    </para>

	  </listitem>

	</varlistentry>

      </variablelist>
      <para>
	In a Connection URL, heartbeat is set using the <command>heartbeat</command> property, which is an integer corresponding to the heartbeat period in seconds. For instance, the following line from a JNDI properties file sets the heartbeat time out to 3 seconds:
      </para>

      <screen>
	connectionfactory.qpidConnectionfactory = amqp://guest:guest@clientid/test?brokerlist=&#39;tcp://localhost:5672&#39;&amp;heartbeat=&#39;3&#39;
      </screen>
    </section>
  </section>

  <section id="ha-security">
    <title>Security and Access Control</title>
    <para>
      This section outlines the HA specific aspects of security configuration.
      Please see <xref linkend="chap-Messaging_User_Guide-Security"/> for
      more details on enabling authentication and setting up Access Control Lists.
    </para>
    <note>
      <para>
	Unless you disable authentication with <literal>auth=no</literal> in
	your configuration, you <emphasis>must</emphasis> set the options below
	and you <emphasis>must</emphasis> have an ACL file with at least the
	entry described below.
      </para>
      <para>
	Backups will be <emphasis>unable to connect to the primary</emphasis> if
	the security configuration is incorrect. See also <xref
	linkend="ha-troubleshoot-security"/>
      </para>
    </note>
    <para>
      When authentication is enabled you must set the credentials used by HA
      brokers with the following options:
    </para>
    <table frame="all" id="ha-security-options">
      <title>HA Security Options</title>
      <tgroup align="left" cols="2" colsep="1" rowsep="1">
	<colspec colname="c1"/>
	<colspec colname="c2"/>
	<thead>
	  <row>
	    <entry align="center" nameend="c2" namest="c1">
	      HA Security Options
	    </entry>
	  </row>
	</thead>
	<tbody>
	  <row>
	    <entry><para><literal>ha-username</literal> <replaceable>USER</replaceable></para></entry>
	    <entry><para>User name for HA brokers. Note this must <emphasis>not</emphasis> include the <literal>@QPID</literal> suffix.</para></entry>
	  </row>
	  <row>
	    <entry><para><literal>ha-password</literal> <replaceable>PASS</replaceable></para></entry>
	    <entry><para>Password for HA brokers.</para></entry>
	  </row>
	  <row>
	    <entry><para><literal>ha-mechanism</literal> <replaceable>MECHANISM</replaceable></para></entry>
	    <entry>
	      <para>
		Mechanism for HA brokers. Any mechanism you enable for
		broker-to-broker communication can also be used by a client, so
		do not use ha-mechanism=ANONYMOUS in a secure environment.
	      </para>
	    </entry>
	  </row>
	</tbody>
      </tgroup>
    </table>
    <para>
      This identity is used to authorize federation links from backup to
      primary.  It is also used to authorize actions on the backup to replicate
      primary state, for example creating queues and exchanges.
    </para>
    <para>
      When authorization is enabled you must have an Access Control List with the
      following rule to allow HA replication to function. Suppose
      <literal>ha-username</literal>=<replaceable>USER</replaceable>
    </para>
    <programlisting>
acl allow <replaceable>USER</replaceable>@QPID all all
    </programlisting>
  </section>

  <section id="ha-other-rm">
    <title>Integrating with other Cluster Resource Managers</title>
    <para>
      To integrate with a different resource manager you must configure it to:
      <itemizedlist>
	<listitem><para>Start a qpidd process on each node of the cluster.</para></listitem>
	<listitem><para>Restart qpidd if it crashes.</para></listitem>
	<listitem><para>Promote exactly one of the brokers to primary.</para></listitem>
	<listitem><para>Detect a failure and promote a new primary.</para></listitem>
      </itemizedlist>
    </para>
    <para>
      The <command>qpid-ha</command> command allows you to check if a broker is
      primary, and to promote a backup to primary.
    </para>
    <para>
      To test if a broker is the primary:
    </para>
    <programlisting>qpid-ha -b <replaceable>broker-address</replaceable> status --expect=primary</programlisting>
    <para>
      This will return 0 if the broker at <replaceable>broker-address</replaceable> is the primary,
      non-0 otherwise.
    </para>
    <para>
      To promote a broker to primary:
    <programlisting>qpid-ha --cluster-manager -b <replaceable>broker-address</replaceable> promote</programlisting>
    </para>
    <para>
      Note that <literal>promote</literal> is considered a "cluster manager
      only" command. Incorrect use of <literal>promote</literal> outside of the
      cluster manager could create a cluster with multiple primaries. Such a
      cluster will malfunction and lose data.  "Cluster manager only" commands
      are not accessible in <command>qpid-ha</command> without the
      <literal>--cluster-manager</literal> option.
    </para>
    <para>
      To list the full set of commands use:
    </para>
    <programlisting>
qpid-ha --cluster-manager --help
    </programlisting>
  </section>

  <section id="ha-store">
    <title>Using a message store in a cluster</title>
    <para>
      If you use a persistent store for your messages then each broker in a
      cluster will have its own store. If the entire cluster fails and is
      restarted, the <emphasis>first</emphasis> broker that becomes primary will recover from its
      store. All the other brokers will clear their stores and get an update
      from the primary to ensure consistency.
    </para>
  </section>

  <section id="ha-troubleshoot">
    <title>Troubleshooting a cluster</title>
    <para>
      This section applies to clusters that are using rgmanager as the
      cluster manager.
    </para>
    <section id="ha-troubleshoot-no-primary">
      <title>No primary broker</title>
      <para>
	When you initially start an HA cluster, all brokers are in
	<literal>joining</literal> mode. The brokers do not automatically select
	a primary, they rely on the cluster manager <literal>rgmanager</literal>
	to do so. If <literal>rgmanager</literal> is not running or is not
	configured correctly, brokers will remain in the
	<literal>joining</literal> state. See <xref linkend="ha-rm-config"/>
      </para>
    </section>
    <section id="ha-troubleshoot-security">
      <title>Authentication and ACL failures</title>
      <para>
	If a broker is unable to establish a connection to another broker in the
	cluster due to authentication or ACL problems the logs may contain
	errors like the following:
	<programlisting>
info SASL: Authentication failed: SASL(-13): user not found: Password verification failed
	</programlisting>
	<programlisting>
warning Client closed connection with 320: User anonymous@QPID federation connection denied. Systems with authentication enabled must specify ACL create link rules.
	</programlisting>
	<programlisting>
warning Client closed connection with 320: ACL denied anonymous@QPID creating a federation link.
	</programlisting>
      </para>
      <para>
	Set the HA security configuration and ACL file as described in <xref
	linkend="ha-security"/>.  Once the cluster is running and the primary is
	promoted, run:
	<programlisting>qpid-ha status --all</programlisting>
	to make sure that the brokers are running as one cluster.
      </para>
    </section>
    <section id="ha-troubleshoot-slow-recovery">
      <title>Slow recovery times</title>
      <para>
	The following configuration settings affect recovery time. The
	values shown are examples that give fast recovery on a lightly
	loaded system. You should run tests to determine if the values are
	appropriate for your system and load conditions.
      </para>
      <section id="ha-troubleshoot-cluster.conf">
	<title>cluster.conf:</title>
	<programlisting>
&lt;rm status_poll_interval=&quot;1&quot;&gt;
	</programlisting>
	<para>
	  status_poll_interval is the interval in seconds that the
	  resource manager checks the status of managed services. This
	  affects how quickly the manager will detect failed services.
	</para>
	<programlisting>
&lt;ip address=&quot;20.0.20.200&quot; monitor_link=&quot;yes&quot; sleeptime=&quot;0&quot;/&gt;
	</programlisting>
	<para>
	  This is a virtual IP address for client traffic.
	  monitor_link=&quot;yes&quot; means monitor the health of the network interface
	  used for the VIP. sleeptime=&quot;0&quot; means don't delay when
	  failing over the VIP to a new address.
	</para>
      </section>
      <section id="ha-troubleshoot-qpidd.conf">
	<title>qpidd.conf</title>
	<programlisting>
link-maintenance-interval=0.1
	</programlisting>
	<para>
	  Interval for backup brokers to check the link to the primary
	  and re-connect if need be. Default 2 seconds. Can be set lower for
	  faster fail-over. Setting too low will result in excessive
	  link-checking activity on the broker.
	</para>
	<programlisting>
link-heartbeat-interval=5
	</programlisting>
	<para>
	  Heartbeat interval for federation links. The HA cluster uses
	  federation links between the primary and each backup. The
	  primary can take up to twice the heartbeat interval to detect a
	  failed backup. When a sender sends a message the primary waits
	  for all backups to acknowledge before acknowledging to the
	  sender. A disconnected backup may cause the primary to block
	  senders until it is detected via heartbeat.
	</para>
	<para>
	  This interval is also used as the timeout for broker status
	  checks by rgmanager. It may take up to this interval for
	  rgmanager to detect a hung broker.
	</para>
	<para>
	  The default of 120 seconds is very high, you will probably want
	  to set this to a lower value. If set too low, under network
	  congestion or heavy load, a slow-to-respond broker may be
	  re-started by rgmanager.
	</para>
      </section>
    </section>
    <section id="ha-troubleshoot-total-cluster-failure">
      <title>Total cluster failure</title>
      <para>
	Note: for definition of broker states <firstterm>joining</firstterm>,
	<firstterm>catch-up</firstterm>, <firstterm>ready</firstterm>,
	<firstterm>recovering</firstterm> and <firstterm>active</firstterm> see
	<xref linkend="ha-broker-states"/>
      </para>
      <para>
	The cluster can only guarantee availability as long as there is at
	least one active primary broker or ready backup broker left alive.
	If all the brokers fail simultaneously, the cluster will fail and
	non-persistent data will be lost.
      </para>
      <para>
	While there is an active primary broker, clients can get service.
	If the active primary fails, one of the &quot;ready&quot; backup
	brokers will take over, recover and become active. Note a backup
	can only be promoted to primary if it is in the &quot;ready&quot;
	state (with the exception of the first primary in a new cluster
	where all brokers are in the &quot;joining&quot; state).
      </para>
      <para>
	Given a stable cluster of N brokers with one active primary and
	N-1 ready backups, the system can sustain up to N-1 failures in
	rapid succession. The surviving broker will be promoted to active
	and continue to give service.
      </para>
      <para>
	However at this point the system <emphasis>cannot</emphasis>
	sustain a failure of the surviving broker until at least one of
	the other brokers recovers, catches up and becomes a ready backup.
	If the surviving broker fails before that the cluster will fail in
	one of two modes (depending on the exact timing of failures):
      </para>
      <section id="ha-troubleshoot-the-cluster-hangs">
	<title>1. The cluster hangs</title>
	<para>
	  All brokers are in joining or catch-up mode. rgmanager tries to
	  promote a new primary but cannot find any candidates and so
	  gives up. clustat will show that the qpidd services are running
	  but the qpidd-primary service has stopped, something like
	  this:
	</para>
	<programlisting>
Service Name                   Owner (Last)                   State
------- ----                   ----- ------                   -----
service:mrg33-qpidd-service    20.0.10.33                     started
service:mrg34-qpidd-service    20.0.10.34                     started
service:mrg35-qpidd-service    20.0.10.35                     started
service:qpidd-primary-service  (20.0.10.33)                   stopped
	</programlisting>
	<para>
	  Eventually all brokers become stuck in &quot;joining&quot; mode,
	  as shown by: <literal>qpid-ha status --all</literal>
	</para>
	<para>
	  At this point you need to restart the cluster in one of the
	  following ways:
	  <orderedlist>
	    <listitem><para>
	      Restart the entire cluster:
	      In <literal>luci:<replaceable>your-cluster</replaceable>:Nodes</literal>
	      click reboot to restart the entire cluster
	    </para></listitem>
	    <listitem><para>
	      Stop and restart the cluster with
	      <literal>ccs --stopall; ccs --startall</literal>
	    </para></listitem>
	    <listitem><para>
	      Restart just the Qpid services: In <literal>luci:<replaceable>your-cluster</replaceable>:Service Groups</literal>
	      <orderedlist>
		<listitem><para>Select all the qpidd (not qpidd-primary) services, click restart</para></listitem>
		<listitem><para>Select the qpidd-primary service, click restart</para></listitem>
	      </orderedlist>
	    </para></listitem>
	    <listitem><para>
	      Stop the <literal>qpidd-primary</literal> and
	      <literal>qpidd</literal> services with <literal>clusvcadm</literal>,
	      then restart (qpidd-primary last)
	    </para></listitem>
	  </orderedlist>
	</para>
      </section>
      <section id="ha-troubleshoot-the-cluster-reboots">
	<title>2. The cluster reboots</title>
	<para>
	  A new primary is promoted and the cluster is functional but all
	  non-persistent data from before the failure is lost.
	</para>
      </section>
    </section>
    <section id="ha-troubleshoot-fencing-and-network-partitions">
      <title>Fencing and network partitions</title>
      <para>
	A network partition is a network failure that divides the
	cluster into two or more sub-clusters, where each broker can
	communicate with brokers in its own sub-cluster but not with
	brokers in other sub-clusters. This condition is also referred to
	as a &quot;split brain&quot;.
      </para>
      <para>
	Nodes in one sub-cluster can't tell whether nodes in other
	sub-clusters are dead or are still running but disconnected. We
	cannot allow each sub-cluster to independently declare its own
	qpidd primary and start serving clients, as the cluster will
	become inconsistent. We must ensure only one sub-cluster continues
	to provide service.
      </para>
      <para>
	A <emphasis>quorum</emphasis> determines which sub-cluster
	continues to operate, and <emphasis>power fencing</emphasis>
	ensures that nodes in non-quorate sub-clusters cannot attempt to
	provide service inconsistently. For more information see:
      </para>
      <para>
	https://access.redhat.com/site/documentation/en-US/Red_Hat_Enterprise_Linux/6/html-single/High_Availability_Add-On_Overview/index.html,
	chapter 2. Quorum and 4. Fencing.
      </para>
    </section>
  </section>
</section>