summaryrefslogtreecommitdiff
path: root/qpid/java/client/src/main/java/org/apache/qpid/client/failover/FailoverHandler.java
blob: 4099da18d2dde508de5d90a791be6649293d26ec (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 */
package org.apache.qpid.client.failover;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.qpid.AMQDisconnectedException;
import org.apache.qpid.client.protocol.AMQProtocolHandler;
import org.apache.qpid.client.state.AMQState;
import org.apache.qpid.client.state.AMQStateManager;

import java.util.concurrent.CountDownLatch;

/**
 * FailoverHandler is a continuation that performs the failover procedure on a protocol session. As described in the
 * class level comment for {@link AMQProtocolHandler}, a protocol connection can span many physical transport
 * connections, failing over to a new connection if the transport connection fails. The procedure to establish a new
 * connection is expressed as a continuation, in order that it may be run in a separate thread to the i/o thread that
 * detected the failure and is used to handle the communication to establish a new connection.
 *
 * <p/>The reason this needs to be a separate thread is because this work cannot be done inside the i/o processor
 * thread. The significant task is the connection setup which involves a protocol exchange until a particular state
 * is achieved. This procedure waits until the state is achieved which would prevent the i/o thread doing the work
 * it needs to do to achieve the new state.
 *
 * <p/>The failover procedure does the following:
 *
 * <ol>
 * <li>Sets the failing over condition to true.</li>
 * <li>Creates a {@link FailoverException} and gets the protocol connection handler to propagate this event to all
 *     interested parties.</li>
 * <li>Takes the failover mutex on the protocol connection handler.</li>
 * <li>Abandons the fail over if any of the interested parties vetoes it. The mutex is released and the condition
 *     reset.</li>
 * <li>Creates a new {@link AMQStateManager} and re-establishes the connection through it.</li>
 * <li>Informs the AMQConnection if the connection cannot be re-established.</li>
 * <li>Recreates all sessions from the old connection to the new.</li>
 * <li>Resets the failing over condition and releases the mutex.</li>
 * </ol>
 *
 * <p/><table id="crc"><caption>CRC Card</caption>
 * <tr><th> Responsibilities <th> Collaborations
 * <tr><td> Update fail-over state <td> {@link AMQProtocolHandler}
 * </table>
 *
 * @todo The failover latch and mutex are used like a lock and condition. If the retrotranslator supports lock/condition
 *       then could change over to using them. 1.4 support still needed.
 *
 * @todo If the condition is set to null on a vetoed fail-over and there are already other threads waiting on the
 *       condition, they will never be released. It might be an idea to reset the condition in a finally block.
 *
 * @todo Creates a {@link AMQDisconnectedException} and passes it to the AMQConnection. No need to use an
 *       exception-as-argument here, could just as easily call a specific method for this purpose on AMQConnection.
 *
 * @todo Creates a {@link FailoverException} and propagates it to the MethodHandlers. No need to use an
 *       exception-as-argument here, could just as easily call a specific method for this purpose on
 *       {@link org.apache.qpid.protocol.AMQMethodListener}.
 */
public class FailoverHandler implements Runnable
{
    /** Used for debugging. */
    private static final Logger _logger = LoggerFactory.getLogger(FailoverHandler.class);

    /** Holds the protocol handler for the failed connection, upon which the new connection is to be set up. */
    private final AMQProtocolHandler _amqProtocolHandler;

    /**
     * Used to hold the host to fail over to. This is optional; when it is <tt>null</tt> the no-argument
     * reconnection attempt on the connection is used instead of a redirect to a specific host.
     */
    private String _host;

    /** Used to hold the port to fail over to. Only consulted when a host has been set. */
    private int _port;

    /**
     * Creates a failover handler on a protocol session, for a particular MINA session (network connection).
     *
     * @param amqProtocolHandler The protocol handler that spans the failover.
     */
    public FailoverHandler(AMQProtocolHandler amqProtocolHandler)
    {
        _amqProtocolHandler = amqProtocolHandler;
    }

    /**
     * Performs the failover procedure.
     *
     * <p/>A fresh failover latch is installed on the protocol handler, listeners are notified that failover is
     * starting, and then, while holding the connection's failover mutex, the client is asked whether failover may
     * proceed. If it may, a reconnection is attempted (to the configured host and port when a host has been set,
     * otherwise through the connection's no-argument reconnection attempt) and, on success, the sessions are
     * resubscribed unless the client vetoes that too.
     *
     * <p/>The latch created by this invocation is always counted down before this method exits, including when an
     * unchecked exception escapes the procedure, so that tasks waiting on it for failover to finish cannot be left
     * blocked indefinitely.
     *
     * @throws IllegalStateException If invoked on a daemon thread.
     */
    public void run()
    {
        if (Thread.currentThread().isDaemon())
        {
            throw new IllegalStateException("FailoverHandler must run on a non-daemon thread.");
        }

        // Create a latch, upon which tasks that must not run in parallel with a failover can wait for completion of
        // the fail over. A local reference is kept so that the latch released at the end is guaranteed to be the
        // one created here, whatever latch the protocol handler happens to hold by then.
        final CountDownLatch failoverLatch = new CountDownLatch(1);
        _amqProtocolHandler.setFailoverLatch(failoverLatch);

        try
        {
            // We wake up listeners. If they can handle failover, they will extend the
            // FailoverRetrySupport class and will in turn block on the latch until failover
            // has completed before retrying the operation.
            _amqProtocolHandler.notifyFailoverStarting();

            // Since failover impacts several structures we protect them all with a single mutex. These structures
            // are also in child objects of the connection. This allows us to manipulate them without affecting
            // client code which runs in a separate thread.
            synchronized (_amqProtocolHandler.getConnection().getFailoverMutex())
            {
                // Now that we hold the failover mutex there can be no one else waiting for a frame, so the
                // protocol handler can be told that failover is in progress.
                _amqProtocolHandler.failoverInProgress();

                // We switch in a new state manager temporarily so that the interaction to get to the "connection
                // open" state works, without us having to terminate any existing "state waiters". We could
                // theoretically have a state waiter waiting until the connection is closed for some reason.
                AMQStateManager existingStateManager = _amqProtocolHandler.getStateManager();

                // Use a fresh new StateManager for the reconnection attempts.
                _amqProtocolHandler.setStateManager(new AMQStateManager());

                if (!_amqProtocolHandler.getConnection().firePreFailover(_host != null))
                {
                    _logger.info("Failover process veto-ed by client");

                    // Restore the existing state manager.
                    _amqProtocolHandler.setStateManager(existingStateManager);

                    //todo: ritchiem these exceptions are useless... Would be better to attempt to propagate the
                    // exception that prompted the failover event.
                    if (_host != null)
                    {
                        _amqProtocolHandler.getConnection().exceptionReceived(new AMQDisconnectedException("Redirect was vetoed by client", null));
                    }
                    else
                    {
                        _amqProtocolHandler.getConnection().exceptionReceived(new AMQDisconnectedException("Failover was vetoed by client", null));
                    }

                    // Release any waiters before clearing the latch from the protocol handler. The count down in
                    // the finally block below then has no further effect, as the count is already zero.
                    failoverLatch.countDown();
                    _amqProtocolHandler.setFailoverLatch(null);

                    return;
                }

                _logger.info("Starting failover process");

                boolean failoverSucceeded;

                // When a host has been set we are performing a redirect to that specific host and port; otherwise
                // we allow the connection to make its own reconnection attempt.
                if (_host != null)
                {
                    failoverSucceeded = _amqProtocolHandler.getConnection().attemptReconnection(_host, _port);
                }
                else
                {
                    failoverSucceeded = _amqProtocolHandler.getConnection().attemptReconnection();
                }

                if (!failoverSucceeded)
                {
                    // Restore the existing state manager.
                    _amqProtocolHandler.setStateManager(existingStateManager);

                    _amqProtocolHandler.getConnection().exceptionReceived(
                            new AMQDisconnectedException("Server closed connection and no failover " +
                                    "was successful", null));
                }
                else
                {
                    // Set the new protocol session in the old state manager.
                    existingStateManager.setProtocolSession(_amqProtocolHandler.getProtocolSession());

                    // Now that the protocol handler has been reconnected, clean up the state of the old state
                    // manager. If we simply reinstated it, any old exception that had occurred prior to failover
                    // may prohibit reconnection, e.g. during testing when the broker is shut down gracefully.
                    if (existingStateManager.getCurrentState() != AMQState.CONNECTION_OPEN)
                    {
                        // Clear the state of the previous state manager as it may have received an exception.
                        existingStateManager.clearLastException();
                        existingStateManager.changeState(AMQState.CONNECTION_OPEN);
                    }

                    // Restore the existing state manager.
                    _amqProtocolHandler.setStateManager(existingStateManager);
                    try
                    {
                        if (_amqProtocolHandler.getConnection().firePreResubscribe())
                        {
                            _logger.info("Resubscribing on new connection");
                            _amqProtocolHandler.getConnection().resubscribeSessions();
                        }
                        else
                        {
                            _logger.info("Client vetoed automatic resubscription");
                        }

                        _amqProtocolHandler.getConnection().fireFailoverComplete();
                        _amqProtocolHandler.setFailoverState(FailoverState.NOT_STARTED);
                        _logger.info("Connection failover completed successfully");
                    }
                    catch (Exception e)
                    {
                        _logger.info("Failover process failed - exception being propagated by protocol handler");
                        _amqProtocolHandler.setFailoverState(FailoverState.FAILED);
                        _amqProtocolHandler.exception(e);
                    }
                }
            }
        }
        finally
        {
            // Always release tasks blocked on this failover, even if the procedure above terminated abruptly.
            // This runs after the failover mutex has been released.
            failoverLatch.countDown();
        }
    }

    /**
     * Sets the host name to fail over to. This is optional; if it is not set the connection's no-argument
     * reconnection attempt is used.
     *
     * @param host The host name to fail over to.
     */
    public void setHost(String host)
    {
        _host = host;
    }

    /**
     * Sets the port to fail over to.
     *
     * @param port The port to fail over to.
     */
    public void setPort(int port)
    {
        _port = port;
    }
}