summaryrefslogtreecommitdiff
path: root/qpid/cpp/src/qpid/cluster/WatchDogPlugin.cpp
blob: 1b813411f69d782bf8de2d6cbac9719963c9f77c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
/*
 *
 * Copyright (c) 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

#include "qpid/Plugin.h"
#include "qpid/Options.h"
#include "qpid/log/Statement.h"
#include "qpid/broker/Broker.h"
#include "qpid/sys/Timer.h"
#include "qpid/sys/Fork.h"
#include <sys/types.h>
#include <sys/wait.h>
#include <signal.h>

namespace qpid {
namespace cluster {

using broker::Broker;

struct Settings {
    Settings() : interval(0) {}
    int interval;
};

struct WatchDogOptions : public qpid::Options {
    Settings& settings;

    WatchDogOptions(Settings& s) : settings(s) {
        addOptions()
            ("watchdog-interval", optValue(settings.interval, "N"),
             "broker is automatically killed if it is hung for more than \
	      N seconds. 0 disables watchdog.");
    }
};

struct WatchDogTask : public sys::TimerTask {
    int pid;
    sys::Timer& timer;
    int interval;

    WatchDogTask(int pid_, sys::Timer& t, int _interval)
        : TimerTask(_interval*sys::TIME_SEC/2), pid(pid_), timer(t), interval(_interval) {}

    void fire() {
        timer.add (new WatchDogTask(pid, timer, interval));
        QPID_LOG(debug, "Sending keepalive signal to watchdog");
        ::kill(pid, SIGUSR1);
    }
};

struct WatchDogPlugin : public qpid::Plugin, public qpid::sys::Fork {
    Settings settings;
    WatchDogOptions options;
    Broker* broker;
    int watchdogPid;

    WatchDogPlugin() : options(settings), broker(0), watchdogPid(0) {}

    ~WatchDogPlugin() {
        if (watchdogPid) ::kill(watchdogPid, SIGTERM);
        ::waitpid(watchdogPid, 0, 0);
    }

    Options* getOptions() { return &options; }

    void earlyInitialize(qpid::Plugin::Target& target) {
        broker = dynamic_cast<Broker*>(&target);
        if (broker && settings.interval) {
            QPID_LOG(notice, "Starting watchdog process with interval of " <<
                     settings.interval << " seconds");
            fork();
        }
    }

    void initialize(Target&) {}

  protected:

    void child() {              // Child of fork
        const char* watchdog = ::getenv("QPID_WATCHDOG_EXE"); // For use in tests
        if (!watchdog) watchdog=QPID_EXEC_DIR "/qpidd_watchdog";
        std::string interval = boost::lexical_cast<std::string>(settings.interval);
        ::execl(watchdog, watchdog, interval.c_str(), NULL);
        QPID_LOG(critical, "Failed to exec watchdog program " << watchdog );
        ::kill(::getppid(), SIGKILL);
        exit(1);
    }

    void parent(int pid) {          // Parent of fork
        watchdogPid = pid;
        broker->getTimer().add(
            new WatchDogTask(watchdogPid, broker->getTimer(), settings.interval));
        // TODO aconway 2009-08-10: to be extra safe, we could monitor
        // the watchdog child and re-start it if it exits.
    }
};

static WatchDogPlugin instance; // Static initialization.

}} // namespace qpid::cluster