summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Klötzke <Jan.Kloetzke@preh.de>2019-04-16 16:45:20 +0200
committerLennart Poettering <lennart@poettering.net>2020-06-09 10:04:57 +0200
commitbf760801804e55b045aed54bf9b1d0b0131be3f2 (patch)
tree4acadce5c9448df43d80753ee182ca3cb4d44ad9
parent8b5616fa913bf475d0bcec771ae0722b7a20c5e9 (diff)
downloadsystemd-bf760801804e55b045aed54bf9b1d0b0131be3f2.tar.gz
core: let user define start-/stop-timeout behaviour
The usual behaviour when a timeout expires is to terminate/kill the service. This is what user usually want in production systems. To debug services that fail to start/stop (especially sporadic failures) it might be necessary to trigger the watchdog machinery and write core dumps, though. Likewise, it is usually just a waste of time to gracefully stop a stuck service. Instead it might save time to go directly into kill mode. This commit adds two new options to services: TimeoutStartFailureMode= and TimeoutStopFailureMode=. Both take the same values and tweak the behavior of systemd when a start/stop timeout expires: * 'terminate': is the default behaviour as it has always been, * 'abort': triggers the watchdog machinery and will send SIGABRT (unless WatchdogSignal was changed) and * 'kill' will directly send SIGKILL. To handle the stop failure mode in stop-post state too a new final-watchdog state needs to be introduced.
-rw-r--r--man/systemd.service.xml41
-rw-r--r--src/basic/unit-def.c1
-rw-r--r--src/basic/unit-def.h1
-rw-r--r--src/core/dbus-service.c10
-rw-r--r--src/core/load-fragment-gperf.gperf.m42
-rw-r--r--src/core/load-fragment.c2
-rw-r--r--src/core/load-fragment.h1
-rw-r--r--src/core/service.c167
-rw-r--r--src/core/service.h13
-rw-r--r--src/shared/bus-unit-util.c4
10 files changed, 200 insertions, 42 deletions
diff --git a/man/systemd.service.xml b/man/systemd.service.xml
index e8c869244a..a4a49f39af 100644
--- a/man/systemd.service.xml
+++ b/man/systemd.service.xml
@@ -560,16 +560,12 @@
<varlistentry>
<term><varname>TimeoutStartSec=</varname></term>
- <listitem><para>Configures the time to wait for start-up. If a
- daemon service does not signal start-up completion within the
- configured time, the service will be considered failed and
- will be shut down again. Takes a unit-less value in seconds,
- or a time span value such as "5min 20s". Pass
- <literal>infinity</literal> to disable the timeout logic. Defaults to
- <varname>DefaultTimeoutStartSec=</varname> from the manager
- configuration file, except when
- <varname>Type=oneshot</varname> is used, in which case the
- timeout is disabled by default (see
+ <listitem><para>Configures the time to wait for start-up. If a daemon service does not signal start-up
+ completion within the configured time, the service will be considered failed and will be shut down again. The
+ precise action depends on the <varname>TimeoutStartFailureMode=</varname> option. Takes a unit-less value in
+ seconds, or a time span value such as "5min 20s". Pass <literal>infinity</literal> to disable the timeout logic.
+ Defaults to <varname>DefaultTimeoutStartSec=</varname> from the manager configuration file, except when
+ <varname>Type=oneshot</varname> is used, in which case the timeout is disabled by default (see
<citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>).
</para>
@@ -588,7 +584,8 @@
<listitem><para>This option serves two purposes. First, it configures the time to wait for each
<varname>ExecStop=</varname> command. If any of them times out, subsequent <varname>ExecStop=</varname> commands
are skipped and the service will be terminated by <constant>SIGTERM</constant>. If no <varname>ExecStop=</varname>
- commands are specified, the service gets the <constant>SIGTERM</constant> immediately. Second, it configures the time
+ commands are specified, the service gets the <constant>SIGTERM</constant> immediately. This default behavior
+ can be changed by the <varname>TimeoutStopFailureMode=</varname> option. Second, it configures the time
to wait for the service itself to stop. If it doesn't terminate in the specified time, it will be forcibly terminated
by <constant>SIGKILL</constant> (see <varname>KillMode=</varname> in
<citerefentry><refentrytitle>systemd.kill</refentrytitle><manvolnum>5</manvolnum></citerefentry>).
@@ -647,6 +644,28 @@
</varlistentry>
<varlistentry>
+ <term><varname>TimeoutStartFailureMode=</varname></term>
+ <term><varname>TimeoutStopFailureMode=</varname></term>
+
+ <listitem><para>These options configure the action that is taken in case a daemon service does not signal
+ start-up within its configured <varname>TimeoutStartSec=</varname>, respectively if it does not stop within
+ <varname>TimeoutStopSec=</varname>. Takes one of <option>terminate</option>, <option>abort</option> and
+ <option>kill</option>. Both options default to <option>terminate</option>.</para>
+
+ <para>If <option>terminate</option> is set the service will be gracefully terminated by sending the signal
+ specified in <varname>KillSignal=</varname> (defaults to <constant>SIGTERM</constant>, see
+ <citerefentry><refentrytitle>systemd.kill</refentrytitle><manvolnum>5</manvolnum></citerefentry>). If the
+ service does not terminate the <varname>FinalKillSignal=</varname> is sent after
+ <varname>TimeoutStopSec=</varname>. If <option>abort</option> is set, <varname>WatchdogSignal=</varname> is sent
+ instead and <varname>TimeoutAbortSec=</varname> applies before sending <varname>FinalKillSignal=</varname>.
+ This setting may be used to analyze services that fail to start-up or shut-down intermittently.
+ By using <option>kill</option> the service is immediately terminated by sending
+ <varname>FinalKillSignal=</varname> without any further timeout. This setting can be used to expedite the
+ shutdown of failing services.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>RuntimeMaxSec=</varname></term>
<listitem><para>Configures a maximum time for the service to run. If this is used and the service has been
diff --git a/src/basic/unit-def.c b/src/basic/unit-def.c
index 64b2b2dd7e..94ec1f3d19 100644
--- a/src/basic/unit-def.c
+++ b/src/basic/unit-def.c
@@ -185,6 +185,7 @@ static const char* const service_state_table[_SERVICE_STATE_MAX] = {
[SERVICE_STOP_SIGTERM] = "stop-sigterm",
[SERVICE_STOP_SIGKILL] = "stop-sigkill",
[SERVICE_STOP_POST] = "stop-post",
+ [SERVICE_FINAL_WATCHDOG] = "final-watchdog",
[SERVICE_FINAL_SIGTERM] = "final-sigterm",
[SERVICE_FINAL_SIGKILL] = "final-sigkill",
[SERVICE_FAILED] = "failed",
diff --git a/src/basic/unit-def.h b/src/basic/unit-def.h
index a7d6781988..53419ecd8a 100644
--- a/src/basic/unit-def.h
+++ b/src/basic/unit-def.h
@@ -127,6 +127,7 @@ typedef enum ServiceState {
SERVICE_STOP_SIGTERM,
SERVICE_STOP_SIGKILL,
SERVICE_STOP_POST,
+ SERVICE_FINAL_WATCHDOG, /* In case the STOP_POST executable needs to be aborted. */
SERVICE_FINAL_SIGTERM, /* In case the STOP_POST executable hangs, we shoot that down, too */
SERVICE_FINAL_SIGKILL,
SERVICE_FAILED,
diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c
index 11680f0d69..5d4f4ef506 100644
--- a/src/core/dbus-service.c
+++ b/src/core/dbus-service.c
@@ -29,6 +29,7 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_notify_access, notify_access, N
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_emergency_action, emergency_action, EmergencyAction);
static BUS_DEFINE_PROPERTY_GET(property_get_timeout_abort_usec, "t", Service, service_timeout_abort_usec);
static BUS_DEFINE_PROPERTY_GET(property_get_watchdog_usec, "t", Service, service_get_watchdog_usec);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_timeout_failure_mode, service_timeout_failure_mode, ServiceTimeoutFailureMode);
static int property_get_exit_status_set(
sd_bus *bus,
@@ -101,6 +102,8 @@ const sd_bus_vtable bus_service_vtable[] = {
SD_BUS_PROPERTY("TimeoutStartUSec", "t", bus_property_get_usec, offsetof(Service, timeout_start_usec), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("TimeoutStopUSec", "t", bus_property_get_usec, offsetof(Service, timeout_stop_usec), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("TimeoutAbortUSec", "t", property_get_timeout_abort_usec, 0, 0),
+ SD_BUS_PROPERTY("TimeoutStartFailureMode", "s", property_get_timeout_failure_mode, offsetof(Service, timeout_start_failure_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TimeoutStopFailureMode", "s", property_get_timeout_failure_mode, offsetof(Service, timeout_stop_failure_mode), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RuntimeMaxUSec", "t", bus_property_get_usec, offsetof(Service, runtime_max_usec), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("WatchdogUSec", "t", property_get_watchdog_usec, 0, 0),
BUS_PROPERTY_DUAL_TIMESTAMP("WatchdogTimestamp", offsetof(Service, watchdog_timestamp), 0),
@@ -259,6 +262,7 @@ static BUS_DEFINE_SET_TRANSIENT_PARSE(service_type, ServiceType, service_type_fr
static BUS_DEFINE_SET_TRANSIENT_PARSE(service_restart, ServiceRestart, service_restart_from_string);
static BUS_DEFINE_SET_TRANSIENT_PARSE(oom_policy, OOMPolicy, oom_policy_from_string);
static BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(bus_name, sd_bus_service_name_is_valid);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(timeout_failure_mode, ServiceTimeoutFailureMode, service_timeout_failure_mode_from_string);
static int bus_service_set_transient_property(
Service *s,
@@ -316,6 +320,12 @@ static int bus_service_set_transient_property(
return r;
}
+ if (streq(name, "TimeoutStartFailureMode"))
+ return bus_set_transient_timeout_failure_mode(u, name, &s->timeout_start_failure_mode, message, flags, error);
+
+ if (streq(name, "TimeoutStopFailureMode"))
+ return bus_set_transient_timeout_failure_mode(u, name, &s->timeout_stop_failure_mode, message, flags, error);
+
if (streq(name, "RuntimeMaxUSec"))
return bus_set_transient_usec(u, name, &s->runtime_max_usec, message, flags, error);
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index c76d08b3a6..69598e8430 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -322,6 +322,8 @@ Service.TimeoutSec, config_parse_service_timeout, 0,
Service.TimeoutStartSec, config_parse_service_timeout, 0, 0
Service.TimeoutStopSec, config_parse_sec_fix_0, 0, offsetof(Service, timeout_stop_usec)
Service.TimeoutAbortSec, config_parse_service_timeout_abort, 0, 0
+Service.TimeoutStartFailureMode, config_parse_service_timeout_failure_mode, 0, offsetof(Service, timeout_start_failure_mode)
+Service.TimeoutStopFailureMode, config_parse_service_timeout_failure_mode, 0, offsetof(Service, timeout_stop_failure_mode)
Service.RuntimeMaxSec, config_parse_sec, 0, offsetof(Service, runtime_max_usec)
Service.WatchdogSec, config_parse_sec, 0, offsetof(Service, watchdog_usec)
m4_dnl The following five only exist for compatibility, they moved into Unit, see above
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 517813986b..a2eede4cce 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -123,6 +123,7 @@ DEFINE_CONFIG_PARSE_ENUM(config_parse_protect_system, protect_system, ProtectSys
DEFINE_CONFIG_PARSE_ENUM(config_parse_runtime_preserve_mode, exec_preserve_mode, ExecPreserveMode, "Failed to parse runtime directory preserve mode");
DEFINE_CONFIG_PARSE_ENUM(config_parse_service_type, service_type, ServiceType, "Failed to parse service type");
DEFINE_CONFIG_PARSE_ENUM(config_parse_service_restart, service_restart, ServiceRestart, "Failed to parse service restart specifier");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_service_timeout_failure_mode, service_timeout_failure_mode, ServiceTimeoutFailureMode, "Failed to parse timeout failure mode");
DEFINE_CONFIG_PARSE_ENUM(config_parse_socket_bind, socket_address_bind_ipv6_only_or_bool, SocketAddressBindIPv6Only, "Failed to parse bind IPv6 only value");
DEFINE_CONFIG_PARSE_ENUM(config_parse_oom_policy, oom_policy, OOMPolicy, "Failed to parse OOM policy");
DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_ip_tos, ip_tos, int, -1, "Failed to parse IP TOS value");
@@ -4941,6 +4942,7 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_exec, "PATH [ARGUMENT [...]]" },
{ config_parse_service_type, "SERVICETYPE" },
{ config_parse_service_restart, "SERVICERESTART" },
+ { config_parse_service_timeout_failure_mode, "TIMEOUTMODE" },
{ config_parse_kill_mode, "KILLMODE" },
{ config_parse_signal, "SIGNAL" },
{ config_parse_socket_listen, "SOCKET [...]" },
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index bc72ef7745..9c30b6f882 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -30,6 +30,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_exec_coredump_filter);
CONFIG_PARSER_PROTOTYPE(config_parse_exec);
CONFIG_PARSER_PROTOTYPE(config_parse_service_timeout);
CONFIG_PARSER_PROTOTYPE(config_parse_service_timeout_abort);
+CONFIG_PARSER_PROTOTYPE(config_parse_service_timeout_failure_mode);
CONFIG_PARSER_PROTOTYPE(config_parse_service_type);
CONFIG_PARSER_PROTOTYPE(config_parse_service_restart);
CONFIG_PARSER_PROTOTYPE(config_parse_socket_bindtodevice);
diff --git a/src/core/service.c b/src/core/service.c
index 340b655059..8b3fd2e5a4 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -56,6 +56,7 @@ static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = {
[SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING,
[SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING,
[SERVICE_STOP_POST] = UNIT_DEACTIVATING,
+ [SERVICE_FINAL_WATCHDOG] = UNIT_DEACTIVATING,
[SERVICE_FINAL_SIGTERM] = UNIT_DEACTIVATING,
[SERVICE_FINAL_SIGKILL] = UNIT_DEACTIVATING,
[SERVICE_FAILED] = UNIT_FAILED,
@@ -79,6 +80,7 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] =
[SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING,
[SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING,
[SERVICE_STOP_POST] = UNIT_DEACTIVATING,
+ [SERVICE_FINAL_WATCHDOG] = UNIT_DEACTIVATING,
[SERVICE_FINAL_SIGTERM] = UNIT_DEACTIVATING,
[SERVICE_FINAL_SIGKILL] = UNIT_DEACTIVATING,
[SERVICE_FAILED] = UNIT_FAILED,
@@ -857,10 +859,14 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) {
fprintf(f,
"%sRestartSec: %s\n"
"%sTimeoutStartSec: %s\n"
- "%sTimeoutStopSec: %s\n",
+ "%sTimeoutStopSec: %s\n"
+ "%sTimeoutStartFailureMode: %s\n"
+ "%sTimeoutStopFailureMode: %s\n",
prefix, format_timespan(buf_restart, sizeof(buf_restart), s->restart_usec, USEC_PER_SEC),
prefix, format_timespan(buf_start, sizeof(buf_start), s->timeout_start_usec, USEC_PER_SEC),
- prefix, format_timespan(buf_stop, sizeof(buf_stop), s->timeout_stop_usec, USEC_PER_SEC));
+ prefix, format_timespan(buf_stop, sizeof(buf_stop), s->timeout_stop_usec, USEC_PER_SEC),
+ prefix, service_timeout_failure_mode_to_string(s->timeout_start_failure_mode),
+ prefix, service_timeout_failure_mode_to_string(s->timeout_stop_failure_mode));
if (s->timeout_abort_set)
fprintf(f,
@@ -1072,7 +1078,7 @@ static void service_set_state(Service *s, ServiceState state) {
SERVICE_RUNNING,
SERVICE_RELOAD,
SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
- SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
+ SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
SERVICE_AUTO_RESTART,
SERVICE_CLEANING))
s->timer_event_source = sd_event_source_unref(s->timer_event_source);
@@ -1081,7 +1087,7 @@ static void service_set_state(Service *s, ServiceState state) {
SERVICE_START, SERVICE_START_POST,
SERVICE_RUNNING, SERVICE_RELOAD,
SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
- SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
+ SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
service_unwatch_main_pid(s);
s->main_command = NULL;
}
@@ -1090,7 +1096,7 @@ static void service_set_state(Service *s, ServiceState state) {
SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
SERVICE_RELOAD,
SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
- SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
+ SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
SERVICE_CLEANING)) {
service_unwatch_control_pid(s);
s->control_command = NULL;
@@ -1106,7 +1112,7 @@ static void service_set_state(Service *s, ServiceState state) {
SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
SERVICE_RUNNING, SERVICE_RELOAD,
SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
- SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL) &&
+ SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL) &&
!(state == SERVICE_DEAD && UNIT(s)->job))
service_close_socket_fd(s);
@@ -1154,6 +1160,7 @@ static usec_t service_coldplug_timeout(Service *s) {
return usec_add(UNIT(s)->state_change_timestamp.monotonic, s->timeout_stop_usec);
case SERVICE_STOP_WATCHDOG:
+ case SERVICE_FINAL_WATCHDOG:
return usec_add(UNIT(s)->state_change_timestamp.monotonic, service_timeout_abort_usec(s));
case SERVICE_AUTO_RESTART:
@@ -1187,7 +1194,7 @@ static int service_coldplug(Unit *u) {
SERVICE_START, SERVICE_START_POST,
SERVICE_RUNNING, SERVICE_RELOAD,
SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
- SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))) {
+ SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))) {
r = unit_watch_pid(UNIT(s), s->main_pid, false);
if (r < 0)
return r;
@@ -1199,7 +1206,7 @@ static int service_coldplug(Unit *u) {
SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
SERVICE_RELOAD,
SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
- SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
+ SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
SERVICE_CLEANING)) {
r = unit_watch_pid(UNIT(s), s->control_pid, false);
if (r < 0)
@@ -1859,6 +1866,7 @@ static int state_to_kill_operation(Service *s, ServiceState state) {
switch (state) {
case SERVICE_STOP_WATCHDOG:
+ case SERVICE_FINAL_WATCHDOG:
return KILL_WATCHDOG;
case SERVICE_STOP_SIGTERM:
@@ -1879,7 +1887,7 @@ static int state_to_kill_operation(Service *s, ServiceState state) {
}
static void service_enter_signal(Service *s, ServiceState state, ServiceResult f) {
- int r;
+ int kill_operation, r;
assert(s);
@@ -1893,10 +1901,11 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f
* died now */
(void) unit_enqueue_rewatch_pids(UNIT(s));
+ kill_operation = state_to_kill_operation(s, state);
r = unit_kill_context(
UNIT(s),
&s->kill_context,
- state_to_kill_operation(s, state),
+ kill_operation,
s->main_pid,
s->control_pid,
s->main_pid_alien);
@@ -1905,7 +1914,7 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f
if (r > 0) {
r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC),
- state == SERVICE_STOP_WATCHDOG ? service_timeout_abort_usec(s) : s->timeout_stop_usec));
+ kill_operation == KILL_WATCHDOG ? service_timeout_abort_usec(s) : s->timeout_stop_usec));
if (r < 0)
goto fail;
@@ -1914,7 +1923,7 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f
service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_SUCCESS);
else if (IN_SET(state, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL))
service_enter_stop_post(s, SERVICE_SUCCESS);
- else if (state == SERVICE_FINAL_SIGTERM && s->kill_context.send_sigkill)
+ else if (IN_SET(state, SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM) && s->kill_context.send_sigkill)
service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_SUCCESS);
else
service_enter_dead(s, SERVICE_SUCCESS, true);
@@ -2444,7 +2453,7 @@ static int service_start(Unit *u) {
* please! */
if (IN_SET(s->state,
SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
- SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL, SERVICE_CLEANING))
+ SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL, SERVICE_CLEANING))
return -EAGAIN;
/* Already on it! */
@@ -2515,7 +2524,7 @@ static int service_stop(Unit *u) {
/* Already on it */
if (IN_SET(s->state,
SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
- SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))
+ SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))
return 0;
/* A restart will be scheduled or is in progress. */
@@ -3321,6 +3330,7 @@ static void service_notify_cgroup_empty_event(Unit *u) {
break;
case SERVICE_STOP_POST:
+ case SERVICE_FINAL_WATCHDOG:
case SERVICE_FINAL_SIGTERM:
case SERVICE_FINAL_SIGKILL:
if (main_pid_good(s) <= 0 && control_pid_good(s) <= 0)
@@ -3521,6 +3531,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
break;
+ case SERVICE_FINAL_WATCHDOG:
case SERVICE_FINAL_SIGTERM:
case SERVICE_FINAL_SIGKILL:
@@ -3674,6 +3685,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
service_enter_signal(s, SERVICE_FINAL_SIGTERM, f);
break;
+ case SERVICE_FINAL_WATCHDOG:
case SERVICE_FINAL_SIGTERM:
case SERVICE_FINAL_SIGKILL:
if (main_pid_good(s) <= 0)
@@ -3720,13 +3732,32 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us
case SERVICE_CONDITION:
case SERVICE_START_PRE:
case SERVICE_START:
- log_unit_warning(UNIT(s), "%s operation timed out. Terminating.", service_state_to_string(s->state));
- service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT);
- break;
-
case SERVICE_START_POST:
- log_unit_warning(UNIT(s), "Start-post operation timed out. Stopping.");
- service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT);
+ switch (s->timeout_start_failure_mode) {
+
+ case SERVICE_TIMEOUT_TERMINATE:
+ log_unit_warning(UNIT(s), "%s operation timed out. Terminating.", service_state_to_string(s->state));
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_TIMEOUT_ABORT:
+ log_unit_warning(UNIT(s), "%s operation timed out. Aborting.", service_state_to_string(s->state));
+ service_enter_signal(s, SERVICE_STOP_WATCHDOG, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_TIMEOUT_KILL:
+ if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "%s operation timed out. Killing.", service_state_to_string(s->state));
+ service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "%s operation timed out. Skipping SIGKILL.", service_state_to_string(s->state));
+ service_enter_stop_post(s, SERVICE_FAILURE_TIMEOUT);
+ }
+ break;
+
+ default:
+ assert_not_reached("unknown timeout mode");
+ }
break;
case SERVICE_RUNNING:
@@ -3742,17 +3773,48 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us
break;
case SERVICE_STOP:
- log_unit_warning(UNIT(s), "Stopping timed out. Terminating.");
- service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT);
+ switch (s->timeout_stop_failure_mode) {
+
+ case SERVICE_TIMEOUT_TERMINATE:
+ log_unit_warning(UNIT(s), "Stopping timed out. Terminating.");
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_TIMEOUT_ABORT:
+ log_unit_warning(UNIT(s), "Stopping timed out. Aborting.");
+ service_enter_signal(s, SERVICE_STOP_WATCHDOG, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_TIMEOUT_KILL:
+ if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "Stopping timed out. Killing.");
+ service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "Stopping timed out. Skipping SIGKILL.");
+ service_enter_stop_post(s, SERVICE_FAILURE_TIMEOUT);
+ }
+ break;
+
+ default:
+ assert_not_reached("unknown timeout mode");
+ }
break;
case SERVICE_STOP_WATCHDOG:
- log_unit_warning(UNIT(s), "State 'stop-watchdog' timed out. Terminating.");
- service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT);
+ if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "State 'stop-watchdog' timed out. Killing.");
+ service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "State 'stop-watchdog' timed out. Skipping SIGKILL.");
+ service_enter_stop_post(s, SERVICE_FAILURE_TIMEOUT);
+ }
break;
case SERVICE_STOP_SIGTERM:
- if (s->kill_context.send_sigkill) {
+ if (s->timeout_stop_failure_mode == SERVICE_TIMEOUT_ABORT) {
+ log_unit_warning(UNIT(s), "State 'stop-sigterm' timed out. Aborting.");
+ service_enter_signal(s, SERVICE_STOP_WATCHDOG, SERVICE_FAILURE_TIMEOUT);
+ } else if (s->kill_context.send_sigkill) {
log_unit_warning(UNIT(s), "State 'stop-sigterm' timed out. Killing.");
service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_TIMEOUT);
} else {
@@ -3772,16 +3834,52 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us
break;
case SERVICE_STOP_POST:
- log_unit_warning(UNIT(s), "State 'stop-post' timed out. Terminating.");
- service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_FAILURE_TIMEOUT);
+ switch (s->timeout_stop_failure_mode) {
+
+ case SERVICE_TIMEOUT_TERMINATE:
+ log_unit_warning(UNIT(s), "State 'stop-post' timed out. Terminating.");
+ service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_TIMEOUT_ABORT:
+ log_unit_warning(UNIT(s), "State 'stop-post' timed out. Aborting.");
+ service_enter_signal(s, SERVICE_FINAL_WATCHDOG, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_TIMEOUT_KILL:
+ if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "State 'stop-post' timed out. Killing.");
+ service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "State 'stop-post' timed out. Skipping SIGKILL. Entering failed mode.");
+ service_enter_dead(s, SERVICE_FAILURE_TIMEOUT, false);
+ }
+ break;
+
+ default:
+ assert_not_reached("unknown timeout mode");
+ }
break;
- case SERVICE_FINAL_SIGTERM:
+ case SERVICE_FINAL_WATCHDOG:
if (s->kill_context.send_sigkill) {
- log_unit_warning(UNIT(s), "State 'stop-final-sigterm' timed out. Killing.");
+ log_unit_warning(UNIT(s), "State 'final-watchdog' timed out. Killing.");
service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_FAILURE_TIMEOUT);
} else {
- log_unit_warning(UNIT(s), "State 'stop-final-sigterm' timed out. Skipping SIGKILL. Entering failed mode.");
+ log_unit_warning(UNIT(s), "State 'final-watchdog' timed out. Skipping SIGKILL. Entering failed mode.");
+ service_enter_dead(s, SERVICE_FAILURE_TIMEOUT, false);
+ }
+ break;
+
+ case SERVICE_FINAL_SIGTERM:
+ if (s->timeout_stop_failure_mode == SERVICE_TIMEOUT_ABORT) {
+ log_unit_warning(UNIT(s), "State 'final-sigterm' timed out. Aborting.");
+ service_enter_signal(s, SERVICE_FINAL_WATCHDOG, SERVICE_FAILURE_TIMEOUT);
+ } else if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "State 'final-sigterm' timed out. Killing.");
+ service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "State 'final-sigterm' timed out. Skipping SIGKILL. Entering failed mode.");
service_enter_dead(s, SERVICE_FAILURE_TIMEOUT, false);
}
@@ -4263,6 +4361,7 @@ static bool service_needs_console(Unit *u) {
SERVICE_STOP_SIGTERM,
SERVICE_STOP_SIGKILL,
SERVICE_STOP_POST,
+ SERVICE_FINAL_WATCHDOG,
SERVICE_FINAL_SIGTERM,
SERVICE_FINAL_SIGKILL);
}
@@ -4417,6 +4516,14 @@ static const char* const service_result_table[_SERVICE_RESULT_MAX] = {
DEFINE_STRING_TABLE_LOOKUP(service_result, ServiceResult);
+static const char* const service_timeout_failure_mode_table[_SERVICE_TIMEOUT_FAILURE_MODE_MAX] = {
+ [SERVICE_TIMEOUT_TERMINATE] = "terminate",
+ [SERVICE_TIMEOUT_ABORT] = "abort",
+ [SERVICE_TIMEOUT_KILL] = "kill",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(service_timeout_failure_mode, ServiceTimeoutFailureMode);
+
const UnitVTable service_vtable = {
.object_size = sizeof(Service),
.exec_context_offset = offsetof(Service, exec_context),
diff --git a/src/core/service.h b/src/core/service.h
index 3a84db14c4..4423f893bb 100644
--- a/src/core/service.h
+++ b/src/core/service.h
@@ -74,6 +74,14 @@ typedef enum ServiceResult {
_SERVICE_RESULT_INVALID = -1
} ServiceResult;
+typedef enum ServiceTimeoutFailureMode {
+ SERVICE_TIMEOUT_TERMINATE,
+ SERVICE_TIMEOUT_ABORT,
+ SERVICE_TIMEOUT_KILL,
+ _SERVICE_TIMEOUT_FAILURE_MODE_MAX,
+ _SERVICE_TIMEOUT_FAILURE_MODE_INVALID = -1
+} ServiceTimeoutFailureMode;
+
struct ServiceFDStore {
Service *service;
@@ -103,6 +111,8 @@ struct Service {
usec_t timeout_abort_usec;
bool timeout_abort_set;
usec_t runtime_max_usec;
+ ServiceTimeoutFailureMode timeout_start_failure_mode;
+ ServiceTimeoutFailureMode timeout_stop_failure_mode;
dual_timestamp watchdog_timestamp;
usec_t watchdog_usec; /* the requested watchdog timeout in the unit file */
@@ -228,6 +238,9 @@ NotifyState notify_state_from_string(const char *s) _pure_;
const char* service_result_to_string(ServiceResult i) _const_;
ServiceResult service_result_from_string(const char *s) _pure_;
+const char* service_timeout_failure_mode_to_string(ServiceTimeoutFailureMode i) _const_;
+ServiceTimeoutFailureMode service_timeout_failure_mode_from_string(const char *s) _pure_;
+
DEFINE_CAST(SERVICE, Service);
#define STATUS_TEXT_MAX (16U*1024U)
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index 9a5730f3ea..69f79ea176 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -1490,7 +1490,9 @@ static int bus_append_service_property(sd_bus_message *m, const char *field, con
"NotifyAccess",
"USBFunctionDescriptors",
"USBFunctionStrings",
- "OOMPolicy"))
+ "OOMPolicy",
+ "TimeoutStartFailureMode",
+ "TimeoutStopFailureMode"))
return bus_append_string(m, field, eq);
if (STR_IN_SET(field, "PermissionsStartOnly",