summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2023-03-29 21:41:31 +0200
committerGitHub <noreply@github.com>2023-03-29 21:41:31 +0200
commit2e34aed32b7c02e54642b3a84948d774528148d7 (patch)
tree79e0ff8c941d00f01a64d48da7760c7b1827b860 /src
parenta7b6eee4acd0d6b0985382bd0a621f0c9152b829 (diff)
parent8d12e219c578ef1769050a49eb8e857f03928d84 (diff)
downloadsystemd-2e34aed32b7c02e54642b3a84948d774528148d7.tar.gz
Merge pull request #26971 from poettering/autostart-dead-failed
pid1: introduce new SERVICE_{DEAD|FAILED}_BEFORE_AUTO_RESTART service…
Diffstat (limited to 'src')
-rw-r--r--src/basic/unit-def.c44
-rw-r--r--src/basic/unit-def.h2
-rw-r--r--src/core/service.c157
-rw-r--r--src/core/service.h1
-rw-r--r--src/core/socket.c4
5 files changed, 127 insertions, 81 deletions
diff --git a/src/basic/unit-def.c b/src/basic/unit-def.c
index bdb1860246..a0fab46a19 100644
--- a/src/basic/unit-def.c
+++ b/src/basic/unit-def.c
@@ -180,27 +180,29 @@ static const char* const scope_state_table[_SCOPE_STATE_MAX] = {
DEFINE_STRING_TABLE_LOOKUP(scope_state, ScopeState);
static const char* const service_state_table[_SERVICE_STATE_MAX] = {
- [SERVICE_DEAD] = "dead",
- [SERVICE_CONDITION] = "condition",
- [SERVICE_START_PRE] = "start-pre",
- [SERVICE_START] = "start",
- [SERVICE_START_POST] = "start-post",
- [SERVICE_RUNNING] = "running",
- [SERVICE_EXITED] = "exited",
- [SERVICE_RELOAD] = "reload",
- [SERVICE_RELOAD_SIGNAL] = "reload-signal",
- [SERVICE_RELOAD_NOTIFY] = "reload-notify",
- [SERVICE_STOP] = "stop",
- [SERVICE_STOP_WATCHDOG] = "stop-watchdog",
- [SERVICE_STOP_SIGTERM] = "stop-sigterm",
- [SERVICE_STOP_SIGKILL] = "stop-sigkill",
- [SERVICE_STOP_POST] = "stop-post",
- [SERVICE_FINAL_WATCHDOG] = "final-watchdog",
- [SERVICE_FINAL_SIGTERM] = "final-sigterm",
- [SERVICE_FINAL_SIGKILL] = "final-sigkill",
- [SERVICE_FAILED] = "failed",
- [SERVICE_AUTO_RESTART] = "auto-restart",
- [SERVICE_CLEANING] = "cleaning",
+ [SERVICE_DEAD] = "dead",
+ [SERVICE_CONDITION] = "condition",
+ [SERVICE_START_PRE] = "start-pre",
+ [SERVICE_START] = "start",
+ [SERVICE_START_POST] = "start-post",
+ [SERVICE_RUNNING] = "running",
+ [SERVICE_EXITED] = "exited",
+ [SERVICE_RELOAD] = "reload",
+ [SERVICE_RELOAD_SIGNAL] = "reload-signal",
+ [SERVICE_RELOAD_NOTIFY] = "reload-notify",
+ [SERVICE_STOP] = "stop",
+ [SERVICE_STOP_WATCHDOG] = "stop-watchdog",
+ [SERVICE_STOP_SIGTERM] = "stop-sigterm",
+ [SERVICE_STOP_SIGKILL] = "stop-sigkill",
+ [SERVICE_STOP_POST] = "stop-post",
+ [SERVICE_FINAL_WATCHDOG] = "final-watchdog",
+ [SERVICE_FINAL_SIGTERM] = "final-sigterm",
+ [SERVICE_FINAL_SIGKILL] = "final-sigkill",
+ [SERVICE_FAILED] = "failed",
+ [SERVICE_DEAD_BEFORE_AUTO_RESTART] = "dead-before-auto-restart",
+ [SERVICE_FAILED_BEFORE_AUTO_RESTART] = "failed-before-auto-restart",
+ [SERVICE_AUTO_RESTART] = "auto-restart",
+ [SERVICE_CLEANING] = "cleaning",
};
DEFINE_STRING_TABLE_LOOKUP(service_state, ServiceState);
diff --git a/src/basic/unit-def.h b/src/basic/unit-def.h
index bae132ea09..2fab42e9c7 100644
--- a/src/basic/unit-def.h
+++ b/src/basic/unit-def.h
@@ -144,6 +144,8 @@ typedef enum ServiceState {
SERVICE_FINAL_SIGTERM, /* In case the STOP_POST executable hangs, we shoot that down, too */
SERVICE_FINAL_SIGKILL,
SERVICE_FAILED,
+ SERVICE_DEAD_BEFORE_AUTO_RESTART,
+ SERVICE_FAILED_BEFORE_AUTO_RESTART,
SERVICE_AUTO_RESTART,
SERVICE_CLEANING,
_SERVICE_STATE_MAX,
diff --git a/src/core/service.c b/src/core/service.c
index 115d3fbef5..d73feb363e 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -66,6 +66,8 @@ static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = {
[SERVICE_FINAL_SIGTERM] = UNIT_DEACTIVATING,
[SERVICE_FINAL_SIGKILL] = UNIT_DEACTIVATING,
[SERVICE_FAILED] = UNIT_FAILED,
+ [SERVICE_DEAD_BEFORE_AUTO_RESTART] = UNIT_INACTIVE,
+ [SERVICE_FAILED_BEFORE_AUTO_RESTART] = UNIT_FAILED,
[SERVICE_AUTO_RESTART] = UNIT_ACTIVATING,
[SERVICE_CLEANING] = UNIT_MAINTENANCE,
};
@@ -92,6 +94,8 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] =
[SERVICE_FINAL_SIGTERM] = UNIT_DEACTIVATING,
[SERVICE_FINAL_SIGKILL] = UNIT_DEACTIVATING,
[SERVICE_FAILED] = UNIT_FAILED,
+ [SERVICE_DEAD_BEFORE_AUTO_RESTART] = UNIT_INACTIVE,
+ [SERVICE_FAILED_BEFORE_AUTO_RESTART] = UNIT_FAILED,
[SERVICE_AUTO_RESTART] = UNIT_ACTIVATING,
[SERVICE_CLEANING] = UNIT_MAINTENANCE,
};
@@ -267,6 +271,7 @@ static void service_start_watchdog(Service *s) {
usec_t service_restart_usec(Service *s) {
unsigned n_restarts;
long double unit;
+ usec_t value;
assert(s);
@@ -276,24 +281,26 @@ usec_t service_restart_usec(Service *s) {
* between job enqueuing and running is usually neglectable compared to the time
* we'll be sleeping. */
n_restarts = s->n_restarts +
- (IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART) ? 1 : 0);
+ (IN_SET(s->state, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART) ? 1 : 0);
/* n_restarts can be 0 if no restart has happened or is planned */
if (n_restarts <= 1 ||
s->restart_steps == 0 ||
s->restart_usec_max == USEC_INFINITY ||
- s->restart_usec == s->restart_usec_max)
- return s->restart_usec;
-
- if (n_restarts > s->restart_steps)
- return s->restart_usec_max;
-
- /* Enforced in service_verify() and above */
- assert(s->restart_usec_max > s->restart_usec);
+ s->restart_usec >= s->restart_usec_max)
+ value = s->restart_usec;
+ else if (n_restarts > s->restart_steps)
+ value = s->restart_usec_max;
+ else {
+ /* Enforced in service_verify() and above */
+ assert(s->restart_usec_max > s->restart_usec);
- unit = powl(s->restart_usec_max - s->restart_usec, 1.0L / s->restart_steps);
+ unit = powl(s->restart_usec_max - s->restart_usec, 1.0L / s->restart_steps);
+ value = usec_add(s->restart_usec, (usec_t) powl(unit, n_restarts - 1));
+ }
- return usec_add(s->restart_usec, (usec_t) powl(unit, n_restarts - 1));
+ log_unit_debug(UNIT(s), "Restart interval calculated as: %s", FORMAT_TIMESPAN(value, 0));
+ return value;
}
static void service_extend_event_source_timeout(Service *s, sd_event_source *source, usec_t extended) {
@@ -380,9 +387,6 @@ static void service_fd_store_unlink(ServiceFDStore *fs) {
static void service_release_fd_store(Service *s) {
assert(s);
- if (s->n_keep_fd_store > 0)
- return;
-
log_unit_debug(UNIT(s), "Releasing all stored fds");
while (s->fd_store)
service_fd_store_unlink(s->fd_store);
@@ -395,6 +399,10 @@ static void service_release_resources(Unit *u) {
assert(s);
+ /* Don't release resources if this is a transitionary failed/dead state */
+ if (IN_SET(s->state, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART))
+ return;
+
if (!s->fd_store && s->stdin_fd < 0 && s->stdout_fd < 0 && s->stderr_fd < 0)
return;
@@ -1199,7 +1207,9 @@ static void service_set_state(Service *s, ServiceState state) {
s->control_command_id = _SERVICE_EXEC_COMMAND_INVALID;
}
- if (IN_SET(state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART)) {
+ if (IN_SET(state,
+ SERVICE_DEAD, SERVICE_FAILED,
+ SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART)) {
unit_unwatch_all_pids(UNIT(s));
unit_dequeue_rewatch_pids(UNIT(s));
}
@@ -1312,7 +1322,10 @@ static int service_coldplug(Unit *u) {
return r;
}
- if (!IN_SET(s->deserialized_state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART, SERVICE_CLEANING)) {
+ if (!IN_SET(s->deserialized_state,
+ SERVICE_DEAD, SERVICE_FAILED,
+ SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART,
+ SERVICE_CLEANING)) {
(void) unit_enqueue_rewatch_pids(u);
(void) unit_setup_exec_runtime(u);
}
@@ -1891,14 +1904,14 @@ static bool service_will_restart(Unit *u) {
if (s->will_auto_restart)
return true;
- if (s->state == SERVICE_AUTO_RESTART)
+ if (IN_SET(s->state, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART))
return true;
return unit_will_restart_default(u);
}
static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) {
- ServiceState end_state;
+ ServiceState end_state, restart_state;
int r;
assert(s);
@@ -1914,12 +1927,15 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
if (s->result == SERVICE_SUCCESS) {
unit_log_success(UNIT(s));
end_state = SERVICE_DEAD;
+ restart_state = SERVICE_DEAD_BEFORE_AUTO_RESTART;
} else if (s->result == SERVICE_SKIP_CONDITION) {
unit_log_skip(UNIT(s), service_result_to_string(s->result));
end_state = SERVICE_DEAD;
+ restart_state = SERVICE_DEAD_BEFORE_AUTO_RESTART;
} else {
unit_log_failure(UNIT(s), service_result_to_string(s->result));
end_state = SERVICE_FAILED;
+ restart_state = SERVICE_FAILED_BEFORE_AUTO_RESTART;
}
unit_warn_leftover_processes(UNIT(s), unit_log_leftover_process_stop);
@@ -1937,30 +1953,33 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
s->will_auto_restart = true;
}
- /* Make sure service_release_resources() doesn't destroy our FD store, while we are changing through
- * SERVICE_FAILED/SERVICE_DEAD before entering into SERVICE_AUTO_RESTART. */
- s->n_keep_fd_store ++;
-
- service_set_state(s, end_state);
-
if (s->will_auto_restart) {
s->will_auto_restart = false;
+ /* We make two state changes here: one that maps to the high-level UNIT_INACTIVE/UNIT_FAILED
+ * state (i.e. a state indicating deactivation), and then one that maps to the
+ * high-level UNIT_ACTIVATING state (i.e. a state indicating activation). We do this so that
+ * external software can watch the state changes and see all service failures, even if they
+ * are only transitionary and followed by an automatic restart. We have fine-grained
+ * low-level states for this though so that software can distinguish the permanent UNIT_INACTIVE
+ * state from this transitionary UNIT_INACTIVE state by looking at the low-level states. */
+ service_set_state(s, restart_state);
+
r = service_arm_timer(s, /* relative= */ true, service_restart_usec(s));
- if (r < 0) {
- s->n_keep_fd_store--;
+ if (r < 0)
goto fail;
- }
service_set_state(s, SERVICE_AUTO_RESTART);
- } else
+ } else {
+ service_set_state(s, end_state);
+
/* If we shan't restart, then flush out the restart counter. But don't do that immediately, so that the
* user can still introspect the counter. Do so on the next start. */
s->flush_n_restarts = true;
+ }
/* The new state is in effect. Let's re-add us to the GC queue, so that the fd store is possibly
* gc'ed again */
- s->n_keep_fd_store--;
unit_add_to_gc_queue(UNIT(s));
/* The next restart might not be a manual stop, hence reset the flag indicating manual stops */
@@ -2646,14 +2665,11 @@ static int service_start(Unit *u) {
if (IN_SET(s->state, SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST))
return 0;
- /* A service that will be restarted must be stopped first to
- * trigger BindsTo and/or OnFailure dependencies. If a user
- * does not want to wait for the holdoff time to elapse, the
- * service should be manually restarted, not started. We
- * simply return EAGAIN here, so that any start jobs stay
- * queued, and assume that the auto restart timer will
- * eventually trigger the restart. */
- if (s->state == SERVICE_AUTO_RESTART)
+ /* A service that will be restarted must be stopped first to trigger BindsTo and/or OnFailure
+ * dependencies. If a user does not want to wait for the holdoff time to elapse, the service should
+ * be manually restarted, not started. We simply return EAGAIN here, so that any start jobs stay
+ * queued, and assume that the auto restart timer will eventually trigger the restart. */
+ if (IN_SET(s->state, SERVICE_AUTO_RESTART, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART))
return -EAGAIN;
assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED));
@@ -2701,34 +2717,55 @@ static int service_stop(Unit *u) {
/* Don't create restart jobs from manual stops. */
s->forbid_restart = true;
- /* Already on it */
- if (IN_SET(s->state,
- SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
- SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))
+ switch (s->state) {
+
+ case SERVICE_STOP:
+ case SERVICE_STOP_SIGTERM:
+ case SERVICE_STOP_SIGKILL:
+ case SERVICE_STOP_POST:
+ case SERVICE_FINAL_WATCHDOG:
+ case SERVICE_FINAL_SIGTERM:
+ case SERVICE_FINAL_SIGKILL:
+ /* Already on it */
return 0;
- /* A restart will be scheduled or is in progress. */
- if (s->state == SERVICE_AUTO_RESTART) {
+ case SERVICE_AUTO_RESTART:
+ /* A restart will be scheduled or is in progress. */
service_set_state(s, SERVICE_DEAD);
return 0;
- }
- /* If there's already something running we go directly into kill mode. */
- if (IN_SET(s->state, SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, SERVICE_STOP_WATCHDOG)) {
+ case SERVICE_CONDITION:
+ case SERVICE_START_PRE:
+ case SERVICE_START:
+ case SERVICE_START_POST:
+ case SERVICE_RELOAD:
+ case SERVICE_RELOAD_SIGNAL:
+ case SERVICE_RELOAD_NOTIFY:
+ case SERVICE_STOP_WATCHDOG:
+ /* If there's already something running we go directly into kill mode. */
service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_SUCCESS);
return 0;
- }
- /* If we are currently cleaning, then abort it, brutally. */
- if (s->state == SERVICE_CLEANING) {
+ case SERVICE_CLEANING:
+ /* If we are currently cleaning, then abort it, brutally. */
service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_SUCCESS);
return 0;
+
+ case SERVICE_RUNNING:
+ case SERVICE_EXITED:
+ service_enter_stop(s, SERVICE_SUCCESS);
+ return 1;
+
+ case SERVICE_DEAD_BEFORE_AUTO_RESTART:
+ case SERVICE_FAILED_BEFORE_AUTO_RESTART:
+ case SERVICE_DEAD:
+ case SERVICE_FAILED:
+ default:
+ /* Unknown state, or unit_stop() should already have handled these */
+ assert_not_reached();
}
- assert(IN_SET(s->state, SERVICE_RUNNING, SERVICE_EXITED));
- service_enter_stop(s, SERVICE_SUCCESS);
- return 1;
}
static int service_reload(Unit *u) {
@@ -3331,6 +3368,11 @@ static bool service_may_gc(Unit *u) {
control_pid_good(s) > 0)
return false;
+ /* Only allow collection of actually dead services, i.e. not those that are in the transitionary
+ * SERVICE_DEAD_BEFORE_AUTO_RESTART/SERVICE_FAILED_BEFORE_AUTO_RESTART states. */
+ if (!IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED))
+ return false;
+
return true;
}
@@ -3492,11 +3534,9 @@ static void service_notify_cgroup_empty_event(Unit *u) {
switch (s->state) {
- /* Waiting for SIGCHLD is usually more interesting,
- * because it includes return codes/signals. Which is
- * why we ignore the cgroup events for most cases,
- * except when we don't know pid which to expect the
- * SIGCHLD for. */
+ /* Waiting for SIGCHLD is usually more interesting, because it includes return
+ * codes/signals. Which is why we ignore the cgroup events for most cases, except when we
+ * don't know pid which to expect the SIGCHLD for. */
case SERVICE_START:
if (IN_SET(s->type, SERVICE_NOTIFY, SERVICE_NOTIFY_RELOAD) &&
@@ -3554,6 +3594,9 @@ static void service_notify_cgroup_empty_event(Unit *u) {
* up the cgroup earlier and should do it now. */
case SERVICE_DEAD:
case SERVICE_FAILED:
+ case SERVICE_DEAD_BEFORE_AUTO_RESTART:
+ case SERVICE_FAILED_BEFORE_AUTO_RESTART:
+ case SERVICE_AUTO_RESTART:
unit_prune_cgroup(u);
break;
diff --git a/src/core/service.h b/src/core/service.h
index a57ed42fac..156d32ca17 100644
--- a/src/core/service.h
+++ b/src/core/service.h
@@ -206,7 +206,6 @@ struct Service {
ServiceFDStore *fd_store;
size_t n_fd_store;
unsigned n_fd_store_max;
- unsigned n_keep_fd_store;
char *usb_function_descriptors;
char *usb_function_strings;
diff --git a/src/core/socket.c b/src/core/socket.c
index af6603df07..c26daced1a 100644
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -2484,7 +2484,7 @@ static int socket_start(Unit *u) {
/* If the service is already active we cannot start the
* socket */
- if (!IN_SET(service->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART))
+ if (!IN_SET(service->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART))
return log_unit_error_errno(u, SYNTHETIC_ERRNO(EBUSY), "Socket service %s already active, refusing.", UNIT(service)->id);
}
@@ -3287,7 +3287,7 @@ static void socket_trigger_notify(Unit *u, Unit *other) {
return;
if (IN_SET(SERVICE(other)->state,
- SERVICE_DEAD, SERVICE_FAILED,
+ SERVICE_DEAD, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED, SERVICE_FAILED_BEFORE_AUTO_RESTART,
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
SERVICE_AUTO_RESTART))
socket_enter_listening(s);