From fd69f2247520b0be3190ded96d646a415edc97b7 Mon Sep 17 00:00:00 2001 From: Michal Sekletar Date: Mon, 4 Oct 2021 19:44:06 +0200 Subject: sd-event: introduce callback invoked when event source ratelimit expires --- man/sd_event_source_set_ratelimit.xml | 22 +++++++++--- src/libsystemd/libsystemd.sym | 1 + src/libsystemd/sd-event/event-source.h | 1 + src/libsystemd/sd-event/sd-event.c | 61 +++++++++++++++++++++++++++++----- src/libsystemd/sd-event/test-event.c | 12 +++++++ src/systemd/sd-event.h | 1 + 6 files changed, 85 insertions(+), 13 deletions(-) diff --git a/man/sd_event_source_set_ratelimit.xml b/man/sd_event_source_set_ratelimit.xml index ac8529074a..37354a09f6 100644 --- a/man/sd_event_source_set_ratelimit.xml +++ b/man/sd_event_source_set_ratelimit.xml @@ -19,6 +19,7 @@ sd_event_source_set_ratelimit sd_event_source_get_ratelimit sd_event_source_is_ratelimited + sd_event_source_set_ratelimit_expire_callback Configure rate limiting on event sources @@ -46,6 +47,12 @@ sd_event_source *source + + int sd_event_source_set_ratelimit_expire_callback + sd_event_source *source + sd_event_handler_tcallback + + @@ -78,6 +85,10 @@ is currently affected by rate limiting, i.e. it has recently hit the rate limit and is currently temporarily disabled due to that. + sd_event_source_set_ratelimit_expire_callback may be used to set a callback + function that is invoked every time the event source leaves rate limited state. Note that function is + called in the same event loop iteration in which state transition occured. + Rate limiting is currently implemented for I/O, timer, signal, defer and inotify event sources. @@ -85,11 +96,12 @@ Return Value - On success, sd_event_source_set_ratelimit() and - sd_event_source_get_ratelimit() return a non-negative integer. On failure, they - return a negative errno-style error code. sd_event_source_is_ratelimited returns - zero if rate limiting is currently not in effect and greater than zero if it is in effect; it returns a - negative errno-style error code on failure. + On success, sd_event_source_set_ratelimit(), + sd_event_source_set_ratelimit_expire_callback and + sd_event_source_get_ratelimit() return a non-negative integer. On failure, they return + a negative errno-style error code. sd_event_source_is_ratelimited returns zero if rate + limiting is currently not in effect and greater than zero if it is in effect; it returns a negative + errno-style error code on failure. Errors diff --git a/src/libsystemd/libsystemd.sym b/src/libsystemd/libsystemd.sym index 5e2fc9e231..2178668d11 100644 --- a/src/libsystemd/libsystemd.sym +++ b/src/libsystemd/libsystemd.sym @@ -767,4 +767,5 @@ LIBSYSTEMD_250 { global: sd_device_get_diskseq; sd_event_add_inotify_fd; + sd_event_source_set_ratelimit_expire_callback; } LIBSYSTEMD_249; diff --git a/src/libsystemd/sd-event/event-source.h b/src/libsystemd/sd-event/event-source.h index 41845c0bb5..74cbc26962 100644 --- a/src/libsystemd/sd-event/event-source.h +++ b/src/libsystemd/sd-event/event-source.h @@ -71,6 +71,7 @@ struct sd_event_source { uint64_t prepare_iteration; sd_event_destroy_t destroy_callback; + sd_event_handler_t ratelimit_expire_callback; LIST_FIELDS(sd_event_source, sources); diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c index 0ca0248510..d8f84d9ba7 100644 --- a/src/libsystemd/sd-event/sd-event.c +++ b/src/libsystemd/sd-event/sd-event.c @@ -2908,7 +2908,7 @@ fail: return r; } -static int event_source_leave_ratelimit(sd_event_source *s) { +static int event_source_leave_ratelimit(sd_event_source *s, bool run_callback) { int r; assert(s); @@ -2940,6 +2940,30 @@ static int event_source_leave_ratelimit(sd_event_source *s) { ratelimit_reset(&s->rate_limit); log_debug("Event source %p (%s) left rate limit state.", s, strna(s->description)); + + if (run_callback && s->ratelimit_expire_callback) { + s->dispatching = true; + r = s->ratelimit_expire_callback(s, s->userdata); + s->dispatching = false; + + if (r < 0) { + log_debug_errno(r, "Ratelimit expiry callback of event source %s (type %s) returned error, %s: %m", + strna(s->description), + event_source_type_to_string(s->type), + s->exit_on_failure ? "exiting" : "disabling"); + + if (s->exit_on_failure) + (void) sd_event_exit(s->event, r); + } + + if (s->n_ref == 0) + source_free(s); + else if (r < 0) + sd_event_source_set_enabled(s, SD_EVENT_OFF); + + return 1; + } + return 0; fail: @@ -3139,6 +3163,7 @@ static int process_timer( struct clock_data *d) { sd_event_source *s; + bool callback_invoked = false; int r; assert(e); @@ -3156,9 +3181,11 @@ static int process_timer( * again. */ assert(s->ratelimited); - r = event_source_leave_ratelimit(s); + r = event_source_leave_ratelimit(s, /* run_callback */ true); if (r < 0) return r; + else if (r == 1) + callback_invoked = true; continue; } @@ -3173,7 +3200,7 @@ static int process_timer( event_source_time_prioq_reshuffle(s); } - return 0; + return callback_invoked; } static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priority) { @@ -4097,15 +4124,15 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) { if (r < 0) goto finish; - r = process_timer(e, e->timestamp.realtime, &e->realtime); + r = process_inotify(e); if (r < 0) goto finish; - r = process_timer(e, e->timestamp.boottime, &e->boottime); + r = process_timer(e, e->timestamp.realtime, &e->realtime); if (r < 0) goto finish; - r = process_timer(e, e->timestamp.monotonic, &e->monotonic); + r = process_timer(e, e->timestamp.boottime, &e->boottime); if (r < 0) goto finish; @@ -4117,9 +4144,20 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) { if (r < 0) goto finish; - r = process_inotify(e); + r = process_timer(e, e->timestamp.monotonic, &e->monotonic); if (r < 0) goto finish; + else if (r == 1) { + /* Ratelimit expiry callback was called. Let's postpone processing pending sources and + * put loop in the initial state in order to evaluate (in the next iteration) also sources + * there were potentially re-enabled by the callback. + * + * Wondering why we treat only this invocation of process_timer() differently? Once event + * source is ratelimited we essentially transform it into CLOCK_MONOTONIC timer hence + * ratelimit expiry callback is never called for any other timer type. */ + r = 0; + goto finish; + } if (event_next_pending(e)) { e->state = SD_EVENT_PENDING; @@ -4488,7 +4526,7 @@ _public_ int sd_event_source_set_ratelimit(sd_event_source *s, uint64_t interval /* When ratelimiting is configured we'll always reset the rate limit state first and start fresh, * non-ratelimited. */ - r = event_source_leave_ratelimit(s); + r = event_source_leave_ratelimit(s, /* run_callback */ false); if (r < 0) return r; @@ -4496,6 +4534,13 @@ _public_ int sd_event_source_set_ratelimit(sd_event_source *s, uint64_t interval return 0; } +_public_ int sd_event_source_set_ratelimit_expire_callback(sd_event_source *s, sd_event_handler_t callback) { + assert_return(s, -EINVAL); + + s->ratelimit_expire_callback = callback; + return 0; +} + _public_ int sd_event_source_get_ratelimit(sd_event_source *s, uint64_t *ret_interval, unsigned *ret_burst) { assert_return(s, -EINVAL); diff --git a/src/libsystemd/sd-event/test-event.c b/src/libsystemd/sd-event/test-event.c index b637639f26..0ac23c1118 100644 --- a/src/libsystemd/sd-event/test-event.c +++ b/src/libsystemd/sd-event/test-event.c @@ -623,6 +623,11 @@ static int ratelimit_time_handler(sd_event_source *s, uint64_t usec, void *userd return 0; } +static int expired = -1; +static int ratelimit_expired(sd_event_source *s, void *userdata) { + return ++expired; +} + static void test_ratelimit(void) { _cleanup_close_pair_ int p[2] = {-1, -1}; _cleanup_(sd_event_unrefp) sd_event *e = NULL; @@ -686,12 +691,19 @@ static void test_ratelimit(void) { assert_se(sd_event_source_set_ratelimit(s, 1 * USEC_PER_SEC, 10) >= 0); + /* Set callback that will be invoked when we leave rate limited state. */ + assert_se(sd_event_source_set_ratelimit_expire_callback(s, ratelimit_expired) >= 0); + do { assert_se(sd_event_run(e, UINT64_MAX) >= 0); } while (!sd_event_source_is_ratelimited(s)); log_info("ratelimit_time_handler: called 10 more times, event source got ratelimited"); assert_se(count == 20); + + /* Dispatch the event loop once more and check that ratelimit expiration callback got called */ + assert_se(sd_event_run(e, UINT64_MAX) >= 0); + assert_se(expired == 0); } static void test_simple_timeout(void) { diff --git a/src/systemd/sd-event.h b/src/systemd/sd-event.h index f4357aa593..63984eef15 100644 --- a/src/systemd/sd-event.h +++ b/src/systemd/sd-event.h @@ -166,6 +166,7 @@ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b); int sd_event_source_set_ratelimit(sd_event_source *s, uint64_t interval_usec, unsigned burst); int sd_event_source_get_ratelimit(sd_event_source *s, uint64_t *ret_interval_usec, unsigned *ret_burst); int sd_event_source_is_ratelimited(sd_event_source *s); +int sd_event_source_set_ratelimit_expire_callback(sd_event_source *s, sd_event_handler_t callback); /* Define helpers so that __attribute__((cleanup(sd_event_unrefp))) and similar may be used. */ _SD_DEFINE_POINTER_CLEANUP_FUNC(sd_event, sd_event_unref); -- cgit v1.2.1 From 705578c3b9d794097233aa66010cf67b2a444716 Mon Sep 17 00:00:00 2001 From: Michal Sekletar Date: Mon, 4 Oct 2021 17:51:52 +0200 Subject: core: rename/generalize UNIT(u)->test_start_limit() hook Up until now the main reason why we didn't proceed with starting the unit was exceed start limit burst. However, for unit types like mounts the other reason could be effective ratelimit on /proc/self/mountinfo event source. That means our mount unit state may not reflect current kernel state. Hence, we need to attempt to re-run the start job again after ratelimit on event source expires. As we will be introducing another reason than start limit let's rename the virtual function that implements the check. --- src/core/automount.c | 6 +++--- src/core/mount.c | 6 +++--- src/core/path.c | 6 +++--- src/core/service.c | 6 +++--- src/core/socket.c | 6 +++--- src/core/swap.c | 6 +++--- src/core/timer.c | 6 +++--- src/core/unit.c | 6 +++--- src/core/unit.h | 2 +- 9 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/core/automount.c b/src/core/automount.c index de470935c7..1fc3fc0f82 100644 --- a/src/core/automount.c +++ b/src/core/automount.c @@ -1063,7 +1063,7 @@ static bool automount_supported(void) { return supported; } -static int automount_test_start_limit(Unit *u) { +static int automount_can_start(Unit *u) { Automount *a = AUTOMOUNT(u); int r; @@ -1075,7 +1075,7 @@ static int automount_test_start_limit(Unit *u) { return r; } - return 0; + return 1; } static const char* const automount_result_table[_AUTOMOUNT_RESULT_MAX] = { @@ -1142,5 +1142,5 @@ const UnitVTable automount_vtable = { }, }, - .test_start_limit = automount_test_start_limit, + .can_start = automount_can_start, }; diff --git a/src/core/mount.c b/src/core/mount.c index 321c7986b3..2ebae752b6 100644 --- a/src/core/mount.c +++ b/src/core/mount.c @@ -2135,7 +2135,7 @@ static int mount_can_clean(Unit *u, ExecCleanMask *ret) { return exec_context_get_clean_mask(&m->exec_context, ret); } -static int mount_test_start_limit(Unit *u) { +static int mount_can_start(Unit *u) { Mount *m = MOUNT(u); int r; @@ -2147,7 +2147,7 @@ static int mount_test_start_limit(Unit *u) { return r; } - return 0; + return 1; } static const char* const mount_exec_command_table[_MOUNT_EXEC_COMMAND_MAX] = { @@ -2248,5 +2248,5 @@ const UnitVTable mount_vtable = { }, }, - .test_start_limit = mount_test_start_limit, + .can_start = mount_can_start, }; diff --git a/src/core/path.c b/src/core/path.c index 0a3d86e9db..cdab9dcf8c 100644 --- a/src/core/path.c +++ b/src/core/path.c @@ -811,7 +811,7 @@ static void path_reset_failed(Unit *u) { p->result = PATH_SUCCESS; } -static int path_test_start_limit(Unit *u) { +static int path_can_start(Unit *u) { Path *p = PATH(u); int r; @@ -823,7 +823,7 @@ static int path_test_start_limit(Unit *u) { return r; } - return 0; + return 1; } static const char* const path_type_table[_PATH_TYPE_MAX] = { @@ -882,5 +882,5 @@ const UnitVTable path_vtable = { .bus_set_property = bus_path_set_property, - .test_start_limit = path_test_start_limit, + .can_start = path_can_start, }; diff --git a/src/core/service.c b/src/core/service.c index 83cbc9f489..17c19a2c4a 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -4482,7 +4482,7 @@ static const char *service_finished_job(Unit *u, JobType t, JobResult result) { return NULL; } -static int service_test_start_limit(Unit *u) { +static int service_can_start(Unit *u) { Service *s = SERVICE(u); int r; @@ -4495,7 +4495,7 @@ static int service_test_start_limit(Unit *u) { return r; } - return 0; + return 1; } static const char* const service_restart_table[_SERVICE_RESTART_MAX] = { @@ -4669,5 +4669,5 @@ const UnitVTable service_vtable = { .finished_job = service_finished_job, }, - .test_start_limit = service_test_start_limit, + .can_start = service_can_start, }; diff --git a/src/core/socket.c b/src/core/socket.c index 6534311bef..f265aab594 100644 --- a/src/core/socket.c +++ b/src/core/socket.c @@ -3427,7 +3427,7 @@ static int socket_can_clean(Unit *u, ExecCleanMask *ret) { return exec_context_get_clean_mask(&s->exec_context, ret); } -static int socket_test_start_limit(Unit *u) { +static int socket_can_start(Unit *u) { Socket *s = SOCKET(u); int r; @@ -3439,7 +3439,7 @@ static int socket_test_start_limit(Unit *u) { return r; } - return 0; + return 1; } static const char* const socket_exec_command_table[_SOCKET_EXEC_COMMAND_MAX] = { @@ -3570,5 +3570,5 @@ const UnitVTable socket_vtable = { }, }, - .test_start_limit = socket_test_start_limit, + .can_start = socket_can_start, }; diff --git a/src/core/swap.c b/src/core/swap.c index de72ac9232..3b28235194 100644 --- a/src/core/swap.c +++ b/src/core/swap.c @@ -1581,7 +1581,7 @@ static int swap_can_clean(Unit *u, ExecCleanMask *ret) { return exec_context_get_clean_mask(&s->exec_context, ret); } -static int swap_test_start_limit(Unit *u) { +static int swap_can_start(Unit *u) { Swap *s = SWAP(u); int r; @@ -1593,7 +1593,7 @@ static int swap_test_start_limit(Unit *u) { return r; } - return 0; + return 1; } static const char* const swap_exec_command_table[_SWAP_EXEC_COMMAND_MAX] = { @@ -1692,5 +1692,5 @@ const UnitVTable swap_vtable = { }, }, - .test_start_limit = swap_test_start_limit, + .can_start = swap_can_start, }; diff --git a/src/core/timer.c b/src/core/timer.c index 240a2f473b..b22168fad5 100644 --- a/src/core/timer.c +++ b/src/core/timer.c @@ -889,7 +889,7 @@ static int timer_can_clean(Unit *u, ExecCleanMask *ret) { return 0; } -static int timer_test_start_limit(Unit *u) { +static int timer_can_start(Unit *u) { Timer *t = TIMER(u); int r; @@ -901,7 +901,7 @@ static int timer_test_start_limit(Unit *u) { return r; } - return 0; + return 1; } static const char* const timer_base_table[_TIMER_BASE_MAX] = { @@ -965,5 +965,5 @@ const UnitVTable timer_vtable = { .bus_set_property = bus_timer_set_property, - .test_start_limit = timer_test_start_limit, + .can_start = timer_can_start, }; diff --git a/src/core/unit.c b/src/core/unit.c index 77d4ceaf24..929cc85e13 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -1904,9 +1904,9 @@ int unit_start(Unit *u) { return unit_start(following); } - /* Check start rate limiting early so that failure conditions don't cause us to enter a busy loop. */ - if (UNIT_VTABLE(u)->test_start_limit) { - r = UNIT_VTABLE(u)->test_start_limit(u); + /* Check our ability to start early so that failure conditions don't cause us to enter a busy loop. */ + if (UNIT_VTABLE(u)->can_start) { + r = UNIT_VTABLE(u)->can_start(u); if (r < 0) return r; } diff --git a/src/core/unit.h b/src/core/unit.h index 3f3a75d33b..76701519c2 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -665,7 +665,7 @@ typedef struct UnitVTable { /* If this function is set, it's invoked first as part of starting a unit to allow start rate * limiting checks to occur before we do anything else. */ - int (*test_start_limit)(Unit *u); + int (*can_start)(Unit *u); /* The strings to print in status messages */ UnitStatusMessageFormats status_message_formats; -- cgit v1.2.1 From a7c93dfe91e88a5a561341c523a45c7f8d71a588 Mon Sep 17 00:00:00 2001 From: Michal Sekletar Date: Mon, 4 Oct 2021 19:41:34 +0200 Subject: mount: make mount units start jobs not runnable if /p/s/mountinfo ratelimit is in effect --- src/core/mount.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core/mount.c b/src/core/mount.c index 2ebae752b6..88a670dc2a 100644 --- a/src/core/mount.c +++ b/src/core/mount.c @@ -2141,6 +2141,9 @@ static int mount_can_start(Unit *u) { assert(m); + if (sd_event_source_is_ratelimited(u->manager->mount_event_source)) + return -EAGAIN; + r = unit_test_start_limit(u); if (r < 0) { mount_enter_dead(m, MOUNT_FAILURE_START_LIMIT_HIT); -- cgit v1.2.1 From edc027b4f1cfaa49e8ecdde763eb8c623402d464 Mon Sep 17 00:00:00 2001 From: Michal Sekletar Date: Mon, 4 Oct 2021 20:31:49 +0200 Subject: mount: retrigger run queue after ratelimit expired to run delayed mount start jobs Fixes #20329 --- src/core/mount.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/core/mount.c b/src/core/mount.c index 88a670dc2a..3463641c6c 100644 --- a/src/core/mount.c +++ b/src/core/mount.c @@ -1838,6 +1838,21 @@ static bool mount_is_mounted(Mount *m) { return UNIT(m)->perpetual || FLAGS_SET(m->proc_flags, MOUNT_PROC_IS_MOUNTED); } +static int mount_on_ratelimit_expire(sd_event_source *s, void *userdata) { + Manager *m = userdata; + int r; + + assert(m); + + /* By entering ratelimited state we made all mount start jobs not runnable, now rate limit is over so let's + * make sure we dispatch them in the next iteration. */ + r = sd_event_source_set_enabled(m->run_queue_event_source, SD_EVENT_ONESHOT); + if (r < 0) + log_debug_errno(r, "Failed to enable run queue event source, ignoring: %m"); + + return 0; +} + static void mount_enumerate(Manager *m) { int r; @@ -1891,6 +1906,12 @@ static void mount_enumerate(Manager *m) { goto fail; } + r = sd_event_source_set_ratelimit_expire_callback(m->mount_event_source, mount_on_ratelimit_expire); + if (r < 0) { + log_error_errno(r, "Failed to enable rate limit for mount events: %m"); + goto fail; + } + (void) sd_event_source_set_description(m->mount_event_source, "mount-monitor-dispatch"); } -- cgit v1.2.1