summaryrefslogtreecommitdiff
path: root/fs/io_uring.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-12-16 12:33:35 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-12-16 12:33:35 -0800
commit005b2a9dc819a1265a8c765595f8f6d88d6173d9 (patch)
tree688107572f6b3e017fc7ccf54f552c6a30441a79 /fs/io_uring.c
parent5ee863bec794f30bdf7fdf57ce0d9f579b0d1aa3 (diff)
parent355fb9e2b78e78b38ec00f5cd9b05c6aceb98335 (diff)
downloadlinux-005b2a9dc819a1265a8c765595f8f6d88d6173d9.tar.gz
Merge tag 'tif-task_work.arch-2020-12-14' of git://git.kernel.dk/linux-block
Pull TIF_NOTIFY_SIGNAL updates from Jens Axboe: "This sits on top of of the core entry/exit and x86 entry branch from the tip tree, which contains the generic and x86 parts of this work. Here we convert the rest of the archs to support TIF_NOTIFY_SIGNAL. With that done, we can get rid of JOBCTL_TASK_WORK from task_work and signal.c, and also remove a deadlock work-around in io_uring around knowing that signal based task_work waking is invoked with the sighand wait queue head lock. The motivation for this work is to decouple signal notify based task_work, of which io_uring is a heavy user of, from sighand. The sighand lock becomes a huge contention point, particularly for threaded workloads where it's shared between threads. Even outside of threaded applications it's slower than it needs to be. Roman Gershman <romger@amazon.com> reported that his networked workload dropped from 1.6M QPS at 80% CPU to 1.0M QPS at 100% CPU after io_uring was changed to use TIF_NOTIFY_SIGNAL. The time was all spent hammering on the sighand lock, showing 57% of the CPU time there [1]. There are further cleanups possible on top of this. One example is TIF_PATCH_PENDING, where a patch already exists to use TIF_NOTIFY_SIGNAL instead. Hopefully this will also lead to more consolidation, but the work stands on its own as well" [1] https://github.com/axboe/liburing/issues/215 * tag 'tif-task_work.arch-2020-12-14' of git://git.kernel.dk/linux-block: (28 commits) io_uring: remove 'twa_signal_ok' deadlock work-around kernel: remove checking for TIF_NOTIFY_SIGNAL signal: kill JOBCTL_TASK_WORK io_uring: JOBCTL_TASK_WORK is no longer used by task_work task_work: remove legacy TWA_SIGNAL path sparc: add support for TIF_NOTIFY_SIGNAL riscv: add support for TIF_NOTIFY_SIGNAL nds32: add support for TIF_NOTIFY_SIGNAL ia64: add support for TIF_NOTIFY_SIGNAL h8300: add support for TIF_NOTIFY_SIGNAL c6x: add support for TIF_NOTIFY_SIGNAL alpha: add support for TIF_NOTIFY_SIGNAL xtensa: add support for TIF_NOTIFY_SIGNAL arm: add support for TIF_NOTIFY_SIGNAL microblaze: add support for TIF_NOTIFY_SIGNAL hexagon: add support for TIF_NOTIFY_SIGNAL csky: add support for TIF_NOTIFY_SIGNAL openrisc: add support for TIF_NOTIFY_SIGNAL sh: add support for TIF_NOTIFY_SIGNAL um: add support for TIF_NOTIFY_SIGNAL ...
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r--fs/io_uring.c30
1 files changed, 8 insertions, 22 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2b588bd5494c..0e8902be6b96 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1996,7 +1996,7 @@ static struct io_kiocb *io_req_find_next(struct io_kiocb *req)
return __io_req_find_next(req);
}
-static int io_req_task_work_add(struct io_kiocb *req, bool twa_signal_ok)
+static int io_req_task_work_add(struct io_kiocb *req)
{
struct task_struct *tsk = req->task;
struct io_ring_ctx *ctx = req->ctx;
@@ -2013,7 +2013,7 @@ static int io_req_task_work_add(struct io_kiocb *req, bool twa_signal_ok)
* will do the job.
*/
notify = TWA_NONE;
- if (!(ctx->flags & IORING_SETUP_SQPOLL) && twa_signal_ok)
+ if (!(ctx->flags & IORING_SETUP_SQPOLL))
notify = TWA_SIGNAL;
ret = task_work_add(tsk, &req->task_work, notify);
@@ -2075,7 +2075,7 @@ static void io_req_task_queue(struct io_kiocb *req)
init_task_work(&req->task_work, io_req_task_submit);
percpu_ref_get(&req->ctx->refs);
- ret = io_req_task_work_add(req, true);
+ ret = io_req_task_work_add(req);
if (unlikely(ret)) {
struct task_struct *tsk;
@@ -2197,7 +2197,7 @@ static void io_free_req_deferred(struct io_kiocb *req)
int ret;
init_task_work(&req->task_work, io_put_req_deferred_cb);
- ret = io_req_task_work_add(req, true);
+ ret = io_req_task_work_add(req);
if (unlikely(ret)) {
struct task_struct *tsk;
@@ -3305,7 +3305,7 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
/* submit ref gets dropped, acquire a new one */
refcount_inc(&req->refs);
- ret = io_req_task_work_add(req, true);
+ ret = io_req_task_work_add(req);
if (unlikely(ret)) {
struct task_struct *tsk;
@@ -4859,7 +4859,6 @@ struct io_poll_table {
static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
__poll_t mask, task_work_func_t func)
{
- bool twa_signal_ok;
int ret;
/* for instances that support it check for an event match first: */
@@ -4875,20 +4874,12 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
percpu_ref_get(&req->ctx->refs);
/*
- * If we using the signalfd wait_queue_head for this wakeup, then
- * it's not safe to use TWA_SIGNAL as we could be recursing on the
- * tsk->sighand->siglock on doing the wakeup. Should not be needed
- * either, as the normal wakeup will suffice.
- */
- twa_signal_ok = (poll->head != &req->task->sighand->signalfd_wqh);
-
- /*
* If this fails, then the task is exiting. When a task exits, the
* work gets canceled, so just cancel this request as well instead
* of executing it. We can't safely execute it anyway, as we may not
* have the needed state needed for it anyway.
*/
- ret = io_req_task_work_add(req, twa_signal_ok);
+ ret = io_req_task_work_add(req);
if (unlikely(ret)) {
struct task_struct *tsk;
@@ -6862,13 +6853,8 @@ static int io_run_task_work_sig(void)
return 1;
if (!signal_pending(current))
return 0;
- if (current->jobctl & JOBCTL_TASK_WORK) {
- spin_lock_irq(&current->sighand->siglock);
- current->jobctl &= ~JOBCTL_TASK_WORK;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
- return 1;
- }
+ if (test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL))
+ return -ERESTARTSYS;
return -EINTR;
}