From 52ea806ad983490b3132a9e526e11a10dc2fd10c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 21 Dec 2022 07:05:09 -0700 Subject: [PATCH 1/4] io_uring: finish waiting before flushing overflow entries If we have overflow entries being generated after we've done the initial flush in io_cqring_wait(), then we could be flushing them in the main wait loop as well. If that's done after having added ourselves to the cq_wait waitqueue, then the task state can be != TASK_RUNNING when we enter the overflow flush. Check for the need to overflow flush, and finish our wait cycle first if we have to do so. Reported-and-tested-by: syzbot+cf6ea1d6bb30a4ce10b2@syzkaller.appspotmail.com Link: https://lore.kernel.org/io-uring/000000000000cb143a05f04eee15@google.com/ Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index ff2bbac1a10f..ac5d39eeb3d1 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -677,16 +677,20 @@ static void __io_cqring_overflow_flush(struct io_ring_ctx *ctx) io_cq_unlock_post(ctx); } +static void io_cqring_do_overflow_flush(struct io_ring_ctx *ctx) +{ + /* iopoll syncs against uring_lock, not completion_lock */ + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_lock(&ctx->uring_lock); + __io_cqring_overflow_flush(ctx); + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_unlock(&ctx->uring_lock); +} + static void io_cqring_overflow_flush(struct io_ring_ctx *ctx) { - if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) { - /* iopoll syncs against uring_lock, not completion_lock */ - if (ctx->flags & IORING_SETUP_IOPOLL) - mutex_lock(&ctx->uring_lock); - __io_cqring_overflow_flush(ctx); - if (ctx->flags & IORING_SETUP_IOPOLL) - mutex_unlock(&ctx->uring_lock); - } + if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) + io_cqring_do_overflow_flush(ctx); } void __io_put_task(struct task_struct *task, int nr) @@ -2549,7 +2553,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, trace_io_uring_cqring_wait(ctx, min_events); do { - io_cqring_overflow_flush(ctx); + if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) { + finish_wait(&ctx->cq_wait, &iowq.wq); + io_cqring_do_overflow_flush(ctx); + } prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq, TASK_INTERRUPTIBLE); ret = io_cqring_wait_schedule(ctx, &iowq, timeout); From 23fffb2f09ce1145cbd751801d45ba74acaa6542 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 21 Dec 2022 07:11:33 -0700 Subject: [PATCH 2/4] io_uring/cancel: re-grab ctx mutex after finishing wait If we have a signal pending during cancelations, it'll cause the task_work run to return an error. Since we didn't run task_work, the current task is left in TASK_INTERRUPTIBLE state when we need to re-grab the ctx mutex, and the kernel will rightfully complain about that. Move the lock grabbing for the error cases outside the loop to avoid that issue. Reported-by: syzbot+7df055631cd1be4586fd@syzkaller.appspotmail.com Link: https://lore.kernel.org/io-uring/0000000000003a14a905f05050b0@google.com/ Signed-off-by: Jens Axboe --- io_uring/cancel.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/io_uring/cancel.c b/io_uring/cancel.c index 2291a53cdabd..b4f5dfacc0c3 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -288,24 +288,23 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg) ret = __io_sync_cancel(current->io_uring, &cd, sc.fd); + mutex_unlock(&ctx->uring_lock); if (ret != -EALREADY) break; - mutex_unlock(&ctx->uring_lock); ret = io_run_task_work_sig(ctx); - if (ret < 0) { - mutex_lock(&ctx->uring_lock); + if (ret < 0) break; - } ret = schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS); - mutex_lock(&ctx->uring_lock); if (!ret) { ret = -ETIME; break; } + mutex_lock(&ctx->uring_lock); } while (1); finish_wait(&ctx->cq_wait, &wait); + mutex_lock(&ctx->uring_lock); if (ret == -ENOENT || ret > 0) ret = 0; From 343190841a1f22b96996d9f8cfab902a4d1bfd0e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 23 Dec 2022 06:37:08 -0700 Subject: [PATCH 3/4] io_uring: check for valid register opcode earlier We only check the register opcode value inside the restricted ring section, move it into the main io_uring_register() function instead and check it up front. Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index ac5d39eeb3d1..58ac13b69dc8 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -4020,8 +4020,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, return -EEXIST; if (ctx->restricted) { - if (opcode >= IORING_REGISTER_LAST) - return -EINVAL; opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); if (!test_bit(opcode, ctx->restrictions.register_op)) return -EACCES; @@ -4177,6 +4175,9 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, long ret = -EBADF; struct fd f; + if (opcode >= IORING_REGISTER_LAST) + return -EINVAL; + f = fdget(fd); if (!f.file) return -EBADF; From 9eb803402a2a83400c6c6afd900e3b7c87c06816 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 16 Nov 2022 21:25:24 +0100 Subject: [PATCH 4/4] uapi:io_uring.h: allow linux/time_types.h to be skipped include/uapi/linux/io_uring.h is synced 1:1 into liburing:src/include/liburing/io_uring.h. liburing has a configure check to detect the need for linux/time_types.h. It can opt-out by defining UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H Fixes: 78a861b94959 ("io_uring: add sync cancelation API through io_uring_register()") Link: https://github.com/axboe/liburing/issues/708 Link: https://github.com/axboe/liburing/pull/709 Link: https://lore.kernel.org/io-uring/20221115212614.1308132-1-ammar.faizi@intel.com/T/#m9f5dd571cd4f6a5dee84452dbbca3b92ba7a4091 CC: Jens Axboe Cc: Ammar Faizi Signed-off-by: Stefan Metzmacher Reviewed-by: Ammar Faizi Link: https://lore.kernel.org/r/7071a0a1d751221538b20b63f9160094fc7e06f4.1668630247.git.metze@samba.org Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 9d4c4078e8d0..2780bce62faf 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -10,7 +10,15 @@ #include #include +/* + * this file is shared with liburing and that has to autodetect + * if linux/time_types.h is available or not, it can + * define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H + * if linux/time_types.h is not available + */ +#ifndef UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H #include +#endif #ifdef __cplusplus extern "C" {