io_uring: fix racy req->flags modification
Setting and clearing REQ_F_OVERFLOW in io_uring_cancel_files() and io_cqring_overflow_flush() are racy, because they might be called asynchronously. REQ_F_OVERFLOW flag in only needed for files cancellation, so if it can be guaranteed that requests _currently_ marked inflight can't be overflown, the problem will be solved with removing the flag altogether. That's how the patch works, it removes inflight status of a request in io_cqring_fill_event() whenever it should be thrown into CQ-overflow list. That's Ok to do, because no opcode specific handling can be done after io_cqring_fill_event(), the same assumption as with "struct io_completion" patches. And it already have a good place for such cleanups, which is io_clean_op(). A nice side effect of this is removing this inflight check from the hot path. note on synchronisation: now __io_cqring_fill_event() may be taking two spinlocks simultaneously, completion_lock and inflight_lock. It's fine, because we never do that in reverse order, and CQ-overflow of inflight requests shouldn't happen often. Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
fc666777da
commit
bb175342aa
@ -540,7 +540,6 @@ enum {
|
|||||||
REQ_F_ISREG_BIT,
|
REQ_F_ISREG_BIT,
|
||||||
REQ_F_COMP_LOCKED_BIT,
|
REQ_F_COMP_LOCKED_BIT,
|
||||||
REQ_F_NEED_CLEANUP_BIT,
|
REQ_F_NEED_CLEANUP_BIT,
|
||||||
REQ_F_OVERFLOW_BIT,
|
|
||||||
REQ_F_POLLED_BIT,
|
REQ_F_POLLED_BIT,
|
||||||
REQ_F_BUFFER_SELECTED_BIT,
|
REQ_F_BUFFER_SELECTED_BIT,
|
||||||
REQ_F_NO_FILE_TABLE_BIT,
|
REQ_F_NO_FILE_TABLE_BIT,
|
||||||
@ -583,8 +582,6 @@ enum {
|
|||||||
REQ_F_COMP_LOCKED = BIT(REQ_F_COMP_LOCKED_BIT),
|
REQ_F_COMP_LOCKED = BIT(REQ_F_COMP_LOCKED_BIT),
|
||||||
/* needs cleanup */
|
/* needs cleanup */
|
||||||
REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT),
|
REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT),
|
||||||
/* in overflow list */
|
|
||||||
REQ_F_OVERFLOW = BIT(REQ_F_OVERFLOW_BIT),
|
|
||||||
/* already went through poll handler */
|
/* already went through poll handler */
|
||||||
REQ_F_POLLED = BIT(REQ_F_POLLED_BIT),
|
REQ_F_POLLED = BIT(REQ_F_POLLED_BIT),
|
||||||
/* buffer already selected */
|
/* buffer already selected */
|
||||||
@ -946,7 +943,8 @@ static void io_get_req_task(struct io_kiocb *req)
|
|||||||
|
|
||||||
static inline void io_clean_op(struct io_kiocb *req)
|
static inline void io_clean_op(struct io_kiocb *req)
|
||||||
{
|
{
|
||||||
if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED))
|
if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED |
|
||||||
|
REQ_F_INFLIGHT))
|
||||||
__io_clean_op(req);
|
__io_clean_op(req);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1366,7 +1364,6 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
|
|||||||
req = list_first_entry(&ctx->cq_overflow_list, struct io_kiocb,
|
req = list_first_entry(&ctx->cq_overflow_list, struct io_kiocb,
|
||||||
compl.list);
|
compl.list);
|
||||||
list_move(&req->compl.list, &list);
|
list_move(&req->compl.list, &list);
|
||||||
req->flags &= ~REQ_F_OVERFLOW;
|
|
||||||
if (cqe) {
|
if (cqe) {
|
||||||
WRITE_ONCE(cqe->user_data, req->user_data);
|
WRITE_ONCE(cqe->user_data, req->user_data);
|
||||||
WRITE_ONCE(cqe->res, req->result);
|
WRITE_ONCE(cqe->res, req->result);
|
||||||
@ -1419,7 +1416,6 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
|
|||||||
ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
|
ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
|
||||||
}
|
}
|
||||||
io_clean_op(req);
|
io_clean_op(req);
|
||||||
req->flags |= REQ_F_OVERFLOW;
|
|
||||||
req->result = res;
|
req->result = res;
|
||||||
req->compl.cflags = cflags;
|
req->compl.cflags = cflags;
|
||||||
refcount_inc(&req->refs);
|
refcount_inc(&req->refs);
|
||||||
@ -1563,17 +1559,6 @@ static bool io_dismantle_req(struct io_kiocb *req)
|
|||||||
if (req->file)
|
if (req->file)
|
||||||
io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
|
io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
|
||||||
|
|
||||||
if (req->flags & REQ_F_INFLIGHT) {
|
|
||||||
struct io_ring_ctx *ctx = req->ctx;
|
|
||||||
unsigned long flags;
|
|
||||||
|
|
||||||
spin_lock_irqsave(&ctx->inflight_lock, flags);
|
|
||||||
list_del(&req->inflight_entry);
|
|
||||||
if (waitqueue_active(&ctx->inflight_wait))
|
|
||||||
wake_up(&ctx->inflight_wait);
|
|
||||||
spin_unlock_irqrestore(&ctx->inflight_lock, flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
return io_req_clean_work(req);
|
return io_req_clean_work(req);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5634,6 +5619,18 @@ static void __io_clean_op(struct io_kiocb *req)
|
|||||||
}
|
}
|
||||||
req->flags &= ~REQ_F_NEED_CLEANUP;
|
req->flags &= ~REQ_F_NEED_CLEANUP;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (req->flags & REQ_F_INFLIGHT) {
|
||||||
|
struct io_ring_ctx *ctx = req->ctx;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
spin_lock_irqsave(&ctx->inflight_lock, flags);
|
||||||
|
list_del(&req->inflight_entry);
|
||||||
|
if (waitqueue_active(&ctx->inflight_wait))
|
||||||
|
wake_up(&ctx->inflight_wait);
|
||||||
|
spin_unlock_irqrestore(&ctx->inflight_lock, flags);
|
||||||
|
req->flags &= ~REQ_F_INFLIGHT;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
|
static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
|
||||||
@ -8108,33 +8105,9 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
|
|||||||
/* We need to keep going until we don't find a matching req */
|
/* We need to keep going until we don't find a matching req */
|
||||||
if (!cancel_req)
|
if (!cancel_req)
|
||||||
break;
|
break;
|
||||||
|
/* cancel this request, or head link requests */
|
||||||
if (cancel_req->flags & REQ_F_OVERFLOW) {
|
io_attempt_cancel(ctx, cancel_req);
|
||||||
spin_lock_irq(&ctx->completion_lock);
|
io_put_req(cancel_req);
|
||||||
list_del(&cancel_req->compl.list);
|
|
||||||
cancel_req->flags &= ~REQ_F_OVERFLOW;
|
|
||||||
|
|
||||||
io_cqring_mark_overflow(ctx);
|
|
||||||
WRITE_ONCE(ctx->rings->cq_overflow,
|
|
||||||
atomic_inc_return(&ctx->cached_cq_overflow));
|
|
||||||
io_commit_cqring(ctx);
|
|
||||||
spin_unlock_irq(&ctx->completion_lock);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Put inflight ref and overflow ref. If that's
|
|
||||||
* all we had, then we're done with this request.
|
|
||||||
*/
|
|
||||||
if (refcount_sub_and_test(2, &cancel_req->refs)) {
|
|
||||||
io_free_req(cancel_req);
|
|
||||||
finish_wait(&ctx->inflight_wait, &wait);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
/* cancel this request, or head link requests */
|
|
||||||
io_attempt_cancel(ctx, cancel_req);
|
|
||||||
io_put_req(cancel_req);
|
|
||||||
}
|
|
||||||
|
|
||||||
schedule();
|
schedule();
|
||||||
finish_wait(&ctx->inflight_wait, &wait);
|
finish_wait(&ctx->inflight_wait, &wait);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user