io_uring: fix racy req->flags modification

Setting and clearing REQ_F_OVERFLOW in io_uring_cancel_files() and
io_cqring_overflow_flush() are racy, because they might be called
asynchronously.

REQ_F_OVERFLOW flag in only needed for files cancellation, so if it can
be guaranteed that requests _currently_ marked inflight can't be
overflown, the problem will be solved with removing the flag
altogether.

That's how the patch works, it removes inflight status of a request
in io_cqring_fill_event() whenever it should be thrown into CQ-overflow
list. That's Ok to do, because no opcode specific handling can be done
after io_cqring_fill_event(), the same assumption as with "struct
io_completion" patches.
And it already have a good place for such cleanups, which is
io_clean_op(). A nice side effect of this is removing this inflight
check from the hot path.

note on synchronisation: now __io_cqring_fill_event() may be taking two
spinlocks simultaneously, completion_lock and inflight_lock. It's fine,
because we never do that in reverse order, and CQ-overflow of inflight
requests shouldn't happen often.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Pavel Begunkov 2020-08-20 11:33:35 +03:00 committed by Jens Axboe
parent fc666777da
commit bb175342aa

View File

@ -540,7 +540,6 @@ enum {
REQ_F_ISREG_BIT, REQ_F_ISREG_BIT,
REQ_F_COMP_LOCKED_BIT, REQ_F_COMP_LOCKED_BIT,
REQ_F_NEED_CLEANUP_BIT, REQ_F_NEED_CLEANUP_BIT,
REQ_F_OVERFLOW_BIT,
REQ_F_POLLED_BIT, REQ_F_POLLED_BIT,
REQ_F_BUFFER_SELECTED_BIT, REQ_F_BUFFER_SELECTED_BIT,
REQ_F_NO_FILE_TABLE_BIT, REQ_F_NO_FILE_TABLE_BIT,
@ -583,8 +582,6 @@ enum {
REQ_F_COMP_LOCKED = BIT(REQ_F_COMP_LOCKED_BIT), REQ_F_COMP_LOCKED = BIT(REQ_F_COMP_LOCKED_BIT),
/* needs cleanup */ /* needs cleanup */
REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT), REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT),
/* in overflow list */
REQ_F_OVERFLOW = BIT(REQ_F_OVERFLOW_BIT),
/* already went through poll handler */ /* already went through poll handler */
REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), REQ_F_POLLED = BIT(REQ_F_POLLED_BIT),
/* buffer already selected */ /* buffer already selected */
@ -946,7 +943,8 @@ static void io_get_req_task(struct io_kiocb *req)
static inline void io_clean_op(struct io_kiocb *req) static inline void io_clean_op(struct io_kiocb *req)
{ {
if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED)) if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED |
REQ_F_INFLIGHT))
__io_clean_op(req); __io_clean_op(req);
} }
@ -1366,7 +1364,6 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
req = list_first_entry(&ctx->cq_overflow_list, struct io_kiocb, req = list_first_entry(&ctx->cq_overflow_list, struct io_kiocb,
compl.list); compl.list);
list_move(&req->compl.list, &list); list_move(&req->compl.list, &list);
req->flags &= ~REQ_F_OVERFLOW;
if (cqe) { if (cqe) {
WRITE_ONCE(cqe->user_data, req->user_data); WRITE_ONCE(cqe->user_data, req->user_data);
WRITE_ONCE(cqe->res, req->result); WRITE_ONCE(cqe->res, req->result);
@ -1419,7 +1416,6 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW; ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
} }
io_clean_op(req); io_clean_op(req);
req->flags |= REQ_F_OVERFLOW;
req->result = res; req->result = res;
req->compl.cflags = cflags; req->compl.cflags = cflags;
refcount_inc(&req->refs); refcount_inc(&req->refs);
@ -1563,17 +1559,6 @@ static bool io_dismantle_req(struct io_kiocb *req)
if (req->file) if (req->file)
io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE)); io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
if (req->flags & REQ_F_INFLIGHT) {
struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;
spin_lock_irqsave(&ctx->inflight_lock, flags);
list_del(&req->inflight_entry);
if (waitqueue_active(&ctx->inflight_wait))
wake_up(&ctx->inflight_wait);
spin_unlock_irqrestore(&ctx->inflight_lock, flags);
}
return io_req_clean_work(req); return io_req_clean_work(req);
} }
@ -5634,6 +5619,18 @@ static void __io_clean_op(struct io_kiocb *req)
} }
req->flags &= ~REQ_F_NEED_CLEANUP; req->flags &= ~REQ_F_NEED_CLEANUP;
} }
if (req->flags & REQ_F_INFLIGHT) {
struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;
spin_lock_irqsave(&ctx->inflight_lock, flags);
list_del(&req->inflight_entry);
if (waitqueue_active(&ctx->inflight_wait))
wake_up(&ctx->inflight_wait);
spin_unlock_irqrestore(&ctx->inflight_lock, flags);
req->flags &= ~REQ_F_INFLIGHT;
}
} }
static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
@ -8108,33 +8105,9 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
/* We need to keep going until we don't find a matching req */ /* We need to keep going until we don't find a matching req */
if (!cancel_req) if (!cancel_req)
break; break;
/* cancel this request, or head link requests */
if (cancel_req->flags & REQ_F_OVERFLOW) { io_attempt_cancel(ctx, cancel_req);
spin_lock_irq(&ctx->completion_lock); io_put_req(cancel_req);
list_del(&cancel_req->compl.list);
cancel_req->flags &= ~REQ_F_OVERFLOW;
io_cqring_mark_overflow(ctx);
WRITE_ONCE(ctx->rings->cq_overflow,
atomic_inc_return(&ctx->cached_cq_overflow));
io_commit_cqring(ctx);
spin_unlock_irq(&ctx->completion_lock);
/*
* Put inflight ref and overflow ref. If that's
* all we had, then we're done with this request.
*/
if (refcount_sub_and_test(2, &cancel_req->refs)) {
io_free_req(cancel_req);
finish_wait(&ctx->inflight_wait, &wait);
continue;
}
} else {
/* cancel this request, or head link requests */
io_attempt_cancel(ctx, cancel_req);
io_put_req(cancel_req);
}
schedule(); schedule();
finish_wait(&ctx->inflight_wait, &wait); finish_wait(&ctx->inflight_wait, &wait);
} }