io_uring: fix REQ_F_COMP_LOCKED by killing it
REQ_F_COMP_LOCKED is used and implemented in a buggy way. The problem is that the flag is set before io_put_req() but not cleared after, and if that wasn't the final reference, the request will be freed with the flag set from some other context, which may not hold a spinlock. That means possible races with removing linked timeouts and unsynchronised completion (e.g. access to CQ).

Instead of fixing REQ_F_COMP_LOCKED, kill the flag and use task_work_add() to move such requests to a fresh context to free from it, as was done with __io_free_req_finish().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
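The hand-off the patch relies on can be pictured outside the kernel: when the context that drops the last reference may be holding a lock (and so must not run the teardown inline), it passes the object to another context and lets that context free it. The sketch below is a loose userspace analogue of that pattern, not the io_uring code; struct request, put_req_deferred() and the pthread worker are invented for illustration, with the worker thread standing in for task_work_add().

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct request {
	atomic_int refs;
	int result;
};

/* Single-slot hand-off queue standing in for task_work_add(). */
static pthread_mutex_t tw_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  tw_cond = PTHREAD_COND_INITIALIZER;
static struct request *tw_pending;
static int tw_stop;

/* Teardown that must never run while a "completion lock" is held. */
static void free_req(struct request *req)
{
	printf("freeing request, result=%d\n", req->result);
	free(req);
}

/* Analogue of io_put_req_deferred(): drop refs, defer the real free. */
static void put_req_deferred(struct request *req, int refs)
{
	/* fetch_sub returns the old value; old == refs means we hit zero */
	if (atomic_fetch_sub(&req->refs, refs) == refs) {
		pthread_mutex_lock(&tw_lock);
		tw_pending = req;               /* hand off, don't free inline */
		pthread_cond_signal(&tw_cond);
		pthread_mutex_unlock(&tw_lock);
	}
}

/* Worker thread playing the role of the task that runs the task_work. */
static void *tw_worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&tw_lock);
	while (!tw_stop) {
		if (tw_pending) {
			struct request *req = tw_pending;

			tw_pending = NULL;
			pthread_mutex_unlock(&tw_lock);
			free_req(req);          /* safe context: no locks held */
			pthread_mutex_lock(&tw_lock);
		} else {
			pthread_cond_wait(&tw_cond, &tw_lock);
		}
	}
	pthread_mutex_unlock(&tw_lock);
	return NULL;
}

int main(void)
{
	pthread_t thr;
	struct request *req = malloc(sizeof(*req));

	atomic_init(&req->refs, 2);             /* submit + completion refs */
	req->result = 0;
	pthread_create(&thr, NULL, tw_worker, NULL);

	put_req_deferred(req, 1);               /* e.g. while "under a lock" */
	put_req_deferred(req, 1);               /* final ref: free still deferred */

	sleep(1);                               /* let the worker drain */
	pthread_mutex_lock(&tw_lock);
	tw_stop = 1;
	pthread_cond_signal(&tw_cond);
	pthread_mutex_unlock(&tw_lock);
	pthread_join(thr, NULL);
	return 0;
}

In the patch itself, io_put_req_deferred() plays the put_req_deferred() role, and the work is queued to the request owner's task via io_req_task_work_add(), falling back to an io-wq worker when that fails.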
commit 216578e55a
parent 4edf20f999

fs/io_uring.c: 149 changed lines
diff --git a/fs/io_uring.c b/fs/io_uring.c
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -574,7 +574,6 @@ enum {
 	REQ_F_NOWAIT_BIT,
 	REQ_F_LINK_TIMEOUT_BIT,
 	REQ_F_ISREG_BIT,
-	REQ_F_COMP_LOCKED_BIT,
 	REQ_F_NEED_CLEANUP_BIT,
 	REQ_F_POLLED_BIT,
 	REQ_F_BUFFER_SELECTED_BIT,
@@ -613,8 +612,6 @@ enum {
 	REQ_F_LINK_TIMEOUT	= BIT(REQ_F_LINK_TIMEOUT_BIT),
 	/* regular file */
 	REQ_F_ISREG		= BIT(REQ_F_ISREG_BIT),
-	/* completion under lock */
-	REQ_F_COMP_LOCKED	= BIT(REQ_F_COMP_LOCKED_BIT),
 	/* needs cleanup */
 	REQ_F_NEED_CLEANUP	= BIT(REQ_F_NEED_CLEANUP_BIT),
 	/* already went through poll handler */
@@ -963,8 +960,8 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
 			     struct io_comp_state *cs);
 static void io_cqring_fill_event(struct io_kiocb *req, long res);
 static void io_put_req(struct io_kiocb *req);
+static void io_put_req_deferred(struct io_kiocb *req, int nr);
 static void io_double_put_req(struct io_kiocb *req);
-static void __io_double_put_req(struct io_kiocb *req);
 static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req);
 static void __io_queue_linked_timeout(struct io_kiocb *req);
 static void io_queue_linked_timeout(struct io_kiocb *req);
@@ -1316,9 +1313,8 @@ static void io_kill_timeout(struct io_kiocb *req)
 		atomic_set(&req->ctx->cq_timeouts,
 			atomic_read(&req->ctx->cq_timeouts) + 1);
 		list_del_init(&req->timeout.list);
-		req->flags |= REQ_F_COMP_LOCKED;
 		io_cqring_fill_event(req, 0);
-		io_put_req(req);
+		io_put_req_deferred(req, 1);
 	}
 }
 
@@ -1369,8 +1365,7 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx)
 		if (link) {
 			__io_queue_linked_timeout(link);
 			/* drop submission reference */
-			link->flags |= REQ_F_COMP_LOCKED;
-			io_put_req(link);
+			io_put_req_deferred(link, 1);
 		}
 		kfree(de);
 	} while (!list_empty(&ctx->defer_list));
@@ -1597,13 +1592,19 @@ static void io_submit_flush_completions(struct io_comp_state *cs)
 		req = list_first_entry(&cs->list, struct io_kiocb, compl.list);
 		list_del(&req->compl.list);
 		__io_cqring_fill_event(req, req->result, req->compl.cflags);
-		if (!(req->flags & REQ_F_LINK_HEAD)) {
-			req->flags |= REQ_F_COMP_LOCKED;
-			io_put_req(req);
-		} else {
+
+		/*
+		 * io_free_req() doesn't care about completion_lock unless one
+		 * of these flags is set. REQ_F_WORK_INITIALIZED is in the list
+		 * because of a potential deadlock with req->work.fs->lock
+		 */
+		if (req->flags & (REQ_F_FAIL_LINK|REQ_F_LINK_TIMEOUT
+				 |REQ_F_WORK_INITIALIZED)) {
 			spin_unlock_irq(&ctx->completion_lock);
 			io_put_req(req);
 			spin_lock_irq(&ctx->completion_lock);
+		} else {
+			io_put_req(req);
 		}
 	}
 	io_commit_cqring(ctx);
@@ -1702,10 +1703,14 @@ static void io_dismantle_req(struct io_kiocb *req)
 	io_req_clean_work(req);
 }
 
-static void __io_free_req_finish(struct io_kiocb *req)
+static void __io_free_req(struct io_kiocb *req)
 {
-	struct io_uring_task *tctx = req->task->io_uring;
-	struct io_ring_ctx *ctx = req->ctx;
+	struct io_uring_task *tctx;
+	struct io_ring_ctx *ctx;
+
+	io_dismantle_req(req);
+	tctx = req->task->io_uring;
+	ctx = req->ctx;
 
 	atomic_long_inc(&tctx->req_complete);
 	if (tctx->in_idle)
@@ -1719,33 +1724,6 @@ static void __io_free_req_finish(struct io_kiocb *req)
 	percpu_ref_put(&ctx->refs);
 }
 
-static void io_req_task_file_table_put(struct callback_head *cb)
-{
-	struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
-
-	io_dismantle_req(req);
-	__io_free_req_finish(req);
-}
-
-static void __io_free_req(struct io_kiocb *req)
-{
-	if (!(req->flags & REQ_F_COMP_LOCKED)) {
-		io_dismantle_req(req);
-		__io_free_req_finish(req);
-	} else {
-		int ret;
-
-		init_task_work(&req->task_work, io_req_task_file_table_put);
-		ret = task_work_add(req->task, &req->task_work, TWA_RESUME);
-		if (unlikely(ret)) {
-			struct task_struct *tsk;
-
-			tsk = io_wq_get_task(req->ctx->io_wq);
-			task_work_add(tsk, &req->task_work, 0);
-		}
-	}
-}
-
 static bool io_link_cancel_timeout(struct io_kiocb *req)
 {
 	struct io_timeout_data *io = req->async_data;
@@ -1754,11 +1732,10 @@ static bool io_link_cancel_timeout(struct io_kiocb *req)
 
 	ret = hrtimer_try_to_cancel(&io->timer);
 	if (ret != -1) {
-		req->flags |= REQ_F_COMP_LOCKED;
 		io_cqring_fill_event(req, -ECANCELED);
 		io_commit_cqring(ctx);
 		req->flags &= ~REQ_F_LINK_HEAD;
-		io_put_req(req);
+		io_put_req_deferred(req, 1);
 		return true;
 	}
 
@@ -1785,17 +1762,12 @@ static bool __io_kill_linked_timeout(struct io_kiocb *req)
 static void io_kill_linked_timeout(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
+	unsigned long flags;
 	bool wake_ev;
 
-	if (!(req->flags & REQ_F_COMP_LOCKED)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&ctx->completion_lock, flags);
-		wake_ev = __io_kill_linked_timeout(req);
-		spin_unlock_irqrestore(&ctx->completion_lock, flags);
-	} else {
-		wake_ev = __io_kill_linked_timeout(req);
-	}
+	spin_lock_irqsave(&ctx->completion_lock, flags);
+	wake_ev = __io_kill_linked_timeout(req);
+	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
 	if (wake_ev)
 		io_cqring_ev_posted(ctx);
@@ -1835,27 +1807,29 @@ static void __io_fail_links(struct io_kiocb *req)
 		trace_io_uring_fail_link(req, link);
 
 		io_cqring_fill_event(link, -ECANCELED);
-		link->flags |= REQ_F_COMP_LOCKED;
-		__io_double_put_req(link);
+
+		/*
+		 * It's ok to free under spinlock as they're not linked anymore,
+		 * but avoid REQ_F_WORK_INITIALIZED because it may deadlock on
+		 * work.fs->lock.
+		 */
+		if (link->flags & REQ_F_WORK_INITIALIZED)
+			io_put_req_deferred(link, 2);
+		else
+			io_double_put_req(link);
 	}
 
 	io_commit_cqring(ctx);
-	io_cqring_ev_posted(ctx);
 }
 
 static void io_fail_links(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
+	unsigned long flags;
 
-	if (!(req->flags & REQ_F_COMP_LOCKED)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&ctx->completion_lock, flags);
-		__io_fail_links(req);
-		spin_unlock_irqrestore(&ctx->completion_lock, flags);
-	} else {
-		__io_fail_links(req);
-	}
+	spin_lock_irqsave(&ctx->completion_lock, flags);
+	__io_fail_links(req);
+	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
 	io_cqring_ev_posted(ctx);
 }
@@ -2069,6 +2043,34 @@ static void io_put_req(struct io_kiocb *req)
 		io_free_req(req);
 }
 
+static void io_put_req_deferred_cb(struct callback_head *cb)
+{
+	struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
+
+	io_free_req(req);
+}
+
+static void io_free_req_deferred(struct io_kiocb *req)
+{
+	int ret;
+
+	init_task_work(&req->task_work, io_put_req_deferred_cb);
+	ret = io_req_task_work_add(req, true);
+	if (unlikely(ret)) {
+		struct task_struct *tsk;
+
+		tsk = io_wq_get_task(req->ctx->io_wq);
+		task_work_add(tsk, &req->task_work, 0);
+		wake_up_process(tsk);
+	}
+}
+
+static inline void io_put_req_deferred(struct io_kiocb *req, int refs)
+{
+	if (refcount_sub_and_test(refs, &req->refs))
+		io_free_req_deferred(req);
+}
+
 static struct io_wq_work *io_steal_work(struct io_kiocb *req)
 {
 	struct io_kiocb *nxt;
@@ -2085,17 +2087,6 @@ static struct io_wq_work *io_steal_work(struct io_kiocb *req)
 	return nxt ? &nxt->work : NULL;
 }
 
-/*
- * Must only be used if we don't need to care about links, usually from
- * within the completion handling itself.
- */
-static void __io_double_put_req(struct io_kiocb *req)
-{
-	/* drop both submit and complete references */
-	if (refcount_sub_and_test(2, &req->refs))
-		__io_free_req(req);
-}
-
 static void io_double_put_req(struct io_kiocb *req)
 {
 	/* drop both submit and complete references */
@@ -5127,9 +5118,8 @@ static bool io_poll_remove_one(struct io_kiocb *req)
 	if (do_complete) {
 		io_cqring_fill_event(req, -ECANCELED);
 		io_commit_cqring(req->ctx);
-		req->flags |= REQ_F_COMP_LOCKED;
 		req_set_fail_links(req);
-		io_put_req(req);
+		io_put_req_deferred(req, 1);
 	}
 
 	return do_complete;
@@ -5311,9 +5301,8 @@ static int __io_timeout_cancel(struct io_kiocb *req)
 	list_del_init(&req->timeout.list);
 
 	req_set_fail_links(req);
-	req->flags |= REQ_F_COMP_LOCKED;
 	io_cqring_fill_event(req, -ECANCELED);
-	io_put_req(req);
+	io_put_req_deferred(req, 1);
 	return 0;
 }
 