io_uring-5.12-2021-03-12

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmBLtdcQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpqK9D/9sE6QDAmLCvW4+wsFawf+Md9tCE3F15quC
 Tptsa6IoR2UB01d06uavLJ5sGo0LeVQQP8+Nygz0TM7jSV39Odmr8geP8wyqSQwP
 ZHLasrnz3LGINFOmxwMz/xQbrYUXEhRah+nx9Me0ROWmtQ46MRBZlpjsxffKccC9
 SdkS6R8chfc/6HT6oQXMRRDtB4U4SjDdeX6VFIW5E2Z62h0xjhZrmY42fPmChjXR
 mmAa2medSmajlwKrmp/+6sCfu2vVRR7bZ5FbS/SoQyo3ZvMabXI3lWicSgtu1wAK
 iK9NFJEuJ34Fj4RxTSwQrj0eRX5BqZpWHUJ/1ecxc4tDRtaIXZuzPtblYrZ5fwYe
 5pBzXXNpVwhat1AvGp9BFH/4P3kxJDszUAuL7zRut6nHu8xFGDGbNJHezCtws/uZ
 i+90Qt5sfoYyXgMDAZuXS7AkJXKbdnajpwjXmZheL3MEj2EsVylcTVaW0MBdVjx1
 y0eAtOGUVj2rNOSthDT0ZlKql7PY9N3dhkRxJIzRlIIfBfg73UWkis7zOlFE8CCz
 y0rtsu+v/u22mU17v6gdVnTls/vbfiGSg4SutEK2Rv/Qqbjr+po+RXK14BJKBJR9
 JknAkQlBjagZmLZKlzRfCDqa62aFYwxC/eOeLGxSpInj0ncgKmWNpnFjXSyRBdPq
 stOCQF5aHQ==
 =40h0
 -----END PGP SIGNATURE-----

Merge tag 'io_uring-5.12-2021-03-12' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
 "Not quite as small this week as I had hoped, but at least this should
  be the end of it. All the little known issues have been ironed out -
  most of it little stuff, but cancelations being the bigger part. Only
  minor tweaks and/or regular fixes expected beyond this point.

   - Fix the creds tracking for async (io-wq and SQPOLL)

   - Various SQPOLL fixes related to parking, sharing, forking, IOPOLL,
     completions, and life times. Much simpler now.

   - Make IO threads unfreezable by default, on account of a bug report
     that had them spinning on resume. Honestly not quite sure why
     thawing leaves us with a perpetual signal pending (causing the
     spin), but for now make them unfreezable like there were in 5.11
     and prior.

   - Move personality_idr to xarray, solving a use-after-free related to
     removing an entry from the iterator callback. Buffer idr needs the
     same treatment.

   - Re-org around and task vs context tracking, enabling the fixing of
     cancelations, and then cancelation fixes on top.

   - Various little bits of cleanups and hardening, and removal of now
     dead parts"

* tag 'io_uring-5.12-2021-03-12' of git://git.kernel.dk/linux-block: (34 commits)
  io_uring: fix OP_ASYNC_CANCEL across tasks
  io_uring: cancel sqpoll via task_work
  io_uring: prevent racy sqd->thread checks
  io_uring: remove useless ->startup completion
  io_uring: cancel deferred requests in try_cancel
  io_uring: perform IOPOLL reaping if canceler is thread itself
  io_uring: force creation of separate context for ATTACH_WQ and non-threads
  io_uring: remove indirect ctx into sqo injection
  io_uring: fix invalid ctx->sq_thread_idle
  kernel: make IO threads unfreezable by default
  io_uring: always wait for sqd exited when stopping SQPOLL thread
  io_uring: remove unneeded variable 'ret'
  io_uring: move all io_kiocb init early in io_init_req()
  io-wq: fix ref leak for req in case of exit cancelations
  io_uring: fix complete_post races for linked req
  io_uring: add io_disarm_next() helper
  io_uring: fix io_sq_offload_create error handling
  io-wq: remove unused 'user' member of io_wq
  io_uring: Convert personality_idr to XArray
  io_uring: clean R_DISABLED startup mess
  ...
This commit is contained in:
Linus Torvalds 2021-03-12 13:13:57 -08:00
commit 9278be92f2
5 changed files with 479 additions and 406 deletions

View File

@ -110,7 +110,6 @@ struct io_wq {
io_wq_work_fn *do_work; io_wq_work_fn *do_work;
struct task_struct *manager; struct task_struct *manager;
struct user_struct *user;
struct io_wq_hash *hash; struct io_wq_hash *hash;
@ -592,7 +591,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
tsk->pf_io_worker = worker; tsk->pf_io_worker = worker;
worker->task = tsk; worker->task = tsk;
set_cpus_allowed_ptr(tsk, cpumask_of_node(wqe->node)); set_cpus_allowed_ptr(tsk, cpumask_of_node(wqe->node));
tsk->flags |= PF_NOFREEZE | PF_NO_SETAFFINITY; tsk->flags |= PF_NO_SETAFFINITY;
raw_spin_lock_irq(&wqe->lock); raw_spin_lock_irq(&wqe->lock);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
@ -710,7 +709,6 @@ static int io_wq_manager(void *data)
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
io_wq_check_workers(wq); io_wq_check_workers(wq);
schedule_timeout(HZ); schedule_timeout(HZ);
try_to_freeze();
if (fatal_signal_pending(current)) if (fatal_signal_pending(current))
set_bit(IO_WQ_BIT_EXIT, &wq->state); set_bit(IO_WQ_BIT_EXIT, &wq->state);
} while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)); } while (!test_bit(IO_WQ_BIT_EXIT, &wq->state));
@ -722,9 +720,9 @@ static int io_wq_manager(void *data)
io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL); io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
rcu_read_unlock(); rcu_read_unlock();
/* we might not ever have created any workers */ if (atomic_dec_and_test(&wq->worker_refs))
if (atomic_read(&wq->worker_refs)) complete(&wq->worker_done);
wait_for_completion(&wq->worker_done); wait_for_completion(&wq->worker_done);
spin_lock_irq(&wq->hash->wait.lock); spin_lock_irq(&wq->hash->wait.lock);
for_each_node(node) for_each_node(node)
@ -774,7 +772,10 @@ static int io_wq_fork_manager(struct io_wq *wq)
if (wq->manager) if (wq->manager)
return 0; return 0;
reinit_completion(&wq->worker_done); WARN_ON_ONCE(test_bit(IO_WQ_BIT_EXIT, &wq->state));
init_completion(&wq->worker_done);
atomic_set(&wq->worker_refs, 1);
tsk = create_io_thread(io_wq_manager, wq, NUMA_NO_NODE); tsk = create_io_thread(io_wq_manager, wq, NUMA_NO_NODE);
if (!IS_ERR(tsk)) { if (!IS_ERR(tsk)) {
wq->manager = get_task_struct(tsk); wq->manager = get_task_struct(tsk);
@ -782,6 +783,9 @@ static int io_wq_fork_manager(struct io_wq *wq)
return 0; return 0;
} }
if (atomic_dec_and_test(&wq->worker_refs))
complete(&wq->worker_done);
return PTR_ERR(tsk); return PTR_ERR(tsk);
} }
@ -794,8 +798,7 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
/* Can only happen if manager creation fails after exec */ /* Can only happen if manager creation fails after exec */
if (io_wq_fork_manager(wqe->wq) || if (io_wq_fork_manager(wqe->wq) ||
test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state)) { test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state)) {
work->flags |= IO_WQ_WORK_CANCEL; io_run_cancel(work, wqe);
wqe->wq->do_work(work);
return; return;
} }
@ -1018,13 +1021,9 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
init_completion(&wq->exited); init_completion(&wq->exited);
refcount_set(&wq->refs, 1); refcount_set(&wq->refs, 1);
init_completion(&wq->worker_done);
atomic_set(&wq->worker_refs, 0);
ret = io_wq_fork_manager(wq); ret = io_wq_fork_manager(wq);
if (!ret) if (!ret)
return wq; return wq;
err: err:
io_wq_put_hash(data->hash); io_wq_put_hash(data->hash);
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);

View File

@ -79,8 +79,8 @@ static inline void wq_list_del(struct io_wq_work_list *list,
struct io_wq_work { struct io_wq_work {
struct io_wq_work_node list; struct io_wq_work_node list;
const struct cred *creds;
unsigned flags; unsigned flags;
unsigned short personality;
}; };
static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)

File diff suppressed because it is too large Load Diff

View File

@ -18,7 +18,7 @@ struct io_uring_task {
/* submission side */ /* submission side */
struct xarray xa; struct xarray xa;
struct wait_queue_head wait; struct wait_queue_head wait;
struct file *last; void *last;
void *io_wq; void *io_wq;
struct percpu_counter inflight; struct percpu_counter inflight;
atomic_t in_idle; atomic_t in_idle;

View File

@ -2436,6 +2436,7 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
if (!IS_ERR(tsk)) { if (!IS_ERR(tsk)) {
sigfillset(&tsk->blocked); sigfillset(&tsk->blocked);
sigdelsetmask(&tsk->blocked, sigmask(SIGKILL)); sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
tsk->flags |= PF_NOFREEZE;
} }
return tsk; return tsk;
} }