2019-10-22 19:25:58 +03:00
// SPDX-License-Identifier: GPL-2.0
/*
 * Basic worker thread pool for io_uring
 *
 * Copyright (C) 2019 Jens Axboe
 *
 */
# include <linux/kernel.h>
# include <linux/init.h>
# include <linux/errno.h>
# include <linux/sched/signal.h>
# include <linux/mm.h>
# include <linux/sched/mm.h>
# include <linux/percpu.h>
# include <linux/slab.h>
# include <linux/rculist_nulls.h>
2020-10-22 18:02:50 +03:00
# include <linux/cpu.h>
2021-02-17 00:15:30 +03:00
# include <linux/tracehook.h>
2019-10-22 19:25:58 +03:00
2020-10-22 18:02:50 +03:00
# include "../kernel/sched/sched.h"
2019-10-22 19:25:58 +03:00
# include "io-wq.h"
/*
 * How long an idle worker sleeps in schedule_timeout() before it wakes up
 * and decides whether to exit (see io_wqe_worker()).
 */
# define WORKER_IDLE_TIMEOUT (5 * HZ)
/* Per-worker state bits, stored in io_worker->flags */
enum {
	IO_WORKER_F_UP		= 1 << 0,	/* up and active */
	IO_WORKER_F_RUNNING	= 1 << 1,	/* account as running */
	IO_WORKER_F_FREE	= 1 << 2,	/* worker on free list */
	IO_WORKER_F_FIXED	= 1 << 3,	/* static idle worker */
	IO_WORKER_F_BOUND	= 1 << 4,	/* is doing bounded work */
};
/* Bit numbers used with test_bit()/set_bit() on io_wq->state */
enum {
	IO_WQ_BIT_EXIT	= 0,	/* wq exiting */
	IO_WQ_BIT_ERROR	= 1,	/* error on setup */
};
/* Per-wqe state bits, stored in io_wqe->flags */
enum {
	IO_WQE_FLAG_STALLED	= 1 << 0,	/* stalled on hash */
};
/*
* One for each thread in a wqe pool
*/
struct io_worker {
refcount_t ref ;
unsigned flags ;
struct hlist_nulls_node nulls_node ;
2019-11-13 23:54:49 +03:00
struct list_head all_list ;
2019-10-22 19:25:58 +03:00
struct task_struct * task ;
struct io_wqe * wqe ;
2019-11-13 19:43:34 +03:00
2019-10-22 19:25:58 +03:00
struct io_wq_work * cur_work ;
2019-11-13 19:43:34 +03:00
spinlock_t lock ;
2019-10-22 19:25:58 +03:00
2021-02-15 23:40:22 +03:00
const struct cred * cur_creds ;
const struct cred * saved_creds ;
2021-02-24 05:59:06 +03:00
struct completion ref_done ;
2019-10-22 19:25:58 +03:00
struct rcu_head rcu ;
} ;
/*
 * Width, in bits, of a work hash value: 6 when a long is 64 bits wide,
 * 5 otherwise.
 */
#if BITS_PER_LONG == 64
#define IO_WQ_HASH_ORDER	6
#else
#define IO_WQ_HASH_ORDER	5
#endif

/* Number of distinct hash values; sizes io_wqe->hash_tail[] */
#define IO_WQ_NR_HASH_BUCKETS	(1u << IO_WQ_HASH_ORDER)
2019-11-07 21:41:16 +03:00
/*
 * Worker accounting for one class of work (bound or unbound) on a wqe.
 */
struct io_wqe_acct {
/* workers currently accounted to this class; changed under wqe->lock */
unsigned nr_workers ;
/* upper limit compared against nr_workers before another worker is requested */
unsigned max_workers ;
/* workers of this class currently counted as running */
atomic_t nr_running ;
} ;
/* Indices into io_wqe->acct[] */
enum {
	IO_WQ_ACCT_BOUND	= 0,
	IO_WQ_ACCT_UNBOUND	= 1,
};
2019-10-22 19:25:58 +03:00
/*
* Per - node worker thread pool
*/
struct io_wqe {
struct {
2020-09-01 11:41:46 +03:00
raw_spinlock_t lock ;
2019-11-26 21:59:32 +03:00
struct io_wq_work_list work_list ;
2019-10-22 19:25:58 +03:00
unsigned flags ;
} ____cacheline_aligned_in_smp ;
int node ;
2019-11-07 21:41:16 +03:00
struct io_wqe_acct acct [ 2 ] ;
2019-10-22 19:25:58 +03:00
2019-11-14 18:00:41 +03:00
struct hlist_nulls_head free_list ;
2019-11-13 23:54:49 +03:00
struct list_head all_list ;
2019-10-22 19:25:58 +03:00
2021-02-19 22:33:30 +03:00
struct wait_queue_entry wait ;
2019-10-22 19:25:58 +03:00
struct io_wq * wq ;
2020-03-23 22:57:22 +03:00
struct io_wq_work * hash_tail [ IO_WQ_NR_HASH_BUCKETS ] ;
2019-10-22 19:25:58 +03:00
} ;
/*
* Per io_wq state
*/
struct io_wq {
struct io_wqe * * wqes ;
unsigned long state ;
2020-03-04 16:14:12 +03:00
free_work_fn * free_work ;
2020-06-08 21:08:20 +03:00
io_wq_work_fn * do_work ;
2019-11-13 08:31:31 +03:00
2019-10-22 19:25:58 +03:00
struct task_struct * manager ;
2019-11-07 21:41:16 +03:00
struct user_struct * user ;
2021-02-19 22:33:30 +03:00
struct io_wq_hash * hash ;
2019-10-22 19:25:58 +03:00
refcount_t refs ;
struct completion done ;
2020-01-24 01:33:32 +03:00
2020-10-22 18:02:50 +03:00
struct hlist_node cpuhp_node ;
2021-02-17 00:15:30 +03:00
pid_t task_pid ;
2019-10-22 19:25:58 +03:00
} ;
2020-10-22 18:02:50 +03:00
/*
 * CPU hotplug state for io-wq.
 * NOTE(review): not assigned in this part of the file - presumably set up at init time, confirm.
 */
static enum cpuhp_state io_wq_online ;
2019-10-22 19:25:58 +03:00
static bool io_worker_get ( struct io_worker * worker )
{
return refcount_inc_not_zero ( & worker - > ref ) ;
}
static void io_worker_release ( struct io_worker * worker )
{
if ( refcount_dec_and_test ( & worker - > ref ) )
2021-02-24 05:59:06 +03:00
complete ( & worker - > ref_done ) ;
2019-10-22 19:25:58 +03:00
}
2019-11-07 21:41:16 +03:00
static inline struct io_wqe_acct * io_work_get_acct ( struct io_wqe * wqe ,
struct io_wq_work * work )
{
if ( work - > flags & IO_WQ_WORK_UNBOUND )
return & wqe - > acct [ IO_WQ_ACCT_UNBOUND ] ;
return & wqe - > acct [ IO_WQ_ACCT_BOUND ] ;
}
2021-02-17 19:00:57 +03:00
static inline struct io_wqe_acct * io_wqe_get_acct ( struct io_worker * worker )
2019-11-07 21:41:16 +03:00
{
2021-02-17 19:00:57 +03:00
struct io_wqe * wqe = worker - > wqe ;
2019-11-07 21:41:16 +03:00
if ( worker - > flags & IO_WORKER_F_BOUND )
return & wqe - > acct [ IO_WQ_ACCT_BOUND ] ;
return & wqe - > acct [ IO_WQ_ACCT_UNBOUND ] ;
}
2019-10-22 19:25:58 +03:00
static void io_worker_exit ( struct io_worker * worker )
{
struct io_wqe * wqe = worker - > wqe ;
2021-02-17 19:00:57 +03:00
struct io_wqe_acct * acct = io_wqe_get_acct ( worker ) ;
2021-02-17 04:00:55 +03:00
unsigned flags ;
2019-10-22 19:25:58 +03:00
2021-02-24 05:59:06 +03:00
if ( refcount_dec_and_test ( & worker - > ref ) )
complete ( & worker - > ref_done ) ;
wait_for_completion ( & worker - > ref_done ) ;
2019-10-22 19:25:58 +03:00
preempt_disable ( ) ;
current - > flags & = ~ PF_IO_WORKER ;
2021-02-17 04:00:55 +03:00
flags = worker - > flags ;
worker - > flags = 0 ;
if ( flags & IO_WORKER_F_RUNNING )
2019-11-07 21:41:16 +03:00
atomic_dec ( & acct - > nr_running ) ;
2019-10-22 19:25:58 +03:00
worker - > flags = 0 ;
preempt_enable ( ) ;
2021-02-15 23:40:22 +03:00
if ( worker - > saved_creds ) {
revert_creds ( worker - > saved_creds ) ;
worker - > cur_creds = worker - > saved_creds = NULL ;
}
2020-09-01 11:41:46 +03:00
raw_spin_lock_irq ( & wqe - > lock ) ;
2021-02-17 04:00:55 +03:00
if ( flags & IO_WORKER_F_FREE )
hlist_nulls_del_rcu ( & worker - > nulls_node ) ;
2019-11-13 23:54:49 +03:00
list_del_rcu ( & worker - > all_list ) ;
2019-11-07 21:41:16 +03:00
acct - > nr_workers - - ;
2020-09-01 11:41:46 +03:00
raw_spin_unlock_irq ( & wqe - > lock ) ;
2019-10-22 19:25:58 +03:00
2019-11-02 10:55:01 +03:00
kfree_rcu ( worker , rcu ) ;
2020-09-26 16:26:55 +03:00
if ( refcount_dec_and_test ( & wqe - > wq - > refs ) )
complete ( & wqe - > wq - > done ) ;
2019-10-22 19:25:58 +03:00
}
2019-11-07 21:41:16 +03:00
/*
 * True when @wqe has pending work and is not marked as stalled on hashed
 * work.
 */
static inline bool io_wqe_run_queue(struct io_wqe *wqe)
	__must_hold(wqe->lock)
{
	return !wq_list_empty(&wqe->work_list) &&
	       !(wqe->flags & IO_WQE_FLAG_STALLED);
}
/*
 * Wake the worker at the head of the free list, if there is one and a
 * reference on it can be taken. Returns true if a worker was woken; when it
 * returns false the caller must wake up the wq manager to create one.
 */
static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
	__must_hold(RCU)
{
	struct hlist_nulls_node *first;
	struct io_worker *candidate;

	first = rcu_dereference(hlist_nulls_first_rcu(&wqe->free_list));
	if (is_a_nulls(first))
		return false;

	candidate = hlist_nulls_entry(first, struct io_worker, nulls_node);
	if (!io_worker_get(candidate))
		return false;

	wake_up_process(candidate->task);
	io_worker_release(candidate);
	return true;
}
/*
* We need a worker . If we find a free one , we ' re good . If not , and we ' re
* below the max number of workers , wake up the manager to create one .
*/
static void io_wqe_wake_worker ( struct io_wqe * wqe , struct io_wqe_acct * acct )
{
bool ret ;
/*
* Most likely an attempt to queue unbounded work on an io_wq that
* wasn ' t setup with any unbounded workers .
*/
WARN_ON_ONCE ( ! acct - > max_workers ) ;
rcu_read_lock ( ) ;
ret = io_wqe_activate_free_worker ( wqe ) ;
rcu_read_unlock ( ) ;
if ( ! ret & & acct - > nr_workers < acct - > max_workers )
wake_up_process ( wqe - > wq - > manager ) ;
}
2021-02-17 19:00:57 +03:00
static void io_wqe_inc_running ( struct io_worker * worker )
2019-11-07 21:41:16 +03:00
{
2021-02-17 19:00:57 +03:00
struct io_wqe_acct * acct = io_wqe_get_acct ( worker ) ;
2019-11-07 21:41:16 +03:00
atomic_inc ( & acct - > nr_running ) ;
}
2021-02-17 19:00:57 +03:00
static void io_wqe_dec_running ( struct io_worker * worker )
2019-11-07 21:41:16 +03:00
__must_hold ( wqe - > lock )
{
2021-02-17 19:00:57 +03:00
struct io_wqe_acct * acct = io_wqe_get_acct ( worker ) ;
struct io_wqe * wqe = worker - > wqe ;
2019-11-07 21:41:16 +03:00
if ( atomic_dec_and_test ( & acct - > nr_running ) & & io_wqe_run_queue ( wqe ) )
io_wqe_wake_worker ( wqe , acct ) ;
}
2021-02-17 19:00:57 +03:00
static void io_worker_start ( struct io_worker * worker )
2019-10-22 19:25:58 +03:00
{
worker - > flags | = ( IO_WORKER_F_UP | IO_WORKER_F_RUNNING ) ;
2021-02-17 19:00:57 +03:00
io_wqe_inc_running ( worker ) ;
2019-10-22 19:25:58 +03:00
}
/*
 * Worker is about to process @work. Take it off the free list if it is on
 * it, and move it between the bound and unbound accounting classes when the
 * work's class differs from the worker's current one.
 */
static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
			     struct io_wq_work *work)
	__must_hold(wqe->lock)
{
	bool worker_is_bound, work_is_bound;

	if (worker->flags & IO_WORKER_F_FREE) {
		worker->flags &= ~IO_WORKER_F_FREE;
		hlist_nulls_del_init_rcu(&worker->nulls_node);
	}

	worker_is_bound = !!(worker->flags & IO_WORKER_F_BOUND);
	work_is_bound = !(work->flags & IO_WQ_WORK_UNBOUND);
	if (worker_is_bound == work_is_bound)
		return;

	/*
	 * Switching class: leave the running count of the old class, flip
	 * the flag and worker counts, then join the running count of the new
	 * class.
	 */
	io_wqe_dec_running(worker);
	if (work_is_bound) {
		worker->flags |= IO_WORKER_F_BOUND;
		wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers--;
		wqe->acct[IO_WQ_ACCT_BOUND].nr_workers++;
	} else {
		worker->flags &= ~IO_WORKER_F_BOUND;
		wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers++;
		wqe->acct[IO_WQ_ACCT_BOUND].nr_workers--;
	}
	io_wqe_inc_running(worker);
}
/*
* No work , worker going to sleep . Move to freelist , and unuse mm if we
* have one attached . Dropping the mm may potentially sleep , so we drop
* the lock in that case and return success . Since the caller has to
* retry the loop in that case ( we changed task state ) , we don ' t regrab
* the lock if we return success .
*/
2021-02-15 23:26:34 +03:00
static void __io_worker_idle ( struct io_wqe * wqe , struct io_worker * worker )
2019-10-22 19:25:58 +03:00
__must_hold ( wqe - > lock )
{
if ( ! ( worker - > flags & IO_WORKER_F_FREE ) ) {
worker - > flags | = IO_WORKER_F_FREE ;
2019-11-14 18:00:41 +03:00
hlist_nulls_add_head_rcu ( & worker - > nulls_node , & wqe - > free_list ) ;
2019-10-22 19:25:58 +03:00
}
2021-02-15 23:40:22 +03:00
if ( worker - > saved_creds ) {
revert_creds ( worker - > saved_creds ) ;
worker - > cur_creds = worker - > saved_creds = NULL ;
}
2019-10-22 19:25:58 +03:00
}
2020-03-14 00:31:05 +03:00
static inline unsigned int io_get_work_hash ( struct io_wq_work * work )
{
return work - > flags > > IO_WQ_HASH_SHIFT ;
}
2021-02-19 22:33:30 +03:00
static void io_wait_on_hash ( struct io_wqe * wqe , unsigned int hash )
{
struct io_wq * wq = wqe - > wq ;
spin_lock ( & wq - > hash - > wait . lock ) ;
if ( list_empty ( & wqe - > wait . entry ) ) {
__add_wait_queue ( & wq - > hash - > wait , & wqe - > wait ) ;
if ( ! test_bit ( hash , & wq - > hash - > map ) ) {
__set_current_state ( TASK_RUNNING ) ;
list_del_init ( & wqe - > wait . entry ) ;
}
}
spin_unlock ( & wq - > hash - > wait . lock ) ;
}
2020-03-14 00:31:05 +03:00
static struct io_wq_work * io_get_next_work ( struct io_wqe * wqe )
2019-10-22 19:25:58 +03:00
__must_hold ( wqe - > lock )
{
2019-11-26 21:59:32 +03:00
struct io_wq_work_node * node , * prev ;
2020-03-23 22:57:22 +03:00
struct io_wq_work * work , * tail ;
2021-02-19 22:33:30 +03:00
unsigned int stall_hash = - 1U ;
2019-10-22 19:25:58 +03:00
2019-11-26 21:59:32 +03:00
wq_list_for_each ( node , prev , & wqe - > work_list ) {
2021-02-19 22:33:30 +03:00
unsigned int hash ;
2019-11-26 21:59:32 +03:00
work = container_of ( node , struct io_wq_work , list ) ;
2019-10-22 19:25:58 +03:00
/* not hashed, can run anytime */
2020-03-14 00:31:04 +03:00
if ( ! io_wq_is_hashed ( work ) ) {
2020-03-23 22:57:22 +03:00
wq_list_del ( & wqe - > work_list , node , prev ) ;
2019-10-22 19:25:58 +03:00
return work ;
}
2020-03-14 00:31:05 +03:00
hash = io_get_work_hash ( work ) ;
2021-02-19 22:33:30 +03:00
/* all items with this hash lie in [work, tail] */
tail = wqe - > hash_tail [ hash ] ;
/* hashed, can run if not already running */
if ( ! test_and_set_bit ( hash , & wqe - > wq - > hash - > map ) ) {
2020-03-23 22:57:22 +03:00
wqe - > hash_tail [ hash ] = NULL ;
wq_list_cut ( & wqe - > work_list , & tail - > list , prev ) ;
2019-10-22 19:25:58 +03:00
return work ;
}
2021-02-19 22:33:30 +03:00
if ( stall_hash = = - 1U )
stall_hash = hash ;
/* fast forward to a next hash, for-each will fix up @prev */
node = & tail - > list ;
}
if ( stall_hash ! = - 1U ) {
raw_spin_unlock ( & wqe - > lock ) ;
io_wait_on_hash ( wqe , stall_hash ) ;
raw_spin_lock ( & wqe - > lock ) ;
2019-10-22 19:25:58 +03:00
}
return NULL ;
}
2021-02-17 00:15:30 +03:00
static void io_flush_signals ( void )
2020-01-28 02:34:48 +03:00
{
2021-02-17 00:15:30 +03:00
if ( unlikely ( test_tsk_thread_flag ( current , TIF_NOTIFY_SIGNAL ) ) ) {
if ( current - > task_works )
task_work_run ( ) ;
clear_tsk_thread_flag ( current , TIF_NOTIFY_SIGNAL ) ;
2020-01-28 02:34:48 +03:00
}
2020-03-04 16:14:09 +03:00
}
2021-02-15 23:40:22 +03:00
static void io_wq_switch_creds ( struct io_worker * worker ,
struct io_wq_work * work )
{
const struct cred * old_creds = override_creds ( work - > creds ) ;
worker - > cur_creds = work - > creds ;
if ( worker - > saved_creds )
put_cred ( old_creds ) ; /* creds set by previous switch */
else
worker - > saved_creds = old_creds ;
}
2020-03-04 16:14:09 +03:00
static void io_assign_current_work ( struct io_worker * worker ,
struct io_wq_work * work )
{
2020-03-14 00:31:03 +03:00
if ( work ) {
2021-02-17 00:15:30 +03:00
io_flush_signals ( ) ;
2020-03-14 00:31:03 +03:00
cond_resched ( ) ;
}
2020-03-04 16:14:09 +03:00
spin_lock_irq ( & worker - > lock ) ;
worker - > cur_work = work ;
spin_unlock_irq ( & worker - > lock ) ;
}
2020-03-14 00:31:05 +03:00
/*
 * Forward declaration: io_worker_handle_work() below queues linked work
 * through io_wqe_enqueue() before its definition appears.
 */
static void io_wqe_enqueue ( struct io_wqe * wqe , struct io_wq_work * work ) ;
2019-10-22 19:25:58 +03:00
/*
 * Main work loop of a worker: keep pulling work off the wqe and running it
 * until nothing runnable is left.
 *
 * Called with wqe->lock held; returns with it released.
 */
static void io_worker_handle_work(struct io_worker *worker)
	__releases(wqe->lock)
{
	struct io_wqe *wqe = worker->wqe;
	struct io_wq *wq = wqe->wq;

	do {
		struct io_wq_work *work;
get_next:
		/*
		 * If we got some work, mark us as busy. If we didn't, but
		 * the list isn't empty, it means we stalled on hashed work.
		 * Mark us stalled so we don't keep looking for work when we
		 * can't make progress, any work completion or insertion will
		 * clear the stalled flag.
		 */
		work = io_get_next_work(wqe);
		if (work)
			__io_worker_busy(wqe, worker, work);
		else if (!wq_list_empty(&wqe->work_list))
			wqe->flags |= IO_WQE_FLAG_STALLED;

		raw_spin_unlock_irq(&wqe->lock);
		if (!work)
			break;
		io_assign_current_work(worker, work);
		__set_current_state(TASK_RUNNING);

		/* handle a whole dependent link */
		do {
			struct io_wq_work *next_hashed, *linked;
			/*
			 * Only hashed work owns a bit in the hash map. Use
			 * the -1U sentinel for everything else so the
			 * release path below is skipped; the raw value from
			 * io_get_work_hash() can never equal the sentinel.
			 */
			unsigned int hash = io_wq_is_hashed(work) ?
					    io_get_work_hash(work) : -1U;

			next_hashed = wq_next_work(work);

			if (work->creds && worker->cur_creds != work->creds)
				io_wq_switch_creds(worker, work);
			wq->do_work(work);
			io_assign_current_work(worker, NULL);

			linked = wq->free_work(work);
			work = next_hashed;
			/* run an unhashed link directly if the hash chain is done */
			if (!work && linked && !io_wq_is_hashed(linked)) {
				work = linked;
				linked = NULL;
			}
			io_assign_current_work(worker, work);
			if (linked)
				io_wqe_enqueue(wqe, linked);

			/* last item of a hash chain: release the hash */
			if (hash != -1U && !next_hashed) {
				clear_bit(hash, &wq->hash->map);
				if (wq_has_sleeper(&wq->hash->wait))
					wake_up(&wq->hash->wait);
				raw_spin_lock_irq(&wqe->lock);
				wqe->flags &= ~IO_WQE_FLAG_STALLED;
				/* skip unnecessary unlock-lock wqe->lock */
				if (!work)
					goto get_next;
				raw_spin_unlock_irq(&wqe->lock);
			}
		} while (work);

		raw_spin_lock_irq(&wqe->lock);
	} while (1);
}
static int io_wqe_worker ( void * data )
{
struct io_worker * worker = data ;
struct io_wqe * wqe = worker - > wqe ;
struct io_wq * wq = wqe - > wq ;
2021-02-17 19:00:57 +03:00
io_worker_start ( worker ) ;
2019-10-22 19:25:58 +03:00
while ( ! test_bit ( IO_WQ_BIT_EXIT , & wq - > state ) ) {
2019-12-08 07:03:59 +03:00
set_current_state ( TASK_INTERRUPTIBLE ) ;
2019-12-08 07:06:46 +03:00
loop :
2020-09-01 11:41:46 +03:00
raw_spin_lock_irq ( & wqe - > lock ) ;
2019-10-22 19:25:58 +03:00
if ( io_wqe_run_queue ( wqe ) ) {
io_worker_handle_work ( worker ) ;
2019-12-08 07:06:46 +03:00
goto loop ;
2019-10-22 19:25:58 +03:00
}
2021-02-15 23:26:34 +03:00
__io_worker_idle ( wqe , worker ) ;
2020-09-01 11:41:46 +03:00
raw_spin_unlock_irq ( & wqe - > lock ) ;
2021-02-17 00:15:30 +03:00
io_flush_signals ( ) ;
2019-10-22 19:25:58 +03:00
if ( schedule_timeout ( WORKER_IDLE_TIMEOUT ) )
continue ;
2021-02-17 00:15:30 +03:00
if ( fatal_signal_pending ( current ) )
break ;
2019-10-22 19:25:58 +03:00
/* timed out, exit unless we're the fixed worker */
if ( test_bit ( IO_WQ_BIT_EXIT , & wq - > state ) | |
! ( worker - > flags & IO_WORKER_F_FIXED ) )
break ;
}
if ( test_bit ( IO_WQ_BIT_EXIT , & wq - > state ) ) {
2020-09-01 11:41:46 +03:00
raw_spin_lock_irq ( & wqe - > lock ) ;
2019-11-26 21:59:32 +03:00
if ( ! wq_list_empty ( & wqe - > work_list ) )
2019-10-22 19:25:58 +03:00
io_worker_handle_work ( worker ) ;
else
2020-09-01 11:41:46 +03:00
raw_spin_unlock_irq ( & wqe - > lock ) ;
2019-10-22 19:25:58 +03:00
}
io_worker_exit ( worker ) ;
return 0 ;
}
/*
* Called when a worker is scheduled in . Mark us as currently running .
*/
void io_wq_worker_running ( struct task_struct * tsk )
{
2021-02-17 00:15:30 +03:00
struct io_worker * worker = tsk - > pf_io_worker ;
2019-10-22 19:25:58 +03:00
2021-02-17 00:15:30 +03:00
if ( ! worker )
return ;
2019-10-22 19:25:58 +03:00
if ( ! ( worker - > flags & IO_WORKER_F_UP ) )
return ;
if ( worker - > flags & IO_WORKER_F_RUNNING )
return ;
worker - > flags | = IO_WORKER_F_RUNNING ;
2021-02-17 19:00:57 +03:00
io_wqe_inc_running ( worker ) ;
2019-10-22 19:25:58 +03:00
}
/*
* Called when worker is going to sleep . If there are no workers currently
* running and we have work pending , wake up a free one or have the manager
* set one up .
*/
void io_wq_worker_sleeping ( struct task_struct * tsk )
{
2021-02-17 00:15:30 +03:00
struct io_worker * worker = tsk - > pf_io_worker ;
2019-10-22 19:25:58 +03:00
2021-02-17 00:15:30 +03:00
if ( ! worker )
return ;
2019-10-22 19:25:58 +03:00
if ( ! ( worker - > flags & IO_WORKER_F_UP ) )
return ;
if ( ! ( worker - > flags & IO_WORKER_F_RUNNING ) )
return ;
worker - > flags & = ~ IO_WORKER_F_RUNNING ;
2021-02-17 00:15:30 +03:00
raw_spin_lock_irq ( & worker - > wqe - > lock ) ;
2021-02-17 19:00:57 +03:00
io_wqe_dec_running ( worker ) ;
2021-02-17 00:15:30 +03:00
raw_spin_unlock_irq ( & worker - > wqe - > lock ) ;
2019-10-22 19:25:58 +03:00
}
2021-02-17 00:15:30 +03:00
static int task_thread ( void * data , int index )
2019-10-22 19:25:58 +03:00
{
2021-02-17 00:15:30 +03:00
struct io_worker * worker = data ;
struct io_wqe * wqe = worker - > wqe ;
2020-09-26 16:26:55 +03:00
struct io_wqe_acct * acct = & wqe - > acct [ index ] ;
2021-02-17 00:15:30 +03:00
struct io_wq * wq = wqe - > wq ;
char buf [ TASK_COMM_LEN ] ;
2019-10-22 19:25:58 +03:00
2021-02-17 00:15:30 +03:00
sprintf ( buf , " iou-wrk-%d " , wq - > task_pid ) ;
set_task_comm ( current , buf ) ;
2019-10-22 19:25:58 +03:00
2021-02-17 00:15:30 +03:00
current - > pf_io_worker = worker ;
worker - > task = current ;
2019-10-22 19:25:58 +03:00
2021-02-17 00:15:30 +03:00
set_cpus_allowed_ptr ( current , cpumask_of_node ( wqe - > node ) ) ;
current - > flags | = PF_NO_SETAFFINITY ;
2019-10-22 19:25:58 +03:00
2020-09-01 11:41:46 +03:00
raw_spin_lock_irq ( & wqe - > lock ) ;
2019-11-14 18:00:41 +03:00
hlist_nulls_add_head_rcu ( & worker - > nulls_node , & wqe - > free_list ) ;
2019-11-13 23:54:49 +03:00
list_add_tail_rcu ( & worker - > all_list , & wqe - > all_list ) ;
2019-10-22 19:25:58 +03:00
worker - > flags | = IO_WORKER_F_FREE ;
2019-11-07 21:41:16 +03:00
if ( index = = IO_WQ_ACCT_BOUND )
worker - > flags | = IO_WORKER_F_BOUND ;
if ( ! acct - > nr_workers & & ( worker - > flags & IO_WORKER_F_BOUND ) )
2019-10-22 19:25:58 +03:00
worker - > flags | = IO_WORKER_F_FIXED ;
2019-11-07 21:41:16 +03:00
acct - > nr_workers + + ;
2020-09-01 11:41:46 +03:00
raw_spin_unlock_irq ( & wqe - > lock ) ;
2019-10-22 19:25:58 +03:00
2021-02-17 00:15:30 +03:00
io_wqe_worker ( data ) ;
do_exit ( 0 ) ;
}
/* Thread entry point for a worker that starts in the bound class. */
static int task_thread_bound(void *data)
{
	const int index = IO_WQ_ACCT_BOUND;

	return task_thread(data, index);
}
/* Thread entry point for a worker that starts in the unbound class. */
static int task_thread_unbound(void *data)
{
	const int index = IO_WQ_ACCT_UNBOUND;

	return task_thread(data, index);
}
2021-02-18 07:05:41 +03:00
pid_t io_wq_fork_thread ( int ( * fn ) ( void * ) , void * arg )
2021-02-17 00:15:30 +03:00
{
unsigned long flags = CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD |
CLONE_IO | SIGCHLD ;
struct kernel_clone_args args = {
. flags = ( ( lower_32_bits ( flags ) | CLONE_VM |
CLONE_UNTRACED ) & ~ CSIGNAL ) ,
. exit_signal = ( lower_32_bits ( flags ) & CSIGNAL ) ,
. stack = ( unsigned long ) fn ,
. stack_size = ( unsigned long ) arg ,
} ;
return kernel_clone ( & args ) ;
}
static bool create_io_worker ( struct io_wq * wq , struct io_wqe * wqe , int index )
{
struct io_worker * worker ;
pid_t pid ;
2021-02-24 01:34:06 +03:00
__set_current_state ( TASK_RUNNING ) ;
2021-02-17 00:15:30 +03:00
worker = kzalloc_node ( sizeof ( * worker ) , GFP_KERNEL , wqe - > node ) ;
if ( ! worker )
return false ;
refcount_set ( & worker - > ref , 1 ) ;
worker - > nulls_node . pprev = NULL ;
worker - > wqe = wqe ;
spin_lock_init ( & worker - > lock ) ;
2021-02-24 05:59:06 +03:00
init_completion ( & worker - > ref_done ) ;
2021-02-17 00:15:30 +03:00
2021-02-24 01:34:06 +03:00
refcount_inc ( & wq - > refs ) ;
2021-02-17 00:15:30 +03:00
if ( index = = IO_WQ_ACCT_BOUND )
2021-02-18 07:05:41 +03:00
pid = io_wq_fork_thread ( task_thread_bound , worker ) ;
2021-02-17 00:15:30 +03:00
else
2021-02-18 07:05:41 +03:00
pid = io_wq_fork_thread ( task_thread_unbound , worker ) ;
2021-02-17 00:15:30 +03:00
if ( pid < 0 ) {
2021-02-24 01:34:06 +03:00
if ( refcount_dec_and_test ( & wq - > refs ) )
complete ( & wq - > done ) ;
2021-02-17 00:15:30 +03:00
kfree ( worker ) ;
return false ;
}
2019-11-19 18:37:07 +03:00
return true ;
2019-10-22 19:25:58 +03:00
}
2019-11-07 21:41:16 +03:00
static inline bool io_wqe_need_worker ( struct io_wqe * wqe , int index )
2019-10-22 19:25:58 +03:00
__must_hold ( wqe - > lock )
{
2019-11-07 21:41:16 +03:00
struct io_wqe_acct * acct = & wqe - > acct [ index ] ;
2019-10-22 19:25:58 +03:00
2019-11-07 21:41:16 +03:00
/* if we have available workers or no work, no need */
2019-11-14 18:00:41 +03:00
if ( ! hlist_nulls_empty ( & wqe - > free_list ) | | ! io_wqe_run_queue ( wqe ) )
2019-11-07 21:41:16 +03:00
return false ;
return acct - > nr_workers < acct - > max_workers ;
2019-10-22 19:25:58 +03:00
}
2020-09-26 16:26:55 +03:00
/*
 * Walk all workers of @wqe and call @func on each one a reference can be
 * taken on, i.e. each worker that isn't exiting. The walk stops early as
 * soon as @func returns true.
 *
 * Returns the result of the last @func call, or false if @func was never
 * called.
 */
static bool io_wq_for_each_worker(struct io_wqe *wqe,
				  bool (*func)(struct io_worker *, void *),
				  void *data)
{
	struct io_worker *worker;
	bool stop = false;

	list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
		if (!io_worker_get(worker))
			continue;

		/* no task if node is/was offline */
		if (worker->task)
			stop = func(worker, data);
		io_worker_release(worker);

		if (stop)
			break;
	}

	return stop;
}
static bool io_wq_worker_wake ( struct io_worker * worker , void * data )
{
wake_up_process ( worker - > task ) ;
return false ;
}
2021-02-24 01:34:06 +03:00
/*
 * For every online node, check under the wqe lock whether a bound and/or an
 * unbound worker is needed, then create them with the lock released.
 */
static void io_wq_check_workers(struct io_wq *wq)
{
	int node;

	for_each_node(node) {
		struct io_wqe *wqe = wq->wqes[node];
		bool need_bound, need_unbound;

		if (!node_online(node))
			continue;

		raw_spin_lock_irq(&wqe->lock);
		need_bound = io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND);
		need_unbound = io_wqe_need_worker(wqe, IO_WQ_ACCT_UNBOUND);
		raw_spin_unlock_irq(&wqe->lock);

		if (need_bound)
			create_io_worker(wq, wqe, IO_WQ_ACCT_BOUND);
		if (need_unbound)
			create_io_worker(wq, wqe, IO_WQ_ACCT_UNBOUND);
	}
}
2019-10-22 19:25:58 +03:00
/*
* Manager thread . Tasked with creating new workers , if we need them .
*/
static int io_wq_manager ( void * data )
{
struct io_wq * wq = data ;
2021-02-17 00:15:30 +03:00
char buf [ TASK_COMM_LEN ] ;
2019-11-26 21:10:20 +03:00
int node ;
2019-10-22 19:25:58 +03:00
2021-02-17 00:15:30 +03:00
sprintf ( buf , " iou-mgr-%d " , wq - > task_pid ) ;
set_task_comm ( current , buf ) ;
current - > flags | = PF_IO_WORKER ;
wq - > manager = current ;
2019-11-19 18:37:07 +03:00
complete ( & wq - > done ) ;
2021-02-24 01:34:06 +03:00
do {
2019-10-22 19:25:58 +03:00
set_current_state ( TASK_INTERRUPTIBLE ) ;
2021-02-24 01:34:06 +03:00
io_wq_check_workers ( wq ) ;
2019-10-22 19:25:58 +03:00
schedule_timeout ( HZ ) ;
2021-02-17 00:15:30 +03:00
if ( fatal_signal_pending ( current ) )
set_bit ( IO_WQ_BIT_EXIT , & wq - > state ) ;
2021-02-24 01:34:06 +03:00
} while ( ! test_bit ( IO_WQ_BIT_EXIT , & wq - > state ) ) ;
io_wq_check_workers ( wq ) ;
2019-10-22 19:25:58 +03:00
2020-09-26 16:26:55 +03:00
if ( refcount_dec_and_test ( & wq - > refs ) ) {
2021-02-24 05:59:06 +03:00
wq - > manager = NULL ;
2019-11-19 18:37:07 +03:00
complete ( & wq - > done ) ;
2021-02-17 00:15:30 +03:00
do_exit ( 0 ) ;
2020-09-26 16:26:55 +03:00
}
/* if ERROR is set and we get here, we have workers to wake */
if ( test_bit ( IO_WQ_BIT_ERROR , & wq - > state ) ) {
rcu_read_lock ( ) ;
for_each_node ( node )
io_wq_for_each_worker ( wq - > wqes [ node ] , io_wq_worker_wake , NULL ) ;
rcu_read_unlock ( ) ;
}
2021-02-24 05:59:06 +03:00
wq - > manager = NULL ;
2021-02-17 00:15:30 +03:00
do_exit ( 0 ) ;
2019-10-22 19:25:58 +03:00
}
2020-03-04 16:14:12 +03:00
static void io_run_cancel ( struct io_wq_work * work , struct io_wqe * wqe )
2020-03-01 19:18:19 +03:00
{
2020-03-04 16:14:12 +03:00
struct io_wq * wq = wqe - > wq ;
2020-03-01 19:18:19 +03:00
do {
work - > flags | = IO_WQ_WORK_CANCEL ;
2021-02-04 16:52:08 +03:00
wq - > do_work ( work ) ;
work = wq - > free_work ( work ) ;
2020-03-01 19:18:19 +03:00
} while ( work ) ;
}
2020-03-23 22:57:22 +03:00
static void io_wqe_insert_work ( struct io_wqe * wqe , struct io_wq_work * work )
{
unsigned int hash ;
struct io_wq_work * tail ;
if ( ! io_wq_is_hashed ( work ) ) {
append :
wq_list_add_tail ( & work - > list , & wqe - > work_list ) ;
return ;
}
hash = io_get_work_hash ( work ) ;
tail = wqe - > hash_tail [ hash ] ;
wqe - > hash_tail [ hash ] = work ;
if ( ! tail )
goto append ;
wq_list_add_after ( & work - > list , & tail - > list , & wqe - > work_list ) ;
}
2019-10-22 19:25:58 +03:00
static void io_wqe_enqueue ( struct io_wqe * wqe , struct io_wq_work * work )
{
2019-11-07 21:41:16 +03:00
struct io_wqe_acct * acct = io_work_get_acct ( wqe , work ) ;
2019-12-17 18:46:33 +03:00
int work_flags ;
2019-10-22 19:25:58 +03:00
unsigned long flags ;
2019-12-17 18:46:33 +03:00
work_flags = work - > flags ;
2020-09-01 11:41:46 +03:00
raw_spin_lock_irqsave ( & wqe - > lock , flags ) ;
2020-03-23 22:57:22 +03:00
io_wqe_insert_work ( wqe , work ) ;
2019-10-22 19:25:58 +03:00
wqe - > flags & = ~ IO_WQE_FLAG_STALLED ;
2020-09-01 11:41:46 +03:00
raw_spin_unlock_irqrestore ( & wqe - > lock , flags ) ;
2019-10-22 19:25:58 +03:00
2019-12-17 18:46:33 +03:00
if ( ( work_flags & IO_WQ_WORK_CONCURRENT ) | |
! atomic_read ( & acct - > nr_running ) )
2019-11-07 21:41:16 +03:00
io_wqe_wake_worker ( wqe , acct ) ;
2019-10-22 19:25:58 +03:00
}
void io_wq_enqueue ( struct io_wq * wq , struct io_wq_work * work )
{
struct io_wqe * wqe = wq - > wqes [ numa_node_id ( ) ] ;
io_wqe_enqueue ( wqe , work ) ;
}
/*
2020-03-14 00:31:04 +03:00
* Work items that hash to the same value will not be done in parallel .
* Used to limit concurrent writes , generally hashed by inode .
2019-10-22 19:25:58 +03:00
*/
2020-03-14 00:31:04 +03:00
void io_wq_hash_work ( struct io_wq_work * work , void * val )
2019-10-22 19:25:58 +03:00
{
2020-03-14 00:31:04 +03:00
unsigned int bit ;
2019-10-22 19:25:58 +03:00
bit = hash_ptr ( val , IO_WQ_HASH_ORDER ) ;
work - > flags | = ( IO_WQ_WORK_HASHED | ( bit < < IO_WQ_HASH_SHIFT ) ) ;
}
2019-10-29 06:49:21 +03:00
struct io_cb_cancel_data {
2020-03-07 01:15:39 +03:00
work_cancel_fn * fn ;
void * data ;
2020-06-15 10:24:03 +03:00
int nr_running ;
int nr_pending ;
bool cancel_all ;
2019-10-29 06:49:21 +03:00
} ;
2020-03-07 01:15:39 +03:00
static bool io_wq_worker_cancel ( struct io_worker * worker , void * data )
2019-10-29 06:49:21 +03:00
{
2020-03-07 01:15:39 +03:00
struct io_cb_cancel_data * match = data ;
2019-11-05 23:51:51 +03:00
unsigned long flags ;
2019-10-29 06:49:21 +03:00
/*
* Hold the lock to avoid - > cur_work going out of scope , caller
2019-11-13 19:43:34 +03:00
* may dereference the passed in work .
2019-10-29 06:49:21 +03:00
*/
2019-11-13 19:43:34 +03:00
spin_lock_irqsave ( & worker - > lock , flags ) ;
2019-10-29 06:49:21 +03:00
if ( worker - > cur_work & &
2020-03-07 01:15:39 +03:00
match - > fn ( worker - > cur_work , match - > data ) ) {
2021-02-17 00:15:30 +03:00
set_notify_signal ( worker - > task ) ;
2020-06-15 10:24:03 +03:00
match - > nr_running + + ;
2019-10-22 19:25:58 +03:00
}
2019-11-13 19:43:34 +03:00
spin_unlock_irqrestore ( & worker - > lock , flags ) ;
2019-10-22 19:25:58 +03:00
2020-06-15 10:24:03 +03:00
return match - > nr_running & & ! match - > cancel_all ;
2019-10-22 19:25:58 +03:00
}
2020-08-23 20:33:10 +03:00
static inline void io_wqe_remove_pending ( struct io_wqe * wqe ,
struct io_wq_work * work ,
struct io_wq_work_node * prev )
{
unsigned int hash = io_get_work_hash ( work ) ;
struct io_wq_work * prev_work = NULL ;
if ( io_wq_is_hashed ( work ) & & work = = wqe - > hash_tail [ hash ] ) {
if ( prev )
prev_work = container_of ( prev , struct io_wq_work , list ) ;
if ( prev_work & & io_get_work_hash ( prev_work ) = = hash )
wqe - > hash_tail [ hash ] = prev_work ;
else
wqe - > hash_tail [ hash ] = NULL ;
}
wq_list_del ( & wqe - > work_list , & work - > list , prev ) ;
}
2020-06-15 10:24:03 +03:00
static void io_wqe_cancel_pending_work ( struct io_wqe * wqe ,
2020-06-15 10:24:02 +03:00
struct io_cb_cancel_data * match )
2019-10-22 19:25:58 +03:00
{
2019-11-26 21:59:32 +03:00
struct io_wq_work_node * node , * prev ;
2019-10-22 19:25:58 +03:00
struct io_wq_work * work ;
2019-11-05 23:51:51 +03:00
unsigned long flags ;
2019-10-22 19:25:58 +03:00
2020-06-15 10:24:03 +03:00
retry :
2020-09-01 11:41:46 +03:00
raw_spin_lock_irqsave ( & wqe - > lock , flags ) ;
2019-11-26 21:59:32 +03:00
wq_list_for_each ( node , prev , & wqe - > work_list ) {
work = container_of ( node , struct io_wq_work , list ) ;
2020-06-15 10:24:03 +03:00
if ( ! match - > fn ( work , match - > data ) )
continue ;
2020-08-23 20:33:10 +03:00
io_wqe_remove_pending ( wqe , work , prev ) ;
2020-09-01 11:41:46 +03:00
raw_spin_unlock_irqrestore ( & wqe - > lock , flags ) ;
2020-06-15 10:24:03 +03:00
io_run_cancel ( work , wqe ) ;
match - > nr_pending + + ;
if ( ! match - > cancel_all )
return ;
/* not safe to continue after unlock */
goto retry ;
2019-10-22 19:25:58 +03:00
}
2020-09-01 11:41:46 +03:00
raw_spin_unlock_irqrestore ( & wqe - > lock , flags ) ;
2020-06-15 10:24:02 +03:00
}
2020-06-15 10:24:03 +03:00
static void io_wqe_cancel_running_work ( struct io_wqe * wqe ,
2020-06-15 10:24:02 +03:00
struct io_cb_cancel_data * match )
{
2019-10-22 19:25:58 +03:00
rcu_read_lock ( ) ;
2020-06-15 10:24:03 +03:00
io_wq_for_each_worker ( wqe , io_wq_worker_cancel , match ) ;
2019-10-22 19:25:58 +03:00
rcu_read_unlock ( ) ;
}
2020-03-07 01:15:39 +03:00
/*
 * Cancel work matched by a caller-supplied predicate.
 *
 * @wq:         work queue to search
 * @cancel:     predicate called as cancel(work, data)
 * @data:       opaque argument forwarded to @cancel
 * @cancel_all: false to stop at the first match, true to cancel every match
 *
 * Returns IO_WQ_CANCEL_RUNNING if any matching work was found on a worker,
 * IO_WQ_CANCEL_OK if matching work was only found on a pending list, and
 * IO_WQ_CANCEL_NOTFOUND if nothing matched. Without @cancel_all, a pending
 * match returns IO_WQ_CANCEL_OK immediately and workers are not examined.
 */
enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
				  void *data, bool cancel_all)
{
	struct io_cb_cancel_data cd = {
		.fn		= cancel,
		.data		= data,
		.cancel_all	= cancel_all,
	};
	int nid;

	/*
	 * Pass one: the pending lists. With luck the work has not started
	 * yet and can simply be unlinked. CANCEL_OK means the work is
	 * returned as-new; no completion will be posted for it.
	 */
	for_each_node(nid) {
		io_wqe_cancel_pending_work(wq->wqes[nid], &cd);
		if (cd.nr_pending && !cd.cancel_all)
			return IO_WQ_CANCEL_OK;
	}

	/*
	 * Pass two: workers. A free (going busy) or busy worker may have the
	 * work currently running. In that case CANCEL_RUNNING is returned to
	 * indicate that cancellation was signalled; the completion will run
	 * normally.
	 */
	for_each_node(nid) {
		io_wqe_cancel_running_work(wq->wqes[nid], &cd);
		if (cd.nr_running && !cd.cancel_all)
			return IO_WQ_CANCEL_RUNNING;
	}

	/* a running hit takes precedence over a pending one */
	if (cd.nr_running)
		return IO_WQ_CANCEL_RUNNING;

	return cd.nr_pending ? IO_WQ_CANCEL_OK : IO_WQ_CANCEL_NOTFOUND;
}
2021-02-19 22:33:30 +03:00
static int io_wqe_hash_wake ( struct wait_queue_entry * wait , unsigned mode ,
int sync , void * key )
{
struct io_wqe * wqe = container_of ( wait , struct io_wqe , wait ) ;
int ret ;
list_del_init ( & wait - > entry ) ;
rcu_read_lock ( ) ;
ret = io_wqe_activate_free_worker ( wqe ) ;
rcu_read_unlock ( ) ;
if ( ! ret )
wake_up_process ( wqe - > wq - > manager ) ;
return 1 ;
}
2019-11-25 18:49:20 +03:00
struct io_wq * io_wq_create ( unsigned bounded , struct io_wq_data * data )
2019-10-22 19:25:58 +03:00
{
2019-11-26 21:10:20 +03:00
int ret = - ENOMEM , node ;
2019-10-22 19:25:58 +03:00
struct io_wq * wq ;
2020-06-08 21:08:20 +03:00
if ( WARN_ON_ONCE ( ! data - > free_work | | ! data - > do_work ) )
2020-03-04 16:14:12 +03:00
return ERR_PTR ( - EINVAL ) ;
2019-11-26 19:39:45 +03:00
wq = kzalloc ( sizeof ( * wq ) , GFP_KERNEL ) ;
2019-10-22 19:25:58 +03:00
if ( ! wq )
return ERR_PTR ( - ENOMEM ) ;
2019-11-26 21:10:20 +03:00
wq - > wqes = kcalloc ( nr_node_ids , sizeof ( struct io_wqe * ) , GFP_KERNEL ) ;
2020-10-22 18:02:50 +03:00
if ( ! wq - > wqes )
goto err_wq ;
ret = cpuhp_state_add_instance_nocalls ( io_wq_online , & wq - > cpuhp_node ) ;
if ( ret )
goto err_wqes ;
2019-10-22 19:25:58 +03:00
2021-02-19 22:33:30 +03:00
refcount_inc ( & data - > hash - > refs ) ;
wq - > hash = data - > hash ;
2020-03-04 16:14:12 +03:00
wq - > free_work = data - > free_work ;
2020-06-08 21:08:20 +03:00
wq - > do_work = data - > do_work ;
2019-11-13 08:31:31 +03:00
2020-10-22 18:02:50 +03:00
ret = - ENOMEM ;
2019-11-26 21:10:20 +03:00
for_each_node ( node ) {
2019-10-22 19:25:58 +03:00
struct io_wqe * wqe ;
2020-02-11 16:30:06 +03:00
int alloc_node = node ;
2019-10-22 19:25:58 +03:00
2020-02-11 16:30:06 +03:00
if ( ! node_online ( alloc_node ) )
alloc_node = NUMA_NO_NODE ;
wqe = kzalloc_node ( sizeof ( struct io_wqe ) , GFP_KERNEL , alloc_node ) ;
2019-10-22 19:25:58 +03:00
if ( ! wqe )
2019-11-26 21:10:20 +03:00
goto err ;
wq - > wqes [ node ] = wqe ;
2020-02-11 16:30:06 +03:00
wqe - > node = alloc_node ;
2019-11-07 21:41:16 +03:00
wqe - > acct [ IO_WQ_ACCT_BOUND ] . max_workers = bounded ;
atomic_set ( & wqe - > acct [ IO_WQ_ACCT_BOUND ] . nr_running , 0 ) ;
2021-02-22 02:02:53 +03:00
wqe - > acct [ IO_WQ_ACCT_UNBOUND ] . max_workers =
2019-11-07 21:41:16 +03:00
task_rlimit ( current , RLIMIT_NPROC ) ;
atomic_set ( & wqe - > acct [ IO_WQ_ACCT_UNBOUND ] . nr_running , 0 ) ;
2021-02-19 22:33:30 +03:00
wqe - > wait . func = io_wqe_hash_wake ;
INIT_LIST_HEAD ( & wqe - > wait . entry ) ;
2019-10-22 19:25:58 +03:00
wqe - > wq = wq ;
2020-09-01 11:41:46 +03:00
raw_spin_lock_init ( & wqe - > lock ) ;
2019-11-26 21:59:32 +03:00
INIT_WQ_LIST ( & wqe - > work_list ) ;
2019-11-14 18:00:41 +03:00
INIT_HLIST_NULLS_HEAD ( & wqe - > free_list , 0 ) ;
2019-11-13 23:54:49 +03:00
INIT_LIST_HEAD ( & wqe - > all_list ) ;
2019-10-22 19:25:58 +03:00
}
2021-02-17 00:15:30 +03:00
wq - > task_pid = current - > pid ;
2019-10-22 19:25:58 +03:00
init_completion ( & wq - > done ) ;
2021-02-17 00:15:30 +03:00
refcount_set ( & wq - > refs , 1 ) ;
2019-10-22 19:25:58 +03:00
2021-02-17 00:15:30 +03:00
current - > flags | = PF_IO_WORKER ;
2021-02-18 07:05:41 +03:00
ret = io_wq_fork_thread ( io_wq_manager , wq ) ;
2021-02-17 00:15:30 +03:00
current - > flags & = ~ PF_IO_WORKER ;
if ( ret > = 0 ) {
2019-11-19 18:37:07 +03:00
wait_for_completion ( & wq - > done ) ;
2019-10-22 19:25:58 +03:00
return wq ;
}
2021-02-17 00:15:30 +03:00
if ( refcount_dec_and_test ( & wq - > refs ) )
complete ( & wq - > done ) ;
2021-02-19 22:33:30 +03:00
io_wq_put_hash ( data - > hash ) ;
2019-11-19 18:37:07 +03:00
err :
2020-10-22 18:02:50 +03:00
cpuhp_state_remove_instance_nocalls ( io_wq_online , & wq - > cpuhp_node ) ;
2019-11-26 21:10:20 +03:00
for_each_node ( node )
kfree ( wq - > wqes [ node ] ) ;
2020-10-22 18:02:50 +03:00
err_wqes :
2019-11-19 18:37:07 +03:00
kfree ( wq - > wqes ) ;
2020-10-22 18:02:50 +03:00
err_wq :
2019-11-19 18:37:07 +03:00
kfree ( wq ) ;
2019-10-22 19:25:58 +03:00
return ERR_PTR ( ret ) ;
}
2021-02-17 01:42:24 +03:00
void io_wq_destroy ( struct io_wq * wq )
2019-10-22 19:25:58 +03:00
{
2019-11-26 21:10:20 +03:00
int node ;
2019-10-22 19:25:58 +03:00
2020-10-22 18:02:50 +03:00
cpuhp_state_remove_instance_nocalls ( io_wq_online , & wq - > cpuhp_node ) ;
2019-11-19 18:37:07 +03:00
set_bit ( IO_WQ_BIT_EXIT , & wq - > state ) ;
if ( wq - > manager )
2021-02-17 00:15:30 +03:00
wake_up_process ( wq - > manager ) ;
2019-10-22 19:25:58 +03:00
rcu_read_lock ( ) ;
2019-11-26 21:10:20 +03:00
for_each_node ( node )
io_wq_for_each_worker ( wq - > wqes [ node ] , io_wq_worker_wake , NULL ) ;
2019-10-22 19:25:58 +03:00
rcu_read_unlock ( ) ;
wait_for_completion ( & wq - > done ) ;
2021-02-19 22:33:30 +03:00
spin_lock_irq ( & wq - > hash - > wait . lock ) ;
for_each_node ( node ) {
struct io_wqe * wqe = wq - > wqes [ node ] ;
list_del_init ( & wqe - > wait . entry ) ;
kfree ( wqe ) ;
}
spin_unlock_irq ( & wq - > hash - > wait . lock ) ;
io_wq_put_hash ( wq - > hash ) ;
2019-10-22 19:25:58 +03:00
kfree ( wq - > wqes ) ;
kfree ( wq ) ;
}
2020-01-24 01:33:32 +03:00
2020-10-22 18:02:50 +03:00
static bool io_wq_worker_affinity ( struct io_worker * worker , void * data )
{
struct task_struct * task = worker - > task ;
struct rq_flags rf ;
struct rq * rq ;
rq = task_rq_lock ( task , & rf ) ;
do_set_cpus_allowed ( task , cpumask_of_node ( worker - > wqe - > node ) ) ;
task - > flags | = PF_NO_SETAFFINITY ;
task_rq_unlock ( rq , task , & rf ) ;
return false ;
}
static int io_wq_cpu_online ( unsigned int cpu , struct hlist_node * node )
{
struct io_wq * wq = hlist_entry_safe ( node , struct io_wq , cpuhp_node ) ;
int i ;
rcu_read_lock ( ) ;
for_each_node ( i )
io_wq_for_each_worker ( wq - > wqes [ i ] , io_wq_worker_affinity , NULL ) ;
rcu_read_unlock ( ) ;
return 0 ;
}
/*
 * Boot-time setup for io-wq, run via subsys_initcall().
 *
 * Requests a CPUHP_AP_ONLINE_DYN multi-instance hotplug state, passing
 * io_wq_cpu_online and NULL as its callbacks. A negative result is returned
 * to the caller as the error; otherwise the result is stored in
 * io_wq_online and 0 is returned.
 */
static __init int io_wq_init ( void )
{
int ret ;
/*
 * NOTE(review): the state-name literal below carries leading/trailing
 * spaces, which looks like an extraction artifact -- confirm the exact
 * string before relying on it.
 */
ret = cpuhp_setup_state_multi ( CPUHP_AP_ONLINE_DYN , " io-wq/online " ,
io_wq_cpu_online , NULL ) ;
if ( ret < 0 )
return ret ;
/* non-negative result: keep it as the state id used by the other io_wq_online users */
io_wq_online = ret ;
return 0 ;
}
subsys_initcall ( io_wq_init ) ;