2019-10-22 19:25:58 +03:00
# ifndef INTERNAL_IO_WQ_H
# define INTERNAL_IO_WQ_H
2020-10-14 19:48:51 +03:00
# include <linux/io_uring.h>
2019-10-22 19:25:58 +03:00
struct io_wq ;
/*
 * Flag bits for io_wq_work.flags, plus the shift used for the hash key.
 *
 * Each IO_WQ_WORK_* constant is a distinct power of two so the values can be
 * OR-ed together. IO_WQ_HASH_SHIFT is not a flag itself: it is a bit position.
 */
enum {
	IO_WQ_WORK_CANCEL	= 1,
	IO_WQ_WORK_HASHED	= 2,
	IO_WQ_WORK_UNBOUND	= 4,
	IO_WQ_WORK_NO_CANCEL	= 8,
	IO_WQ_WORK_CONCURRENT	= 16,

	IO_WQ_WORK_FILES	= 32,
	IO_WQ_WORK_FS		= 64,
	IO_WQ_WORK_MM		= 128,
	IO_WQ_WORK_CREDS	= 256,
	IO_WQ_WORK_BLKCG	= 512,
	IO_WQ_WORK_FSIZE	= 1024,

	IO_WQ_HASH_SHIFT	= 24,	/* upper 8 bits are used for hash key */
};
/* Outcome reported by io_wq_cancel_cb(). */
enum io_wq_cancel {
	/* the work was cancelled before it had started */
	IO_WQ_CANCEL_OK		= 0,
	/* the work was found running and a cancel was attempted */
	IO_WQ_CANCEL_RUNNING	= 1,
	/* no matching work was found */
	IO_WQ_CANCEL_NOTFOUND	= 2,
};
2019-11-26 21:59:32 +03:00
/*
 * Link for a singly linked io_wq_work_list. It only points forward;
 * the helpers in this header take the predecessor as an argument.
 */
struct io_wq_work_node {
	/* following node on the list, or NULL when this is the last one */
	struct io_wq_work_node *next;
};
/*
 * Head of a singly linked list of io_wq_work_node entries. Both ends are
 * tracked; INIT_WQ_LIST() sets both pointers to NULL for an empty list.
 */
struct io_wq_work_list {
	/* oldest entry, NULL when the list is empty */
	struct io_wq_work_node *first;
	/* most recently appended entry */
	struct io_wq_work_node *last;
};
2020-03-23 22:57:22 +03:00
static inline void wq_list_add_after ( struct io_wq_work_node * node ,
struct io_wq_work_node * pos ,
struct io_wq_work_list * list )
{
struct io_wq_work_node * next = pos - > next ;
pos - > next = node ;
node - > next = next ;
if ( ! next )
list - > last = node ;
}
2019-11-26 21:59:32 +03:00
static inline void wq_list_add_tail ( struct io_wq_work_node * node ,
struct io_wq_work_list * list )
{
if ( ! list - > first ) {
2019-12-08 07:06:46 +03:00
list - > last = node ;
WRITE_ONCE ( list - > first , node ) ;
2019-11-26 21:59:32 +03:00
} else {
list - > last - > next = node ;
list - > last = node ;
}
io_uring: fix io_wqe->work_list corruption
For the first time a req punted to io-wq, we'll initialize io_wq_work's
list to be NULL, then insert req to io_wqe->work_list. If this req is not
inserted into tail of io_wqe->work_list, this req's io_wq_work list will
point to another req's io_wq_work. For splitted bio case, this req maybe
inserted to io_wqe->work_list repeatedly, once we insert it to tail of
io_wqe->work_list for the second time, now io_wq_work->list->next will be
invalid pointer, which then result in many strang error, panic, kernel
soft-lockup, rcu stall, etc.
In my vm, kernel doest not have commit cc29e1bf0d63f7 ("block: disable
iopoll for split bio"), below fio job can reproduce this bug steadily:
[global]
name=iouring-sqpoll-iopoll-1
ioengine=io_uring
iodepth=128
numjobs=1
thread
rw=randread
direct=1
registerfiles=1
hipri=1
bs=4m
size=100M
runtime=120
time_based
group_reporting
randrepeat=0
[device]
directory=/home/feiman.wxg/mntpoint/ # an ext4 mount point
If we have commit cc29e1bf0d63f7 ("block: disable iopoll for split bio"),
there will no splitted bio case for polled io, but I think we still to need
to fix this list corruption, it also should maybe go to stable branchs.
To fix this corruption, if a req is inserted into tail of io_wqe->work_list,
initialize req->io_wq_work->list->next to bu NULL.
Cc: stable@vger.kernel.org
Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2020-12-18 10:26:48 +03:00
node - > next = NULL ;
2019-11-26 21:59:32 +03:00
}
2020-03-23 22:57:22 +03:00
static inline void wq_list_cut ( struct io_wq_work_list * list ,
struct io_wq_work_node * last ,
2019-11-26 21:59:32 +03:00
struct io_wq_work_node * prev )
{
2020-03-23 22:57:22 +03:00
/* first in the list, if prev==NULL */
if ( ! prev )
WRITE_ONCE ( list - > first , last - > next ) ;
else
prev - > next = last - > next ;
if ( last = = list - > last )
2019-11-26 21:59:32 +03:00
list - > last = prev ;
2020-03-23 22:57:22 +03:00
last - > next = NULL ;
}
/*
 * Remove the single entry @node from @list.
 *
 * @prev is the entry in front of @node, or NULL when @node is the first
 * entry. This is wq_list_cut() applied to a chain of length one.
 */
static inline void wq_list_del(struct io_wq_work_list *list,
			       struct io_wq_work_node *node,
			       struct io_wq_work_node *prev)
{
	wq_list_cut(list, node, prev);
}
/*
 * Walk @head from its first entry until the NULL terminator.
 *
 * @pos is the cursor. @prv trails one entry behind it and is NULL during the
 * first iteration, which is the form wq_list_cut()/wq_list_del() expect for
 * their @prev argument. @pos and @prv must be assignable lvalues.
 */
#define wq_list_for_each(pos, prv, head)			\
	for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next)
2019-12-08 07:06:46 +03:00
/* Non-zero when @list has no first entry; ->first is sampled via READ_ONCE(). */
#define wq_list_empty(list)	(!READ_ONCE((list)->first))
2019-11-26 21:59:32 +03:00
/*
 * Put @list into the empty state by clearing both end pointers.
 * Wrapped in do/while(0) so it behaves as a single statement.
 */
#define INIT_WQ_LIST(list) do {				\
	(list)->last = NULL;				\
	(list)->first = NULL;				\
} while (0)
2019-10-22 19:25:58 +03:00
struct io_wq_work {
2020-03-23 00:23:29 +03:00
struct io_wq_work_node list ;
2020-10-14 19:48:51 +03:00
struct io_identity * identity ;
2019-11-26 21:59:32 +03:00
unsigned flags ;
2019-10-22 19:25:58 +03:00
} ;
2020-03-23 22:57:22 +03:00
static inline struct io_wq_work * wq_next_work ( struct io_wq_work * work )
{
if ( ! work - > list . next )
return NULL ;
return container_of ( work - > list . next , struct io_wq_work , list ) ;
}
2020-03-04 16:14:12 +03:00
/*
 * Callback types stored in struct io_wq_data. These are function types, not
 * pointer types, so users declare members as "free_work_fn *".
 */

/* Takes a work item and returns nothing. */
typedef void (free_work_fn)(struct io_wq_work *);

/*
 * Takes a work item and returns a work item pointer.
 * NOTE(review): presumably the next item to run, or NULL - confirm against
 * the implementation.
 */
typedef struct io_wq_work *(io_wq_work_fn)(struct io_wq_work *);
2019-11-13 08:31:31 +03:00
2019-11-25 18:49:20 +03:00
struct io_wq_data {
struct user_struct * user ;
2020-06-08 21:08:20 +03:00
io_wq_work_fn * do_work ;
2020-03-04 16:14:12 +03:00
free_work_fn * free_work ;
2019-11-25 18:49:20 +03:00
} ;
/*
 * io-wq lifecycle and submission entry points. Only the declarations are
 * visible in this header; the notes below are read off the signatures.
 * NOTE(review): confirm exact semantics against the implementation.
 */

/* Returns a struct io_wq pointer built from @bounded and @data. */
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
/* Returns a bool; takes an existing @wq and a parameter bundle @data. */
bool io_wq_get(struct io_wq *wq, struct io_wq_data *data);
void io_wq_destroy(struct io_wq *wq);

void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
/* NOTE(review): presumably derives the hash key for @work from @val - confirm */
void io_wq_hash_work(struct io_wq_work *work, void *val);
static inline bool io_wq_is_hashed ( struct io_wq_work * work )
{
return work - > flags & IO_WQ_WORK_HASHED ;
}
2019-10-22 19:25:58 +03:00
2019-10-29 06:49:21 +03:00
/*
 * Predicate type accepted by io_wq_cancel_cb().
 * NOTE(review): presumably called with a candidate work item and the @data
 * pointer given to io_wq_cancel_cb(), returning true on a match - confirm
 * against the implementation.
 */
typedef bool (work_cancel_fn)(struct io_wq_work *, void *);

/*
 * Cancellation entry point; the result is one of the enum io_wq_cancel
 * values. Only the declaration is visible in this header.
 */
enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
				  void *data, bool cancel_all);

/* Returns a task_struct pointer associated with @wq (implemented elsewhere). */
struct task_struct *io_wq_get_task(struct io_wq *wq);
2019-10-22 19:25:58 +03:00
/*
 * Worker state hooks. With CONFIG_IO_WQ they are real functions defined
 * elsewhere; without it they become empty inline stubs so callers need no
 * #ifdef of their own.
 */
#if defined(CONFIG_IO_WQ)
extern void io_wq_worker_sleeping(struct task_struct *);
extern void io_wq_worker_running(struct task_struct *);
#else
static inline void io_wq_worker_sleeping(struct task_struct *tsk)
{
}
static inline void io_wq_worker_running(struct task_struct *tsk)
{
}
#endif
2019-10-22 19:25:58 +03:00
2019-12-18 00:13:37 +03:00
static inline bool io_wq_current_is_worker ( void )
{
return in_task ( ) & & ( current - > flags & PF_IO_WORKER ) ;
}
# endif