// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/hashtable.h>
#include <linux/io_uring.h>

#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "refs.h"
#include "opdef.h"
#include "kbuf.h"
#include "poll.h"
#include "cancel.h"

struct io_poll_update {
	struct file			*file;
	u64				old_user_data;
	u64				new_user_data;
	__poll_t			events;
	bool				update_events;
	bool				update_user_data;
};

struct io_poll_table {
	struct poll_table_struct pt;
	struct io_kiocb *req;
	int nr_entries;
	int error;
	bool owning;
	/* output value, set only if arm poll returns >0 */
	__poll_t result_mask;
};

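/*
 * ->poll_refs layout: bit 31 flags the request as cancelled, bit 30 asks the
 * current owner to retry the event check, and the low 30 bits hold the
 * reference/ownership count.
 */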
#define IO_POLL_CANCEL_FLAG	BIT(31)
#define IO_POLL_RETRY_FLAG	BIT(30)
#define IO_POLL_REF_MASK	GENMASK(29, 0)

/*
 * We usually have 1-2 refs taken, 128 is more than enough and we want to
 * maximise the margin between this amount and the moment when it overflows.
 */
#define IO_POLL_REF_BIAS	128

#define IO_WQE_F_DOUBLE		1

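/*
 * The wait_queue_entry ->private field carries the owning io_kiocb pointer,
 * with IO_WQE_F_DOUBLE stuffed into the low bit to distinguish the second
 * (double) poll entry from the first one.
 */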
static inline struct io_kiocb *wqe_to_req(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return (struct io_kiocb *)(priv & ~IO_WQE_F_DOUBLE);
}

static inline bool wqe_is_double(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return priv & IO_WQE_F_DOUBLE;
}

static bool io_poll_get_ownership_slowpath(struct io_kiocb *req)
{
	int v;

	/*
	 * poll_refs are already elevated and we don't have much hope for
	 * grabbing the ownership. Instead of incrementing set a retry flag
	 * to notify the loop that there might have been some change.
	 */
	v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs);
	if (v & IO_POLL_REF_MASK)
		return false;
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}

/*
 * If the refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free and
 * we can bump it and acquire ownership. It's disallowed to modify requests
 * while not owning them, which prevents races between enqueueing task_work,
 * arming poll and wakeups.
 */
static inline bool io_poll_get_ownership(struct io_kiocb *req)
{
	if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS))
		return io_poll_get_ownership_slowpath(req);
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}

static void io_poll_mark_cancelled(struct io_kiocb *req)
{
	atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs);
}

static struct io_poll *io_poll_get_double(struct io_kiocb *req)
{
	/* pure poll stashes this in ->async_data, poll driven retry elsewhere */
	if (req->opcode == IORING_OP_POLL_ADD)
		return req->async_data;
	return req->apoll->double_poll;
}

static struct io_poll *io_poll_get_single(struct io_kiocb *req)
{
	if (req->opcode == IORING_OP_POLL_ADD)
		return io_kiocb_to_cmd(req, struct io_poll);
	return &req->apoll->poll;
}

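/*
 * Armed poll requests are hashed by user_data into ->cancel_table so that
 * cancellation and poll update can find them; each bucket has its own
 * spinlock.
 */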
static void io_poll_req_insert(struct io_kiocb *req)
{
	struct io_hash_table *table = &req->ctx->cancel_table;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
	struct io_hash_bucket *hb = &table->hbs[index];

	spin_lock(&hb->lock);
	hlist_add_head(&req->hash_node, &hb->list);
	spin_unlock(&hb->lock);
}

static void io_poll_req_delete(struct io_kiocb *req, struct io_ring_ctx *ctx)
{
	struct io_hash_table *table = &req->ctx->cancel_table;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
	spinlock_t *lock = &table->hbs[index].lock;

	spin_lock(lock);
	hash_del(&req->hash_node);
	spin_unlock(lock);
}

static void io_poll_req_insert_locked(struct io_kiocb *req)
{
	struct io_hash_table *table = &req->ctx->cancel_table_locked;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);

	lockdep_assert_held(&req->ctx->uring_lock);

	hlist_add_head(&req->hash_node, &table->hbs[index].list);
}

static void io_poll_tw_hash_eject(struct io_kiocb *req, bool *locked)
{
	struct io_ring_ctx *ctx = req->ctx;

	if (req->flags & REQ_F_HASH_LOCKED) {
		/*
		 * ->cancel_table_locked is protected by ->uring_lock in
		 * contrast to per bucket spinlocks. Likely, tctx_task_work()
		 * already grabbed the mutex for us, but there is a chance it
		 * failed.
		 */
		io_tw_lock(ctx, locked);
		hash_del(&req->hash_node);
		req->flags &= ~REQ_F_HASH_LOCKED;
	} else {
		io_poll_req_delete(req, ctx);
	}
}

static void io_init_poll_iocb(struct io_poll *poll, __poll_t events,
			      wait_queue_func_t wake_func)
{
	poll->head = NULL;
#define IO_POLL_UNMASK	(EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
	/* mask in events that we always want/need */
	poll->events = events | IO_POLL_UNMASK;
	INIT_LIST_HEAD(&poll->wait.entry);
	init_waitqueue_func_entry(&poll->wait, wake_func);
}

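/*
 * ->head is cleared with smp_store_release() when the waitqueue is torn down
 * (see io_pollfree_wake()); the acquire load here pairs with that, so a
 * non-NULL head is safe to lock, see io_poll_remove_entries() for details.
 */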
static inline void io_poll_remove_entry(struct io_poll *poll)
{
	struct wait_queue_head *head = smp_load_acquire(&poll->head);

	if (head) {
		spin_lock_irq(&head->lock);
		list_del_init(&poll->wait.entry);
		poll->head = NULL;
		spin_unlock_irq(&head->lock);
	}
}

static void io_poll_remove_entries(struct io_kiocb *req)
{
	/*
	 * Nothing to do if neither of those flags are set. Avoid dipping
	 * into the poll/apoll/double cachelines if we can.
	 */
	if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
		return;

	/*
	 * While we hold the waitqueue lock and the waitqueue is nonempty,
	 * wake_up_pollfree() will wait for us. However, taking the waitqueue
	 * lock in the first place can race with the waitqueue being freed.
	 *
	 * We solve this as eventpoll does: by taking advantage of the fact that
	 * all users of wake_up_pollfree() will RCU-delay the actual free. If
	 * we enter rcu_read_lock() and see that the pointer to the queue is
	 * non-NULL, we can then lock it without the memory being freed out from
	 * under us.
	 *
	 * Keep holding rcu_read_lock() as long as we hold the queue lock, in
	 * case the caller deletes the entry from the queue, leaving it empty.
	 * In that case, only RCU prevents the queue memory from being freed.
	 */
	rcu_read_lock();
	if (req->flags & REQ_F_SINGLE_POLL)
		io_poll_remove_entry(io_poll_get_single(req));
	if (req->flags & REQ_F_DOUBLE_POLL)
		io_poll_remove_entry(io_poll_get_double(req));
	rcu_read_unlock();
}

enum {
	IOU_POLL_DONE = 0,
	IOU_POLL_NO_ACTION = 1,
	IOU_POLL_REMOVE_POLL_USE_RES = 2,
};

/*
 * All poll tw should go through this. Checks for poll events, manages
 * references, does rewait, etc.
 *
 * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action is
 * required, which means either a spurious wakeup or that a multishot CQE has
 * already been served. IOU_POLL_DONE when it's done with the request, in
 * which case the mask is stored in req->cqe.res.
 * IOU_POLL_REMOVE_POLL_USE_RES indicates that the multishot poll should be
 * removed and that the result is stored in req->cqe.
 */
static int io_poll_check_events(struct io_kiocb *req, bool *locked)
{
	int v, ret;

	/* req->task == current here, checking PF_EXITING is safe */
	if (unlikely(req->task->flags & PF_EXITING))
		return -ECANCELED;

	do {
		v = atomic_read(&req->poll_refs);

		if (unlikely(v != 1)) {
			/* tw should be the owner and so have some refs */
			if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK)))
				return IOU_POLL_NO_ACTION;
			if (v & IO_POLL_CANCEL_FLAG)
				return -ECANCELED;
			/*
			 * cqe.res contains only events of the first wake up
			 * and all others are to be lost. Redo vfs_poll() to get
			 * up to date state.
			 */
			if ((v & IO_POLL_REF_MASK) != 1)
				req->cqe.res = 0;

			if (v & IO_POLL_RETRY_FLAG) {
				req->cqe.res = 0;
				/*
				 * We won't find new events that came in between
				 * vfs_poll and the ref put unless we clear the
				 * flag in advance.
				 */
				atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs);
				v &= ~IO_POLL_RETRY_FLAG;
			}
		}

		/* the mask was stashed in __io_poll_execute */
		if (!req->cqe.res) {
			struct poll_table_struct pt = { ._key = req->apoll_events };
			req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
		}

		if ((unlikely(!req->cqe.res)))
			continue;
		if (req->apoll_events & EPOLLONESHOT)
			return IOU_POLL_DONE;

		/* multishot, just fill a CQE and proceed */
		if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
			__poll_t mask = mangle_poll(req->cqe.res &
						    req->apoll_events);

			if (!io_aux_cqe(req->ctx, *locked, req->cqe.user_data,
					mask, IORING_CQE_F_MORE, false)) {
				io_req_set_res(req, mask, 0);
				return IOU_POLL_REMOVE_POLL_USE_RES;
			}
		} else {
			ret = io_poll_issue(req, locked);
			if (ret == IOU_STOP_MULTISHOT)
				return IOU_POLL_REMOVE_POLL_USE_RES;
			if (ret < 0)
				return ret;
		}

		/* force the next iteration to vfs_poll() */
		req->cqe.res = 0;

		/*
		 * Release all references, retry if someone tried to restart
		 * task_work while we were executing it.
		 */
	} while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs) &
					IO_POLL_REF_MASK);

	return IOU_POLL_NO_ACTION;
}

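/*
 * task_work completion path for IORING_OP_POLL_ADD requests: translate the
 * event mask, drop the waitqueue entries and the cancel hash entry, then
 * complete the request.
 */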
static void io_poll_task_func(struct io_kiocb *req, bool *locked)
{
	int ret;

	ret = io_poll_check_events(req, locked);
	if (ret == IOU_POLL_NO_ACTION)
		return;

	if (ret == IOU_POLL_DONE) {
		struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll);

		req->cqe.res = mangle_poll(req->cqe.res & poll->events);
	} else if (ret != IOU_POLL_REMOVE_POLL_USE_RES) {
		req->cqe.res = ret;
		req_set_fail(req);
	}

	io_poll_remove_entries(req);
	io_poll_tw_hash_eject(req, locked);
	io_req_set_res(req, req->cqe.res, 0);
	io_req_task_complete(req, locked);
}

static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
{
	int ret;

	ret = io_poll_check_events(req, locked);
	if (ret == IOU_POLL_NO_ACTION)
		return;

	io_tw_lock(req->ctx, locked);
	io_poll_remove_entries(req);
	io_poll_tw_hash_eject(req, locked);

	if (ret == IOU_POLL_REMOVE_POLL_USE_RES)
		io_req_task_complete(req, locked);
	else if (ret == IOU_POLL_DONE)
		io_req_task_submit(req, locked);
	else
		io_req_defer_failed(req, ret);
}

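/*
 * Queue up the poll/apoll task_work handler. Callers are expected to already
 * hold ownership of ->poll_refs (see io_poll_get_ownership()).
 */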
static void __io_poll_execute(struct io_kiocb *req, int mask)
{
	io_req_set_res(req, mask, 0);

	if (req->opcode == IORING_OP_POLL_ADD)
		req->io_task_work.func = io_poll_task_func;
	else
		req->io_task_work.func = io_apoll_task_func;

	trace_io_uring_task_add(req, mask);
	io_req_task_work_add(req);
}

static inline void io_poll_execute(struct io_kiocb *req, int res)
{
	if (io_poll_get_ownership(req))
		__io_poll_execute(req, res);
}

static void io_poll_cancel_req(struct io_kiocb *req)
{
	io_poll_mark_cancelled(req);
	/* kick tw, which should complete the request */
	io_poll_execute(req, 0);
}

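/*
 * Bits that are masked out of poll->events for the "does this wakeup match
 * the events we are actually waiting for" check in io_poll_wake().
 */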
#define IO_ASYNC_POLL_COMMON	(EPOLLONESHOT | EPOLLPRI)

static __cold int io_pollfree_wake(struct io_kiocb *req, struct io_poll *poll)
{
	io_poll_mark_cancelled(req);
	/* we have to kick tw in case it's not already */
	io_poll_execute(req, 0);

	/*
	 * If the waitqueue is being freed early but someone already holds
	 * ownership over it, we have to tear down the request as best we
	 * can. That means immediately removing the request from its
	 * waitqueue and preventing all further accesses to the waitqueue
	 * via the request.
	 */
	list_del_init(&poll->wait.entry);

	/*
	 * Careful: this *must* be the last step, since as soon
	 * as req->head is NULL'ed out, the request can be
	 * completed and freed, since aio_poll_complete_work()
	 * will no longer need to take the waitqueue lock.
	 */
	smp_store_release(&poll->head, NULL);
	return 1;
}

static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
			void *key)
{
	struct io_kiocb *req = wqe_to_req(wait);
	struct io_poll *poll = container_of(wait, struct io_poll, wait);
	__poll_t mask = key_to_poll(key);

	if (unlikely(mask & POLLFREE))
		return io_pollfree_wake(req, poll);

	/* for instances that support it check for an event match first */
	if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON)))
		return 0;

	if (io_poll_get_ownership(req)) {
		/*
		 * If we trigger a multishot poll off our own wakeup path,
		 * disable multishot as there is a circular dependency between
		 * CQ posting and triggering the event.
		 */
		if (mask & EPOLL_URING_WAKE)
			poll->events |= EPOLLONESHOT;

		/* optional, saves extra locking for removal in tw handler */
		if (mask && poll->events & EPOLLONESHOT) {
			list_del_init(&poll->wait.entry);
			poll->head = NULL;
			if (wqe_is_double(wait))
				req->flags &= ~REQ_F_DOUBLE_POLL;
			else
				req->flags &= ~REQ_F_SINGLE_POLL;
		}
		__io_poll_execute(req, mask);
	}
	return 1;
}

/* fails only when polling is already being completed by the first entry */
static bool io_poll_double_prepare(struct io_kiocb *req)
{
	struct wait_queue_head *head;
	struct io_poll *poll = io_poll_get_single(req);

	/* head is RCU protected, see io_poll_remove_entries() comments */
	rcu_read_lock();
	head = smp_load_acquire(&poll->head);
	/*
	 * poll arm might not hold ownership and so race for req->flags with
	 * io_poll_wake(). There is only one poll entry queued, serialise with
	 * it by taking its head lock. As we're still arming, the tw handler
	 * is not going to be run, so there are no races with it.
	 */
	if (head) {
		spin_lock_irq(&head->lock);
		req->flags |= REQ_F_DOUBLE_POLL;
		if (req->opcode == IORING_OP_POLL_ADD)
			req->flags |= REQ_F_ASYNC_DATA;
		spin_unlock_irq(&head->lock);
	}
	rcu_read_unlock();
	return !!head;
}

static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
			    struct wait_queue_head *head,
			    struct io_poll **poll_ptr)
{
	struct io_kiocb *req = pt->req;
	unsigned long wqe_private = (unsigned long) req;

	/*
	 * The file being polled uses multiple waitqueues for poll handling
	 * (e.g. one for read, one for write). Setup a separate io_poll
	 * if this happens.
	 */
	if (unlikely(pt->nr_entries)) {
		struct io_poll *first = poll;

		/* double add on the same waitqueue head, ignore */
		if (first->head == head)
			return;
		/* already have a 2nd entry, fail a third attempt */
		if (*poll_ptr) {
			if ((*poll_ptr)->head == head)
				return;
			pt->error = -EINVAL;
			return;
		}

		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
		if (!poll) {
			pt->error = -ENOMEM;
			return;
		}

		/* mark as double wq entry */
		wqe_private |= IO_WQE_F_DOUBLE;
		io_init_poll_iocb(poll, first->events, first->wait.func);
		if (!io_poll_double_prepare(req)) {
			/* the request is completing, just back off */
			kfree(poll);
			return;
		}
		*poll_ptr = poll;
	} else {
		/* fine to modify, there is no poll queued to race with us */
		req->flags |= REQ_F_SINGLE_POLL;
	}

	pt->nr_entries++;
	poll->head = head;
	poll->wait.private = (void *) wqe_private;

	if (poll->events & EPOLLEXCLUSIVE)
		add_wait_queue_exclusive(head, &poll->wait);
	else
		add_wait_queue(head, &poll->wait);
}

static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
			       struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct io_poll *poll = io_kiocb_to_cmd(pt->req, struct io_poll);

	__io_queue_proc(poll, pt, head,
			(struct io_poll **) &pt->req->async_data);
}

static bool io_poll_can_finish_inline(struct io_kiocb *req,
				      struct io_poll_table *pt)
{
	return pt->owning || io_poll_get_ownership(req);
}

/*
 * Returns 0 when it's handed over for polling. The caller owns the request if
 * it returns non-zero, but otherwise should not touch it. Negative values
 * contain an error code. When the result is >0, the polling has completed
 * inline and ipt.result_mask is set to the mask.
 */
static int __io_arm_poll_handler(struct io_kiocb *req,
				 struct io_poll *poll,
				 struct io_poll_table *ipt, __poll_t mask,
				 unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;

	INIT_HLIST_NODE(&req->hash_node);
	req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
	io_init_poll_iocb(poll, mask, io_poll_wake);
	poll->file = req->file;
	req->apoll_events = poll->events;

	ipt->pt._key = mask;
	ipt->req = req;
	ipt->error = 0;
	ipt->nr_entries = 0;
	/*
	 * Polling is either completed here or via task_work, so if we're in
	 * the task context we're naturally serialised with tw by merit of
	 * running the same task. When it's io-wq, take the ownership to
	 * prevent tw from running. However, when we're in the task context,
	 * skip taking it as an optimisation.
	 *
	 * Note: even though the request won't be completed/freed, without
	 * ownership we still can race with io_poll_wake().
	 * io_poll_can_finish_inline() tries to deal with that.
	 */
	ipt->owning = issue_flags & IO_URING_F_UNLOCKED;
	atomic_set(&req->poll_refs, (int)ipt->owning);

	/* io-wq doesn't hold uring_lock */
	if (issue_flags & IO_URING_F_UNLOCKED)
		req->flags &= ~REQ_F_HASH_LOCKED;

	mask = vfs_poll(req->file, &ipt->pt) & poll->events;

	if (unlikely(ipt->error || !ipt->nr_entries)) {
		io_poll_remove_entries(req);

		if (!io_poll_can_finish_inline(req, ipt)) {
			io_poll_mark_cancelled(req);
			return 0;
		} else if (mask && (poll->events & EPOLLET)) {
			ipt->result_mask = mask;
			return 1;
		}
		return ipt->error ?: -EINVAL;
	}

	if (mask &&
	   ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) {
		if (!io_poll_can_finish_inline(req, ipt))
			return 0;
		io_poll_remove_entries(req);
		ipt->result_mask = mask;
		/* no one else has access to the req, forget about the ref */
		return 1;
	}

	if (req->flags & REQ_F_HASH_LOCKED)
		io_poll_req_insert_locked(req);
	else
		io_poll_req_insert(req);

	if (mask && (poll->events & EPOLLET) &&
	    io_poll_can_finish_inline(req, ipt)) {
		__io_poll_execute(req, mask);
		return 0;
	}

	if (ipt->owning) {
		/*
		 * Try to release ownership. If we see a change of state, e.g.
		 * poll was woken up, queue up a tw, it'll deal with it.
		 */
		if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1)
			__io_poll_execute(req, 0);
	}
	return 0;
}

static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
			       struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct async_poll *apoll = pt->req->apoll;

	__io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
}

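/*
 * Pick an async_poll container for the request: reuse ->apoll if the request
 * has been polled before, try the per-ctx cache when the ring lock is held,
 * and otherwise fall back to an atomic allocation.
 */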
static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req,
					     unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cache_entry *entry;
	struct async_poll *apoll;

	if (req->flags & REQ_F_POLLED) {
		apoll = req->apoll;
		kfree(apoll->double_poll);
	} else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		entry = io_alloc_cache_get(&ctx->apoll_cache);
		if (entry == NULL)
			goto alloc_apoll;
		apoll = container_of(entry, struct async_poll, cache);
	} else {
alloc_apoll:
		apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
		if (unlikely(!apoll))
			return NULL;
	}
	apoll->double_poll = NULL;
	req->apoll = apoll;
	return apoll;
}

int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
{
	const struct io_op_def *def = &io_op_defs[req->opcode];
	struct async_poll *apoll;
	struct io_poll_table ipt;
	__poll_t mask = POLLPRI | POLLERR | EPOLLET;
	int ret;

	/*
	 * apoll requests already grab the mutex to complete in the tw handler,
	 * so removal from the mutex-backed hash is free, use it by default.
	 */
	req->flags |= REQ_F_HASH_LOCKED;

	if (!def->pollin && !def->pollout)
		return IO_APOLL_ABORTED;
	if (!file_can_poll(req->file))
		return IO_APOLL_ABORTED;
	if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
		return IO_APOLL_ABORTED;
	if (!(req->flags & REQ_F_APOLL_MULTISHOT))
		mask |= EPOLLONESHOT;

	if (def->pollin) {
		mask |= EPOLLIN | EPOLLRDNORM;

		/* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
		if (req->flags & REQ_F_CLEAR_POLLIN)
			mask &= ~EPOLLIN;
	} else {
		mask |= EPOLLOUT | EPOLLWRNORM;
	}
	if (def->poll_exclusive)
		mask |= EPOLLEXCLUSIVE;

	apoll = io_req_alloc_apoll(req, issue_flags);
	if (!apoll)
		return IO_APOLL_ABORTED;
	req->flags |= REQ_F_POLLED;
	ipt.pt._qproc = io_async_queue_proc;

	io_kbuf_recycle(req, issue_flags);

	ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags);
	if (ret)
		return ret > 0 ? IO_APOLL_READY : IO_APOLL_ABORTED;
	trace_io_uring_poll_arm(req, mask, apoll->poll.events);
	return IO_APOLL_OK;
}

static __cold bool io_poll_remove_all_table(struct task_struct *tsk,
					    struct io_hash_table *table,
					    bool cancel_all)
{
	unsigned nr_buckets = 1U << table->hash_bits;
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;
	int i;

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &table->hbs[i];

		spin_lock(&hb->lock);
		hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) {
			if (io_match_task_safe(req, tsk, cancel_all)) {
				hlist_del_init(&req->hash_node);
				io_poll_cancel_req(req);
				found = true;
			}
		}
		spin_unlock(&hb->lock);
	}
	return found;
}

/*
 * Returns true if we found and killed one or more poll requests
 */
__cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
			       bool cancel_all)
	__must_hold(&ctx->uring_lock)
{
	bool ret;

	ret = io_poll_remove_all_table(tsk, &ctx->cancel_table, cancel_all);
	ret |= io_poll_remove_all_table(tsk, &ctx->cancel_table_locked, cancel_all);
	return ret;
}

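/*
 * The lookup helpers below return with the matching bucket's spinlock held
 * (reported through *out_bucket); the caller is responsible for unlocking it.
 */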
static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
				     struct io_cancel_data *cd,
				     struct io_hash_table *table,
				     struct io_hash_bucket **out_bucket)
{
	struct io_kiocb *req;
	u32 index = hash_long(cd->data, table->hash_bits);
	struct io_hash_bucket *hb = &table->hbs[index];

	*out_bucket = NULL;

	spin_lock(&hb->lock);
	hlist_for_each_entry(req, &hb->list, hash_node) {
		if (cd->data != req->cqe.user_data)
			continue;
		if (poll_only && req->opcode != IORING_OP_POLL_ADD)
			continue;
		if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
			if (cd->seq == req->work.cancel_seq)
				continue;
			req->work.cancel_seq = cd->seq;
		}
		*out_bucket = hb;
		return req;
	}
	spin_unlock(&hb->lock);
	return NULL;
}

static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
					  struct io_cancel_data *cd,
					  struct io_hash_table *table,
					  struct io_hash_bucket **out_bucket)
{
	unsigned nr_buckets = 1U << table->hash_bits;
	struct io_kiocb *req;
	int i;

	*out_bucket = NULL;

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &table->hbs[i];

		spin_lock(&hb->lock);
		hlist_for_each_entry(req, &hb->list, hash_node) {
			if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
			    req->file != cd->file)
				continue;
			if (cd->seq == req->work.cancel_seq)
				continue;
			req->work.cancel_seq = cd->seq;
			*out_bucket = hb;
			return req;
		}
		spin_unlock(&hb->lock);
	}
	return NULL;
}

static int io_poll_disarm(struct io_kiocb *req)
{
	if (!req)
		return -ENOENT;
	if (!io_poll_get_ownership(req))
		return -EALREADY;
	io_poll_remove_entries(req);
	hash_del(&req->hash_node);
	return 0;
}

static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
			    struct io_hash_table *table)
{
	struct io_hash_bucket *bucket;
	struct io_kiocb *req;

	if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY))
		req = io_poll_file_find(ctx, cd, table, &bucket);
	else
		req = io_poll_find(ctx, false, cd, table, &bucket);

	if (req)
		io_poll_cancel_req(req);
	if (bucket)
		spin_unlock(&bucket->lock);
	return req ? 0 : -ENOENT;
}

int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		   unsigned issue_flags)
{
	int ret;

	ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table);
	if (ret != -ENOENT)
		return ret;

	io_ring_submit_lock(ctx, issue_flags);
	ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table_locked);
	io_ring_submit_unlock(ctx, issue_flags);
	return ret;
}

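/*
 * Translate the epoll-style event mask from the SQE into kernel poll bits,
 * defaulting to oneshot unless IORING_POLL_ADD_MULTI is set and to
 * edge-triggered unless IORING_POLL_ADD_LEVEL is set.
 */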
static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
				     unsigned int flags)
{
	u32 events;

	events = READ_ONCE(sqe->poll32_events);
#ifdef __BIG_ENDIAN
	events = swahw32(events);
#endif
	if (!(flags & IORING_POLL_ADD_MULTI))
		events |= EPOLLONESHOT;
	if (!(flags & IORING_POLL_ADD_LEVEL))
		events |= EPOLLET;
	return demangle_poll(events) |
		(events & (EPOLLEXCLUSIVE|EPOLLONESHOT|EPOLLET));
}

int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll_update *upd = io_kiocb_to_cmd(req, struct io_poll_update);
	u32 flags;

	if (sqe->buf_index || sqe->splice_fd_in)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
		      IORING_POLL_ADD_MULTI))
		return -EINVAL;
	/* meaningless without update */
	if (flags == IORING_POLL_ADD_MULTI)
		return -EINVAL;

	upd->old_user_data = READ_ONCE(sqe->addr);
	upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
	upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;

	upd->new_user_data = READ_ONCE(sqe->off);
	if (!upd->update_user_data && upd->new_user_data)
		return -EINVAL;
	if (upd->update_events)
		upd->events = io_poll_parse_events(sqe, flags);
	else if (sqe->poll32_events)
		return -EINVAL;

	return 0;
}

int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll);
	u32 flags;

	if (sqe->buf_index || sqe->off || sqe->addr)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~IORING_POLL_ADD_MULTI)
		return -EINVAL;
	if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP))
		return -EINVAL;

	poll->events = io_poll_parse_events(sqe, flags);
	return 0;
}

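/*
 * Issue path for IORING_OP_POLL_ADD: if polling completes inline the result
 * mask is posted right away, otherwise the request stays armed and gets
 * completed later from task_work.
 */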
int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll);
	struct io_poll_table ipt;
	int ret;

	ipt.pt._qproc = io_poll_queue_proc;

	/*
	 * If sqpoll or single issuer, there is no contention for ->uring_lock
	 * and we'll end up holding it in tw handlers anyway.
	 */
	if (req->ctx->flags & (IORING_SETUP_SQPOLL|IORING_SETUP_SINGLE_ISSUER))
		req->flags |= REQ_F_HASH_LOCKED;

	ret = __io_arm_poll_handler(req, poll, &ipt, poll->events, issue_flags);
	if (ret > 0) {
		io_req_set_res(req, ipt.result_mask, 0);
		return IOU_OK;
	}
	return ret ?: IOU_ISSUE_SKIP_COMPLETE;
}

int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll_update *poll_update = io_kiocb_to_cmd(req, struct io_poll_update);
	struct io_cancel_data cd = { .data = poll_update->old_user_data, };
	struct io_ring_ctx *ctx = req->ctx;
	struct io_hash_bucket *bucket;
	struct io_kiocb *preq;
	int ret2, ret = 0;
	bool locked;

	preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table, &bucket);
	ret2 = io_poll_disarm(preq);
	if (bucket)
		spin_unlock(&bucket->lock);
	if (!ret2)
		goto found;
	if (ret2 != -ENOENT) {
		ret = ret2;
		goto out;
	}

	io_ring_submit_lock(ctx, issue_flags);
	preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table_locked, &bucket);
	ret2 = io_poll_disarm(preq);
	if (bucket)
		spin_unlock(&bucket->lock);
	io_ring_submit_unlock(ctx, issue_flags);
	if (ret2) {
		ret = ret2;
		goto out;
	}

found:
	if (WARN_ON_ONCE(preq->opcode != IORING_OP_POLL_ADD)) {
		ret = -EFAULT;
		goto out;
	}

	if (poll_update->update_events || poll_update->update_user_data) {
		/* only replace the event mask, keep the behaviour flags */
		if (poll_update->update_events) {
			struct io_poll *poll = io_kiocb_to_cmd(preq, struct io_poll);

			poll->events &= ~0xffff;
			poll->events |= poll_update->events & 0xffff;
			poll->events |= IO_POLL_UNMASK;
		}
		if (poll_update->update_user_data)
			preq->cqe.user_data = poll_update->new_user_data;

		ret2 = io_poll_add(preq, issue_flags);
		/* successfully updated, don't complete poll request */
		if (!ret2 || ret2 == -EIOCBQUEUED)
			goto out;
	}

	req_set_fail(preq);
	io_req_set_res(preq, -ECANCELED, 0);
	locked = !(issue_flags & IO_URING_F_UNLOCKED);
	io_req_task_complete(preq, &locked);
out:
	if (ret < 0) {
		req_set_fail(req);
		return ret;
	}
	/* complete update request, we're done with it */
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

void io_apoll_cache_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct async_poll, cache));
}