2022-05-24 12:45:38 -06:00
# ifndef IOU_CORE_H
# define IOU_CORE_H
# include <linux/errno.h>
2022-05-24 21:54:43 -06:00
# include <linux/lockdep.h>
2022-06-16 13:57:19 +01:00
# include <linux/io_uring_types.h>
2022-11-20 10:18:45 -07:00
# include <uapi/linux/eventpoll.h>
2022-06-16 13:57:19 +01:00
# include "io-wq.h"
2022-06-21 10:09:01 +01:00
# include "slist.h"
2022-06-16 13:57:19 +01:00
# include "filetable.h"
2022-05-24 12:45:38 -06:00
2022-06-13 07:27:03 -06:00
# ifndef CREATE_TRACE_POINTS
# include <trace/events/io_uring.h>
# endif
2022-05-24 15:21:00 -06:00
enum {
IOU_OK = 0 ,
IOU_ISSUE_SKIP_COMPLETE = - EIOCBQUEUED ,
2022-06-30 02:12:25 -07:00
/*
2022-11-17 18:40:16 +00:00
* Intended only when both IO_URING_F_MULTISHOT is passed
* to indicate to the poll runner that multishot should be
2022-06-30 02:12:25 -07:00
* removed and the result is set on req - > cqe . res .
*/
IOU_STOP_MULTISHOT = - ECANCELED ,
2022-05-24 15:21:00 -06:00
} ;
2022-09-23 14:53:25 +01:00
struct io_uring_cqe * __io_get_cqe ( struct io_ring_ctx * ctx , bool overflow ) ;
2022-06-17 09:48:02 +01:00
bool io_req_cqe_overflow ( struct io_kiocb * req ) ;
2022-08-30 05:50:10 -07:00
int io_run_task_work_sig ( struct io_ring_ctx * ctx ) ;
2022-10-27 07:44:29 -07:00
int __io_run_local_work ( struct io_ring_ctx * ctx , bool * locked ) ;
2022-08-30 05:50:10 -07:00
int io_run_local_work ( struct io_ring_ctx * ctx ) ;
2022-06-19 12:26:05 +01:00
void io_req_complete_failed ( struct io_kiocb * req , s32 res ) ;
void __io_req_complete ( struct io_kiocb * req , unsigned issue_flags ) ;
void io_req_complete_post ( struct io_kiocb * req ) ;
2022-06-30 02:12:26 -07:00
bool io_post_aux_cqe ( struct io_ring_ctx * ctx , u64 user_data , s32 res , u32 cflags ,
bool allow_overflow ) ;
2022-07-12 21:52:38 +01:00
bool io_fill_cqe_aux ( struct io_ring_ctx * ctx , u64 user_data , s32 res , u32 cflags ,
bool allow_overflow ) ;
2022-06-19 12:26:05 +01:00
void __io_commit_cqring_flush ( struct io_ring_ctx * ctx ) ;
struct page * * io_pin_pages ( unsigned long ubuf , unsigned long len , int * npages ) ;
struct file * io_file_get_normal ( struct io_kiocb * req , int fd ) ;
struct file * io_file_get_fixed ( struct io_kiocb * req , int fd ,
unsigned issue_flags ) ;
2022-07-21 09:06:47 -06:00
static inline bool io_req_ffs_set ( struct io_kiocb * req )
{
return req - > flags & REQ_F_FIXED_FILE ;
}
2022-11-11 16:54:08 +00:00
/*
 * Further core entry points that are only declared in this header; their
 * definitions are not visible here.
 */
void __io_req_task_work_add(struct io_kiocb *req, bool allow_local);

bool io_is_uring_fops(struct file *file);
bool io_alloc_async_data(struct io_kiocb *req);
void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags);
void io_req_task_queue(struct io_kiocb *req);
void io_queue_iowq(struct io_kiocb *req, bool *dont_use);
void io_req_task_complete(struct io_kiocb *req, bool *locked);
void io_req_task_queue_fail(struct io_kiocb *req, int ret);
void io_req_task_submit(struct io_kiocb *req, bool *locked);
void tctx_task_work(struct callback_head *cb);
__cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
int io_uring_alloc_task_context(struct task_struct *task,
				struct io_ring_ctx *ctx);

int io_poll_issue(struct io_kiocb *req, bool *locked);
int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr);
int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin);
void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node);
int io_req_prep_async(struct io_kiocb *req);

struct io_wq_work *io_wq_free_work(struct io_wq_work *work);
void io_wq_submit_work(struct io_wq_work *work);

void io_free_req(struct io_kiocb *req);
void io_queue_next(struct io_kiocb *req);
void __io_put_task(struct task_struct *task, int nr);
void io_task_refs_refill(struct io_uring_task *tctx);
bool __io_alloc_req_refill(struct io_ring_ctx *ctx);

bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
			bool cancel_all);
2022-11-11 16:54:08 +00:00
/* Queue @req through __io_req_task_work_add() with allow_local set to true. */
static inline void io_req_task_work_add(struct io_kiocb *req)
{
	const bool allow_local = true;

	__io_req_task_work_add(req, allow_local);
}
2022-06-19 12:26:05 +01:00
/*
 * Walk a chain of objects joined through their ->link member, starting at
 * @head and stopping at the first NULL link.
 *
 * @pos is the cursor and must be a modifiable lvalue.  It is parenthesised at
 * every use so that an expression such as *pp expands with the intended
 * precedence ((*pp)->link rather than *pp->link).
 */
#define io_for_each_link(pos, head) \
	for ((pos) = (head); (pos); (pos) = (pos)->link)
2022-06-13 07:27:03 -06:00
2022-06-20 01:25:56 +01:00
/*
 * Take ctx->completion_lock.  The function is annotated with __acquires()
 * because it returns with the lock held.
 */
static inline void io_cq_lock(struct io_ring_ctx *ctx)
	__acquires(ctx->completion_lock)
{
	spin_lock(&ctx->completion_lock);
}
/* Declared here, defined elsewhere. NOTE(review): presumably the unlock side of io_cq_lock() - confirm. */
void io_cq_unlock_post(struct io_ring_ctx *ctx);
2022-09-23 14:53:25 +01:00
static inline struct io_uring_cqe * io_get_cqe_overflow ( struct io_ring_ctx * ctx ,
bool overflow )
2022-06-13 07:27:03 -06:00
{
if ( likely ( ctx - > cqe_cached < ctx - > cqe_sentinel ) ) {
struct io_uring_cqe * cqe = ctx - > cqe_cached ;
ctx - > cached_cq_tail + + ;
ctx - > cqe_cached + + ;
2022-06-17 09:48:05 +01:00
if ( ctx - > flags & IORING_SETUP_CQE32 )
ctx - > cqe_cached + + ;
2022-06-13 07:27:03 -06:00
return cqe ;
}
2022-09-23 14:53:25 +01:00
return __io_get_cqe ( ctx , overflow ) ;
}
/* Same as io_get_cqe_overflow() with the overflow argument fixed to false. */
static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
{
	const bool overflow = false;

	return io_get_cqe_overflow(ctx, overflow);
}
static inline bool __io_fill_cqe_req ( struct io_ring_ctx * ctx ,
struct io_kiocb * req )
{
struct io_uring_cqe * cqe ;
2022-06-17 09:48:04 +01:00
/*
* If we can ' t get a cq entry , userspace overflowed the
* submission ( by quite a lot ) . Increment the overflow count in
* the ring .
*/
cqe = io_get_cqe ( ctx ) ;
if ( unlikely ( ! cqe ) )
return io_req_cqe_overflow ( req ) ;
2022-06-30 02:12:31 -07:00
trace_io_uring_complete ( req - > ctx , req , req - > cqe . user_data ,
req - > cqe . res , req - > cqe . flags ,
( req - > flags & REQ_F_CQE32_INIT ) ? req - > extra1 : 0 ,
( req - > flags & REQ_F_CQE32_INIT ) ? req - > extra2 : 0 ) ;
2022-06-17 09:48:04 +01:00
memcpy ( cqe , & req - > cqe , sizeof ( * cqe ) ) ;
if ( ctx - > flags & IORING_SETUP_CQE32 ) {
2022-06-13 07:27:03 -06:00
u64 extra1 = 0 , extra2 = 0 ;
if ( req - > flags & REQ_F_CQE32_INIT ) {
extra1 = req - > extra1 ;
extra2 = req - > extra2 ;
}
2022-06-17 09:48:04 +01:00
WRITE_ONCE ( cqe - > big_cqe [ 0 ] , extra1 ) ;
WRITE_ONCE ( cqe - > big_cqe [ 1 ] , extra2 ) ;
2022-06-13 07:27:03 -06:00
}
2022-06-17 09:48:04 +01:00
return true ;
2022-06-13 07:27:03 -06:00
}
2022-05-24 21:19:47 -06:00
static inline void req_set_fail ( struct io_kiocb * req )
{
req - > flags | = REQ_F_FAIL ;
if ( req - > flags & REQ_F_CQE_SKIP ) {
req - > flags & = ~ REQ_F_CQE_SKIP ;
req - > flags | = REQ_F_SKIP_LINK_CQES ;
}
}
2022-05-24 12:45:38 -06:00
/* Store the result and CQE flags in @req->cqe. */
static inline void io_req_set_res(struct io_kiocb *req, s32 res, u32 cflags)
{
	req->cqe.flags = cflags;
	req->cqe.res = res;
}
2022-05-25 05:59:19 -06:00
static inline bool req_has_async_data ( struct io_kiocb * req )
{
return req - > flags & REQ_F_ASYNC_DATA ;
}
2022-05-24 21:19:47 -06:00
/* fput() @file unless it is NULL. */
static inline void io_put_file(struct file *file)
{
	if (!file)
		return;
	fput(file);
}
2022-05-24 21:54:43 -06:00
static inline void io_ring_submit_unlock ( struct io_ring_ctx * ctx ,
unsigned issue_flags )
{
lockdep_assert_held ( & ctx - > uring_lock ) ;
if ( issue_flags & IO_URING_F_UNLOCKED )
mutex_unlock ( & ctx - > uring_lock ) ;
}
static inline void io_ring_submit_lock ( struct io_ring_ctx * ctx ,
unsigned issue_flags )
{
/*
* " Normal " inline submissions always hold the uring_lock , since we
* grab it from the system call . Same is true for the SQPOLL offload .
* The only exception is when we ' ve detached the request and issue it
* from an async worker thread , grab the lock for that case .
*/
if ( issue_flags & IO_URING_F_UNLOCKED )
mutex_lock ( & ctx - > uring_lock ) ;
lockdep_assert_held ( & ctx - > uring_lock ) ;
}
2022-05-25 06:25:13 -06:00
static inline void io_commit_cqring ( struct io_ring_ctx * ctx )
{
/* order cqe stores with ring update */
smp_store_release ( & ctx - > rings - > cq . tail , ctx - > cached_cq_tail ) ;
}
2022-10-06 02:06:10 +01:00
/* requires smb_mb() prior, see wq_has_sleeper() */
static inline void __io_cqring_wake ( struct io_ring_ctx * ctx )
2022-06-13 07:27:03 -06:00
{
/*
2022-11-20 10:18:45 -07:00
* Trigger waitqueue handler on all waiters on our waitqueue . This
* won ' t necessarily wake up all the tasks , io_should_wake ( ) will make
* that decision .
*
* Pass in EPOLLIN | EPOLL_URING_WAKE as the poll wakeup key . The latter
* set in the mask so that if we recurse back into our own poll
* waitqueue handlers , we know we have a dependency between eventfd or
* epoll and should terminate multishot poll at that point .
2022-06-13 07:27:03 -06:00
*/
2022-10-06 02:06:10 +01:00
if ( waitqueue_active ( & ctx - > cq_wait ) )
2022-11-20 10:18:45 -07:00
__wake_up ( & ctx - > cq_wait , TASK_NORMAL , 0 ,
poll_to_key ( EPOLL_URING_WAKE | EPOLLIN ) ) ;
2022-06-13 07:27:03 -06:00
}
2022-10-06 02:06:10 +01:00
/* Issue the smp_mb() that __io_cqring_wake() requires, then call it. */
static inline void io_cqring_wake(struct io_ring_ctx *ctx)
{
	smp_mb();
	__io_cqring_wake(ctx);
}
2022-05-25 09:13:39 -06:00
static inline bool io_sqring_full ( struct io_ring_ctx * ctx )
{
struct io_rings * r = ctx - > rings ;
return READ_ONCE ( r - > sq . tail ) - ctx - > cached_sq_head = = ctx - > sq_entries ;
}
static inline unsigned int io_sqring_entries ( struct io_ring_ctx * ctx )
{
struct io_rings * rings = ctx - > rings ;
/* make sure SQ entry isn't read before tail */
return smp_load_acquire ( & rings - > sq . tail ) - ctx - > cached_sq_head ;
}
2022-08-30 05:50:10 -07:00
static inline int io_run_task_work ( void )
2022-05-25 09:13:39 -06:00
{
2022-09-29 15:29:13 -06:00
if ( task_work_pending ( current ) ) {
if ( test_thread_flag ( TIF_NOTIFY_SIGNAL ) )
clear_notify_signal ( ) ;
2022-05-25 09:13:39 -06:00
__set_current_state ( TASK_RUNNING ) ;
2022-09-29 15:29:13 -06:00
task_work_run ( ) ;
2022-08-30 05:50:10 -07:00
return 1 ;
2022-05-25 09:13:39 -06:00
}
2022-08-30 05:50:10 -07:00
return 0 ;
}
2022-09-03 09:52:01 -06:00
static inline bool io_task_work_pending ( struct io_ring_ctx * ctx )
{
return test_thread_flag ( TIF_NOTIFY_SIGNAL ) | |
! wq_list_empty ( & ctx - > work_llist ) ;
}
2022-08-30 05:50:10 -07:00
static inline int io_run_task_work_ctx ( struct io_ring_ctx * ctx )
{
int ret = 0 ;
int ret2 ;
if ( ctx - > flags & IORING_SETUP_DEFER_TASKRUN )
ret = io_run_local_work ( ctx ) ;
/* want to run this after in case more is added */
ret2 = io_run_task_work ( ) ;
/* Try propagate error in favour of if tasks were run,
* but still make sure to run them if requested
*/
if ( ret > = 0 )
ret + = ret2 ;
return ret ;
2022-05-25 09:13:39 -06:00
}
2022-10-06 21:42:33 +01:00
static inline int io_run_local_work_locked ( struct io_ring_ctx * ctx )
{
2022-10-27 07:44:29 -07:00
bool locked ;
int ret ;
2022-10-06 21:42:33 +01:00
if ( llist_empty ( & ctx - > work_llist ) )
return 0 ;
2022-10-27 07:44:29 -07:00
locked = true ;
ret = __io_run_local_work ( ctx , & locked ) ;
/* shouldn't happen! */
if ( WARN_ON_ONCE ( ! locked ) )
mutex_lock ( & ctx - > uring_lock ) ;
return ret ;
2022-10-06 21:42:33 +01:00
}
2022-06-15 17:33:51 +01:00
static inline void io_tw_lock ( struct io_ring_ctx * ctx , bool * locked )
{
if ( ! * locked ) {
mutex_lock ( & ctx - > uring_lock ) ;
* locked = true ;
}
}
2022-06-20 01:26:00 +01:00
/*
 * Don't complete immediately but use the deferred completion infrastructure:
 * @req is appended to the compl_reqs list of its ring's submit state.
 *
 * Protected by ->uring_lock and can only be used either with
 * IO_URING_F_COMPLETE_DEFER or inside a tw handler holding the mutex.
 */
static inline void io_req_complete_defer(struct io_kiocb *req)
	__must_hold(&req->ctx->uring_lock)
{
	struct io_ring_ctx *ctx = req->ctx;

	lockdep_assert_held(&ctx->uring_lock);
	wq_list_add_tail(&req->comp_list, &ctx->submit_state.compl_reqs);
}
2022-06-20 01:25:57 +01:00
static inline void io_commit_cqring_flush ( struct io_ring_ctx * ctx )
{
if ( unlikely ( ctx - > off_timeout_used | | ctx - > drain_active | | ctx - > has_evfd ) )
__io_commit_cqring_flush ( ctx ) ;
}
2022-07-12 21:52:37 +01:00
/* must to be called somewhat shortly after putting a request */
static inline void io_put_task ( struct task_struct * task , int nr )
{
if ( likely ( task = = current ) )
task - > io_uring - > cached_refs + = nr ;
else
__io_put_task ( task , nr ) ;
}
2022-07-12 21:52:47 +01:00
static inline void io_get_task_refs ( int nr )
{
struct io_uring_task * tctx = current - > io_uring ;
tctx - > cached_refs - = nr ;
if ( unlikely ( tctx - > cached_refs < 0 ) )
io_task_refs_refill ( tctx ) ;
}
2022-07-27 10:30:40 +01:00
static inline bool io_req_cache_empty ( struct io_ring_ctx * ctx )
{
return ! ctx - > submit_state . free_list . next ;
}
/*
 * Returns true when the request cache already has entries; otherwise returns
 * whatever __io_alloc_req_refill() reports.
 */
static inline bool io_alloc_req_refill(struct io_ring_ctx *ctx)
{
	if (likely(!io_req_cache_empty(ctx)))
		return true;
	return __io_alloc_req_refill(ctx);
}
static inline struct io_kiocb * io_alloc_req ( struct io_ring_ctx * ctx )
{
struct io_wq_work_node * node ;
node = wq_stack_extract ( & ctx - > submit_state . free_list ) ;
return container_of ( node , struct io_kiocb , comp_list ) ;
}
2022-09-08 16:56:52 +01:00
static inline bool io_allowed_run_tw ( struct io_ring_ctx * ctx )
{
2022-09-08 16:56:53 +01:00
return likely ( ! ( ctx - > flags & IORING_SETUP_DEFER_TASKRUN ) | |
ctx - > submitter_task = = current ) ;
2022-09-08 16:56:52 +01:00
}
2022-05-24 12:45:38 -06:00
# endif