2022-05-25 14:59:19 +03:00
// SPDX-License-Identifier: GPL-2.0
# include <linux/kernel.h>
# include <linux/errno.h>
# include <linux/file.h>
2023-12-01 03:57:35 +03:00
# include <linux/io_uring/cmd.h>
2024-04-10 00:05:53 +03:00
# include <linux/io_uring/net.h>
2022-07-15 22:16:22 +03:00
# include <linux/security.h>
2022-09-30 09:27:39 +03:00
# include <linux/nospec.h>
2024-02-13 02:42:36 +03:00
# include <net/sock.h>
2022-05-25 14:59:19 +03:00
# include <uapi/linux/io_uring.h>
io_uring/cmd: fix breakage in SOCKET_URING_OP_SIOC* implementation
In 8e9fad0e70b7 "io_uring: Add io_uring command support for sockets"
you've got an include of asm-generic/ioctls.h done in io_uring/uring_cmd.c.
That had been done for the sake of this chunk -
+ ret = prot->ioctl(sk, SIOCINQ, &arg);
+ if (ret)
+ return ret;
+ return arg;
+ case SOCKET_URING_OP_SIOCOUTQ:
+ ret = prot->ioctl(sk, SIOCOUTQ, &arg);
SIOC{IN,OUT}Q are defined to symbols (FIONREAD and TIOCOUTQ) that come from
ioctls.h, all right, but the values vary by the architecture.
FIONREAD is
0x467F on mips
0x4004667F on alpha, powerpc and sparc
0x8004667F on sh and xtensa
0x541B everywhere else
TIOCOUTQ is
0x7472 on mips
0x40047473 on alpha, powerpc and sparc
0x80047473 on sh and xtensa
0x5411 everywhere else
->ioctl() expects the same values it would've gotten from userland; all
places where we compare with SIOC{IN,OUT}Q are using asm/ioctls.h, so
they pick the correct values. io_uring_cmd_sock(), OTOH, ends up
passing the default ones.
Fixes: 8e9fad0e70b7 ("io_uring: Add io_uring command support for sockets")
Cc: <stable@vger.kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Link: https://lore.kernel.org/r/20231214213408.GT1674809@ZenIV
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2023-12-15 00:34:08 +03:00
# include <asm/ioctls.h>
2022-05-25 14:59:19 +03:00
# include "io_uring.h"
2024-03-21 00:19:44 +03:00
# include "alloc_cache.h"
2022-09-30 09:27:38 +03:00
# include "rsrc.h"
2022-05-25 14:59:19 +03:00
# include "uring_cmd.h"
2024-03-19 05:41:58 +03:00
static struct uring_cache * io_uring_async_get ( struct io_kiocb * req )
{
struct io_ring_ctx * ctx = req - > ctx ;
struct uring_cache * cache ;
2024-03-21 00:19:44 +03:00
cache = io_alloc_cache_get ( & ctx - > uring_cache ) ;
if ( cache ) {
2024-03-19 05:41:58 +03:00
req - > flags | = REQ_F_ASYNC_DATA ;
req - > async_data = cache ;
return cache ;
}
if ( ! io_alloc_async_data ( req ) )
return req - > async_data ;
return NULL ;
}
static void io_req_uring_cleanup ( struct io_kiocb * req , unsigned int issue_flags )
{
struct io_uring_cmd * ioucmd = io_kiocb_to_cmd ( req , struct io_uring_cmd ) ;
struct uring_cache * cache = req - > async_data ;
if ( issue_flags & IO_URING_F_UNLOCKED )
return ;
2024-03-21 00:19:44 +03:00
if ( io_alloc_cache_put ( & req - > ctx - > uring_cache , cache ) ) {
2024-03-19 05:41:58 +03:00
ioucmd - > sqe = NULL ;
req - > async_data = NULL ;
req - > flags & = ~ REQ_F_ASYNC_DATA ;
}
}
2024-03-19 01:00:23 +03:00
/*
 * Walk @ctx's list of cancelable uring_cmd requests and ask the owning
 * file's ->uring_cmd() handler to cancel each matching one.
 *
 * A request matches when @cancel_all is set or it belongs to @task, and
 * it still carries IORING_URING_CMD_CANCELABLE. The handler is invoked
 * with IO_URING_F_CANCEL | IO_URING_F_COMPLETE_DEFER, and completions
 * are flushed once the walk is done.
 *
 * Must be called with ctx->uring_lock held.
 *
 * Returns true if at least one cancel request was issued.
 */
bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
				   struct task_struct *task, bool cancel_all)
{
	struct hlist_node *next;
	struct io_kiocb *req;
	bool issued = false;

	lockdep_assert_held(&ctx->uring_lock);

	hlist_for_each_entry_safe(req, next, &ctx->cancelable_uring_cmd,
				  hash_node) {
		struct io_uring_cmd *cmd = io_kiocb_to_cmd(req,
							   struct io_uring_cmd);
		struct file *file = req->file;

		if (req->task != task && !cancel_all)
			continue;
		if (!(cmd->flags & IORING_URING_CMD_CANCELABLE))
			continue;

		/* ->sqe isn't available if no async data */
		if (!req_has_async_data(req))
			cmd->sqe = NULL;

		file->f_op->uring_cmd(cmd, IO_URING_F_CANCEL |
					   IO_URING_F_COMPLETE_DEFER);
		issued = true;
	}

	io_submit_flush_completions(ctx);
	return issued;
}
2023-09-28 15:43:25 +03:00
static void io_uring_cmd_del_cancelable ( struct io_uring_cmd * cmd ,
unsigned int issue_flags )
{
struct io_kiocb * req = cmd_to_io_kiocb ( cmd ) ;
struct io_ring_ctx * ctx = req - > ctx ;
if ( ! ( cmd - > flags & IORING_URING_CMD_CANCELABLE ) )
return ;
cmd - > flags & = ~ IORING_URING_CMD_CANCELABLE ;
io_ring_submit_lock ( ctx , issue_flags ) ;
hlist_del ( & req - > hash_node ) ;
io_ring_submit_unlock ( ctx , issue_flags ) ;
}
/*
* Mark this command as concelable , then io_uring_try_cancel_uring_cmd ( )
* will try to cancel this issued command by sending - > uring_cmd ( ) with
* issue_flags of IO_URING_F_CANCEL .
*
* The command is guaranteed to not be done when calling - > uring_cmd ( )
* with IO_URING_F_CANCEL , but it is driver ' s responsibility to deal
* with race between io_uring canceling and normal completion .
*/
void io_uring_cmd_mark_cancelable ( struct io_uring_cmd * cmd ,
unsigned int issue_flags )
{
struct io_kiocb * req = cmd_to_io_kiocb ( cmd ) ;
struct io_ring_ctx * ctx = req - > ctx ;
if ( ! ( cmd - > flags & IORING_URING_CMD_CANCELABLE ) ) {
cmd - > flags | = IORING_URING_CMD_CANCELABLE ;
io_ring_submit_lock ( ctx , issue_flags ) ;
hlist_add_head ( & req - > hash_node , & ctx - > cancelable_uring_cmd ) ;
io_ring_submit_unlock ( ctx , issue_flags ) ;
}
}
EXPORT_SYMBOL_GPL ( io_uring_cmd_mark_cancelable ) ;
2023-03-27 18:38:15 +03:00
static void io_uring_cmd_work ( struct io_kiocb * req , struct io_tw_state * ts )
2022-05-25 14:59:19 +03:00
{
2022-08-11 10:11:15 +03:00
struct io_uring_cmd * ioucmd = io_kiocb_to_cmd ( req , struct io_uring_cmd ) ;
2024-03-19 01:00:25 +03:00
2024-03-19 01:00:30 +03:00
/* task_work executor checks the deffered list completion */
ioucmd - > task_work_cb ( ioucmd , IO_URING_F_COMPLETE_DEFER ) ;
2022-05-25 14:59:19 +03:00
}
2023-05-15 15:54:42 +03:00
void __io_uring_cmd_do_in_task ( struct io_uring_cmd * ioucmd ,
void ( * task_work_cb ) ( struct io_uring_cmd * , unsigned ) ,
unsigned flags )
2022-05-25 14:59:19 +03:00
{
struct io_kiocb * req = cmd_to_io_kiocb ( ioucmd ) ;
ioucmd - > task_work_cb = task_work_cb ;
req - > io_task_work . func = io_uring_cmd_work ;
2023-05-15 15:54:42 +03:00
__io_req_task_work_add ( req , flags ) ;
}
EXPORT_SYMBOL_GPL ( __io_uring_cmd_do_in_task ) ;
2022-05-25 14:59:19 +03:00
/* Store the two extra 64-bit values for a big CQE in @req->big_cqe. */
static inline void io_req_set_cqe32_extra(struct io_kiocb *req,
					  u64 extra1, u64 extra2)
{
	req->big_cqe.extra2 = extra2;
	req->big_cqe.extra1 = extra1;
}
/*
* Called by consumers of io_uring_cmd , if they originally returned
* - EIOCBQUEUED upon receiving the command .
*/
2023-03-21 05:01:25 +03:00
void io_uring_cmd_done ( struct io_uring_cmd * ioucmd , ssize_t ret , ssize_t res2 ,
unsigned issue_flags )
2022-05-25 14:59:19 +03:00
{
struct io_kiocb * req = cmd_to_io_kiocb ( ioucmd ) ;
2023-09-28 15:43:25 +03:00
io_uring_cmd_del_cancelable ( ioucmd , issue_flags ) ;
2022-05-25 14:59:19 +03:00
if ( ret < 0 )
req_set_fail ( req ) ;
2022-08-03 15:07:57 +03:00
io_req_set_res ( req , ret , 0 ) ;
2022-05-25 14:59:19 +03:00
if ( req - > ctx - > flags & IORING_SETUP_CQE32 )
io_req_set_cqe32_extra ( req , res2 , 0 ) ;
2024-03-19 05:41:58 +03:00
io_req_uring_cleanup ( req , issue_flags ) ;
2023-04-12 21:07:36 +03:00
if ( req - > ctx - > flags & IORING_SETUP_IOPOLL ) {
2022-08-23 19:14:41 +03:00
/* order with io_iopoll_req_issued() checking ->iopoll_complete */
smp_store_release ( & req - > iopoll_completed , 1 ) ;
2024-03-19 01:00:25 +03:00
} else if ( issue_flags & IO_URING_F_COMPLETE_DEFER ) {
if ( WARN_ON_ONCE ( issue_flags & IO_URING_F_UNLOCKED ) )
return ;
2024-03-19 01:00:24 +03:00
io_req_complete_defer ( req ) ;
2023-04-12 21:07:36 +03:00
} else {
2024-03-19 01:00:24 +03:00
req - > io_task_work . func = io_req_task_complete ;
io_req_task_work_add ( req ) ;
2023-04-12 21:07:36 +03:00
}
2022-05-25 14:59:19 +03:00
}
EXPORT_SYMBOL_GPL ( io_uring_cmd_done ) ;
2024-03-19 05:41:58 +03:00
static int io_uring_cmd_prep_setup ( struct io_kiocb * req ,
const struct io_uring_sqe * sqe )
2022-05-25 14:59:19 +03:00
{
2022-08-11 10:11:15 +03:00
struct io_uring_cmd * ioucmd = io_kiocb_to_cmd ( req , struct io_uring_cmd ) ;
2024-03-19 05:41:58 +03:00
struct uring_cache * cache ;
2022-05-25 14:59:19 +03:00
2024-03-19 05:41:58 +03:00
cache = io_uring_async_get ( req ) ;
io_uring/uring_cmd: defer SQE copying until it's needed
The previous commit turned on async data for uring_cmd, and did the
basic conversion of setting everything up on the prep side. However, for
a lot of use cases, -EIOCBQUEUED will get returned on issue, as the
operation got successfully queued. For that case, a persistent SQE isn't
needed, as it's just used for issue.
Unless execution goes async immediately, defer copying the double SQE
until it's necessary.
This greatly reduces the overhead of such commands, as evidenced by
a perf diff from before and after this change:
10.60% -8.58% [kernel.vmlinux] [k] io_uring_cmd_prep
where the prep side drops from 10.60% to ~2%, which is more expected.
Performance also rises from ~113M IOPS to ~122M IOPS, bringing us back
to where it was before the async command prep.
Tested-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2024-03-21 00:23:47 +03:00
if ( unlikely ( ! cache ) )
return - ENOMEM ;
if ( ! ( req - > flags & REQ_F_FORCE_ASYNC ) ) {
/* defer memcpy until we need it */
ioucmd - > sqe = sqe ;
2024-03-19 05:41:58 +03:00
return 0 ;
}
io_uring/uring_cmd: defer SQE copying until it's needed
The previous commit turned on async data for uring_cmd, and did the
basic conversion of setting everything up on the prep side. However, for
a lot of use cases, -EIOCBQUEUED will get returned on issue, as the
operation got successfully queued. For that case, a persistent SQE isn't
needed, as it's just used for issue.
Unless execution goes async immediately, defer copying the double SQE
until it's necessary.
This greatly reduces the overhead of such commands, as evidenced by
a perf diff from before and after this change:
10.60% -8.58% [kernel.vmlinux] [k] io_uring_cmd_prep
where the prep side drops from 10.60% to ~2%, which is more expected.
Performance also rises from ~113M IOPS to ~122M IOPS, bringing us back
to where it was before the async command prep.
Tested-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2024-03-21 00:23:47 +03:00
memcpy ( req - > async_data , sqe , uring_sqe_size ( req - > ctx ) ) ;
ioucmd - > sqe = req - > async_data ;
return 0 ;
2022-05-25 14:59:19 +03:00
}
int io_uring_cmd_prep ( struct io_kiocb * req , const struct io_uring_sqe * sqe )
{
2022-08-11 10:11:15 +03:00
struct io_uring_cmd * ioucmd = io_kiocb_to_cmd ( req , struct io_uring_cmd ) ;
2022-05-25 14:59:19 +03:00
2022-09-30 09:27:39 +03:00
if ( sqe - > __pad1 )
2022-05-25 14:59:19 +03:00
return - EINVAL ;
2022-09-30 09:27:39 +03:00
ioucmd - > flags = READ_ONCE ( sqe - > uring_cmd_flags ) ;
2023-09-28 15:43:24 +03:00
if ( ioucmd - > flags & ~ IORING_URING_CMD_MASK )
2022-09-30 09:27:39 +03:00
return - EINVAL ;
if ( ioucmd - > flags & IORING_URING_CMD_FIXED ) {
struct io_ring_ctx * ctx = req - > ctx ;
u16 index ;
req - > buf_index = READ_ONCE ( sqe - > buf_index ) ;
if ( unlikely ( req - > buf_index > = ctx - > nr_user_bufs ) )
return - EFAULT ;
index = array_index_nospec ( req - > buf_index , ctx - > nr_user_bufs ) ;
req - > imu = ctx - > user_bufs [ index ] ;
io_req_set_rsrc_node ( req , ctx , 0 ) ;
}
2022-05-25 14:59:19 +03:00
ioucmd - > cmd_op = READ_ONCE ( sqe - > cmd_op ) ;
2024-03-19 05:41:58 +03:00
return io_uring_cmd_prep_setup ( req , sqe ) ;
2022-05-25 14:59:19 +03:00
}
int io_uring_cmd ( struct io_kiocb * req , unsigned int issue_flags )
{
2022-08-11 10:11:15 +03:00
struct io_uring_cmd * ioucmd = io_kiocb_to_cmd ( req , struct io_uring_cmd ) ;
2022-05-25 14:59:19 +03:00
struct io_ring_ctx * ctx = req - > ctx ;
struct file * file = req - > file ;
int ret ;
2023-03-08 19:26:13 +03:00
if ( ! file - > f_op - > uring_cmd )
2022-05-25 14:59:19 +03:00
return - EOPNOTSUPP ;
2022-07-15 22:16:22 +03:00
ret = security_uring_cmd ( ioucmd ) ;
if ( ret )
return ret ;
2022-05-25 14:59:19 +03:00
if ( ctx - > flags & IORING_SETUP_SQE128 )
issue_flags | = IO_URING_F_SQE128 ;
if ( ctx - > flags & IORING_SETUP_CQE32 )
issue_flags | = IO_URING_F_CQE32 ;
2023-10-16 16:47:43 +03:00
if ( ctx - > compat )
issue_flags | = IO_URING_F_COMPAT ;
2022-08-23 19:14:41 +03:00
if ( ctx - > flags & IORING_SETUP_IOPOLL ) {
2023-03-08 19:26:13 +03:00
if ( ! file - > f_op - > uring_cmd_iopoll )
return - EOPNOTSUPP ;
2022-05-25 14:59:19 +03:00
issue_flags | = IO_URING_F_IOPOLL ;
2022-08-23 19:14:41 +03:00
req - > iopoll_completed = 0 ;
}
2022-05-25 14:59:19 +03:00
ret = file - > f_op - > uring_cmd ( ioucmd , issue_flags ) ;
io_uring/uring_cmd: defer SQE copying until it's needed
The previous commit turned on async data for uring_cmd, and did the
basic conversion of setting everything up on the prep side. However, for
a lot of use cases, -EIOCBQUEUED will get returned on issue, as the
operation got successfully queued. For that case, a persistent SQE isn't
needed, as it's just used for issue.
Unless execution goes async immediately, defer copying the double SQE
until it's necessary.
This greatly reduces the overhead of such commands, as evidenced by
a perf diff from before and after this change:
10.60% -8.58% [kernel.vmlinux] [k] io_uring_cmd_prep
where the prep side drops from 10.60% to ~2%, which is more expected.
Performance also rises from ~113M IOPS to ~122M IOPS, bringing us back
to where it was before the async command prep.
Tested-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2024-03-21 00:23:47 +03:00
if ( ret = = - EAGAIN ) {
struct uring_cache * cache = req - > async_data ;
if ( ioucmd - > sqe ! = ( void * ) cache )
memcpy ( cache , ioucmd - > sqe , uring_sqe_size ( req - > ctx ) ) ;
return - EAGAIN ;
} else if ( ret = = - EIOCBQUEUED ) {
return - EIOCBQUEUED ;
}
2022-05-25 14:59:19 +03:00
2024-03-19 05:41:58 +03:00
if ( ret < 0 )
req_set_fail ( req ) ;
io_req_uring_cleanup ( req , issue_flags ) ;
io_req_set_res ( req , ret , 0 ) ;
return ret ;
2022-05-25 14:59:19 +03:00
}
2022-09-30 09:27:38 +03:00
/*
 * Build @iter over [@ubuf, @ubuf + @len) using the registered buffer that
 * was attached to the command's request (req->imu). @rw is handed to
 * io_import_fixed() unchanged, and its return value is returned as-is.
 */
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
			      struct iov_iter *iter, void *ioucmd)
{
	return io_import_fixed(rw, iter, cmd_to_io_kiocb(ioucmd)->imu,
			       ubuf, len);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_import_fixed);
2023-06-27 16:44:24 +03:00
2023-10-16 16:47:47 +03:00
static inline int io_uring_cmd_getsockopt ( struct socket * sock ,
struct io_uring_cmd * cmd ,
unsigned int issue_flags )
{
bool compat = ! ! ( issue_flags & IO_URING_F_COMPAT ) ;
int optlen , optname , level , err ;
void __user * optval ;
level = READ_ONCE ( cmd - > sqe - > level ) ;
if ( level ! = SOL_SOCKET )
return - EOPNOTSUPP ;
optval = u64_to_user_ptr ( READ_ONCE ( cmd - > sqe - > optval ) ) ;
optname = READ_ONCE ( cmd - > sqe - > optname ) ;
optlen = READ_ONCE ( cmd - > sqe - > optlen ) ;
err = do_sock_getsockopt ( sock , compat , level , optname ,
USER_SOCKPTR ( optval ) ,
KERNEL_SOCKPTR ( & optlen ) ) ;
if ( err )
return err ;
/* On success, return optlen */
return optlen ;
}
2023-10-16 16:47:48 +03:00
static inline int io_uring_cmd_setsockopt ( struct socket * sock ,
struct io_uring_cmd * cmd ,
unsigned int issue_flags )
{
bool compat = ! ! ( issue_flags & IO_URING_F_COMPAT ) ;
int optname , optlen , level ;
void __user * optval ;
sockptr_t optval_s ;
optval = u64_to_user_ptr ( READ_ONCE ( cmd - > sqe - > optval ) ) ;
optname = READ_ONCE ( cmd - > sqe - > optname ) ;
optlen = READ_ONCE ( cmd - > sqe - > optlen ) ;
level = READ_ONCE ( cmd - > sqe - > level ) ;
optval_s = USER_SOCKPTR ( optval ) ;
return do_sock_setsockopt ( sock , compat , level , optname , optval_s ,
optlen ) ;
}
2023-10-16 16:47:46 +03:00
# if defined(CONFIG_NET)
2023-06-27 16:44:24 +03:00
int io_uring_cmd_sock ( struct io_uring_cmd * cmd , unsigned int issue_flags )
{
struct socket * sock = cmd - > file - > private_data ;
struct sock * sk = sock - > sk ;
struct proto * prot = READ_ONCE ( sk - > sk_prot ) ;
int ret , arg = 0 ;
if ( ! prot | | ! prot - > ioctl )
return - EOPNOTSUPP ;
switch ( cmd - > sqe - > cmd_op ) {
case SOCKET_URING_OP_SIOCINQ :
ret = prot - > ioctl ( sk , SIOCINQ , & arg ) ;
if ( ret )
return ret ;
return arg ;
case SOCKET_URING_OP_SIOCOUTQ :
ret = prot - > ioctl ( sk , SIOCOUTQ , & arg ) ;
if ( ret )
return ret ;
return arg ;
2023-10-16 16:47:47 +03:00
case SOCKET_URING_OP_GETSOCKOPT :
return io_uring_cmd_getsockopt ( sock , cmd , issue_flags ) ;
2023-10-16 16:47:48 +03:00
case SOCKET_URING_OP_SETSOCKOPT :
return io_uring_cmd_setsockopt ( sock , cmd , issue_flags ) ;
2023-06-27 16:44:24 +03:00
default :
return - EOPNOTSUPP ;
}
}
EXPORT_SYMBOL_GPL ( io_uring_cmd_sock ) ;
2023-10-16 16:47:46 +03:00
# endif