2022-05-25 19:40:19 +03:00
// SPDX-License-Identifier: GPL-2.0
# include <linux/kernel.h>
# include <linux/errno.h>
# include <linux/fs.h>
# include <linux/file.h>
# include <linux/proc_fs.h>
# include <linux/seq_file.h>
# include <linux/io_uring.h>
# include <uapi/linux/io_uring.h>
# include "io_uring.h"
# include "sqpoll.h"
# include "fdinfo.h"
2022-06-16 12:22:02 +03:00
# include "cancel.h"
2022-06-19 04:44:33 +03:00
# include "rsrc.h"
2022-05-25 19:40:19 +03:00
# ifdef CONFIG_PROC_FS
static __cold int io_uring_show_cred ( struct seq_file * m , unsigned int id ,
const struct cred * cred )
{
struct user_namespace * uns = seq_user_ns ( m ) ;
struct group_info * gi ;
kernel_cap_t cap ;
int g ;
seq_printf ( m , " %5d \n " , id ) ;
seq_put_decimal_ull ( m , " \t Uid: \t " , from_kuid_munged ( uns , cred - > uid ) ) ;
seq_put_decimal_ull ( m , " \t \t " , from_kuid_munged ( uns , cred - > euid ) ) ;
seq_put_decimal_ull ( m , " \t \t " , from_kuid_munged ( uns , cred - > suid ) ) ;
seq_put_decimal_ull ( m , " \t \t " , from_kuid_munged ( uns , cred - > fsuid ) ) ;
seq_put_decimal_ull ( m , " \n \t Gid: \t " , from_kgid_munged ( uns , cred - > gid ) ) ;
seq_put_decimal_ull ( m , " \t \t " , from_kgid_munged ( uns , cred - > egid ) ) ;
seq_put_decimal_ull ( m , " \t \t " , from_kgid_munged ( uns , cred - > sgid ) ) ;
seq_put_decimal_ull ( m , " \t \t " , from_kgid_munged ( uns , cred - > fsgid ) ) ;
seq_puts ( m , " \n \t Groups: \t " ) ;
gi = cred - > group_info ;
for ( g = 0 ; g < gi - > ngroups ; g + + ) {
seq_put_decimal_ull ( m , g ? " " : " " ,
from_kgid_munged ( uns , gi - > gid [ g ] ) ) ;
}
seq_puts ( m , " \n \t CapEff: \t " ) ;
cap = cred - > cap_effective ;
2023-02-28 22:39:09 +03:00
seq_put_hex_ll ( m , NULL , cap . val , 16 ) ;
2022-05-25 19:40:19 +03:00
seq_putc ( m , ' \n ' ) ;
return 0 ;
}
2023-07-10 22:13:54 +03:00
/*
* Caller holds a reference to the file already , we don ' t need to do
* anything else to get an extra reference .
*/
__cold void io_uring_show_fdinfo ( struct seq_file * m , struct file * f )
2022-05-25 19:40:19 +03:00
{
2023-07-10 22:13:54 +03:00
struct io_ring_ctx * ctx = f - > private_data ;
2022-05-25 19:40:19 +03:00
struct io_overflow_cqe * ocqe ;
struct io_rings * r = ctx - > rings ;
unsigned int sq_mask = ctx - > sq_entries - 1 , cq_mask = ctx - > cq_entries - 1 ;
unsigned int sq_head = READ_ONCE ( r - > sq . head ) ;
unsigned int sq_tail = READ_ONCE ( r - > sq . tail ) ;
unsigned int cq_head = READ_ONCE ( r - > cq . head ) ;
unsigned int cq_tail = READ_ONCE ( r - > cq . tail ) ;
unsigned int cq_shift = 0 ;
io_uring/fdinfo: fix sqe dumping for IORING_SETUP_SQE128
If we have doubly sized SQEs, then we need to shift the sq index by 1
to account for using two entries for a single request. The CQE dumping
gets this right, but the SQE one does not.
Improve the SQE dumping in general, the information dumped is pretty
sparse and doesn't even cover the whole basic part of the SQE. Include
information on the extended part of the SQE, if doubly sized SQEs are
in use. A typical dump now looks like the following:
[...]
SQEs: 32
32: opcode:URING_CMD, fd:0, flags:1, off:3225964160, addr:0x0, rw_flags:0x0, buf_index:0 user_data:2721, e0:0x0, e1:0xffffb8041000, e2:0x100000000000, e3:0x5500, e4:0x7, e5:0x0, e6:0x0, e7:0x0
33: opcode:URING_CMD, fd:0, flags:1, off:3225964160, addr:0x0, rw_flags:0x0, buf_index:0 user_data:2722, e0:0x0, e1:0xffffb8043000, e2:0x100000000000, e3:0x5508, e4:0x7, e5:0x0, e6:0x0, e7:0x0
34: opcode:URING_CMD, fd:0, flags:1, off:3225964160, addr:0x0, rw_flags:0x0, buf_index:0 user_data:2723, e0:0x0, e1:0xffffb8045000, e2:0x100000000000, e3:0x5510, e4:0x7, e5:0x0, e6:0x0, e7:0x0
[...]
Fixes: ebdeb7c01d02 ("io_uring: add support for 128-byte SQEs")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2022-09-11 15:40:37 +03:00
unsigned int sq_shift = 0 ;
2022-05-25 19:40:19 +03:00
unsigned int sq_entries , cq_entries ;
2023-10-21 21:30:29 +03:00
int sq_pid = - 1 , sq_cpu = - 1 ;
2022-05-25 19:40:19 +03:00
bool has_lock ;
unsigned int i ;
2022-09-11 15:36:09 +03:00
if ( ctx - > flags & IORING_SETUP_CQE32 )
2022-05-25 19:40:19 +03:00
cq_shift = 1 ;
io_uring/fdinfo: fix sqe dumping for IORING_SETUP_SQE128
If we have doubly sized SQEs, then we need to shift the sq index by 1
to account for using two entries for a single request. The CQE dumping
gets this right, but the SQE one does not.
Improve the SQE dumping in general, the information dumped is pretty
sparse and doesn't even cover the whole basic part of the SQE. Include
information on the extended part of the SQE, if doubly sized SQEs are
in use. A typical dump now looks like the following:
[...]
SQEs: 32
32: opcode:URING_CMD, fd:0, flags:1, off:3225964160, addr:0x0, rw_flags:0x0, buf_index:0 user_data:2721, e0:0x0, e1:0xffffb8041000, e2:0x100000000000, e3:0x5500, e4:0x7, e5:0x0, e6:0x0, e7:0x0
33: opcode:URING_CMD, fd:0, flags:1, off:3225964160, addr:0x0, rw_flags:0x0, buf_index:0 user_data:2722, e0:0x0, e1:0xffffb8043000, e2:0x100000000000, e3:0x5508, e4:0x7, e5:0x0, e6:0x0, e7:0x0
34: opcode:URING_CMD, fd:0, flags:1, off:3225964160, addr:0x0, rw_flags:0x0, buf_index:0 user_data:2723, e0:0x0, e1:0xffffb8045000, e2:0x100000000000, e3:0x5510, e4:0x7, e5:0x0, e6:0x0, e7:0x0
[...]
Fixes: ebdeb7c01d02 ("io_uring: add support for 128-byte SQEs")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2022-09-11 15:40:37 +03:00
if ( ctx - > flags & IORING_SETUP_SQE128 )
sq_shift = 1 ;
2022-05-25 19:40:19 +03:00
/*
* we may get imprecise sqe and cqe info if uring is actively running
* since we get cached_sq_head and cached_cq_tail without uring_lock
* and sq_tail and cq_head are changed by userspace . But it ' s ok since
* we usually use these info when it is stuck .
*/
seq_printf ( m , " SqMask: \t 0x%x \n " , sq_mask ) ;
seq_printf ( m , " SqHead: \t %u \n " , sq_head ) ;
seq_printf ( m , " SqTail: \t %u \n " , sq_tail ) ;
seq_printf ( m , " CachedSqHead: \t %u \n " , ctx - > cached_sq_head ) ;
seq_printf ( m , " CqMask: \t 0x%x \n " , cq_mask ) ;
seq_printf ( m , " CqHead: \t %u \n " , cq_head ) ;
seq_printf ( m , " CqTail: \t %u \n " , cq_tail ) ;
seq_printf ( m , " CachedCqTail: \t %u \n " , ctx - > cached_cq_tail ) ;
io_uring/fdinfo: fix sqe dumping for IORING_SETUP_SQE128
If we have doubly sized SQEs, then we need to shift the sq index by 1
to account for using two entries for a single request. The CQE dumping
gets this right, but the SQE one does not.
Improve the SQE dumping in general, the information dumped is pretty
sparse and doesn't even cover the whole basic part of the SQE. Include
information on the extended part of the SQE, if doubly sized SQEs are
in use. A typical dump now looks like the following:
[...]
SQEs: 32
32: opcode:URING_CMD, fd:0, flags:1, off:3225964160, addr:0x0, rw_flags:0x0, buf_index:0 user_data:2721, e0:0x0, e1:0xffffb8041000, e2:0x100000000000, e3:0x5500, e4:0x7, e5:0x0, e6:0x0, e7:0x0
33: opcode:URING_CMD, fd:0, flags:1, off:3225964160, addr:0x0, rw_flags:0x0, buf_index:0 user_data:2722, e0:0x0, e1:0xffffb8043000, e2:0x100000000000, e3:0x5508, e4:0x7, e5:0x0, e6:0x0, e7:0x0
34: opcode:URING_CMD, fd:0, flags:1, off:3225964160, addr:0x0, rw_flags:0x0, buf_index:0 user_data:2723, e0:0x0, e1:0xffffb8045000, e2:0x100000000000, e3:0x5510, e4:0x7, e5:0x0, e6:0x0, e7:0x0
[...]
Fixes: ebdeb7c01d02 ("io_uring: add support for 128-byte SQEs")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2022-09-11 15:40:37 +03:00
seq_printf ( m , " SQEs: \t %u \n " , sq_tail - sq_head ) ;
2022-05-25 19:40:19 +03:00
sq_entries = min ( sq_tail - sq_head , ctx - > sq_entries ) ;
for ( i = 0 ; i < sq_entries ; i + + ) {
unsigned int entry = i + sq_head ;
struct io_uring_sqe * sqe ;
io_uring/fdinfo: fix sqe dumping for IORING_SETUP_SQE128
If we have doubly sized SQEs, then we need to shift the sq index by 1
to account for using two entries for a single request. The CQE dumping
gets this right, but the SQE one does not.
Improve the SQE dumping in general, the information dumped is pretty
sparse and doesn't even cover the whole basic part of the SQE. Include
information on the extended part of the SQE, if doubly sized SQEs are
in use. A typical dump now looks like the following:
[...]
SQEs: 32
32: opcode:URING_CMD, fd:0, flags:1, off:3225964160, addr:0x0, rw_flags:0x0, buf_index:0 user_data:2721, e0:0x0, e1:0xffffb8041000, e2:0x100000000000, e3:0x5500, e4:0x7, e5:0x0, e6:0x0, e7:0x0
33: opcode:URING_CMD, fd:0, flags:1, off:3225964160, addr:0x0, rw_flags:0x0, buf_index:0 user_data:2722, e0:0x0, e1:0xffffb8043000, e2:0x100000000000, e3:0x5508, e4:0x7, e5:0x0, e6:0x0, e7:0x0
34: opcode:URING_CMD, fd:0, flags:1, off:3225964160, addr:0x0, rw_flags:0x0, buf_index:0 user_data:2723, e0:0x0, e1:0xffffb8045000, e2:0x100000000000, e3:0x5510, e4:0x7, e5:0x0, e6:0x0, e7:0x0
[...]
Fixes: ebdeb7c01d02 ("io_uring: add support for 128-byte SQEs")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2022-09-11 15:40:37 +03:00
unsigned int sq_idx ;
2022-05-25 19:40:19 +03:00
2023-09-01 22:59:19 +03:00
if ( ctx - > flags & IORING_SETUP_NO_SQARRAY )
break ;
io_uring/fdinfo: fix sqe dumping for IORING_SETUP_SQE128
If we have doubly sized SQEs, then we need to shift the sq index by 1
to account for using two entries for a single request. The CQE dumping
gets this right, but the SQE one does not.
Improve the SQE dumping in general, the information dumped is pretty
sparse and doesn't even cover the whole basic part of the SQE. Include
information on the extended part of the SQE, if doubly sized SQEs are
in use. A typical dump now looks like the following:
[...]
SQEs: 32
32: opcode:URING_CMD, fd:0, flags:1, off:3225964160, addr:0x0, rw_flags:0x0, buf_index:0 user_data:2721, e0:0x0, e1:0xffffb8041000, e2:0x100000000000, e3:0x5500, e4:0x7, e5:0x0, e6:0x0, e7:0x0
33: opcode:URING_CMD, fd:0, flags:1, off:3225964160, addr:0x0, rw_flags:0x0, buf_index:0 user_data:2722, e0:0x0, e1:0xffffb8043000, e2:0x100000000000, e3:0x5508, e4:0x7, e5:0x0, e6:0x0, e7:0x0
34: opcode:URING_CMD, fd:0, flags:1, off:3225964160, addr:0x0, rw_flags:0x0, buf_index:0 user_data:2723, e0:0x0, e1:0xffffb8045000, e2:0x100000000000, e3:0x5510, e4:0x7, e5:0x0, e6:0x0, e7:0x0
[...]
Fixes: ebdeb7c01d02 ("io_uring: add support for 128-byte SQEs")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2022-09-11 15:40:37 +03:00
sq_idx = READ_ONCE ( ctx - > sq_array [ entry & sq_mask ] ) ;
2022-05-25 19:40:19 +03:00
if ( sq_idx > sq_mask )
continue ;
2022-10-11 03:59:57 +03:00
sqe = & ctx - > sq_sqes [ sq_idx < < sq_shift ] ;
io_uring/fdinfo: fix sqe dumping for IORING_SETUP_SQE128
If we have doubly sized SQEs, then we need to shift the sq index by 1
to account for using two entries for a single request. The CQE dumping
gets this right, but the SQE one does not.
Improve the SQE dumping in general, the information dumped is pretty
sparse and doesn't even cover the whole basic part of the SQE. Include
information on the extended part of the SQE, if doubly sized SQEs are
in use. A typical dump now looks like the following:
[...]
SQEs: 32
32: opcode:URING_CMD, fd:0, flags:1, off:3225964160, addr:0x0, rw_flags:0x0, buf_index:0 user_data:2721, e0:0x0, e1:0xffffb8041000, e2:0x100000000000, e3:0x5500, e4:0x7, e5:0x0, e6:0x0, e7:0x0
33: opcode:URING_CMD, fd:0, flags:1, off:3225964160, addr:0x0, rw_flags:0x0, buf_index:0 user_data:2722, e0:0x0, e1:0xffffb8043000, e2:0x100000000000, e3:0x5508, e4:0x7, e5:0x0, e6:0x0, e7:0x0
34: opcode:URING_CMD, fd:0, flags:1, off:3225964160, addr:0x0, rw_flags:0x0, buf_index:0 user_data:2723, e0:0x0, e1:0xffffb8045000, e2:0x100000000000, e3:0x5510, e4:0x7, e5:0x0, e6:0x0, e7:0x0
[...]
Fixes: ebdeb7c01d02 ("io_uring: add support for 128-byte SQEs")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2022-09-11 15:40:37 +03:00
seq_printf ( m , " %5u: opcode:%s, fd:%d, flags:%x, off:%llu, "
" addr:0x%llx, rw_flags:0x%x, buf_index:%d "
" user_data:%llu " ,
sq_idx , io_uring_get_opcode ( sqe - > opcode ) , sqe - > fd ,
sqe - > flags , ( unsigned long long ) sqe - > off ,
( unsigned long long ) sqe - > addr , sqe - > rw_flags ,
sqe - > buf_index , sqe - > user_data ) ;
if ( sq_shift ) {
u64 * sqeb = ( void * ) ( sqe + 1 ) ;
int size = sizeof ( struct io_uring_sqe ) / sizeof ( u64 ) ;
int j ;
for ( j = 0 ; j < size ; j + + ) {
seq_printf ( m , " , e%d:0x%llx " , j ,
( unsigned long long ) * sqeb ) ;
sqeb + + ;
}
}
seq_printf ( m , " \n " ) ;
2022-05-25 19:40:19 +03:00
}
seq_printf ( m , " CQEs: \t %u \n " , cq_tail - cq_head ) ;
cq_entries = min ( cq_tail - cq_head , ctx - > cq_entries ) ;
for ( i = 0 ; i < cq_entries ; i + + ) {
unsigned int entry = i + cq_head ;
struct io_uring_cqe * cqe = & r - > cqes [ ( entry & cq_mask ) < < cq_shift ] ;
2022-09-11 15:36:09 +03:00
seq_printf ( m , " %5u: user_data:%llu, res:%d, flag:%x " ,
2022-05-25 19:40:19 +03:00
entry & cq_mask , cqe - > user_data , cqe - > res ,
cqe - > flags ) ;
2022-09-11 15:36:09 +03:00
if ( cq_shift )
seq_printf ( m , " , extra1:%llu, extra2:%llu \n " ,
cqe - > big_cqe [ 0 ] , cqe - > big_cqe [ 1 ] ) ;
seq_printf ( m , " \n " ) ;
2022-05-25 19:40:19 +03:00
}
/*
* Avoid ABBA deadlock between the seq lock and the io_uring mutex ,
* since fdinfo case grabs it in the opposite direction of normal use
* cases . If we fail to get the lock , we just don ' t iterate any
* structures that could be going away outside the io_uring mutex .
*/
has_lock = mutex_trylock ( & ctx - > uring_lock ) ;
if ( has_lock & & ( ctx - > flags & IORING_SETUP_SQPOLL ) ) {
2023-10-21 21:30:29 +03:00
struct io_sq_data * sq = ctx - > sq_data ;
io_uring/fdinfo: remove need for sqpoll lock for thread/pid retrieval
A previous commit added a trylock for getting the SQPOLL thread info via
fdinfo, but this introduced a regression where we often fail to get it if
the thread is busy. For that case, we end up not printing the current CPU
and PID info.
Rather than rely on this lock, just print the pid we already stored in
the io_sq_data struct, and ensure we update the current CPU every time
we've slept or potentially rescheduled. The latter won't potentially be
100% accurate, but that wasn't the case before either as the task can
get migrated at any time unless it has been pinned at creation time.
We retain keeping the io_sq_data dereference inside the ctx->uring_lock,
as it has always been, as destruction of the thread and data happen below
that. We could make this RCU safe, but there's little point in doing that.
With this, we always print the last valid information we had, rather than
have spurious outputs with missing information.
Fixes: 7644b1a1c9a7 ("io_uring/fdinfo: lock SQ thread while retrieving thread cpu/pid")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2023-11-14 19:55:50 +03:00
sq_pid = sq - > task_pid ;
sq_cpu = sq - > sq_cpu ;
2022-05-25 19:40:19 +03:00
}
2023-10-21 21:30:29 +03:00
seq_printf ( m , " SqThread: \t %d \n " , sq_pid ) ;
seq_printf ( m , " SqThreadCpu: \t %d \n " , sq_cpu ) ;
2022-05-25 19:40:19 +03:00
seq_printf ( m , " UserFiles: \t %u \n " , ctx - > nr_user_files ) ;
for ( i = 0 ; has_lock & & i < ctx - > nr_user_files ; i + + ) {
struct file * f = io_file_from_index ( & ctx - > file_table , i ) ;
if ( f )
seq_printf ( m , " %5u: %s \n " , i , file_dentry ( f ) - > d_iname ) ;
else
seq_printf ( m , " %5u: <none> \n " , i ) ;
}
seq_printf ( m , " UserBufs: \t %u \n " , ctx - > nr_user_bufs ) ;
for ( i = 0 ; has_lock & & i < ctx - > nr_user_bufs ; i + + ) {
struct io_mapped_ubuf * buf = ctx - > user_bufs [ i ] ;
unsigned int len = buf - > ubuf_end - buf - > ubuf ;
seq_printf ( m , " %5u: 0x%llx/%u \n " , i , buf - > ubuf , len ) ;
}
if ( has_lock & & ! xa_empty ( & ctx - > personalities ) ) {
unsigned long index ;
const struct cred * cred ;
seq_printf ( m , " Personalities: \n " ) ;
xa_for_each ( & ctx - > personalities , index , cred )
io_uring_show_cred ( m , index , cred ) ;
}
seq_puts ( m , " PollList: \n " ) ;
2022-06-16 12:22:10 +03:00
for ( i = 0 ; i < ( 1U < < ctx - > cancel_table . hash_bits ) ; i + + ) {
struct io_hash_bucket * hb = & ctx - > cancel_table . hbs [ i ] ;
2023-01-10 20:24:52 +03:00
struct io_hash_bucket * hbl = & ctx - > cancel_table_locked . hbs [ i ] ;
2022-05-25 19:40:19 +03:00
struct io_kiocb * req ;
2022-06-16 12:22:02 +03:00
spin_lock ( & hb - > lock ) ;
hlist_for_each_entry ( req , & hb - > list , hash_node )
2022-05-25 19:40:19 +03:00
seq_printf ( m , " op=%d, task_works=%d \n " , req - > opcode ,
task_work_pending ( req - > task ) ) ;
2022-06-16 12:22:02 +03:00
spin_unlock ( & hb - > lock ) ;
2023-01-10 20:24:52 +03:00
if ( ! has_lock )
continue ;
hlist_for_each_entry ( req , & hbl - > list , hash_node )
seq_printf ( m , " op=%d, task_works=%d \n " , req - > opcode ,
task_work_pending ( req - > task ) ) ;
2022-05-25 19:40:19 +03:00
}
2023-01-10 20:24:52 +03:00
if ( has_lock )
mutex_unlock ( & ctx - > uring_lock ) ;
2022-05-25 19:40:19 +03:00
seq_puts ( m , " CqOverflowList: \n " ) ;
2022-06-16 12:22:02 +03:00
spin_lock ( & ctx - > completion_lock ) ;
2022-05-25 19:40:19 +03:00
list_for_each_entry ( ocqe , & ctx - > cq_overflow_list , list ) {
struct io_uring_cqe * cqe = & ocqe - > cqe ;
seq_printf ( m , " user_data=%llu, res=%d, flags=%x \n " ,
cqe - > user_data , cqe - > res , cqe - > flags ) ;
}
spin_unlock ( & ctx - > completion_lock ) ;
}
# endif