io_uring: store SCM state in io_fixed_file->file_ptr

A previous commit removed SCM accounting for non-unix sockets, as those
are the only ones that can cause a fixed file reference. While that is
true, it also means we're now dereferencing the file as part of the
workqueue driven __io_sqe_files_unregister() after the process has
exited. This isn't safe for SCM files, as unix gc may have already
reaped them when the process exited. KASAN complains about this:

[   12.307040] Freed by task 0:
[   12.307592]  kasan_save_stack+0x28/0x4c
[   12.308318]  kasan_set_track+0x28/0x38
[   12.309049]  kasan_set_free_info+0x24/0x44
[   12.309890]  ____kasan_slab_free+0x108/0x11c
[   12.310739]  __kasan_slab_free+0x14/0x1c
[   12.311482]  slab_free_freelist_hook+0xd4/0x164
[   12.312382]  kmem_cache_free+0x100/0x1dc
[   12.313178]  file_free_rcu+0x58/0x74
[   12.313864]  rcu_core+0x59c/0x7c0
[   12.314675]  rcu_core_si+0xc/0x14
[   12.315496]  _stext+0x30c/0x414
[   12.316287]
[   12.316687] Last potentially related work creation:
[   12.317885]  kasan_save_stack+0x28/0x4c
[   12.318845]  __kasan_record_aux_stack+0x9c/0xb0
[   12.319976]  kasan_record_aux_stack_noalloc+0x10/0x18
[   12.321268]  call_rcu+0x50/0x35c
[   12.322082]  __fput+0x2fc/0x324
[   12.322873]  ____fput+0xc/0x14
[   12.323644]  task_work_run+0xac/0x10c
[   12.324561]  do_notify_resume+0x37c/0xe74
[   12.325420]  el0_svc+0x5c/0x68
[   12.326050]  el0t_64_sync_handler+0xb0/0x12c
[   12.326918]  el0t_64_sync+0x164/0x168
[   12.327657]
[   12.327976] Second to last potentially related work creation:
[   12.329134]  kasan_save_stack+0x28/0x4c
[   12.329864]  __kasan_record_aux_stack+0x9c/0xb0
[   12.330735]  kasan_record_aux_stack+0x10/0x18
[   12.331576]  task_work_add+0x34/0xf0
[   12.332284]  fput_many+0x11c/0x134
[   12.332960]  fput+0x10/0x94
[   12.333524]  __scm_destroy+0x80/0x84
[   12.334213]  unix_destruct_scm+0xc4/0x144
[   12.334948]  skb_release_head_state+0x5c/0x6c
[   12.335696]  skb_release_all+0x14/0x38
[   12.336339]  __kfree_skb+0x14/0x28
[   12.336928]  kfree_skb_reason+0xf4/0x108
[   12.337604]  unix_gc+0x1e8/0x42c
[   12.338154]  unix_release_sock+0x25c/0x2dc
[   12.338895]  unix_release+0x58/0x78
[   12.339531]  __sock_release+0x68/0xec
[   12.340170]  sock_close+0x14/0x20
[   12.340729]  __fput+0x18c/0x324
[   12.341254]  ____fput+0xc/0x14
[   12.341763]  task_work_run+0xac/0x10c
[   12.342367]  do_notify_resume+0x37c/0xe74
[   12.343086]  el0_svc+0x5c/0x68
[   12.343510]  el0t_64_sync_handler+0xb0/0x12c
[   12.344086]  el0t_64_sync+0x164/0x168

We have an extra bit we can use in file_ptr on 64-bit, use that to store
whether this file is SCM'ed or not, avoiding the need to look at the
file contents itself. This does mean that 32-bit will be stuck with SCM
for all registered files, just like 64-bit did before the referenced
commit.

Fixes: 1f59bc0f18cf ("io_uring: don't scm-account for non af_unix sockets")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Jens Axboe 2022-04-20 16:15:27 -06:00
parent 7ac1edc4a9
commit 5e45690a1c

View File

@ -223,6 +223,23 @@ struct io_overflow_cqe {
struct list_head list; struct list_head list;
}; };
/*
* FFS_SCM is only available on 64-bit archs, for 32-bit we just define it as 0
* and define IO_URING_SCM_ALL. For this case, we use SCM for all files as we
* can't safely always dereference the file when the task has exited and ring
* cleanup is done. If a file is tracked and part of SCM, then unix gc on
* process exit may reap it before __io_sqe_files_unregister() is run.
*/
#define FFS_NOWAIT 0x1UL
#define FFS_ISREG 0x2UL
#if defined(CONFIG_64BIT)
#define FFS_SCM 0x4UL
#else
#define IO_URING_SCM_ALL
#define FFS_SCM 0x0UL
#endif
#define FFS_MASK ~(FFS_NOWAIT|FFS_ISREG|FFS_SCM)
struct io_fixed_file { struct io_fixed_file {
/* file * with additional FFS_* flags */ /* file * with additional FFS_* flags */
unsigned long file_ptr; unsigned long file_ptr;
@ -1236,12 +1253,16 @@ EXPORT_SYMBOL(io_uring_get_socket);
#if defined(CONFIG_UNIX) #if defined(CONFIG_UNIX)
static inline bool io_file_need_scm(struct file *filp) static inline bool io_file_need_scm(struct file *filp)
{ {
#if defined(IO_URING_SCM_ALL)
return true;
#else
return !!unix_get_socket(filp); return !!unix_get_socket(filp);
#endif
} }
#else #else
static inline bool io_file_need_scm(struct file *filp) static inline bool io_file_need_scm(struct file *filp)
{ {
return 0; return false;
} }
#endif #endif
@ -1651,10 +1672,6 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq)
return false; return false;
} }
#define FFS_NOWAIT 0x1UL
#define FFS_ISREG 0x2UL
#define FFS_MASK ~(FFS_NOWAIT|FFS_ISREG)
static inline bool io_req_ffs_set(struct io_kiocb *req) static inline bool io_req_ffs_set(struct io_kiocb *req)
{ {
return req->flags & REQ_F_FIXED_FILE; return req->flags & REQ_F_FIXED_FILE;
@ -3197,6 +3214,8 @@ static unsigned int io_file_get_flags(struct file *file)
res |= FFS_ISREG; res |= FFS_ISREG;
if (__io_file_supports_nowait(file, mode)) if (__io_file_supports_nowait(file, mode))
res |= FFS_NOWAIT; res |= FFS_NOWAIT;
if (io_file_need_scm(file))
res |= FFS_SCM;
return res; return res;
} }
@ -8478,14 +8497,17 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
{ {
int i; int i;
#if !defined(IO_URING_SCM_ALL)
for (i = 0; i < ctx->nr_user_files; i++) { for (i = 0; i < ctx->nr_user_files; i++) {
struct file *file = io_file_from_index(ctx, i); struct file *file = io_file_from_index(ctx, i);
if (!file || io_file_need_scm(file)) if (!file)
continue;
if (io_fixed_file_slot(&ctx->file_table, i)->file_ptr & FFS_SCM)
continue; continue;
io_fixed_file_slot(&ctx->file_table, i)->file_ptr = 0;
fput(file); fput(file);
} }
#endif
#if defined(CONFIG_UNIX) #if defined(CONFIG_UNIX)
if (ctx->ring_sock) { if (ctx->ring_sock) {