Kumar Kartikeya Dwivedi 4656569643 selftests/bpf: Add test for race in btf_try_get_module
This adds a complete test case to ensure we never take references to
modules not in MODULE_STATE_LIVE, which can lead to UAF, and it also
ensures we never access btf->kfunc_set_tab in an inconsistent state.

The test uses userfaultfd to artificially widen the race.

When run on an unpatched kernel, it leads to the following splat:

[root@(none) bpf]# ./test_progs -t bpf_mod_race/ksym
[   55.498171] BUG: unable to handle page fault for address: fffffbfff802548b
[   55.499206] #PF: supervisor read access in kernel mode
[   55.499855] #PF: error_code(0x0000) - not-present page
[   55.500555] PGD a4fa9067 P4D a4fa9067 PUD a4fa5067 PMD 1b44067 PTE 0
[   55.501499] Oops: 0000 [#1] PREEMPT SMP KASAN NOPTI
[   55.502195] CPU: 0 PID: 83 Comm: kworker/0:2 Tainted: G           OE     5.16.0-rc4+ #151
[   55.503388] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ArchLinux 1.15.0-1 04/01/2014
[   55.504777] Workqueue: events bpf_prog_free_deferred
[   55.505563] RIP: 0010:kasan_check_range+0x184/0x1d0
[   55.509140] RSP: 0018:ffff88800560fcf0 EFLAGS: 00010282
[   55.509977] RAX: fffffbfff802548b RBX: fffffbfff802548c RCX: ffffffff9337b6ba
[   55.511096] RDX: fffffbfff802548c RSI: 0000000000000004 RDI: ffffffffc012a458
[   55.512143] RBP: fffffbfff802548b R08: 0000000000000001 R09: ffffffffc012a45b
[   55.513228] R10: fffffbfff802548b R11: 0000000000000001 R12: ffff888001b5f598
[   55.514332] R13: ffff888004f49ac8 R14: 0000000000000000 R15: ffff888092449400
[   55.515418] FS:  0000000000000000(0000) GS:ffff888092400000(0000) knlGS:0000000000000000
[   55.516705] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   55.517560] CR2: fffffbfff802548b CR3: 0000000007c10006 CR4: 0000000000770ef0
[   55.518672] PKRU: 55555554
[   55.519022] Call Trace:
[   55.519483]  <TASK>
[   55.519884]  module_put.part.0+0x2a/0x180
[   55.520642]  bpf_prog_free_deferred+0x129/0x2e0
[   55.521478]  process_one_work+0x4fa/0x9e0
[   55.522122]  ? pwq_dec_nr_in_flight+0x100/0x100
[   55.522878]  ? rwlock_bug.part.0+0x60/0x60
[   55.523551]  worker_thread+0x2eb/0x700
[   55.524176]  ? __kthread_parkme+0xd8/0xf0
[   55.524853]  ? process_one_work+0x9e0/0x9e0
[   55.525544]  kthread+0x23a/0x270
[   55.526088]  ? set_kthread_struct+0x80/0x80
[   55.526798]  ret_from_fork+0x1f/0x30
[   55.527413]  </TASK>
[   55.527813] Modules linked in: bpf_testmod(OE) [last unloaded: bpf_testmod]
[   55.530846] CR2: fffffbfff802548b
[   55.531341] ---[ end trace 1af41803c054ad6d ]---
[   55.532136] RIP: 0010:kasan_check_range+0x184/0x1d0
[   55.535887] RSP: 0018:ffff88800560fcf0 EFLAGS: 00010282
[   55.536711] RAX: fffffbfff802548b RBX: fffffbfff802548c RCX: ffffffff9337b6ba
[   55.537821] RDX: fffffbfff802548c RSI: 0000000000000004 RDI: ffffffffc012a458
[   55.538899] RBP: fffffbfff802548b R08: 0000000000000001 R09: ffffffffc012a45b
[   55.539928] R10: fffffbfff802548b R11: 0000000000000001 R12: ffff888001b5f598
[   55.541021] R13: ffff888004f49ac8 R14: 0000000000000000 R15: ffff888092449400
[   55.542108] FS:  0000000000000000(0000) GS:ffff888092400000(0000) knlGS:0000000000000000
[   55.543260]CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   55.544136] CR2: fffffbfff802548b CR3: 0000000007c10006 CR4: 0000000000770ef0
[   55.545317] PKRU: 55555554
[   55.545671] note: kworker/0:2[83] exited with preempt_count 1

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-11-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-18 14:27:39 -08:00

101 lines
2.6 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
const volatile struct {
/* thread to activate trace programs for */
pid_t tgid;
/* return error from __init function */
int inject_error;
/* uffd monitored range start address */
void *fault_addr;
} bpf_mod_race_config = { -1 };
int bpf_blocking = 0;
int res_try_get_module = -1;
static __always_inline bool check_thread_id(void)
{
struct task_struct *task = bpf_get_current_task_btf();
return task->tgid == bpf_mod_race_config.tgid;
}
/* The trace of execution is something like this:
*
* finit_module()
* load_module()
* prepare_coming_module()
* notifier_call(MODULE_STATE_COMING)
* btf_parse_module()
* btf_alloc_id() // Visible to userspace at this point
* list_add(btf_mod->list, &btf_modules)
* do_init_module()
* freeinit = kmalloc()
* ret = mod->init()
* bpf_prog_widen_race()
* bpf_copy_from_user()
* ...<sleep>...
* if (ret < 0)
* ...
* free_module()
* return ret
*
* At this point, module loading thread is blocked, we now load the program:
*
* bpf_check
* add_kfunc_call/check_pseudo_btf_id
* btf_try_get_module
* try_get_module_live == false
* return -ENXIO
*
* Without the fix (try_get_module_live in btf_try_get_module):
*
* bpf_check
* add_kfunc_call/check_pseudo_btf_id
* btf_try_get_module
* try_get_module == true
* <store module reference in btf_kfunc_tab or used_btf array>
* ...
* return fd
*
* Now, if we inject an error in the blocked program, our module will be freed
* (going straight from MODULE_STATE_COMING to MODULE_STATE_GOING).
* Later, when bpf program is freed, it will try to module_put already freed
* module. This is why try_get_module_live returns false if mod->state is not
* MODULE_STATE_LIVE.
*/
SEC("fmod_ret.s/bpf_fentry_test1")
int BPF_PROG(widen_race, int a, int ret)
{
char dst;
if (!check_thread_id())
return 0;
/* Indicate that we will attempt to block */
bpf_blocking = 1;
bpf_copy_from_user(&dst, 1, bpf_mod_race_config.fault_addr);
return bpf_mod_race_config.inject_error;
}
SEC("fexit/do_init_module")
int BPF_PROG(fexit_init_module, struct module *mod, int ret)
{
if (!check_thread_id())
return 0;
/* Indicate that we finished blocking */
bpf_blocking = 2;
return 0;
}
SEC("fexit/btf_try_get_module")
int BPF_PROG(fexit_module_get, const struct btf *btf, struct module *mod)
{
res_try_get_module = !!mod;
return 0;
}
char _license[] SEC("license") = "GPL";