[ Upstream commit f7cfd871ae0c5008d94b6f66834e7845caa93c15 ] Recently syzbot reported[0] that there is a deadlock amongst the users of exec_update_mutex. The problematic lock ordering found by lockdep was: perf_event_open (exec_update_mutex -> ovl_i_mutex) chown (ovl_i_mutex -> sb_writes) sendfile (sb_writes -> p->lock) by reading from a proc file and writing to overlayfs proc_pid_syscall (p->lock -> exec_update_mutex) While looking at possible solutions it occured to me that all of the users and possible users involved only wanted to state of the given process to remain the same. They are all readers. The only writer is exec. There is no reason for readers to block on each other. So fix this deadlock by transforming exec_update_mutex into a rw_semaphore named exec_update_lock that only exec takes for writing. Cc: Jann Horn <jannh@google.com> Cc: Vasiliy Kulikov <segoon@openwall.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Bernd Edlinger <bernd.edlinger@hotmail.de> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Christopher Yeoh <cyeoh@au1.ibm.com> Cc: Cyrill Gorcunov <gorcunov@gmail.com> Cc: Sargun Dhillon <sargun@sargun.me> Cc: Christian Brauner <christian.brauner@ubuntu.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Fixes: eea9673250db ("exec: Add exec_update_mutex to replace cred_guard_mutex") [0] https://lkml.kernel.org/r/00000000000063640c05ade8e3de@google.com Reported-by: syzbot+db9cdf3dd1f64252c6ef@syzkaller.appspotmail.com Link: https://lkml.kernel.org/r/87ft4mbqen.fsf@x220.int.ebiederm.org Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
195 lines
5.4 KiB
C
195 lines
5.4 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/init_task.h>
|
|
#include <linux/export.h>
|
|
#include <linux/mqueue.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/sched/sysctl.h>
|
|
#include <linux/sched/rt.h>
|
|
#include <linux/sched/task.h>
|
|
#include <linux/init.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/audit.h>
|
|
#include <linux/numa.h>
|
|
|
|
#include <asm/pgtable.h>
|
|
#include <linux/uaccess.h>
|
|
|
|
static struct signal_struct init_signals = {
|
|
.nr_threads = 1,
|
|
.thread_head = LIST_HEAD_INIT(init_task.thread_node),
|
|
.wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(init_signals.wait_chldexit),
|
|
.shared_pending = {
|
|
.list = LIST_HEAD_INIT(init_signals.shared_pending.list),
|
|
.signal = {{0}}
|
|
},
|
|
.multiprocess = HLIST_HEAD_INIT,
|
|
.rlim = INIT_RLIMITS,
|
|
.cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex),
|
|
.exec_update_lock = __RWSEM_INITIALIZER(init_signals.exec_update_lock),
|
|
#ifdef CONFIG_POSIX_TIMERS
|
|
.posix_timers = LIST_HEAD_INIT(init_signals.posix_timers),
|
|
.cputimer = {
|
|
.cputime_atomic = INIT_CPUTIME_ATOMIC,
|
|
},
|
|
#endif
|
|
INIT_CPU_TIMERS(init_signals)
|
|
.pids = {
|
|
[PIDTYPE_PID] = &init_struct_pid,
|
|
[PIDTYPE_TGID] = &init_struct_pid,
|
|
[PIDTYPE_PGID] = &init_struct_pid,
|
|
[PIDTYPE_SID] = &init_struct_pid,
|
|
},
|
|
INIT_PREV_CPUTIME(init_signals)
|
|
};
|
|
|
|
static struct sighand_struct init_sighand = {
|
|
.count = REFCOUNT_INIT(1),
|
|
.action = { { { .sa_handler = SIG_DFL, } }, },
|
|
.siglock = __SPIN_LOCK_UNLOCKED(init_sighand.siglock),
|
|
.signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh),
|
|
};
|
|
|
|
/*
|
|
* Set up the first task table, touch at your own risk!. Base=0,
|
|
* limit=0x1fffff (=2MB)
|
|
*/
|
|
struct task_struct init_task
|
|
#ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK
|
|
__init_task_data
|
|
#endif
|
|
= {
|
|
#ifdef CONFIG_THREAD_INFO_IN_TASK
|
|
.thread_info = INIT_THREAD_INFO(init_task),
|
|
.stack_refcount = REFCOUNT_INIT(1),
|
|
#endif
|
|
.state = 0,
|
|
.stack = init_stack,
|
|
.usage = REFCOUNT_INIT(2),
|
|
.flags = PF_KTHREAD,
|
|
.prio = MAX_PRIO - 20,
|
|
.static_prio = MAX_PRIO - 20,
|
|
.normal_prio = MAX_PRIO - 20,
|
|
.policy = SCHED_NORMAL,
|
|
.cpus_ptr = &init_task.cpus_mask,
|
|
.cpus_mask = CPU_MASK_ALL,
|
|
.nr_cpus_allowed= NR_CPUS,
|
|
.mm = NULL,
|
|
.active_mm = &init_mm,
|
|
.restart_block = {
|
|
.fn = do_no_restart_syscall,
|
|
},
|
|
.se = {
|
|
.group_node = LIST_HEAD_INIT(init_task.se.group_node),
|
|
},
|
|
.rt = {
|
|
.run_list = LIST_HEAD_INIT(init_task.rt.run_list),
|
|
.time_slice = RR_TIMESLICE,
|
|
},
|
|
.tasks = LIST_HEAD_INIT(init_task.tasks),
|
|
#ifdef CONFIG_SMP
|
|
.pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO),
|
|
#endif
|
|
#ifdef CONFIG_CGROUP_SCHED
|
|
.sched_task_group = &root_task_group,
|
|
#endif
|
|
.ptraced = LIST_HEAD_INIT(init_task.ptraced),
|
|
.ptrace_entry = LIST_HEAD_INIT(init_task.ptrace_entry),
|
|
.real_parent = &init_task,
|
|
.parent = &init_task,
|
|
.children = LIST_HEAD_INIT(init_task.children),
|
|
.sibling = LIST_HEAD_INIT(init_task.sibling),
|
|
.group_leader = &init_task,
|
|
RCU_POINTER_INITIALIZER(real_cred, &init_cred),
|
|
RCU_POINTER_INITIALIZER(cred, &init_cred),
|
|
.comm = INIT_TASK_COMM,
|
|
.thread = INIT_THREAD,
|
|
.fs = &init_fs,
|
|
.files = &init_files,
|
|
.signal = &init_signals,
|
|
.sighand = &init_sighand,
|
|
.nsproxy = &init_nsproxy,
|
|
.pending = {
|
|
.list = LIST_HEAD_INIT(init_task.pending.list),
|
|
.signal = {{0}}
|
|
},
|
|
.blocked = {{0}},
|
|
.alloc_lock = __SPIN_LOCK_UNLOCKED(init_task.alloc_lock),
|
|
.journal_info = NULL,
|
|
INIT_CPU_TIMERS(init_task)
|
|
.pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
|
|
.timer_slack_ns = 50000, /* 50 usec default slack */
|
|
.thread_pid = &init_struct_pid,
|
|
.thread_group = LIST_HEAD_INIT(init_task.thread_group),
|
|
.thread_node = LIST_HEAD_INIT(init_signals.thread_head),
|
|
#ifdef CONFIG_AUDIT
|
|
.loginuid = INVALID_UID,
|
|
.sessionid = AUDIT_SID_UNSET,
|
|
#endif
|
|
#ifdef CONFIG_PERF_EVENTS
|
|
.perf_event_mutex = __MUTEX_INITIALIZER(init_task.perf_event_mutex),
|
|
.perf_event_list = LIST_HEAD_INIT(init_task.perf_event_list),
|
|
#endif
|
|
#ifdef CONFIG_PREEMPT_RCU
|
|
.rcu_read_lock_nesting = 0,
|
|
.rcu_read_unlock_special.s = 0,
|
|
.rcu_node_entry = LIST_HEAD_INIT(init_task.rcu_node_entry),
|
|
.rcu_blocked_node = NULL,
|
|
#endif
|
|
#ifdef CONFIG_TASKS_RCU
|
|
.rcu_tasks_holdout = false,
|
|
.rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list),
|
|
.rcu_tasks_idle_cpu = -1,
|
|
#endif
|
|
#ifdef CONFIG_CPUSETS
|
|
.mems_allowed_seq = SEQCNT_ZERO(init_task.mems_allowed_seq),
|
|
#endif
|
|
#ifdef CONFIG_RT_MUTEXES
|
|
.pi_waiters = RB_ROOT_CACHED,
|
|
.pi_top_task = NULL,
|
|
#endif
|
|
INIT_PREV_CPUTIME(init_task)
|
|
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
|
|
.vtime.seqcount = SEQCNT_ZERO(init_task.vtime_seqcount),
|
|
.vtime.starttime = 0,
|
|
.vtime.state = VTIME_SYS,
|
|
#endif
|
|
#ifdef CONFIG_NUMA_BALANCING
|
|
.numa_preferred_nid = NUMA_NO_NODE,
|
|
.numa_group = NULL,
|
|
.numa_faults = NULL,
|
|
#endif
|
|
#ifdef CONFIG_KASAN
|
|
.kasan_depth = 1,
|
|
#endif
|
|
#ifdef CONFIG_TRACE_IRQFLAGS
|
|
.softirqs_enabled = 1,
|
|
#endif
|
|
#ifdef CONFIG_LOCKDEP
|
|
.lockdep_depth = 0, /* no locks held yet */
|
|
.curr_chain_key = INITIAL_CHAIN_KEY,
|
|
.lockdep_recursion = 0,
|
|
#endif
|
|
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
|
.ret_stack = NULL,
|
|
#endif
|
|
#if defined(CONFIG_TRACING) && defined(CONFIG_PREEMPTION)
|
|
.trace_recursion = 0,
|
|
#endif
|
|
#ifdef CONFIG_LIVEPATCH
|
|
.patch_state = KLP_UNDEFINED,
|
|
#endif
|
|
#ifdef CONFIG_SECURITY
|
|
.security = NULL,
|
|
#endif
|
|
};
|
|
EXPORT_SYMBOL(init_task);
|
|
|
|
/*
|
|
* Initial thread structure. Alignment of this is handled by a special
|
|
* linker map entry.
|
|
*/
|
|
#ifndef CONFIG_THREAD_INFO_IN_TASK
|
|
struct thread_info init_thread_info __init_thread_info = INIT_THREAD_INFO(init_task);
|
|
#endif
|