diff --git a/Documentation/locking/mutex-design.rst b/Documentation/locking/mutex-design.rst index 78540cd7f54b..7c30b4aa5e28 100644 --- a/Documentation/locking/mutex-design.rst +++ b/Documentation/locking/mutex-design.rst @@ -101,6 +101,24 @@ features that make lock debugging easier and faster: - Detects multi-task circular deadlocks and prints out all affected locks and tasks (and only those tasks). +Mutexes - and most other sleeping locks like rwsems - do not provide an +implicit reference for the memory they occupy, i.e. a reference that would +be released by mutex_unlock(). + +[ This is in contrast with spin_unlock() [or completion_done()]; these + APIs can be used to guarantee that the memory is not touched by the + lock implementation after spin_unlock()/completion_done() releases + the lock. ] + +mutex_unlock() may access the mutex structure even after it has internally +released the lock - so it's not safe for another context to +acquire the mutex and assume that the mutex_unlock() context is not using +the structure anymore. + +The mutex user must ensure that the mutex is not destroyed while a +release operation is still in progress - in other words, callers of +mutex_unlock() must ensure that the mutex stays alive until mutex_unlock() +has returned. 
Interfaces ---------- diff --git a/MAINTAINERS b/MAINTAINERS index 5ef3bad7def9..f71c525405e0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12424,7 +12424,7 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core F: Documentation/locking/ F: arch/*/include/asm/spinlock*.h -F: include/linux/lockdep.h +F: include/linux/lockdep*.h F: include/linux/mutex*.h F: include/linux/rwlock*.h F: include/linux/rwsem*.h diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 9f1a9c455b68..c2d09bc4f976 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -125,25 +125,55 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ * trivial wrapper around DEFINE_CLASS() above specifically * for locks. * + * DEFINE_GUARD_COND(name, ext, condlock) + * wrapper around EXTEND_CLASS above to add conditional lock + * variants to a base class, e.g. mutex_trylock() or + * mutex_lock_interruptible(). + * * guard(name): - * an anonymous instance of the (guard) class + * an anonymous instance of the (guard) class, not recommended for + * conditional locks. * * scoped_guard (name, args...) { }: * similar to CLASS(name, scope)(args), except the variable (with the * explicit name 'scope') is declard in a for-loop such that its scope is * bound to the next (compound) statement. * + * for conditional locks the loop body is skipped when the lock is not + * acquired. + * + * scoped_cond_guard (name, fail, args...) { }: + * similar to scoped_guard(), except that the 'fail' statement is + * executed when the lock acquisition fails. 
+ * */ #define DEFINE_GUARD(_name, _type, _lock, _unlock) \ - DEFINE_CLASS(_name, _type, _unlock, ({ _lock; _T; }), _type _T) + DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ + static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ + { return *_T; } + +#define DEFINE_GUARD_COND(_name, _ext, _condlock) \ + EXTEND_CLASS(_name, _ext, \ + ({ void *_t = _T; if (_T && !(_condlock)) _t = NULL; _t; }), \ + class_##_name##_t _T) \ + static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ + { return class_##_name##_lock_ptr(_T); } #define guard(_name) \ CLASS(_name, __UNIQUE_ID(guard)) +#define __guard_ptr(_name) class_##_name##_lock_ptr + #define scoped_guard(_name, args...) \ for (CLASS(_name, scope)(args), \ - *done = NULL; !done; done = (void *)1) + *done = NULL; __guard_ptr(_name)(&scope) && !done; done = (void *)1) + +#define scoped_cond_guard(_name, _fail, args...) \ + for (CLASS(_name, scope)(args), \ + *done = NULL; !done; done = (void *)1) \ + if (!__guard_ptr(_name)(&scope)) _fail; \ + else /* * Additional helper macros for generating lock guards with types, either for @@ -152,6 +182,7 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ * * DEFINE_LOCK_GUARD_0(name, lock, unlock, ...) * DEFINE_LOCK_GUARD_1(name, type, lock, unlock, ...) 
+ * DEFINE_LOCK_GUARD_1_COND(name, ext, condlock) * * will result in the following type: * @@ -173,6 +204,11 @@ typedef struct { \ static inline void class_##_name##_destructor(class_##_name##_t *_T) \ { \ if (_T->lock) { _unlock; } \ +} \ + \ +static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \ +{ \ + return _T->lock; \ } @@ -201,4 +237,14 @@ __DEFINE_LOCK_GUARD_1(_name, _type, _lock) __DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \ __DEFINE_LOCK_GUARD_0(_name, _lock) +#define DEFINE_LOCK_GUARD_1_COND(_name, _ext, _condlock) \ + EXTEND_CLASS(_name, _ext, \ + ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\ + if (_T->lock && !(_condlock)) _T->lock = NULL; \ + _t; }), \ + typeof_member(class_##_name##_t, lock) l) \ + static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ + { return class_##_name##_lock_ptr(_T); } + + #endif /* __LINUX_GUARDS_H */ diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h index 2ebc323d345a..857d785e89e6 100644 --- a/include/linux/lockdep_types.h +++ b/include/linux/lockdep_types.h @@ -127,12 +127,12 @@ struct lock_class { unsigned long usage_mask; const struct lock_trace *usage_traces[LOCK_TRACE_STATES]; + const char *name; /* * Generation counter, when doing certain classes of graph walking, * to ensure that we check one node only once: */ int name_version; - const char *name; u8 wait_type_inner; u8 wait_type_outer; diff --git a/include/linux/mutex.h b/include/linux/mutex.h index a33aa9eb9fc3..95d11308f995 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -221,6 +221,7 @@ extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) -DEFINE_FREE(mutex, struct mutex *, if (_T) mutex_unlock(_T)) +DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T)) +DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T) == 0) #endif /* 
__LINUX_MUTEX_H */ diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 1dd530ce8b45..9c29689ff505 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -203,11 +203,11 @@ extern void up_read(struct rw_semaphore *sem); extern void up_write(struct rw_semaphore *sem); DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T)) +DEFINE_GUARD_COND(rwsem_read, _try, down_read_trylock(_T)) +DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T) == 0) + DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) - -DEFINE_FREE(up_read, struct rw_semaphore *, if (_T) up_read(_T)) -DEFINE_FREE(up_write, struct rw_semaphore *, if (_T) up_write(_T)) - +DEFINE_GUARD_COND(rwsem_write, _try, down_write_trylock(_T)) /* * downgrade write lock to read lock diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index a23af225c898..4f3dca353556 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -226,4 +226,6 @@ static inline void task_unlock(struct task_struct *p) spin_unlock(&p->alloc_lock); } +DEFINE_GUARD(task_lock, struct task_struct *, task_lock(_T), task_unlock(_T)) + #endif /* _LINUX_SCHED_TASK_H */ diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 31d3d747a9db..90bc853cafb6 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -507,6 +507,8 @@ DEFINE_LOCK_GUARD_1(raw_spinlock, raw_spinlock_t, raw_spin_lock(_T->lock), raw_spin_unlock(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(raw_spinlock, _try, raw_spin_trylock(_T->lock)) + DEFINE_LOCK_GUARD_1(raw_spinlock_nested, raw_spinlock_t, raw_spin_lock_nested(_T->lock, SINGLE_DEPTH_NESTING), raw_spin_unlock(_T->lock)) @@ -515,23 +517,62 @@ DEFINE_LOCK_GUARD_1(raw_spinlock_irq, raw_spinlock_t, raw_spin_lock_irq(_T->lock), raw_spin_unlock_irq(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irq, _try, raw_spin_trylock_irq(_T->lock)) + DEFINE_LOCK_GUARD_1(raw_spinlock_irqsave, raw_spinlock_t, 
raw_spin_lock_irqsave(_T->lock, _T->flags), raw_spin_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) +DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irqsave, _try, + raw_spin_trylock_irqsave(_T->lock, _T->flags)) + DEFINE_LOCK_GUARD_1(spinlock, spinlock_t, spin_lock(_T->lock), spin_unlock(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(spinlock, _try, spin_trylock(_T->lock)) + DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t, spin_lock_irq(_T->lock), spin_unlock_irq(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(spinlock_irq, _try, + spin_trylock_irq(_T->lock)) + DEFINE_LOCK_GUARD_1(spinlock_irqsave, spinlock_t, spin_lock_irqsave(_T->lock, _T->flags), spin_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) +DEFINE_LOCK_GUARD_1_COND(spinlock_irqsave, _try, + spin_trylock_irqsave(_T->lock, _T->flags)) + +DEFINE_LOCK_GUARD_1(read_lock, rwlock_t, + read_lock(_T->lock), + read_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(read_lock_irq, rwlock_t, + read_lock_irq(_T->lock), + read_unlock_irq(_T->lock)) + +DEFINE_LOCK_GUARD_1(read_lock_irqsave, rwlock_t, + read_lock_irqsave(_T->lock, _T->flags), + read_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags) + +DEFINE_LOCK_GUARD_1(write_lock, rwlock_t, + write_lock(_T->lock), + write_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(write_lock_irq, rwlock_t, + write_lock_irq(_T->lock), + write_unlock_irq(_T->lock)) + +DEFINE_LOCK_GUARD_1(write_lock_irqsave, rwlock_t, + write_lock_irqsave(_T->lock, _T->flags), + write_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags) + #undef __LINUX_INSIDE_SPINLOCK_H #endif /* __LINUX_SPINLOCK_H */ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 2deeeca3e71b..cbae8c0b89ab 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -532,6 +532,11 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne * This function must not be used in interrupt context. Unlocking * of a not locked mutex is not allowed. 
* + * The caller must ensure that the mutex stays alive until this function has + * returned - mutex_unlock() can NOT directly be used to release an object such + * that another concurrent task can free it. + * Mutexes are different from spinlocks & refcounts in this aspect. + * * This function is similar to (but not equivalent to) up(). */ void __sched mutex_unlock(struct mutex *lock) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index d8b5e13a2229..5c579fb9a5e3 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -386,71 +386,9 @@ static int check_ptrace_options(unsigned long data) return 0; } -static int ptrace_attach(struct task_struct *task, long request, - unsigned long addr, - unsigned long flags) +static inline void ptrace_set_stopped(struct task_struct *task) { - bool seize = (request == PTRACE_SEIZE); - int retval; - - retval = -EIO; - if (seize) { - if (addr != 0) - goto out; - /* - * This duplicates the check in check_ptrace_options() because - * ptrace_attach() and ptrace_setoptions() have historically - * used different error codes for unknown ptrace options. - */ - if (flags & ~(unsigned long)PTRACE_O_MASK) - goto out; - retval = check_ptrace_options(flags); - if (retval) - return retval; - flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT); - } else { - flags = PT_PTRACED; - } - - audit_ptrace(task); - - retval = -EPERM; - if (unlikely(task->flags & PF_KTHREAD)) - goto out; - if (same_thread_group(task, current)) - goto out; - - /* - * Protect exec's credential calculations against our interference; - * SUID, SGID and LSM creds get determined differently - * under ptrace. 
- */ - retval = -ERESTARTNOINTR; - if (mutex_lock_interruptible(&task->signal->cred_guard_mutex)) - goto out; - - task_lock(task); - retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS); - task_unlock(task); - if (retval) - goto unlock_creds; - - write_lock_irq(&tasklist_lock); - retval = -EPERM; - if (unlikely(task->exit_state)) - goto unlock_tasklist; - if (task->ptrace) - goto unlock_tasklist; - - task->ptrace = flags; - - ptrace_link(task, current); - - /* SEIZE doesn't trap tracee on attach */ - if (!seize) - send_sig_info(SIGSTOP, SEND_SIG_PRIV, task); - - spin_lock(&task->sighand->siglock); + guard(spinlock)(&task->sighand->siglock); /* * If the task is already STOPPED, set JOBCTL_TRAP_STOP and @@ -474,28 +412,84 @@ static int ptrace_attach(struct task_struct *task, long request, task->jobctl &= ~JOBCTL_STOPPED; signal_wake_up_state(task, __TASK_STOPPED); } +} - spin_unlock(&task->sighand->siglock); +static int ptrace_attach(struct task_struct *task, long request, + unsigned long addr, + unsigned long flags) +{ + bool seize = (request == PTRACE_SEIZE); + int retval; - retval = 0; -unlock_tasklist: - write_unlock_irq(&tasklist_lock); -unlock_creds: - mutex_unlock(&task->signal->cred_guard_mutex); -out: - if (!retval) { + if (seize) { + if (addr != 0) + return -EIO; /* - * We do not bother to change retval or clear JOBCTL_TRAPPING - * if wait_on_bit() was interrupted by SIGKILL. The tracer will - * not return to user-mode, it will exit and clear this bit in - * __ptrace_unlink() if it wasn't already cleared by the tracee; - * and until then nobody can ptrace this task. + * This duplicates the check in check_ptrace_options() because + * ptrace_attach() and ptrace_setoptions() have historically + * used different error codes for unknown ptrace options. 
*/ - wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, TASK_KILLABLE); - proc_ptrace_connector(task, PTRACE_ATTACH); + if (flags & ~(unsigned long)PTRACE_O_MASK) + return -EIO; + + retval = check_ptrace_options(flags); + if (retval) + return retval; + flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT); + } else { + flags = PT_PTRACED; } - return retval; + audit_ptrace(task); + + if (unlikely(task->flags & PF_KTHREAD)) + return -EPERM; + if (same_thread_group(task, current)) + return -EPERM; + + /* + * Protect exec's credential calculations against our interference; + * SUID, SGID and LSM creds get determined differently + * under ptrace. + */ + scoped_cond_guard (mutex_intr, return -ERESTARTNOINTR, + &task->signal->cred_guard_mutex) { + + scoped_guard (task_lock, task) { + retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS); + if (retval) + return retval; + } + + scoped_guard (write_lock_irq, &tasklist_lock) { + if (unlikely(task->exit_state)) + return -EPERM; + if (task->ptrace) + return -EPERM; + + task->ptrace = flags; + + ptrace_link(task, current); + + /* SEIZE doesn't trap tracee on attach */ + if (!seize) + send_sig_info(SIGSTOP, SEND_SIG_PRIV, task); + + ptrace_set_stopped(task); + } + } + + /* + * We do not bother to change retval or clear JOBCTL_TRAPPING + * if wait_on_bit() was interrupted by SIGKILL. The tracer will + * not return to user-mode, it will exit and clear this bit in + * __ptrace_unlink() if it wasn't already cleared by the tracee; + * and until then nobody can ptrace this task. + */ + wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, TASK_KILLABLE); + proc_ptrace_connector(task, PTRACE_ATTACH); + + return 0; } /**