linux/arch/xtensa/kernel/process.c
Linus Torvalds 1ec6574a3c This set of changes updates init and user mode helper tasks to be
ordinary user mode tasks.
 
 Commit 40966e316f86 ("kthread: Ensure struct kthread is present for
 all kthreads") caused init and the user mode helper threads that call
 kernel_execve to have struct kthread allocated for them.  This struct
 kthread going away during execve in turn made a use after free of
 struct kthread possible.
 
 The commit 343f4c49f243 ("kthread: Don't allocate kthread_struct for
 init and umh") is enough to fix the use after free and is simple enough
 to be backportable.
 
 The rest of the changes pass struct kernel_clone_args to clean things
 up and cause the code to make sense.
 
 In making init and the user mode helpers tasks purely user mode tasks
 I ran into two complications.  The function task_tick_numa was
 detecting tasks without an mm by testing for the presence of
 PF_KTHREAD.  The initramfs code in populate_initrd_image was using
 flush_delayed_fput to ensure the closing of all its file descriptors
 was complete, and flush_delayed_fput does not work in a userspace thread.
 
 I have looked and looked for more complications and in my code review
 I have not found any, and neither has anyone else with the code sitting
 in linux-next.
 
 Link: https://lkml.kernel.org/r/87mtfu4up3.fsf@email.froward.int.ebiederm.org
 
 Eric W. Biederman (8):
       kthread: Don't allocate kthread_struct for init and umh
       fork: Pass struct kernel_clone_args into copy_thread
       fork: Explicity test for idle tasks in copy_thread
       fork: Generalize PF_IO_WORKER handling
       init: Deal with the init process being a user mode process
       fork: Explicitly set PF_KTHREAD
       fork: Stop allowing kthreads to call execve
       sched: Update task_tick_numa to ignore tasks without an mm
 
  arch/alpha/kernel/process.c      | 13 ++++++------
  arch/arc/kernel/process.c        | 13 ++++++------
  arch/arm/kernel/process.c        | 12 ++++++-----
  arch/arm64/kernel/process.c      | 12 ++++++-----
  arch/csky/kernel/process.c       | 15 ++++++-------
  arch/h8300/kernel/process.c      | 10 ++++-----
  arch/hexagon/kernel/process.c    | 12 ++++++-----
  arch/ia64/kernel/process.c       | 15 +++++++------
  arch/m68k/kernel/process.c       | 12 ++++++-----
  arch/microblaze/kernel/process.c | 12 ++++++-----
  arch/mips/kernel/process.c       | 13 ++++++------
  arch/nios2/kernel/process.c      | 12 ++++++-----
  arch/openrisc/kernel/process.c   | 12 ++++++-----
  arch/parisc/kernel/process.c     | 18 +++++++++-------
  arch/powerpc/kernel/process.c    | 15 +++++++------
  arch/riscv/kernel/process.c      | 12 ++++++-----
  arch/s390/kernel/process.c       | 12 ++++++-----
  arch/sh/kernel/process_32.c      | 12 ++++++-----
  arch/sparc/kernel/process_32.c   | 12 ++++++-----
  arch/sparc/kernel/process_64.c   | 12 ++++++-----
  arch/um/kernel/process.c         | 15 +++++++------
  arch/x86/include/asm/fpu/sched.h |  2 +-
  arch/x86/include/asm/switch_to.h |  8 +++----
  arch/x86/kernel/fpu/core.c       |  4 ++--
  arch/x86/kernel/process.c        | 18 +++++++++-------
  arch/xtensa/kernel/process.c     | 17 ++++++++-------
  fs/exec.c                        |  8 ++++---
  include/linux/sched/task.h       |  8 +++++--
  init/initramfs.c                 |  2 ++
  init/main.c                      |  2 +-
  kernel/fork.c                    | 46 +++++++++++++++++++++++++++++++++-------
  kernel/sched/fair.c              |  2 +-
  kernel/umh.c                     |  6 +++---
  33 files changed, 234 insertions(+), 160 deletions(-)
 
 Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEgjlraLDcwBA2B+6cC/v6Eiajj0AFAmKaR/MACgkQC/v6Eiaj
 j0Aayg/7Bx66872d9c6igkJ+MPCTuh+v9QKCGwiYEmiU4Q5sVAFB0HPJO27qC14u
 630X0RFNZTkPzNNEJNIW4kw6Dj8s8YRKf+FgQAVt4SzdRwT7eIPDjk1nGraopPJ3
 O04pjvuTmUyidyViRyFcf2ptx/pnkrwP8jUSc+bGTgfASAKAgAokqKE5ecjewbBc
 Y/EAkQ6QW7KxPjeSmpAHwI+t3BpBev9WEC4PbhRhsBCQFO2+PJiklvqdhVNBnIjv
 qUezll/1xv9UYgniB15Q4Nb722SmnWSU3r8as1eFPugzTHizKhufrrpyP+KMK1A0
 tdtEJNs5t2DZF7ZbGTFSPqJWmyTYLrghZdO+lOmnaSjHxK4Nda1d4NzbefJ0u+FE
 tutewowvHtBX6AFIbx+H3O+DOJM2IgNMf+ReQDU/TyNyVf3wBrTbsr9cLxypIJIp
 zze8npoLMlB7B4yxVo5ES5e63EXfi3iHl0L3/1EhoGwriRz1kWgVLUX/VZOUpscL
 RkJHsW6bT8sqxPWAA5kyWjEN+wNR2PxbXi8OE4arT0uJrEBMUgDCzydzOv5tJB00
 mSQdytxH9LVdsmxBKAOBp5X6WOLGA4yb1cZ6E/mEhlqXMpBDF1DaMfwbWqxSYi4q
 sp5zU3SBAW0qceiZSsWZXInfbjrcQXNV/DkDRDO9OmzEZP4m1j0=
 =x6fy
 -----END PGP SIGNATURE-----

Merge tag 'kthread-cleanups-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

Pull kthread updates from Eric Biederman:
 "This updates init and user mode helper tasks to be ordinary user mode
  tasks.

  Commit 40966e316f86 ("kthread: Ensure struct kthread is present for
  all kthreads") caused init and the user mode helper threads that call
  kernel_execve to have struct kthread allocated for them. This struct
  kthread going away during execve in turn made a use after free of
  struct kthread possible.

  Here, commit 343f4c49f243 ("kthread: Don't allocate kthread_struct for
  init and umh") is enough to fix the use after free and is simple
  enough to be backportable.

  The rest of the changes pass struct kernel_clone_args to clean things
  up and cause the code to make sense.

  In making init and the user mode helpers tasks purely user mode tasks
  I ran into two complications. The function task_tick_numa was
  detecting tasks without an mm by testing for the presence of
  PF_KTHREAD. The initramfs code in populate_initrd_image was using
  flush_delayed_fput to ensure the closing of all its file descriptors
  was complete, and flush_delayed_fput does not work in a userspace
  thread.

  I have looked and looked for more complications and in my code review
  I have not found any, and neither has anyone else with the code
  sitting in linux-next"

* tag 'kthread-cleanups-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  sched: Update task_tick_numa to ignore tasks without an mm
  fork: Stop allowing kthreads to call execve
  fork: Explicitly set PF_KTHREAD
  init: Deal with the init process being a user mode process
  fork: Generalize PF_IO_WORKER handling
  fork: Explicity test for idle tasks in copy_thread
  fork: Pass struct kernel_clone_args into copy_thread
  kthread: Don't allocate kthread_struct for init and umh
2022-06-03 16:03:05 -07:00

399 lines
11 KiB
C

/*
* arch/xtensa/kernel/process.c
*
* Xtensa Processor version.
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 2001 - 2005 Tensilica Inc.
*
* Joe Taylor <joe@tensilica.com, joetylr@yahoo.com>
* Chris Zankel <chris@zankel.net>
* Marc Gauthier <marc@tensilica.com, marc@alumni.uwaterloo.ca>
* Kevin Chea
*/
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/elf.h>
#include <linux/hw_breakpoint.h>
#include <linux/init.h>
#include <linux/prctl.h>
#include <linux/init_task.h>
#include <linux/module.h>
#include <linux/mqueue.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>
#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/platform.h>
#include <asm/mmu.h>
#include <asm/irq.h>
#include <linux/atomic.h>
#include <asm/asm-offsets.h>
#include <asm/regs.h>
#include <asm/hw_breakpoint.h>
#include <asm/traps.h>
/*
* Entry points defined outside this file. copy_thread() uses their
* addresses to build the return address of a newly created thread.
*/
extern void ret_from_fork(void);
extern void ret_from_kernel_thread(void);
/*
* Power-off function pointer. It is NULL here until something assigns it,
* and it is exported so that modules can reference it.
*/
void (*pm_power_off)(void) = NULL;
EXPORT_SYMBOL(pm_power_off);
#ifdef CONFIG_STACKPROTECTOR
#include <linux/stackprotector.h>
/*
* Guard value for the stack protector, only built with
* CONFIG_STACKPROTECTOR (presumably the canary read by compiler-generated
* checks -- confirm against the stackprotector header).
*/
unsigned long __stack_chk_guard __read_mostly;
EXPORT_SYMBOL(__stack_chk_guard);
#endif
#if XTENSA_HAVE_COPROCESSORS
/*
 * Flush and release every coprocessor owned on the current CPU.
 *
 * For each per-CPU ownership slot that is in use, the coprocessor state is
 * flushed to the owning thread_info and the slot is cleared. Afterwards the
 * cpenable field of every distinct former owner is zeroed and the cpenable
 * special register is cleared.
 */
void local_coprocessors_flush_release_all(void)
{
	struct thread_info **owners;
	struct thread_info *released[XCHAL_CP_MAX];
	int n_released = 0;
	int cp, k;

	owners = this_cpu_ptr(&exc_table)->coprocessor_owner;

	/*
	 * Load the full coprocessor mask into cpenable before flushing
	 * (presumably so that every coprocessor's registers are accessible).
	 */
	xtensa_set_sr(XCHAL_CP_MASK, cpenable);

	for (cp = 0; cp < XCHAL_CP_MAX; cp++) {
		struct thread_info *owner = owners[cp];

		if (!owner)
			continue;

		coprocessor_flush(owner, cp);

		/* Record each owner only once, however many slots it holds. */
		k = 0;
		while (k < n_released && released[k] != owner)
			k++;
		if (k == n_released)
			released[n_released++] = owner;

		owners[cp] = NULL;
	}

	for (k = 0; k < n_released; k++) {
		/* pairs with memw (1) in fast_coprocessor and memw in switch_to */
		smp_wmb();
		released[k]->cpenable = 0;
	}

	xtensa_set_sr(0, cpenable);
}
/*
 * Release, on the current CPU, every coprocessor owned by the thread
 * passed in @info (a struct thread_info pointer).
 *
 * Ownership slots that reference the thread are cleared, the thread's
 * cpenable field is zeroed, and when the thread is the one currently
 * running the cpenable special register is cleared as well.
 */
static void local_coprocessor_release_all(void *info)
{
	struct thread_info *target = info;
	struct thread_info **slot = this_cpu_ptr(&exc_table)->coprocessor_owner;
	struct thread_info **end = slot + XCHAL_CP_MAX;

	/* Drop the requested thread from every ownership slot it occupies. */
	for (; slot != end; slot++) {
		if (*slot == target)
			*slot = NULL;
	}

	/* pairs with memw (1) in fast_coprocessor and memw in switch_to */
	smp_wmb();
	target->cpenable = 0;

	if (target == current_thread_info())
		xtensa_set_sr(0, cpenable);
}
/*
 * Release all coprocessors owned by @ti.
 *
 * Nothing is done when ti->cpenable is zero. Otherwise the release runs
 * on the CPU recorded in ti->cp_owner_cpu, and this call waits for it to
 * finish.
 */
void coprocessor_release_all(struct thread_info *ti)
{
	if (!ti->cpenable)
		return;

	/* pairs with memw (2) in fast_coprocessor */
	smp_rmb();
	smp_call_function_single(ti->cp_owner_cpu,
				 local_coprocessor_release_all, ti, true);
}
static void local_coprocessor_flush_all(void *info)
{
struct thread_info *ti = info;
struct thread_info **coprocessor_owner;
unsigned long old_cpenable;
int i;
coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
old_cpenable = xtensa_xsr(ti->cpenable, cpenable);
for (i = 0; i < XCHAL_CP_MAX; i++) {
if (coprocessor_owner[i] == ti)
coprocessor_flush(ti, i);
}
xtensa_set_sr(old_cpenable, cpenable);
}
/*
 * Flush the state of all coprocessors owned by @ti.
 *
 * Nothing is done when ti->cpenable is zero. Otherwise the flush runs on
 * the CPU recorded in ti->cp_owner_cpu, and this call waits for it to
 * finish.
 */
void coprocessor_flush_all(struct thread_info *ti)
{
	if (!ti->cpenable)
		return;

	/* pairs with memw (2) in fast_coprocessor */
	smp_rmb();
	smp_call_function_single(ti->cp_owner_cpu,
				 local_coprocessor_flush_all, ti, true);
}
/*
 * Flush and then release, on the current CPU, the coprocessors owned by
 * the thread passed in @info (a struct thread_info pointer).
 */
static void local_coprocessor_flush_release_all(void *info)
{
	struct thread_info *ti = info;

	local_coprocessor_flush_all(ti);
	local_coprocessor_release_all(ti);
}
/*
 * Flush and release all coprocessors owned by @ti.
 *
 * Nothing is done when ti->cpenable is zero. Otherwise the work runs on
 * the CPU recorded in ti->cp_owner_cpu, and this call waits for it to
 * finish.
 */
void coprocessor_flush_release_all(struct thread_info *ti)
{
	if (!ti->cpenable)
		return;

	/* pairs with memw (2) in fast_coprocessor */
	smp_rmb();
	smp_call_function_single(ti->cp_owner_cpu,
				 local_coprocessor_flush_release_all, ti, true);
}
#endif
/*
* Power management idle function: hands off to platform_idle(), the
* idle routine provided by the platform, if any.
*/
void arch_cpu_idle(void)
{
platform_idle();
}
/*
 * This is called when the thread calls exit().
 *
 * Releases any coprocessors still owned by @tsk; a no-op on
 * configurations without coprocessors.
 */
void exit_thread(struct task_struct *tsk)
{
#if XTENSA_HAVE_COPROCESSORS
	struct thread_info *ti = task_thread_info(tsk);

	coprocessor_release_all(ti);
#endif
}
/*
 * Flush thread state; called when a thread does an execve().
 *
 * Coprocessor registers are flushed as well as released, to cover the
 * case where the execve fails. Hardware breakpoints set through ptrace
 * are flushed for the current task too.
 */
void flush_thread(void)
{
#if XTENSA_HAVE_COPROCESSORS
	coprocessor_flush_release_all(current_thread_info());
#endif
	flush_ptrace_hw_breakpoint(current);
}
/*
 * Duplicate the task structure @src into @dst.
 *
 * Coprocessor state owned by @src is first flushed to memory so that the
 * structure copy picks it up. Always returns 0.
 */
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
#if XTENSA_HAVE_COPROCESSORS
	struct thread_info *src_ti = task_thread_info(src);

	coprocessor_flush_all(src_ti);
#endif
	*dst = *src;
	return 0;
}
/*
* Copy thread.
*
* Set up the saved context of the new task p as described by args.
* Always returns 0.
*
* There are two modes in which this function is called:
* 1) Userspace thread creation,
* args->fn == NULL, args->stack (usp_thread_fn below) is the userspace
* stack pointer; when it is 0 the parent's stack pointer is used.
* The parent regs are copied into childregs, the stack pointer is set
* up in the childregs, and the register window state is reset when
* CLONE_VM is set in the clone flags.
* 2) Kernel thread creation,
* args->fn != NULL is the function to run in the new thread
* and args->fn_arg is its parameter.
* childregs are not used for the kernel threads.
*
* The stack layout for the new thread looks like this:
*
* +------------------------+
* | childregs |
* +------------------------+ <- thread.sp = sp in dummy-frame
* | dummy-frame | (saved in dummy-frame spill-area)
* +------------------------+
*
* We create a dummy frame to return to either ret_from_fork or
* ret_from_kernel_thread:
* a0 points to ret_from_fork/ret_from_kernel_thread (simulating a call4)
* sp points to itself (thread.sp)
* a2, a3 are unused for userspace threads,
* a2 points to thread_fn, a3 holds thread_fn arg for kernel threads
* (windowed ABI; the call0 ABI uses a12 and a13 instead).
*
* Note: This is a pristine frame, so we don't need any spill region on top of
* childregs.
*
* The fun part: if we're keeping the same VM (i.e. cloning a thread,
* not an entire process), we're normally given a new usp, and we CANNOT share
* any live address register windows. If we just copy those live frames over,
* the two threads (parent and child) will overflow the same frames onto the
* parent stack at different times, likely corrupting the parent stack (esp.
* if the parent returns from functions that called clone() and calls new
* ones, before the child overflows its now old copies of its parent windows).
* One solution is to spill windows to the parent stack, but that's fairly
* involved. Much simpler to just not copy those live frames across.
*/
int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
unsigned long clone_flags = args->flags;
/* Only read on the userspace path; kernel threads use args->fn instead. */
unsigned long usp_thread_fn = args->stack;
unsigned long tls = args->tls;
/* pt_regs area of the child (see the stack layout above). */
struct pt_regs *childregs = task_pt_regs(p);
#if (XTENSA_HAVE_COPROCESSORS || XTENSA_HAVE_IO_PORTS)
struct thread_info *ti;
#endif
#if defined(__XTENSA_WINDOWED_ABI__)
/* Create a call4 dummy-frame: a0 = 0, a1 = childregs. */
SPILL_SLOT(childregs, 1) = (unsigned long)childregs;
SPILL_SLOT(childregs, 0) = 0;
p->thread.sp = (unsigned long)childregs;
#elif defined(__XTENSA_CALL0_ABI__)
/* Reserve 16 bytes for the _switch_to stack frame. */
p->thread.sp = (unsigned long)childregs - 16;
#else
#error Unsupported Xtensa ABI
#endif
if (!args->fn) {
/* Userspace thread: start from a copy of the parent's user registers. */
struct pt_regs *regs = current_pt_regs();
/* A zero stack argument means "keep the parent's stack pointer". */
unsigned long usp = usp_thread_fn ?
usp_thread_fn : regs->areg[1];
p->thread.ra = MAKE_RA_FOR_CALL(
(unsigned long)ret_from_fork, 0x1);
*childregs = *regs;
childregs->areg[1] = usp;
/* a2 is zeroed in the child (presumably its syscall return value). */
childregs->areg[2] = 0;
/* When sharing memory with the parent thread, the child
usually starts on a pristine stack, so we have to reset
windowbase, windowstart and wmask.
(Note that such a new thread is required to always create
an initial call4 frame)
The exception is vfork, where the new thread continues to
run on the parent's stack until it calls execve. This could
be a call8 or call12, which requires a legal stack frame
of the previous caller for the overflow handlers to work.
(Note that it's always legal to overflow live registers).
In this case, ensure to spill at least the stack pointer
of that frame. */
if (clone_flags & CLONE_VM) {
/* check that caller window is live and same stack */
int len = childregs->wmask & ~0xf;
if (regs->areg[1] == usp && len != 0) {
/*
* Top two bits of a0 (presumably the call size of the
* caller) select which register holds the caller's frame.
*/
int callinc = (regs->areg[0] >> 30) & 3;
int caller_ars = XCHAL_NUM_AREGS - callinc * 4;
/*
* Store that frame's stack pointer on the user stack at
* usp - 12. The put_user() result is not checked here.
*/
put_user(regs->areg[caller_ars+1],
(unsigned __user*)(usp - 12));
}
/* Reset the register window state for the child. */
childregs->wmask = 1;
childregs->windowstart = 1;
childregs->windowbase = 0;
}
if (clone_flags & CLONE_SETTLS)
childregs->threadptr = tls;
} else {
/* Kernel thread: return into ret_from_kernel_thread. */
p->thread.ra = MAKE_RA_FOR_CALL(
(unsigned long)ret_from_kernel_thread, 1);
/* pass parameters to ret_from_kernel_thread: */
#if defined(__XTENSA_WINDOWED_ABI__)
/*
* a2 = thread_fn, a3 = thread_fn arg.
* Window underflow will load registers from the
* spill slots on the stack on return from _switch_to.
*/
SPILL_SLOT(childregs, 2) = (unsigned long)args->fn;
SPILL_SLOT(childregs, 3) = (unsigned long)args->fn_arg;
#elif defined(__XTENSA_CALL0_ABI__)
/*
* a12 = thread_fn, a13 = thread_fn arg.
* _switch_to epilogue will load registers from the stack.
*/
((unsigned long *)p->thread.sp)[0] = (unsigned long)args->fn;
((unsigned long *)p->thread.sp)[1] = (unsigned long)args->fn_arg;
#else
#error Unsupported Xtensa ABI
#endif
/* Childregs are only used when we're going to userspace
* in which case start_thread will set them up.
*/
}
#if (XTENSA_HAVE_COPROCESSORS || XTENSA_HAVE_IO_PORTS)
/* The new thread starts with cpenable cleared. */
ti = task_thread_info(p);
ti->cpenable = 0;
#endif
clear_ptrace_hw_breakpoint(p);
return 0;
}
/*
 * Walk the saved call frames of task @p, starting from its saved stack
 * pointer and return address, and return the first pc that is not inside
 * the scheduler functions.
 *
 * Returns 0 when the stack pointer leaves the task's stack area, when a
 * zero pc is found, or when no such pc turns up within 17 frames.
 */
unsigned long __get_wchan(struct task_struct *p)
{
	unsigned long stack_base = (unsigned long) task_stack_page(p);
	unsigned long stack_lo = stack_base + sizeof(struct task_struct);
	unsigned long stack_hi = stack_base + THREAD_SIZE;
	unsigned long sp = p->thread.sp;
	unsigned long pc = MAKE_PC_FROM_RA(p->thread.ra, p->thread.sp);
	int depth;

	for (depth = 0; depth <= 16; depth++) {
		if (sp < stack_lo || sp >= stack_hi)
			return 0;
		if (pc == 0)
			return 0;
		if (!in_sched_functions(pc))
			return pc;

		/* Stack layout: sp-4: ra, sp-3: sp' */
		pc = MAKE_PC_FROM_RA(SPILL_SLOT(sp, 0), sp);
		sp = SPILL_SLOT(sp, 1);
	}

	return 0;
}