Linus Torvalds 1ec6574a3c This set of changes updates init and user mode helper tasks to be
ordinary user mode tasks.
 
 In commit 40966e316f86 ("kthread: Ensure struct kthread is present for
 all kthreads") caused init and the user mode helper threads that call
 kernel_execve to have struct kthread allocated for them.  This struct
 kthread going away during execve in turned made a use after free of
 struct kthread possible.
 
 The commit 343f4c49f243 ("kthread: Don't allocate kthread_struct for
 init and umh") is enough to fix the use after free and is simple enough
 to be backportable.
 
 The rest of the changes pass struct kernel_clone_args to clean things
 up and cause the code to make sense.
 
 In making init and the user mode helpers tasks purely user mode tasks
 I ran into two complications.  The function task_tick_numa was
 detecting tasks without an mm by testing for the presence of
 PF_KTHREAD.  The initramfs code in populate_initrd_image was using
 flush_delayed_fput to ensuere the closing of all it's file descriptors
 was complete, and flush_delayed_fput does not work in a userspace thread.
 
 I have looked and looked and more complications and in my code review
 I have not found any, and neither has anyone else with the code sitting
 in linux-next.
 
 Link: https://lkml.kernel.org/r/87mtfu4up3.fsf@email.froward.int.ebiederm.org
 
 Eric W. Biederman (8):
       kthread: Don't allocate kthread_struct for init and umh
       fork: Pass struct kernel_clone_args into copy_thread
       fork: Explicity test for idle tasks in copy_thread
       fork: Generalize PF_IO_WORKER handling
       init: Deal with the init process being a user mode process
       fork: Explicitly set PF_KTHREAD
       fork: Stop allowing kthreads to call execve
       sched: Update task_tick_numa to ignore tasks without an mm
 
  arch/alpha/kernel/process.c      | 13 ++++++------
  arch/arc/kernel/process.c        | 13 ++++++------
  arch/arm/kernel/process.c        | 12 ++++++-----
  arch/arm64/kernel/process.c      | 12 ++++++-----
  arch/csky/kernel/process.c       | 15 ++++++-------
  arch/h8300/kernel/process.c      | 10 ++++-----
  arch/hexagon/kernel/process.c    | 12 ++++++-----
  arch/ia64/kernel/process.c       | 15 +++++++------
  arch/m68k/kernel/process.c       | 12 ++++++-----
  arch/microblaze/kernel/process.c | 12 ++++++-----
  arch/mips/kernel/process.c       | 13 ++++++------
  arch/nios2/kernel/process.c      | 12 ++++++-----
  arch/openrisc/kernel/process.c   | 12 ++++++-----
  arch/parisc/kernel/process.c     | 18 +++++++++-------
  arch/powerpc/kernel/process.c    | 15 +++++++------
  arch/riscv/kernel/process.c      | 12 ++++++-----
  arch/s390/kernel/process.c       | 12 ++++++-----
  arch/sh/kernel/process_32.c      | 12 ++++++-----
  arch/sparc/kernel/process_32.c   | 12 ++++++-----
  arch/sparc/kernel/process_64.c   | 12 ++++++-----
  arch/um/kernel/process.c         | 15 +++++++------
  arch/x86/include/asm/fpu/sched.h |  2 +-
  arch/x86/include/asm/switch_to.h |  8 +++----
  arch/x86/kernel/fpu/core.c       |  4 ++--
  arch/x86/kernel/process.c        | 18 +++++++++-------
  arch/xtensa/kernel/process.c     | 17 ++++++++-------
  fs/exec.c                        |  8 ++++---
  include/linux/sched/task.h       |  8 +++++--
  init/initramfs.c                 |  2 ++
  init/main.c                      |  2 +-
  kernel/fork.c                    | 46 +++++++++++++++++++++++++++++++++-------
  kernel/sched/fair.c              |  2 +-
  kernel/umh.c                     |  6 +++---
  33 files changed, 234 insertions(+), 160 deletions(-)
 
 Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEgjlraLDcwBA2B+6cC/v6Eiajj0AFAmKaR/MACgkQC/v6Eiaj
 j0Aayg/7Bx66872d9c6igkJ+MPCTuh+v9QKCGwiYEmiU4Q5sVAFB0HPJO27qC14u
 630X0RFNZTkPzNNEJNIW4kw6Dj8s8YRKf+FgQAVt4SzdRwT7eIPDjk1nGraopPJ3
 O04pjvuTmUyidyViRyFcf2ptx/pnkrwP8jUSc+bGTgfASAKAgAokqKE5ecjewbBc
 Y/EAkQ6QW7KxPjeSmpAHwI+t3BpBev9WEC4PbhRhsBCQFO2+PJiklvqdhVNBnIjv
 qUezll/1xv9UYgniB15Q4Nb722SmnWSU3r8as1eFPugzTHizKhufrrpyP+KMK1A0
 tdtEJNs5t2DZF7ZbGTFSPqJWmyTYLrghZdO+lOmnaSjHxK4Nda1d4NzbefJ0u+FE
 tutewowvHtBX6AFIbx+H3O+DOJM2IgNMf+ReQDU/TyNyVf3wBrTbsr9cLxypIJIp
 zze8npoLMlB7B4yxVo5ES5e63EXfi3iHl0L3/1EhoGwriRz1kWgVLUX/VZOUpscL
 RkJHsW6bT8sqxPWAA5kyWjEN+wNR2PxbXi8OE4arT0uJrEBMUgDCzydzOv5tJB00
 mSQdytxH9LVdsmxBKAOBp5X6WOLGA4yb1cZ6E/mEhlqXMpBDF1DaMfwbWqxSYi4q
 sp5zU3SBAW0qceiZSsWZXInfbjrcQXNV/DkDRDO9OmzEZP4m1j0=
 =x6fy
 -----END PGP SIGNATURE-----

Merge tag 'kthread-cleanups-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

Pull kthread updates from Eric Biederman:
 "This updates init and user mode helper tasks to be ordinary user mode
  tasks.

  Commit 40966e316f86 ("kthread: Ensure struct kthread is present for
  all kthreads") caused init and the user mode helper threads that call
  kernel_execve to have struct kthread allocated for them. This struct
  kthread going away during execve in turned made a use after free of
  struct kthread possible.

  Here, commit 343f4c49f243 ("kthread: Don't allocate kthread_struct for
  init and umh") is enough to fix the use after free and is simple
  enough to be backportable.

  The rest of the changes pass struct kernel_clone_args to clean things
  up and cause the code to make sense.

  In making init and the user mode helpers tasks purely user mode tasks
  I ran into two complications. The function task_tick_numa was
  detecting tasks without an mm by testing for the presence of
  PF_KTHREAD. The initramfs code in populate_initrd_image was using
  flush_delayed_fput to ensuere the closing of all it's file descriptors
  was complete, and flush_delayed_fput does not work in a userspace
  thread.

  I have looked and looked and more complications and in my code review
  I have not found any, and neither has anyone else with the code
  sitting in linux-next"

* tag 'kthread-cleanups-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  sched: Update task_tick_numa to ignore tasks without an mm
  fork: Stop allowing kthreads to call execve
  fork: Explicitly set PF_KTHREAD
  init: Deal with the init process being a user mode process
  fork: Generalize PF_IO_WORKER handling
  fork: Explicity test for idle tasks in copy_thread
  fork: Pass struct kernel_clone_args into copy_thread
  kthread: Don't allocate kthread_struct for init and umh
2022-06-03 16:03:05 -07:00

289 lines
7.4 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* OpenRISC process.c
*
* Linux architectural port borrowing liberally from similar works of
* others. All original copyrights apply as per the original source
* declaration.
*
* Modifications for the OpenRISC architecture:
* Copyright (C) 2003 Matjaz Breskvar <phoenix@bsemi.com>
* Copyright (C) 2010-2011 Jonas Bonn <jonas@southpole.se>
*
* This file handles the architecture-dependent parts of process handling...
*/
#define __KERNEL_SYSCALLS__
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/elfcore.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/init_task.h>
#include <linux/mqueue.h>
#include <linux/fs.h>
#include <linux/reboot.h>
#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/spr_defs.h>
#include <linux/smp.h>
/*
* Pointer to Current thread info structure.
*
* Used at user space -> kernel transitions.
*/
struct thread_info *current_thread_info_set[NR_CPUS] = { &init_thread_info, };
void machine_restart(char *cmd)
{
do_kernel_restart(cmd);
__asm__("l.nop 13");
/* Give a grace period for failure to restart of 1s */
mdelay(1000);
/* Whoops - the platform was unable to reboot. Tell the user! */
pr_emerg("Reboot failed -- System halted\n");
while (1);
}
/*
* This is used if pm_power_off has not been set by a power management
* driver, in this case we can assume we are on a simulator. On
* OpenRISC simulators l.nop 1 will trigger the simulator exit.
*/
static void default_power_off(void)
{
__asm__("l.nop 1");
}
/*
* Similar to machine_power_off, but don't shut off power. Add code
* here to freeze the system for e.g. post-mortem debug purpose when
* possible. This halt has nothing to do with the idle halt.
*/
void machine_halt(void)
{
printk(KERN_INFO "*** MACHINE HALT ***\n");
__asm__("l.nop 1");
}
/* If or when software power-off is implemented, add code here. */
void machine_power_off(void)
{
printk(KERN_INFO "*** MACHINE POWER OFF ***\n");
if (pm_power_off != NULL)
pm_power_off();
else
default_power_off();
}
/*
* Send the doze signal to the cpu if available.
* Make sure, that all interrupts are enabled
*/
void arch_cpu_idle(void)
{
raw_local_irq_enable();
if (mfspr(SPR_UPR) & SPR_UPR_PMP)
mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME);
}
void (*pm_power_off)(void) = NULL;
EXPORT_SYMBOL(pm_power_off);
/*
* When a process does an "exec", machine state like FPU and debug
* registers need to be reset. This is a hook function for that.
* Currently we don't have any such state to reset, so this is empty.
*/
void flush_thread(void)
{
}
void show_regs(struct pt_regs *regs)
{
extern void show_registers(struct pt_regs *regs);
show_regs_print_info(KERN_DEFAULT);
/* __PHX__ cleanup this mess */
show_registers(regs);
}
void release_thread(struct task_struct *dead_task)
{
}
/*
* Copy the thread-specific (arch specific) info from the current
* process to the new one p
*/
extern asmlinkage void ret_from_fork(void);
/*
* copy_thread
* @clone_flags: flags
* @usp: user stack pointer or fn for kernel thread
* @arg: arg to fn for kernel thread; always NULL for userspace thread
* @p: the newly created task
* @tls: the Thread Local Storage pointer for the new process
*
* At the top of a newly initialized kernel stack are two stacked pt_reg
* structures. The first (topmost) is the userspace context of the thread.
* The second is the kernelspace context of the thread.
*
* A kernel thread will not be returning to userspace, so the topmost pt_regs
* struct can be uninitialized; it _does_ need to exist, though, because
* a kernel thread can become a userspace thread by doing a kernel_execve, in
* which case the topmost context will be initialized and used for 'returning'
* to userspace.
*
* The second pt_reg struct needs to be initialized to 'return' to
* ret_from_fork. A kernel thread will need to set r20 to the address of
* a function to call into (with arg in r22); userspace threads need to set
* r20 to NULL in which case ret_from_fork will just continue a return to
* userspace.
*
* A kernel thread 'fn' may return; this is effectively what happens when
* kernel_execve is called. In that case, the userspace pt_regs must have
* been initialized (which kernel_execve takes care of, see start_thread
* below); ret_from_fork will then continue its execution causing the
* 'kernel thread' to return to userspace as a userspace thread.
*/
int
copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
unsigned long clone_flags = args->flags;
unsigned long usp = args->stack;
unsigned long tls = args->tls;
struct pt_regs *userregs;
struct pt_regs *kregs;
unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
unsigned long top_of_kernel_stack;
top_of_kernel_stack = sp;
/* Locate userspace context on stack... */
sp -= STACK_FRAME_OVERHEAD; /* redzone */
sp -= sizeof(struct pt_regs);
userregs = (struct pt_regs *) sp;
/* ...and kernel context */
sp -= STACK_FRAME_OVERHEAD; /* redzone */
sp -= sizeof(struct pt_regs);
kregs = (struct pt_regs *)sp;
if (unlikely(args->fn)) {
memset(kregs, 0, sizeof(struct pt_regs));
kregs->gpr[20] = (unsigned long)args->fn;
kregs->gpr[22] = (unsigned long)args->fn_arg;
} else {
*userregs = *current_pt_regs();
if (usp)
userregs->sp = usp;
/*
* For CLONE_SETTLS set "tp" (r10) to the TLS pointer.
*/
if (clone_flags & CLONE_SETTLS)
userregs->gpr[10] = tls;
userregs->gpr[11] = 0; /* Result from fork() */
kregs->gpr[20] = 0; /* Userspace thread */
}
/*
* _switch wants the kernel stack page in pt_regs->sp so that it
* can restore it to thread_info->ksp... see _switch for details.
*/
kregs->sp = top_of_kernel_stack;
kregs->gpr[9] = (unsigned long)ret_from_fork;
task_thread_info(p)->ksp = (unsigned long)kregs;
return 0;
}
/*
* Set up a thread for executing a new program
*/
void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
{
unsigned long sr = mfspr(SPR_SR) & ~SPR_SR_SM;
memset(regs, 0, sizeof(struct pt_regs));
regs->pc = pc;
regs->sr = sr;
regs->sp = sp;
}
extern struct thread_info *_switch(struct thread_info *old_ti,
struct thread_info *new_ti);
extern int lwa_flag;
struct task_struct *__switch_to(struct task_struct *old,
struct task_struct *new)
{
struct task_struct *last;
struct thread_info *new_ti, *old_ti;
unsigned long flags;
local_irq_save(flags);
/* current_set is an array of saved current pointers
* (one for each cpu). we need them at user->kernel transition,
* while we save them at kernel->user transition
*/
new_ti = new->stack;
old_ti = old->stack;
lwa_flag = 0;
current_thread_info_set[smp_processor_id()] = new_ti;
last = (_switch(old_ti, new_ti))->task;
local_irq_restore(flags);
return last;
}
/*
* Write out registers in core dump format, as defined by the
* struct user_regs_struct
*/
void dump_elf_thread(elf_greg_t *dest, struct pt_regs* regs)
{
dest[0] = 0; /* r0 */
memcpy(dest+1, regs->gpr+1, 31*sizeof(unsigned long));
dest[32] = regs->pc;
dest[33] = regs->sr;
dest[34] = 0;
dest[35] = 0;
}
unsigned long __get_wchan(struct task_struct *p)
{
/* TODO */
return 0;
}