Linus Torvalds 5695e51619 io_uring-worker.v3-2021-02-25
-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmA4JRkQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpoWqD/9dbbqe8L701U6May1A/4hRsqL4THTA2flx
 vNCNRBl6XV3l/wBCtL6waKy6tyO4lyM8XdUdEvo3Kxl2kGPb8eVfpyYL/+77HqyH
 ctT4RMrs+84Mxn+5N6cM97hS1qVI2moTxxyvOEl/JTB7BYrutz9gvAoeY3/Dto47
 J66oSaPeuqJ32TyihxfQHVxQopJcqFzDjyoYHGDu6ATio1PXfaIdTu8ywVYSECAh
 pWI4rwnqdurGuHMNpxyL1bA6CT/jC7s+sqU7bUYUCgtYI3eG0u3V0bp5gAQQIgl9
 5sxxE3DidYGAkYZsosrelshBtzGddLdz4Qrt2ungMYv8RsGNpFQ095jDPKDwFaZj
 bSvSsfplCo7iFsJByb1TtpNEOW8eAwi81PmBDVQ9Oq5P5ygTYno9GBDc/20ql0Fk
 q6wcX28coE3IBw44ne0hIwvBOtXV4WJyluG/gqOxfbTH+kOy3pDsN8lWcY/P4X0U
 yzdU2MLHe8BNMyYlUiBF47Amzt4ltr85P4XD3WZ4bX71iwri6HvrdGWLuuKwX+Ie
 66QiIDDQIYZQ6NMMJWS9DGW3y3DBizpSXGxONbOw1J2bQdNmtToR0D2UnK/9UnKp
 msnvkUNk8fkYGS4aptpJ6HxbmjMEG5YtbiGlPj6fz5/7MTvhRjPxt7A0LWrUIdqR
 f88+sHUMqg==
 =oc8u
 -----END PGP SIGNATURE-----

Merge tag 'io_uring-worker.v3-2021-02-25' of git://git.kernel.dk/linux-block

Pull io_uring thread rewrite from Jens Axboe:
 "This converts the io-wq workers to be forked off the tasks in question
  instead of being kernel threads that assume various bits of the
  original task identity.

  This kills > 400 lines of code from io_uring/io-wq, and it's the worst
  part of the code. We've had several bugs in this area, and the worry
  is always that we could be missing some pieces for file types doing
  unusual things (recent /dev/tty example comes to mind, userfaultfd
  reads installing file descriptors is another fun one... - both of
  which need special handling, and I bet it's not the last weird oddity
  we'll find).

  With these identical workers, we can have full confidence that we're
  never missing anything. That, in itself, is a huge win. Outside of
  that, it's also more efficient since we're not wasting space and code
  on tracking state, or switching between different states.

  I'm sure we're going to find little things to patch up after this
  series, but testing has been pretty thorough, from the usual
  regression suite to production. Any issue that may crop up should be
  manageable.

  There's also a nice series of further reductions we can do on top of
  this, but I wanted to get the meat of it out sooner rather than later.
  The general worry here isn't that it's fundamentally broken. Most of
  the little issues we've found over the last week have been related to
  just changes in how thread startup/exit is done, since that's the main
  difference between using kthreads and these kinds of threads. In fact,
  if all goes according to plan, I want to get this into the 5.10 and
  5.11 stable branches as well.

  That said, the changes outside of io_uring/io-wq are:

   - arch setup, simple one-liner to each arch copy_thread()
     implementation.

   - Removal of net and proc restrictions for io_uring, they are no
     longer needed or useful"

* tag 'io_uring-worker.v3-2021-02-25' of git://git.kernel.dk/linux-block: (30 commits)
  io-wq: remove now unused IO_WQ_BIT_ERROR
  io_uring: fix SQPOLL thread handling over exec
  io-wq: improve manager/worker handling over exec
  io_uring: ensure SQPOLL startup is triggered before error shutdown
  io-wq: make buffered file write hashed work map per-ctx
  io-wq: fix race around io_worker grabbing
  io-wq: fix races around manager/worker creation and task exit
  io_uring: ensure io-wq context is always destroyed for tasks
  arch: ensure parisc/powerpc handle PF_IO_WORKER in copy_thread()
  io_uring: cleanup ->user usage
  io-wq: remove nr_process accounting
  io_uring: flag new native workers with IORING_FEAT_NATIVE_WORKERS
  net: remove cmsg restriction from io_uring based send/recvmsg calls
  Revert "proc: don't allow async path resolution of /proc/self components"
  Revert "proc: don't allow async path resolution of /proc/thread-self components"
  io_uring: move SQPOLL thread io-wq forked worker
  io-wq: make io_wq_fork_thread() available to other users
  io-wq: only remove worker from free_list, if it was there
  io_uring: remove io_identity
  io_uring: remove any grabbing of context
  ...
2021-02-27 08:29:02 -08:00

274 lines
7.1 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* OpenRISC process.c
*
* Linux architectural port borrowing liberally from similar works of
* others. All original copyrights apply as per the original source
* declaration.
*
* Modifications for the OpenRISC architecture:
* Copyright (C) 2003 Matjaz Breskvar <phoenix@bsemi.com>
* Copyright (C) 2010-2011 Jonas Bonn <jonas@southpole.se>
*
* This file handles the architecture-dependent parts of process handling...
*/
#define __KERNEL_SYSCALLS__
#include <stdarg.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/elfcore.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/init_task.h>
#include <linux/mqueue.h>
#include <linux/fs.h>
#include <linux/reboot.h>
#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/spr_defs.h>
#include <linux/smp.h>
/*
 * Table of pointers to the current thread_info structure, one slot per
 * CPU (NR_CPUS entries).
 *
 * Used at user space -> kernel transitions. Only the first slot has an
 * explicit initializer (&init_thread_info).
 */
struct thread_info *current_thread_info_set[NR_CPUS] = { &init_thread_info, };
/*
 * Restart the machine.
 * @cmd: restart command string, handed unchanged to do_kernel_restart()
 *
 * If control comes back from do_kernel_restart(), wait one second, report
 * the failure and spin forever.
 */
void machine_restart(char *cmd)
{
	do_kernel_restart(cmd);

	/* Allow the restart 1s to take effect before declaring failure. */
	mdelay(1000);

	/* Still running, so the platform could not reboot: tell the user. */
	pr_emerg("Reboot failed -- System halted\n");
	for (;;)
		;
}
/*
 * Halt the machine without shutting off power.
 *
 * This is the counterpart of machine_power_off() and is unrelated to the
 * idle halt. It is the place to add code that freezes the system, e.g. for
 * post-mortem debugging, where the platform allows it.
 */
void machine_halt(void)
{
	printk(KERN_INFO "*** MACHINE HALT ***\n");

	/* NOTE(review): "l.nop 1" is presumably a simulator exit hint -- confirm. */
	__asm__("l.nop 1");
}
/*
 * Power the machine off.
 *
 * Software power-off is not implemented: the function only logs a message
 * and issues "l.nop 1". If or when real power-off support exists, add the
 * code here.
 */
void machine_power_off(void)
{
	printk(KERN_INFO "*** MACHINE POWER OFF ***\n");

	/* NOTE(review): "l.nop 1" is presumably a simulator exit hint -- confirm. */
	__asm__("l.nop 1");
}
/*
 * Idle hook: send the doze signal to the CPU if it is available.
 *
 * Interrupts are enabled first in every case. The doze bit (SPR_PMR_DME)
 * is then set in SPR_PMR only when SPR_UPR has the SPR_UPR_PMP bit set.
 */
void arch_cpu_idle(void)
{
	raw_local_irq_enable();

	/* SPR_UPR_PMP clear: leave SPR_PMR untouched. */
	if (!(mfspr(SPR_UPR) & SPR_UPR_PMP))
		return;

	mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME);
}
/*
 * Power-off hook. Initialised to machine_power_off() and made available to
 * other code through EXPORT_SYMBOL().
 */
void (*pm_power_off) (void) = machine_power_off;
EXPORT_SYMBOL(pm_power_off);
/*
 * Hook for resetting per-thread machine state when a process does an
 * "exec", e.g. FPU and debug registers.
 *
 * There is currently no such state to reset on this architecture, so the
 * body is intentionally empty.
 */
void flush_thread(void)
{
}
/* Defined outside this file. */
extern void show_registers(struct pt_regs *regs);

/*
 * Print the register state in @regs.
 * @regs: register frame to report
 *
 * Emits the show_regs_print_info() header at KERN_DEFAULT level and then
 * hands @regs to show_registers() for the actual dump.
 */
void show_regs(struct pt_regs *regs)
{
	show_regs_print_info(KERN_DEFAULT);

	/* TODO (__PHX__): clean up the show_regs/show_registers split. */
	show_registers(regs);
}
/*
 * Hook that receives a dead task.
 * @dead_task: the task being released (unused)
 *
 * Nothing is done here; the body is intentionally empty.
 */
void release_thread(struct task_struct *dead_task)
{
}
/*
 * Code that a freshly created task "returns" into; copy_thread() plants its
 * address in the new task's kernel context. Defined outside this file.
 */
extern asmlinkage void ret_from_fork(void);

/*
 * copy_thread - copy the thread-specific (arch-specific) state from the
 *               current process to the new task
 * @clone_flags: flags
 * @usp: user stack pointer, or fn for a kernel thread
 * @arg: arg to fn for a kernel thread; always NULL for a userspace thread
 * @p: the newly created task
 * @tls: the Thread Local Storage pointer for the new process
 *
 * Two pt_regs structures are stacked at the top of a newly initialized
 * kernel stack. The topmost one holds the userspace context of the thread,
 * the one below it holds the kernelspace context.
 *
 * A kernel thread does not return to userspace, so its topmost pt_regs may
 * stay uninitialized. The slot must exist all the same: a kernel thread can
 * turn into a userspace thread through kernel_execve, which initializes the
 * topmost context and uses it to 'return' to userspace.
 *
 * The lower pt_regs is set up to 'return' to ret_from_fork. For a kernel
 * thread r20 holds the function to call and r22 its argument. For a
 * userspace thread r20 is NULL, and ret_from_fork simply carries on with
 * the return to userspace.
 *
 * A kernel thread 'fn' may return; that is effectively what happens when
 * kernel_execve is called. By then the userspace pt_regs must have been
 * initialized (kernel_execve takes care of that, see start_thread below),
 * and ret_from_fork continues so that the 'kernel thread' returns to
 * userspace as a userspace thread.
 *
 * Always returns 0.
 */
int
copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
	    struct task_struct *p, unsigned long tls)
{
	/* Every saved context is one pt_regs placed below a redzone. */
	const unsigned long frame_size =
		STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
	const unsigned long stack_top =
		(unsigned long)task_stack_page(p) + THREAD_SIZE;
	/* Userspace context first, kernel context directly beneath it. */
	struct pt_regs *user_ctx = (struct pt_regs *)(stack_top - frame_size);
	struct pt_regs *kern_ctx =
		(struct pt_regs *)(stack_top - 2 * frame_size);

	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
		/* Kernel thread: @usp carries fn, @arg its argument. */
		memset(kern_ctx, 0, sizeof(*kern_ctx));
		kern_ctx->gpr[20] = usp;
		kern_ctx->gpr[22] = arg;
	} else {
		/* Userspace thread: begin with a copy of the current registers. */
		*user_ctx = *current_pt_regs();

		if (usp)
			user_ctx->sp = usp;

		/* For CLONE_SETTLS set "tp" (r10) to the TLS pointer. */
		if (clone_flags & CLONE_SETTLS)
			user_ctx->gpr[10] = tls;

		user_ctx->gpr[11] = 0;	/* Result from fork() */
		kern_ctx->gpr[20] = 0;	/* No fn: userspace thread */
	}

	/*
	 * _switch expects the kernel stack page in pt_regs->sp so that it
	 * can restore it to thread_info->ksp; see _switch for details.
	 */
	kern_ctx->sp = stack_top;
	kern_ctx->gpr[9] = (unsigned long)ret_from_fork;

	task_thread_info(p)->ksp = (unsigned long)kern_ctx;

	return 0;
}
/*
 * Set up a thread for executing a new program.
 * @regs: register frame to initialise
 * @pc: value stored as the program counter
 * @sp: value stored as the stack pointer
 *
 * The frame is cleared completely; pc, sp and sr are then filled in. The
 * stored sr is the current SPR_SR value with SPR_SR_SM masked out.
 * NOTE(review): SPR_SR_SM is presumably the supervisor-mode bit, so the
 * new program starts in user mode -- confirm.
 */
void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
{
	const unsigned long user_sr = mfspr(SPR_SR) & ~SPR_SR_SM;

	memset(regs, 0, sizeof(*regs));

	regs->sp = sp;
	regs->sr = user_sr;
	regs->pc = pc;
}
/*
 * Low-level switch routine, defined outside this file. It is given the
 * outgoing and incoming thread_info; __switch_to() reads ->task of the
 * thread_info it returns to find the task to report as "last".
 */
extern struct thread_info *_switch(struct thread_info *old_ti,
struct thread_info *new_ti);
/* NOTE(review): presumably tracks an l.lwa/l.swa reservation -- confirm. */
extern int lwa_flag;
/*
 * Switch from task @old to task @new.
 * @old: the task being switched away from
 * @new: the task being switched to
 *
 * Returns the task taken from the thread_info that _switch() hands back.
 * The whole sequence runs between local_irq_save() and
 * local_irq_restore().
 */
struct task_struct *__switch_to(struct task_struct *old,
struct task_struct *new)
{
struct task_struct *last;
struct thread_info *new_ti, *old_ti;
unsigned long flags;
/* Local interrupts stay disabled until local_irq_restore() below. */
local_irq_save(flags);
/* current_thread_info_set is an array of saved current pointers
 * (one for each cpu). we need them at user->kernel transition,
 * while we save them at kernel->user transition
 */
/* ->stack is used directly as the task's thread_info pointer. */
new_ti = new->stack;
old_ti = old->stack;
/* Cleared on every switch; see the review note on the declaration. */
lwa_flag = 0;
/* Publish the incoming thread_info for this CPU before switching. */
current_thread_info_set[smp_processor_id()] = new_ti;
last = (_switch(old_ti, new_ti))->task;
local_irq_restore(flags);
return last;
}
/*
 * Write out registers in core dump format, as defined by
 * struct user_regs_struct.
 * @dest: output array; slots 0..35 are written
 * @regs: saved register frame to read from
 *
 * Layout: slot 0 is r0 (always written as 0), slots 1..31 are r1..r31,
 * slot 32 is pc, slot 33 is sr, slots 34 and 35 are zero.
 */
void dump_elf_thread(elf_greg_t *dest, struct pt_regs *regs)
{
	/* r0 is reported as zero, not read from the frame. */
	dest[0] = 0;

	/* r1..r31 are copied as one contiguous run of 31 words. */
	memcpy(&dest[1], &regs->gpr[1], 31 * sizeof(unsigned long));

	dest[32] = regs->pc;
	dest[33] = regs->sr;

	/* Trailing slots carry no register data. */
	dest[34] = 0;
	dest[35] = 0;
}
/*
 * Wait-channel lookup for task @p.
 * @p: the task to inspect (currently unused)
 *
 * Not implemented yet: always returns 0.
 */
unsigned long get_wchan(struct task_struct *p)
{
/* TODO */
return 0;
}