ad943893d5
The task executing crash kexec is scheduled out when a panic is triggered by an RCU stall, because it has to wait for RCU completion. This makes it impossible to enter the crash system. The RISC-V implementation of machine_crash_shutdown() is non-standard compared with other architectures' implementations (e.g. x86, arm64); we need to send an IPI to stop the secondary harts. [224521.877268] rcu: INFO: rcu_preempt detected stalls on CPUs/tasks: [224521.883471] rcu: 0-...0: (3 GPs behind) idle=cfa/0/0x1 softirq=3968793/3968793 fqs=2495 [224521.891742] (detected by 2, t=5255 jiffies, g=60855593, q=328) [224521.897754] Task dump for CPU 0: [224521.901074] task:swapper/0 state:R running task stack: 0 pid: 0 ppid: 0 flags:0x00000008 [224521.911090] Call Trace: [224521.913638] [<ffffffe000c432de>] __schedule+0x208/0x5ea [224521.918957] Kernel panic - not syncing: RCU Stall [224521.923773] bad: scheduling from the idle thread! [224521.928571] CPU: 2 PID: 0 Comm: swapper/2 Kdump: loaded Tainted: G O 5.10.113-yocto-standard #1 [224521.938658] Call Trace: [224521.941200] [<ffffffe00020395c>] walk_stackframe+0x0/0xaa [224521.946689] [<ffffffe000c34f8e>] show_stack+0x32/0x3e [224521.951830] [<ffffffe000c39020>] dump_stack_lvl+0x7e/0xa2 [224521.957317] [<ffffffe000c39058>] dump_stack+0x14/0x1c [224521.962459] [<ffffffe000243884>] dequeue_task_idle+0x2c/0x40 [224521.968207] [<ffffffe000c434f4>] __schedule+0x41e/0x5ea [224521.973520] [<ffffffe000c43826>] schedule+0x34/0xe4 [224521.978487] [<ffffffe000c46cae>] schedule_timeout+0xc6/0x170 [224521.984234] [<ffffffe000c4491e>] wait_for_completion+0x98/0xf2 [224521.990157] [<ffffffe00026d9e2>] __wait_rcu_gp+0x148/0x14a [224521.995733] [<ffffffe0002761c4>] synchronize_rcu+0x5c/0x66 [224522.001307] [<ffffffe00026f1a6>] rcu_sync_enter+0x54/0xe6 [224522.006795] [<ffffffe00025a436>] percpu_down_write+0x32/0x11c [224522.012629] [<ffffffe000c4266a>] _cpu_down+0x92/0x21a [224522.017771] [<ffffffe000219a0a>] smp_shutdown_nonboot_cpus+0x90/0x118 [224522.024299]
[<ffffffe00020701e>] machine_crash_shutdown+0x30/0x4a [224522.030483] [<ffffffe00029a3f8>] __crash_kexec+0x62/0xa6 [224522.035884] [<ffffffe000c3515e>] panic+0xfa/0x2b6 [224522.040678] [<ffffffe0002772be>] rcu_sched_clock_irq+0xc26/0xcb8 [224522.046774] [<ffffffe00027fc7a>] update_process_times+0x62/0x8a [224522.052785] [<ffffffe00028d522>] tick_sched_timer+0x9e/0x102 [224522.058533] [<ffffffe000280c3a>] __hrtimer_run_queues+0x16a/0x318 [224522.064716] [<ffffffe0002812ec>] hrtimer_interrupt+0xd4/0x228 [224522.070551] [<ffffffe0009a69b6>] riscv_timer_interrupt+0x3c/0x48 [224522.076646] [<ffffffe000268f8c>] handle_percpu_devid_irq+0xb0/0x24c [224522.083004] [<ffffffe00026428e>] __handle_domain_irq+0xa8/0x122 [224522.089014] [<ffffffe00062f954>] riscv_intc_irq+0x38/0x60 [224522.094501] [<ffffffe000201bd4>] ret_from_exception+0x0/0xc [224522.100161] [<ffffffe000c42146>] rcu_eqs_enter.constprop.0+0x8c/0xb8 With this patch, the crash system can be entered when an RCU stall occurs. Fixes: e53d28180d4d ("RISC-V: Add kdump support") Signed-off-by: Xianting Tian <xianting.tian@linux.alibaba.com> Link: https://lore.kernel.org/r/20220811074150.3020189-4-xianting.tian@linux.alibaba.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
216 lines
6.1 KiB
C
216 lines
6.1 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (C) 2019 FORTH-ICS/CARV
|
|
* Nick Kossifidis <mick@ics.forth.gr>
|
|
*/
|
|
|
|
#include <linux/kexec.h>
|
|
#include <asm/kexec.h> /* For riscv_kexec_* symbol defines */
|
|
#include <linux/smp.h> /* For smp_send_stop () */
|
|
#include <asm/cacheflush.h> /* For local_flush_icache_all() */
|
|
#include <asm/barrier.h> /* For smp_wmb() */
|
|
#include <asm/page.h> /* For PAGE_MASK */
|
|
#include <linux/libfdt.h> /* For fdt_check_header() */
|
|
#include <asm/set_memory.h> /* For set_memory_x() */
|
|
#include <linux/compiler.h> /* For unreachable() */
|
|
#include <linux/cpu.h> /* For cpu_down() */
|
|
#include <linux/reboot.h>
|
|
|
|
/*
|
|
* kexec_image_info - Print received image details
|
|
*/
|
|
static void
|
|
kexec_image_info(const struct kimage *image)
|
|
{
|
|
unsigned long i;
|
|
|
|
pr_debug("Kexec image info:\n");
|
|
pr_debug("\ttype: %d\n", image->type);
|
|
pr_debug("\tstart: %lx\n", image->start);
|
|
pr_debug("\thead: %lx\n", image->head);
|
|
pr_debug("\tnr_segments: %lu\n", image->nr_segments);
|
|
|
|
for (i = 0; i < image->nr_segments; i++) {
|
|
pr_debug("\t segment[%lu]: %016lx - %016lx", i,
|
|
image->segment[i].mem,
|
|
image->segment[i].mem + image->segment[i].memsz);
|
|
pr_debug("\t\t0x%lx bytes, %lu pages\n",
|
|
(unsigned long) image->segment[i].memsz,
|
|
(unsigned long) image->segment[i].memsz / PAGE_SIZE);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* machine_kexec_prepare - Initialize kexec
|
|
*
|
|
* This function is called from do_kexec_load, when the user has
|
|
* provided us with an image to be loaded. Its goal is to validate
|
|
* the image and prepare the control code buffer as needed.
|
|
* Note that kimage_alloc_init has already been called and the
|
|
* control buffer has already been allocated.
|
|
*/
|
|
int
|
|
machine_kexec_prepare(struct kimage *image)
|
|
{
|
|
struct kimage_arch *internal = &image->arch;
|
|
struct fdt_header fdt = {0};
|
|
void *control_code_buffer = NULL;
|
|
unsigned int control_code_buffer_sz = 0;
|
|
int i = 0;
|
|
|
|
kexec_image_info(image);
|
|
|
|
/* Find the Flattened Device Tree and save its physical address */
|
|
for (i = 0; i < image->nr_segments; i++) {
|
|
if (image->segment[i].memsz <= sizeof(fdt))
|
|
continue;
|
|
|
|
if (image->file_mode)
|
|
memcpy(&fdt, image->segment[i].buf, sizeof(fdt));
|
|
else if (copy_from_user(&fdt, image->segment[i].buf, sizeof(fdt)))
|
|
continue;
|
|
|
|
if (fdt_check_header(&fdt))
|
|
continue;
|
|
|
|
internal->fdt_addr = (unsigned long) image->segment[i].mem;
|
|
break;
|
|
}
|
|
|
|
if (!internal->fdt_addr) {
|
|
pr_err("Device tree not included in the provided image\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* Copy the assembler code for relocation to the control page */
|
|
if (image->type != KEXEC_TYPE_CRASH) {
|
|
control_code_buffer = page_address(image->control_code_page);
|
|
control_code_buffer_sz = page_size(image->control_code_page);
|
|
|
|
if (unlikely(riscv_kexec_relocate_size > control_code_buffer_sz)) {
|
|
pr_err("Relocation code doesn't fit within a control page\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
memcpy(control_code_buffer, riscv_kexec_relocate,
|
|
riscv_kexec_relocate_size);
|
|
|
|
/* Mark the control page executable */
|
|
set_memory_x((unsigned long) control_code_buffer, 1);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
 * machine_kexec_cleanup - Undo whatever machine_kexec_prepare set up
 *
 * kimage_free calls this hook so the architecture can release anything
 * it allocated in machine_kexec_prepare. Nothing is allocated there,
 * so there is nothing to release; the control buffer itself is freed
 * by kimage_free.
 */
void machine_kexec_cleanup(struct kimage *image)
{
	/* Intentionally empty: no arch-specific allocations to free. */
}
|
|
|
|
|
|
/*
 * machine_shutdown - Get the system ready for a kexec reboot
 *
 * kernel_kexec calls this right before machine_kexec (below). The aim
 * is to quiesce the rest of the system (the other harts and possibly
 * devices etc) ahead of the kexec reboot.
 */
void machine_shutdown(void)
{
	/* Interrupts stay off on this hart until we are back up. */
	local_irq_disable();

#ifdef CONFIG_HOTPLUG_CPU
	/* Only built with CPU hotplug; the current CPU id is passed in. */
	smp_shutdown_nonboot_cpus(smp_processor_id());
#endif
}
|
|
|
|
/* Override the weak function in kernel/panic.c */
void crash_smp_send_stop(void)
{
	/* Non-zero once the stop request has been issued. */
	static int cpus_stopped;

	/*
	 * The panic path can reach this function twice; the stop request
	 * is only sent on the first call.
	 */
	if (!cpus_stopped) {
		smp_send_stop();
		cpus_stopped = 1;
	}
}
|
|
|
|
/*
 * machine_crash_shutdown - Get ready to kexec after a kernel crash
 *
 * crash_kexec calls this right before machine_kexec. It stops the
 * non-crashing cpus and saves the registers passed in @regs for the
 * current cpu.
 */
void machine_crash_shutdown(struct pt_regs *regs)
{
	int cpu;

	local_irq_disable();

	/* shutdown non-crashing cpus */
	crash_smp_send_stop();

	/* Record the register state against the cpu we are running on */
	cpu = smp_processor_id();
	crash_save_cpu(regs, cpu);

	pr_info("Starting crashdump kernel...\n");
}
|
|
|
|
/*
|
|
* machine_kexec - Jump to the loaded kimage
|
|
*
|
|
* This function is called by kernel_kexec which is called by the
|
|
* reboot system call when the reboot cmd is LINUX_REBOOT_CMD_KEXEC,
|
|
* or by crash_kernel which is called by the kernel's arch-specific
|
|
* trap handler in case of a kernel panic. It's the final stage of
|
|
* the kexec process where the pre-loaded kimage is ready to be
|
|
* executed. We assume at this point that all other harts are
|
|
* suspended and this hart will be the new boot hart.
|
|
*/
|
|
void __noreturn
|
|
machine_kexec(struct kimage *image)
|
|
{
|
|
struct kimage_arch *internal = &image->arch;
|
|
unsigned long jump_addr = (unsigned long) image->start;
|
|
unsigned long first_ind_entry = (unsigned long) &image->head;
|
|
unsigned long this_cpu_id = __smp_processor_id();
|
|
unsigned long this_hart_id = cpuid_to_hartid_map(this_cpu_id);
|
|
unsigned long fdt_addr = internal->fdt_addr;
|
|
void *control_code_buffer = page_address(image->control_code_page);
|
|
riscv_kexec_method kexec_method = NULL;
|
|
|
|
if (image->type != KEXEC_TYPE_CRASH)
|
|
kexec_method = control_code_buffer;
|
|
else
|
|
kexec_method = (riscv_kexec_method) &riscv_kexec_norelocate;
|
|
|
|
pr_notice("Will call new kernel at %08lx from hart id %lx\n",
|
|
jump_addr, this_hart_id);
|
|
pr_notice("FDT image at %08lx\n", fdt_addr);
|
|
|
|
/* Make sure the relocation code is visible to the hart */
|
|
local_flush_icache_all();
|
|
|
|
/* Jump to the relocation code */
|
|
pr_notice("Bye...\n");
|
|
kexec_method(first_ind_entry, jump_addr, fdt_addr,
|
|
this_hart_id, kernel_map.va_pa_offset);
|
|
unreachable();
|
|
}
|