From ab7fa6b05ebbe2a8cc07114014f14fd2326fb80a Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 24 May 2023 00:54:59 +0800 Subject: [PATCH 01/23] riscv: move options to keep entries sorted Recently, some commits break the entries order. Properly move their locations to keep entries sorted. Signed-off-by: Jisheng Zhang Reviewed-by: Conor Dooley Acked-by: Guo Ren Tested-by: Nick Desaulniers # build Link: https://lore.kernel.org/r/20230523165502.2592-2-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 348c0fa1fc8c..8f55aa4aae34 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -101,6 +101,11 @@ config RISCV select HAVE_CONTEXT_TRACKING_USER select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS if MMU + select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE) + select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE + select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION select HAVE_EBPF_JIT if MMU select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUNCTION_ERROR_INJECTION @@ -110,7 +115,6 @@ config RISCV select HAVE_KPROBES if !XIP_KERNEL select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL select HAVE_KRETPROBES if !XIP_KERNEL - select HAVE_RETHOOK if !XIP_KERNEL select HAVE_MOVE_PMD select HAVE_MOVE_PUD select HAVE_PCI @@ -119,6 +123,7 @@ config RISCV select HAVE_PERF_USER_STACK_DUMP select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RETHOOK if !XIP_KERNEL select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS @@ -142,11 +147,6 @@ config RISCV select TRACE_IRQFLAGS_SUPPORT select UACCESS_MEMCPY if !MMU select ZONE_DMA32 if 64BIT - select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && 
(CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE) - select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE - select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL - select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION config CLANG_SUPPORTS_DYNAMIC_FTRACE def_bool CC_IS_CLANG From cead443a306262a16056d84d63b5a6a10908eb62 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 24 May 2023 00:55:00 +0800 Subject: [PATCH 02/23] riscv: vmlinux-xip.lds.S: remove .alternative section ALTERNATIVE mechanism can't work on XIP, and this is also reflected by below Kconfig dependency: RISCV_ALTERNATIVE ... depends on !XIP_KERNEL ... So there's no .alternative section at all for XIP case, remove it. Signed-off-by: Jisheng Zhang Reviewed-by: Conor Dooley Reviewed-by: Guo Ren Tested-by: Nick Desaulniers # build Link: https://lore.kernel.org/r/20230523165502.2592-3-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vmlinux-xip.lds.S | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S index eab9edc3b631..50767647fbc6 100644 --- a/arch/riscv/kernel/vmlinux-xip.lds.S +++ b/arch/riscv/kernel/vmlinux-xip.lds.S @@ -98,12 +98,6 @@ SECTIONS __soc_builtin_dtb_table_end = .; } - . = ALIGN(8); - .alternative : { - __alt_start = .; - *(.alternative) - __alt_end = .; - } __init_end = .; . 
= ALIGN(16); From d4035ff16bfa73915f74cb3d28f878aff1da510a Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 24 May 2023 00:55:01 +0800 Subject: [PATCH 03/23] vmlinux.lds.h: use correct .init.data.* section name If building with -fdata-sections on riscv, LD_ORPHAN_WARN will warn similar as below: riscv64-linux-gnu-ld: warning: orphan section `.init.data.efi_loglevel' from `./drivers/firmware/efi/libstub/printk.stub.o' being placed in section `.init.data.efi_loglevel' I believe this is caused by a a typo: init.data.* should be .init.data.* Signed-off-by: Jisheng Zhang Reviewed-by: Kefeng Wang Tested-by: Nick Desaulniers # build Link: https://lore.kernel.org/r/20230523165502.2592-4-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- include/asm-generic/vmlinux.lds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index d1f57e4868ed..371026ca7221 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -688,7 +688,7 @@ /* init and exit section handling */ #define INIT_DATA \ KEEP(*(SORT(___kentry+*))) \ - *(.init.data init.data.*) \ + *(.init.data .init.data.*) \ MEM_DISCARD(init.data*) \ KERNEL_CTORS() \ MCOUNT_REC() \ From c828856b51bb4180c0803c12ffaeb86c41336c11 Mon Sep 17 00:00:00 2001 From: Zhangjin Wu Date: Wed, 24 May 2023 00:55:02 +0800 Subject: [PATCH 04/23] riscv: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION Select CONFIG_HAVE_LD_DEAD_CODE_DATA_ELIMINATION for RISC-V, allowing the user to enable dead code elimination. In order for this to work, ensure that we keep the alternative table by annotating them with KEEP. 
This boots well on qemu with both rv32_defconfig & rv64 defconfig, but it only shrinks their builds by ~1%, a smaller config is therefore customized to test this feature: | rv32 | rv64 --------|------------------------|--------------------- No DCE | 4460684 | 4893488 DCE | 3986716 | 4376400 Shrink | 473968 (~10.6%) | 517088 (~10.5%) The config used above only reserves necessary options to boot on qemu with serial console, more like the size-critical embedded scenes: - rv64 config: https://pastebin.com/crz82T0s - rv32 config: rv64 config + 32-bit.config Here is Jisheng's original commit-msg: When trying to run linux with various opensource riscv cores on resource limited FPGA platforms, for example, those FPGAs with less than 16MB SDRAM, I want to save mem as much as possible. One of the major technologies is kernel size optimizations, I found that riscv does not currently support HAVE_LD_DEAD_CODE_DATA_ELIMINATION, which passes -fdata-sections, -ffunction-sections to CFLAGS and passes the --gc-sections flag to the linker. This not only benefits my case on FPGA but also benefits defconfigs.
Here are some notable improvements from enabling this with defconfigs: nommu_k210_defconfig: text data bss dec hex 1112009 410288 59837 1582134 182436 before 962838 376656 51285 1390779 1538bb after rv32_defconfig: text data bss dec hex 8804455 2816544 290577 11911576 b5c198 before 8692295 2779872 288977 11761144 b375f8 after defconfig: text data bss dec hex 9438267 3391332 485333 13314932 cb2b74 before 9285914 3350052 483349 13119315 c82f53 after Signed-off-by: Zhangjin Wu Co-developed-by: Jisheng Zhang Signed-off-by: Jisheng Zhang Reviewed-by: Guo Ren Tested-by: Bin Meng Reviewed-by: Kefeng Wang Tested-by: Nick Desaulniers # build Link: https://lore.kernel.org/r/20230523165502.2592-5-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/kernel/vmlinux.lds.S | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8f55aa4aae34..62e84fee2cfd 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -115,6 +115,7 @@ config RISCV select HAVE_KPROBES if !XIP_KERNEL select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL select HAVE_KRETPROBES if !XIP_KERNEL + select HAVE_LD_DEAD_CODE_DATA_ELIMINATION select HAVE_MOVE_PMD select HAVE_MOVE_PUD select HAVE_PCI diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index e5f9f4677bbf..492dd4b8f3d6 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -85,11 +85,11 @@ SECTIONS INIT_DATA_SECTION(16) .init.pi : { - *(.init.pi*) + KEEP(*(.init.pi*)) } .init.bss : { - *(.init.bss) /* from the EFI stub */ + KEEP(*(.init.bss*)) /* from the EFI stub */ } .exit.data : { @@ -112,7 +112,7 @@ SECTIONS . 
= ALIGN(8); .alternative : { __alt_start = .; - *(.alternative) + KEEP(*(.alternative)) __alt_end = .; } __init_end = .; From f7584322e4fef794b290e5fdb290fa92a925236e Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 23 Jun 2023 10:06:17 -0700 Subject: [PATCH 05/23] riscv: disable HAVE_LD_DEAD_CODE_DATA_ELIMINATION for LLD Linking allyesconfig with ld.lld-17 with CONFIG_DEAD_CODE_ELIMINATION=y takes hours. Assuming this is a performance regression that can be fixed, tentatively disable this for now so that allyesconfig builds don't start timing out. If and when there's a fix to ld.lld, this can be converted to a version check instead so that users of older but still supported versions of ld.lld don't hurt themselves by enabling CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y. Link: https://github.com/ClangBuiltLinux/linux/issues/1881 Link: https://lore.kernel.org/linux-riscv/ZJXTwqZIkXLxXaSi@google.com/ Reported-by: Palmer Dabbelt Suggested-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 62e84fee2cfd..a8a9387eb284 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -115,7 +115,8 @@ config RISCV select HAVE_KPROBES if !XIP_KERNEL select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL select HAVE_KRETPROBES if !XIP_KERNEL - select HAVE_LD_DEAD_CODE_DATA_ELIMINATION + # https://github.com/ClangBuiltLinux/linux/issues/1881 + select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD select HAVE_MOVE_PMD select HAVE_MOVE_PUD select HAVE_PCI From bcc8790057c1f02d20654f68d107973405c1f823 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Mon, 19 Jun 2023 12:01:43 -0700 Subject: [PATCH 06/23] RISC-V: Document that V registers are clobbered on syscalls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is included in the ISA manual, but it's pretty common for bits 
of the ISA manual that are actually ABI to change. So let's document it explicitly. Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20230619190142.26498-1-palmer@rivosinc.com Signed-off-by: Palmer Dabbelt --- Documentation/riscv/vector.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/riscv/vector.rst b/Documentation/riscv/vector.rst index 48f189d79e41..165b7ed0ac4f 100644 --- a/Documentation/riscv/vector.rst +++ b/Documentation/riscv/vector.rst @@ -130,3 +130,11 @@ processes in form of sysctl knob: Modifying the system default enablement status does not affect the enablement status of any existing process of thread that do not make an execve() call. + +3. Vector Register State Across System Calls +--------------------------------------------- + +As indicated by version 1.0 of the V extension [1], vector registers are +clobbered by system calls. + +1: https://github.com/riscv/riscv-v-spec/blob/master/calling-convention.adoc From e50db34efdc8cac2f17b8f5d32fddd7b58914ce6 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Mon, 19 Jun 2023 10:21:01 -0700 Subject: [PATCH 07/23] RISC-V: Fix up some vector state related build failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I get a few build failures along the lines of ./arch/riscv/include/uapi/asm/sigcontext.h:19:36: error: field ‘v_state’ has incomplete type 19 | struct __riscv_v_ext_state v_state; | ^~~~~~~ ./arch/riscv/include/uapi/asm/sigcontext.h:32:49: error: field ‘sc_extdesc’ has incomplete type 32 | struct __riscv_extra_ext_header sc_extdesc; The V structures in question are defined for !assembly, so let's just do so for the others. 
Fixes: 8ee0b41898fa ("riscv: signal: Add sigcontext save/restore for vector") Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230619172101.18692-1-palmer@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/uapi/asm/sigcontext.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/include/uapi/asm/sigcontext.h b/arch/riscv/include/uapi/asm/sigcontext.h index 8b8a8541673a..8c8712aa9551 100644 --- a/arch/riscv/include/uapi/asm/sigcontext.h +++ b/arch/riscv/include/uapi/asm/sigcontext.h @@ -15,6 +15,8 @@ /* The size of END signal context header. */ #define END_HDR_SIZE 0x0 +#ifndef __ASSEMBLY__ + struct __sc_riscv_v_state { struct __riscv_v_ext_state v_state; } __attribute__((aligned(16))); @@ -33,4 +35,6 @@ struct sigcontext { }; }; +#endif /*!__ASSEMBLY__*/ + #endif /* _UAPI_ASM_RISCV_SIGCONTEXT_H */ From 26c38cd802c947401cfbcc285b7d841256b5f17f Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Sun, 25 Jun 2023 15:54:15 +0000 Subject: [PATCH 08/23] riscv: vector: only enable interrupts in the first-use trap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function irqentry_exit_to_user_mode() must be called with interrupt disabled. The caller of do_trap_insn_illegal() also assumes running without interrupts. So, we should turn off interrupts after riscv_v_first_use_handler() returns. 
Fixes: cd054837243b ("riscv: Allocate user's vector context in the first-use trap") Signed-off-by: Andy Chiu Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20230625155416.18629-1-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 5158961ea977..bc02b282a403 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -150,12 +150,18 @@ DO_ERROR_INFO(do_trap_insn_fault, asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *regs) { + bool handled; + if (user_mode(regs)) { irqentry_enter_from_user_mode(regs); local_irq_enable(); - if (!riscv_v_first_use_handler(regs)) + handled = riscv_v_first_use_handler(regs); + + local_irq_disable(); + + if (!handled) do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->epc, "Oops - illegal instruction"); From 75b59f2a90aa7ccac62e3dcb680dfb967b341431 Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 27 Jun 2023 01:55:54 +0000 Subject: [PATCH 09/23] riscv: vector: clear V-reg in the first-use trap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If there is no context switch happens after we enable V for a process, then we return to user space with whatever left on the CPU's V registers accessible to the process. The leaked data could belong to another process's V-context saved from last context switch, impacting process's confidentiality on the system. To prevent this from happening, we clear V registers by restoring zero'd V context after turining on V. 
Fixes: cd054837243b ("riscv: Allocate user's vector context in the first-use trap") Signed-off-by: Andy Chiu Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20230627015556.12329-2-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vector.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index f9c8e19ab301..8d92fb6c522c 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -167,6 +167,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) return true; } riscv_v_vstate_on(regs); + riscv_v_vstate_restore(current, regs); return true; } From 5c93c4c72fbc69f0f1cdf43c9402b923314e67c8 Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 27 Jun 2023 01:55:55 +0000 Subject: [PATCH 10/23] selftests: Test RISC-V Vector's first-use handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This add a test to check if the kernel zero-initializes all V registers after the first-use trap handler returns. If V registers are not zero-initialized, then the test should fail one out of several runs: ``` root@sifive-fpga:~# ./v_initval_nolibc # vl = 256 not ok 1 detect stale values on v-regesters 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4c 41 4e 47 3d 43 0 50 41 54 48 3d 2f 75 73 72 2f 6c 6f 63 61 6c 2f 73 62 69 6e 3a 2f 75 73 72 2f 6c 6f 63 61 6c 2f 62 69 6e 3a 2f 75 73 72 ff ff 81 0 0 0 0 0 0 0 0 0 0 0 0 0 ``` Otherwise, the test passes without errors each run. 
Signed-off-by: Andy Chiu Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20230627015556.12329-3-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- .../testing/selftests/riscv/vector/.gitignore | 1 + tools/testing/selftests/riscv/vector/Makefile | 6 +- .../selftests/riscv/vector/v_initval_nolibc.c | 68 +++++++++++++++++++ 3 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/riscv/vector/v_initval_nolibc.c diff --git a/tools/testing/selftests/riscv/vector/.gitignore b/tools/testing/selftests/riscv/vector/.gitignore index 4f2b4e8a3b08..9ae7964491d5 100644 --- a/tools/testing/selftests/riscv/vector/.gitignore +++ b/tools/testing/selftests/riscv/vector/.gitignore @@ -1,2 +1,3 @@ vstate_exec_nolibc vstate_prctl +v_initval_nolibc diff --git a/tools/testing/selftests/riscv/vector/Makefile b/tools/testing/selftests/riscv/vector/Makefile index cd6e80bf995d..bfff0ff4f3be 100644 --- a/tools/testing/selftests/riscv/vector/Makefile +++ b/tools/testing/selftests/riscv/vector/Makefile @@ -2,7 +2,7 @@ # Copyright (C) 2021 ARM Limited # Originally tools/testing/arm64/abi/Makefile -TEST_GEN_PROGS := vstate_prctl +TEST_GEN_PROGS := vstate_prctl v_initval_nolibc TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc include ../../lib.mk @@ -13,3 +13,7 @@ $(OUTPUT)/vstate_prctl: vstate_prctl.c ../hwprobe/sys_hwprobe.S $(OUTPUT)/vstate_exec_nolibc: vstate_exec_nolibc.c $(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \ -Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc + +$(OUTPUT)/v_initval_nolibc: v_initval_nolibc.c + $(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \ + -Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc diff --git a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c new file mode 100644 index 000000000000..66764edb0d52 --- /dev/null +++ b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: 
GPL-2.0-only + +#include "../../kselftest.h" +#define MAX_VSIZE (8192 * 32) + +void dump(char *ptr, int size) +{ + int i = 0; + + for (i = 0; i < size; i++) { + if (i != 0) { + if (i % 16 == 0) + printf("\n"); + else if (i % 8 == 0) + printf(" "); + } + printf("%02x ", ptr[i]); + } + printf("\n"); +} + +int main(void) +{ + int i; + unsigned long vl; + char *datap, *tmp; + + datap = malloc(MAX_VSIZE); + if (!datap) { + ksft_test_result_fail("fail to allocate memory for size = %lu\n", MAX_VSIZE); + exit(-1); + } + + tmp = datap; + asm volatile ( + ".option push\n\t" + ".option arch, +v\n\t" + "vsetvli %0, x0, e8, m8, ta, ma\n\t" + "vse8.v v0, (%2)\n\t" + "add %1, %2, %0\n\t" + "vse8.v v8, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v16, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v24, (%1)\n\t" + ".option pop\n\t" + : "=&r" (vl), "=r" (tmp) : "r" (datap) : "memory"); + + ksft_print_msg("vl = %lu\n", vl); + + if (datap[0] != 0x00 && datap[0] != 0xff) { + ksft_test_result_fail("v-regesters are not properly initialized\n"); + dump(datap, vl * 4); + exit(-1); + } + + for (i = 1; i < vl * 4; i++) { + if (datap[i] != datap[0]) { + ksft_test_result_fail("detect stale values on v-regesters\n"); + dump(datap, vl * 4); + exit(-2); + } + } + + free(datap); + ksft_exit_pass(); + return 0; +} From 54cdede08f2f4414629001b124110d656161080a Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 16 Jun 2023 12:43:57 +0100 Subject: [PATCH 11/23] riscv: vdso: include vdso/vsyscall.h for vdso_data Add include of to pull in the defition of vdso_data to remove the following sparse warning: arch/riscv/kernel/vdso.c:39:18: warning: symbol 'vdso_data' was not declared. Should it be static? 
Signed-off-by: Ben Dooks Link: https://lore.kernel.org/r/20230616114357.159601-1-ben.dooks@codethink.co.uk Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 9a68e7eaae4d..2cf76218a5bd 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -15,6 +15,7 @@ #include #include #include +#include enum vvar_pages { VVAR_DATA_PAGE_OFFSET, From c1f048a6bd7d7cb42e9bfd79eff85b33894997fe Mon Sep 17 00:00:00 2001 From: Song Shuai Date: Mon, 29 May 2023 18:15:24 +0800 Subject: [PATCH 12/23] riscv: Enable ARCH_SUSPEND_POSSIBLE for s2idle With this configuration opened, the basic platform-independent s2idle is provided by the sole "s2idle" string in `/sys/power/mem_sleep`. At the end of s2idle, harts will hit the `wfi` instruction or enter the SUSPENDED state through the sbi_cpuidle driver. The interrupt of possible wakeup devices will be kept to wake the system up. And platform-specific sleep states can be provided by future ACPI and SBI SUSP extension support. Signed-off-by: Song Shuai Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20230529101524.322076-1-songshuaishuai@tinylab.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c8c22cf11602..e242bf63c0d2 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -873,6 +873,9 @@ config ARCH_HIBERNATION_POSSIBLE config ARCH_HIBERNATION_HEADER def_bool HIBERNATION +config ARCH_SUSPEND_POSSIBLE + def_bool y + endmenu # "Power management options" menu "CPU Power Management" From 85fadc0d04119c2fe4a20287767ab904c6d21ba1 Mon Sep 17 00:00:00 2001 From: Woody Zhang Date: Wed, 14 Jun 2023 21:19:07 +0800 Subject: [PATCH 13/23] riscv: move memblock_allow_resize() after linear mapping is ready The initial memblock metadata is accessed from kernel image mapping. 
The regions arrays need to "reallocated" from memblock and accessed through linear mapping to cover more memblock regions. So the resizing should not be allowed until linear mapping is ready. Note that there are memblock allocations when building linear mapping. This patch is similar to 24cc61d8cb5a ("arm64: memblock: don't permit memblock resizing until linear mapping is up"). In following log, many memblock regions are reserved before create_linear_mapping_page_table(). And then it triggered reallocation of memblock.reserved.regions and memcpy the old array in kernel image mapping to the new array in linear mapping which caused a page fault. [ 0.000000] memblock_reserve: [0x00000000bf01f000-0x00000000bf01ffff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] memblock_reserve: [0x00000000bf021000-0x00000000bf021fff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] memblock_reserve: [0x00000000bf023000-0x00000000bf023fff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] memblock_reserve: [0x00000000bf025000-0x00000000bf025fff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] memblock_reserve: [0x00000000bf027000-0x00000000bf027fff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] memblock_reserve: [0x00000000bf029000-0x00000000bf029fff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] memblock_reserve: [0x00000000bf02b000-0x00000000bf02bfff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] memblock_reserve: [0x00000000bf02d000-0x00000000bf02dfff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] memblock_reserve: [0x00000000bf02f000-0x00000000bf02ffff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] memblock_reserve: [0x00000000bf030000-0x00000000bf030fff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] OF: reserved mem: 0x0000000080000000..0x000000008007ffff (512 KiB) map non-reusable mmode_resv0@80000000 [ 0.000000] memblock_reserve: [0x00000000bf000000-0x00000000bf001fed] 
paging_init+0x19a/0x5ae [ 0.000000] memblock_phys_alloc_range: 4096 bytes align=0x1000 from=0x0000000000000000 max_addr=0x0000000000000000 alloc_pmd_fixmap+0x14/0x1c [ 0.000000] memblock_reserve: [0x000000017ffff000-0x000000017fffffff] memblock_alloc_range_nid+0xb8/0x128 [ 0.000000] memblock: reserved is doubled to 256 at [0x000000017fffd000-0x000000017fffe7ff] [ 0.000000] Unable to handle kernel paging request at virtual address ff600000ffffd000 [ 0.000000] Oops [#1] [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 6.4.0-rc1-00011-g99a670b2069c #66 [ 0.000000] Hardware name: riscv-virtio,qemu (DT) [ 0.000000] epc : __memcpy+0x60/0xf8 [ 0.000000] ra : memblock_double_array+0x192/0x248 [ 0.000000] epc : ffffffff8081d214 ra : ffffffff80a3dfc0 sp : ffffffff81403bd0 [ 0.000000] gp : ffffffff814fbb38 tp : ffffffff8140dac0 t0 : 0000000001600000 [ 0.000000] t1 : 0000000000000000 t2 : 000000008f001000 s0 : ffffffff81403c60 [ 0.000000] s1 : ffffffff80c0bc98 a0 : ff600000ffffd000 a1 : ffffffff80c0bcd8 [ 0.000000] a2 : 0000000000000c00 a3 : ffffffff80c0c8d8 a4 : 0000000080000000 [ 0.000000] a5 : 0000000000080000 a6 : 0000000000000000 a7 : 0000000080200000 [ 0.000000] s2 : ff600000ffffd000 s3 : 0000000000002000 s4 : 0000000000000c00 [ 0.000000] s5 : ffffffff80c0bc60 s6 : ffffffff80c0bcc8 s7 : 0000000000000000 [ 0.000000] s8 : ffffffff814fd0a8 s9 : 000000017fffe7ff s10: 0000000000000000 [ 0.000000] s11: 0000000000001000 t3 : 0000000000001000 t4 : 0000000000000000 [ 0.000000] t5 : 000000008f003000 t6 : ff600000ffffd000 [ 0.000000] status: 0000000200000100 badaddr: ff600000ffffd000 cause: 000000000000000f [ 0.000000] [] __memcpy+0x60/0xf8 [ 0.000000] [] memblock_add_range.isra.14+0x12c/0x162 [ 0.000000] [] memblock_reserve+0x6e/0x8c [ 0.000000] [] memblock_alloc_range_nid+0xb8/0x128 [ 0.000000] [] memblock_phys_alloc_range+0x5e/0x6a [ 0.000000] [] alloc_pmd_fixmap+0x14/0x1c [ 0.000000] [] alloc_p4d_fixmap+0xc/0x14 [ 0.000000] [] 
create_pgd_mapping+0x98/0x17c [ 0.000000] [] create_linear_mapping_range.constprop.10+0xe4/0x112 [ 0.000000] [] paging_init+0x3ec/0x5ae [ 0.000000] [] setup_arch+0xb2/0x576 [ 0.000000] [] start_kernel+0x72/0x57e [ 0.000000] Code: b303 0285 b383 0305 be03 0385 be83 0405 bf03 0485 (b023) 00ef [ 0.000000] ---[ end trace 0000000000000000 ]--- [ 0.000000] Kernel panic - not syncing: Attempted to kill the idle task! [ 0.000000] ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]--- Fixes: 671f9a3e2e24 ("RISC-V: Setup initial page tables in two stages") Signed-off-by: Woody Zhang Tested-by: Song Shuai Link: https://lore.kernel.org/r/tencent_FBB94CE615C5CCE7701CD39C15CCE0EE9706@qq.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 3b1e927a06b7..59df29fc635d 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -267,7 +267,6 @@ static void __init setup_bootmem(void) dma_contiguous_reserve(dma32_phys_limit); if (IS_ENABLED(CONFIG_64BIT)) hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); - memblock_allow_resize(); } #ifdef CONFIG_MMU @@ -1370,6 +1369,9 @@ void __init paging_init(void) { setup_bootmem(); setup_vm_final(); + + /* Depend on that Linear Mapping is ready */ + memblock_allow_resize(); } void __init misc_mem_init(void) From 9657e9b7d2538dc73c24947aa00a8525dfb8062c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Thu, 29 Jun 2023 16:22:28 +0200 Subject: [PATCH 14/23] riscv: Discard vector state on syscalls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RISC-V vector specification states: Executing a system call causes all caller-saved vector registers (v0-v31, vl, vtype) and vstart to become unspecified. The vector registers are set to all 1s, vill is set (invalid), and the vector status is set to Dirty. 
That way we can prevent userspace from accidentally relying on the stated save. Rémi pointed out [1] that writing to the registers might be superfluous, and setting vill is sufficient. Link: https://lore.kernel.org/linux-riscv/12784326.9UPPK3MAeB@basile.remlab.net/ # [1] Suggested-by: Darius Rad Suggested-by: Palmer Dabbelt Suggested-by: Rémi Denis-Courmont Signed-off-by: Björn Töpel Link: https://lore.kernel.org/r/20230629142228.1125715-1-bjorn@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/vector.h | 34 +++++++++++++++++++++++++++++++++ arch/riscv/kernel/traps.c | 2 ++ 2 files changed, 36 insertions(+) diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index 04c0b07bf6cd..3d78930cab51 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -33,6 +33,11 @@ static inline void __riscv_v_vstate_clean(struct pt_regs *regs) regs->status = (regs->status & ~SR_VS) | SR_VS_CLEAN; } +static inline void __riscv_v_vstate_dirty(struct pt_regs *regs) +{ + regs->status = (regs->status & ~SR_VS) | SR_VS_DIRTY; +} + static inline void riscv_v_vstate_off(struct pt_regs *regs) { regs->status = (regs->status & ~SR_VS) | SR_VS_OFF; @@ -128,6 +133,34 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ riscv_v_disable(); } +static inline void __riscv_v_vstate_discard(void) +{ + unsigned long vl, vtype_inval = 1UL << (BITS_PER_LONG - 1); + + riscv_v_enable(); + asm volatile ( + ".option push\n\t" + ".option arch, +v\n\t" + "vsetvli %0, x0, e8, m8, ta, ma\n\t" + "vmv.v.i v0, -1\n\t" + "vmv.v.i v8, -1\n\t" + "vmv.v.i v16, -1\n\t" + "vmv.v.i v24, -1\n\t" + "vsetvl %0, x0, %1\n\t" + ".option pop\n\t" + : "=&r" (vl) : "r" (vtype_inval) : "memory"); + riscv_v_disable(); +} + +static inline void riscv_v_vstate_discard(struct pt_regs *regs) +{ + if ((regs->status & SR_VS) == SR_VS_OFF) + return; + + __riscv_v_vstate_discard(); + __riscv_v_vstate_dirty(regs); +} + static inline void 
riscv_v_vstate_save(struct task_struct *task, struct pt_regs *regs) { @@ -173,6 +206,7 @@ static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return fals static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; } #define riscv_v_vsize (0) +#define riscv_v_vstate_discard(regs) do {} while (0) #define riscv_v_vstate_save(task, regs) do {} while (0) #define riscv_v_vstate_restore(task, regs) do {} while (0) #define __switch_to_vector(__prev, __next) do {} while (0) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index bc02b282a403..f910dfccbf5d 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -302,6 +302,8 @@ asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs) regs->epc += 4; regs->orig_a0 = regs->a0; + riscv_v_vstate_discard(regs); + syscall = syscall_enter_from_user_mode(regs, syscall); if (syscall < NR_syscalls) From 52909f1768023656d5c429873e2246a134289a95 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 29 Jun 2023 12:33:34 +0100 Subject: [PATCH 15/23] RISC-V: drop error print from riscv_hartid_to_cpuid() As of commit 2ac874343749 ("RISC-V: split early & late of_node to hartid mapping") my CI complains about newly added pr_err() messages during boot, for example: [ 0.000000] Couldn't find cpu id for hartid [0] [ 0.000000] riscv-intc: unable to find hart id for /cpus/cpu@0/interrupt-controller Before the split, riscv_of_processor_hartid() contained a check for whether the cpu was "available", before calling riscv_hartid_to_cpuid(), but after the split riscv_of_processor_hartid() can be called for cpus that are disabled. Most callers of riscv_hartid_to_cpuid() already report custom errors where it falls, making this print superfluous in those case. In other places, the print adds nothing - see riscv_intc_init() for example. 
Fixes: 2ac874343749 ("RISC-V: split early & late of_node to hartid mapping") Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20230629-paternity-grafted-b901b76d04a0@wendy Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/smp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 23e533766a49..85bbce0f758c 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -58,7 +58,6 @@ int riscv_hartid_to_cpuid(unsigned long hartid) if (cpuid_to_hartid_map(i) == hartid) return i; - pr_err("Couldn't find cpu id for hartid [%lu]\n", hartid); return -ENOENT; } From aeb71e42caae2031ec849a858080d81462cacca9 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Sun, 2 Jul 2023 00:10:01 +0100 Subject: [PATCH 16/23] dt-bindings: riscv: deprecate riscv,isa intro ===== When the RISC-V dt-bindings were accepted upstream in Linux, the base ISA etc had yet to be ratified. By the ratification of the base ISA, incompatible changes had snuck into the specifications - for example the Zicsr and Zifencei extensions were spun out of the base ISA. Fast forward to today, and the reason for this patch. Currently the riscv,isa dt property permits only a specific subset of the ISA string - in particular it excludes version numbering. With the current constraints, it is not possible to discern whether "rv64i" means that the hart supports the fence.i instruction, for example. Future systems may choose to implement their own instruction fencing, perhaps using a vendor extension, or they may not implement the optional counter extensions. Software needs a way to determine this. versioning schemes ================== "Use the extension versions that are described in the ISA manual" you may say, and it's not like this has not been considered. Firstly, software that parses the riscv,isa property at runtime will need to contain a lookup table of some sort that maps arbitrary versions to versions it understands. 
There is no consistent application of version numbers to extensions, with a higgledy-piggledy collection of tags, "bare" and versioned documents awaiting the reader on the "recently ratified extensions" page: https://wiki.riscv.org/display/HOME/Recently+Ratified+Extensions As an aside, and this is reflected in the patch too, since many extensions have yet to appear in a release of the ISA specs, they are defined by commits in their respective "working draft" repositories. Secondly, there is an issue of backwards compatibility, whereby allowing numbers in the ISA string may break some parsers. This would require an additional property to be created to even use the versions in this manner. ~boolean properties~ string array property ========================================== If a new property is needed, the whole approach may as well be looked at from the bottom up. A string with limited character choices etc is hardly the best approach for communicating extension information to software. Switching to properties that are defined on a per-extension basis allows us to define explicit meanings for the DT representation of each extension - rather than the current situation where different operating systems or other bits of software may impart different meanings to characters in the string. Clearly the best source of meanings is the specifications themselves; this just provides us the ability to choose at what point in time the meaning is set. If an extension changes incompatibly in the future, a new property will be required. Off-list, some of the RVI folks have committed to shoring up the wording in either the ISA specifications or the riscv-isa-manual, so that in the future, modifications to and additions or removals of features will require a new extension. Codifying that assertion somewhere would make it quite unlikely that compatibility would be broken, but we have the tools required to deal with it, if & when it crops up.
It is in our collective interest, as consumers of extension meanings, to define a scheme that enforces compatibility. The use of individual elements, rather than a single string, will also permit validation that the properties have a meaning, as well as potentially rejecting mutually exclusive combinations, or enforcing dependencies between extensions. That would not have been possible with the current dt-schema infrastructure for arbitrary strings, as we would need to add a riscv,isa parser to dt-validate! That's not implemented in this patch, but rather left as future work (for the brave, or the foolish). parser simplicity ================= Many systems that parse DT at runtime already implement a function that can check for the presence of a string in an array of strings, as it is similar to the process for parsing a list of compatible strings, so a bunch of new, custom, DT parsing should not be needed. Getting rid of "riscv,isa" parsing would be a nice simplification, but unfortunately for backwards compatibility with old dtbs, existing parsers may not be removable - removing them would otherwise greatly simplify dt parsing code. In Linux, for example, checking whether a hart supports an extension becomes as simple as: of_property_match_string(node, "riscv,isa-extensions", "zicbom") vendor extensions ================= Compared to riscv,isa, this proposed scheme promotes vendor extensions, oft touted as the strength of RISC-V, to first-class citizens. At present, extensions are defined as meaning what the RISC-V ISA specifications say they do. There is no realistic way of using that interface to provide cross-platform definitions for what vendor extensions mean. Vendor extensions may also have even less consistency than RVI does in terms of versioning, or no care about backwards compatibility. The new property allows us to assign explicit meanings on a per vendor extension basis, backed up by a description of their meanings.
fin === Create a new file to store the extension meanings and a new riscv,isa-base property to replace the aspect of riscv,isa that is not represented by the new property - the base ISA implemented by a hart. As a starting point, add properties for extensions currently used in Linux. Finally, mark riscv,isa as deprecated, as removing support for it in existing programs would be an ABI break. CC: Palmer Dabbelt CC: Paul Walmsley CC: Rob Herring CC: Krzysztof Kozlowski CC: Alistair Francis CC: Andrew Jones CC: Anup Patel CC: Atish Patra CC: Jessica Clarke CC: Rick Chen CC: Leo CC: Oleksii CC: linux-riscv@lists.infradead.org CC: qemu-riscv@nongnu.org CC: u-boot@lists.denx.de CC: devicetree@vger.kernel.org CC: linux-kernel@vger.kernel.org Reviewed-by: Palmer Dabbelt Acked-by: Palmer Dabbelt Reviewed-by: Rob Herring Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20230702-eats-scorebook-c951f170d29f@spud Signed-off-by: Palmer Dabbelt --- .../devicetree/bindings/riscv/cpus.yaml | 43 ++- .../devicetree/bindings/riscv/extensions.yaml | 250 ++++++++++++++++++ 2 files changed, 270 insertions(+), 23 deletions(-) create mode 100644 Documentation/devicetree/bindings/riscv/extensions.yaml diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index 67bd239ead0b..38c0b5213736 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -25,6 +25,7 @@ description: | allOf: - $ref: /schemas/cpu.yaml# + - $ref: extensions.yaml properties: compatible: @@ -82,25 +83,6 @@ properties: description: The blocksize in bytes for the Zicboz cache operations. - riscv,isa: - description: - Identifies the specific RISC-V instruction set architecture - supported by the hart. 
These are documented in the RISC-V - User-Level ISA document, available from - https://riscv.org/specifications/ - - Due to revisions of the ISA specification, some deviations - have arisen over time. - Notably, riscv,isa was defined prior to the creation of the - Zicntr, Zicsr, Zifencei and Zihpm extensions and thus "i" - implies "zicntr_zicsr_zifencei_zihpm". - - While the isa strings in ISA specification are case - insensitive, letters in the riscv,isa string must be all - lowercase. - $ref: /schemas/types.yaml#/definitions/string - pattern: ^rv(?:64|32)imaf?d?q?c?b?k?j?p?v?h?(?:[hsxz](?:[a-z])+)?(?:_[hsxz](?:[a-z])+)*$ - # RISC-V has multiple properties for cache op block sizes as the sizes # differ between individual CBO extensions cache-op-block-size: false @@ -139,8 +121,17 @@ properties: DMIPS/MHz, relative to highest capacity-dmips-mhz in the system. +anyOf: + - required: + - riscv,isa + - required: + - riscv,isa-base + +dependencies: + riscv,isa-base: [ "riscv,isa-extensions" ] + riscv,isa-extensions: [ "riscv,isa-base" ] + required: - - riscv,isa - interrupt-controller unevaluatedProperties: false @@ -160,7 +151,9 @@ examples: i-cache-sets = <128>; i-cache-size = <16384>; reg = <0>; - riscv,isa = "rv64imac"; + riscv,isa-base = "rv64i"; + riscv,isa-extensions = "i", "m", "a", "c"; + cpu_intc0: interrupt-controller { #interrupt-cells = <1>; compatible = "riscv,cpu-intc"; @@ -183,8 +176,10 @@ examples: i-tlb-size = <32>; mmu-type = "riscv,sv39"; reg = <1>; - riscv,isa = "rv64imafdc"; tlb-split; + riscv,isa-base = "rv64i"; + riscv,isa-extensions = "i", "m", "a", "f", "d", "c"; + cpu_intc1: interrupt-controller { #interrupt-cells = <1>; compatible = "riscv,cpu-intc"; @@ -202,8 +197,10 @@ examples: device_type = "cpu"; reg = <0>; compatible = "riscv"; - riscv,isa = "rv64imafdc"; mmu-type = "riscv,sv48"; + riscv,isa-base = "rv64i"; + riscv,isa-extensions = "i", "m", "a", "f", "d", "c"; + interrupt-controller { #interrupt-cells = <1>; interrupt-controller; diff 
--git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml new file mode 100644 index 000000000000..cc1f546fdbdc --- /dev/null +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -0,0 +1,250 @@ +# SPDX-License-Identifier: (GPL-2.0 OR MIT) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/riscv/extensions.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: RISC-V ISA extensions + +maintainers: + - Paul Walmsley + - Palmer Dabbelt + - Conor Dooley + +description: | + RISC-V has a large number of extensions, some of which are "standard" + extensions, meaning they are ratified by RISC-V International, and others + are "vendor" extensions. + This document defines properties that indicate whether a hart supports a + given extension. + + Once a standard extension has been ratified, no changes in behaviour can be + made without the creation of a new extension. + The properties for standard extensions therefore map to their originally + ratified states, with the exception of the I, Zicntr & Zihpm extensions. + See the "i" property for more information. + +select: + properties: + compatible: + contains: + const: riscv + +properties: + riscv,isa: + description: + Identifies the specific RISC-V instruction set architecture + supported by the hart. These are documented in the RISC-V + User-Level ISA document, available from + https://riscv.org/specifications/ + + Due to revisions of the ISA specification, some deviations + have arisen over time. + Notably, riscv,isa was defined prior to the creation of the + Zicntr, Zicsr, Zifencei and Zihpm extensions and thus "i" + implies "zicntr_zicsr_zifencei_zihpm". + + While the isa strings in ISA specification are case + insensitive, letters in the riscv,isa string must be all + lowercase. 
+ $ref: /schemas/types.yaml#/definitions/string + pattern: ^rv(?:64|32)imaf?d?q?c?b?k?j?p?v?h?(?:[hsxz](?:[a-z])+)?(?:_[hsxz](?:[a-z])+)*$ + deprecated: true + + riscv,isa-base: + description: + The base ISA implemented by this hart, as described by the 20191213 + version of the unprivileged ISA specification. + enum: + - rv32i + - rv64i + + riscv,isa-extensions: + $ref: /schemas/types.yaml#/definitions/string-array + minItems: 1 + description: Extensions supported by the hart. + items: + anyOf: + # single letter extensions, in canonical order + - const: i + description: | + The base integer instruction set, as ratified in the 20191213 + version of the unprivileged ISA specification. + + This does not include Chapter 10, "Counters", which was moved into + the Zicntr and Zihpm extensions after the ratification of the + 20191213 version of the unprivileged specification. + + - const: m + description: + The standard M extension for integer multiplication and division, as + ratified in the 20191213 version of the unprivileged ISA + specification. + + - const: a + description: + The standard A extension for atomic instructions, as ratified in the + 20191213 version of the unprivileged ISA specification. + + - const: f + description: + The standard F extension for single-precision floating point, as + ratified in the 20191213 version of the unprivileged ISA + specification. + + - const: d + description: + The standard D extension for double-precision floating-point, as + ratified in the 20191213 version of the unprivileged ISA + specification. + + - const: q + description: + The standard Q extension for quad-precision floating-point, as + ratified in the 20191213 version of the unprivileged ISA + specification. + + - const: c + description: + The standard C extension for compressed instructions, as ratified in + the 20191213 version of the unprivileged ISA specification. 
+ + - const: v + description: + The standard V extension for vector operations, as ratified + in-and-around commit 7a6c8ae ("Fix text that describes vfmv.v.f + encoding") of the riscv-v-spec. + + - const: h + description: + The standard H extension for hypervisors as ratified in the 20191213 + version of the privileged ISA specification. + + # multi-letter extensions, sorted alphanumerically + - const: smaia + description: | + The standard Smaia supervisor-level extension for the advanced + interrupt architecture for machine-mode-visible csr and behavioural + changes to interrupts as frozen at commit ccbddab ("Merge pull + request #42 from riscv/jhauser-2023-RC4") of riscv-aia. + + - const: ssaia + description: | + The standard Ssaia supervisor-level extension for the advanced + interrupt architecture for supervisor-mode-visible csr and + behavioural changes to interrupts as frozen at commit ccbddab + ("Merge pull request #42 from riscv/jhauser-2023-RC4") of riscv-aia. + + - const: sscofpmf + description: | + The standard Sscofpmf supervisor-level extension for count overflow + and mode-based filtering as ratified at commit 01d1df0 ("Add ability + to manually trigger workflow. (#2)") of riscv-count-overflow. + + - const: sstc + description: | + The standard Sstc supervisor-level extension for time compare as + ratified at commit 3f9ed34 ("Add ability to manually trigger + workflow. (#2)") of riscv-time-compare. + + - const: svinval + description: + The standard Svinval supervisor-level extension for fine-grained + address-translation cache invalidation as ratified in the 20191213 + version of the privileged ISA specification. + + - const: svnapot + description: + The standard Svnapot supervisor-level extensions for napot + translation contiguity as ratified in the 20191213 version of the + privileged ISA specification. 
+ + - const: svpbmt + description: + The standard Svpbmt supervisor-level extensions for page-based + memory types as ratified in the 20191213 version of the privileged + ISA specification. + + - const: zba + description: | + The standard Zba bit-manipulation extension for address generation + acceleration instructions as ratified at commit 6d33919 ("Merge pull + request #158 from hirooih/clmul-fix-loop-end-condition") of + riscv-bitmanip. + + - const: zbb + description: | + The standard Zbb bit-manipulation extension for basic bit-manipulation + as ratified at commit 6d33919 ("Merge pull request #158 from + hirooih/clmul-fix-loop-end-condition") of riscv-bitmanip. + + - const: zbc + description: | + The standard Zbc bit-manipulation extension for carry-less + multiplication as ratified at commit 6d33919 ("Merge pull request + #158 from hirooih/clmul-fix-loop-end-condition") of riscv-bitmanip. + + - const: zbs + description: | + The standard Zbs bit-manipulation extension for single-bit + instructions as ratified at commit 6d33919 ("Merge pull request #158 + from hirooih/clmul-fix-loop-end-condition") of riscv-bitmanip. + + - const: zicbom + description: + The standard Zicbom extension for base cache management operations as + ratified in commit 3dd606f ("Create cmobase-v1.0.pdf") of riscv-CMOs. + + - const: zicbop + description: + The standard Zicbop extension for cache-block prefetch instructions + as ratified in commit 3dd606f ("Create cmobase-v1.0.pdf") of + riscv-CMOs. + + - const: zicboz + description: + The standard Zicboz extension for cache-block zeroing as ratified + in commit 3dd606f ("Create cmobase-v1.0.pdf") of riscv-CMOs. + + - const: zicntr + description: + The standard Zicntr extension for base counters and timers, as + ratified in the 20191213 version of the unprivileged ISA + specification. 
+ + - const: zicsr + description: | + The standard Zicsr extension for control and status register + instructions, as ratified in the 20191213 version of the + unprivileged ISA specification. + + This does not include Chapter 10, "Counters", which documents + special case read-only CSRs, that were moved into the Zicntr and + Zihpm extensions after the ratification of the 20191213 version of + the unprivileged specification. + + - const: zifencei + description: + The standard Zifencei extension for instruction-fetch fence, as + ratified in the 20191213 version of the unprivileged ISA + specification. + + - const: zihintpause + description: + The standard Zihintpause extension for pause hints, as ratified in + commit d8ab5c7 ("Zihintpause is ratified") of the riscv-isa-manual. + + - const: zihpm + description: + The standard Zihpm extension for hardware performance counters, as + ratified in the 20191213 version of the unprivileged ISA + specification. + + - const: ztso + description: + The standard Ztso extension for total store ordering, as ratified + in commit 2e5236 ("Ztso is now ratified.") of the + riscv-isa-manual. + +additionalProperties: true +... From 62ba41d2761206664a1fdc998051324457da2dd6 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Mon, 3 Jul 2023 12:00:44 -0700 Subject: [PATCH 17/23] mm: riscv: fix an unsafe pte read in huge_pte_alloc() The WARN_ON_ONCE() statement in riscv's huge_pte_alloc() is susceptible to false positives, because the pte is read twice at the C language level, locklessly, within the same conditional statement. Depending on compiler behavior, this can lead to generated machine code that actually reads the pte just once, or twice. Reading twice will expose the code to changing pte values and cause incorrect behavior. In [1], similar code actually caused a kernel crash on 64-bit x86, when using clang to build the kernel, but only after the conversion from *pte reads, to ptep_get(pte). 
The latter uses READ_ONCE(), which forced a double read of *pte. Rather than waiting for the upcoming ptep_get() conversion, just convert this part of the code now, but in a way that avoids the above problem: take a single snapshot of the pte before using it in the WARN conditional. As expected, this preparatory step does not actually change the generated code ("make mm/hugetlbpage.s"), on riscv64, when using a gcc 12.2 cross compiler. [1] https://lore.kernel.org/20230630013203.1955064-1-jhubbard@nvidia.com Suggested-by: James Houghton Cc: Ryan Roberts Signed-off-by: John Hubbard Reviewed-by: Andrew Jones Reviewed-by: Ryan Roberts Link: https://lore.kernel.org/r/20230703190044.311730-1-jhubbard@nvidia.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/hugetlbpage.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 542883b3b49b..96225a8533ad 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -73,7 +73,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, } out: - WARN_ON_ONCE(pte && pte_present(*pte) && !pte_huge(*pte)); + if (pte) { + pte_t pteval = ptep_get_lockless(pte); + + WARN_ON_ONCE(pte_present(pteval) && !pte_huge(pteval)); + } return pte; } From 6259f3443c6a376aa077816ac92e9ddeb0817d09 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 3 Jul 2023 19:31:26 +0100 Subject: [PATCH 18/23] risc-v: Fix order of IPI enablement vs RCU startup Conor reports that risc-v tries to enable IPIs before telling the core code to enable RCU. With the introduction of the maple tree as a backing store for the irq descriptors, this results in a very shouty boot sequence, as RCU is legitimately upset. Restore some sanity by moving the riscv_ipi_enable() call after notify_cpu_starting(), which explicitly enables RCU on the calling CPU.
Fixes: 832f15f42646 ("RISC-V: Treat IPIs as normal Linux IRQs") Reported-by: Conor Dooley Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230703-dupe-frying-79ae2ccf94eb@spud Cc: Anup Patel Cc: Palmer Dabbelt Cc: Linus Torvalds Tested-by: Conor Dooley Link: https://lore.kernel.org/r/20230703183126.1567625-1-maz@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/smpboot.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index bb0b76e1a6d4..f4d6acb38dd0 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -238,10 +238,11 @@ asmlinkage __visible void smp_callin(void) mmgrab(mm); current->active_mm = mm; - riscv_ipi_enable(); - store_cpu_topology(curr_cpuid); notify_cpu_starting(curr_cpuid); + + riscv_ipi_enable(); + numa_add_cpu(curr_cpuid); set_cpu_online(curr_cpuid, 1); probe_vendor_features(curr_cpuid); From 5177978ee074d55577aabad7c42b431e8af68fcc Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Wed, 28 Jun 2023 20:17:05 -0700 Subject: [PATCH 19/23] RISC-V: Document the ISA string parsing rules for ACPI We've had a ton of issues around the ISA string parsing rules elsewhere in RISC-V, so let's at least be clear about what the rules are so we can try and avoid more issues. Link: https://lore.kernel.org/r/CAK9=C2Vy-4V9kgnga98tiC3TeHkR2LFPakyBbS8s_h3_Z=ieyQ@mail.gmail.com/ Link: https://lore.kernel.org/r/20230629031705.15575-1-palmer@rivosinc.com Signed-off-by: Palmer Dabbelt --- Documentation/riscv/acpi.rst | 10 ++++++++++ Documentation/riscv/index.rst | 1 + 2 files changed, 11 insertions(+) create mode 100644 Documentation/riscv/acpi.rst diff --git a/Documentation/riscv/acpi.rst b/Documentation/riscv/acpi.rst new file mode 100644 index 000000000000..9870a282815b --- /dev/null +++ b/Documentation/riscv/acpi.rst @@ -0,0 +1,10 @@ +.. 
SPDX-License-Identifier: GPL-2.0 + +============== +ACPI on RISC-V +============== + +The ISA string parsing rules for ACPI are defined by `Version ASCIIDOC +Conversion, 12/2022 of the RISC-V specifications, as defined by tag +"riscv-isa-release-1239329-2023-05-23" (commit 1239329 +) <https://github.com/riscv/riscv-isa-manual/releases/tag/riscv-isa-release-1239329-2023-05-23>`_ diff --git a/Documentation/riscv/index.rst b/Documentation/riscv/index.rst index 95cf9c1e1da1..81cf6e616476 100644 --- a/Documentation/riscv/index.rst +++ b/Documentation/riscv/index.rst @@ -5,6 +5,7 @@ RISC-V architecture .. toctree:: :maxdepth: 1 + acpi boot-image-header vm-layout hwprobe From a2492ca86c98f676561f7d318b1e2e1786af0caf Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 23 Jun 2023 23:03:20 -0700 Subject: [PATCH 20/23] riscv: Select HAVE_ARCH_USERFAULTFD_MINOR This allocates the VM flag needed to support the userfaultfd minor fault functionality. Because the flag bit is >= bit 32, it can only be enabled for 64-bit kernels. See commit 7677f7fd8be7 ("userfaultfd: add minor fault registration mode") for more information.
Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20230624060321.3401504-1-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index e242bf63c0d2..e545713caf8f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -100,6 +100,7 @@ config RISCV select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU + select HAVE_ARCH_USERFAULTFD_MINOR if 64BIT && USERFAULTFD select HAVE_ARCH_VMAP_STACK if MMU && 64BIT select HAVE_ASM_MODVERSIONS select HAVE_CONTEXT_TRACKING_USER From 31ca5d49264ba6197aa48a926f6a035ed08b3715 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 15 Jun 2023 00:55:02 +0800 Subject: [PATCH 21/23] riscv: errata: thead: only set cbom size & noncoherent during boot The CBOM size and whether the HW is noncoherent is known and determined during booting and won't change after that. 
Signed-off-by: Jisheng Zhang Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230614165504.532-2-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/errata/thead/errata.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index c259dc925ec1..be84b14f0118 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -45,8 +45,11 @@ static bool errata_probe_cmo(unsigned int stage, if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) return false; - riscv_cbom_block_size = L1_CACHE_BYTES; - riscv_noncoherent_supported(); + if (stage == RISCV_ALTERNATIVES_BOOT) { + riscv_cbom_block_size = L1_CACHE_BYTES; + riscv_noncoherent_supported(); + } + return true; } From 3b472f860c5c73244a9b951c10c289cc9ee080f3 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 15 Jun 2023 00:55:03 +0800 Subject: [PATCH 22/23] riscv: mm: mark CBO relate initialization funcs as __init The two functions cbo_get_block_size() and riscv_init_cbo_blocksizes() are only called during booting, mark them as __init. 
Signed-off-by: Jisheng Zhang Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230614165504.532-3-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/cacheflush.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index fca532ddf3ec..fbc59b3f69f2 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -104,9 +104,9 @@ EXPORT_SYMBOL_GPL(riscv_cbom_block_size); unsigned int riscv_cboz_block_size; EXPORT_SYMBOL_GPL(riscv_cboz_block_size); -static void cbo_get_block_size(struct device_node *node, - const char *name, u32 *block_size, - unsigned long *first_hartid) +static void __init cbo_get_block_size(struct device_node *node, + const char *name, u32 *block_size, + unsigned long *first_hartid) { unsigned long hartid; u32 val; @@ -126,7 +126,7 @@ static void cbo_get_block_size(struct device_node *node, } } -void riscv_init_cbo_blocksizes(void) +void __init riscv_init_cbo_blocksizes(void) { unsigned long cbom_hartid, cboz_hartid; u32 cbom_block_size = 0, cboz_block_size = 0; From 8500808a991f0e569b3d835a6223848c0717a6c7 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 15 Jun 2023 00:55:04 +0800 Subject: [PATCH 23/23] riscv: mm: mark noncoherent_supported as __ro_after_init The noncoherent_supported indicates whether the HW is coherent or not, it won't change after booting, mark it as __ro_after_init. 
Signed-off-by: Jisheng Zhang Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230614165504.532-4-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/dma-noncoherent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index d919efab6eba..d51a75864e53 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -10,7 +10,7 @@ #include #include -static bool noncoherent_supported; +static bool noncoherent_supported __ro_after_init; void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, enum dma_data_direction dir)