From 3db380570af7052620ace20c29e244938610ca71 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Mon, 28 Dec 2020 12:34:59 +0800 Subject: [PATCH 001/188] selftests/powerpc: Make the test check in eeh-basic.sh posix compliant The == operand is a bash extension, thus this will fail on Ubuntu with: ./eeh-basic.sh: 89: test: 2: unexpected operator As the /bin/sh on Ubuntu is pointed to DASH. Use -eq to fix this posix compatibility issue. Fixes: 996f9e0f93f162 ("selftests/powerpc: Fix eeh-basic.sh exit codes") Signed-off-by: Po-Hsu Lin Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201228043459.14281-1-po-hsu.lin@canonical.com --- tools/testing/selftests/powerpc/eeh/eeh-basic.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh index 0d783e1065c8..64779f073e17 100755 --- a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh +++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh @@ -86,5 +86,5 @@ echo "$failed devices failed to recover ($dev_count tested)" lspci | diff -u $pre_lspci - rm -f $pre_lspci -test "$failed" == 0 +test "$failed" -eq 0 exit $? From 52f6b0a90bcf573ba8a33e97544c7b6f292376a4 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Thu, 24 Dec 2020 21:24:46 +0800 Subject: [PATCH 002/188] ocxl: use DEFINE_MUTEX() for mutex lock mutex lock can be initialized automatically with DEFINE_MUTEX() rather than explicitly calling mutex_init(). Signed-off-by: Zheng Yongjun Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201224132446.31286-1-zhengyongjun3@huawei.com --- drivers/misc/ocxl/file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c index 4d1b44de1492..e70525eedaae 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -15,7 +15,7 @@ static dev_t ocxl_dev; static struct class *ocxl_class; -static struct mutex minors_idr_lock; +static DEFINE_MUTEX(minors_idr_lock); static struct idr minors_idr; static struct ocxl_file_info *find_and_get_file_info(dev_t devno) @@ -588,7 +588,6 @@ int ocxl_file_init(void) { int rc; - mutex_init(&minors_idr_lock); idr_init(&minors_idr); rc = alloc_chrdev_region(&ocxl_dev, 0, OCXL_NUM_MINORS, "ocxl"); From 7613f5a66becfd0e43a0f34de8518695888f5458 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 17 Dec 2020 11:53:06 +1100 Subject: [PATCH 003/188] powerpc/64s/kuap: Use mmu_has_feature() In commit 8150a153c013 ("powerpc/64s: Use early_mmu_has_feature() in set_kuap()") we switched the KUAP code to use early_mmu_has_feature(), to avoid a bug where we called set_kuap() before feature patching had been done, leading to recursion and crashes. That path, which called probe_kernel_read() from printk(), has since been removed, see commit 2ac5a3bf7042 ("vsprintf: Do not break early boot with probing addresses"). Additionally probe_kernel_read() no longer invokes any KUAP routines, since commit fe557319aa06 ("maccess: rename probe_kernel_{read,write} to copy_{from,to}_kernel_nofault") and c33165253492 ("powerpc: use non-set_fs based maccess routines"). So it should now be safe to use mmu_has_feature() in the KUAP routines, because we shouldn't invoke them prior to feature patching. 
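In sketch form, the difference between the two helpers is roughly the following. This is a simplified model, not the kernel's implementation: the real mmu_has_feature() fast path is a jump label patched at boot, modelled here by a plain flag, and the variable names are stand-ins.

    /* model.c - hedged sketch of the two feature checks */
    #include <stdbool.h>

    static unsigned long mmu_features;  /* stand-in for the CPU feature word */
    static bool feature_patching_done;  /* stand-in for jump-label patching */

    /* Safe at any time: a plain load and mask of the feature word. */
    static bool early_mmu_has_feature(unsigned long feature)
    {
            return (mmu_features & feature) != 0;
    }

    /*
     * Fast path: in the kernel this becomes a static branch that is only
     * trustworthy once feature patching has run, which is why the KUAP
     * code originally had to use the early variant.
     */
    static bool mmu_has_feature(unsigned long feature)
    {
            if (!feature_patching_done)
                    return false;  /* default branch state; may disagree with hardware */
            return (mmu_features & feature) != 0;
    }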
This is essentially a revert of commit 8150a153c013 ("powerpc/64s: Use early_mmu_has_feature() in set_kuap()"), but we've since added a second usage of early_mmu_has_feature() in get_kuap(), so we convert that to use mmu_has_feature() as well. Reported-by: Christophe Leroy Signed-off-by: Michael Ellerman Depends-on: c33165253492 ("powerpc: use non-set_fs based maccess routines"). Link: https://lore.kernel.org/r/20201217005306.895685-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/book3s/64/kup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index f50f72e535aa..2298eac49763 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -333,7 +333,7 @@ static inline unsigned long get_kuap(void) * This has no effect in terms of actually blocking things on hash, * so it doesn't break anything. */ - if (!early_mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) return AMR_KUAP_BLOCKED; return mfspr(SPRN_AMR); @@ -341,7 +341,7 @@ static inline unsigned long get_kuap(void) static inline void set_kuap(unsigned long value) { - if (!early_mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) return; /* From e5f9d8858612c192a4326f39ed16c91c3a9e0893 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 28 Dec 2020 14:22:04 +0530 Subject: [PATCH 004/188] powerpc/perf/hv-24x7: Dont create sysfs event files for dummy events The hv_24x7 performance monitoring unit creates a list of supported events from the event catalog obtained via HCALL. The hv_24x7 catalog could also contain invalid or dummy events with names like RESERVED*. These events do not have any hardware counters backing them. Add a check to string compare the event names to filter such events out. Result on power9 machine: Before this patch: ..... hv_24x7/PM_XLINK2_OUT_ODD_CYC,chip=?/ [Kernel PMU event] hv_24x7/PM_XLINK2_OUT_ODD_DATA_COUNT,chip=?/ [Kernel PMU event] hv_24x7/PM_XLINK2_OUT_ODD_TOTAL_UTIL,chip=?/ [Kernel PMU event] hv_24x7/PM_XTS_ATR_DEMAND_CHECKOUT,chip=?/ [Kernel PMU event] hv_24x7/PM_XTS_ATR_DEMAND_CHECKOUT_MISS,chip=?/ [Kernel PMU event] hv_24x7/PM_XTS_ATSD_SENT,chip=?/ [Kernel PMU event] hv_24x7/PM_XTS_ATSD_TLBI_RCV,chip=?/ [Kernel PMU event] hv_24x7/RESERVED_NEST1,chip=?/ [Kernel PMU event] hv_24x7/RESERVED_NEST10,chip=?/ [Kernel PMU event] hv_24x7/RESERVED_NEST11,chip=?/ [Kernel PMU event] hv_24x7/RESERVED_NEST12,chip=?/ [Kernel PMU event] hv_24x7/RESERVED_NEST13,chip=?/ [Kernel PMU event] ...... dmesg: [ 0.000362] printk: console [hvc0] enabled [ 0.815452] hv-24x7: read 1530 catalog entries, created 537 event attrs (0 failures), 275 descs After this patch: ...... hv_24x7/PM_XLINK2_OUT_ODD_AVLBL_CYC,chip=?/ [Kernel PMU event] hv_24x7/PM_XLINK2_OUT_ODD_CYC,chip=?/ [Kernel PMU event] hv_24x7/PM_XLINK2_OUT_ODD_DATA_COUNT,chip=?/ [Kernel PMU event] hv_24x7/PM_XLINK2_OUT_ODD_TOTAL_UTIL,chip=?/ [Kernel PMU event] hv_24x7/PM_XTS_ATR_DEMAND_CHECKOUT,chip=?/ [Kernel PMU event] hv_24x7/PM_XTS_ATR_DEMAND_CHECKOUT_MISS,chip=?/ [Kernel PMU event] hv_24x7/PM_XTS_ATSD_SENT,chip=?/ [Kernel PMU event] hv_24x7/PM_XTS_ATSD_TLBI_RCV,chip=?/ [Kernel PMU event] hv_24x7/TOD,chip=?/ [Kernel PMU event] ...... 
dmesg: [ 0.000357] printk: console [hvc0] enabled [ 0.808592] hv-24x7: read 1530 catalog entries, created 509 event attrs (0 failures), 275 descs Signed-off-by: Kajol Jain [mpe: Simplify ignore_event(), minor change log formatting] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201228085204.18026-1-kjain@linux.ibm.com --- arch/powerpc/perf/hv-24x7.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 6e7e820508df..e5eb33255066 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -764,6 +764,14 @@ static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event, return ev_len; } +/* + * Return true incase of invalid or dummy events with names like RESERVED* + */ +static bool ignore_event(const char *name) +{ + return strncmp(name, "RESERVED", 8) == 0; +} + #define MAX_4K (SIZE_MAX / 4096) static int create_events_from_catalog(struct attribute ***events_, @@ -894,6 +902,10 @@ static int create_events_from_catalog(struct attribute ***events_, name = event_name(event, &nl); + if (ignore_event(name)) { + junk_events++; + continue; + } if (event->event_group_record_len == 0) { pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n", event_idx, nl, name); @@ -955,6 +967,9 @@ static int create_events_from_catalog(struct attribute ***events_, continue; name = event_name(event, &nl); + if (ignore_event(name)) + continue; + nonce = event_uniq_add(&ev_uniq, name, nl, event->domain); ct = event_data_to_attrs(event_idx, events + event_attr_ct, event, nonce); From d25da505c3f567a8667adb0118de1400468172ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:31:44 +0100 Subject: [PATCH 005/188] powerpc/mm: Include __find_linux_pte() prototype MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It fixes this W=1 compile error : ../arch/powerpc/mm/pgtable.c:337:8: error: no previous prototype for ‘__find_linux_pte’ [-Werror=missing-prototypes] 337 | pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, | ^~~~~~~~~~~~~~~~ Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-2-clg@kaod.org --- arch/powerpc/mm/pgtable.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 15555c95cebc..3a41545e5c07 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -26,6 +26,7 @@ #include #include #include +#include static inline int is_exec_fault(void) { From aa23ea0c5f7f9a46e6aa3be0a4cfdfb80fabca6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:31:45 +0100 Subject: [PATCH 006/188] powerpc/pseries/ras: Remove unused variable 'status' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The last use of 'status' was removed in 2012. Remove the variable to fix this W=1 compile error. 
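In miniature, the pattern behind the warning looks like this; hypothetical code, with get_sensor() standing in for rtas_get_sensor_fast():

    /* toy.c - illustration of -Wunused-but-set-variable */
    int get_sensor(int *state);      /* assumed: fills *state, returns a status */

    void handler_before(void)
    {
            int state, status;

            status = get_sensor(&state);    /* 'status' assigned, never read */
    }

    void handler_after(void)
    {
            int state;

            get_sensor(&state);             /* call kept for its side effect */
    }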
../arch/powerpc/platforms/pseries/ras.c: In function ‘ras_epow_interrupt’: ../arch/powerpc/platforms/pseries/ras.c:318:6: error: variable ‘status’ set but not used [-Werror=unused-but-set-variable] 318 | int status; | ^~~~~~ Fixes: 55fc0c561742 ("powerpc/pseries: Parse and handle EPOW interrupts") Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-3-clg@kaod.org --- arch/powerpc/platforms/pseries/ras.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 149cec2212e6..bcb614ffce6a 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -315,12 +315,10 @@ static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id) /* Handle environmental and power warning (EPOW) interrupts. */ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) { - int status; int state; int critical; - status = rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, - &state); + rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state); if (state > 3) critical = 1; /* Time Critical */ @@ -329,12 +327,9 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) spin_lock(&ras_log_buf_lock); - status = rtas_call(ras_check_exception_token, 6, 1, NULL, - RTAS_VECTOR_EXTERNAL_INTERRUPT, - virq_to_hw(irq), - RTAS_EPOW_WARNING, - critical, __pa(&ras_log_buf), - rtas_get_error_log_max()); + rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT, + virq_to_hw(irq), RTAS_EPOW_WARNING, critical, __pa(&ras_log_buf), + rtas_get_error_log_max()); log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); From 44159329e0ad160af7cc7e84fa6d97531c8ed78f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:31:46 +0100 Subject: [PATCH 007/188] powerpc/pseries/eeh: Make pseries_pcibios_bus_add_device() static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pseries_pcibios_bus_add_device() is a local routine defining the pcibios_bus_add_device() handler of the pseries machine in eeh_pseries_init(). It doesn't need to be external. 
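Reduced to a hypothetical two-liner, the fix used throughout this series looks like the sketch below; giving the function internal linkage both satisfies -Wmissing-prototypes and keeps the symbol out of the kernel's global namespace. The names are invented and struct pci_dev is left opaque.

    /* demo.c - hypothetical; not the eeh_pseries.c code itself */
    struct pci_dev;

    void add_device_v1(struct pci_dev *pdev) { }        /* W=1: no previous prototype */

    static void add_device_v2(struct pci_dev *pdev) { } /* static: no prototype needed */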
It fixes this W=1 compile error: ../arch/powerpc/platforms/pseries/eeh_pseries.c:46:6: error: no previous prototype for ‘pseries_pcibios_bus_add_device’ [-Werror=missing-prototypes] 46 | void pseries_pcibios_bus_add_device(struct pci_dev *pdev) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: dae7253f9f78 ("powerpc/pseries: Add pseries SR-IOV Machine dependent calls") Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-4-clg@kaod.org --- arch/powerpc/platforms/pseries/eeh_pseries.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index cf024fa37bda..de45ceb634f9 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -43,7 +43,7 @@ static int ibm_get_config_addr_info; static int ibm_get_config_addr_info2; static int ibm_configure_pe; -void pseries_pcibios_bus_add_device(struct pci_dev *pdev) +static void pseries_pcibios_bus_add_device(struct pci_dev *pdev) { struct pci_dn *pdn = pci_get_pdn(pdev); From 90db8bf24d133654032a1c7dd46aa5096627b9ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:31:47 +0100 Subject: [PATCH 008/188] powerpc/pseries/ras: Make init_ras_hotplug_IRQ() static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit init_ras_hotplug_IRQ() is a local routine used by a machine init call and it doesn't need to be external. It fixes this W=1 compile error: ../arch/powerpc/platforms/pseries/ras.c:125:12: error: no previous prototype for ‘init_ras_hotplug_IRQ’ [-Werror=missing-prototypes] 125 | int __init init_ras_hotplug_IRQ(void) | ^~~~~~~~~~~~~~~~~~~~ Fixes: c9dccf1d074a ("powerpc/pseries: Enable RAS hotplug events later") Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-5-clg@kaod.org --- arch/powerpc/platforms/pseries/ras.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index bcb614ffce6a..d2fca1aa6742 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -122,7 +122,7 @@ static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) * devices or systems (e.g. hugepages) that have not been initialized at the * subsys stage. 
*/ -int __init init_ras_hotplug_IRQ(void) +static int __init init_ras_hotplug_IRQ(void) { struct device_node *np; From d03f210e6ed8f5d64b00f0f07b03db74aa5b95a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:31:48 +0100 Subject: [PATCH 009/188] powerpc/pmem: Include pmem prototypes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It fixes this W=1 compile error : ../arch/powerpc/lib/pmem.c:51:6: error: no previous prototype for ‘arch_wb_cache_pmem’ [-Werror=missing-prototypes] 51 | void arch_wb_cache_pmem(void *addr, size_t size) | ^~~~~~~~~~~~~~~~~~ ../arch/powerpc/lib/pmem.c:58:6: error: no previous prototype for ‘arch_invalidate_pmem’ [-Werror=missing-prototypes] 58 | void arch_invalidate_pmem(void *addr, size_t size) | ^~~~~~~~~~~~~~~~~~~~ Fixes: 32ce3862af3c ("powerpc/lib: Implement PMEM API") Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-6-clg@kaod.org --- arch/powerpc/lib/pmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c index 1550e0d2513a..eb2919ddf9b9 100644 --- a/arch/powerpc/lib/pmem.c +++ b/arch/powerpc/lib/pmem.c @@ -6,6 +6,7 @@ #include #include #include +#include #include From 692e592895266bafb1e0d688e960b4bdd8e165a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:31:49 +0100 Subject: [PATCH 010/188] powerpc/setup_64: Make some routines static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following routines are only called by local services and do not need to be external symbols. It fixes these W=1 errors : ../arch/powerpc/kernel/setup_64.c:261:13: error: no previous prototype for ‘record_spr_defaults’ [-Werror=missing-prototypes] 261 | void __init record_spr_defaults(void) | ^~~~~~~~~~~~~~~~~~~ ../arch/powerpc/kernel/setup_64.c:1011:6: error: no previous prototype for ‘entry_flush_enable’ [-Werror=missing-prototypes] 1011 | void entry_flush_enable(bool enable) | ^~~~~~~~~~~~~~~~~~ ../arch/powerpc/kernel/setup_64.c:1023:6: error: no previous prototype for ‘uaccess_flush_enable’ [-Werror=missing-prototypes] 1023 | void uaccess_flush_enable(bool enable) | ^~~~~~~~~~~~~~~~~~~~ Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-7-clg@kaod.org --- arch/powerpc/kernel/setup_64.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index c28e949cc222..560ed8b975e7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -67,6 +67,7 @@ #include #include #include +#include #include "setup.h" @@ -258,7 +259,7 @@ static void cpu_ready_for_interrupts(void) unsigned long spr_default_dscr = 0; -void __init record_spr_defaults(void) +static void __init record_spr_defaults(void) { if (early_cpu_has_feature(CPU_FTR_DSCR)) spr_default_dscr = mfspr(SPRN_DSCR); @@ -1008,7 +1009,7 @@ void rfi_flush_enable(bool enable) rfi_flush = enable; } -void entry_flush_enable(bool enable) +static void entry_flush_enable(bool enable) { if (enable) { do_entry_flush_fixups(enabled_flush_types); @@ -1020,7 +1021,7 @@ void entry_flush_enable(bool enable) entry_flush = enable; } -void uaccess_flush_enable(bool enable) +static void uaccess_flush_enable(bool enable) { if (enable) { do_uaccess_flush_fixups(enabled_flush_types); From 
1cc2fd75934454be024cd7609b6d7890de6e724b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:31:50 +0100 Subject: [PATCH 011/188] powerpc/mce: Include prototypes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It fixes these W=1 compile errors : ../arch/powerpc/kernel/mce.c:591:14: error: no previous prototype for ‘machine_check_early’ [-Werror=missing-prototypes] 591 | long notrace machine_check_early(struct pt_regs *regs) | ^~~~~~~~~~~~~~~~~~~ ../arch/powerpc/kernel/mce.c:725:6: error: no previous prototype for ‘hmi_exception_realmode’ [-Werror=missing-prototypes] 725 | long hmi_exception_realmode(struct pt_regs *regs) | ^~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-8-clg@kaod.org --- arch/powerpc/kernel/mce.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 9f3e133b57b7..c381dc2f9858 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -21,6 +21,7 @@ #include #include #include +#include static DEFINE_PER_CPU(int, mce_nest_count); static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); From cd7aa5d2fae11794a00ea34b10ee58434d718bc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:31:51 +0100 Subject: [PATCH 012/188] powerpc/smp: Include tick_broadcast() prototype MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It fixes this W=1 compile error : ../arch/powerpc/kernel/smp.c:569:6: error: no previous prototype for ‘tick_broadcast’ [-Werror=missing-prototypes] 569 | void tick_broadcast(const struct cpumask *mask) | ^~~~~~~~~~~~~~ Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-9-clg@kaod.org --- arch/powerpc/kernel/smp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9e2246e80efd..a96d90d7c442 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include From 157c9f409d11fe79f09c69e78bfc7f8fe7410744 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:31:52 +0100 Subject: [PATCH 013/188] powerpc/smp: Make debugger_ipi_callback() static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit debugger_ipi_callback() is a local routine used as a NMI IPI handler and does not need to be external. 
It fixes this W=1 compile error : ../arch/powerpc/kernel/smp.c:579:6: error: no previous prototype for ‘debugger_ipi_callback’ [-Werror=missing-prototypes] 579 | void debugger_ipi_callback(struct pt_regs *regs) | ^~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-10-clg@kaod.org --- arch/powerpc/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a96d90d7c442..5a4d59a1070d 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -577,7 +577,7 @@ void tick_broadcast(const struct cpumask *mask) #endif #ifdef CONFIG_DEBUGGER -void debugger_ipi_callback(struct pt_regs *regs) +static void debugger_ipi_callback(struct pt_regs *regs) { debugger_ipi(regs); } From d47d307f1049be545d45cf0f2332495ec9a89cc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:31:53 +0100 Subject: [PATCH 014/188] powerpc/optprobes: Remove unused routine patch_imm32_load_insns() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 650b55b707fd ("powerpc: Add prefixed instructions to instruction data type") removed the use of patch_imm32_load_insns(). Clean it up to fix this W=1 compile error : ../arch/powerpc/kernel/optprobes.c:149:6: error: no previous prototype for ‘patch_imm32_load_insns’ [-Werror=missing-prototypes] 149 | void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr) Fixes: 650b55b707fd ("powerpc: Add prefixed instructions to instruction data type") Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-11-clg@kaod.org --- arch/powerpc/kernel/optprobes.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 69bfe96884e2..da6b88b80ba4 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -141,25 +141,6 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op) } } -/* - * emulate_step() requires insn to be emulated as - * second parameter. Load register 'r4' with the - * instruction. - */ -void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr) -{ - /* addis r4,0,(insn)@h */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ADDIS | ___PPC_RT(4) | - ((val >> 16) & 0xffff))); - addr++; - - /* ori r4,r4,(insn)@l */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ORI | ___PPC_RA(4) | - ___PPC_RS(4) | (val & 0xffff))); -} - /* * Generate instructions to load provided immediate 64-bit value * to register 'reg' and patch these instructions at 'addr'. From bb21e1b6c5352d62d866e9236ed427f632cd537b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:31:54 +0100 Subject: [PATCH 015/188] powerpc/optprobes: Make patch_imm64_load_insns() static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit patch_imm64_load_insns() is only used locally in arch_prepare_optimized_kprobe() and does not need to be external. 
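For context, this helper materializes a 64-bit constant with the classic five-instruction sequence (lis, ori, rldicr, oris, ori; only the lis comment is visible in the hunk below, the rest is recalled here and worth checking against the source). The value arithmetic it encodes can be verified in a few lines of plain C; illustrative only, since the kernel emits real instructions via patch_instruction():

    /* demo.c - value construction behind the 5-insn immediate load */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t val = 0x123456789abcdef0ULL;
            uint64_t reg;

            reg  = (val >> 48 & 0xffff) << 16;  /* lis    reg, val@highest */
            reg |=  val >> 32 & 0xffff;         /* ori    reg, reg, val@higher */
            reg <<= 32;                         /* rldicr reg, reg, 32, 31 */
            reg |= (val >> 16 & 0xffff) << 16;  /* oris   reg, reg, val@h */
            reg |=  val & 0xffff;               /* ori    reg, reg, val@l */

            assert(reg == val);
            return 0;
    }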
It fixes this W=1 compile error : ../arch/powerpc/kernel/optprobes.c:149:6: error: no previous prototype for ‘patch_imm64_load_insns’ [-Werror=missing-prototypes] 149 | void patch_imm64_load_insns(unsigned int val, kprobe_opcode_t *addr) Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-12-clg@kaod.org --- arch/powerpc/kernel/optprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index da6b88b80ba4..7f7cdbeacd1a 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -145,7 +145,7 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op) * Generate instructions to load provided immediate 64-bit value * to register 'reg' and patch these instructions at 'addr'. */ -void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) +static void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) { /* lis reg,(op)@highest */ patch_instruction((struct ppc_inst *)addr, From cccaf1a10abf03d91321d29ff333d6d5d4cef542 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:31:55 +0100 Subject: [PATCH 016/188] powerpc/mm: Declare some prototypes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It fixes this W=1 compile error : ../arch/powerpc/mm/book3s64/hash_utils.c:1515:5: error: no previous prototype for ‘__hash_page’ [-Werror=missing-prototypes] 1515 | int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr, | ^~~~~~~~~~~ ../arch/powerpc/mm/book3s64/hash_utils.c:1850:6: error: no previous prototype for ‘low_hash_fault’ [-Werror=missing-prototypes] 1850 | void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc) | ^~~~~~~~~~~~~~ Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-13-clg@kaod.org --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 066b1d34c7bc..cb95b16e9a7b 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -467,6 +467,8 @@ extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long flags); extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap, unsigned long dsisr); +void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc); +int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr, unsigned long msr); int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, unsigned int shift, unsigned int mmu_psize); From 11f9c1d2fb497f69f83d4fab6fb7fc8a6884eded Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:31:56 +0100 Subject: [PATCH 017/188] powerpc/mm: Move hpte_insert_repeating() prototype MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It fixes this W=1 compile error : ../arch/powerpc/mm/book3s64/hash_utils.c:1867:6: error: no previous prototype for ‘hpte_insert_repeating’ [-Werror=missing-prototypes] 1867 | long hpte_insert_repeating(unsigned long hash, unsigned long vpn, | ^~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Cédric Le Goater 
Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-14-clg@kaod.org --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 2 ++ arch/powerpc/mm/book3s64/hash_hugetlbpage.c | 4 ---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index cb95b16e9a7b..2bffc7a0fdb8 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -454,6 +454,8 @@ static inline unsigned long hpt_hash(unsigned long vpn, #define HPTE_NOHPTE_UPDATE 0x2 #define HPTE_USE_KERNEL_KEY 0x4 +long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long pa, + unsigned long rlags, unsigned long vflags, int psize, int ssize); extern int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, int subpage_prot); diff --git a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c index b5e9fff8c217..a688e1324ae5 100644 --- a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c @@ -16,10 +16,6 @@ unsigned int hpage_shift; EXPORT_SYMBOL(hpage_shift); -extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn, - unsigned long pa, unsigned long rlags, - unsigned long vflags, int psize, int ssize); - int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, unsigned int shift, unsigned int mmu_psize) From 1f55aefea3c1431f662aafa02ef9ac18d8880751 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:31:57 +0100 Subject: [PATCH 018/188] powerpc/mm: Declare preload_new_slb_context() prototype MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It fixes this W=1 compile error : ../arch/powerpc/mm/book3s64/slb.c:380:6: error: no previous prototype for ‘preload_new_slb_context’ [-Werror=missing-prototypes] 380 | void preload_new_slb_context(unsigned long start, unsigned long sp) | ^~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-15-clg@kaod.org --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 2bffc7a0fdb8..f911bdb68d8b 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -525,6 +525,7 @@ void slb_dump_contents(struct slb_entry *slb_ptr); extern void slb_vmalloc_update(void); extern void slb_set_size(u16 size); +void preload_new_slb_context(unsigned long start, unsigned long sp); #endif /* __ASSEMBLY__ */ /* From 94b87d72fc852b6995702d74541136a65f88624a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:31:58 +0100 Subject: [PATCH 019/188] powerpc/mm/hugetlb: Make pseries_alloc_bootmem_huge_page() static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pseries_alloc_bootmem_huge_page() is only used locally in alloc_bootmem_huge_page() and does not need to be external. 
It fixes this W=1 compile error : ../arch/powerpc/mm/hugetlbpage.c:220:12: error: no previous prototype for ‘pseries_alloc_bootmem_huge_page’ [-Werror=missing-prototypes] 220 | int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-16-clg@kaod.org --- arch/powerpc/mm/hugetlbpage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 8b3cc4d688e8..4e7d9b91f1da 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -217,7 +217,7 @@ void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_p } } -int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate) +static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate) { struct huge_bootmem_page *m; if (nr_gpages == 0) From 1429ff51480fe5a21a3d17158d259a4b4b04808f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:31:59 +0100 Subject: [PATCH 020/188] powerpc/mm: Declare arch_report_meminfo() prototype. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It fixes this W=1 compile error : ../arch/powerpc/mm/book3s64/pgtable.c:411:6: error: no previous prototype for ‘arch_report_meminfo’ [-Werror=missing-prototypes] 411 | void arch_report_meminfo(struct seq_file *m) | ^~~~~~~~~~~~~~~~~~~ Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-17-clg@kaod.org --- arch/powerpc/include/asm/pgtable.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index f7613f43c9cf..4eed82172e33 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -162,6 +162,9 @@ static inline bool is_ioremap_addr(const void *x) return addr >= IOREMAP_BASE && addr < IOREMAP_END; } + +struct seq_file; +void arch_report_meminfo(struct seq_file *m); #endif /* CONFIG_PPC64 */ #endif /* __ASSEMBLY__ */ From 9ae440fb3d7d1c91ada7d6b13e009bd9f4f00e6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:32:00 +0100 Subject: [PATCH 021/188] powerpc/watchdog: Declare soft_nmi_interrupt() prototype MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit soft_nmi_interrupt() usage requires PPC_WATCHDOG to be configured. Check the CONFIG definition to declare the prototype. 
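A minimal hypothetical pair shows the mechanics: -Wmissing-prototypes is only silenced when the translation unit that defines the function has seen the declaration, which is why the definition file gains an include as well. CONFIG_FOO and the names here are invented for the sketch.

    /* foo.h */
    struct pt_regs;
    #ifdef CONFIG_FOO
    void foo_nmi_interrupt(struct pt_regs *regs);  /* gated like its only user */
    #endif

    /* foo.c - built only when CONFIG_FOO=y; without the include below,
     * W=1 would still warn at the definition */
    #include "foo.h"
    void foo_nmi_interrupt(struct pt_regs *regs) { }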
It fixes this W=1 compile error : ../arch/powerpc/kernel/watchdog.c:250:6: error: no previous prototype for ‘soft_nmi_interrupt’ [-Werror=missing-prototypes] 250 | void soft_nmi_interrupt(struct pt_regs *regs) | ^~~~~~~~~~~~~~~~~~ Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-18-clg@kaod.org --- arch/powerpc/include/asm/nmi.h | 1 + arch/powerpc/kernel/watchdog.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index 84b4cfe73edd..63eccea93796 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -4,6 +4,7 @@ #ifdef CONFIG_PPC_WATCHDOG extern void arch_touch_nmi_watchdog(void); +void soft_nmi_interrupt(struct pt_regs *regs); #else static inline void arch_touch_nmi_watchdog(void) {} #endif diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index af3c15a1d41e..3ae13c2a10cf 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -27,6 +27,7 @@ #include #include +#include /* * The powerpc watchdog ensures that each CPU is able to service timers. From 9236f57a9e51c72ce426ccd2e53e123de7196a0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:32:01 +0100 Subject: [PATCH 022/188] KVM: PPC: Make the VMX instruction emulation routines static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These are only used locally. It fixes these W=1 compile errors : ../arch/powerpc/kvm/powerpc.c:1521:5: error: no previous prototype for ‘kvmppc_get_vmx_dword’ [-Werror=missing-prototypes] 1521 | int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) | ^~~~~~~~~~~~~~~~~~~~ ../arch/powerpc/kvm/powerpc.c:1539:5: error: no previous prototype for ‘kvmppc_get_vmx_word’ [-Werror=missing-prototypes] 1539 | int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) | ^~~~~~~~~~~~~~~~~~~ ../arch/powerpc/kvm/powerpc.c:1557:5: error: no previous prototype for ‘kvmppc_get_vmx_hword’ [-Werror=missing-prototypes] 1557 | int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) | ^~~~~~~~~~~~~~~~~~~~ ../arch/powerpc/kvm/powerpc.c:1575:5: error: no previous prototype for ‘kvmppc_get_vmx_byte’ [-Werror=missing-prototypes] 1575 | int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val) | ^~~~~~~~~~~~~~~~~~~ Fixes: acc9eb9305fe ("KVM: PPC: Reimplement LOAD_VMX/STORE_VMX instruction mmio emulation with analyse_instr() input") Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-19-clg@kaod.org --- arch/powerpc/kvm/powerpc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index cf52d26f49cd..25966ae3271e 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1518,7 +1518,7 @@ int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu, return emulated; } -int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -1536,7 +1536,7 @@ int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset 
= 0; @@ -1554,7 +1554,7 @@ int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -1572,7 +1572,7 @@ int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; From d834915e8ee28884f1180dc566ba77c8768ec00a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:32:02 +0100 Subject: [PATCH 023/188] KVM: PPC: Book3S HV: Include prototypes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It fixes these W=1 compile errors : CC [M] arch/powerpc/kvm/book3s_64_mmu_hv.o ../arch/powerpc/kvm/book3s_64_mmu_hv.c:879:5: error: no previous prototype for ‘kvm_unmap_hva_range_hv’ [-Werror=missing-prototypes] 879 | int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end) | ^~~~~~~~~~~~~~~~~~~~~~ ../arch/powerpc/kvm/book3s_64_mmu_hv.c:888:6: error: no previous prototype for ‘kvmppc_core_flush_memslot_hv’ [-Werror=missing-prototypes] 888 | void kvmppc_core_flush_memslot_hv(struct kvm *kvm, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../arch/powerpc/kvm/book3s_64_mmu_hv.c:970:5: error: no previous prototype for ‘kvm_age_hva_hv’ [-Werror=missing-prototypes] 970 | int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end) | ^~~~~~~~~~~~~~ ../arch/powerpc/kvm/book3s_64_mmu_hv.c:1011:5: error: no previous prototype for ‘kvm_test_age_hva_hv’ [-Werror=missing-prototypes] 1011 | int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva) | ^~~~~~~~~~~~~~~~~~~ ../arch/powerpc/kvm/book3s_64_mmu_hv.c:1019:6: error: no previous prototype for ‘kvm_set_spte_hva_hv’ [-Werror=missing-prototypes] 1019 | void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte) | ^~~~~~~~~~~~~~~~~~~ Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-20-clg@kaod.org --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 38ea396a23d6..c77f2d4f44ca 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -27,6 +27,7 @@ #include #include +#include "book3s.h" #include "trace_hv.h" //#define DEBUG_RESIZE_HPT 1 From ce275179b6c98032361271927b7458884e9708b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:32:03 +0100 Subject: [PATCH 024/188] KVM: PPC: Book3S HV: Declare some prototypes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes these W=1 compile errors: ../arch/powerpc/kvm/book3s_hv_builtin.c:425:6: error: no previous prototype for ‘kvmppc_read_intr’ [-Werror=missing-prototypes] 425 | long kvmppc_read_intr(void) | ^~~~~~~~~~~~~~~~ ../arch/powerpc/kvm/book3s_hv_builtin.c:652:6: error: no previous prototype for ‘kvmppc_bad_interrupt’ [-Werror=missing-prototypes] 652 | void kvmppc_bad_interrupt(struct pt_regs *regs) | ^~~~~~~~~~~~~~~~~~~~ ../arch/powerpc/kvm/book3s_hv_builtin.c:695:6: error: no previous prototype for ‘kvmhv_p9_set_lpcr’ [-Werror=missing-prototypes] 695 | void 
kvmhv_p9_set_lpcr(struct kvm_split_mode *sip) | ^~~~~~~~~~~~~~~~~ ../arch/powerpc/kvm/book3s_hv_builtin.c:740:6: error: no previous prototype for ‘kvmhv_p9_restore_lpcr’ [-Werror=missing-prototypes] 740 | void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip) | ^~~~~~~~~~~~~~~~~~~~~ ../arch/powerpc/kvm/book3s_hv_builtin.c:768:6: error: no previous prototype for ‘kvmppc_set_msr_hv’ [-Werror=missing-prototypes] 768 | void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) | ^~~~~~~~~~~~~~~~~ ../arch/powerpc/kvm/book3s_hv_builtin.c:817:6: error: no previous prototype for ‘kvmppc_inject_interrupt_hv’ [-Werror=missing-prototypes] 817 | void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-21-clg@kaod.org --- arch/powerpc/include/asm/kvm_book3s.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index d32ec9ae73bd..2f5f919f6cd3 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -277,6 +277,13 @@ extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd); extern void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu); extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu); +long kvmppc_read_intr(void); +void kvmppc_bad_interrupt(struct pt_regs *regs); +void kvmhv_p9_set_lpcr(struct kvm_split_mode *sip); +void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip); +void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr); +void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags); + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu); void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu); From 42c1f400d1da50dd1cd9f874df72dc827f9fe2d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:32:04 +0100 Subject: [PATCH 025/188] powerpc/pseries: Make IOV setup routines static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These are only used locally. 
It fixes these W=1 compile errors : ../arch/powerpc/platforms/pseries/setup.c:610:17: error: no previous prototype for ‘pseries_get_iov_fw_value’ [-Werror=missing-prototypes] 610 | resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno, | ^~~~~~~~~~~~~~~~~~~~~~~~ ../arch/powerpc/platforms/pseries/setup.c:646:6: error: no previous prototype for ‘of_pci_set_vf_bar_size’ [-Werror=missing-prototypes] 646 | void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes) | ^~~~~~~~~~~~~~~~~~~~~~ ../arch/powerpc/platforms/pseries/setup.c:668:6: error: no previous prototype for ‘of_pci_parse_iov_addrs’ [-Werror=missing-prototypes] 668 | void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes) | ^~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-22-clg@kaod.org --- arch/powerpc/platforms/pseries/setup.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 090c13f6c881..0272aa4e74e3 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -607,8 +607,8 @@ enum get_iov_fw_value_index { WDW_SIZE = 3 /* Get Window Size */ }; -resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno, - enum get_iov_fw_value_index value) +static resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno, + enum get_iov_fw_value_index value) { const int *indexes; struct device_node *dn = pci_device_to_OF_node(dev); @@ -643,7 +643,7 @@ resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno, return ret; } -void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes) +static void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes) { struct resource *res; resource_size_t base, size; @@ -665,7 +665,7 @@ void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes) } } -void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes) +static void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes) { struct resource *res, *root, *conflict; resource_size_t base, size; From 53137a9b51e49e0399ad322e4a39bc5f9bf0a1de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:32:05 +0100 Subject: [PATCH 026/188] powerpc/pcidn: Make IOV setup routines static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These are only used locally. 
It fixes these W=1 compile errors : ../arch/powerpc/platforms/pseries/pci.c:58:5: error: no previous prototype for ‘pseries_send_map_pe’ [-Werror=missing-prototypes] 58 | int pseries_send_map_pe(struct pci_dev *pdev, | ^~~~~~~~~~~~~~~~~~~ ../arch/powerpc/platforms/pseries/pci.c:91:6: error: no previous prototype for ‘pseries_set_pe_num’ [-Werror=missing-prototypes] 91 | void pseries_set_pe_num(struct pci_dev *pdev, u16 vf_index, __be16 pe_num) | ^~~~~~~~~~~~~~~~~~ ../arch/powerpc/platforms/pseries/pci.c:105:5: error: no previous prototype for ‘pseries_associate_pes’ [-Werror=missing-prototypes] 105 | int pseries_associate_pes(struct pci_dev *pdev, u16 num_vfs) | ^~~~~~~~~~~~~~~~~~~~~ ../arch/powerpc/platforms/pseries/pci.c:149:5: error: no previous prototype for ‘pseries_pci_sriov_enable’ [-Werror=missing-prototypes] 149 | int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) | ^~~~~~~~~~~~~~~~~~~~~~~~ ../arch/powerpc/platforms/pseries/pci.c:192:5: error: no previous prototype for ‘pseries_pcibios_sriov_enable’ [-Werror=missing-prototypes] 192 | int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../arch/powerpc/platforms/pseries/pci.c:199:5: error: no previous prototype for ‘pseries_pcibios_sriov_disable’ [-Werror=missing-prototypes] 199 | int pseries_pcibios_sriov_disable(struct pci_dev *pdev) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-23-clg@kaod.org --- arch/powerpc/platforms/pseries/pci.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 72a4d4167849..1bffbd1c9a94 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -55,9 +55,8 @@ struct pe_map_bar_entry { __be32 reserved; /* Reserved Space */ }; -int pseries_send_map_pe(struct pci_dev *pdev, - u16 num_vfs, - struct pe_map_bar_entry *vf_pe_array) +static int pseries_send_map_pe(struct pci_dev *pdev, u16 num_vfs, + struct pe_map_bar_entry *vf_pe_array) { struct pci_dn *pdn; int rc; @@ -88,7 +87,7 @@ int pseries_send_map_pe(struct pci_dev *pdev, return rc; } -void pseries_set_pe_num(struct pci_dev *pdev, u16 vf_index, __be16 pe_num) +static void pseries_set_pe_num(struct pci_dev *pdev, u16 vf_index, __be16 pe_num) { struct pci_dn *pdn; @@ -102,7 +101,7 @@ void pseries_set_pe_num(struct pci_dev *pdev, u16 vf_index, __be16 pe_num) pdn->pe_num_map[vf_index]); } -int pseries_associate_pes(struct pci_dev *pdev, u16 num_vfs) +static int pseries_associate_pes(struct pci_dev *pdev, u16 num_vfs) { struct pci_dn *pdn; int i, rc, vf_index; @@ -146,7 +145,7 @@ int pseries_associate_pes(struct pci_dev *pdev, u16 num_vfs) return rc; } -int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +static int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) { struct pci_dn *pdn; int rc; @@ -189,14 +188,14 @@ int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) return rc; } -int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +static int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) { /* Allocate PCI data */ add_sriov_vf_pdns(pdev); return pseries_pci_sriov_enable(pdev, num_vfs); } -int pseries_pcibios_sriov_disable(struct pci_dev *pdev) +static int pseries_pcibios_sriov_disable(struct pci_dev *pdev) { struct pci_dn *pdn; From 22f1de2e13b066921dedf6a00d2cc414f3cbab05 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 4 Jan 2021 15:32:06 +0100 Subject: [PATCH 027/188] powerpc/pseries/eeh: Make pseries_send_allow_unfreeze() static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only used locally. It fixes this W=1 compile error : ../arch/powerpc/platforms/pseries/eeh_pseries.c:697:5: error: no previous prototype for ‘pseries_send_allow_unfreeze’ [-Werror=missing-prototypes] 697 | int pseries_send_allow_unfreeze(struct pci_dn *pdn, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-24-clg@kaod.org --- arch/powerpc/platforms/pseries/eeh_pseries.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index de45ceb634f9..bc15200852b7 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -694,8 +694,7 @@ static int pseries_eeh_write_config(struct eeh_dev *edev, int where, int size, u } #ifdef CONFIG_PCI_IOV -int pseries_send_allow_unfreeze(struct pci_dn *pdn, - u16 *vf_pe_array, int cur_vfs) +static int pseries_send_allow_unfreeze(struct pci_dn *pdn, u16 *vf_pe_array, int cur_vfs) { int rc; int ibm_allow_unfreeze = rtas_token("ibm,open-sriov-allow-unfreeze"); From 691602aab9c3cce31d3ff9529c09b7922a5f6224 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 13 Jan 2021 21:20:14 +1100 Subject: [PATCH 028/188] powerpc/iommu/debug: Add debugfs entries for IOMMU tables This adds a folder per LIOBN under /sys/kernel/debug/iommu with IOMMU table parameters. This is enabled by CONFIG_IOMMU_DEBUGFS. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210113102014.124452-1-aik@ozlabs.ru --- arch/powerpc/kernel/iommu.c | 46 +++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 5b69a6a72a0e..c00214a4355c 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -38,6 +39,47 @@ #define DBG(...) 
+#ifdef CONFIG_IOMMU_DEBUGFS +static int iommu_debugfs_weight_get(void *data, u64 *val) +{ + struct iommu_table *tbl = data; + *val = bitmap_weight(tbl->it_map, tbl->it_size); + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(iommu_debugfs_fops_weight, iommu_debugfs_weight_get, NULL, "%llu\n"); + +static void iommu_debugfs_add(struct iommu_table *tbl) +{ + char name[10]; + struct dentry *liobn_entry; + + sprintf(name, "%08lx", tbl->it_index); + liobn_entry = debugfs_create_dir(name, iommu_debugfs_dir); + + debugfs_create_file_unsafe("weight", 0400, liobn_entry, tbl, &iommu_debugfs_fops_weight); + debugfs_create_ulong("it_size", 0400, liobn_entry, &tbl->it_size); + debugfs_create_ulong("it_page_shift", 0400, liobn_entry, &tbl->it_page_shift); + debugfs_create_ulong("it_reserved_start", 0400, liobn_entry, &tbl->it_reserved_start); + debugfs_create_ulong("it_reserved_end", 0400, liobn_entry, &tbl->it_reserved_end); + debugfs_create_ulong("it_indirect_levels", 0400, liobn_entry, &tbl->it_indirect_levels); + debugfs_create_ulong("it_level_size", 0400, liobn_entry, &tbl->it_level_size); +} + +static void iommu_debugfs_del(struct iommu_table *tbl) +{ + char name[10]; + struct dentry *liobn_entry; + + sprintf(name, "%08lx", tbl->it_index); + liobn_entry = debugfs_lookup(name, iommu_debugfs_dir); + if (liobn_entry) + debugfs_remove(liobn_entry); +} +#else +static void iommu_debugfs_add(struct iommu_table *tbl){} +static void iommu_debugfs_del(struct iommu_table *tbl){} +#endif + static int novmerge; static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int); @@ -725,6 +767,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, welcomed = 1; } + iommu_debugfs_add(tbl); + return tbl; } @@ -744,6 +788,8 @@ static void iommu_table_free(struct kref *kref) return; } + iommu_debugfs_del(tbl); + iommu_table_release_pages(tbl); /* verify that table contains no entries */ From 9dd31b11370380c488c8f2d347058617cd3fff99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Sat, 12 Dec 2020 15:27:07 +0100 Subject: [PATCH 029/188] powerpc/vas: Fix IRQ name allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VAS device allocates a generic interrupt to handle page faults but the IRQ name doesn't show under /proc. This is because it's on stack. Allocate the name. 
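The underlying rule, sketched below with invented helpers (some_handler and the surrounding error handling are assumptions, not the VAS code): the IRQ core stores the name pointer it is given instead of copying the string, so the buffer must outlive the registration.

    /* sketch.c - illustrative only */
    #include <linux/interrupt.h>
    #include <linux/slab.h>

    static irqreturn_t some_handler(int irq, void *data);  /* assumed elsewhere */

    static int setup_bad(unsigned int virq, void *vinst)
    {
            char devname[64];       /* dies when this function returns */

            snprintf(devname, sizeof(devname), "vas-%d", 0);
            /* /proc/interrupts would later dereference dead stack memory */
            return request_irq(virq, some_handler, 0, devname, vinst);
    }

    static int setup_good(unsigned int virq, void *vinst)
    {
            char *name = kasprintf(GFP_KERNEL, "vas-%d", 0);

            if (!name)
                    return -ENOMEM;
            /* freed with kfree() after free_irq(), alongside the instance */
            return request_irq(virq, some_handler, 0, name, vinst);
    }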
Signed-off-by: Cédric Le Goater Acked-by: Haren Myneni Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201212142707.2102141-1-clg@kaod.org --- arch/powerpc/platforms/powernv/vas.c | 11 ++++++++--- arch/powerpc/platforms/powernv/vas.h | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c index 598e4cd563fb..b65256a63e87 100644 --- a/arch/powerpc/platforms/powernv/vas.c +++ b/arch/powerpc/platforms/powernv/vas.c @@ -28,12 +28,10 @@ static DEFINE_PER_CPU(int, cpu_vas_id); static int vas_irq_fault_window_setup(struct vas_instance *vinst) { - char devname[64]; int rc = 0; - snprintf(devname, sizeof(devname), "vas-%d", vinst->vas_id); rc = request_threaded_irq(vinst->virq, vas_fault_handler, - vas_fault_thread_fn, 0, devname, vinst); + vas_fault_thread_fn, 0, vinst->name, vinst); if (rc) { pr_err("VAS[%d]: Request IRQ(%d) failed with %d\n", @@ -80,6 +78,12 @@ static int init_vas_instance(struct platform_device *pdev) if (!vinst) return -ENOMEM; + vinst->name = kasprintf(GFP_KERNEL, "vas-%d", vasid); + if (!vinst->name) { + kfree(vinst); + return -ENOMEM; + } + INIT_LIST_HEAD(&vinst->node); ida_init(&vinst->ida); mutex_init(&vinst->mutex); @@ -162,6 +166,7 @@ static int init_vas_instance(struct platform_device *pdev) return 0; free_vinst: + kfree(vinst->name); kfree(vinst); return -ENODEV; diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index 70f793e8f6cc..c7db3190baca 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -340,6 +340,7 @@ struct vas_instance { struct vas_window *rxwin[VAS_COP_TYPE_MAX]; struct vas_window *windows[VAS_WINDOWS_PER_CHIP]; + char *name; char *dbgname; struct dentry *dbgdir; }; From c9f3401313a5089f100d7d1ef4b75cd7b49b2190 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 18 Jan 2021 22:34:51 +1000 Subject: [PATCH 030/188] powerpc: Always enable queued spinlocks for 64s, disable for others Queued spinlocks have shown to have good performance and fairness properties even on smaller (2 socket) POWER systems. This selects them automatically for 64s. For other platforms they are de-selected, the standard spinlock is far simpler and smaller code, and single chips with a handful of cores is unlikely to show any improvement. CONFIG_EXPERT still allows this to be changed, e.g., to help debug performance or correctness issues. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210118123451.1452206-1-npiggin@gmail.com --- arch/powerpc/Kconfig | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 107bb4319e0e..eebb4a8c156c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -505,18 +505,14 @@ config HOTPLUG_CPU Say N if you are unsure. config PPC_QUEUED_SPINLOCKS - bool "Queued spinlocks" + bool "Queued spinlocks" if EXPERT depends on SMP + default PPC_BOOK3S_64 help Say Y here to use queued spinlocks which give better scalability and fairness on large SMP and NUMA systems without harming single threaded performance. - This option is currently experimental, the code is more complex and - less tested so it defaults to "N" for the moment. - - If unsure, say "N". 
-
 config ARCH_CPU_PROBE_RELEASE
 	def_bool y
 	depends on HOTPLUG_CPU

From 27f699579b64dbf27caf31e5c0eac567ec0aa8b8 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 19 Jan 2021 06:36:52 +0000
Subject: [PATCH 031/188] powerpc/kvm: Force selection of CONFIG_PPC_FPU

book3s/32 kvm is designed with the assumption that an FPU is always present. Force selection of FPU support in the kernel when building KVM.

Fixes: 7d68c8916950 ("powerpc/32s: Allow deselecting CONFIG_PPC_FPU on mpc832x")
Reported-by: kernel test robot
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/74461a99fa1466f361532ca794ca0753be3d9f86.1611038044.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kvm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 549591d9aaa2..e45644657d49 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -54,6 +54,7 @@ config KVM_BOOK3S_32
 	select KVM
 	select KVM_BOOK3S_32_HANDLER
 	select KVM_BOOK3S_PR_POSSIBLE
+	select PPC_FPU
 	help
 	  Support running unmodified book3s_32 guest kernels
 	  in virtual machines on book3s_32 host processors.

From 910a0cb6d259736a0c86e795d4c2f42af8d0d775 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Wed, 20 Jan 2021 07:49:13 +0000
Subject: [PATCH 032/188] powerpc/47x: Disable 256k page size

PPC47x_TLBE_SIZE isn't defined for 256k pages, leading to a build break if 256k pages are selected. So change the kconfig so that 256k pages can't be selected for 47x.

Fixes: e7f75ad01d59 ("powerpc/47x: Base ppc476 support")
Reported-by: kernel test robot
Signed-off-by: Christophe Leroy
[mpe: Expand change log to mention build break]
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/2fed79b1154c872194f98bac4422c23918325e61.1611128938.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index eebb4a8c156c..ff64ac7d227d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -768,7 +768,7 @@ config PPC_64K_PAGES

 config PPC_256K_PAGES
 	bool "256k page size"
-	depends on 44x && !STDBINUTILS
+	depends on 44x && !STDBINUTILS && !PPC_47x
 	help
 	  Make the page size 256k.

From 4eeef098b43242ed145c83fba9989d586d707589 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Wed, 20 Jan 2021 07:49:14 +0000
Subject: [PATCH 033/188] powerpc/44x: Remove STDBINUTILS kconfig option

STDBINUTILS is just a toggle to allow the 256k page size to appear in the possible page sizes list for the 44x. Make the 256k page size option appear all the time with an explicit warning about binutils, and remove this unnecessary STDBINUTILS config option.

Signed-off-by: Christophe Leroy
[mpe: Incorporate help text changes from David Laight]
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/f9981e819009aa121a998dc483052ec76f78f991.1611128938.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/Kconfig | 26 ++++++--------------------
 1 file changed, 6 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index ff64ac7d227d..ef64d96bf316 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -716,18 +716,6 @@ config ARCH_MEMORY_PROBE
 	def_bool y
 	depends on MEMORY_HOTPLUG

-config STDBINUTILS
-	bool "Using standard binutils settings"
-	depends on 44x
-	default y
-	help
-	  Turning this option off allows you to select 256KB PAGE_SIZE on 44x.
- Note, that kernel will be able to run only those applications, - which had been compiled using binutils later than 2.17.50.0.3 with - '-zmax-page-size' set to 256K (the default is 64K). Or, if using - the older binutils, you can patch them with a trivial patch, which - changes the ELF_MAXPAGESIZE definition from 0x10000 to 0x40000. - choice prompt "Page size" default PPC_4K_PAGES @@ -767,17 +755,15 @@ config PPC_64K_PAGES select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64 config PPC_256K_PAGES - bool "256k page size" - depends on 44x && !STDBINUTILS && !PPC_47x + bool "256k page size (Requires non-standard binutils settings)" + depends on 44x && !PPC_47x help Make the page size 256k. - As the ELF standard only requires alignment to support page - sizes up to 64k, you will need to compile all of your user - space applications with a non-standard binutils settings - (see the STDBINUTILS description for details). - - Say N unless you know what you are doing. + The kernel will only be able to run applications that have been + compiled with '-zmax-page-size' set to 256K (the default is 64K) using + binutils later than 2.17.50.0.3, or by patching the ELF_MAXPAGESIZE + definition from 0x10000 to 0x40000 in older versions. endchoice From 8813ff49607eab3caaf40fe8929b0ce7dc68e85f Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Mon, 25 Jan 2021 18:36:22 +0530 Subject: [PATCH 034/188] powerpc/sstep: Check instruction validity against ISA version before emulation We currently unconditionally try to emulate newer instructions on older Power versions that could cause issues. Gate it. Fixes: 350779a29f11 ("powerpc: Handle most loads and stores in instruction emulation code") Signed-off-by: Ananth N Mavinakayanahalli Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/161157995977.64773.13794501093457185080.stgit@thinktux.local --- arch/powerpc/lib/sstep.c | 78 +++++++++++++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index bf7a7d62ae8b..f859cbbb6375 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1304,9 +1304,11 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, if ((word & 0xfe2) == 2) op->type = SYSCALL; else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && - (word & 0xfe3) == 1) + (word & 0xfe3) == 1) { /* scv */ op->type = SYSCALL_VECTORED_0; - else + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + } else op->type = UNKNOWN; return 0; #endif @@ -1410,7 +1412,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 1: if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; prefix_r = GET_PREFIX_R(word); ra = GET_PREFIX_RA(suffix); @@ -1444,7 +1446,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 4: if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; switch (word & 0x3f) { case 48: /* maddhd */ @@ -1530,6 +1532,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 19: if (((word >> 1) & 0x1f) == 2) { /* addpcis */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; imm = (short) (word & 0xffc1); /* d0 + d2 fields */ imm |= (word >> 15) & 0x3e; /* d1 field */ op->val = regs->nip + (imm << 16) + 4; @@ -1842,7 +1846,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 265: /* modud */ if 
(!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = regs->gpr[ra] % regs->gpr[rb]; goto compute_done; #endif @@ -1852,7 +1856,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 267: /* moduw */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = (unsigned int) regs->gpr[ra] % (unsigned int) regs->gpr[rb]; goto compute_done; @@ -1889,7 +1893,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #endif case 755: /* darn */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; switch (ra & 0x3) { case 0: /* 32-bit conditioned */ @@ -1911,14 +1915,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 777: /* modsd */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = (long int) regs->gpr[ra] % (long int) regs->gpr[rb]; goto compute_done; #endif case 779: /* modsw */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = (int) regs->gpr[ra] % (int) regs->gpr[rb]; goto compute_done; @@ -1995,14 +1999,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #endif case 538: /* cnttzw */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; val = (unsigned int) regs->gpr[rd]; op->val = (val ? __builtin_ctz(val) : 32); goto logical_done; #ifdef __powerpc64__ case 570: /* cnttzd */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; val = regs->gpr[rd]; op->val = (val ? __builtin_ctzl(val) : 64); goto logical_done; @@ -2112,7 +2116,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 890: /* extswsli with sh_5 = 0 */ case 891: /* extswsli with sh_5 = 1 */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->type = COMPUTE + SETREG; sh = rb | ((word & 2) << 4); val = (signed int) regs->gpr[rd]; @@ -2439,6 +2443,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 268: /* lxvx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 16; @@ -2448,6 +2454,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 269: /* lxvl */ case 301: { /* lxvll */ int nb; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->ea = ra ? 
regs->gpr[ra] : 0; nb = regs->gpr[rb] & 0xff; @@ -2468,13 +2476,15 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 333: /* lxvpx */ if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; op->reg = VSX_REGISTER_XTP(rd); op->type = MKOP(LOAD_VSX, 0, 32); op->element_size = 32; break; case 364: /* lxvwsx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 4; @@ -2482,6 +2492,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 396: /* stxvx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 16; @@ -2491,6 +2503,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 397: /* stxvl */ case 429: { /* stxvll */ int nb; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->ea = ra ? regs->gpr[ra] : 0; nb = regs->gpr[rb] & 0xff; @@ -2504,7 +2518,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, } case 461: /* stxvpx */ if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; op->reg = VSX_REGISTER_XTP(rd); op->type = MKOP(STORE_VSX, 0, 32); op->element_size = 32; @@ -2542,6 +2556,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 781: /* lxsibzx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 1); op->element_size = 8; @@ -2549,6 +2565,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 812: /* lxvh8x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 2; @@ -2556,6 +2574,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 813: /* lxsihzx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 2); op->element_size = 8; @@ -2569,6 +2589,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 876: /* lxvb16x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 1; @@ -2582,6 +2604,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 909: /* stxsibx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 1); op->element_size = 8; @@ -2589,6 +2613,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 940: /* stxvh8x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 2; @@ -2596,6 +2622,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 941: /* stxsihx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 2); op->element_size = 8; @@ -2609,6 +2637,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 1004: /* stxvb16x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg 
= rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 1; @@ -2717,12 +2747,16 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->type = MKOP(LOAD_FP, 0, 16); break; case 2: /* lxsd */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd + 32; op->type = MKOP(LOAD_VSX, 0, 8); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 3: /* lxssp */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd + 32; op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 8; @@ -2752,7 +2786,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef CONFIG_VSX case 6: if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; op->ea = dqform_ea(word, regs); op->reg = VSX_REGISTER_XTP(rd); op->element_size = 32; @@ -2775,6 +2809,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 1: /* lxv */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->ea = dqform_ea(word, regs); if (word & 8) op->reg = rd + 32; @@ -2785,6 +2821,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 2: /* stxsd with LSB of DS field = 0 */ case 6: /* stxsd with LSB of DS field = 1 */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->ea = dsform_ea(word, regs); op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 8); @@ -2794,6 +2832,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 3: /* stxssp with LSB of DS field = 0 */ case 7: /* stxssp with LSB of DS field = 1 */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->ea = dsform_ea(word, regs); op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 4); @@ -2802,6 +2842,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 5: /* stxv */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->ea = dqform_ea(word, regs); if (word & 8) op->reg = rd + 32; @@ -2831,7 +2873,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 1: /* Prefixed instructions */ if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; prefix_r = GET_PREFIX_R(word); ra = GET_PREFIX_RA(suffix); @@ -2980,6 +3022,10 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 0; + unknown_opcode: + op->type = UNKNOWN; + return 0; + logical_done: if (word & 1) set_cr0(regs, op); From 718aae916fa6619c57c348beaedd675835cf1aa1 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Mon, 25 Jan 2021 18:36:43 +0530 Subject: [PATCH 035/188] powerpc/sstep: Fix incorrect return from analyze_instr() We currently just percolate the return value from analyze_instr() to the caller of emulate_step(), especially if it is a -1. For one particular case (opcode = 4) for instructions that aren't currently emulated, we are returning 'should not be single-stepped' while we should have returned 0 which says 'did not emulate, may have to single-step'. Fixes: 930d6288a26787 ("powerpc: sstep: Add support for maddhd, maddhdu, maddld instructions") Signed-off-by: Ananth N Mavinakayanahalli Suggested-by: Michael Ellerman Tested-by: Naveen N. 
Rao Reviewed-by: Sandipan Das Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/161157999039.64773.14950289716779364766.stgit@thinktux.local --- arch/powerpc/lib/sstep.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index f859cbbb6375..e96cff845ef7 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1445,6 +1445,11 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 4: + /* + * There are very many instructions with this primary opcode + * introduced in the ISA as early as v2.03. However, the ones + * we currently emulate were all introduced with ISA 3.0 + */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) goto unknown_opcode; @@ -1472,7 +1477,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, * There are other instructions from ISA 3.0 with the same * primary opcode which do not have emulation support yet. */ - return -1; + goto unknown_opcode; #endif case 7: /* mulli */ From db82f7097c265776c22ad866511074836f17665e Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 3 Nov 2020 15:45:01 +1100 Subject: [PATCH 036/188] selftests/powerpc: Hoist helper code out of eeh-basic Hoist some of the useful test environment checking and prep code into eeh-functions.sh so they can be reused in other tests. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103044503.917128-1-oohall@gmail.com --- .../selftests/powerpc/eeh/eeh-basic.sh | 39 ++------------- .../selftests/powerpc/eeh/eeh-functions.sh | 48 +++++++++++++++++++ 2 files changed, 51 insertions(+), 36 deletions(-) mode change 100755 => 100644 tools/testing/selftests/powerpc/eeh/eeh-functions.sh diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh index 64779f073e17..442b666ccdb5 100755 --- a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh +++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh @@ -1,28 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0-only -KSELFTESTS_SKIP=4 - . ./eeh-functions.sh -if ! eeh_supported ; then - echo "EEH not supported on this system, skipping" - exit $KSELFTESTS_SKIP; -fi - -if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \ - [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then - echo "debugfs EEH testing files are missing. Is debugfs mounted?" - exit $KSELFTESTS_SKIP; -fi +eeh_test_prep # NB: may exit pre_lspci=`mktemp` lspci > $pre_lspci -# Bump the max freeze count to something absurd so we don't -# trip over it while breaking things. -echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes - # record the devices that we break in here. Assuming everything # goes to plan we should get them back once the recover process # is finished. @@ -30,34 +15,16 @@ devices="" # Build up a list of candidate devices. for dev in `ls -1 /sys/bus/pci/devices/ | grep '\.0$'` ; do - # skip bridges since we can't recover them (yet...) - if [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] ; then - echo "$dev, Skipped: bridge" + if ! eeh_can_break $dev ; then continue; fi - # Skip VFs for now since we don't have a reliable way - # to break them. + # Skip VFs for now since we don't have a reliable way to break them. 
if [ -e "/sys/bus/pci/devices/$dev/physfn" ] ; then echo "$dev, Skipped: virtfn" continue; fi - if [ "ahci" = "$(basename $(realpath /sys/bus/pci/devices/$dev/driver))" ] ; then - echo "$dev, Skipped: ahci doesn't support recovery" - continue - fi - - # Don't inject errosr into an already-frozen PE. This happens with - # PEs that contain multiple PCI devices (e.g. multi-function cards) - # and injecting new errors during the recovery process will probably - # result in the recovery failing and the device being marked as - # failed. - if ! pe_ok $dev ; then - echo "$dev, Skipped: Bad initial PE state" - continue; - fi - echo "$dev, Added" # Add to this list of device to check diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh old mode 100755 new mode 100644 index 00dc32c0ed75..9b1bcc1fd4ad --- a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh +++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh @@ -1,6 +1,8 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0-only +export KSELFTESTS_SKIP=4 + pe_ok() { local dev="$1" local path="/sys/bus/pci/devices/$dev/eeh_pe_state" @@ -39,6 +41,52 @@ eeh_supported() { grep -q 'EEH Subsystem is enabled' /proc/powerpc/eeh } +eeh_test_prep() { + if ! eeh_supported ; then + echo "EEH not supported on this system, skipping" + exit $KSELFTESTS_SKIP; + fi + + if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \ + [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then + echo "debugfs EEH testing files are missing. Is debugfs mounted?" + exit $KSELFTESTS_SKIP; + fi + + # Bump the max freeze count to something absurd so we don't + # trip over it while breaking things. + echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes +} + +eeh_can_break() { + # skip bridges since we can't recover them (yet...) + if [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] ; then + echo "$dev, Skipped: bridge" + return 1; + fi + + # The ahci driver doesn't support error recovery. If the ahci device + # happens to be hosting the root filesystem, and then we go and break + # it the system will generally go down. We should probably fix that + # at some point + if [ "ahci" = "$(basename $(realpath /sys/bus/pci/devices/$dev/driver))" ] ; then + echo "$dev, Skipped: ahci doesn't support recovery" + return 1; + fi + + # Don't inject errosr into an already-frozen PE. This happens with + # PEs that contain multiple PCI devices (e.g. multi-function cards) + # and injecting new errors during the recovery process will probably + # result in the recovery failing and the device being marked as + # failed. + if ! pe_ok $dev ; then + echo "$dev, Skipped: Bad initial PE state" + return 1; + fi + + return 0 +} + eeh_one_dev() { local dev="$1" From d6749ccba7ff86f99b4672e50db871487ba69f19 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 3 Nov 2020 15:45:02 +1100 Subject: [PATCH 037/188] selftests/powerpc: Use stderr for debug messages in eeh-functions We want to use stdout to return lists of devices, etc so log debug / status messages to stderr rather than stdout. 
Signed-off-by: Oliver O'Halloran
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201103044503.917128-2-oohall@gmail.com
---
 .../selftests/powerpc/eeh/eeh-functions.sh | 20 +++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
index 9b1bcc1fd4ad..32e5b7fbf18a 100644
--- a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
+++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
@@ -3,6 +3,10 @@

 export KSELFTESTS_SKIP=4

+log() {
+	echo >/dev/stderr $*
+}
+
 pe_ok() {
 	local dev="$1"
 	local path="/sys/bus/pci/devices/$dev/eeh_pe_state"
@@ -49,7 +53,7 @@ eeh_test_prep() {

 	if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \
 	   [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
-		echo "debugfs EEH testing files are missing. Is debugfs mounted?"
+		log "debugfs EEH testing files are missing. Is debugfs mounted?"
 		exit $KSELFTESTS_SKIP;
 	fi
@@ -61,7 +65,7 @@ eeh_test_prep() {
 eeh_can_break() {
 	# skip bridges since we can't recover them (yet...)
 	if [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] ; then
-		echo "$dev, Skipped: bridge"
+		log "$dev, Skipped: bridge"
 		return 1;
 	fi
@@ -70,7 +74,7 @@ eeh_can_break() {
 	# it the system will generally go down. We should probably fix that
 	# at some point
 	if [ "ahci" = "$(basename $(realpath /sys/bus/pci/devices/$dev/driver))" ] ; then
-		echo "$dev, Skipped: ahci doesn't support recovery"
+		log "$dev, Skipped: ahci doesn't support recovery"
 		return 1;
 	fi
@@ -80,7 +84,7 @@ eeh_can_break() {
 	# result in the recovery failing and the device being marked as
 	# failed.
 	if ! pe_ok $dev ; then
-		echo "$dev, Skipped: Bad initial PE state"
+		log "$dev, Skipped: Bad initial PE state"
 		return 1;
 	fi
@@ -94,7 +98,7 @@ eeh_one_dev() {
 	# testing so check that the argument is a well-formed sysfs device
 	# name.
 	if ! test -e /sys/bus/pci/devices/$dev/ ; then
-		echo "Error: '$dev' must be a sysfs device name (DDDD:BB:DD.F)"
+		log "Error: '$dev' must be a sysfs device name (DDDD:BB:DD.F)"
 		return 1;
 	fi
@@ -118,16 +122,16 @@ eeh_one_dev() {
 		if pe_ok $dev ; then
 			break;
 		fi
-		echo "$dev, waited $i/${max_wait}"
+		log "$dev, waited $i/${max_wait}"
 		sleep 1
 	done

 	if ! pe_ok $dev ; then
-		echo "$dev, Failed to recover!"
+		log "$dev, Failed to recover!"
 		return 1;
 	fi

-	echo "$dev, Recovered after $i seconds"
+	log "$dev, Recovered after $i seconds"
 	return 0;
 }

From 38132cc0e5a6b22b04fac2e4df25c59435fcd6de Mon Sep 17 00:00:00 2001
From: Oliver O'Halloran
Date: Tue, 3 Nov 2020 15:45:03 +1100
Subject: [PATCH 038/188] selftests/powerpc: Add VF recovery tests

The basic EEH test ignores VFs because of the way the eeh_dev_break debugfs interface works: if multiple VFs are enabled we may cause errors on all of them. However, we can work around that by only enabling a single VF at a time.

This patch adds some infrastructure for finding SR-IOV capable devices and enabling / disabling VFs so we can exercise the VF specific EEH recovery paths.

Two new tests are added, one for testing EEH aware devices and one for EEH unaware VFs.
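For reference, the single-VF dance described above needs nothing beyond the standard sysfs SR-IOV attributes; a hedged sketch (the device address is made up):

	pf=/sys/bus/pci/devices/0001:00:00.0
	echo 0 > "$pf/sriov_numvfs"	# start from a clean state
	echo 1 > "$pf/sriov_numvfs"	# enable exactly one VF
	vf="$(basename "$(realpath "$pf/virtfn0")")"	# name of that VF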
Signed-off-by: Oliver O'Halloran
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201103044503.917128-3-oohall@gmail.com
---
 .../selftests/powerpc/eeh/eeh-functions.sh | 108 ++++++++++++++++++
 .../selftests/powerpc/eeh/eeh-vf-aware.sh  |  45 ++++++++
 .../selftests/powerpc/eeh/eeh-vf-unaware.sh |  35 ++++++
 3 files changed, 188 insertions(+)
 create mode 100755 tools/testing/selftests/powerpc/eeh/eeh-vf-aware.sh
 create mode 100755 tools/testing/selftests/powerpc/eeh/eeh-vf-unaware.sh

diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
index 32e5b7fbf18a..70daa3925dcb 100644
--- a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
+++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
@@ -135,3 +135,111 @@ eeh_one_dev() {
 	return 0;
 }

+eeh_has_driver() {
+	test -e /sys/bus/pci/devices/$1/driver;
+	return $?
+}
+
+eeh_can_recover() {
+	# we'll get an IO error if the device's current driver doesn't support
+	# error recovery
+	echo $1 > '/sys/kernel/debug/powerpc/eeh_dev_can_recover' 2>/dev/null
+
+	return $?
+}
+
+eeh_find_all_pfs() {
+	devices=""

+	# SR-IOV on pseries requires hypervisor support, so check for that
+	is_pseries=""
+	if grep -q pSeries /proc/cpuinfo ; then
+		if [ ! -f /proc/device-tree/rtas/ibm,open-sriov-allow-unfreeze ] ||
+		   [ ! -f /proc/device-tree/rtas/ibm,open-sriov-map-pe-number ] ; then
+			return 1;
+		fi
+
+		is_pseries="true"
+	fi
+
+	for dev in `ls -1 /sys/bus/pci/devices/` ; do
+		sysfs="/sys/bus/pci/devices/$dev"
+		if [ ! -e "$sysfs/sriov_numvfs" ] ; then
+			continue
+		fi
+
+		# skip unsupported PFs on pseries
+		if [ -z "$is_pseries" ] &&
+		   [ ! -f "$sysfs/of_node/ibm,is-open-sriov-pf" ] &&
+		   [ ! -f "$sysfs/of_node/ibm,open-sriov-vf-bar-info" ] ; then
+			continue;
+		fi
+
+		# no driver, no vfs
+		if ! eeh_has_driver $dev ; then
+			continue
+		fi
+
+		devices="$devices $dev"
+	done
+
+	if [ -z "$devices" ] ; then
+		return 1;
+	fi
+
+	echo $devices
+	return 0;
+}
+
+# attempts to enable one VF on each PF so we can do VF specific tests.
+# stdout: list of enabled VFs, one per line
+# return code: 0 if vfs are found, 1 otherwise
+eeh_enable_vfs() {
+	pf_list="$(eeh_find_all_pfs)"
+
+	vfs=0
+	for dev in $pf_list ; do
+		pf_sysfs="/sys/bus/pci/devices/$dev"
+
+		# make sure we have a single VF
+		echo 0 > "$pf_sysfs/sriov_numvfs"
+		echo 1 > "$pf_sysfs/sriov_numvfs"
+		if [ "$?" != 0 ] ; then
+			log "Unable to enable VFs on $dev, skipping"
+			continue;
+		fi
+
+		vf="$(basename $(realpath "$pf_sysfs/virtfn0"))"
+		if [ $? != 0 ] ; then
+			log "unable to find enabled vf on $dev"
+			echo 0 > "$pf_sysfs/sriov_numvfs"
+			continue;
+		fi
+
+		if ! eeh_can_break $vf ; then
+			log "skipping $vf"
+
+			echo 0 > "$pf_sysfs/sriov_numvfs"
+			continue;
+		fi
+
+		vfs="$((vfs + 1))"
+		echo $vf
+	done
+
+	test "$vfs" != 0
+	return $?
+}
+
+eeh_disable_vfs() {
+	pf_list="$(eeh_find_all_pfs)"
+	if [ -z "$pf_list" ] ; then
+		return 1;
+	fi
+
+	for dev in $pf_list ; do
+		echo 0 > "/sys/bus/pci/devices/$dev/sriov_numvfs"
+	done
+
+	return 0;
+}
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-vf-aware.sh b/tools/testing/selftests/powerpc/eeh/eeh-vf-aware.sh
new file mode 100755
index 000000000000..874c11953bb6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/eeh/eeh-vf-aware.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+. ./eeh-functions.sh
+
+eeh_test_prep # NB: may exit
+
+vf_list="$(eeh_enable_vfs)";
+if [ $? -ne 0 ] ; then
+	log "No usable VFs found. Skipping EEH aware VF test"
+	exit $KSELFTESTS_SKIP;
+fi
+
+log "Enabled VFs: $vf_list"
+
+tested=0
+passed=0
+for vf in $vf_list ; do
+	log "Testing $vf"
+
+	if ! eeh_can_recover $vf ; then
+		log "Driver for $vf doesn't support error recovery, skipping"
+		continue;
+	fi
+
+	tested="$((tested + 1))"
+
+	log "Breaking $vf..."
+	if ! eeh_one_dev $vf ; then
+		log "$vf failed to recover"
+		continue;
+	fi
+
+	passed="$((passed + 1))"
+done
+
+eeh_disable_vfs
+
+if [ "$tested" -eq 0 ] ; then
+	echo "No VFs with EEH aware drivers found, skipping"
+	exit $KSELFTESTS_SKIP
+fi
+
+test "$passed" -eq "$tested"
+exit $?;
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-vf-unaware.sh b/tools/testing/selftests/powerpc/eeh/eeh-vf-unaware.sh
new file mode 100755
index 000000000000..8a4c147b9d43
--- /dev/null
+++ b/tools/testing/selftests/powerpc/eeh/eeh-vf-unaware.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+. ./eeh-functions.sh
+
+eeh_test_prep # NB: may exit
+
+vf_list="$(eeh_enable_vfs)";
+if [ $? -ne 0 ] ; then
+	log "No usable VFs found. Skipping EEH unaware VF test"
+	exit $KSELFTESTS_SKIP;
+fi
+
+log "Enabled VFs: $vf_list"
+
+failed=0
+for vf in $vf_list ; do
+	log "Testing $vf"
+
+	if eeh_can_recover $vf ; then
+		log "Driver for $vf supports error recovery. Unbinding..."
+		echo "$vf" > /sys/bus/pci/devices/$vf/driver/unbind
+	fi
+
+	log "Breaking $vf..."
+	if ! eeh_one_dev $vf ; then
+		log "$vf failed to recover"
+		failed="$((failed + 1))"
+	fi
+done
+
+eeh_disable_vfs
+
+test "$failed" -eq 0
+exit $?;

From b5e904b83067bbbd7dc83ea3734c119f8796d79f Mon Sep 17 00:00:00 2001
From: Oliver O'Halloran
Date: Tue, 3 Nov 2020 16:15:11 +1100
Subject: [PATCH 039/188] powerpc/eeh: Rework pci_dev lookup in debugfs attributes

Pull the string -> pci_dev lookup stuff into a helper function. No functional change.
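The new helper returns ERR_PTR() codes in the usual kernel style, so every caller collapses to the same short pattern (a sketch only, mirroring the diff below):

	pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos);
	if (IS_ERR(pdev))
		return PTR_ERR(pdev);
	/* ... act on pdev ... */
	pci_dev_put(pdev);	/* drop the reference taken by the lookup */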
Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103051512.919333-1-oohall@gmail.com --- arch/powerpc/kernel/eeh.c | 71 ++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 813713c9120c..f9182ff57804 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1596,6 +1596,35 @@ static int proc_eeh_show(struct seq_file *m, void *v) } #ifdef CONFIG_DEBUG_FS + + +static struct pci_dev *eeh_debug_lookup_pdev(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + uint32_t domain, bus, dev, fn; + struct pci_dev *pdev; + char buf[20]; + int ret; + + memset(buf, 0, sizeof(buf)); + ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count); + if (!ret) + return ERR_PTR(-EFAULT); + + ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn); + if (ret != 4) { + pr_err("%s: expected 4 args, got %d\n", __func__, ret); + return ERR_PTR(-EINVAL); + } + + pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn); + if (!pdev) + return ERR_PTR(-ENODEV); + + return pdev; +} + static int eeh_enable_dbgfs_set(void *data, u64 val) { if (val) @@ -1688,26 +1717,13 @@ static ssize_t eeh_dev_check_write(struct file *filp, const char __user *user_buf, size_t count, loff_t *ppos) { - uint32_t domain, bus, dev, fn; struct pci_dev *pdev; struct eeh_dev *edev; - char buf[20]; int ret; - memset(buf, 0, sizeof(buf)); - ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count); - if (!ret) - return -EFAULT; - - ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn); - if (ret != 4) { - pr_err("%s: expected 4 args, got %d\n", __func__, ret); - return -EINVAL; - } - - pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn); - if (!pdev) - return -ENODEV; + pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); edev = pci_dev_to_eeh_dev(pdev); if (!edev) { @@ -1717,8 +1733,8 @@ static ssize_t eeh_dev_check_write(struct file *filp, } ret = eeh_dev_check_failure(edev); - pci_info(pdev, "eeh_dev_check_failure(%04x:%02x:%02x.%01x) = %d\n", - domain, bus, dev, fn, ret); + pci_info(pdev, "eeh_dev_check_failure(%s) = %d\n", + pci_name(pdev), ret); pci_dev_put(pdev); @@ -1829,25 +1845,12 @@ static ssize_t eeh_dev_break_write(struct file *filp, const char __user *user_buf, size_t count, loff_t *ppos) { - uint32_t domain, bus, dev, fn; struct pci_dev *pdev; - char buf[20]; int ret; - memset(buf, 0, sizeof(buf)); - ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count); - if (!ret) - return -EFAULT; - - ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn); - if (ret != 4) { - pr_err("%s: expected 4 args, got %d\n", __func__, ret); - return -EINVAL; - } - - pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn); - if (!pdev) - return -ENODEV; + pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); ret = eeh_debugfs_break_device(pdev); pci_dev_put(pdev); From 9e857416833d9701a406ecd6f03a695405ada5e6 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 3 Nov 2020 16:15:12 +1100 Subject: [PATCH 040/188] powerpc/eeh: Add a debugfs interface to check if a driver supports recovery If a PCI device's current driver implements the error handling callbacks EEH can use them to recover the device after an error occurs. 
For devices without the error handling callbacks, we recover by removing the device and re-scanning it, so the PCI core puts the device back into a known good state. Currently there's no way for userspace to determine if the driver supports recovery or not, which makes it difficult to write automated tests for EEH. This patch addresses that by adding a debugfs interface for querying if a specific device can be recovered or not.

Signed-off-by: Oliver O'Halloran
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201103051512.919333-2-oohall@gmail.com
---
 arch/powerpc/kernel/eeh.c | 50 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index f9182ff57804..cd60bc1c8701 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1868,6 +1868,53 @@ static const struct file_operations eeh_dev_break_fops = {
 	.read = eeh_debugfs_dev_usage,
 };

+static ssize_t eeh_dev_can_recover(struct file *filp,
+				   const char __user *user_buf,
+				   size_t count, loff_t *ppos)
+{
+	struct pci_driver *drv;
+	struct pci_dev *pdev;
+	size_t ret;
+
+	pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	/*
+	 * In order for error recovery to work the driver needs to implement
+	 * .error_detected(), so it can quiesce IO to the device, and
+	 * .slot_reset() so it can re-initialise the device after a reset.
+	 *
+	 * Ideally they'd implement .resume() too, but some drivers which
+	 * we need to support (notably IPR) don't so I guess we can tolerate
+	 * that.
+	 *
+	 * .mmio_enabled() is mostly there as a work-around for devices which
+	 * take forever to re-init after a hot reset. Implementing that is
+	 * strictly optional.
+	 */
+	drv = pci_dev_driver(pdev);
+	if (drv &&
+	    drv->err_handler &&
+	    drv->err_handler->error_detected &&
+	    drv->err_handler->slot_reset) {
+		ret = count;
+	} else {
+		ret = -EOPNOTSUPP;
+	}
+
+	pci_dev_put(pdev);
+
+	return ret;
+}
+
+static const struct file_operations eeh_dev_can_recover_fops = {
+	.open = simple_open,
+	.llseek = no_llseek,
+	.write = eeh_dev_can_recover,
+	.read = eeh_debugfs_dev_usage,
+};
+
 #endif

 static int __init eeh_init_proc(void)
@@ -1892,6 +1939,9 @@ static int __init eeh_init_proc(void)
 		debugfs_create_file_unsafe("eeh_force_recover", 0600,
 				powerpc_debugfs_root, NULL,
 				&eeh_force_recover_fops);
+		debugfs_create_file_unsafe("eeh_dev_can_recover", 0600,
+				powerpc_debugfs_root, NULL,
+				&eeh_dev_can_recover_fops);
 		eeh_cache_debugfs_init();
 #endif
 	}

From 7bd2b120f3fdf8e5c6d9a343517a33c2a5108794 Mon Sep 17 00:00:00 2001
From: Oliver O'Halloran
Date: Wed, 2 Sep 2020 13:51:21 +1000
Subject: [PATCH 041/188] powerpc/pci: Delete traverse_pci_dn()

Nothing uses it.
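A quick way to verify the claim before deleting the function (illustrative command, run from a kernel source tree; no hits outside the declaration and definition are expected):

	$ git grep -n 'traverse_pci_dn'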
Signed-off-by: Oliver O'Halloran
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20200902035121.1762475-1-oohall@gmail.com
---
 arch/powerpc/include/asm/ppc-pci.h |  3 ---
 arch/powerpc/kernel/pci_dn.c       | 40 ------------------------------
 2 files changed, 43 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index 7f4be5a05eb3..583f8b589d10 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -32,9 +32,6 @@ struct pci_dn;
 void *pci_traverse_device_nodes(struct device_node *start,
 				void *(*fn)(struct device_node *, void *),
 				void *data);
-void *traverse_pci_dn(struct pci_dn *root,
-		      void *(*fn)(struct pci_dn *, void *),
-		      void *data);
 extern void pci_devs_phb_init_dynamic(struct pci_controller *phb);

 /* From rtas_pci.h */
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index e99b7c547d7e..54e240597fd9 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -443,46 +443,6 @@ void *pci_traverse_device_nodes(struct device_node *start,
 }
 EXPORT_SYMBOL_GPL(pci_traverse_device_nodes);

-static struct pci_dn *pci_dn_next_one(struct pci_dn *root,
-				      struct pci_dn *pdn)
-{
-	struct list_head *next = pdn->child_list.next;
-
-	if (next != &pdn->child_list)
-		return list_entry(next, struct pci_dn, list);
-
-	while (1) {
-		if (pdn == root)
-			return NULL;
-
-		next = pdn->list.next;
-		if (next != &pdn->parent->child_list)
-			break;
-
-		pdn = pdn->parent;
-	}
-
-	return list_entry(next, struct pci_dn, list);
-}
-
-void *traverse_pci_dn(struct pci_dn *root,
-		      void *(*fn)(struct pci_dn *, void *),
-		      void *data)
-{
-	struct pci_dn *pdn = root;
-	void *ret;
-
-	/* Only scan the child nodes */
-	for (pdn = pci_dn_next_one(root, pdn); pdn;
-	     pdn = pci_dn_next_one(root, pdn)) {
-		ret = fn(pdn, data);
-		if (ret)
-			return ret;
-	}
-
-	return NULL;
-}
-
 static void *add_pdn(struct device_node *dn, void *data)
 {
 	struct pci_controller *hose = data;

From ed5b00a05c2ae95b59adc3442f45944ec632e794 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?=
Date: Fri, 22 Jan 2021 08:50:29 +0100
Subject: [PATCH 042/188] powerpc/prom: Fix "ibm,arch-vec-5-platform-support" scan
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The "ibm,arch-vec-5-platform-support" property is a list of pairs of bytes representing the options and values supported by the platform firmware. At boot time, Linux scans this list and activates the available features it recognizes: Radix and XIVE.

A recent change modified the number of entries to loop on, so that 8 bytes, i.e. 4 { option, value } pairs, are always scanned. This is fine on KVM but not on PowerVM, which can advertise fewer. As a consequence, on this platform Linux reads extra entries pointing to random data, interprets these as available features and tries to activate them, leading to a firmware crash in ibm,client-architecture-support.

Fix that by using the property length of "ibm,arch-vec-5-platform-support".
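For clarity, the fix simply bounds the walk over the { option, value } byte pairs with the firmware-supplied length instead of the size of the local buffer; a sketch of the loop (the full diff follows below):

	/* prop_len bytes were returned by firmware; never scan past them */
	for (i = 0; i < prop_len; i += 2)
		prom_parse_platform_support(vec[i], vec[i + 1], &supported);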
Fixes: ab91239942a9 ("powerpc/prom: Remove VLA in prom_check_platform_support()")
Cc: stable@vger.kernel.org # v4.20+
Signed-off-by: Cédric Le Goater
Reviewed-by: Fabiano Rosas
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20210122075029.797013-1-clg@kaod.org
---
 arch/powerpc/kernel/prom_init.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index e9d4eb6144e1..ccf77b985c8f 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1331,14 +1331,10 @@ static void __init prom_check_platform_support(void)
 		if (prop_len > sizeof(vec))
 			prom_printf("WARNING: ibm,arch-vec-5-platform-support longer than expected (len: %d)\n",
 				    prop_len);
-		prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support",
-			     &vec, sizeof(vec));
-		for (i = 0; i < sizeof(vec); i += 2) {
-			prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2
-				   , vec[i]
-				   , vec[i + 1]);
-			prom_parse_platform_support(vec[i], vec[i + 1],
-						    &supported);
+		prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", &vec, sizeof(vec));
+		for (i = 0; i < prop_len; i += 2) {
+			prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2, vec[i], vec[i + 1]);
+			prom_parse_platform_support(vec[i], vec[i + 1], &supported);
 		}
 	}
 }

From b709e32ef570b8b91dfbcb63cffac4324c87799f Mon Sep 17 00:00:00 2001
From: Pingfan Liu
Date: Thu, 22 Oct 2020 14:51:19 +0800
Subject: [PATCH 043/188] powerpc/time: Enable sched clock for irqtime

When CONFIG_IRQ_TIME_ACCOUNTING and CONFIG_VIRT_CPU_ACCOUNTING_GEN are selected, powerpc does not enable "sched_clock_irqtime" and cannot utilize irq time accounting.

Like x86, powerpc does not use the sched_clock_register() interface, so it needs a dedicated call to enable_sched_clock_irqtime() to enable irq time accounting.

Fixes: 518470fe962e ("powerpc: Add HAVE_IRQ_TIME_ACCOUNTING")
Signed-off-by: Pingfan Liu
[mpe: Add fixes tag]
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/1603349479-26185-1-git-send-email-kernelfans@gmail.com
---
 arch/powerpc/kernel/time.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 67feb3524460..83633a24ce78 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -53,6 +53,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -1030,6 +1031,7 @@ void __init time_init(void)
 	tick_setup_hrtimer_broadcast();

 	of_clk_init(NULL);
+	enable_sched_clock_irqtime();
 }

 /*

From 17c5cf0fb993e219bda4f53aa9ec90d3cfcf92ab Mon Sep 17 00:00:00 2001
From: Ganesh Goudar
Date: Thu, 28 Jan 2021 16:11:42 +0530
Subject: [PATCH 044/188] powerpc/mce: Reduce the size of event arrays

The maximum recursive depth of MCE is 4. Considering the maximum depth allowed, reduce the size of the event arrays from 100 to 10 entries. This saves us ~19kB of memory and has no fatal consequences.
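(A hedged back-of-the-envelope check, not taken from the commit itself: three queues each losing 90 slots is 270 fewer struct machine_check_event entries per CPU, and at an assumed size of roughly 70 bytes per event that comes to about 19 kB, which matches the figure above.)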
Signed-off-by: Ganesh Goudar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210128104143.70668-1-ganeshgr@linux.ibm.com --- arch/powerpc/include/asm/mce.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index e6c27ae843dc..7d8b6679ec68 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -204,7 +204,7 @@ struct mce_error_info { bool ignore_event; }; -#define MAX_MC_EVT 100 +#define MAX_MC_EVT 10 /* Release flags for get_mce_event() */ #define MCE_EVENT_RELEASE true From 923b3cf00b3ffc896543bac99affc0fa8553e41a Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Thu, 28 Jan 2021 16:11:43 +0530 Subject: [PATCH 045/188] powerpc/mce: Remove per cpu variables from MCE handlers Access to per-cpu variables requires translation to be enabled on pseries machine running in hash mmu mode, Since part of MCE handler runs in realmode and part of MCE handling code is shared between ppc architectures pseries and powernv, it becomes difficult to manage these variables differently on different architectures, So have these variables in paca instead of having them as per-cpu variables to avoid complications. Signed-off-by: Ganesh Goudar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210128104143.70668-2-ganeshgr@linux.ibm.com --- arch/powerpc/include/asm/mce.h | 18 +++++++ arch/powerpc/include/asm/paca.h | 4 ++ arch/powerpc/kernel/mce.c | 79 ++++++++++++++++++------------ arch/powerpc/kernel/setup-common.c | 2 + 4 files changed, 71 insertions(+), 32 deletions(-) diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 7d8b6679ec68..331d944280b8 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -206,6 +206,17 @@ struct mce_error_info { #define MAX_MC_EVT 10 +struct mce_info { + int mce_nest_count; + struct machine_check_event mce_event[MAX_MC_EVT]; + /* Queue for delayed MCE events. */ + int mce_queue_count; + struct machine_check_event mce_event_queue[MAX_MC_EVT]; + /* Queue for delayed MCE UE events. 
*/ + int mce_ue_count; + struct machine_check_event mce_ue_event_queue[MAX_MC_EVT]; +}; + /* Release flags for get_mce_event() */ #define MCE_EVENT_RELEASE true #define MCE_EVENT_DONTRELEASE false @@ -234,4 +245,11 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs); long __machine_check_early_realmode_p9(struct pt_regs *regs); long __machine_check_early_realmode_p10(struct pt_regs *regs); #endif /* CONFIG_PPC_BOOK3S_64 */ + +#ifdef CONFIG_PPC_BOOK3S_64 +void mce_init(void); +#else +static inline void mce_init(void) { }; +#endif /* CONFIG_PPC_BOOK3S_64 */ + #endif /* __ASM_PPC64_MCE_H__ */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 9454d29ff4b4..38e0c55e845d 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -273,6 +274,9 @@ struct paca_struct { #ifdef CONFIG_MMIOWB struct mmiowb_state mmiowb_state; #endif +#ifdef CONFIG_PPC_BOOK3S_64 + struct mce_info *mce_info; +#endif /* CONFIG_PPC_BOOK3S_64 */ } ____cacheline_aligned; extern void copy_mm_to_paca(struct mm_struct *mm); diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index c381dc2f9858..3e3a84127c0e 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -17,23 +17,14 @@ #include #include #include +#include #include #include #include #include -static DEFINE_PER_CPU(int, mce_nest_count); -static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); - -/* Queue for delayed MCE events. */ -static DEFINE_PER_CPU(int, mce_queue_count); -static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue); - -/* Queue for delayed MCE UE events. */ -static DEFINE_PER_CPU(int, mce_ue_count); -static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], - mce_ue_event_queue); +#include "setup.h" static void machine_check_process_queued_event(struct irq_work *work); static void machine_check_ue_irq_work(struct irq_work *work); @@ -104,9 +95,10 @@ void save_mce_event(struct pt_regs *regs, long handled, struct mce_error_info *mce_err, uint64_t nip, uint64_t addr, uint64_t phys_addr) { - int index = __this_cpu_inc_return(mce_nest_count) - 1; - struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); + int index = local_paca->mce_info->mce_nest_count++; + struct machine_check_event *mce; + mce = &local_paca->mce_info->mce_event[index]; /* * Return if we don't have enough space to log mce event. * mce_nest_count may go beyond MAX_MC_EVT but that's ok, @@ -192,7 +184,7 @@ void save_mce_event(struct pt_regs *regs, long handled, */ int get_mce_event(struct machine_check_event *mce, bool release) { - int index = __this_cpu_read(mce_nest_count) - 1; + int index = local_paca->mce_info->mce_nest_count - 1; struct machine_check_event *mc_evt; int ret = 0; @@ -202,7 +194,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) /* Check if we have MCE info to process. */ if (index < MAX_MC_EVT) { - mc_evt = this_cpu_ptr(&mce_event[index]); + mc_evt = &local_paca->mce_info->mce_event[index]; /* Copy the event structure and release the original */ if (mce) *mce = *mc_evt; @@ -212,7 +204,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) } /* Decrement the count to free the slot. 
*/ if (release) - __this_cpu_dec(mce_nest_count); + local_paca->mce_info->mce_nest_count--; return ret; } @@ -234,13 +226,14 @@ static void machine_check_ue_event(struct machine_check_event *evt) { int index; - index = __this_cpu_inc_return(mce_ue_count) - 1; + index = local_paca->mce_info->mce_ue_count++; /* If queue is full, just return for now. */ if (index >= MAX_MC_EVT) { - __this_cpu_dec(mce_ue_count); + local_paca->mce_info->mce_ue_count--; return; } - memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt)); + memcpy(&local_paca->mce_info->mce_ue_event_queue[index], + evt, sizeof(*evt)); /* Queue work to process this event later. */ irq_work_queue(&mce_ue_event_irq_work); @@ -257,13 +250,14 @@ void machine_check_queue_event(void) if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; - index = __this_cpu_inc_return(mce_queue_count) - 1; + index = local_paca->mce_info->mce_queue_count++; /* If queue is full, just return for now. */ if (index >= MAX_MC_EVT) { - __this_cpu_dec(mce_queue_count); + local_paca->mce_info->mce_queue_count--; return; } - memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt)); + memcpy(&local_paca->mce_info->mce_event_queue[index], + &evt, sizeof(evt)); /* Queue irq work to process this event later. */ irq_work_queue(&mce_event_process_work); @@ -290,9 +284,9 @@ static void machine_process_ue_event(struct work_struct *work) int index; struct machine_check_event *evt; - while (__this_cpu_read(mce_ue_count) > 0) { - index = __this_cpu_read(mce_ue_count) - 1; - evt = this_cpu_ptr(&mce_ue_event_queue[index]); + while (local_paca->mce_info->mce_ue_count > 0) { + index = local_paca->mce_info->mce_ue_count - 1; + evt = &local_paca->mce_info->mce_ue_event_queue[index]; blocking_notifier_call_chain(&mce_notifier_list, 0, evt); #ifdef CONFIG_MEMORY_FAILURE /* @@ -305,7 +299,7 @@ static void machine_process_ue_event(struct work_struct *work) */ if (evt->error_type == MCE_ERROR_TYPE_UE) { if (evt->u.ue_error.ignore_event) { - __this_cpu_dec(mce_ue_count); + local_paca->mce_info->mce_ue_count--; continue; } @@ -321,7 +315,7 @@ static void machine_process_ue_event(struct work_struct *work) "was generated\n"); } #endif - __this_cpu_dec(mce_ue_count); + local_paca->mce_info->mce_ue_count--; } } /* @@ -339,17 +333,17 @@ static void machine_check_process_queued_event(struct irq_work *work) * For now just print it to console. * TODO: log this error event to FSP or nvram. 
*/ - while (__this_cpu_read(mce_queue_count) > 0) { - index = __this_cpu_read(mce_queue_count) - 1; - evt = this_cpu_ptr(&mce_event_queue[index]); + while (local_paca->mce_info->mce_queue_count > 0) { + index = local_paca->mce_info->mce_queue_count - 1; + evt = &local_paca->mce_info->mce_event_queue[index]; if (evt->error_type == MCE_ERROR_TYPE_UE && evt->u.ue_error.ignore_event) { - __this_cpu_dec(mce_queue_count); + local_paca->mce_info->mce_queue_count--; continue; } machine_check_print_event_info(evt, false, false); - __this_cpu_dec(mce_queue_count); + local_paca->mce_info->mce_queue_count--; } } @@ -742,3 +736,24 @@ long hmi_exception_realmode(struct pt_regs *regs) return 1; } + +void __init mce_init(void) +{ + struct mce_info *mce_info; + u64 limit; + int i; + + limit = min(ppc64_bolted_size(), ppc64_rma_size); + for_each_possible_cpu(i) { + mce_info = memblock_alloc_try_nid(sizeof(*mce_info), + __alignof__(*mce_info), + MEMBLOCK_LOW_LIMIT, + limit, cpu_to_node(i)); + if (!mce_info) + goto err; + paca_ptrs[i]->mce_info = mce_info; + } + return; +err: + panic("Failed to allocate memory for MCE event data\n"); +} diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 71f38e9248be..d480f091e0ad 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -64,6 +64,7 @@ #include #include #include +#include #include "setup.h" @@ -938,6 +939,7 @@ void __init setup_arch(char **cmdline_p) exc_lvl_early_init(); emergency_stack_init(); + mce_init(); smp_release_cpus(); initmem_init(); From 9899a56f1eca964cd0de21008a9fa1523a571231 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Wed, 20 Jan 2021 14:28:38 +0100 Subject: [PATCH 046/188] powerpc: Fix build error in paravirt.h ./arch/powerpc/include/asm/paravirt.h:83:44: error: implicit declaration of function 'smp_processor_id'; did you mean 'raw_smp_processor_id'? smp_processor_id is defined in linux/smp.h but it is not included. The build error happens only when the patch is applied to 5.3 kernel but it only works by chance in mainline. Fixes: ca3f969dcb11 ("powerpc/paravirt: Use is_kvm_guest() in vcpu_is_preempted()") Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210120132838.15589-1-msuchanek@suse.de --- arch/powerpc/include/asm/paravirt.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h index edc08f04aef7..5d1726bb28e7 100644 --- a/arch/powerpc/include/asm/paravirt.h +++ b/arch/powerpc/include/asm/paravirt.h @@ -10,6 +10,7 @@ #endif #ifdef CONFIG_PPC_SPLPAR +#include #include #include From c9790fb5df461c91d3fff1d864c1acb8baf5ad5c Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Sun, 10 May 2020 01:13:47 -0400 Subject: [PATCH 047/188] powerpc/powernv/pci: fix a RCU-list lock It is unsafe to traverse tbl->it_group_list without the RCU read lock. WARNING: suspicious RCU usage 5.7.0-rc4-next-20200508 #1 Not tainted ----------------------------- arch/powerpc/platforms/powernv/pci-ioda-tce.c:355 RCU-list traversed in non-reader section!! 
other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 3 locks held by qemu-kvm/4305: #0: c000000bc3fe6988 (&container->group_lock){++++}-{3:3}, at: vfio_fops_unl_ioctl+0x108/0x410 [vfio] #1: c00800000fcc7400 (&vfio.iommu_drivers_lock){+.+.}-{3:3}, at: vfio_fops_unl_ioctl+0x148/0x410 [vfio] #2: c000000bc3fe4d68 (&container->lock){+.+.}-{3:3}, at: tce_iommu_attach_group+0x3c/0x4f0 [vfio_iommu_spapr_tce] stack backtrace: CPU: 4 PID: 4305 Comm: qemu-kvm Not tainted 5.7.0-rc4-next-20200508 #1 Call Trace: [c0000010f29afa60] [c0000000007154c8] dump_stack+0xfc/0x174 (unreliable) [c0000010f29afab0] [c0000000001d8ff0] lockdep_rcu_suspicious+0x140/0x164 [c0000010f29afb30] [c0000000000dae2c] pnv_pci_unlink_table_and_group+0x11c/0x200 [c0000010f29afb70] [c0000000000d4a34] pnv_pci_ioda2_unset_window+0xc4/0x190 [c0000010f29afbf0] [c0000000000d4b4c] pnv_ioda2_take_ownership+0x4c/0xd0 [c0000010f29afc30] [c00800000fd60ee0] tce_iommu_attach_group+0x2c8/0x4f0 [vfio_iommu_spapr_tce] [c0000010f29afcd0] [c00800000fcc11a0] vfio_fops_unl_ioctl+0x238/0x410 [vfio] [c0000010f29afd50] [c0000000005430a8] ksys_ioctl+0xd8/0x130 [c0000010f29afda0] [c000000000543128] sys_ioctl+0x28/0x40 [c0000010f29afdc0] [c000000000038af4] system_call_exception+0x114/0x1e0 [c0000010f29afe20] [c00000000000c8f0] system_call_common+0xf0/0x278 Signed-off-by: Qian Cai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200510051347.1906-1-cai@lca.pw --- arch/powerpc/platforms/powernv/pci-ioda-tce.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c index 5218f5da2737..30551bbd7988 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -380,6 +380,8 @@ void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, /* Remove link to a group from table's list of attached groups */ found = false; + + rcu_read_lock(); list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { if (tgl->table_group == table_group) { list_del_rcu(&tgl->next); @@ -388,6 +390,8 @@ void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, break; } } + rcu_read_unlock(); + if (WARN_ON(!found)) return; From b5952f8125ae512420d5fc569adce591bea73bf5 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Sun, 10 May 2020 01:15:59 -0400 Subject: [PATCH 048/188] powerpc/mm/book3s64/iommu: fix some RCU-list locks It is safe to traverse mm->context.iommu_group_mem_list with either mem_list_mutex or the RCU read lock held. Silence a few RCU-list false positive warnings and fix a few missing RCU read locks. arch/powerpc/mm/book3s64/iommu_api.c:330 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by qemu-kvm/4305: #0: c000000bc3fe4d68 (&container->lock){+.+.}-{3:3}, at: tce_iommu_ioctl.part.9+0xc7c/0x1870 [vfio_iommu_spapr_tce] #1: c000000001501910 (mem_list_mutex){+.+.}-{3:3}, at: mm_iommu_get+0x50/0x190 ==== arch/powerpc/mm/book3s64/iommu_api.c:132 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by qemu-kvm/4305: #0: c000000bc3fe4d68 (&container->lock){+.+.}-{3:3}, at: tce_iommu_ioctl.part.9+0xc7c/0x1870 [vfio_iommu_spapr_tce] #1: c000000001501910 (mem_list_mutex){+.+.}-{3:3}, at: mm_iommu_do_alloc+0x120/0x5f0 ==== arch/powerpc/mm/book3s64/iommu_api.c:292 RCU-list traversed in non-reader section!! 
other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by qemu-kvm/4312: #0: c000000ecafe23c8 (&vcpu->mutex){+.+.}-{3:3}, at: kvm_vcpu_ioctl+0xdc/0x950 [kvm] #1: c000000045e6c468 (&kvm->srcu){....}-{0:0}, at: kvmppc_h_put_tce+0x88/0x340 [kvm] ==== arch/powerpc/mm/book3s64/iommu_api.c:424 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by qemu-kvm/4312: #0: c000000ecafe23c8 (&vcpu->mutex){+.+.}-{3:3}, at: kvm_vcpu_ioctl+0xdc/0x950 [kvm] #1: c000000045e6c468 (&kvm->srcu){....}-{0:0}, at: kvmppc_h_put_tce+0x88/0x340 [kvm] Signed-off-by: Qian Cai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200510051559.1959-1-cai@lca.pw --- arch/powerpc/mm/book3s64/iommu_api.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c index 685d7bb3d26f..cd18e94d0843 100644 --- a/arch/powerpc/mm/book3s64/iommu_api.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c @@ -129,7 +129,8 @@ good_exit: mutex_lock(&mem_list_mutex); - list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) { + list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next, + lockdep_is_held(&mem_list_mutex)) { /* Overlap? */ if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) && (ua < (mem2->ua + @@ -289,6 +290,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, { struct mm_iommu_table_group_mem_t *mem, *ret = NULL; + rcu_read_lock(); list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua <= ua) && (ua + size <= mem->ua + @@ -297,6 +299,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, break; } } + rcu_read_unlock(); return ret; } @@ -327,7 +330,8 @@ struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm, mutex_lock(&mem_list_mutex); - list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next, + lockdep_is_held(&mem_list_mutex)) { if ((mem->ua == ua) && (mem->entries == entries)) { ret = mem; ++mem->used; @@ -421,6 +425,7 @@ bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa, struct mm_iommu_table_group_mem_t *mem; unsigned long end; + rcu_read_lock(); list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) continue; @@ -437,6 +442,7 @@ bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa, return true; } } + rcu_read_unlock(); return false; } From 245a389c6ded15a7d308dbe988aec8a96e8aa8cf Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 10 Dec 2020 14:14:07 +0100 Subject: [PATCH 049/188] cxl: Reduce scope for the variable “mm” in cxllib_get_PE_attributes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A local variable was used only within an if branch. Thus move the definition for the variable “mm” into the corresponding code block. This issue was detected by using the Coccinelle software.
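As a minimal before/after sketch of the pattern (illustrative shape only; the actual change is in the diff below):

    /* before: "mm" is visible to the whole function, even on paths that never use it */
    struct mm_struct *mm = NULL;
    ...
    if (task) {
            mm = get_task_mm(task);
            ...
    }

    /* after: "mm" exists only in the branch that uses it */
    if (task) {
            struct mm_struct *mm = get_task_mm(task);
            ...
    }

The narrower scope makes it clear at a glance that no other path needs to clean up "mm".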
Signed-off-by: Markus Elfring Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5cee2b25-71e0-15aa-fba6-12211b8308aa@web.de --- drivers/misc/cxl/cxllib.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c index 2a1783f32254..53b919856426 100644 --- a/drivers/misc/cxl/cxllib.c +++ b/drivers/misc/cxl/cxllib.c @@ -170,8 +170,6 @@ int cxllib_get_PE_attributes(struct task_struct *task, unsigned long translation_mode, struct cxllib_pe_attributes *attr) { - struct mm_struct *mm = NULL; - if (translation_mode != CXL_TRANSLATED_MODE && translation_mode != CXL_REAL_MODE) return -EINVAL; @@ -182,7 +180,7 @@ int cxllib_get_PE_attributes(struct task_struct *task, true); attr->lpid = mfspr(SPRN_LPID); if (task) { - mm = get_task_mm(task); + struct mm_struct *mm = get_task_mm(task); if (mm == NULL) return -EINVAL; /* From de060ac83e5c1fe5fb8c505a4e99c1fe4f70ff94 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 27 Aug 2019 13:34:02 +0200 Subject: [PATCH 050/188] powerpc/pseries: Delete an unnecessary kfree() call in dlpar_store() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A null pointer would be passed to a call of the function “kfree” immediately after a call of the function “kstrdup” failed at one place. Remove this superfluous function call. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Acked-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b46cc4ff-a14c-0c10-0c0c-95573a960178@web.de --- arch/powerpc/platforms/pseries/dlpar.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 16e86ba8aa20..2a783dc0cfa7 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -523,7 +523,6 @@ static ssize_t dlpar_store(struct class *class, struct class_attribute *attr, args = argbuf = kstrdup(buf, GFP_KERNEL); if (!argbuf) { pr_info("Could not allocate resources for DLPAR operation\n"); - kfree(argbuf); return -ENOMEM; } From 6e7a4da754f3087fa1f0839c1128aac233c21442 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 27 Aug 2019 13:37:56 +0200 Subject: [PATCH 051/188] powerpc/pseries: Delete an error message for a failed string duplication in dlpar_store() Omit an extra message for a memory allocation failure in this function. 
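The slab core already logs allocation failures (including a backtrace) for any request that does not pass __GFP_NOWARN, so a per-call-site message duplicates information. The error path shrinks to the usual minimal form, sketched here and shown in full in the diff below:

    args = argbuf = kstrdup(buf, GFP_KERNEL);
    if (!argbuf)
            return -ENOMEM;    /* the allocator has already warned */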
Signed-off-by: Markus Elfring Acked-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/535cfec2-782f-61ec-f6fb-c50186ead2af@web.de --- arch/powerpc/platforms/pseries/dlpar.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 2a783dc0cfa7..deb48b41d488 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -521,10 +521,8 @@ static ssize_t dlpar_store(struct class *class, struct class_attribute *attr, int rc; args = argbuf = kstrdup(buf, GFP_KERNEL); - if (!argbuf) { - pr_info("Could not allocate resources for DLPAR operation\n"); + if (!argbuf) return -ENOMEM; - } /* * Parse out the request from the user, this will be in the form: From 60aece416483fdf7e51728a3518438e0458bdabb Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 27 Aug 2019 08:44:20 +0200 Subject: [PATCH 052/188] powerpc/82xx: Delete an unnecessary of_node_put() call in pq2ads_pci_init_irq() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A null pointer would be passed to a call of the function “of_node_put” immediately after a call of the function “of_find_compatible_node” failed at one place. Remove this superfluous function call. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9c060a41-438b-6fb8-d549-37c72fae4898@web.de --- arch/powerpc/platforms/82xx/pq2ads-pci-pic.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c index 096cc0d59fd8..6cc054db7043 100644 --- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c +++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c @@ -123,7 +123,6 @@ int __init pq2ads_pci_init_irq(void) np = of_find_compatible_node(NULL, NULL, "fsl,pq2ads-pci-pic"); if (!np) { printk(KERN_ERR "No pci pic node in device tree.\n"); - of_node_put(np); goto out; } From c0cff7a17781f8b02b4837a9fc434a7eed322a14 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 27 Aug 2019 09:19:32 +0200 Subject: [PATCH 053/188] powerpc/82xx: Use common error handling code in pq2ads_pci_init_irq() Adjust jump targets so that a bit of exception handling can be better reused at the end of this function. This issue was detected by using the Coccinelle software. 
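Condensed, the target shape is the usual kernel unwind ladder (a sketch; the full function is in the diff below):

    int __init pq2ads_pci_init_irq(void)
    {
            int ret = -ENODEV;
            ...
            ret = 0;
            goto out_put_node;      /* success shares the of_node_put() */
    out_unmap_regs:
            iounmap(priv->regs);
    out_free_kmalloc:
            kfree(priv);
    out_unmap_irq:
            irq_dispose_mapping(irq);
    out_put_node:
            of_node_put(np);
    out:
            return ret;
    }

Each label releases one resource and falls through to the next, so every exit path performs exactly the cleanups it needs, in reverse order of acquisition.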
Signed-off-by: Markus Elfring Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1a4bafee-562f-5eb4-d2bd-34704f8c5ab3@web.de --- arch/powerpc/platforms/82xx/pq2ads-pci-pic.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c index 6cc054db7043..f82f75a6085c 100644 --- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c +++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c @@ -129,13 +129,11 @@ int __init pq2ads_pci_init_irq(void) irq = irq_of_parse_and_map(np, 0); if (!irq) { printk(KERN_ERR "No interrupt in pci pic node.\n"); - of_node_put(np); - goto out; + goto out_put_node; } priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) { - of_node_put(np); ret = -ENOMEM; goto out_unmap_irq; } @@ -160,17 +158,17 @@ int __init pq2ads_pci_init_irq(void) priv->host = host; irq_set_handler_data(irq, priv); irq_set_chained_handler(irq, pq2ads_pci_irq_demux); - - of_node_put(np); - return 0; + ret = 0; + goto out_put_node; out_unmap_regs: iounmap(priv->regs); out_free_kmalloc: kfree(priv); - of_node_put(np); out_unmap_irq: irq_dispose_mapping(irq); +out_put_node: + of_node_put(np); out: return ret; } From 675b963e2b6007818fe1b0a64b47be40c125246e Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 2 Jul 2019 14:41:42 +0200 Subject: [PATCH 054/188] powerpc/setup: Adjust six seq_printf() calls in show_cpuinfo() Several of these outputs are fixed strings or single characters that need no format processing. Thus improve the execution speed of this data output by using seq_puts() and seq_putc() instead of seq_printf() where no formatting is required. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5b62379e-a35f-4f56-f1b5-6350f76007e7@web.de --- arch/powerpc/kernel/setup-common.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index d480f091e0ad..bee984b1887b 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -238,18 +238,17 @@ static int show_cpuinfo(struct seq_file *m, void *v) maj = (pvr >> 8) & 0xFF; min = pvr & 0xFF; - seq_printf(m, "processor\t: %lu\n", cpu_id); - seq_printf(m, "cpu\t\t: "); + seq_printf(m, "processor\t: %lu\ncpu\t\t: ", cpu_id); if (cur_cpu_spec->pvr_mask && cur_cpu_spec->cpu_name) - seq_printf(m, "%s", cur_cpu_spec->cpu_name); + seq_puts(m, cur_cpu_spec->cpu_name); else seq_printf(m, "unknown (%08x)", pvr); if (cpu_has_feature(CPU_FTR_ALTIVEC)) - seq_printf(m, ", altivec supported"); + seq_puts(m, ", altivec supported"); - seq_printf(m, "\n"); + seq_putc(m, '\n'); #ifdef CONFIG_TAU if (cpu_has_feature(CPU_FTR_TAU)) { @@ -328,7 +327,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "bogomips\t: %lu.%02lu\n", loops_per_jiffy / (500000 / HZ), (loops_per_jiffy / (5000 / HZ)) % 100); - seq_printf(m, "\n"); + seq_putc(m, '\n'); /* If this is the last cpu, print the summary */ if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids) From 259149cf7c3c6195e6199e045ca988c31d081cab Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 18 Dec 2020 06:56:05 +0000 Subject: [PATCH 055/188] powerpc/32s: Only build hash code when CONFIG_PPC_BOOK3S_604 is selected It is now possible to build a book3s/32 kernel for CPUs without a hash table. Compile out the hash-related code when CONFIG_PPC_BOOK3S_604 is not selected.
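The guard applied throughout the assembly is, condensed from the hunks below:

    #ifdef CONFIG_PPC_BOOK3S_604
    BEGIN_MMU_FTR_SECTION
            bl      hash_page
    END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
    #endif

and the same CONFIG_PPC_BOOK3S_604 symbol drives the Makefile split, so a kernel configured only for 603-class (software-loaded TLB, no hash table) CPUs no longer carries the hash-table fault handlers at all.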
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/62df436454ef06e104cc334a0859a2878d7888d5.1608274548.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 12 ++++++++++++ arch/powerpc/mm/book3s32/Makefile | 4 +++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 349bf3f0c3af..c02024bce544 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -286,6 +286,7 @@ MachineCheck: DO_KVM 0x300 DataAccess: #ifdef CONFIG_VMAP_STACK +#ifdef CONFIG_PPC_BOOK3S_604 BEGIN_MMU_FTR_SECTION mtspr SPRN_SPRG_SCRATCH2,r10 mfspr r10, SPRN_SPRG_THREAD @@ -302,12 +303,14 @@ BEGIN_MMU_FTR_SECTION MMU_FTR_SECTION_ELSE b 1f ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) +#endif 1: EXCEPTION_PROLOG_0 handle_dar_dsisr=1 EXCEPTION_PROLOG_1 b handle_page_fault_tramp_1 #else /* CONFIG_VMAP_STACK */ EXCEPTION_PROLOG handle_dar_dsisr=1 get_and_save_dar_dsisr_on_stack r4, r5, r11 +#ifdef CONFIG_PPC_BOOK3S_604 BEGIN_MMU_FTR_SECTION andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h bne handle_page_fault_tramp_2 /* if not, try to put a PTE */ @@ -315,8 +318,11 @@ BEGIN_MMU_FTR_SECTION bl hash_page b handle_page_fault_tramp_1 MMU_FTR_SECTION_ELSE +#endif b handle_page_fault_tramp_2 +#ifdef CONFIG_PPC_BOOK3S_604 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) +#endif #endif /* CONFIG_VMAP_STACK */ /* Instruction access exception. */ @@ -332,12 +338,14 @@ InstructionAccess: mfspr r11, SPRN_SRR1 /* check whether user or kernel */ stw r11, SRR1(r10) mfcr r10 +#ifdef CONFIG_PPC_BOOK3S_604 BEGIN_MMU_FTR_SECTION andis. r11, r11, SRR1_ISI_NOPT@h /* no pte found? */ bne hash_page_isi .Lhash_page_isi_cont: mfspr r11, SPRN_SRR1 /* check whether user or kernel */ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#endif andi. r11, r11, MSR_PR EXCEPTION_PROLOG_1 @@ -348,9 +356,11 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) beq 1f /* if so, try to put a PTE */ li r3,0 /* into the hash table */ mr r4,r12 /* SRR0 is fault address */ +#ifdef CONFIG_PPC_BOOK3S_604 BEGIN_MMU_FTR_SECTION bl hash_page END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#endif #endif /* CONFIG_VMAP_STACK */ 1: mr r4,r12 andis. 
r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ @@ -683,6 +693,7 @@ handle_page_fault_tramp_2: EXC_XFER_LITE(0x300, handle_page_fault) #ifdef CONFIG_VMAP_STACK +#ifdef CONFIG_PPC_BOOK3S_604 .macro save_regs_thread thread stw r0, THR0(\thread) stw r3, THR3(\thread) @@ -754,6 +765,7 @@ fast_hash_page_return: mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r10, SPRN_SPRG_SCRATCH0 rfi +#endif /* CONFIG_PPC_BOOK3S_604 */ stack_overflow: vmap_stack_overflow_exception diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile index 3f972db17761..446d9de88ce4 100644 --- a/arch/powerpc/mm/book3s32/Makefile +++ b/arch/powerpc/mm/book3s32/Makefile @@ -6,4 +6,6 @@ ifdef CONFIG_KASAN CFLAGS_mmu.o += -DDISABLE_BRANCH_PROFILING endif -obj-y += mmu.o hash_low.o mmu_context.o tlb.o nohash_low.o +obj-y += mmu.o mmu_context.o +obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o +obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o From 30662217885d7341161924acf1665924d7d37d64 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 23 Dec 2020 09:38:48 +0000 Subject: [PATCH 056/188] powerpc/xmon: Enable breakpoints on 8xx Since commit 4ad8622dc548 ("powerpc/8xx: Implement hw_breakpoint"), 8xx has breakpoints so there is no reason to opt breakpoint logic out of xmon for the 8xx. Fixes: 4ad8622dc548 ("powerpc/8xx: Implement hw_breakpoint") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b0607f1113d1558e73476bb06db0ee16d31a6e5b.1608716197.git.christophe.leroy@csgroup.eu --- arch/powerpc/xmon/xmon.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index dcd817ca2edf..cec432eb9189 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1383,7 +1383,6 @@ static long check_bp_loc(unsigned long addr) return 1; } -#ifndef CONFIG_PPC_8xx static int find_free_data_bpt(void) { int i; @@ -1395,7 +1394,6 @@ static int find_free_data_bpt(void) printf("Couldn't find free breakpoint register\n"); return -1; } -#endif static void print_data_bpts(void) { @@ -1435,7 +1433,6 @@ bpt_cmds(void) cmd = inchar(); switch (cmd) { -#ifndef CONFIG_PPC_8xx static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n"; int mode; case 'd': /* bd - hardware data breakpoint */ @@ -1497,7 +1494,6 @@ bpt_cmds(void) force_enable_xmon(); } break; -#endif case 'c': if (!scanhex(&a)) { From 6895c5ba7bdcc55eacad03cf309ab23be63b9cac Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 23 Dec 2020 09:38:47 +0000 Subject: [PATCH 057/188] powerpc/xmon: Select CONSOLE_POLL for the 8xx Powerpc 8xx requires CONSOLE_POLL to get udbg_putc() and udbg_getc() in CPM uart driver. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3d10a274516e9be8c4b0dc679a2840cdc1588872.1608716197.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index b88900f4832f..ae084357994e 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -88,6 +88,7 @@ config PPC_IRQ_SOFT_MASK_DEBUG config XMON bool "Include xmon kernel debugger" depends on DEBUG_KERNEL + select CONSOLE_POLL if SERIAL_CPM_CONSOLE help Include in-kernel hooks for the xmon kernel monitor/debugger. Unless you are intending to debug the kernel, say N here. 
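For context on the CONSOLE_POLL dependency above: CONFIG_CONSOLE_POLL lets a serial driver expose polled (non-interrupt) character I/O, which is what the udbg hooks are built on. In a uart driver this typically takes the following shape (the callback names here are illustrative placeholders, not the exact CPM uart symbols):

    static const struct uart_ops example_uart_ops = {
            /* ... the normal interrupt-driven callbacks ... */
    #ifdef CONFIG_CONSOLE_POLL
            .poll_get_char = example_poll_get_char,  /* busy-waits for one char */
            .poll_put_char = example_poll_put_char,
    #endif
    };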
From 24b4c6b1a7fc79fe8142d50cb439944b81b659ff Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Wed, 2 Sep 2020 11:36:57 +1000 Subject: [PATCH 058/188] powerpc/powernv/pci: Drop pnv_phb->initialized The pnv_phb->initialized flag is an odd beast. It was added back in 2012 in commit db1266c85261 ("powerpc/powernv: Skip check on PE if necessary") to allow devices to be enabled even if the device had not yet been assigned to a PE. Allowing the device to be enabled before the PE is configured may cause spurious EEH events since none of the IOMMU context has been set up. I'm not entirely sure why this was ever necessary. My best guess is that it was a workaround for a bug or some other undesirable behaviour from the PCI core. Either way, it's unnecessary now since as of commit dc3d8f85bb57 ("powerpc/powernv/pci: Re-work bus PE configuration") we can guarantee that the PE will be configured before the PCI core will allow drivers to bind to the device. It's also worth pointing out that the ->initialized flag is only set in pnv_pci_ioda_create_dbgfs(). That function has its entire body wrapped in #ifdef CONFIG_DEBUG_FS. As a result, for kernels built without debugfs (i.e. petitboot) the other checks in pnv_pci_enable_device_hook() are bypassed entirely. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902013657.1753830-1-oohall@gmail.com --- arch/powerpc/platforms/powernv/pci-ioda.c | 17 ----------------- arch/powerpc/platforms/powernv/pci.h | 1 - 2 files changed, 18 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index c4f72cdc9b51..0451b188789a 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2402,9 +2402,6 @@ static void pnv_pci_ioda_create_dbgfs(void) list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb = hose->private_data; - /* Notify initialization of PHB done */ - phb->initialized = 1; - sprintf(name, "PCI%04x", hose->global_number); phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); @@ -2601,17 +2598,8 @@ static resource_size_t pnv_pci_default_alignment(void) */ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) { - struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); struct pci_dn *pdn; - /* The function is probably called while the PEs have - * not be created yet. For example, resource reassignment - * during PCI probe period. We just skip the check if - * PEs isn't ready.
- */ - if (!phb->initialized) - return true; - pdn = pci_get_pdn(dev); if (!pdn || pdn->pe_number == IODA_INVALID_PE) { pci_err(dev, "pci_enable_device() blocked, no PE assigned.\n"); @@ -2623,14 +2611,9 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) static bool pnv_ocapi_enable_device_hook(struct pci_dev *dev) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; struct pci_dn *pdn; struct pnv_ioda_pe *pe; - if (!phb->initialized) - return true; - pdn = pci_get_pdn(dev); if (!pdn) return false; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 739a0b3b72e1..36d22920f5a3 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -119,7 +119,6 @@ struct pnv_phb { int flags; void __iomem *regs; u64 regs_phys; - int initialized; spinlock_t lock; #ifdef CONFIG_DEBUG_FS From 5537fcb319d016ce387f818dd774179bc03217f5 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 3 Nov 2020 15:35:06 +1100 Subject: [PATCH 059/188] powerpc/pci: Add ppc_md.discover_phbs() On many powerpc platforms the discovery and initialisation of pci_controllers (PHBs) happens inside of setup_arch(). This is very early in boot (pre-initcalls) and means that we're initialising the PHB long before many basic kernel services (slab allocator, debugfs, a real ioremap) are available. On PowerNV this causes an additional problem since we map the PHB registers with ioremap(). As of commit d538aadc2718 ("powerpc/ioremap: warn on early use of ioremap()") a warning is printed because we're using the "incorrect" API to set up an MMIO mapping in early boot. The kernel does provide early_ioremap(), but that is not intended to create long-lived MMIO mappings and a separate warning is printed by generic code if early_ioremap() mappings are "leaked." This is all fixable with dumb hacks like using early_ioremap() to set up the initial mapping then replacing it with a real ioremap later on in boot, but it does raise the question: Why the hell are we setting up the PHBs this early in boot? The old and wise claim it's due to "hysterical raisins." Aside from amused grapes there doesn't appear to be any real reason to maintain the current behaviour. Already most of the newer embedded platforms perform PHB discovery in an arch_initcall and between the end of setup_arch() and the start of initcalls none of the generic kernel code does anything PCI related. On powerpc scanning PHBs occurs in a subsys_initcall so it should be possible to move the PHB discovery to a core, postcore or arch initcall. This patch adds the ppc_md.discover_phbs hook and a core_initcall stub that calls it. The core_initcalls are the earliest to be called, so this will avoid any possible issues with dependencies between initcalls. This isn't just an academic issue either since on pseries and PowerNV EEH init occurs in an arch_initcall and depends on the pci_controllers being available, similarly the creation of pci_dns occurs at core_initcall_sync (i.e. between core and postcore initcalls). These problems need to be addressed separately.
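With the stub below in place, converting a platform is mechanical: remove the PHB scan from its setup_arch() and point the new hook at it. Roughly (placeholder names; the follow-up patches in this series do exactly this for each platform):

    define_machine(example) {
            .name          = "example",
            .probe         = example_probe,
            .setup_arch    = example_setup_arch,
            .discover_phbs = example_pci_init,   /* previously called from setup_arch() */
    };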
Reported-by: kernel test robot Signed-off-by: Oliver O'Halloran [mpe: Make discover_phbs() static] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-1-oohall@gmail.com --- arch/powerpc/include/asm/machdep.h | 3 +++ arch/powerpc/kernel/pci-common.c | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index cf6ebbc16cb4..764f2732a821 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -59,6 +59,9 @@ struct machdep_calls { int (*pcibios_root_bridge_prepare)(struct pci_host_bridge *bridge); + /* finds all the pci_controllers present at boot */ + void (*discover_phbs)(void); + /* To setup PHBs when using automatic OF platform driver for PCI */ int (*pci_setup_phb)(struct pci_controller *host); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 2b555997b295..001e90cd8948 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1699,3 +1699,13 @@ static void fixup_hide_host_resource_fsl(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl); + + +static int __init discover_phbs(void) +{ + if (ppc_md.discover_phbs) + ppc_md.discover_phbs(); + + return 0; +} +core_initcall(discover_phbs); From fbbefb320214db14c3e740fce98e2c95c9d0669b Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 3 Nov 2020 15:35:07 +1100 Subject: [PATCH 060/188] powerpc/pci: Move PHB discovery for PCI_DN using platforms Make powernv, pseries, powermac and maple use ppc_md.discover_phbs. These platforms need to be done together because they all depend on pci_dn's being created from the DT. The pci_dn contains a pointer to the relevant pci_controller so they need to be created after the pci_controller structures are available, but before PCI devices are scanned. Currently this ordering is provided by initcalls and the sequence is: 1. PHBs are discovered (setup_arch) (early boot, pre-initcalls) 2. pci_dn are created from the unflattened DT (core initcall) 3. PHBs are scanned in pcibios_init() (subsys initcall) The new ppc_md.discover_phbs() function is also a core_initcall so we can't guarantee ordering between the creation of pci_controllers and the creation of pci_dn's which require a pci_controller. We could use the postcore, or core_sync initcall levels, but it's cleaner to just move the pci_dn setup into the per-PHB inits which occur inside of .discover_phbs() for these platforms. This brings the boot-time path in line with the PHB hotplug path that is used for pseries DLPAR operations too.
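After this change each per-PHB init follows the shape sketched here (illustrative; the real maple, powermac, powernv and pseries versions are in the hunks below):

    static int __init example_add_bridge(struct device_node *dev)
    {
            struct pci_controller *hose = pcibios_alloc_controller(dev);

            if (!hose)
                    return -ENOMEM;
            /* ... program the PHB registers, parse ranges ... */

            /* create pci_dn's for DT nodes under this PHB */
            pci_devs_phb_init_dynamic(hose);
            return 0;
    }

Because the pci_dn's are created as each pci_controller comes into existence, there is no initcall-ordering window in which a pci_dn could be created without its controller.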
Signed-off-by: Oliver O'Halloran [mpe: Squash powermac & maple in to avoid breaking those platforms, convert memblock allocs to use kmalloc to avoid warnings] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-2-oohall@gmail.com --- arch/powerpc/kernel/pci_dn.c | 22 ---------------------- arch/powerpc/platforms/maple/pci.c | 3 +++ arch/powerpc/platforms/maple/setup.c | 4 +--- arch/powerpc/platforms/powermac/pci.c | 4 ++++ arch/powerpc/platforms/powermac/setup.c | 4 +--- arch/powerpc/platforms/powernv/pci-ioda.c | 10 +++++++--- arch/powerpc/platforms/powernv/setup.c | 4 +--- arch/powerpc/platforms/pseries/setup.c | 7 +++++-- 8 files changed, 22 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 54e240597fd9..61571ae23953 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -481,28 +481,6 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb) pci_traverse_device_nodes(dn, add_pdn, phb); } -/** - * pci_devs_phb_init - Initialize phbs and pci devs under them. - * - * This routine walks over all phb's (pci-host bridges) on the - * system, and sets up assorted pci-related structures - * (including pci info in the device node structs) for each - * pci device found underneath. This routine runs once, - * early in the boot sequence. - */ -static int __init pci_devs_phb_init(void) -{ - struct pci_controller *phb, *tmp; - - /* This must be done first so the device nodes have valid pci info! */ - list_for_each_entry_safe(phb, tmp, &hose_list, list_node) - pci_devs_phb_init_dynamic(phb); - - return 0; -} - -core_initcall(pci_devs_phb_init); - static void pci_dev_pdn_setup(struct pci_dev *pdev) { struct pci_dn *pdn; diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index c86a66d5e998..a20b9576de22 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -536,6 +536,9 @@ static int __init maple_add_bridge(struct device_node *dev) /* Check for legacy IOs */ isa_bridge_find_early(hose); + /* create pci_dn's for DT nodes under this PHB */ + pci_devs_phb_init_dynamic(hose); + return 0; } diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index f7e66a2005b4..4e9ad5bf3efb 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -179,9 +179,6 @@ static void __init maple_setup_arch(void) #ifdef CONFIG_SMP smp_ops = &maple_smp_ops; #endif - /* Lookup PCI hosts */ - maple_pci_init(); - maple_use_rtas_reboot_and_halt_if_present(); printk(KERN_DEBUG "Using native/NAP idle loop\n"); @@ -351,6 +348,7 @@ define_machine(maple) { .name = "Maple", .probe = maple_probe, .setup_arch = maple_setup_arch, + .discover_phbs = maple_pci_init, .init_IRQ = maple_init_IRQ, .pci_irq_fixup = maple_pci_irq_fixup, .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index e35eaa9cf938..e9abe0f2e7f0 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -850,6 +850,10 @@ static int __init pmac_add_bridge(struct device_node *dev) /* Fixup "bus-range" OF property */ fixup_bus_range(dev); + /* create pci_dn's for DT nodes under this PHB */ + if (IS_ENABLED(CONFIG_PPC64)) + pci_devs_phb_init_dynamic(hose); + return 0; } diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index
2e2cc0c75d87..86aee3f2483f 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -298,9 +298,6 @@ static void __init pmac_setup_arch(void) of_node_put(ic); } - /* Lookup PCI hosts */ - pmac_pci_init(); - #ifdef CONFIG_PPC32 ohare_init(); l2cr_init(); @@ -600,6 +597,7 @@ define_machine(powermac) { .name = "PowerMac", .probe = pmac_probe, .setup_arch = pmac_setup_arch, + .discover_phbs = pmac_pci_init, .show_cpuinfo = pmac_show_cpuinfo, .init_IRQ = pmac_pic_init, .get_irq = NULL, /* changed later */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 0451b188789a..7ee14ac275bd 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2921,7 +2921,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb_id = be64_to_cpup(prop64); pr_debug(" PHB-ID : 0x%016llx\n", phb_id); - phb = memblock_alloc(sizeof(*phb), SMP_CACHE_BYTES); + phb = kmalloc(sizeof(*phb), GFP_KERNEL); if (!phb) panic("%s: Failed to allocate %zu bytes\n", __func__, sizeof(*phb)); @@ -2970,7 +2970,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, else phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE; - phb->diag_data = memblock_alloc(phb->diag_data_size, SMP_CACHE_BYTES); + phb->diag_data = kmalloc(phb->diag_data_size, GFP_KERNEL); if (!phb->diag_data) panic("%s: Failed to allocate %u bytes\n", __func__, phb->diag_data_size); @@ -3032,9 +3032,10 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, } pemap_off = size; size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe); - aux = memblock_alloc(size, SMP_CACHE_BYTES); + aux = kmalloc(size, GFP_KERNEL); if (!aux) panic("%s: Failed to allocate %lu bytes\n", __func__, size); + phb->ioda.pe_alloc = aux; phb->ioda.m64_segmap = aux + m64map_off; phb->ioda.m32_segmap = aux + m32map_off; @@ -3161,6 +3162,9 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, /* Remove M64 resource if we can't configure it successfully */ if (!phb->init_m64 || phb->init_m64(phb)) hose->mem_resources[1].flags = 0; + + /* create pci_dn's for DT nodes under this PHB */ + pci_devs_phb_init_dynamic(hose); } void __init pnv_pci_init_ioda2_phb(struct device_node *np) diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 4426a109ec2f..aadf932c4e61 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -180,9 +180,6 @@ static void __init pnv_setup_arch(void) /* Initialize SMP */ pnv_smp_init(); - /* Setup PCI */ - pnv_pci_init(); - /* Setup RTC and NVRAM callbacks */ if (firmware_has_feature(FW_FEATURE_OPAL)) opal_nvram_init(); @@ -547,6 +544,7 @@ define_machine(powernv) { .init_IRQ = pnv_init_IRQ, .show_cpuinfo = pnv_show_cpuinfo, .get_proc_freq = pnv_get_proc_freq, + .discover_phbs = pnv_pci_init, .progress = pnv_progress, .machine_shutdown = pnv_shutdown, .power_save = NULL, diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 0272aa4e74e3..46e1540abc22 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -463,7 +463,7 @@ void pseries_little_endian_exceptions(void) } #endif -static void __init find_and_init_phbs(void) +static void __init pSeries_discover_phbs(void) { struct device_node *node; struct pci_controller *phb; @@ -481,6 +481,9 @@ static void __init find_and_init_phbs(void) pci_process_bridge_OF_ranges(phb, 
node, 0); isa_bridge_find_early(phb); phb->controller_ops = pseries_pci_controller_ops; + + /* create pci_dn's for DT nodes under this PHB */ + pci_devs_phb_init_dynamic(phb); } of_node_put(root); @@ -786,7 +789,6 @@ static void __init pSeries_setup_arch(void) /* Find and initialize PCI host bridges */ init_pci_config_tokens(); - find_and_init_phbs(); of_reconfig_notifier_register(&pci_dn_reconfig_nb); pSeries_nvram_init(); @@ -1050,6 +1052,7 @@ define_machine(pseries) { .init_IRQ = pseries_init_irq, .show_cpuinfo = pSeries_show_cpuinfo, .log_error = pSeries_log_error, + .discover_phbs = pSeries_discover_phbs, .pcibios_fixup = pSeries_final_fixup, .restart = rtas_restart, .halt = rtas_halt, From 893586ec949d3e48573a585c26bf04998fea6e1f Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 3 Nov 2020 15:35:09 +1100 Subject: [PATCH 061/188] powerpc/512x: Move PHB discovery Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-4-oohall@gmail.com --- arch/powerpc/platforms/512x/mpc5121_ads.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c index 6303fbfc4e4f..9d030c2e0004 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads.c @@ -24,21 +24,23 @@ static void __init mpc5121_ads_setup_arch(void) { -#ifdef CONFIG_PCI - struct device_node *np; -#endif printk(KERN_INFO "MPC5121 ADS board from Freescale Semiconductor\n"); /* * cpld regs are needed early */ mpc5121_ads_cpld_map(); + mpc512x_setup_arch(); +} + +static void __init mpc5121_ads_setup_pci(void) +{ #ifdef CONFIG_PCI + struct device_node *np; + for_each_compatible_node(np, "pci", "fsl,mpc5121-pci") mpc83xx_add_bridge(np); #endif - - mpc512x_setup_arch(); } static void __init mpc5121_ads_init_IRQ(void) @@ -64,6 +66,7 @@ define_machine(mpc5121_ads) { .name = "MPC5121 ADS", .probe = mpc5121_ads_probe, .setup_arch = mpc5121_ads_setup_arch, + .discover_phbs = mpc5121_ads_setup_pci, .init = mpc512x_init, .init_IRQ = mpc5121_ads_init_IRQ, .get_irq = ipic_get_irq, From eab3166f4eac384b48ebd2ed7b61dc465c1912cf Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 3 Nov 2020 15:35:10 +1100 Subject: [PATCH 062/188] powerpc/52xx/efika: Move PHB discovery Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-5-oohall@gmail.com --- arch/powerpc/platforms/52xx/efika.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 4514a6f7458a..3b7d70d71692 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -185,8 +185,6 @@ static void __init efika_setup_arch(void) /* Map important registers from the internal memory map */ mpc52xx_map_common_devices(); - efika_pcisetup(); - #ifdef CONFIG_PM mpc52xx_suspend.board_suspend_prepare = efika_suspend_prepare; mpc52xx_pm_init(); @@ -218,6 +216,7 @@ define_machine(efika) .name = EFIKA_PLATFORM_NAME, .probe = efika_probe, .setup_arch = efika_setup_arch, + .discover_phbs = efika_pcisetup, .init = mpc52xx_declare_of_platform_devices, .show_cpuinfo = efika_show_cpuinfo, .init_IRQ = mpc52xx_init_irq, From e0bf9de2242a31a8f79015376ed08c4efe74774a Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 3 Nov 2020 15:35:11 +1100 Subject: [PATCH 063/188] powerpc/52xx/lite5200: Move PHB 
discovery Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-6-oohall@gmail.com --- arch/powerpc/platforms/52xx/lite5200.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/52xx/lite5200.c b/arch/powerpc/platforms/52xx/lite5200.c index 3181aac08225..04cc97397095 100644 --- a/arch/powerpc/platforms/52xx/lite5200.c +++ b/arch/powerpc/platforms/52xx/lite5200.c @@ -165,8 +165,6 @@ static void __init lite5200_setup_arch(void) mpc52xx_suspend.board_resume_finish = lite5200_resume_finish; lite5200_pm_init(); #endif - - mpc52xx_setup_pci(); } static const char * const board[] __initconst = { @@ -187,6 +185,7 @@ define_machine(lite5200) { .name = "lite5200", .probe = lite5200_probe, .setup_arch = lite5200_setup_arch, + .discover_phbs = mpc52xx_setup_pci, .init = mpc52xx_declare_of_platform_devices, .init_IRQ = mpc52xx_init_irq, .get_irq = mpc52xx_get_irq, From ba5087622a0f11c8d3c6587392ebc70f96503e51 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 3 Nov 2020 15:35:12 +1100 Subject: [PATCH 064/188] powerpc/52xx/media5200: Move PHB discovery Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-7-oohall@gmail.com --- arch/powerpc/platforms/52xx/media5200.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c index 07c5bc4ed0b5..efb8bdecbcc7 100644 --- a/arch/powerpc/platforms/52xx/media5200.c +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -202,8 +202,6 @@ static void __init media5200_setup_arch(void) /* Some mpc5200 & mpc5200b related configuration */ mpc5200_setup_xlb_arbiter(); - mpc52xx_setup_pci(); - np = of_find_matching_node(NULL, mpc5200_gpio_ids); gpio = of_iomap(np, 0); of_node_put(np); @@ -244,6 +242,7 @@ define_machine(media5200_platform) { .name = "media5200-platform", .probe = media5200_probe, .setup_arch = media5200_setup_arch, + .discover_phbs = mpc52xx_setup_pci, .init = mpc52xx_declare_of_platform_devices, .init_IRQ = media5200_init_irq, .get_irq = mpc52xx_get_irq, From a760cfd9cfa2193961d7e599f46fbfe2498c400a Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 3 Nov 2020 15:35:13 +1100 Subject: [PATCH 065/188] powerpc/52xx/mpc5200_simple: Move PHB discovery Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-8-oohall@gmail.com --- arch/powerpc/platforms/52xx/mpc5200_simple.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/52xx/mpc5200_simple.c b/arch/powerpc/platforms/52xx/mpc5200_simple.c index 2d01e9b2e779..b9f5675b0a1d 100644 --- a/arch/powerpc/platforms/52xx/mpc5200_simple.c +++ b/arch/powerpc/platforms/52xx/mpc5200_simple.c @@ -40,8 +40,6 @@ static void __init mpc5200_simple_setup_arch(void) /* Some mpc5200 & mpc5200b related configuration */ mpc5200_setup_xlb_arbiter(); - - mpc52xx_setup_pci(); } /* list of the supported boards */ @@ -73,6 +71,7 @@ define_machine(mpc5200_simple_platform) { .name = "mpc5200-simple-platform", .probe = mpc5200_simple_probe, .setup_arch = mpc5200_simple_setup_arch, + .discover_phbs = mpc52xx_setup_pci, .init = mpc52xx_declare_of_platform_devices, .init_IRQ = mpc52xx_init_irq, .get_irq = mpc52xx_get_irq, From 3c82a6aecd367bbbe7876c406cd3e12b5b0e4204 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 3 Nov 2020 15:35:14 +1100 Subject: [PATCH 
066/188] powerpc/82xx/*: Move PHB discovery Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-9-oohall@gmail.com --- arch/powerpc/platforms/82xx/mpc8272_ads.c | 2 +- arch/powerpc/platforms/82xx/pq2fads.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/82xx/mpc8272_ads.c b/arch/powerpc/platforms/82xx/mpc8272_ads.c index 3fe1a6593280..0b5b9dec16d5 100644 --- a/arch/powerpc/platforms/82xx/mpc8272_ads.c +++ b/arch/powerpc/platforms/82xx/mpc8272_ads.c @@ -171,7 +171,6 @@ static void __init mpc8272_ads_setup_arch(void) iounmap(bcsr); init_ioports(); - pq2_init_pci(); if (ppc_md.progress) ppc_md.progress("mpc8272_ads_setup_arch(), finish", 0); @@ -205,6 +204,7 @@ define_machine(mpc8272_ads) .name = "Freescale MPC8272 ADS", .probe = mpc8272_ads_probe, .setup_arch = mpc8272_ads_setup_arch, + .discover_phbs = pq2_init_pci, .init_IRQ = mpc8272_ads_pic_init, .get_irq = cpm2_get_irq, .calibrate_decr = generic_calibrate_decr, diff --git a/arch/powerpc/platforms/82xx/pq2fads.c b/arch/powerpc/platforms/82xx/pq2fads.c index a74082140718..ac9113d524af 100644 --- a/arch/powerpc/platforms/82xx/pq2fads.c +++ b/arch/powerpc/platforms/82xx/pq2fads.c @@ -150,8 +150,6 @@ static void __init pq2fads_setup_arch(void) /* Enable external IRQs */ clrbits32(&cpm2_immr->im_siu_conf.siu_82xx.sc_siumcr, 0x0c000000); - pq2_init_pci(); - if (ppc_md.progress) ppc_md.progress("pq2fads_setup_arch(), finish", 0); } @@ -184,6 +182,7 @@ define_machine(pq2fads) .name = "Freescale PQ2FADS", .probe = pq2fads_probe, .setup_arch = pq2fads_setup_arch, + .discover_phbs = pq2_init_pci, .init_IRQ = pq2fads_pic_init, .get_irq = cpm2_get_irq, .calibrate_decr = generic_calibrate_decr, From 83f84041ff1cf6c23fc38861218af2d4ca2d9b38 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 3 Nov 2020 15:35:15 +1100 Subject: [PATCH 067/188] powerpc/83xx: Move PHB discovery Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-10-oohall@gmail.com --- arch/powerpc/platforms/83xx/asp834x.c | 1 + arch/powerpc/platforms/83xx/km83xx.c | 1 + arch/powerpc/platforms/83xx/misc.c | 2 -- arch/powerpc/platforms/83xx/mpc830x_rdb.c | 1 + arch/powerpc/platforms/83xx/mpc831x_rdb.c | 1 + arch/powerpc/platforms/83xx/mpc832x_mds.c | 1 + arch/powerpc/platforms/83xx/mpc832x_rdb.c | 1 + arch/powerpc/platforms/83xx/mpc834x_itx.c | 1 + arch/powerpc/platforms/83xx/mpc834x_mds.c | 1 + arch/powerpc/platforms/83xx/mpc836x_mds.c | 1 + arch/powerpc/platforms/83xx/mpc836x_rdk.c | 1 + arch/powerpc/platforms/83xx/mpc837x_mds.c | 1 + arch/powerpc/platforms/83xx/mpc837x_rdb.c | 1 + 13 files changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/83xx/asp834x.c b/arch/powerpc/platforms/83xx/asp834x.c index 28474876f41b..68061c2a57c1 100644 --- a/arch/powerpc/platforms/83xx/asp834x.c +++ b/arch/powerpc/platforms/83xx/asp834x.c @@ -44,6 +44,7 @@ define_machine(asp834x) { .name = "ASP8347E", .probe = asp834x_probe, .setup_arch = asp834x_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c index bcdc2c203ec9..108e1e4d2683 100644 --- a/arch/powerpc/platforms/83xx/km83xx.c +++ b/arch/powerpc/platforms/83xx/km83xx.c @@ -180,6 +180,7 @@ define_machine(mpc83xx_km) { .name = "mpc83xx-km-platform", .probe = mpc83xx_km_probe, 
.setup_arch = mpc83xx_km_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c index a952e91db3ee..3285dabcf923 100644 --- a/arch/powerpc/platforms/83xx/misc.c +++ b/arch/powerpc/platforms/83xx/misc.c @@ -132,8 +132,6 @@ void __init mpc83xx_setup_arch(void) setbat(-1, va, immrbase, immrsize, PAGE_KERNEL_NCG); update_bats(); } - - mpc83xx_setup_pci(); } int machine_check_83xx(struct pt_regs *regs) diff --git a/arch/powerpc/platforms/83xx/mpc830x_rdb.c b/arch/powerpc/platforms/83xx/mpc830x_rdb.c index 51426e88ec67..956d4389effa 100644 --- a/arch/powerpc/platforms/83xx/mpc830x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc830x_rdb.c @@ -48,6 +48,7 @@ define_machine(mpc830x_rdb) { .name = "MPC830x RDB", .probe = mpc830x_rdb_probe, .setup_arch = mpc830x_rdb_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc831x_rdb.c b/arch/powerpc/platforms/83xx/mpc831x_rdb.c index 5ccd57a48492..3b578f080e3b 100644 --- a/arch/powerpc/platforms/83xx/mpc831x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc831x_rdb.c @@ -48,6 +48,7 @@ define_machine(mpc831x_rdb) { .name = "MPC831x RDB", .probe = mpc831x_rdb_probe, .setup_arch = mpc831x_rdb_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c index 6fa5402ebf20..850d566ef900 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c @@ -101,6 +101,7 @@ define_machine(mpc832x_mds) { .name = "MPC832x MDS", .probe = mpc832x_sys_probe, .setup_arch = mpc832x_sys_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c index 622c625d5ce4..b6133a237a70 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -219,6 +219,7 @@ define_machine(mpc832x_rdb) { .name = "MPC832x RDB", .probe = mpc832x_rdb_probe, .setup_arch = mpc832x_rdb_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c index ebfd139bca20..9630f3aa4d9c 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_itx.c +++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c @@ -70,6 +70,7 @@ define_machine(mpc834x_itx) { .name = "MPC834x ITX", .probe = mpc834x_itx_probe, .setup_arch = mpc834x_itx_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc834x_mds.c b/arch/powerpc/platforms/83xx/mpc834x_mds.c index 356228e35279..6d91bdce0a18 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc834x_mds.c @@ -91,6 +91,7 @@ define_machine(mpc834x_mds) { .name = "MPC834x MDS", .probe = mpc834x_mds_probe, .setup_arch = mpc834x_mds_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = 
mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c index 90d9cbfae659..da4cf52cb55b 100644 --- a/arch/powerpc/platforms/83xx/mpc836x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c @@ -201,6 +201,7 @@ define_machine(mpc836x_mds) { .name = "MPC836x MDS", .probe = mpc836x_mds_probe, .setup_arch = mpc836x_mds_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc836x_rdk.c b/arch/powerpc/platforms/83xx/mpc836x_rdk.c index b4aac2cde849..3427ad0d9d38 100644 --- a/arch/powerpc/platforms/83xx/mpc836x_rdk.c +++ b/arch/powerpc/platforms/83xx/mpc836x_rdk.c @@ -41,6 +41,7 @@ define_machine(mpc836x_rdk) { .name = "MPC836x RDK", .probe = mpc836x_rdk_probe, .setup_arch = mpc836x_rdk_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc837x_mds.c b/arch/powerpc/platforms/83xx/mpc837x_mds.c index 9d3721c965be..f28d166ea7db 100644 --- a/arch/powerpc/platforms/83xx/mpc837x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc837x_mds.c @@ -93,6 +93,7 @@ define_machine(mpc837x_mds) { .name = "MPC837x MDS", .probe = mpc837x_mds_probe, .setup_arch = mpc837x_mds_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc837x_rdb.c b/arch/powerpc/platforms/83xx/mpc837x_rdb.c index 7c45f7ac2607..7fb7684c256b 100644 --- a/arch/powerpc/platforms/83xx/mpc837x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc837x_rdb.c @@ -73,6 +73,7 @@ define_machine(mpc837x_rdb) { .name = "MPC837x RDB/WLAN", .probe = mpc837x_rdb_probe, .setup_arch = mpc837x_rdb_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, From 053d58c870298d62b9c5154672ef2f1684c4ea43 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 3 Nov 2020 15:35:16 +1100 Subject: [PATCH 068/188] powerpc/amigaone: Move PHB discovery Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-11-oohall@gmail.com --- arch/powerpc/platforms/amigaone/setup.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c index f5d0bf999759..b25ddf39dd43 100644 --- a/arch/powerpc/platforms/amigaone/setup.c +++ b/arch/powerpc/platforms/amigaone/setup.c @@ -65,6 +65,12 @@ static int __init amigaone_add_bridge(struct device_node *dev) } void __init amigaone_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0); +} + +void __init amigaone_discover_phbs(void) { struct device_node *np; int phb = -ENODEV; @@ -74,9 +80,6 @@ void __init amigaone_setup_arch(void) phb = amigaone_add_bridge(np); BUG_ON(phb != 0); - - if (ppc_md.progress) - ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0); } void __init amigaone_init_IRQ(void) @@ -159,6 +162,7 @@ define_machine(amigaone) { .name = "AmigaOne", .probe = amigaone_probe, .setup_arch = amigaone_setup_arch, + .discover_phbs = amigaone_discover_phbs, .show_cpuinfo = amigaone_show_cpuinfo, .init_IRQ = amigaone_init_IRQ, .restart = amigaone_restart, From 407d418f2fd4c20aa8ca1cf4168a414d77766852 Mon Sep 17 00:00:00 2001 From: Oliver 
O'Halloran Date: Tue, 3 Nov 2020 15:35:17 +1100 Subject: [PATCH 069/188] powerpc/chrp: Move PHB discovery Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-12-oohall@gmail.com --- arch/powerpc/platforms/chrp/pci.c | 8 ++++++++ arch/powerpc/platforms/chrp/setup.c | 12 +----------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c index b2c2bf35b76c..8c421dc78b28 100644 --- a/arch/powerpc/platforms/chrp/pci.c +++ b/arch/powerpc/platforms/chrp/pci.c @@ -314,6 +314,14 @@ chrp_find_bridges(void) } } of_node_put(root); + + /* + * "Temporary" fixes for PCI devices. + * -- Geert + */ + hydra_init(); /* Mac I/O */ + + pci_create_OF_bus_map(); } /* SL82C105 IDE Control/Status Register */ diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index c45435aa5e36..3cfc382841e5 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -334,22 +334,11 @@ static void __init chrp_setup_arch(void) /* On pegasos, enable the L2 cache if not already done by OF */ pegasos_set_l2cr(); - /* Lookup PCI host bridges */ - chrp_find_bridges(); - - /* - * Temporary fixes for PCI devices. - * -- Geert - */ - hydra_init(); /* Mac I/O */ - /* * Fix the Super I/O configuration */ sio_init(); - pci_create_OF_bus_map(); - /* * Print the banner, then scroll down so boot progress * can be printed. -- Cort @@ -582,6 +571,7 @@ define_machine(chrp) { .name = "CHRP", .probe = chrp_probe, .setup_arch = chrp_setup_arch, + .discover_phbs = chrp_find_bridges, .init = chrp_init2, .show_cpuinfo = chrp_show_cpuinfo, .init_IRQ = chrp_init_IRQ, From 08c4738254b87117c69816d8033dd25f38185f92 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 3 Nov 2020 15:35:18 +1100 Subject: [PATCH 070/188] powerpc/embedded6xx/holly: Move PHB discovery Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-13-oohall@gmail.com --- arch/powerpc/platforms/embedded6xx/holly.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index d8f2e2c737bb..53065d564161 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -108,15 +108,13 @@ static void holly_remap_bridge(void) tsi108_write_reg(TSI108_PCI_P2O_BAR2, 0x0); } -static void __init holly_setup_arch(void) +static void __init holly_init_pci(void) { struct device_node *np; if (ppc_md.progress) ppc_md.progress("holly_setup_arch():set_bridge", 0); - tsi108_csr_vir_base = get_vir_csrbase(); - /* setup PCI host bridge */ holly_remap_bridge(); @@ -127,6 +125,11 @@ static void __init holly_setup_arch(void) ppc_md.pci_exclude_device = holly_exclude_device; if (ppc_md.progress) ppc_md.progress("tsi108: resources set", 0x100); +} + +static void __init holly_setup_arch(void) +{ + tsi108_csr_vir_base = get_vir_csrbase(); printk(KERN_INFO "PPC750GX/CL Platform\n"); } @@ -259,6 +262,7 @@ define_machine(holly){ .name = "PPC750 GX/CL TSI", .probe = holly_probe, .setup_arch = holly_setup_arch, + .discover_phbs = holly_init_pci, .init_IRQ = holly_init_IRQ, .show_cpuinfo = holly_show_cpuinfo, .get_irq = mpic_get_irq, From daa6c24780c15f4abcb76a9d426142beff9f62c6 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 3 Nov 2020 15:35:19 +1100 Subject: [PATCH 071/188] 
powerpc/embedded6xx/linkstation: Move PHB discovery Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-14-oohall@gmail.com --- arch/powerpc/platforms/embedded6xx/linkstation.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c index f514d5d28cd4..eb8342e7f84e 100644 --- a/arch/powerpc/platforms/embedded6xx/linkstation.c +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -63,15 +63,18 @@ static int __init linkstation_add_bridge(struct device_node *dev) } static void __init linkstation_setup_arch(void) +{ + printk(KERN_INFO "BUFFALO Network Attached Storage Series\n"); + printk(KERN_INFO "(C) 2002-2005 BUFFALO INC.\n"); +} + +static void __init linkstation_setup_pci(void) { struct device_node *np; /* Lookup PCI host bridges */ for_each_compatible_node(np, "pci", "mpc10x-pci") linkstation_add_bridge(np); - - printk(KERN_INFO "BUFFALO Network Attached Storage Series\n"); - printk(KERN_INFO "(C) 2002-2005 BUFFALO INC.\n"); } /* @@ -153,6 +156,7 @@ define_machine(linkstation){ .name = "Buffalo Linkstation", .probe = linkstation_probe, .setup_arch = linkstation_setup_arch, + .discover_phbs = linkstation_setup_pci, .init_IRQ = linkstation_init_IRQ, .show_cpuinfo = linkstation_show_cpuinfo, .get_irq = mpic_get_irq, From 748770aeb44108ecb4e09d273e7718611cd60a98 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 3 Nov 2020 15:35:20 +1100 Subject: [PATCH 072/188] powerpc/embedded6xx/mpc7448: Move PHB discovery Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-15-oohall@gmail.com --- arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index b95c3380d2b5..5565647dc879 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -58,16 +58,14 @@ int mpc7448_hpc2_exclude_device(struct pci_controller *hose, return PCIBIOS_SUCCESSFUL; } -static void __init mpc7448_hpc2_setup_arch(void) +static void __init mpc7448_hpc2_setup_pci(void) { +#ifdef CONFIG_PCI struct device_node *np; if (ppc_md.progress) - ppc_md.progress("mpc7448_hpc2_setup_arch():set_bridge", 0); - - tsi108_csr_vir_base = get_vir_csrbase(); + ppc_md.progress("mpc7448_hpc2_setup_pci():set_bridge", 0); /* setup PCI host bridge */ -#ifdef CONFIG_PCI for_each_compatible_node(np, "pci", "tsi108-pci") tsi108_setup_pci(np, MPC7448HPC2_PCI_CFG_PHYS, 0); @@ -75,6 +73,11 @@ static void __init mpc7448_hpc2_setup_arch(void) if (ppc_md.progress) ppc_md.progress("tsi108: resources set", 0x100); #endif +} + +static void __init mpc7448_hpc2_setup_arch(void) +{ + tsi108_csr_vir_base = get_vir_csrbase(); printk(KERN_INFO "MPC7448HPC2 (TAIGA) Platform\n"); printk(KERN_INFO @@ -181,6 +184,7 @@ define_machine(mpc7448_hpc2){ .name = "MPC7448 HPC2", .probe = mpc7448_hpc2_probe, .setup_arch = mpc7448_hpc2_setup_arch, + .discover_phbs = mpc7448_hpc2_setup_pci, .init_IRQ = mpc7448_hpc2_init_IRQ, .show_cpuinfo = mpc7448_hpc2_show_cpuinfo, .get_irq = mpic_get_irq, From d20a864f434b277b245ac6508920d90a48f6155d Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 3 Nov 2020 15:35:21 +1100 Subject: [PATCH 073/188] powerpc/embedded6xx/mve5100: Move PHB 
discovery Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-16-oohall@gmail.com --- arch/powerpc/platforms/embedded6xx/mvme5100.c | 13 ++++++++----- arch/powerpc/platforms/embedded6xx/storcenter.c | 8 ++++++-- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c index 1cd488daa0bf..c06a0490d157 100644 --- a/arch/powerpc/platforms/embedded6xx/mvme5100.c +++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c @@ -154,17 +154,19 @@ static const struct of_device_id mvme5100_of_bus_ids[] __initconst = { */ static void __init mvme5100_setup_arch(void) { - struct device_node *np; - if (ppc_md.progress) ppc_md.progress("mvme5100_setup_arch()", 0); - for_each_compatible_node(np, "pci", "hawk-pci") - mvme5100_add_bridge(np); - restart = ioremap(BOARD_MODRST_REG, 4); } +static void __init mvme5100_setup_pci(void) +{ + struct device_node *np; + + for_each_compatible_node(np, "pci", "hawk-pci") + mvme5100_add_bridge(np); +} static void mvme5100_show_cpuinfo(struct seq_file *m) { @@ -205,6 +207,7 @@ define_machine(mvme5100) { .name = "MVME5100", .probe = mvme5100_probe, .setup_arch = mvme5100_setup_arch, + .discover_phbs = mvme5100_setup_pci, .init_IRQ = mvme5100_pic_init, .show_cpuinfo = mvme5100_show_cpuinfo, .get_irq = mpic_get_irq, diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c index e346ddcef45e..e188b90f7016 100644 --- a/arch/powerpc/platforms/embedded6xx/storcenter.c +++ b/arch/powerpc/platforms/embedded6xx/storcenter.c @@ -65,14 +65,17 @@ static int __init storcenter_add_bridge(struct device_node *dev) } static void __init storcenter_setup_arch(void) +{ + printk(KERN_INFO "IOMEGA StorCenter\n"); +} + +static void __init storcenter_setup_pci(void) { struct device_node *np; /* Lookup PCI host bridges */ for_each_compatible_node(np, "pci", "mpc10x-pci") storcenter_add_bridge(np); - - printk(KERN_INFO "IOMEGA StorCenter\n"); } /* @@ -117,6 +120,7 @@ define_machine(storcenter){ .name = "IOMEGA StorCenter", .probe = storcenter_probe, .setup_arch = storcenter_setup_arch, + .discover_phbs = storcenter_setup_pci, .init_IRQ = storcenter_init_IRQ, .get_irq = mpic_get_irq, .restart = storcenter_restart, From c144bc719234500e292c0545de99822bd8a78a6b Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 3 Nov 2020 15:35:22 +1100 Subject: [PATCH 074/188] powerpc/pasemi: Move PHB discovery Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-17-oohall@gmail.com --- arch/powerpc/platforms/pasemi/setup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index b612474f8f8e..376797eb7894 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -144,8 +144,6 @@ static void __init pas_setup_arch(void) /* Setup SMP callback */ smp_ops = &pas_smp_ops; #endif - /* Lookup PCI hosts */ - pas_pci_init(); /* Remap SDC register for doing reset */ /* XXXOJN This should maybe come out of the device tree */ @@ -446,6 +444,7 @@ define_machine(pasemi) { .name = "PA Semi PWRficient", .probe = pas_probe, .setup_arch = pas_setup_arch, + .discover_phbs = pas_pci_init, .init_IRQ = pas_init_IRQ, .get_irq = mpic_get_irq, .restart = pas_restart, From c0ef717305f51e29b5ce0c78a6bfe566b3283415 Mon Sep 17 
00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:11 +1000 Subject: [PATCH 075/188] powerpc/64s: interrupt exit improve bounding of interrupt recursion When replaying pending soft-masked interrupts when an interrupt returns to an irqs-enabled context, there is a special case required if this was an asynchronous interrupt to avoid unbounded interrupt recursion. This case was not tested for when the asynchronous interrupt hit in user context, because a subsequent nested interrupt would by definition hit in kernel mode, which then exits via the kernel path, which does test this case. There is no reason to allow this for such interrupts. While recursion is bounded at the next level, it's simpler and uses less stack to apply the replay logic consistently. This also expands the comment, which was really pretty poor and didn't explain the problem (I can say that because I wrote it). Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-2-npiggin@gmail.com --- arch/powerpc/kernel/syscall_64.c | 55 +++++++++++++++++++------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 7c85ed04a164..e0eb2a502db3 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -138,8 +138,12 @@ notrace long system_call_exception(long r3, long r4, long r5, /* * local irqs must be disabled. Returns false if the caller must re-enable * them, check for new work, and try again. + * + * This should be called with local irqs disabled, but if they were previously + * enabled when the interrupt handler returns (indicating a process-context / + * synchronous interrupt) then irqs_enabled should be true. */ -static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri) +static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled) { /* This must be done with RI=1 because tracing may touch vmaps */ trace_hardirqs_on(); @@ -156,6 +160,29 @@ static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri) trace_hardirqs_off(); local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + /* + * Must replay pending soft-masked interrupts now. Don't just + * local_irq_enable(); local_irq_disable(); because if we are + * returning from an asynchronous interrupt here, another one + * might hit after irqs are enabled, and it would exit via this + * same path allowing another to fire, and so on unbounded. + * + * If interrupts were enabled when this interrupt exited, + * indicating a process context (synchronous) interrupt, + * local_irq_enable/disable can be used, which will enable + * interrupts rather than keeping them masked (unclear how + * much benefit this is over just replaying for all cases, + * because we immediately disable again, so all we're really + * doing is allowing hard interrupts to execute directly for + * a very small time, rather than being masked and replayed).
+ */ + if (irqs_enabled) { + local_irq_enable(); + local_irq_disable(); + } else { + replay_soft_interrupts(); + } + return false; } local_paca->irq_happened = 0; @@ -212,8 +239,9 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, ret |= _TIF_RESTOREALL; } -again: local_irq_disable(); + +again: ti_flags = READ_ONCE(*ti_flagsp); while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); @@ -258,10 +286,8 @@ again: } /* scv need not set RI=0 because SRRs are not used */ - if (unlikely(!prep_irq_for_enabled_exit(!scv))) { - local_irq_enable(); + if (unlikely(!prep_irq_for_enabled_exit(!scv, true))) goto again; - } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM local_paca->tm_scratch = regs->msr; @@ -336,11 +362,8 @@ again: } } - if (unlikely(!prep_irq_for_enabled_exit(true))) { - local_irq_enable(); - local_irq_disable(); + if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags)))) goto again; - } #ifdef CONFIG_PPC_BOOK3E if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) { @@ -403,20 +426,8 @@ again: } } - if (unlikely(!prep_irq_for_enabled_exit(true))) { - /* - * Can't local_irq_restore to replay if we were in - * interrupt context. Must replay directly. - */ - if (irqs_disabled_flags(flags)) { - replay_soft_interrupts(); - } else { - local_irq_restore(flags); - local_irq_save(flags); - } - /* Took an interrupt, may have more exit work to do. */ + if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags)))) goto again; - } } else { /* Returning to a kernel context with local irqs disabled. */ __hard_EE_RI_disable(); From 112665286d08c87e66d699e7cba43c1497ad165f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:12 +1000 Subject: [PATCH 076/188] KVM: PPC: Book3S HV: Context tracking exit guest context before enabling irqs Interrupts that occur in kernel mode expect that context tracking is set to kernel. Enabling local irqs before context tracking switches from guest to host means interrupts can come in and trigger warnings about wrong context, and possibly worse. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-3-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 6f612d240392..d348e77cee20 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3407,8 +3407,9 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) kvmppc_set_host_core(pcpu); + guest_exit_irqoff(); + local_irq_enable(); - guest_exit(); /* Let secondaries go back to the offline loop */ for (i = 0; i < controlled_threads; ++i) { @@ -4217,8 +4218,9 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, kvmppc_set_host_core(pcpu); + guest_exit_irqoff(); + local_irq_enable(); - guest_exit(); cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest); From 7a24ae2e172f770df07f8e48ed3ed2f3a6b17e37 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 30 Jan 2021 23:08:13 +1000 Subject: [PATCH 077/188] powerpc/32s: move DABR match out of handle_page_fault handle_page_fault() has some code dedicated to book3s/32 to call do_break() when the DSI is a DABR match. On other platforms, do_break() is handled separately. Do the same for book3s/32, doing it earlier in the DSI processing. This change also avoids doing the test on ISI.
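To illustrate the resulting flow, here is a minimal standalone C sketch of the dispatch; the constant and function names below are invented stand-ins, since the real test is a couple of asm instructions in the DSI entry path:

#include <stdio.h>

#define DSISR_DABRMATCH_MODEL (1u << 22)	/* stand-in for the real DSISR bit */

static void do_break_model(unsigned long dar)
{
	printf("data breakpoint at 0x%lx\n", dar);
}

static void handle_page_fault_model(unsigned long dar, unsigned long dsisr)
{
	printf("page fault at 0x%lx, dsisr 0x%lx\n", dar, dsisr);
}

/* DSI entry: test the DABR match bit once, up front. The ISI path never
 * runs this test, which is the point of the change. */
static void dsi_model(unsigned long dar, unsigned long dsisr)
{
	if (dsisr & DSISR_DABRMATCH_MODEL)
		do_break_model(dar);
	else
		handle_page_fault_model(dar, dsisr);
}

int main(void)
{
	dsi_model(0x1000, DSISR_DABRMATCH_MODEL);	/* breakpoint case */
	dsi_model(0x2000, 0);				/* ordinary fault */
	return 0;
}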
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-4-npiggin@gmail.com --- arch/powerpc/kernel/entry_32.S | 15 --------------- arch/powerpc/kernel/head_book3s_32.S | 3 +++ 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 1c9b0ccc2172..238eacfda7b0 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -670,10 +670,6 @@ ppc_swapcontext: .globl handle_page_fault handle_page_fault: addi r3,r1,STACK_FRAME_OVERHEAD -#ifdef CONFIG_PPC_BOOK3S_32 - andis. r0,r5,DSISR_DABRMATCH@h - bne- handle_dabr_fault -#endif bl do_page_fault cmpwi r3,0 beq+ ret_from_except @@ -687,17 +683,6 @@ handle_page_fault: bl __bad_page_fault b ret_from_except_full -#ifdef CONFIG_PPC_BOOK3S_32 - /* We have a data breakpoint exception - handle it */ -handle_dabr_fault: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - clrrwi r0,r0,1 - stw r0,_TRAP(r1) - bl do_break - b ret_from_except_full -#endif - /* * This routine switches between two different tasks. The process * state of one is saved on its kernel stack. Then the state diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index c02024bce544..0c4fcb6e1a35 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -690,7 +690,10 @@ handle_page_fault_tramp_1: lwz r5, _DSISR(r11) /* fall through */ handle_page_fault_tramp_2: + andis. r0, r5, DSISR_DABRMATCH@h + bne- 1f EXC_XFER_LITE(0x300, handle_page_fault) +1: EXC_XFER_STD(0x300, do_break) #ifdef CONFIG_VMAP_STACK #ifdef CONFIG_PPC_BOOK3S_604 From 36f0114140eef53e931592b65bdf8bb61ffac1f8 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:14 +1000 Subject: [PATCH 078/188] powerpc/64s: move DABR match out of handle_page_fault Similar to the 32/s change, move the test and call to the do_break handler to the DSI. Suggested-by: Christophe Leroy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-5-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 34 +++++++++++++--------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e02ad6fefa46..f697fd00ac1c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1439,6 +1439,8 @@ EXC_COMMON_BEGIN(data_access_common) GEN_COMMON data_access ld r4,_DAR(r1) ld r5,_DSISR(r1) + andis. r0,r5,DSISR_DABRMATCH@h + bne- 1f BEGIN_MMU_FTR_SECTION ld r6,_MSR(r1) li r3,0x300 @@ -1447,6 +1449,18 @@ MMU_FTR_SECTION_ELSE b handle_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +1: /* We have a data breakpoint exception - handle it */ + ld r4,_DAR(r1) + ld r5,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl do_break + /* + * do_break() may have changed the NV GPRS while handling a breakpoint. + * If so, we need to restore them with their updated values. + */ + REST_NVGPRS(r1) + b interrupt_return + GEN_KVM data_access @@ -3209,7 +3223,7 @@ disable_machine_check: .balign IFETCH_ALIGN_BYTES do_hash_page: #ifdef CONFIG_PPC_BOOK3S_64 - lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h + lis r0,(DSISR_BAD_FAULT_64S | DSISR_KEYFAULT)@h ori r0,r0,DSISR_BAD_FAULT_64S@l and. r0,r5,r0 /* weird error? 
*/ bne- handle_page_fault /* if not, try to insert a HPTE */ @@ -3243,15 +3257,13 @@ do_hash_page: /* Error */ blt- 13f - /* Reload DAR/DSISR into r4/r5 for the DABR check below */ + /* Reload DAR/DSISR into r4/r5 for handle_page_fault */ ld r4,_DAR(r1) ld r5,_DSISR(r1) #endif /* CONFIG_PPC_BOOK3S_64 */ /* Here we have a page fault that hash_page can't handle. */ handle_page_fault: -11: andis. r0,r5,DSISR_DABRMATCH@h - bne- handle_dabr_fault addi r3,r1,STACK_FRAME_OVERHEAD bl do_page_fault cmpdi r3,0 @@ -3262,20 +3274,6 @@ handle_page_fault: bl __bad_page_fault b interrupt_return -/* We have a data breakpoint exception - handle it */ -handle_dabr_fault: - ld r4,_DAR(r1) - ld r5,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_break - /* - * do_break() may have changed the NV GPRS while handling a breakpoint. - * If so, we need to restore them with their updated values. - */ - REST_NVGPRS(r1) - b interrupt_return - - #ifdef CONFIG_PPC_BOOK3S_64 /* We have a page fault that hash_page could handle but HV refused * the PTE insertion From a4922f5442e7e6ce85da304e224d940edec2f1fb Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:15 +1000 Subject: [PATCH 079/188] powerpc/64s: move the hash fault handling logic to C The fault handling still has some complex logic particularly around hash table handling, in asm. Implement most of this in C. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-6-npiggin@gmail.com --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 + arch/powerpc/kernel/exceptions-64s.S | 127 ++++-------------- arch/powerpc/mm/book3s64/hash_utils.c | 77 +++++++---- 3 files changed, 78 insertions(+), 127 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index f911bdb68d8b..1aa68094ad29 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -456,6 +456,7 @@ static inline unsigned long hpt_hash(unsigned long vpn, long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long pa, unsigned long rlags, unsigned long vflags, int psize, int ssize); +int do_hash_fault(struct pt_regs *regs, unsigned long ea, unsigned long dsisr); extern int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, int subpage_prot); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f697fd00ac1c..91381fbdef42 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1401,14 +1401,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) * * Handling: * - Hash MMU - * Go to do_hash_page first to see if the HPT can be filled from an entry in - * the Linux page table. Hash faults can hit in kernel mode in a fairly + * Go to do_hash_fault, which attempts to fill the HPT from an entry in the + * Linux page table. Hash faults can hit in kernel mode in a fairly * arbitrary state (e.g., interrupts disabled, locks held) when accessing * "non-bolted" regions, e.g., vmalloc space. However these should always be - * backed by Linux page tables. + * backed by Linux page table entries. * - * If none is found, do a Linux page fault. Linux page faults can happen in - * kernel mode due to user copy operations of course. + * If no entry is found the Linux page fault handler is invoked (by + * do_hash_fault). 
Linux page faults can happen in kernel mode due to user + * copy operations of course. * * KVM: The KVM HDSI handler may perform a load with MSR[DR]=1 in guest * MMU context, which may cause a DSI in the host, which must go to the @@ -1439,27 +1440,29 @@ EXC_COMMON_BEGIN(data_access_common) GEN_COMMON data_access ld r4,_DAR(r1) ld r5,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD andis. r0,r5,DSISR_DABRMATCH@h bne- 1f BEGIN_MMU_FTR_SECTION - ld r6,_MSR(r1) - li r3,0x300 - b do_hash_page /* Try to handle as hpte fault */ + bl do_hash_fault MMU_FTR_SECTION_ELSE - b handle_page_fault + bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) + cmpdi r3,0 + beq+ interrupt_return + mr r5,r3 + addi r3,r1,STACK_FRAME_OVERHEAD + ld r4,_DAR(r1) + bl __bad_page_fault + b interrupt_return -1: /* We have a data breakpoint exception - handle it */ - ld r4,_DAR(r1) - ld r5,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_break +1: bl do_break /* * do_break() may have changed the NV GPRS while handling a breakpoint. * If so, we need to restore them with their updated values. */ REST_NVGPRS(r1) - b interrupt_return + b interrupt_return GEN_KVM data_access @@ -1554,13 +1557,19 @@ EXC_COMMON_BEGIN(instruction_access_common) GEN_COMMON instruction_access ld r4,_DAR(r1) ld r5,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION - ld r6,_MSR(r1) - li r3,0x400 - b do_hash_page /* Try to handle as hpte fault */ + bl do_hash_fault MMU_FTR_SECTION_ELSE - b handle_page_fault + bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) + cmpdi r3,0 + beq+ interrupt_return + mr r5,r3 + addi r3,r1,STACK_FRAME_OVERHEAD + ld r4,_DAR(r1) + bl __bad_page_fault + b interrupt_return GEN_KVM instruction_access @@ -3216,83 +3225,3 @@ disable_machine_check: RFI_TO_KERNEL 1: mtlr r0 blr - -/* - * Hash table stuff - */ - .balign IFETCH_ALIGN_BYTES -do_hash_page: -#ifdef CONFIG_PPC_BOOK3S_64 - lis r0,(DSISR_BAD_FAULT_64S | DSISR_KEYFAULT)@h - ori r0,r0,DSISR_BAD_FAULT_64S@l - and. r0,r5,r0 /* weird error? */ - bne- handle_page_fault /* if not, try to insert a HPTE */ - - /* - * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then - * don't call hash_page, just fail the fault. This is required to - * prevent re-entrancy problems in the hash code, namely perf - * interrupts hitting while something holds H_PAGE_BUSY, and taking a - * hash fault. See the comment in hash_preload(). - */ - ld r11, PACA_THREAD_INFO(r13) - lwz r0,TI_PREEMPT(r11) - andis. r0,r0,NMI_MASK@h - bne 77f - - /* - * r3 contains the trap number - * r4 contains the faulting address - * r5 contains dsisr - * r6 msr - * - * at return r3 = 0 for success, 1 for page fault, negative for error - */ - bl __hash_page /* build HPTE if possible */ - cmpdi r3,0 /* see if __hash_page succeeded */ - - /* Success */ - beq interrupt_return /* Return from exception on success */ - - /* Error */ - blt- 13f - - /* Reload DAR/DSISR into r4/r5 for handle_page_fault */ - ld r4,_DAR(r1) - ld r5,_DSISR(r1) -#endif /* CONFIG_PPC_BOOK3S_64 */ - -/* Here we have a page fault that hash_page can't handle. 
*/ -handle_page_fault: - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_page_fault - cmpdi r3,0 - beq+ interrupt_return - mr r5,r3 - addi r3,r1,STACK_FRAME_OVERHEAD - ld r4,_DAR(r1) - bl __bad_page_fault - b interrupt_return - -#ifdef CONFIG_PPC_BOOK3S_64 -/* We have a page fault that hash_page could handle but HV refused - * the PTE insertion - */ -13: mr r5,r3 - addi r3,r1,STACK_FRAME_OVERHEAD - ld r4,_DAR(r1) - bl low_hash_fault - b interrupt_return -#endif - -/* - * We come here as a result of a DSI at a point where we don't want - * to call hash_page, such as when we are accessing memory (possibly - * user memory) inside a PMU interrupt that occurred while interrupts - * were soft-disabled. We want to invoke the exception handler for - * the access, or panic if there isn't a handler. - */ -77: addi r3,r1,STACK_FRAME_OVERHEAD - li r5,SIGSEGV - bl bad_page_fault - b interrupt_return diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 73b06adb6eeb..e866cae57e2f 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1512,16 +1512,40 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, } EXPORT_SYMBOL_GPL(hash_page); -int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr, - unsigned long msr) +int do_hash_fault(struct pt_regs *regs, unsigned long ea, unsigned long dsisr) { unsigned long access = _PAGE_PRESENT | _PAGE_READ; unsigned long flags = 0; - struct mm_struct *mm = current->mm; - unsigned int region_id = get_region_id(ea); + struct mm_struct *mm; + unsigned int region_id; + int err; + if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) + goto page_fault; + + /* + * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then + * don't call hash_page, just fail the fault. This is required to + * prevent re-entrancy problems in the hash code, namely perf + * interrupts hitting while something holds H_PAGE_BUSY, and taking a + * hash fault. See the comment in hash_preload(). + * + * We come here as a result of a DSI at a point where we don't want + * to call hash_page, such as when we are accessing memory (possibly + * user memory) inside a PMU interrupt that occurred while interrupts + * were soft-disabled. We want to invoke the exception handler for + * the access, or panic if there isn't a handler. + */ + if (unlikely(in_nmi())) { + bad_page_fault(regs, ea, SIGSEGV); + return 0; + } + + region_id = get_region_id(ea); if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID)) mm = &init_mm; + else + mm = current->mm; if (dsisr & DSISR_NOHPTE) flags |= HPTE_NOHPTE_UPDATE; @@ -1537,13 +1561,31 @@ int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr, * 2) user space access kernel space. 
*/ access |= _PAGE_PRIVILEGED; - if ((msr & MSR_PR) || (region_id == USER_REGION_ID)) + if (user_mode(regs) || (region_id == USER_REGION_ID)) access &= ~_PAGE_PRIVILEGED; - if (trap == 0x400) + if (regs->trap == 0x400) access |= _PAGE_EXEC; - return hash_page_mm(mm, ea, access, trap, flags); + err = hash_page_mm(mm, ea, access, regs->trap, flags); + if (unlikely(err < 0)) { + // failed to insert a hash PTE due to a hypervisor error + if (user_mode(regs)) { + if (IS_ENABLED(CONFIG_PPC_SUBPAGE_PROT) && err == -2) + _exception(SIGSEGV, regs, SEGV_ACCERR, ea); + else + _exception(SIGBUS, regs, BUS_ADRERR, ea); + } else { + bad_page_fault(regs, ea, SIGBUS); + } + err = 0; + + } else if (err) { +page_fault: + err = do_page_fault(regs, ea, dsisr); + } + + return err; } #ifdef CONFIG_PPC_MM_SLICES @@ -1843,27 +1885,6 @@ void flush_hash_range(unsigned long number, int local) } } -/* - * low_hash_fault is called when we the low level hash code failed - * to instert a PTE due to an hypervisor error - */ -void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc) -{ - enum ctx_state prev_state = exception_enter(); - - if (user_mode(regs)) { -#ifdef CONFIG_PPC_SUBPAGE_PROT - if (rc == -2) - _exception(SIGSEGV, regs, SEGV_ACCERR, address); - else -#endif - _exception(SIGBUS, regs, BUS_ADRERR, address); - } else - bad_page_fault(regs, address, SIGBUS); - - exception_exit(prev_state); -} - long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long pa, unsigned long rflags, unsigned long vflags, int psize, int ssize) From a01a3f2ddbcda83e8572787c0ec1dcbeba86915a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:16 +1000 Subject: [PATCH 080/188] powerpc: remove arguments from fault handler functions Make mm fault handlers all just take the pt_regs * argument and load DAR/DSISR from that. Make those that return a value return long. This is done to make the function signatures match other handlers, which will help with a future patch to add wrappers. Explicit arguments could be added for performance but that would require more wrapper macro variants.
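As a rough sketch of what the conversion looks like at the C level (simplified types, not the kernel's real pt_regs):

struct regs_model {
	unsigned long dar;	/* faulting address */
	unsigned long dsisr;	/* fault status */
};

/* Before: each handler had its own argument list, so the asm glue had
 * to marshal DAR/DSISR into the right argument registers per handler. */
long do_page_fault_old(struct regs_model *regs, unsigned long address,
		       unsigned long error_code);

/* After: one call shape for every fault handler; each loads what it
 * needs from the saved register frame. */
long do_page_fault_new(struct regs_model *regs)
{
	unsigned long address = regs->dar;
	unsigned long error_code = regs->dsisr;

	/* ... the actual fault handling would use address/error_code ... */
	(void)address;
	(void)error_code;
	return 0;
}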
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-7-npiggin@gmail.com --- arch/powerpc/include/asm/asm-prototypes.h | 4 ++-- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 2 +- arch/powerpc/include/asm/bug.h | 2 +- arch/powerpc/kernel/exceptions-64e.S | 2 -- arch/powerpc/kernel/exceptions-64s.S | 17 ++++------------- arch/powerpc/kernel/head_40x.S | 10 +++++----- arch/powerpc/kernel/head_8xx.S | 6 +++--- arch/powerpc/kernel/head_book3s_32.S | 5 ++--- arch/powerpc/kernel/head_booke.h | 4 +--- arch/powerpc/mm/book3s64/hash_utils.c | 8 +++++--- arch/powerpc/mm/book3s64/slb.c | 11 +++++++---- arch/powerpc/mm/fault.c | 5 ++--- 12 files changed, 33 insertions(+), 43 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index d0b832cbbec8..22c9d08fa3a4 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -82,8 +82,8 @@ void kernel_bad_stack(struct pt_regs *regs); void system_reset_exception(struct pt_regs *regs); void machine_check_exception(struct pt_regs *regs); void emulation_assist_interrupt(struct pt_regs *regs); -long do_slb_fault(struct pt_regs *regs, unsigned long ea); -void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err); +long do_slb_fault(struct pt_regs *regs); +void do_bad_slb_fault(struct pt_regs *regs); /* signals, syscalls and interrupts */ long sys_swapcontext(struct ucontext __user *old_ctx, diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 1aa68094ad29..36884bc0bcd7 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -456,7 +456,7 @@ static inline unsigned long hpt_hash(unsigned long vpn, long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long pa, unsigned long rlags, unsigned long vflags, int psize, int ssize); -int do_hash_fault(struct pt_regs *regs, unsigned long ea, unsigned long dsisr); +long do_hash_fault(struct pt_regs *regs); extern int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, int subpage_prot); diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 464f8ca8a5c9..f7827e993196 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -111,7 +111,7 @@ #ifndef __ASSEMBLY__ struct pt_regs; -extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); +long do_page_fault(struct pt_regs *); extern void bad_page_fault(struct pt_regs *, unsigned long, int); void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig); extern void _exception(int, struct pt_regs *, int, unsigned long); diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 74d07dc0bb48..43e71d86dcbf 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1011,8 +1011,6 @@ storage_fault_common: std r14,_DAR(r1) std r15,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - mr r4,r14 - mr r5,r15 ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) bl do_page_fault diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 91381fbdef42..33f88a1dcd08 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1438,10 +1438,9 @@ 
EXC_VIRT_BEGIN(data_access, 0x4300, 0x80) EXC_VIRT_END(data_access, 0x4300, 0x80) EXC_COMMON_BEGIN(data_access_common) GEN_COMMON data_access - ld r4,_DAR(r1) - ld r5,_DSISR(r1) + ld r4,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - andis. r0,r5,DSISR_DABRMATCH@h + andis. r0,r4,DSISR_DABRMATCH@h bne- 1f BEGIN_MMU_FTR_SECTION bl do_hash_fault @@ -1504,10 +1503,9 @@ EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) EXC_VIRT_END(data_access_slb, 0x4380, 0x80) EXC_COMMON_BEGIN(data_access_slb_common) GEN_COMMON data_access_slb - ld r4,_DAR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ + addi r3,r1,STACK_FRAME_OVERHEAD bl do_slb_fault cmpdi r3,0 bne- 1f @@ -1519,8 +1517,6 @@ MMU_FTR_SECTION_ELSE ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) RECONCILE_IRQ_STATE(r10, r11) - ld r4,_DAR(r1) - ld r5,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault b interrupt_return @@ -1555,8 +1551,6 @@ EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80) EXC_VIRT_END(instruction_access, 0x4400, 0x80) EXC_COMMON_BEGIN(instruction_access_common) GEN_COMMON instruction_access - ld r4,_DAR(r1) - ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION bl do_hash_fault @@ -1602,10 +1596,9 @@ EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) EXC_COMMON_BEGIN(instruction_access_slb_common) GEN_COMMON instruction_access_slb - ld r4,_DAR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ + addi r3,r1,STACK_FRAME_OVERHEAD bl do_slb_fault cmpdi r3,0 bne- 1f @@ -1617,8 +1610,6 @@ MMU_FTR_SECTION_ELSE ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) RECONCILE_IRQ_STATE(r10, r11) - ld r4,_DAR(r1) - ld r5,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault b interrupt_return diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index a1ae00689e0f..3c5577ac4dc8 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -179,9 +179,9 @@ _ENTRY(saved_ksp_limit) */ START_EXCEPTION(0x0300, DataStorage) EXCEPTION_PROLOG - mfspr r5, SPRN_ESR /* Grab the ESR, save it, pass arg3 */ + mfspr r5, SPRN_ESR /* Grab the ESR, save it */ stw r5, _ESR(r11) - mfspr r4, SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + mfspr r4, SPRN_DEAR /* Grab the DEAR, save it */ stw r4, _DEAR(r11) EXC_XFER_LITE(0x300, handle_page_fault) @@ -191,9 +191,9 @@ _ENTRY(saved_ksp_limit) */ START_EXCEPTION(0x0400, InstructionAccess) EXCEPTION_PROLOG - mr r4,r12 /* Pass SRR0 as arg2 */ - stw r4, _DEAR(r11) - li r5,0 /* Pass zero as arg3 */ + li r5,0 + stw r5, _ESR(r11) /* Zero ESR */ + stw r12, _DEAR(r11) /* SRR0 as DEAR */ EXC_XFER_LITE(0x400, handle_page_fault) /* 0x0500 - External Interrupt Exception */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 52702f3db6df..0b2c247cfdff 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -312,14 +312,14 @@ DataStoreTLBMiss: . = 0x1300 InstructionTLBError: EXCEPTION_PROLOG - mr r4,r12 andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ andis. r10,r9,SRR1_ISI_NOPT@h beq+ .Litlbie - tlbie r4 + tlbie r12 /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ .Litlbie: - stw r4, _DAR(r11) + stw r12, _DAR(r11) + stw r5, _DSISR(r11) EXC_XFER_LITE(0x400, handle_page_fault) /* This is the data TLB error on the MPC8xx. 
This could be due to diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 0c4fcb6e1a35..61f862fa9655 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -362,9 +362,9 @@ BEGIN_MMU_FTR_SECTION END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) #endif #endif /* CONFIG_VMAP_STACK */ -1: mr r4,r12 andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ - stw r4, _DAR(r11) + stw r5, _DSISR(r11) + stw r12, _DAR(r11) EXC_XFER_LITE(0x400, handle_page_fault) /* External interrupt */ @@ -686,7 +686,6 @@ handle_page_fault_tramp_1: #ifdef CONFIG_VMAP_STACK EXCEPTION_PROLOG_2 handle_dar_dsisr=1 #endif - lwz r4, _DAR(r11) lwz r5, _DSISR(r11) /* fall through */ handle_page_fault_tramp_2: diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 74e230c200fb..0fbdacc7fab7 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -476,9 +476,7 @@ label: NORMAL_EXCEPTION_PROLOG(INST_STORAGE); \ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ - mr r4,r12; /* Pass SRR0 as arg2 */ \ - stw r4, _DEAR(r11); \ - li r5,0; /* Pass zero as arg3 */ \ + stw r12, _DEAR(r11); /* Pass SRR0 as arg2 */ \ EXC_XFER_LITE(0x0400, handle_page_fault) #define ALIGNMENT_EXCEPTION \ diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index e866cae57e2f..9a499af3eebf 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1512,13 +1512,15 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, } EXPORT_SYMBOL_GPL(hash_page); -int do_hash_fault(struct pt_regs *regs, unsigned long ea, unsigned long dsisr) +long do_hash_fault(struct pt_regs *regs) { + unsigned long ea = regs->dar; + unsigned long dsisr = regs->dsisr; unsigned long access = _PAGE_PRESENT | _PAGE_READ; unsigned long flags = 0; struct mm_struct *mm; unsigned int region_id; - int err; + long err; if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) goto page_fault; @@ -1582,7 +1584,7 @@ int do_hash_fault(struct pt_regs *regs, unsigned long ea, unsigned long dsisr) } else if (err) { page_fault: - err = do_page_fault(regs, ea, dsisr); + err = do_page_fault(regs); } return err; diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 584567970c11..985902ce0272 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -813,8 +813,9 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea) return slb_insert_entry(ea, context, flags, ssize, false); } -long do_slb_fault(struct pt_regs *regs, unsigned long ea) +long do_slb_fault(struct pt_regs *regs) { + unsigned long ea = regs->dar; unsigned long id = get_region_id(ea); /* IRQs are not reconciled here, so can't check irqs_disabled */ @@ -865,13 +866,15 @@ long do_slb_fault(struct pt_regs *regs, unsigned long ea) } } -void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err) +void do_bad_slb_fault(struct pt_regs *regs) { + int err = regs->result; + if (err == -EFAULT) { if (user_mode(regs)) - _exception(SIGSEGV, regs, SEGV_BNDERR, ea); + _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar); else - bad_page_fault(regs, ea, SIGSEGV); + bad_page_fault(regs, regs->dar, SIGSEGV); } else if (err == -EINVAL) { unrecoverable_exception(regs); } else { diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 8961b44f350c..273ff845eccf 100644 --- a/arch/powerpc/mm/fault.c 
+++ b/arch/powerpc/mm/fault.c @@ -542,12 +542,11 @@ retry: } NOKPROBE_SYMBOL(__do_page_fault); -int do_page_fault(struct pt_regs *regs, unsigned long address, - unsigned long error_code) +long do_page_fault(struct pt_regs *regs) { const struct exception_table_entry *entry; enum ctx_state prev_state = exception_enter(); - int rc = __do_page_fault(regs, address, error_code); + int rc = __do_page_fault(regs, regs->dar, regs->dsisr); exception_exit(prev_state); if (likely(!rc)) return 0; From b4ced8031000b832d845dd17994e0fa1b8310496 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:17 +1000 Subject: [PATCH 081/188] powerpc/fsl_booke/32: CacheLockingException remove args Like other interrupt handler conversions, switch to getting registers from the pt_regs argument. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-8-npiggin@gmail.com --- arch/powerpc/kernel/head_fsl_booke.S | 6 +++--- arch/powerpc/kernel/traps.c | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index fdd4d274c245..3f4a40cccef5 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -364,12 +364,12 @@ interrupt_base: /* Data Storage Interrupt */ START_EXCEPTION(DataStorage) NORMAL_EXCEPTION_PROLOG(DATA_STORAGE) - mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ + mfspr r5,SPRN_ESR /* Grab the ESR, save it */ stw r5,_ESR(r11) - mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + mfspr r4,SPRN_DEAR /* Grab the DEAR, save it */ + stw r4, _DEAR(r11) andis. r10,r5,(ESR_ILK|ESR_DLK)@h bne 1f - stw r4, _DEAR(r11) EXC_XFER_LITE(0x0300, handle_page_fault) 1: addi r3,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 3ec7b443fe6b..1c77b1a8f7c9 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -2062,9 +2062,10 @@ void altivec_assist_exception(struct pt_regs *regs) #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_FSL_BOOKE -void CacheLockingException(struct pt_regs *regs, unsigned long address, - unsigned long error_code) +void CacheLockingException(struct pt_regs *regs) { + unsigned long error_code = regs->dsisr; + /* We treat cache locking instructions from the user * as priv ops, in the future we could try to do * something smarter From 18722ecf9efdc6a7ca933a3e5a83cc9dba375847 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:18 +1000 Subject: [PATCH 082/188] powerpc: do_break get registers from regs Similar to the previous patch this makes interrupt handler function types more regular so they can be wrapped with the next patch. 
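The asm side now stores DEAR into the frame unconditionally, before the ESR_ILK/ESR_DLK test, because the C handler reads it from pt_regs either way. A compilable sketch of the resulting routing, with invented bit values standing in for the real booke definitions:

#define ESR_ILK_MODEL (1u << 3)	/* invented values for illustration; */
#define ESR_DLK_MODEL (1u << 4)	/* the real bits live in asm headers */

struct regs_model { unsigned long dear, esr; };

void cache_locking_exception_model(struct regs_model *regs);
void handle_page_fault_model(struct regs_model *regs);

/* Save DEAR/ESR into the frame first, then pick a handler; both
 * handlers now take only the register frame. */
void data_storage_model(struct regs_model *regs)
{
	if (regs->esr & (ESR_ILK_MODEL | ESR_DLK_MODEL))
		cache_locking_exception_model(regs);
	else
		handle_page_fault_model(regs);
}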
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-9-npiggin@gmail.com --- arch/powerpc/include/asm/debug.h | 3 +-- arch/powerpc/kernel/head_8xx.S | 5 ++--- arch/powerpc/kernel/process.c | 7 +++---- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h index ec57daf87f40..0550eceab3ca 100644 --- a/arch/powerpc/include/asm/debug.h +++ b/arch/powerpc/include/asm/debug.h @@ -52,8 +52,7 @@ extern void do_send_trap(struct pt_regs *regs, unsigned long address, unsigned long error_code, int brkpt); #else -extern void do_break(struct pt_regs *regs, unsigned long address, - unsigned long error_code); +void do_break(struct pt_regs *regs); #endif #endif /* _ASM_POWERPC_DEBUG_H */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 0b2c247cfdff..7869db974185 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -364,10 +364,9 @@ do_databreakpoint: addi r3,r1,STACK_FRAME_OVERHEAD mfspr r4,SPRN_BAR stw r4,_DAR(r11) -#ifdef CONFIG_VMAP_STACK - lwz r5,_DSISR(r11) -#else +#ifndef CONFIG_VMAP_STACK mfspr r5,SPRN_DSISR + stw r5,_DSISR(r11) #endif EXC_XFER_STD(0x1c00, do_break) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index a66f435dabbf..4f0f81e9420b 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -659,11 +659,10 @@ static void do_break_handler(struct pt_regs *regs) } } -void do_break (struct pt_regs *regs, unsigned long address, - unsigned long error_code) +void do_break(struct pt_regs *regs) { current->thread.trap_nr = TRAP_HWBKPT; - if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, + if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, regs->dsisr, 11, SIGSEGV) == NOTIFY_STOP) return; @@ -681,7 +680,7 @@ void do_break (struct pt_regs *regs, unsigned long address, do_break_handler(regs); /* Deliver the signal to userspace */ - force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address); + force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)regs->dar); } #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ From 755d664174463791489dddf34c33308b61de68c3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:19 +1000 Subject: [PATCH 083/188] powerpc: DebugException remove args Like other interrupt handler conversions, switch to getting registers from the pt_regs argument. 
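The payoff of regular function types is generic wrapper machinery along these lines; this is a speculative sketch only, and the real wrapper macros introduced later in the series are named differently and do considerably more:

struct regs_model { unsigned long dar, dsisr; };

/* Hypothetical bookkeeping a wrapper might do around any handler. */
static void interrupt_enter_model(struct regs_model *regs) { (void)regs; }
static void interrupt_exit_model(struct regs_model *regs) { (void)regs; }

/* One macro can wrap every handler only because every handler now has
 * the same signature. */
#define DEFINE_INTERRUPT_HANDLER_MODEL(func)				\
static void ___##func(struct regs_model *regs);				\
void func(struct regs_model *regs)					\
{									\
	interrupt_enter_model(regs);					\
	___##func(regs);						\
	interrupt_exit_model(regs);					\
}									\
static void ___##func(struct regs_model *regs)

DEFINE_INTERRUPT_HANDLER_MODEL(do_break_model)
{
	/* the body reads dar/dsisr from regs, like the real do_break() */
	(void)regs->dar;
	(void)regs->dsisr;
}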
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-10-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64e.S | 2 -- arch/powerpc/kernel/head_40x.S | 1 + arch/powerpc/kernel/head_booke.h | 2 ++ arch/powerpc/kernel/traps.c | 4 +++- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 43e71d86dcbf..1a7291daadfd 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -791,7 +791,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) EXCEPTION_COMMON_CRIT(0xd00) std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - mr r4,r14 ld r14,PACA_EXCRIT+EX_R14(r13) ld r15,PACA_EXCRIT+EX_R15(r13) bl save_nvgprs @@ -864,7 +863,6 @@ kernel_dbg_exc: INTS_DISABLE std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - mr r4,r14 ld r14,PACA_EXDBG+EX_R14(r13) ld r15,PACA_EXDBG+EX_R15(r13) bl save_nvgprs diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 3c5577ac4dc8..24724a7dad49 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -476,6 +476,7 @@ _ENTRY(saved_ksp_limit) /* continue normal handling for a critical exception... */ 2: mfspr r4,SPRN_DBSR + stw r4,_ESR(r11) /* DebugException takes DBSR in _ESR */ addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_TEMPLATE(DebugException, 0x2002, \ (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 0fbdacc7fab7..bf33af714d11 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -406,6 +406,7 @@ label: \ /* continue normal handling for a debug exception... */ \ 2: mfspr r4,SPRN_DBSR; \ + stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), debug_transfer_to_handler, ret_from_debug_exc) @@ -459,6 +460,7 @@ label: \ /* continue normal handling for a critical exception... */ \ 2: mfspr r4,SPRN_DBSR; \ + stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1c77b1a8f7c9..dfb0d3325f4b 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1957,8 +1957,10 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status) mtspr(SPRN_DBCR0, current->thread.debug.dbcr0); } -void DebugException(struct pt_regs *regs, unsigned long debug_status) +void DebugException(struct pt_regs *regs) { + unsigned long debug_status = regs->dsisr; + current->thread.debug.dbsr = debug_status; /* Hack alert: On BookE, Branch Taken stops on the branch itself, while From 73d7a97914f23397b012e851f6a1fe4061923a82 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:20 +1000 Subject: [PATCH 084/188] powerpc/32: transfer can avoid saving r4/r5 over trace call Now that handlers get all registers from pt_regs, r4 and r5 are no longer live here and may be clobbered. 
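In C terms the change is roughly the following analogy (the real code is asm managing the exception frame, so this is only a model):

void trace_hardirqs_off_model(void);

struct regs_model { unsigned long dar, dsisr; };

long handler_with_args_model(unsigned long addr, unsigned long code);
long handler_from_regs_model(struct regs_model *regs);

/* Before: the fault arguments had to survive the trace call, so the
 * asm spilled r4/r5 to the stack frame around it. */
long transfer_old_model(unsigned long addr, unsigned long code)
{
	unsigned long saved_addr = addr, saved_code = code;	/* spill */

	trace_hardirqs_off_model();
	return handler_with_args_model(saved_addr, saved_code);
}

/* After: only the frame pointer is live across the call; r4/r5 may be
 * clobbered freely. */
long transfer_new_model(struct regs_model *regs)
{
	trace_hardirqs_off_model();
	return handler_from_regs_model(regs);
}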
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-11-npiggin@gmail.com --- arch/powerpc/kernel/entry_32.S | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 238eacfda7b0..d6ea3f2d6cc0 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -276,8 +276,7 @@ reenable_mmu: * We save a bunch of GPRs, * r3 can be different from GPR3(r1) at this point, r9 and r11 * contains the old MSR and handler address respectively, - * r4 & r5 can contain page fault arguments that need to be passed - * along as well. r0, r6-r8, r12, CCR, CTR, XER etc... are left + * r0, r4-r8, r12, CCR, CTR, XER etc... are left * clobbered as they aren't useful past this point. */ @@ -285,15 +284,11 @@ reenable_mmu: stw r9,8(r1) stw r11,12(r1) stw r3,16(r1) - stw r4,20(r1) - stw r5,24(r1) /* If we are disabling interrupts (normal case), simply log it with * lockdep */ 1: bl trace_hardirqs_off - lwz r5,24(r1) - lwz r4,20(r1) lwz r3,16(r1) lwz r11,12(r1) lwz r9,8(r1) From 8458c628a53ba4311b2df12370be1a6f1870ff37 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:21 +1000 Subject: [PATCH 085/188] powerpc: bad_page_fault get registers from regs Similar to the previous patch this makes interrupt handler function types more regular so they can be wrapped with the next patch. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-12-npiggin@gmail.com --- arch/powerpc/include/asm/bug.h | 4 ++-- arch/powerpc/kernel/entry_32.S | 3 +-- arch/powerpc/kernel/exceptions-64e.S | 3 +-- arch/powerpc/kernel/traps.c | 2 +- arch/powerpc/mm/book3s64/hash_utils.c | 4 ++-- arch/powerpc/mm/book3s64/slb.c | 2 +- arch/powerpc/mm/fault.c | 6 +++--- arch/powerpc/platforms/8xx/machine_check.c | 2 +- 8 files changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index f7827e993196..4220789b9a97 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -112,8 +112,8 @@ struct pt_regs; long do_page_fault(struct pt_regs *); -extern void bad_page_fault(struct pt_regs *, unsigned long, int); -void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig); +void bad_page_fault(struct pt_regs *, int); +void __bad_page_fault(struct pt_regs *regs, int sig); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void _exception_pkey(struct pt_regs *, unsigned long, int); extern void die(const char *, struct pt_regs *, long); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index d6ea3f2d6cc0..b102b40c4988 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -672,9 +672,8 @@ handle_page_fault: lwz r0,_TRAP(r1) clrrwi r0,r0,1 stw r0,_TRAP(r1) - mr r5,r3 + mr r4,r3 /* err arg for bad_page_fault */ addi r3,r1,STACK_FRAME_OVERHEAD - lwz r4,_DAR(r1) bl __bad_page_fault b ret_from_except_full diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 1a7291daadfd..003999c7836c 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1016,9 +1016,8 @@ storage_fault_common: bne- 1f b ret_from_except_lite 1: bl save_nvgprs - mr r5,r3 + mr r4,r3 addi r3,r1,STACK_FRAME_OVERHEAD - ld r4,_DAR(r1) bl __bad_page_fault b ret_from_except diff --git 
a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index dfb0d3325f4b..f7370145be19 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1612,7 +1612,7 @@ bad: if (user_mode(regs)) _exception(sig, regs, code, regs->dar); else - bad_page_fault(regs, regs->dar, sig); + bad_page_fault(regs, sig); bail: exception_exit(prev_state); diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 9a499af3eebf..1a270cc37d97 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1539,7 +1539,7 @@ long do_hash_fault(struct pt_regs *regs) * the access, or panic if there isn't a handler. */ if (unlikely(in_nmi())) { - bad_page_fault(regs, ea, SIGSEGV); + bad_page_fault(regs, SIGSEGV); return 0; } @@ -1578,7 +1578,7 @@ long do_hash_fault(struct pt_regs *regs) else _exception(SIGBUS, regs, BUS_ADRERR, ea); } else { - bad_page_fault(regs, ea, SIGBUS); + bad_page_fault(regs, SIGBUS); } err = 0; diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 985902ce0272..c581548b533f 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -874,7 +874,7 @@ void do_bad_slb_fault(struct pt_regs *regs) if (user_mode(regs)) _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar); else - bad_page_fault(regs, regs->dar, SIGSEGV); + bad_page_fault(regs, SIGSEGV); } else if (err == -EINVAL) { unrecoverable_exception(regs); } else { diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 273ff845eccf..5dd3248b47c7 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -566,7 +566,7 @@ NOKPROBE_SYMBOL(do_page_fault); * It is called from the DSI and ISI handlers in head.S and from some * of the procedures in traps.c. */ -void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +void __bad_page_fault(struct pt_regs *regs, int sig) { int is_write = page_fault_is_write(regs->dsisr); @@ -604,7 +604,7 @@ void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) die("Kernel access of bad area", regs, sig); } -void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +void bad_page_fault(struct pt_regs *regs, int sig) { const struct exception_table_entry *entry; @@ -613,5 +613,5 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) if (entry) instruction_pointer_set(regs, extable_fixup(entry)); else - __bad_page_fault(regs, address, sig); + __bad_page_fault(regs, sig); } diff --git a/arch/powerpc/platforms/8xx/machine_check.c b/arch/powerpc/platforms/8xx/machine_check.c index 88dedf38eccd..656365975895 100644 --- a/arch/powerpc/platforms/8xx/machine_check.c +++ b/arch/powerpc/platforms/8xx/machine_check.c @@ -26,7 +26,7 @@ int machine_check_8xx(struct pt_regs *regs) * to deal with that than having a wart in the mcheck handler. * -- BenH */ - bad_page_fault(regs, regs->dar, SIGBUS); + bad_page_fault(regs, SIGBUS); return 1; #else return 0; From 71f47976fafc4375674bd0714153be10f878040a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:22 +1000 Subject: [PATCH 086/188] powerpc/64s: add do_bad_page_fault_segv handler This function acts like an interrupt handler so it needs to follow the standard interrupt handler function signature which will be introduced in a future change. 
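A brief sketch of why the standard signature matters: handlers sharing one shape can be selected interchangeably, whether by feature-section patching as in this patch or by a function pointer. This is illustrative only; the kernel picks the handler in asm:

struct regs_model { unsigned long dar; };

void do_bad_page_fault_segv_model(struct regs_model *regs);
void unknown_exception_model(struct regs_model *regs);

typedef void (*interrupt_handler_model_t)(struct regs_model *);

/* Mirrors the feature section above: radix hosts treat the hypervisor
 * data storage interrupt as a bad (SEGV) fault, hash hosts treat it as
 * unexpected. */
interrupt_handler_model_t h_data_storage_handler_model(int radix)
{
	return radix ? do_bad_page_fault_segv_model
		     : unknown_exception_model;
}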
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-13-npiggin@gmail.com --- arch/powerpc/include/asm/bug.h | 1 + arch/powerpc/kernel/exceptions-64s.S | 4 +--- arch/powerpc/mm/fault.c | 7 +++++++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 4220789b9a97..8f09ddae9305 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -114,6 +114,7 @@ struct pt_regs; long do_page_fault(struct pt_regs *); void bad_page_fault(struct pt_regs *, int); void __bad_page_fault(struct pt_regs *regs, int sig); +void do_bad_page_fault_segv(struct pt_regs *regs); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void _exception_pkey(struct pt_regs *, unsigned long, int); extern void die(const char *, struct pt_regs *, long); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 33f88a1dcd08..fc793fa3fdf8 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -2151,9 +2151,7 @@ EXC_COMMON_BEGIN(h_data_storage_common) GEN_COMMON h_data_storage addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION - ld r4,_DAR(r1) - li r5,SIGSEGV - bl bad_page_fault + bl do_bad_page_fault_segv MMU_FTR_SECTION_ELSE bl unknown_exception ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 5dd3248b47c7..e476d7701413 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -615,3 +615,10 @@ void bad_page_fault(struct pt_regs *regs, int sig) else __bad_page_fault(regs, sig); } + +#ifdef CONFIG_PPC_BOOK3S_64 +void do_bad_page_fault_segv(struct pt_regs *regs) +{ + bad_page_fault(regs, SIGSEGV); +} +#endif From 4cb8428465148bcca0b6b8593d51f805818a70e0 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:23 +1000 Subject: [PATCH 087/188] powerpc: rearrange do_page_fault error case to be inside exception_enter This keeps the context tracking over the entire interrupt handler which helps later with moving context tracking into interrupt wrappers. 
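Schematically, with context tracking reduced to a toy variable (none of this is the kernel's real API):

static int ctx_kernel;	/* toy context-tracking state */

static int exception_enter_model(void)
{
	int prev = ctx_kernel;

	ctx_kernel = 1;
	return prev;
}

static void exception_exit_model(int prev)
{
	ctx_kernel = prev;
}

long __do_page_fault_model(void);		/* the real work */
int search_exception_tables_model(void);	/* 1 if a fixup entry exists */

long do_page_fault_model(void)
{
	int prev_state = exception_enter_model();
	long err = __do_page_fault_model();

	if (!err)
		goto out;

	/* The fixup path now runs inside the tracked region too, so any
	 * code reached from here sees the correct (kernel) context. */
	if (search_exception_tables_model())
		err = 0;	/* nip would be patched to the fixup address */
out:
	exception_exit_model(prev_state);
	return err;
}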
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-14-npiggin@gmail.com --- arch/powerpc/mm/fault.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index e476d7701413..970ac317e018 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -545,19 +545,24 @@ NOKPROBE_SYMBOL(__do_page_fault); long do_page_fault(struct pt_regs *regs) { const struct exception_table_entry *entry; - enum ctx_state prev_state = exception_enter(); - int rc = __do_page_fault(regs, regs->dar, regs->dsisr); - exception_exit(prev_state); - if (likely(!rc)) - return 0; + enum ctx_state prev_state; + long err; + + prev_state = exception_enter(); + err = __do_page_fault(regs, regs->dar, regs->dsisr); + if (likely(!err)) + goto out; entry = search_exception_tables(regs->nip); - if (unlikely(!entry)) - return rc; + if (likely(entry)) { + instruction_pointer_set(regs, extable_fixup(entry)); + err = 0; + } - instruction_pointer_set(regs, extable_fixup(entry)); +out: + exception_exit(prev_state); - return 0; + return err; } NOKPROBE_SYMBOL(do_page_fault); From f4c03b0e520c5f56e569a8da3fce5ddbd0696742 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:24 +1000 Subject: [PATCH 088/188] powerpc/64s: move bad_page_fault handling to C This simplifies code, and it is also useful when introducing interrupt handler wrappers, because some wrapper functionality doesn't cope with asm entry code calling into more than one handler function. 32-bit and 64e still have some such cases, which limits some ways they can use interrupt wrappers. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-15-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 12 ------------ arch/powerpc/mm/fault.c | 4 ++++ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index fc793fa3fdf8..6e245e06848e 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1447,12 +1447,6 @@ BEGIN_MMU_FTR_SECTION MMU_FTR_SECTION_ELSE bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) - cmpdi r3,0 - beq+ interrupt_return - mr r5,r3 - addi r3,r1,STACK_FRAME_OVERHEAD - ld r4,_DAR(r1) - bl __bad_page_fault b interrupt_return 1: bl do_break @@ -1557,12 +1551,6 @@ BEGIN_MMU_FTR_SECTION MMU_FTR_SECTION_ELSE bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) - cmpdi r3,0 - beq+ interrupt_return - mr r5,r3 - addi r3,r1,STACK_FRAME_OVERHEAD - ld r4,_DAR(r1) - bl __bad_page_fault b interrupt_return GEN_KVM instruction_access diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 970ac317e018..fc2d9a27c649 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -557,6 +557,10 @@ long do_page_fault(struct pt_regs *regs) if (likely(entry)) { instruction_pointer_set(regs, extable_fixup(entry)); err = 0; + } else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { + /* 32 and 64e handle this in asm */ + __bad_page_fault(regs, err); + err = 0; } out: From bf0e2374aa7b4f8b01fd59fcb0746a9b6b05326a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:25 +1000 Subject: [PATCH 089/188] powerpc/64s: split do_hash_fault This is required for subsequent interrupt wrapper implementation.
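The shape of the split, reduced to a sketch (simplified; the real outer function also routes bad-DSISR faults straight to the page fault path):

struct regs_model { unsigned long dar, dsisr; };

long __do_hash_fault_model(struct regs_model *regs);	/* inner: try to fill the HPT */
long do_page_fault_model(struct regs_model *regs);	/* generic fallback */
int in_nmi_model(void);
void bad_page_fault_model(struct regs_model *regs, int sig);

/* Outer handler: environment checks first, then the inner body, then
 * the fallback. Wrapper machinery can later be applied to the outer
 * function without touching the hashing logic. */
long do_hash_fault_model(struct regs_model *regs)
{
	long err;

	if (in_nmi_model()) {
		/* must not touch the hash table from NMI context */
		bad_page_fault_model(regs, 11 /* SIGSEGV */);
		return 0;
	}

	err = __do_hash_fault_model(regs);
	if (err)
		err = do_page_fault_model(regs);
	return err;
}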
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-16-npiggin@gmail.com --- arch/powerpc/mm/book3s64/hash_utils.c | 56 ++++++++++++++++----------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 1a270cc37d97..d7d3a80a51d4 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1512,7 +1512,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, } EXPORT_SYMBOL_GPL(hash_page); -long do_hash_fault(struct pt_regs *regs) +static long __do_hash_fault(struct pt_regs *regs) { unsigned long ea = regs->dar; unsigned long dsisr = regs->dsisr; @@ -1522,27 +1522,6 @@ long do_hash_fault(struct pt_regs *regs) unsigned int region_id; long err; - if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) - goto page_fault; - - /* - * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then - * don't call hash_page, just fail the fault. This is required to - * prevent re-entrancy problems in the hash code, namely perf - * interrupts hitting while something holds H_PAGE_BUSY, and taking a - * hash fault. See the comment in hash_preload(). - * - * We come here as a result of a DSI at a point where we don't want - * to call hash_page, such as when we are accessing memory (possibly - * user memory) inside a PMU interrupt that occurred while interrupts - * were soft-disabled. We want to invoke the exception handler for - * the access, or panic if there isn't a handler. - */ - if (unlikely(in_nmi())) { - bad_page_fault(regs, SIGSEGV); - return 0; - } - region_id = get_region_id(ea); if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID)) mm = &init_mm; @@ -1581,8 +1560,39 @@ long do_hash_fault(struct pt_regs *regs) bad_page_fault(regs, SIGBUS); } err = 0; + } - } else if (err) { + return err; +} + +long do_hash_fault(struct pt_regs *regs) +{ + unsigned long dsisr = regs->dsisr; + long err; + + if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) + goto page_fault; + + /* + * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then + * don't call hash_page, just fail the fault. This is required to + * prevent re-entrancy problems in the hash code, namely perf + * interrupts hitting while something holds H_PAGE_BUSY, and taking a + * hash fault. See the comment in hash_preload(). + * + * We come here as a result of a DSI at a point where we don't want + * to call hash_page, such as when we are accessing memory (possibly + * user memory) inside a PMU interrupt that occurred while interrupts + * were soft-disabled. We want to invoke the exception handler for + * the access, or panic if there isn't a handler. + */ + if (unlikely(in_nmi())) { + bad_page_fault(regs, SIGSEGV); + return 0; + } + + err = __do_hash_fault(regs); + if (err) { page_fault: err = do_page_fault(regs); } From 31d6490ccb2868530300381d8079026cd4a9f7ad Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:26 +1000 Subject: [PATCH 090/188] powerpc/mm: Remove stale do_page_fault comment referring to SLB faults SLB faults no longer call do_page_fault, this was removed somewhere between 2.6.0 and 2.6.12. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-17-npiggin@gmail.com --- arch/powerpc/mm/fault.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index fc2d9a27c649..fef92efad733 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -377,13 +377,11 @@ static void sanity_check_fault(bool is_write, bool is_user, /* * For 600- and 800-family processors, the error_code parameter is DSISR - * for a data fault, SRR1 for an instruction fault. For 400-family processors - * the error_code parameter is ESR for a data fault, 0 for an instruction - * fault. - * For 64-bit processors, the error_code parameter is - * - DSISR for a non-SLB data access fault, - * - SRR1 & 0x08000000 for a non-SLB instruction access fault - * - 0 any SLB fault. + * for a data fault, SRR1 for an instruction fault. + * For 400-family processors the error_code parameter is ESR for a data fault, + * 0 for an instruction fault. + * For 64-bit processors, the error_code parameter is DSISR for a data access + * fault, SRR1 & 0x08000000 for an instruction access fault. * * The return value is 0 if the fault was handled, or the signal * number if this is a kernel fault that can't be handled here. From e44370abb2e99299678ec6b209f8aad574fa5f36 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:27 +1000 Subject: [PATCH 091/188] powerpc/64s: slb comment update This makes a small improvement to the description of the SLB interrupt environment. Move the memory access restrictions into one paragraph, and the interrupt restrictions into the next rather than mix them. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-18-npiggin@gmail.com --- arch/powerpc/mm/book3s64/slb.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index c581548b533f..14c62b685f0c 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -825,19 +825,21 @@ long do_slb_fault(struct pt_regs *regs) return -EINVAL; /* - * SLB kernel faults must be very careful not to touch anything - * that is not bolted. E.g., PACA and global variables are okay, - * mm->context stuff is not. - * - * SLB user faults can access all of kernel memory, but must be - * careful not to touch things like IRQ state because it is not - * "reconciled" here. The difficulty is that we must use - * fast_exception_return to return from kernel SLB faults without - * looking at possible non-bolted memory. We could test user vs - * kernel faults in the interrupt handler asm and do a full fault, - * reconcile, ret_from_except for user faults which would make them - * first class kernel code. But for performance it's probably nicer - * if they go via fast_exception_return too. + * SLB kernel faults must be very careful not to touch anything that is + * not bolted. E.g., PACA and global variables are okay, mm->context + * stuff is not. SLB user faults may access all of memory (and induce + * one recursive SLB kernel fault), so the kernel fault must not + * trample on the user fault state at those points. + */ + + /* + * The interrupt state is not reconciled, for performance, so that + * fast_interrupt_return can be used. The handler must not touch local + * irq state, or schedule. 
We could test for usermode and upgrade to a + * normal process context (synchronous) interrupt for those, which + * would make them first-class kernel code and able to be traced and + * instrumented, although performance would suffer a bit, it would + * probably be a good tradeoff. */ if (id >= LINEAR_MAP_REGION_ID) { long err; From 3a3138836bc35966d59742512b597997755878f7 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:28 +1000 Subject: [PATCH 092/188] powerpc/traps: add NOKPROBE_SYMBOL for sreset and mce These NMIs could fire any time including inside kprobe code, so exclude them from kprobes. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-19-npiggin@gmail.com --- arch/powerpc/kernel/traps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index f7370145be19..4349b25807cf 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -517,6 +517,7 @@ out: /* What should we do here? We could issue a shutdown or hard reset. */ } +NOKPROBE_SYMBOL(system_reset_exception); /* * I/O accesses can cause machine checks on powermacs. @@ -843,6 +844,7 @@ void machine_check_exception(struct pt_regs *regs) bail: if (nmi) nmi_exit(); } +NOKPROBE_SYMBOL(machine_check_exception); void SMIException(struct pt_regs *regs) { From 156b5371a9c2482a9ad23ec82d1a4f89a3ab430d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:29 +1000 Subject: [PATCH 093/188] powerpc/perf: move perf irq/nmi handling details into traps.c This is required in order to allow more significant differences between NMI type interrupt handlers and regular asynchronous handlers. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-20-npiggin@gmail.com --- arch/powerpc/kernel/traps.c | 31 +++++++++++++++++++++++++++- arch/powerpc/perf/core-book3s.c | 35 ++------------------------------ arch/powerpc/perf/core-fsl-emb.c | 25 ----------------------- 3 files changed, 32 insertions(+), 59 deletions(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 4349b25807cf..6da3a3642dfb 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1892,11 +1892,40 @@ void vsx_unavailable_tm(struct pt_regs *regs) } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -void performance_monitor_exception(struct pt_regs *regs) +static void performance_monitor_exception_nmi(struct pt_regs *regs) { + nmi_enter(); + __this_cpu_inc(irq_stat.pmu_irqs); perf_irq(regs); + + nmi_exit(); +} + +static void performance_monitor_exception_async(struct pt_regs *regs) +{ + irq_enter(); + + __this_cpu_inc(irq_stat.pmu_irqs); + + perf_irq(regs); + + irq_exit(); +} + +void performance_monitor_exception(struct pt_regs *regs) +{ + /* + * On 64-bit, if perf interrupts hit in a local_irq_disable + * (soft-masked) region, we consider them as NMIs. This is required to + * prevent hash faults on user addresses when reading callchains (and + * looks better from an irq tracing perspective). 
+ */ + if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs))) + performance_monitor_exception_nmi(regs); + else + performance_monitor_exception_async(regs); } #ifdef CONFIG_PPC_ADV_DEBUG_REGS diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 28206b1fe172..9fd06010e8b6 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -110,10 +110,6 @@ static inline void perf_read_regs(struct pt_regs *regs) { regs->result = 0; } -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ - return 0; -} static inline int siar_valid(struct pt_regs *regs) { @@ -353,15 +349,6 @@ static inline void perf_read_regs(struct pt_regs *regs) regs->result = use_siar; } -/* - * If interrupts were soft-disabled when a PMU interrupt occurs, treat - * it as an NMI. - */ -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ - return (regs->softe & IRQS_DISABLED); -} - /* * On processors like P7+ that have the SIAR-Valid bit, marked instructions * must be sampled only if the SIAR-valid bit is set. @@ -2279,7 +2266,6 @@ static void __perf_event_interrupt(struct pt_regs *regs) struct perf_event *event; unsigned long val[8]; int found, active; - int nmi; if (cpuhw->n_limited) freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), @@ -2287,18 +2273,6 @@ static void __perf_event_interrupt(struct pt_regs *regs) perf_read_regs(regs); - /* - * If perf interrupts hit in a local_irq_disable (soft-masked) region, - * we consider them as NMIs. This is required to prevent hash faults on - * user addresses when reading callchains. See the NMI test in - * do_hash_page. - */ - nmi = perf_intr_is_nmi(regs); - if (nmi) - nmi_enter(); - else - irq_enter(); - /* Read all the PMCs since we'll need them a bunch of times */ for (i = 0; i < ppmu->n_counter; ++i) val[i] = read_pmc(i + 1); @@ -2344,8 +2318,8 @@ static void __perf_event_interrupt(struct pt_regs *regs) } } } - if (!found && !nmi && printk_ratelimit()) - printk(KERN_WARNING "Can't find PMC that caused IRQ\n"); + if (unlikely(!found) && !arch_irq_disabled_regs(regs)) + printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n"); /* * Reset MMCR0 to its normal value. This will set PMXE and @@ -2355,11 +2329,6 @@ static void __perf_event_interrupt(struct pt_regs *regs) * we get back out of this interrupt. */ write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0); - - if (nmi) - nmi_exit(); - else - irq_exit(); } static void perf_event_interrupt(struct pt_regs *regs) diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index e0e7e276bfd2..ee721f420a7b 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -31,19 +31,6 @@ static atomic_t num_events; /* Used to avoid races in calling reserve/release_pmc_hardware */ static DEFINE_MUTEX(pmc_reserve_mutex); -/* - * If interrupts were soft-disabled when a PMU interrupt occurs, treat - * it as an NMI. 
- */ -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ -#ifdef __powerpc64__ - return (regs->softe & IRQS_DISABLED); -#else - return 0; -#endif -} - static void perf_event_interrupt(struct pt_regs *regs); /* @@ -659,13 +646,6 @@ static void perf_event_interrupt(struct pt_regs *regs) struct perf_event *event; unsigned long val; int found = 0; - int nmi; - - nmi = perf_intr_is_nmi(regs); - if (nmi) - nmi_enter(); - else - irq_enter(); for (i = 0; i < ppmu->n_counter; ++i) { event = cpuhw->event[i]; @@ -690,11 +670,6 @@ static void perf_event_interrupt(struct pt_regs *regs) mtmsr(mfmsr() | MSR_PMM); mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE); isync(); - - if (nmi) - nmi_exit(); - else - irq_exit(); } void hw_perf_event_setup(int cpu) From 0440b8a22cc48922f7c6ae894abd221cf7cc4b64 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:30 +1000 Subject: [PATCH 094/188] powerpc/time: move timer_broadcast_interrupt prototype to asm/time.h Interrupt handler prototypes are going to be rearranged in a future patch, so tidy this out of the way first. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-21-npiggin@gmail.com --- arch/powerpc/include/asm/hw_irq.h | 1 - arch/powerpc/include/asm/time.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 0363734ff56e..e5def36212cf 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -54,7 +54,6 @@ extern void replay_system_reset(void); extern void replay_soft_interrupts(void); extern void timer_interrupt(struct pt_regs *); -extern void timer_broadcast_interrupt(void); extern void performance_monitor_exception(struct pt_regs *regs); extern void WatchdogException(struct pt_regs *regs); extern void unknown_exception(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 8f789b597bae..8dd3cdb25338 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -102,6 +102,8 @@ DECLARE_PER_CPU(u64, decrementers_next_tb); /* Convert timebase ticks to nanoseconds */ unsigned long long tb_to_ns(unsigned long long tb_ticks); +void timer_broadcast_interrupt(void); + /* SPLPAR */ void accumulate_stolen_time(void); From 6c6aee009ec34cb7f5ef76f910c1b9417c81efd8 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:31 +1000 Subject: [PATCH 095/188] powerpc: add and use unknown_async_exception This is currently the same as unknown_exception, but it will diverge after interrupt wrappers are added and code moved out of asm into the wrappers (e.g., async handlers will check FINISH_NAP). 
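For reference, the new handler starts out as a copy of unknown_exception()
(see the traps.c hunk below):

	void unknown_async_exception(struct pt_regs *regs)
	{
		enum ctx_state prev_state = exception_enter();

		printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
		       regs->nip, regs->msr, regs->trap);

		_exception(SIGTRAP, regs, TRAP_UNK, 0);

		exception_exit(prev_state);
	}

Later in this series it is converted to
DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception), at which point
async-only entry work (such as the FINISH_NAP check mentioned above) can
live in the wrapper rather than in asm.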
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-22-npiggin@gmail.com --- arch/powerpc/include/asm/hw_irq.h | 1 + arch/powerpc/kernel/exceptions-64s.S | 4 ++-- arch/powerpc/kernel/head_book3s_32.S | 6 +++--- arch/powerpc/kernel/traps.c | 12 ++++++++++++ 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index e5def36212cf..75c2b137fc00 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -57,6 +57,7 @@ extern void timer_interrupt(struct pt_regs *); extern void performance_monitor_exception(struct pt_regs *regs); extern void WatchdogException(struct pt_regs *regs); extern void unknown_exception(struct pt_regs *regs); +void unknown_async_exception(struct pt_regs *regs); #ifdef CONFIG_PPC64 #include diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6e245e06848e..86eb1c9400b1 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1926,7 +1926,7 @@ EXC_COMMON_BEGIN(doorbell_super_common) #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception #else - bl unknown_exception + bl unknown_async_exception #endif b interrupt_return @@ -2312,7 +2312,7 @@ EXC_COMMON_BEGIN(h_doorbell_common) #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception #else - bl unknown_exception + bl unknown_async_exception #endif b interrupt_return diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 61f862fa9655..1b9c62423b09 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -238,8 +238,8 @@ __secondary_hold_acknowledge: /* System reset */ /* core99 pmac starts the seconary here by changing the vector, and - putting it back to what it was (unknown_exception) when done. */ - EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) + putting it back to what it was (unknown_async_exception) when done. 
*/ + EXCEPTION(0x100, Reset, unknown_async_exception, EXC_XFER_STD) /* Machine check */ /* @@ -641,7 +641,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) #endif #ifndef CONFIG_TAU_INT -#define TAUException unknown_exception +#define TAUException unknown_async_exception #endif EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6da3a3642dfb..6691774fe1fb 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1073,6 +1073,18 @@ void unknown_exception(struct pt_regs *regs) exception_exit(prev_state); } +void unknown_async_exception(struct pt_regs *regs) +{ + enum ctx_state prev_state = exception_enter(); + + printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", + regs->nip, regs->msr, regs->trap); + + _exception(SIGTRAP, regs, TRAP_UNK, 0); + + exception_exit(prev_state); +} + void instruction_breakpoint_exception(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); From dcdb4f12963f3f4200e24e1dad78564a98736f67 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:32 +1000 Subject: [PATCH 096/188] powerpc/cell: tidy up pervasive declarations These are declared in ras.h and defined in ras.c so remove them from pervasive.h Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-23-npiggin@gmail.com --- arch/powerpc/platforms/cell/pervasive.c | 1 + arch/powerpc/platforms/cell/pervasive.h | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c index 9068edef71f7..5b9a7e9f144b 100644 --- a/arch/powerpc/platforms/cell/pervasive.c +++ b/arch/powerpc/platforms/cell/pervasive.c @@ -25,6 +25,7 @@ #include #include "pervasive.h" +#include "ras.h" static void cbe_power_save(void) { diff --git a/arch/powerpc/platforms/cell/pervasive.h b/arch/powerpc/platforms/cell/pervasive.h index c6fccad6caee..0da74ab10716 100644 --- a/arch/powerpc/platforms/cell/pervasive.h +++ b/arch/powerpc/platforms/cell/pervasive.h @@ -13,9 +13,6 @@ #define PERVASIVE_H extern void cbe_pervasive_init(void); -extern void cbe_system_error_exception(struct pt_regs *regs); -extern void cbe_maintenance_exception(struct pt_regs *regs); -extern void cbe_thermal_exception(struct pt_regs *regs); #ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON extern int cbe_sysreset_hack(void); From 209e9d500e25eada096b2c09a34093bc458166f3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:33 +1000 Subject: [PATCH 097/188] powerpc: introduce die_mce As explained by commit daf00ae71dad ("powerpc/traps: restore recoverability of machine_check interrupts"), die() can't be called from within nmi_enter to nicely kill a process context that was interrupted. nmi_exit must be called first. This adds a function die_mce which takes care of this for machine check handlers. 
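The helper is small; for orientation, its core (full diff below) is:

	void die_mce(const char *str, struct pt_regs *regs, long err)
	{
		/*
		 * The machine check wants to kill the interrupted context,
		 * but do_exit() checks for in_interrupt() and panics in
		 * that case, so exit the irq/nmi context before die().
		 */
		if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64))
			nmi_exit();
		die(str, regs, err);
	}

machine_check_exception(), opal_recover_mce() and the pseries recover_mce()
then call die_mce() in place of die().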
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-24-npiggin@gmail.com --- arch/powerpc/include/asm/bug.h | 1 + arch/powerpc/kernel/traps.c | 21 +++++++++++++++------ arch/powerpc/platforms/powernv/opal.c | 2 +- arch/powerpc/platforms/pseries/ras.c | 2 +- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 8f09ddae9305..c10ae0a9bbaf 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -118,6 +118,7 @@ void do_bad_page_fault_segv(struct pt_regs *regs); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void _exception_pkey(struct pt_regs *, unsigned long, int); extern void die(const char *, struct pt_regs *, long); +void die_mce(const char *str, struct pt_regs *regs, long err); extern bool die_will_crash(void); extern void panic_flush_kmsg_start(void); extern void panic_flush_kmsg_end(void); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6691774fe1fb..f9ef183a5454 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -789,6 +789,19 @@ int machine_check_generic(struct pt_regs *regs) } #endif /* everything else */ +void die_mce(const char *str, struct pt_regs *regs, long err) +{ + /* + * The machine check wants to kill the interrupted context, but + * do_exit() checks for in_interrupt() and panics in that case, so + * exit the irq/nmi before calling die. + */ + if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + nmi_exit(); + die(str, regs, err); +} +NOKPROBE_SYMBOL(die_mce); + void machine_check_exception(struct pt_regs *regs) { int recover = 0; @@ -831,15 +844,11 @@ void machine_check_exception(struct pt_regs *regs) if (check_io_access(regs)) goto bail; - if (nmi) nmi_exit(); - - die("Machine check", regs, SIGBUS); + die_mce("Machine check", regs, SIGBUS); /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) - die("Unrecoverable Machine check", regs, SIGBUS); - - return; + die_mce("Unrecoverable Machine check", regs, SIGBUS); bail: if (nmi) nmi_exit(); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index c61c3b62c8c6..303d7c775740 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -624,7 +624,7 @@ static int opal_recover_mce(struct pt_regs *regs, */ recovered = 0; } else { - die("Machine check", regs, SIGBUS); + die_mce("Machine check", regs, SIGBUS); recovered = 1; } } diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index d2fca1aa6742..92c08fe893cf 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -808,7 +808,7 @@ static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt) */ recovered = 0; } else { - die("Machine check", regs, SIGBUS); + die_mce("Machine check", regs, SIGBUS); recovered = 1; } } From c538938fa2cfdc806c6304888e3876729e6939e0 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:34 +1000 Subject: [PATCH 098/188] powerpc/mce: ensure machine check handler always tests RI A machine check that is handled must still check MSR[RI] for recoverability of the interrupted context. Without this patch it's possible for a handled machine check to return to a context where it has clobbered live registers. 
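Concretely, the bail label moves above the recoverability test, so the
handled path now falls through to it as well; the tail of
machine_check_exception() becomes roughly:

		die_mce("Machine check", regs, SIGBUS);

	bail:
		/* Must die if the interrupt is not recoverable */
		if (!(regs->msr & MSR_RI))
			die_mce("Unrecoverable Machine check", regs, SIGBUS);

		if (nmi) nmi_exit();

Previously a handled machine check jumped straight to bail and skipped the
MSR[RI] test, even though MSR[RI]=0 means SRR0/SRR1 were not preserved.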
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-25-npiggin@gmail.com --- arch/powerpc/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index f9ef183a5454..3a8699995a77 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -846,11 +846,11 @@ void machine_check_exception(struct pt_regs *regs) die_mce("Machine check", regs, SIGBUS); +bail: /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) die_mce("Unrecoverable Machine check", regs, SIGBUS); -bail: if (nmi) nmi_exit(); } NOKPROBE_SYMBOL(machine_check_exception); From 11cb0a25f71818ca7ab4856548ecfd83c169aa4d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:35 +1000 Subject: [PATCH 099/188] powerpc: improve handling of unrecoverable system reset If an unrecoverable system reset hits in process context, the system does not have to panic. Similar to machine check, call nmi_exit() before die(). Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-26-npiggin@gmail.com --- arch/powerpc/kernel/traps.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 3a8699995a77..f70d3f6174c8 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -503,8 +503,11 @@ out: die("Unrecoverable nested System Reset", regs, SIGABRT); #endif /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) + if (!(regs->msr & MSR_RI)) { + /* For the reason explained in die_mce, nmi_exit before die */ + nmi_exit(); die("Unrecoverable System Reset", regs, SIGABRT); + } if (saved_hsrrs) { mtspr(SPRN_HSRR0, hsrr0); From 8d41fc618ab804657acd2df8e761ce1001f41513 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:36 +1000 Subject: [PATCH 100/188] powerpc: interrupt handler wrapper functions Add wrapper functions (derived from x86 macros) for interrupt handler functions. This allows interrupt entry code to be written in C. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-27-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 169 +++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 arch/powerpc/include/asm/interrupt.h diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h new file mode 100644 index 000000000000..5e2526aacc52 --- /dev/null +++ b/arch/powerpc/include/asm/interrupt.h @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_INTERRUPT_H +#define _ASM_POWERPC_INTERRUPT_H + +#include +#include + +/** + * DECLARE_INTERRUPT_HANDLER_RAW - Declare raw interrupt handler function + * @func: Function name of the entry point + * @returns: Returns a value back to asm caller + */ +#define DECLARE_INTERRUPT_HANDLER_RAW(func) \ + __visible long func(struct pt_regs *regs) + +/** + * DEFINE_INTERRUPT_HANDLER_RAW - Define raw interrupt handler function + * @func: Function name of the entry point + * @returns: Returns a value back to asm caller + * + * @func is called from ASM entry code. + * + * This is a plain function which does no tracing, reconciling, etc. + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. 
+ * + * raw interrupt handlers must not enable or disable interrupts, or + * schedule, tracing and instrumentation (ftrace, lockdep, etc) would + * not be advisable either, although may be possible in a pinch, the + * trace will look odd at least. + * + * A raw handler may call one of the other interrupt handler functions + * to be converted into that interrupt context without these restrictions. + * + * On PPC64, _RAW handlers may return with fast_interrupt_return. + * + * Specific handlers may have additional restrictions. + */ +#define DEFINE_INTERRUPT_HANDLER_RAW(func) \ +static __always_inline long ____##func(struct pt_regs *regs); \ + \ +__visible noinstr long func(struct pt_regs *regs) \ +{ \ + long ret; \ + \ + ret = ____##func (regs); \ + \ + return ret; \ +} \ + \ +static __always_inline long ____##func(struct pt_regs *regs) + +/** + * DECLARE_INTERRUPT_HANDLER - Declare synchronous interrupt handler function + * @func: Function name of the entry point + */ +#define DECLARE_INTERRUPT_HANDLER(func) \ + __visible void func(struct pt_regs *regs) + +/** + * DEFINE_INTERRUPT_HANDLER - Define synchronous interrupt handler function + * @func: Function name of the entry point + * + * @func is called from ASM entry code. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + */ +#define DEFINE_INTERRUPT_HANDLER(func) \ +static __always_inline void ____##func(struct pt_regs *regs); \ + \ +__visible noinstr void func(struct pt_regs *regs) \ +{ \ + ____##func (regs); \ +} \ + \ +static __always_inline void ____##func(struct pt_regs *regs) + +/** + * DECLARE_INTERRUPT_HANDLER_RET - Declare synchronous interrupt handler function + * @func: Function name of the entry point + * @returns: Returns a value back to asm caller + */ +#define DECLARE_INTERRUPT_HANDLER_RET(func) \ + __visible long func(struct pt_regs *regs) + +/** + * DEFINE_INTERRUPT_HANDLER_RET - Define synchronous interrupt handler function + * @func: Function name of the entry point + * @returns: Returns a value back to asm caller + * + * @func is called from ASM entry code. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + */ +#define DEFINE_INTERRUPT_HANDLER_RET(func) \ +static __always_inline long ____##func(struct pt_regs *regs); \ + \ +__visible noinstr long func(struct pt_regs *regs) \ +{ \ + long ret; \ + \ + ret = ____##func (regs); \ + \ + return ret; \ +} \ + \ +static __always_inline long ____##func(struct pt_regs *regs) + +/** + * DECLARE_INTERRUPT_HANDLER_ASYNC - Declare asynchronous interrupt handler function + * @func: Function name of the entry point + */ +#define DECLARE_INTERRUPT_HANDLER_ASYNC(func) \ + __visible void func(struct pt_regs *regs) + +/** + * DEFINE_INTERRUPT_HANDLER_ASYNC - Define asynchronous interrupt handler function + * @func: Function name of the entry point + * + * @func is called from ASM entry code. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. 
+ */ +#define DEFINE_INTERRUPT_HANDLER_ASYNC(func) \ +static __always_inline void ____##func(struct pt_regs *regs); \ + \ +__visible noinstr void func(struct pt_regs *regs) \ +{ \ + ____##func (regs); \ +} \ + \ +static __always_inline void ____##func(struct pt_regs *regs) + +/** + * DECLARE_INTERRUPT_HANDLER_NMI - Declare NMI interrupt handler function + * @func: Function name of the entry point + * @returns: Returns a value back to asm caller + */ +#define DECLARE_INTERRUPT_HANDLER_NMI(func) \ + __visible long func(struct pt_regs *regs) + +/** + * DEFINE_INTERRUPT_HANDLER_NMI - Define NMI interrupt handler function + * @func: Function name of the entry point + * @returns: Returns a value back to asm caller + * + * @func is called from ASM entry code. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + */ +#define DEFINE_INTERRUPT_HANDLER_NMI(func) \ +static __always_inline long ____##func(struct pt_regs *regs); \ + \ +__visible noinstr long func(struct pt_regs *regs) \ +{ \ + long ret; \ + \ + ret = ____##func (regs); \ + \ + return ret; \ +} \ + \ +static __always_inline long ____##func(struct pt_regs *regs) + +#endif /* _ASM_POWERPC_INTERRUPT_H */ From 25b7e6bb743ca5a375bb89522a2c2bec840d5fc3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:37 +1000 Subject: [PATCH 101/188] powerpc: add interrupt wrapper entry / exit stub functions These will be used by subsequent patches. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-28-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 66 ++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 5e2526aacc52..3df1921cfda3 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -5,6 +5,50 @@ #include #include +struct interrupt_state { +}; + +static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) +{ +} + +/* + * Care should be taken to note that interrupt_exit_prepare and + * interrupt_async_exit_prepare do not necessarily return immediately to + * regs context (e.g., if regs is usermode, we don't necessarily return to + * user mode). Other interrupts might be taken between here and return, + * context switch / preemption may occur in the exit path after this, or a + * signal may be delivered, etc. + * + * The real interrupt exit code is platform specific, e.g., + * interrupt_exit_user_prepare / interrupt_exit_kernel_prepare for 64s. + * + * However interrupt_nmi_exit_prepare does return directly to regs, because + * NMIs do not do "exit work" or replay soft-masked interrupts. 
+ */ +static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) +{ +} + +static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) +{ +} + +static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) +{ +} + +struct interrupt_nmi_state { +}; + +static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state) +{ +} + +static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state) +{ +} + /** * DECLARE_INTERRUPT_HANDLER_RAW - Declare raw interrupt handler function * @func: Function name of the entry point @@ -71,7 +115,13 @@ static __always_inline void ____##func(struct pt_regs *regs); \ \ __visible noinstr void func(struct pt_regs *regs) \ { \ + struct interrupt_state state; \ + \ + interrupt_enter_prepare(regs, &state); \ + \ ____##func (regs); \ + \ + interrupt_exit_prepare(regs, &state); \ } \ \ static __always_inline void ____##func(struct pt_regs *regs) @@ -99,10 +149,15 @@ static __always_inline long ____##func(struct pt_regs *regs); \ \ __visible noinstr long func(struct pt_regs *regs) \ { \ + struct interrupt_state state; \ long ret; \ \ + interrupt_enter_prepare(regs, &state); \ + \ ret = ____##func (regs); \ \ + interrupt_exit_prepare(regs, &state); \ + \ return ret; \ } \ \ @@ -129,7 +184,13 @@ static __always_inline void ____##func(struct pt_regs *regs); \ \ __visible noinstr void func(struct pt_regs *regs) \ { \ + struct interrupt_state state; \ + \ + interrupt_async_enter_prepare(regs, &state); \ + \ ____##func (regs); \ + \ + interrupt_async_exit_prepare(regs, &state); \ } \ \ static __always_inline void ____##func(struct pt_regs *regs) @@ -157,10 +218,15 @@ static __always_inline long ____##func(struct pt_regs *regs); \ \ __visible noinstr long func(struct pt_regs *regs) \ { \ + struct interrupt_nmi_state state; \ long ret; \ \ + interrupt_nmi_enter_prepare(regs, &state); \ + \ ret = ____##func (regs); \ \ + interrupt_nmi_exit_prepare(regs, &state); \ + \ return ret; \ } \ \ From fd3f1e0f139f1314ff97438eebaa1f9d216e10a2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 7 Feb 2021 22:56:43 +1000 Subject: [PATCH 102/188] powerpc/traps: factor common code from program check and emulation assist Move the program check handling into a function called by both, rather than have the emulation assist handler call the program check handler. This allows each of these handlers to be implemented with "interrupt wrappers" in a later change. 
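The resulting structure (full diff below) is:

	static void do_program_check(struct pt_regs *regs)
	{
		/* former body of program_check_exception(), with the
		 * 'goto bail' exits turned into plain returns */
	}

	void program_check_exception(struct pt_regs *regs)
	{
		enum ctx_state prev_state = exception_enter();

		do_program_check(regs);

		exception_exit(prev_state);
	}

	void emulation_assist_interrupt(struct pt_regs *regs)
	{
		enum ctx_state prev_state = exception_enter();

		regs->msr |= REASON_ILLEGAL;
		do_program_check(regs);

		exception_exit(prev_state);
	}

With each exported handler doing its own entry/exit bookkeeping, the two
can be wrapped independently in the following patch.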
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1612702475.d6qyt6qtfy.astroid@bobo.none --- arch/powerpc/kernel/traps.c | 38 +++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index f70d3f6174c8..2c5986109412 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1462,9 +1462,8 @@ static int emulate_math(struct pt_regs *regs) static inline int emulate_math(struct pt_regs *regs) { return -1; } #endif -void program_check_exception(struct pt_regs *regs) +static void do_program_check(struct pt_regs *regs) { - enum ctx_state prev_state = exception_enter(); unsigned int reason = get_reason(regs); /* We can now get here via a FP Unavailable exception if the core @@ -1473,22 +1472,22 @@ void program_check_exception(struct pt_regs *regs) if (reason & REASON_FP) { /* IEEE FP exception */ parse_fpe(regs); - goto bail; + return; } if (reason & REASON_TRAP) { unsigned long bugaddr; /* Debugger is first in line to stop recursive faults in * rcu_lock, notify_die, or atomic_notifier_call_chain */ if (debugger_bpt(regs)) - goto bail; + return; if (kprobe_handler(regs)) - goto bail; + return; /* trap exception */ if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - goto bail; + return; bugaddr = regs->nip; /* @@ -1500,10 +1499,10 @@ void program_check_exception(struct pt_regs *regs) if (!(regs->msr & MSR_PR) && /* not user-mode */ report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) { regs->nip += 4; - goto bail; + return; } _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); - goto bail; + return; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (reason & REASON_TM) { @@ -1524,7 +1523,7 @@ void program_check_exception(struct pt_regs *regs) */ if (user_mode(regs)) { _exception(SIGILL, regs, ILL_ILLOPN, regs->nip); - goto bail; + return; } else { printk(KERN_EMERG "Unexpected TM Bad Thing exception " "at %lx (msr 0x%lx) tm_scratch=%llx\n", @@ -1557,7 +1556,7 @@ void program_check_exception(struct pt_regs *regs) * pattern to occurrences etc. -dgibson 31/Mar/2003 */ if (!emulate_math(regs)) - goto bail; + return; /* Try to emulate it if we should. 
*/ if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) { @@ -1565,10 +1564,10 @@ void program_check_exception(struct pt_regs *regs) case 0: regs->nip += 4; emulate_single_step(regs); - goto bail; + return; case -EFAULT: _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - goto bail; + return; } } @@ -1578,7 +1577,14 @@ sigill: else _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); -bail: +} + +void program_check_exception(struct pt_regs *regs) +{ + enum ctx_state prev_state = exception_enter(); + + do_program_check(regs); + exception_exit(prev_state); } NOKPROBE_SYMBOL(program_check_exception); @@ -1589,8 +1595,12 @@ NOKPROBE_SYMBOL(program_check_exception); */ void emulation_assist_interrupt(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); + regs->msr |= REASON_ILLEGAL; - program_check_exception(regs); + do_program_check(regs); + + exception_exit(prev_state); } NOKPROBE_SYMBOL(emulation_assist_interrupt); From 3a96570ffceb15c6ed9cc6f990f172dcdc8ac279 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:38 +1000 Subject: [PATCH 103/188] powerpc: convert interrupt handlers to use wrappers Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-29-npiggin@gmail.com --- arch/powerpc/include/asm/asm-prototypes.h | 29 ------ arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 - arch/powerpc/include/asm/debug.h | 3 - arch/powerpc/include/asm/hw_irq.h | 9 -- arch/powerpc/include/asm/interrupt.h | 66 ++++++++++++++ arch/powerpc/include/asm/nmi.h | 2 +- arch/powerpc/kernel/dbell.c | 6 +- arch/powerpc/kernel/irq.c | 3 +- arch/powerpc/kernel/mce.c | 5 +- arch/powerpc/kernel/process.c | 3 +- arch/powerpc/kernel/syscall_64.c | 1 + arch/powerpc/kernel/tau_6xx.c | 3 +- arch/powerpc/kernel/time.c | 3 +- arch/powerpc/kernel/traps.c | 88 ++++++++++++------- arch/powerpc/kernel/watchdog.c | 7 +- arch/powerpc/kvm/book3s_hv.c | 1 + arch/powerpc/kvm/book3s_hv_builtin.c | 1 + arch/powerpc/kvm/booke.c | 1 + arch/powerpc/mm/book3s64/hash_utils.c | 12 ++- arch/powerpc/mm/book3s64/slb.c | 7 +- arch/powerpc/mm/fault.c | 5 +- arch/powerpc/platforms/cell/ras.c | 6 +- arch/powerpc/platforms/cell/ras.h | 9 +- arch/powerpc/platforms/powernv/idle.c | 1 + 24 files changed, 170 insertions(+), 102 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 22c9d08fa3a4..939f3c94c8f3 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -56,35 +56,6 @@ int exit_vmx_usercopy(void); int enter_vmx_ops(void); void *exit_vmx_ops(void *dest); -/* Traps */ -long machine_check_early(struct pt_regs *regs); -long hmi_exception_realmode(struct pt_regs *regs); -void SMIException(struct pt_regs *regs); -void handle_hmi_exception(struct pt_regs *regs); -void instruction_breakpoint_exception(struct pt_regs *regs); -void RunModeException(struct pt_regs *regs); -void single_step_exception(struct pt_regs *regs); -void program_check_exception(struct pt_regs *regs); -void alignment_exception(struct pt_regs *regs); -void StackOverflow(struct pt_regs *regs); -void stack_overflow_exception(struct pt_regs *regs); -void kernel_fp_unavailable_exception(struct pt_regs *regs); -void altivec_unavailable_exception(struct pt_regs *regs); -void vsx_unavailable_exception(struct pt_regs *regs); -void fp_unavailable_tm(struct pt_regs *regs); -void altivec_unavailable_tm(struct pt_regs *regs); -void vsx_unavailable_tm(struct pt_regs *regs); -void 
facility_unavailable_exception(struct pt_regs *regs); -void TAUException(struct pt_regs *regs); -void altivec_assist_exception(struct pt_regs *regs); -void unrecoverable_exception(struct pt_regs *regs); -void kernel_bad_stack(struct pt_regs *regs); -void system_reset_exception(struct pt_regs *regs); -void machine_check_exception(struct pt_regs *regs); -void emulation_assist_interrupt(struct pt_regs *regs); -long do_slb_fault(struct pt_regs *regs); -void do_bad_slb_fault(struct pt_regs *regs); - /* signals, syscalls and interrupts */ long sys_swapcontext(struct ucontext __user *old_ctx, struct ucontext __user *new_ctx, diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 36884bc0bcd7..f911bdb68d8b 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -456,7 +456,6 @@ static inline unsigned long hpt_hash(unsigned long vpn, long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long pa, unsigned long rlags, unsigned long vflags, int psize, int ssize); -long do_hash_fault(struct pt_regs *regs); extern int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, int subpage_prot); diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h index 0550eceab3ca..86a14736c76c 100644 --- a/arch/powerpc/include/asm/debug.h +++ b/arch/powerpc/include/asm/debug.h @@ -50,9 +50,6 @@ bool ppc_breakpoint_available(void); #ifdef CONFIG_PPC_ADV_DEBUG_REGS extern void do_send_trap(struct pt_regs *regs, unsigned long address, unsigned long error_code, int brkpt); -#else - -void do_break(struct pt_regs *regs); #endif #endif /* _ASM_POWERPC_DEBUG_H */ diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 75c2b137fc00..614957f74cee 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -50,15 +50,6 @@ #ifndef __ASSEMBLY__ -extern void replay_system_reset(void); -extern void replay_soft_interrupts(void); - -extern void timer_interrupt(struct pt_regs *); -extern void performance_monitor_exception(struct pt_regs *regs); -extern void WatchdogException(struct pt_regs *regs); -extern void unknown_exception(struct pt_regs *regs); -void unknown_async_exception(struct pt_regs *regs); - #ifdef CONFIG_PPC64 #include diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 3df1921cfda3..4ffbd3d75324 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -232,4 +232,70 @@ __visible noinstr long func(struct pt_regs *regs) \ \ static __always_inline long ____##func(struct pt_regs *regs) + +/* Interrupt handlers */ +/* kernel/traps.c */ +DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception); +#ifdef CONFIG_PPC_BOOK3S_64 +DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception); +#else +DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception); +#endif +DECLARE_INTERRUPT_HANDLER(SMIException); +DECLARE_INTERRUPT_HANDLER(handle_hmi_exception); +DECLARE_INTERRUPT_HANDLER(unknown_exception); +DECLARE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception); +DECLARE_INTERRUPT_HANDLER(instruction_breakpoint_exception); +DECLARE_INTERRUPT_HANDLER(RunModeException); +DECLARE_INTERRUPT_HANDLER(single_step_exception); +DECLARE_INTERRUPT_HANDLER(program_check_exception); +DECLARE_INTERRUPT_HANDLER(emulation_assist_interrupt); 
+DECLARE_INTERRUPT_HANDLER(alignment_exception); +DECLARE_INTERRUPT_HANDLER(StackOverflow); +DECLARE_INTERRUPT_HANDLER(stack_overflow_exception); +DECLARE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception); +DECLARE_INTERRUPT_HANDLER(altivec_unavailable_exception); +DECLARE_INTERRUPT_HANDLER(vsx_unavailable_exception); +DECLARE_INTERRUPT_HANDLER(facility_unavailable_exception); +DECLARE_INTERRUPT_HANDLER(fp_unavailable_tm); +DECLARE_INTERRUPT_HANDLER(altivec_unavailable_tm); +DECLARE_INTERRUPT_HANDLER(vsx_unavailable_tm); +DECLARE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi); +DECLARE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async); +DECLARE_INTERRUPT_HANDLER_RAW(performance_monitor_exception); +DECLARE_INTERRUPT_HANDLER(DebugException); +DECLARE_INTERRUPT_HANDLER(altivec_assist_exception); +DECLARE_INTERRUPT_HANDLER(CacheLockingException); +DECLARE_INTERRUPT_HANDLER(SPEFloatingPointException); +DECLARE_INTERRUPT_HANDLER(SPEFloatingPointRoundException); +DECLARE_INTERRUPT_HANDLER(unrecoverable_exception); +DECLARE_INTERRUPT_HANDLER(WatchdogException); +DECLARE_INTERRUPT_HANDLER(kernel_bad_stack); + +/* slb.c */ +DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault); +DECLARE_INTERRUPT_HANDLER(do_bad_slb_fault); + +/* hash_utils.c */ +DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault); + +/* fault.c */ +DECLARE_INTERRUPT_HANDLER_RET(do_page_fault); +DECLARE_INTERRUPT_HANDLER(do_bad_page_fault_segv); + +/* process.c */ +DECLARE_INTERRUPT_HANDLER(do_break); + +/* time.c */ +DECLARE_INTERRUPT_HANDLER_ASYNC(timer_interrupt); + +/* mce.c */ +DECLARE_INTERRUPT_HANDLER_NMI(machine_check_early); +DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode); + +DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException); + +void replay_system_reset(void); +void replay_soft_interrupts(void); + #endif /* _ASM_POWERPC_INTERRUPT_H */ diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index 63eccea93796..160abcb8e9fa 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -4,7 +4,7 @@ #ifdef CONFIG_PPC_WATCHDOG extern void arch_touch_nmi_watchdog(void); -void soft_nmi_interrupt(struct pt_regs *regs); +long soft_nmi_interrupt(struct pt_regs *regs); #else static inline void arch_touch_nmi_watchdog(void) {} #endif diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 52680cf07c9d..6a7ecfca5c3b 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -12,13 +12,14 @@ #include #include +#include #include #include #include #ifdef CONFIG_SMP -void doorbell_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -39,9 +40,8 @@ void doorbell_exception(struct pt_regs *regs) set_irq_regs(old_regs); } #else /* CONFIG_SMP */ -void doorbell_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception) { printk(KERN_WARNING "Received doorbell on non-smp system\n"); } #endif /* CONFIG_SMP */ - diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 6b1eca53e36c..2055d204d08e 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -54,6 +54,7 @@ #include #include +#include #include #include #include @@ -665,7 +666,7 @@ void __do_irq(struct pt_regs *regs) irq_exit(); } -void do_IRQ(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ) { struct pt_regs *old_regs = set_irq_regs(regs); void *cursp, *irqsp, *sirqsp; diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 
3e3a84127c0e..cfe96cd3f48b 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -583,7 +584,7 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info); * * regs->nip and regs->msr contains srr0 and ssr1. */ -long notrace machine_check_early(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early) { long handled = 0; u8 ftrace_enabled = this_cpu_get_ftrace_enabled(); @@ -717,7 +718,7 @@ long hmi_handle_debugtrig(struct pt_regs *regs) /* * Return values: */ -long hmi_exception_realmode(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode) { int ret; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 4f0f81e9420b..8520ed5ae144 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -41,6 +41,7 @@ #include #include +#include #include #include #include @@ -659,7 +660,7 @@ static void do_break_handler(struct pt_regs *regs) } } -void do_break(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(do_break) { current->thread.trap_nr = TRAP_HWBKPT; if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, regs->dsisr, diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index e0eb2a502db3..316c3ba16554 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 0b4694b8d248..3f300eccc09e 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -100,7 +101,7 @@ static void TAUupdate(int cpu) * with interrupts disabled */ -void TAUException(struct pt_regs * regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(TAUException) { int cpu = smp_processor_id(); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 83633a24ce78..7636773b028f 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -57,6 +57,7 @@ #include #include +#include #include #include #include @@ -571,7 +572,7 @@ void arch_irq_work_raise(void) * timer_interrupt - gets called when the decrementer overflows, * with interrupts disabled. */ -void timer_interrupt(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) { struct clock_event_device *evt = this_cpu_ptr(&decrementers); u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 2c5986109412..51e56b7fceb7 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -430,8 +431,7 @@ nonrecoverable: regs->msr &= ~MSR_RI; #endif } - -void system_reset_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception) { unsigned long hsrr0, hsrr1; bool saved_hsrrs = false; @@ -519,6 +519,8 @@ out: this_cpu_set_ftrace_enabled(ftrace_enabled); /* What should we do here? We could issue a shutdown or hard reset. 
*/ + + return 0; } NOKPROBE_SYMBOL(system_reset_exception); @@ -805,7 +807,11 @@ void die_mce(const char *str, struct pt_regs *regs, long err) } NOKPROBE_SYMBOL(die_mce); -void machine_check_exception(struct pt_regs *regs) +#ifdef CONFIG_PPC_BOOK3S_64 +DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception) +#else +DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception) +#endif { int recover = 0; @@ -855,10 +861,16 @@ bail: die_mce("Unrecoverable Machine check", regs, SIGBUS); if (nmi) nmi_exit(); + +#ifdef CONFIG_PPC_BOOK3S_64 + return; +#else + return 0; +#endif } NOKPROBE_SYMBOL(machine_check_exception); -void SMIException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(SMIException) /* async? */ { die("System Management Interrupt", regs, SIGABRT); } @@ -1044,7 +1056,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs) } #endif /* CONFIG_VSX */ -void handle_hmi_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception) { struct pt_regs *old_regs; @@ -1073,7 +1085,7 @@ void handle_hmi_exception(struct pt_regs *regs) set_irq_regs(old_regs); } -void unknown_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(unknown_exception) { enum ctx_state prev_state = exception_enter(); @@ -1085,7 +1097,7 @@ void unknown_exception(struct pt_regs *regs) exception_exit(prev_state); } -void unknown_async_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception) { enum ctx_state prev_state = exception_enter(); @@ -1097,7 +1109,7 @@ void unknown_async_exception(struct pt_regs *regs) exception_exit(prev_state); } -void instruction_breakpoint_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception) { enum ctx_state prev_state = exception_enter(); @@ -1112,12 +1124,12 @@ bail: exception_exit(prev_state); } -void RunModeException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(RunModeException) { _exception(SIGTRAP, regs, TRAP_UNK, 0); } -void single_step_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(single_step_exception) { enum ctx_state prev_state = exception_enter(); @@ -1579,7 +1591,7 @@ sigill: } -void program_check_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(program_check_exception) { enum ctx_state prev_state = exception_enter(); @@ -1593,7 +1605,7 @@ NOKPROBE_SYMBOL(program_check_exception); * This occurs when running in hypervisor mode on POWER6 or later * and an illegal instruction is encountered. 
*/ -void emulation_assist_interrupt(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt) { enum ctx_state prev_state = exception_enter(); @@ -1604,7 +1616,7 @@ void emulation_assist_interrupt(struct pt_regs *regs) } NOKPROBE_SYMBOL(emulation_assist_interrupt); -void alignment_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(alignment_exception) { enum ctx_state prev_state = exception_enter(); int sig, code, fixed = 0; @@ -1654,7 +1666,7 @@ bail: exception_exit(prev_state); } -void StackOverflow(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(StackOverflow) { pr_crit("Kernel stack overflow in process %s[%d], r1=%lx\n", current->comm, task_pid_nr(current), regs->gpr[1]); @@ -1663,7 +1675,7 @@ void StackOverflow(struct pt_regs *regs) panic("kernel stack overflow"); } -void stack_overflow_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(stack_overflow_exception) { enum ctx_state prev_state = exception_enter(); @@ -1672,7 +1684,7 @@ void stack_overflow_exception(struct pt_regs *regs) exception_exit(prev_state); } -void kernel_fp_unavailable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception) { enum ctx_state prev_state = exception_enter(); @@ -1683,7 +1695,7 @@ void kernel_fp_unavailable_exception(struct pt_regs *regs) exception_exit(prev_state); } -void altivec_unavailable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(altivec_unavailable_exception) { enum ctx_state prev_state = exception_enter(); @@ -1702,7 +1714,7 @@ bail: exception_exit(prev_state); } -void vsx_unavailable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception) { if (user_mode(regs)) { /* A user program has executed an vsx instruction, @@ -1733,7 +1745,7 @@ static void tm_unavailable(struct pt_regs *regs) die("Unrecoverable TM Unavailable Exception", regs, SIGABRT); } -void facility_unavailable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception) { static char *facility_strings[] = { [FSCR_FP_LG] = "FPU", @@ -1853,7 +1865,7 @@ out: #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -void fp_unavailable_tm(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(fp_unavailable_tm) { /* Note: This does not handle any kind of FP laziness. */ @@ -1886,7 +1898,7 @@ void fp_unavailable_tm(struct pt_regs *regs) tm_recheckpoint(¤t->thread); } -void altivec_unavailable_tm(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(altivec_unavailable_tm) { /* See the comments in fp_unavailable_tm(). This function operates * the same way. @@ -1901,7 +1913,7 @@ void altivec_unavailable_tm(struct pt_regs *regs) current->thread.used_vr = 1; } -void vsx_unavailable_tm(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(vsx_unavailable_tm) { /* See the comments in fp_unavailable_tm(). This works similarly, * though we're loading both FP and VEC registers in here. 
@@ -1926,7 +1938,9 @@ void vsx_unavailable_tm(struct pt_regs *regs) } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -static void performance_monitor_exception_nmi(struct pt_regs *regs) +#ifdef CONFIG_PPC64 +DECLARE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi); +DEFINE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi) { nmi_enter(); @@ -1935,9 +1949,13 @@ static void performance_monitor_exception_nmi(struct pt_regs *regs) perf_irq(regs); nmi_exit(); -} -static void performance_monitor_exception_async(struct pt_regs *regs) + return 0; +} +#endif + +DECLARE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async); +DEFINE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async) { irq_enter(); @@ -1948,7 +1966,7 @@ static void performance_monitor_exception_async(struct pt_regs *regs) irq_exit(); } -void performance_monitor_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_RAW(performance_monitor_exception) { /* * On 64-bit, if perf interrupts hit in a local_irq_disable @@ -1960,6 +1978,8 @@ void performance_monitor_exception(struct pt_regs *regs) performance_monitor_exception_nmi(regs); else performance_monitor_exception_async(regs); + + return 0; } #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -2022,7 +2042,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status) mtspr(SPRN_DBCR0, current->thread.debug.dbcr0); } -void DebugException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(DebugException) { unsigned long debug_status = regs->dsisr; @@ -2095,7 +2115,7 @@ NOKPROBE_SYMBOL(DebugException); #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ #ifdef CONFIG_ALTIVEC -void altivec_assist_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(altivec_assist_exception) { int err; @@ -2129,7 +2149,7 @@ void altivec_assist_exception(struct pt_regs *regs) #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_FSL_BOOKE -void CacheLockingException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(CacheLockingException) { unsigned long error_code = regs->dsisr; @@ -2144,7 +2164,7 @@ void CacheLockingException(struct pt_regs *regs) #endif /* CONFIG_FSL_BOOKE */ #ifdef CONFIG_SPE -void SPEFloatingPointException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException) { extern int do_spe_mathemu(struct pt_regs *regs); unsigned long spefscr; @@ -2196,7 +2216,7 @@ void SPEFloatingPointException(struct pt_regs *regs) return; } -void SPEFloatingPointRoundException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException) { extern int speround_handler(struct pt_regs *regs); int err; @@ -2238,7 +2258,7 @@ void SPEFloatingPointRoundException(struct pt_regs *regs) * in the MSR is 0. This indicates that SRR0/1 are live, and that * we therefore lost state by taking this exception. */ -void unrecoverable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(unrecoverable_exception) { pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n", regs->trap, regs->nip, regs->msr); @@ -2258,7 +2278,7 @@ void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs) return; } -void WatchdogException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(WatchdogException) /* XXX NMI? async? */ { printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n"); WatchdogHandler(regs); @@ -2269,7 +2289,7 @@ void WatchdogException(struct pt_regs *regs) * We enter here if we discover during exception entry that we are * running in supervisor mode with a userspace value in the stack pointer. 
*/ -void kernel_bad_stack(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(kernel_bad_stack) { printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n", regs->gpr[1], regs->nip); diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 3ae13c2a10cf..be6b22838555 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -248,14 +249,14 @@ static void watchdog_timer_interrupt(int cpu) watchdog_smp_panic(cpu, tb); } -void soft_nmi_interrupt(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) { unsigned long flags; int cpu = raw_smp_processor_id(); u64 tb; if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) - return; + return 0; nmi_enter(); @@ -292,6 +293,8 @@ void soft_nmi_interrupt(struct pt_regs *regs) out: nmi_exit(); + + return 0; } static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index d348e77cee20..43e64e60ee33 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 8053efdf7ea7..10fc274bea65 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 288a9820ec01..bd2bb73021d8 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index d7d3a80a51d4..cf167f6d825d 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -1512,7 +1513,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, } EXPORT_SYMBOL_GPL(hash_page); -static long __do_hash_fault(struct pt_regs *regs) +DECLARE_INTERRUPT_HANDLER_RET(__do_hash_fault); +DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) { unsigned long ea = regs->dar; unsigned long dsisr = regs->dsisr; @@ -1565,7 +1567,11 @@ static long __do_hash_fault(struct pt_regs *regs) return err; } -long do_hash_fault(struct pt_regs *regs) +/* + * The _RAW interrupt entry checks for the in_nmi() case before + * running the full handler. + */ +DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) { unsigned long dsisr = regs->dsisr; long err; @@ -1587,7 +1593,7 @@ long do_hash_fault(struct pt_regs *regs) * the access, or panic if there isn't a handler. 
*/ if (unlikely(in_nmi())) { - bad_page_fault(regs, SIGSEGV); + do_bad_page_fault_segv(regs); return 0; } diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 14c62b685f0c..c91bd85eb90e 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -813,7 +814,7 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea) return slb_insert_entry(ea, context, flags, ssize, false); } -long do_slb_fault(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault) { unsigned long ea = regs->dar; unsigned long id = get_region_id(ea); @@ -833,7 +834,7 @@ long do_slb_fault(struct pt_regs *regs) */ /* - * The interrupt state is not reconciled, for performance, so that + * This is a raw interrupt handler, for performance, so that * fast_interrupt_return can be used. The handler must not touch local * irq state, or schedule. We could test for usermode and upgrade to a * normal process context (synchronous) interrupt for those, which @@ -868,7 +869,7 @@ long do_slb_fault(struct pt_regs *regs) } } -void do_bad_slb_fault(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(do_bad_slb_fault) { int err = regs->result; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index fef92efad733..f8eb42aaafab 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -540,7 +541,7 @@ retry: } NOKPROBE_SYMBOL(__do_page_fault); -long do_page_fault(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_RET(do_page_fault) { const struct exception_table_entry *entry; enum ctx_state prev_state; @@ -624,7 +625,7 @@ void bad_page_fault(struct pt_regs *regs, int sig) } #ifdef CONFIG_PPC_BOOK3S_64 -void do_bad_page_fault_segv(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(do_bad_page_fault_segv) { bad_page_fault(regs, SIGSEGV); } diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c index 6ea480539419..4325c05bedd9 100644 --- a/arch/powerpc/platforms/cell/ras.c +++ b/arch/powerpc/platforms/cell/ras.c @@ -49,7 +49,7 @@ static void dump_fir(int cpu) } -void cbe_system_error_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(cbe_system_error_exception) { int cpu = smp_processor_id(); @@ -58,7 +58,7 @@ void cbe_system_error_exception(struct pt_regs *regs) dump_stack(); } -void cbe_maintenance_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(cbe_maintenance_exception) { int cpu = smp_processor_id(); @@ -70,7 +70,7 @@ void cbe_maintenance_exception(struct pt_regs *regs) dump_stack(); } -void cbe_thermal_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(cbe_thermal_exception) { int cpu = smp_processor_id(); diff --git a/arch/powerpc/platforms/cell/ras.h b/arch/powerpc/platforms/cell/ras.h index 6c2e6bc0062e..226dbd48efad 100644 --- a/arch/powerpc/platforms/cell/ras.h +++ b/arch/powerpc/platforms/cell/ras.h @@ -2,9 +2,12 @@ #ifndef RAS_H #define RAS_H -extern void cbe_system_error_exception(struct pt_regs *regs); -extern void cbe_maintenance_exception(struct pt_regs *regs); -extern void cbe_thermal_exception(struct pt_regs *regs); +#include + +DECLARE_INTERRUPT_HANDLER(cbe_system_error_exception); +DECLARE_INTERRUPT_HANDLER(cbe_maintenance_exception); +DECLARE_INTERRUPT_HANDLER(cbe_thermal_exception); + extern void cbe_ras_init(void); #endif /* RAS_H */ diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 
e6f461812856..999997d9e9a9 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include From e6f8a6c86ce7b2108c03c1cc014fdae278573df1 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:39 +1000 Subject: [PATCH 104/188] powerpc: add interrupt_cond_local_irq_enable helper Simple helper for synchronous interrupt handlers (i.e., process-context) to enable interrupts if it was taken in an interrupts-enabled context. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-30-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 7 +++++++ arch/powerpc/kernel/traps.c | 24 +++++++----------------- arch/powerpc/mm/fault.c | 4 +--- 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 4ffbd3d75324..488bdd5bd922 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -3,6 +3,7 @@ #define _ASM_POWERPC_INTERRUPT_H #include +#include #include struct interrupt_state { @@ -298,4 +299,10 @@ DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException); void replay_system_reset(void); void replay_soft_interrupts(void); +static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs) +{ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); +} + #endif /* _ASM_POWERPC_INTERRUPT_H */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 51e56b7fceb7..20c90a3548f6 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -343,8 +343,8 @@ static bool exception_common(int signr, struct pt_regs *regs, int code, show_signal_msg(signr, regs, code, addr); - if (arch_irqs_disabled() && !arch_irq_disabled_regs(regs)) - local_irq_enable(); + if (arch_irqs_disabled()) + interrupt_cond_local_irq_enable(regs); current->thread.trap_nr = code; @@ -1555,9 +1555,7 @@ static void do_program_check(struct pt_regs *regs) if (!user_mode(regs)) goto sigill; - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); + interrupt_cond_local_irq_enable(regs); /* (reason & REASON_ILLEGAL) would be the obvious thing here, * but there seems to be a hardware bug on the 405GP (RevD) @@ -1622,9 +1620,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception) int sig, code, fixed = 0; unsigned long reason; - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); + interrupt_cond_local_irq_enable(regs); reason = get_reason(regs); @@ -1785,9 +1781,7 @@ DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception) die("Unexpected facility unavailable exception", regs, SIGABRT); } - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); + interrupt_cond_local_irq_enable(regs); if (status == FSCR_DSCR_LG) { /* @@ -2172,9 +2166,7 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException) int code = FPE_FLTUNK; int err; - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); + interrupt_cond_local_irq_enable(regs); flush_spe_to_thread(current); @@ -2221,9 +2213,7 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException) extern int speround_handler(struct pt_regs *regs); int err; - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); + interrupt_cond_local_irq_enable(regs); 
preempt_disable(); if (regs->msr & MSR_SPE) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index f8eb42aaafab..8552ab6c008b 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -434,9 +434,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, return bad_area_nosemaphore(regs, address); } - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); + interrupt_cond_local_irq_enable(regs); perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); From 2a06bf3e95cd93e3640d431960181b8e47415f33 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:40 +1000 Subject: [PATCH 105/188] powerpc/64: context tracking remove _TIF_NOHZ Add context tracking to the system call handler explicitly, and remove _TIF_NOHZ. This improves system call performance when nohz_full is enabled. On a POWER9, gettid scv system call cost on a nohz_full CPU improves from 1129 cycles to 1004 cycles and on a housekeeping CPU from 550 cycles to 430 cycles. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-31-npiggin@gmail.com --- arch/powerpc/Kconfig | 1 - arch/powerpc/include/asm/thread_info.h | 4 +- arch/powerpc/kernel/ptrace/ptrace.c | 4 -- arch/powerpc/kernel/signal.c | 4 -- arch/powerpc/kernel/syscall_64.c | 72 ++++++++++++++++---------- 5 files changed, 47 insertions(+), 38 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ef64d96bf316..686829dd1c66 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -196,7 +196,6 @@ config PPC select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13) select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2) select HAVE_CONTEXT_TRACKING if PPC64 - select HAVE_TIF_NOHZ if PPC64 select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 3d8a47af7a25..386d576673a1 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -94,7 +94,6 @@ void arch_setup_new_exec(void); #define TIF_PATCH_PENDING 6 /* pending live patching update */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SINGLESTEP 8 /* singlestepping active */ -#define TIF_NOHZ 9 /* in adaptive nohz mode */ #define TIF_SECCOMP 10 /* secure computing */ #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */ #define TIF_NOERROR 12 /* Force successful syscall return */ @@ -128,11 +127,10 @@ void arch_setup_new_exec(void); #define _TIF_UPROBE (1<flags) & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE); @@ -340,8 +338,6 @@ void do_syscall_trace_leave(struct pt_regs *regs) step = test_thread_flag(TIF_SINGLESTEP); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, step); - - user_enter(); } void __init pt_regs_check(void); diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 53782aa60ade..9ded046edb0e 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -282,8 +282,6 @@ static void do_signal(struct task_struct *tsk) void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) { - user_exit(); - if (thread_info_flags & _TIF_UPROBE) uprobe_notify_resume(regs); @@ -299,8 +297,6 @@ void do_notify_resume(struct pt_regs *regs, 
unsigned long thread_info_flags) tracehook_notify_resume(regs); rseq_handle_notify_resume(NULL, regs); } - - user_enter(); } static unsigned long get_tm_stackpointer(struct task_struct *tsk) diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 316c3ba16554..45c4420fe339 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -1,9 +1,11 @@ // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include #include #include +#include #include #include #include @@ -28,6 +30,9 @@ notrace long system_call_exception(long r3, long r4, long r5, if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); + CT_WARN_ON(ct_state() == CONTEXT_KERNEL); + user_exit_irqoff(); + trace_hardirqs_off(); /* finish reconciling */ if (IS_ENABLED(CONFIG_PPC_BOOK3S)) @@ -144,7 +149,7 @@ notrace long system_call_exception(long r3, long r4, long r5, * enabled when the interrupt handler returns (indicating a process-context / * synchronous interrupt) then irqs_enabled should be true. */ -static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled) +static notrace inline bool __prep_irq_for_enabled_exit(bool clear_ri) { /* This must be done with RI=1 because tracing may touch vmaps */ trace_hardirqs_on(); @@ -161,29 +166,6 @@ static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_en trace_hardirqs_off(); local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - /* - * Must replay pending soft-masked interrupts now. Don't just - * local_irq_enabe(); local_irq_disable(); because if we are - * returning from an asynchronous interrupt here, another one - * might hit after irqs are enabled, and it would exit via this - * same path allowing another to fire, and so on unbounded. - * - * If interrupts were enabled when this interrupt exited, - * indicating a process context (synchronous) interrupt, - * local_irq_enable/disable can be used, which will enable - * interrupts rather than keeping them masked (unclear how - * much benefit this is over just replaying for all cases, - * because we immediately disable again, so all we're really - * doing is allowing hard interrupts to execute directly for - * a very small time, rather than being masked and replayed). - */ - if (irqs_enabled) { - local_irq_enable(); - local_irq_disable(); - } else { - replay_soft_interrupts(); - } - return false; } local_paca->irq_happened = 0; @@ -192,6 +174,37 @@ static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_en return true; } +static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled) +{ + if (__prep_irq_for_enabled_exit(clear_ri)) + return true; + + /* + * Must replay pending soft-masked interrupts now. Don't just + * local_irq_enabe(); local_irq_disable(); because if we are + * returning from an asynchronous interrupt here, another one + * might hit after irqs are enabled, and it would exit via this + * same path allowing another to fire, and so on unbounded. + * + * If interrupts were enabled when this interrupt exited, + * indicating a process context (synchronous) interrupt, + * local_irq_enable/disable can be used, which will enable + * interrupts rather than keeping them masked (unclear how + * much benefit this is over just replaying for all cases, + * because we immediately disable again, so all we're really + * doing is allowing hard interrupts to execute directly for + * a very small time, rather than being masked and replayed). 
+ */
+ if (irqs_enabled) {
+ local_irq_enable();
+ local_irq_disable();
+ } else {
+ replay_soft_interrupts();
+ }
+
+ return false;
+}
+
/*
* This should be called after a syscall returns, with r3 the return value
* from the syscall. If this function returns non-zero, the system call
@@ -209,6 +222,8 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
unsigned long ti_flags;
unsigned long ret = 0;
+ CT_WARN_ON(ct_state() == CONTEXT_USER);
+
kuap_check_amr();
regs->result = r3;
@@ -240,9 +255,9 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
ret |= _TIF_RESTOREALL;
}
+again:
local_irq_disable();
-again:
ti_flags = READ_ONCE(*ti_flagsp);
while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
local_irq_enable();
@@ -286,9 +301,14 @@ again:
}
}
+ user_enter_irqoff();
+
/* scv need not set RI=0 because SRRs are not used */
- if (unlikely(!prep_irq_for_enabled_exit(!scv, true)))
+ if (unlikely(!__prep_irq_for_enabled_exit(!scv))) {
+ user_exit_irqoff();
+ local_irq_enable();
goto again;
+ }
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
local_paca->tm_scratch = regs->msr;
From a008f8f9fd67ffb13d906ef4ea6235a3d62dfdb6 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Sat, 30 Jan 2021 23:08:41 +1000
Subject: [PATCH 106/188] powerpc/64s/hash: improve context tracking of hash faults
This moves the 64s/hash context tracking from hash_page_mm() to __do_hash_fault(), so it's no longer called by OCXL / SPU accelerators, which was certainly the wrong thing to be doing, because those callers are not low level interrupt handlers and so should already have entered kernel context tracking.
Then remain in kernel context for the duration of the fault, rather than enter/exit for the hash fault then enter/exit for the page fault, which is pointless.
Even so, calling exception_enter/exit in __do_hash_fault seems questionable because that's touching per-cpu variables, tracing, etc., which might have been interrupted by this hash fault or themselves cause hash faults. But maybe I'm missing something, because hash_page_mm very deliberately calls trace_hash_fault too, for example. So for now go with it; it's no worse than before in this regard.
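As a sketch of the resulting shape (illustrative only: try_hash() here is a hypothetical stand-in for the body of __do_hash_fault(), and error handling is elided):

	/* Before: two enter/exit pairs per faulting access */
	prev = exception_enter();
	err = try_hash(regs);			/* hash fault */
	exception_exit(prev);
	if (err)
		err = do_page_fault(regs);	/* does its own enter/exit */

	/* After: one pair spans both the hash fault and the page fault */
	prev = exception_enter();
	err = try_hash(regs);
	if (err)
		err = hash__do_page_fault(regs);	/* no tracking of its own */
	exception_exit(prev);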
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-32-npiggin@gmail.com --- arch/powerpc/include/asm/bug.h | 1 + arch/powerpc/mm/book3s64/hash_utils.c | 7 ++--- arch/powerpc/mm/fault.c | 39 +++++++++++++++++++-------- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index c10ae0a9bbaf..d1635ffbb179 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -112,6 +112,7 @@ struct pt_regs; long do_page_fault(struct pt_regs *); +long hash__do_page_fault(struct pt_regs *); void bad_page_fault(struct pt_regs *, int); void __bad_page_fault(struct pt_regs *regs, int sig); void do_bad_page_fault_segv(struct pt_regs *regs); diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index cf167f6d825d..d681dc5a7b1c 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1289,7 +1289,6 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long flags) { bool is_thp; - enum ctx_state prev_state = exception_enter(); pgd_t *pgdir; unsigned long vsid; pte_t *ptep; @@ -1491,7 +1490,6 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, DBG_LOW(" -> rc=%d\n", rc); bail: - exception_exit(prev_state); return rc; } EXPORT_SYMBOL_GPL(hash_page_mm); @@ -1516,6 +1514,7 @@ EXPORT_SYMBOL_GPL(hash_page); DECLARE_INTERRUPT_HANDLER_RET(__do_hash_fault); DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) { + enum ctx_state prev_state = exception_enter(); unsigned long ea = regs->dar; unsigned long dsisr = regs->dsisr; unsigned long access = _PAGE_PRESENT | _PAGE_READ; @@ -1564,6 +1563,8 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) err = 0; } + exception_exit(prev_state); + return err; } @@ -1600,7 +1601,7 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) err = __do_hash_fault(regs); if (err) { page_fault: - err = do_page_fault(regs); + err = hash__do_page_fault(regs); } return err; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 8552ab6c008b..9c4220efc20f 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -387,7 +387,7 @@ static void sanity_check_fault(bool is_write, bool is_user, * The return value is 0 if the fault was handled, or the signal * number if this is a kernel fault that can't be handled here. 
*/ -static int __do_page_fault(struct pt_regs *regs, unsigned long address, +static int ___do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { struct vm_area_struct * vma; @@ -537,36 +537,53 @@ retry: return 0; } -NOKPROBE_SYMBOL(__do_page_fault); +NOKPROBE_SYMBOL(___do_page_fault); -DEFINE_INTERRUPT_HANDLER_RET(do_page_fault) +static long __do_page_fault(struct pt_regs *regs) { const struct exception_table_entry *entry; - enum ctx_state prev_state; long err; - prev_state = exception_enter(); - err = __do_page_fault(regs, regs->dar, regs->dsisr); + err = ___do_page_fault(regs, regs->dar, regs->dsisr); if (likely(!err)) - goto out; + return err; entry = search_exception_tables(regs->nip); if (likely(entry)) { instruction_pointer_set(regs, extable_fixup(entry)); - err = 0; + return 0; } else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { - /* 32 and 64e handle this in asm */ __bad_page_fault(regs, err); - err = 0; + return 0; + } else { + /* 32 and 64e handle the bad page fault in asm */ + return err; } +} +NOKPROBE_SYMBOL(__do_page_fault); + +DEFINE_INTERRUPT_HANDLER_RET(do_page_fault) +{ + enum ctx_state prev_state = exception_enter(); + long err; + + err = __do_page_fault(regs); -out: exception_exit(prev_state); return err; } NOKPROBE_SYMBOL(do_page_fault); +#ifdef CONFIG_PPC_BOOK3S_64 +/* Same as do_page_fault but interrupt entry has already run in do_hash_fault */ +long hash__do_page_fault(struct pt_regs *regs) +{ + return __do_page_fault(regs); +} +NOKPROBE_SYMBOL(hash__do_page_fault); +#endif + /* * bad_page_fault is called when we have a bad access from the kernel. * It is called from the DSI and ISI handlers in head.S and from some From 540d4d34bef4ec58aba12b159030492616d6f54e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:42 +1000 Subject: [PATCH 107/188] powerpc/64: context tracking move to interrupt wrappers This moves exception_enter/exit calls to wrapper functions for synchronous interrupts. More interrupt handlers are covered by this than previously. 
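For reference, the wrapper generated for each handler has roughly the following shape (a simplified sketch, not the exact DEFINE_INTERRUPT_HANDLER macro from asm/interrupt.h), which is why hooking interrupt_enter_prepare()/interrupt_exit_prepare() covers every converted handler at once:

#define DEFINE_INTERRUPT_HANDLER(func)					\
static __always_inline void ____##func(struct pt_regs *regs);		\
									\
void func(struct pt_regs *regs)						\
{									\
	struct interrupt_state state;					\
									\
	interrupt_enter_prepare(regs, &state);	/* tracking enter */	\
	____##func(regs);			/* handler body */	\
	interrupt_exit_prepare(regs, &state);	/* tracking exit */	\
}									\
									\
static __always_inline void ____##func(struct pt_regs *regs)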
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-33-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 9 +++++ arch/powerpc/kernel/traps.c | 58 ++++----------------------- arch/powerpc/mm/book3s64/hash_utils.c | 3 -- arch/powerpc/mm/fault.c | 9 +---- 4 files changed, 17 insertions(+), 62 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 488bdd5bd922..e65ce3e2b071 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -7,10 +7,16 @@ #include struct interrupt_state { +#ifdef CONFIG_PPC64 + enum ctx_state ctx_state; +#endif }; static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) { +#ifdef CONFIG_PPC64 + state->ctx_state = exception_enter(); +#endif } /* @@ -29,6 +35,9 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup */ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) { +#ifdef CONFIG_PPC64 + exception_exit(state->ctx_state); +#endif } static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 20c90a3548f6..9c26fb41b275 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1087,41 +1087,28 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception) DEFINE_INTERRUPT_HANDLER(unknown_exception) { - enum ctx_state prev_state = exception_enter(); - printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", regs->nip, regs->msr, regs->trap); _exception(SIGTRAP, regs, TRAP_UNK, 0); - - exception_exit(prev_state); } DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception) { - enum ctx_state prev_state = exception_enter(); - printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", regs->nip, regs->msr, regs->trap); _exception(SIGTRAP, regs, TRAP_UNK, 0); - - exception_exit(prev_state); } DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception) { - enum ctx_state prev_state = exception_enter(); - if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - goto bail; + return; if (debugger_iabr_match(regs)) - goto bail; + return; _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); - -bail: - exception_exit(prev_state); } DEFINE_INTERRUPT_HANDLER(RunModeException) @@ -1131,8 +1118,6 @@ DEFINE_INTERRUPT_HANDLER(RunModeException) DEFINE_INTERRUPT_HANDLER(single_step_exception) { - enum ctx_state prev_state = exception_enter(); - clear_single_step(regs); clear_br_trace(regs); @@ -1141,14 +1126,11 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception) if (notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - goto bail; + return; if (debugger_sstep(regs)) - goto bail; + return; _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); - -bail: - exception_exit(prev_state); } NOKPROBE_SYMBOL(single_step_exception); @@ -1591,11 +1573,7 @@ sigill: DEFINE_INTERRUPT_HANDLER(program_check_exception) { - enum ctx_state prev_state = exception_enter(); - do_program_check(regs); - - exception_exit(prev_state); } NOKPROBE_SYMBOL(program_check_exception); @@ -1605,25 +1583,19 @@ NOKPROBE_SYMBOL(program_check_exception); */ DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt) { - enum ctx_state prev_state = exception_enter(); - regs->msr |= REASON_ILLEGAL; do_program_check(regs); - - exception_exit(prev_state); } NOKPROBE_SYMBOL(emulation_assist_interrupt); 
DEFINE_INTERRUPT_HANDLER(alignment_exception) { - enum ctx_state prev_state = exception_enter(); int sig, code, fixed = 0; unsigned long reason; interrupt_cond_local_irq_enable(regs); reason = get_reason(regs); - if (reason & REASON_BOUNDARY) { sig = SIGBUS; code = BUS_ADRALN; @@ -1631,7 +1603,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception) } if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT)) - goto bail; + return; /* we don't implement logging of alignment exceptions */ if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS)) @@ -1641,7 +1613,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception) /* skip over emulated instruction */ regs->nip += inst_length(reason); emulate_single_step(regs); - goto bail; + return; } /* Operand address was bad */ @@ -1657,9 +1629,6 @@ bad: _exception(sig, regs, code, regs->dar); else bad_page_fault(regs, sig); - -bail: - exception_exit(prev_state); } DEFINE_INTERRUPT_HANDLER(StackOverflow) @@ -1673,41 +1642,28 @@ DEFINE_INTERRUPT_HANDLER(StackOverflow) DEFINE_INTERRUPT_HANDLER(stack_overflow_exception) { - enum ctx_state prev_state = exception_enter(); - die("Kernel stack overflow", regs, SIGSEGV); - - exception_exit(prev_state); } DEFINE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception) { - enum ctx_state prev_state = exception_enter(); - printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " "%lx at %lx\n", regs->trap, regs->nip); die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); - - exception_exit(prev_state); } DEFINE_INTERRUPT_HANDLER(altivec_unavailable_exception) { - enum ctx_state prev_state = exception_enter(); - if (user_mode(regs)) { /* A user program has executed an altivec instruction, but this kernel doesn't support altivec. */ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - goto bail; + return; } printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception " "%lx at %lx\n", regs->trap, regs->nip); die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); - -bail: - exception_exit(prev_state); } DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index d681dc5a7b1c..fb7c10524bcd 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1514,7 +1514,6 @@ EXPORT_SYMBOL_GPL(hash_page); DECLARE_INTERRUPT_HANDLER_RET(__do_hash_fault); DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) { - enum ctx_state prev_state = exception_enter(); unsigned long ea = regs->dar; unsigned long dsisr = regs->dsisr; unsigned long access = _PAGE_PRESENT | _PAGE_READ; @@ -1563,8 +1562,6 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) err = 0; } - exception_exit(prev_state); - return err; } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 9c4220efc20f..b26a7643fc6e 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -564,14 +564,7 @@ NOKPROBE_SYMBOL(__do_page_fault); DEFINE_INTERRUPT_HANDLER_RET(do_page_fault) { - enum ctx_state prev_state = exception_enter(); - long err; - - err = __do_page_fault(regs); - - exception_exit(prev_state); - - return err; + return __do_page_fault(regs); } NOKPROBE_SYMBOL(do_page_fault); From 6fdb0f410bb026ade092039a6c2655a53323c996 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:43 +1000 Subject: [PATCH 108/188] powerpc/64: add context tracking to asynchronous interrupts Previously context tracking was not done for asynchronous interrupts, (those that run in interrupt context), and if those 
would cause a reschedule when they exit, then scheduling functions (schedule_user, preempt_schedule_irq) call exception_enter/exit to fix this up and exit user context. This is a hack we would like to get away from, so do context tracking for asynchronous interrupts too. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-34-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index e65ce3e2b071..f7f64c3c514d 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -42,10 +42,12 @@ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) { + interrupt_enter_prepare(regs, state); } static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) { + interrupt_exit_prepare(regs, state); } struct interrupt_nmi_state { From 1b1b6a6f4cc0ecc27745fa578cbaf912d76dbdda Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:44 +1000 Subject: [PATCH 109/188] powerpc: handle irq_enter/irq_exit in interrupt handler wrappers Move irq_enter/irq_exit into asynchronous interrupt handler wrappers. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-35-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 2 ++ arch/powerpc/kernel/dbell.c | 3 +-- arch/powerpc/kernel/irq.c | 4 ---- arch/powerpc/kernel/tau_6xx.c | 3 --- arch/powerpc/kernel/time.c | 4 ++-- arch/powerpc/kernel/traps.c | 10 +++------- 6 files changed, 8 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index f7f64c3c514d..5a1395499508 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -43,10 +43,12 @@ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) { interrupt_enter_prepare(regs, state); + irq_enter(); } static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) { + irq_exit(); interrupt_exit_prepare(regs, state); } diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 6a7ecfca5c3b..5545c9cd17c1 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -23,7 +23,6 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception) { struct pt_regs *old_regs = set_irq_regs(regs); - irq_enter(); trace_doorbell_entry(regs); ppc_msgsync(); @@ -36,7 +35,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception) smp_ipi_demux_relaxed(); /* already performed the barrier */ trace_doorbell_exit(regs); - irq_exit(); + set_irq_regs(old_regs); } #else /* CONFIG_SMP */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 2055d204d08e..681abb7c0507 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -641,8 +641,6 @@ void __do_irq(struct pt_regs *regs) { unsigned int irq; - irq_enter(); - trace_irq_entry(regs); /* @@ -662,8 +660,6 @@ void __do_irq(struct pt_regs *regs) generic_handle_irq(irq); trace_irq_exit(regs); - - irq_exit(); } DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ) diff --git a/arch/powerpc/kernel/tau_6xx.c 
b/arch/powerpc/kernel/tau_6xx.c index 3f300eccc09e..6c31af7f4fa8 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -105,12 +105,9 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(TAUException) { int cpu = smp_processor_id(); - irq_enter(); tau[cpu].interrupts++; TAUupdate(cpu); - - irq_exit(); } #endif /* CONFIG_TAU_INT */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 7636773b028f..b67d93a609a2 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -611,7 +611,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) #endif old_regs = set_irq_regs(regs); - irq_enter(); + trace_timer_interrupt_entry(regs); if (test_irq_work_pending()) { @@ -636,7 +636,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) } trace_timer_interrupt_exit(regs); - irq_exit(); + set_irq_regs(old_regs); } EXPORT_SYMBOL(timer_interrupt); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 9c26fb41b275..13fb93a59556 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -801,7 +801,9 @@ void die_mce(const char *str, struct pt_regs *regs, long err) * do_exit() checks for in_interrupt() and panics in that case, so * exit the irq/nmi before calling die. */ - if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + irq_exit(); + else nmi_exit(); die(str, regs, err); } @@ -1061,7 +1063,6 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception) struct pt_regs *old_regs; old_regs = set_irq_regs(regs); - irq_enter(); #ifdef CONFIG_VSX /* Real mode flagged P9 special emu is needed */ @@ -1081,7 +1082,6 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception) if (ppc_md.handle_hmi_exception) ppc_md.handle_hmi_exception(regs); - irq_exit(); set_irq_regs(old_regs); } @@ -1907,13 +1907,9 @@ DEFINE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi) DECLARE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async); DEFINE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async) { - irq_enter(); - __this_cpu_inc(irq_stat.pmu_irqs); perf_irq(regs); - - irq_exit(); } DEFINE_INTERRUPT_HANDLER_RAW(performance_monitor_exception) From f821bc97dee4f3ee92c3668d495af49dfd720fe0 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:45 +1000 Subject: [PATCH 110/188] powerpc/64s: move context tracking exit to interrupt exit path The interrupt handler wrapper functions are not the ideal place to maintain context tracking because after they return, the low level exit code must then determine if there are interrupts to replay, or if the task should be preempted, etc. Those paths (e.g., schedule_user) include their own exception_enter/exit pairs to fix this up but it's a bit hacky (see schedule_user() comments). Ideally context tracking will go to user mode only when there are no more interrupts or context switches or other exit processing work to handle. 64e can not do this because it does not use the C interrupt exit code. 
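The exit-side ordering this is aiming for can be sketched as follows (illustrative pseudo-C condensed from the interrupt_exit_user_prepare() change below; do_exit_work() is a hypothetical stand-in for the signal/preemption loop):

again:
	do_exit_work();			/* signals, preemption, etc., irqs on */

	user_enter_irqoff();		/* flip to user context last */
	if (!__prep_irq_for_enabled_exit(true)) {
		/* lost a race with a soft-masked irq: back out and redo */
		user_exit_irqoff();
		local_irq_enable();
		local_irq_disable();
		goto again;
	}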
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-36-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 34 +++++++++++++++++++++++++--- arch/powerpc/kernel/syscall_64.c | 18 +++++++++++++-- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 5a1395499508..1c966e47b36f 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -7,16 +7,30 @@ #include struct interrupt_state { -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3E_64 enum ctx_state ctx_state; #endif }; static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) { -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3E_64 state->ctx_state = exception_enter(); #endif + +#ifdef CONFIG_PPC_BOOK3S_64 + if (user_mode(regs)) { + CT_WARN_ON(ct_state() != CONTEXT_USER); + user_exit_irqoff(); + } else { + /* + * CT_WARN_ON comes here via program_check_exception, + * so avoid recursion. + */ + if (TRAP(regs) != 0x700) + CT_WARN_ON(ct_state() != CONTEXT_KERNEL); + } +#endif } /* @@ -35,9 +49,23 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup */ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) { -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3E_64 exception_exit(state->ctx_state); #endif + + /* + * Book3S exits to user via interrupt_exit_user_prepare(), which does + * context tracking, which is a cleaner way to handle PREEMPT=y + * and avoid context entry/exit in e.g., preempt_schedule_irq()), + * which is likely to be where the core code wants to end up. + * + * The above comment explains why we can't do the + * + * if (user_mode(regs)) + * user_exit_irqoff(); + * + * sequence here. + */ } static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 45c4420fe339..a2102e7a2713 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -255,9 +255,9 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, ret |= _TIF_RESTOREALL; } -again: local_irq_disable(); +again: ti_flags = READ_ONCE(*ti_flagsp); while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); @@ -307,6 +307,7 @@ again: if (unlikely(!__prep_irq_for_enabled_exit(!scv))) { user_exit_irqoff(); local_irq_enable(); + local_irq_disable(); goto again; } @@ -341,6 +342,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned BUG_ON(!(regs->msr & MSR_PR)); BUG_ON(!FULL_REGS(regs)); BUG_ON(regs->softe != IRQS_ENABLED); + CT_WARN_ON(ct_state() == CONTEXT_USER); /* * We don't need to restore AMR on the way back to userspace for KUAP. @@ -383,8 +385,14 @@ again: } } - if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags)))) + user_enter_irqoff(); + + if (unlikely(!__prep_irq_for_enabled_exit(true))) { + user_exit_irqoff(); + local_irq_enable(); + local_irq_disable(); goto again; + } #ifdef CONFIG_PPC_BOOK3E if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) { @@ -425,6 +433,12 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign unrecoverable_exception(regs); BUG_ON(regs->msr & MSR_PR); BUG_ON(!FULL_REGS(regs)); + /* + * CT_WARN_ON comes here via program_check_exception, + * so avoid recursion. 
+ */ + if (TRAP(regs) != 0x700) + CT_WARN_ON(ct_state() == CONTEXT_USER); amr = kuap_get_and_check_amr(); From 75b96950fddab6f1c59a10160b6bf38948bdb0e3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:46 +1000 Subject: [PATCH 111/188] powerpc/64s: reconcile interrupts in C There is no need for this to be in asm, use the new intrrupt entry wrapper. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-37-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 15 +++++++++++---- arch/powerpc/kernel/exceptions-64s.S | 26 -------------------------- 2 files changed, 11 insertions(+), 30 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 1c966e47b36f..e96d215f518a 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -14,11 +14,14 @@ struct interrupt_state { static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) { -#ifdef CONFIG_PPC_BOOK3E_64 - state->ctx_state = exception_enter(); -#endif - + /* + * Book3E reconciles irq soft mask in asm + */ #ifdef CONFIG_PPC_BOOK3S_64 + if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED) + trace_hardirqs_off(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + if (user_mode(regs)) { CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit_irqoff(); @@ -31,6 +34,10 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup CT_WARN_ON(ct_state() != CONTEXT_KERNEL); } #endif + +#ifdef CONFIG_PPC_BOOK3E_64 + state->ctx_state = exception_enter(); +#endif } /* diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 86eb1c9400b1..88b489676e89 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -139,7 +139,6 @@ name: #define IKVM_VIRT .L_IKVM_VIRT_\name\() /* Virt entry tests KVM */ #define ISTACK .L_ISTACK_\name\() /* Set regular kernel stack */ #define __ISTACK(name) .L_ISTACK_ ## name -#define IRECONCILE .L_IRECONCILE_\name\() /* Do RECONCILE_IRQ_STATE */ #define IKUAP .L_IKUAP_\name\() /* Do KUAP lock */ #define INT_DEFINE_BEGIN(n) \ @@ -203,9 +202,6 @@ do_define_int n .ifndef ISTACK ISTACK=1 .endif - .ifndef IRECONCILE - IRECONCILE=1 - .endif .ifndef IKUAP IKUAP=1 .endif @@ -653,10 +649,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .if ISTACK ACCOUNT_STOLEN_TIME .endif - - .if IRECONCILE - RECONCILE_IRQ_STATE(r10, r11) - .endif .endm /* @@ -935,7 +927,6 @@ INT_DEFINE_BEGIN(system_reset) */ ISET_RI=0 ISTACK=0 - IRECONCILE=0 IKVM_REAL=1 INT_DEFINE_END(system_reset) @@ -1123,7 +1114,6 @@ INT_DEFINE_BEGIN(machine_check_early) ISTACK=0 IDAR=1 IDSISR=1 - IRECONCILE=0 IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */ INT_DEFINE_END(machine_check_early) @@ -1483,7 +1473,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) INT_DEFINE_BEGIN(data_access_slb) IVEC=0x380 IAREA=PACA_EXSLB - IRECONCILE=0 IDAR=1 IKVM_SKIP=1 IKVM_REAL=1 @@ -1510,7 +1499,6 @@ MMU_FTR_SECTION_ELSE li r3,-EFAULT ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault b interrupt_return @@ -1568,7 +1556,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) INT_DEFINE_BEGIN(instruction_access_slb) IVEC=0x480 IAREA=PACA_EXSLB - IRECONCILE=0 IISIDE=1 IDAR=1 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE @@ -1597,7 +1584,6 @@ MMU_FTR_SECTION_ELSE li r3,-EFAULT 
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault b interrupt_return @@ -1757,7 +1743,6 @@ EXC_COMMON_BEGIN(program_check_common) */ INT_DEFINE_BEGIN(fp_unavailable) IVEC=0x800 - IRECONCILE=0 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif @@ -1772,7 +1757,6 @@ EXC_VIRT_END(fp_unavailable, 0x4800, 0x100) EXC_COMMON_BEGIN(fp_unavailable_common) GEN_COMMON fp_unavailable bne 1f /* if from user, just load it up */ - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl kernel_fp_unavailable_exception 0: trap @@ -1791,7 +1775,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) b fast_interrupt_return #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl fp_unavailable_tm b interrupt_return @@ -1856,7 +1839,6 @@ INT_DEFINE_BEGIN(hdecrementer) IVEC=0x980 IHSRR=1 ISTACK=0 - IRECONCILE=0 IKVM_REAL=1 IKVM_VIRT=1 INT_DEFINE_END(hdecrementer) @@ -2230,7 +2212,6 @@ INT_DEFINE_BEGIN(hmi_exception_early) IHSRR=1 IREALMODE_COMMON=1 ISTACK=0 - IRECONCILE=0 IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */ IKVM_REAL=1 INT_DEFINE_END(hmi_exception_early) @@ -2404,7 +2385,6 @@ EXC_COMMON_BEGIN(performance_monitor_common) */ INT_DEFINE_BEGIN(altivec_unavailable) IVEC=0xf20 - IRECONCILE=0 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif @@ -2434,7 +2414,6 @@ BEGIN_FTR_SECTION b fast_interrupt_return #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_tm b interrupt_return @@ -2442,7 +2421,6 @@ BEGIN_FTR_SECTION 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_exception b interrupt_return @@ -2458,7 +2436,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) */ INT_DEFINE_BEGIN(vsx_unavailable) IVEC=0xf40 - IRECONCILE=0 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif @@ -2487,7 +2464,6 @@ BEGIN_FTR_SECTION b load_up_vsx #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_tm b interrupt_return @@ -2495,7 +2471,6 @@ BEGIN_FTR_SECTION 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_exception b interrupt_return @@ -2830,7 +2805,6 @@ EXC_VIRT_NONE(0x5800, 0x100) INT_DEFINE_BEGIN(soft_nmi) IVEC=0x900 ISTACK=0 - IRECONCILE=0 /* Soft-NMI may fire under local_irq_disable */ INT_DEFINE_END(soft_nmi) /* From 2994e1babfc477a3101ec6841b9dc5b770c1ec18 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:47 +1000 Subject: [PATCH 112/188] powerpc/64: move account_stolen_time into its own function This will be used by interrupt entry as well. 
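With the helper split out, an entry path only needs (sketch of the pattern the next patch adopts in interrupt_enter_prepare()):

	if (user_mode(regs)) {
		account_cpu_user_entry();
		account_stolen_time();	/* no-op unless VIRT_CPU_ACCOUNTING_NATIVE and SPLPAR */
	}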
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-38-npiggin@gmail.com --- arch/powerpc/include/asm/cputime.h | 14 ++++++++++++++ arch/powerpc/kernel/syscall_64.c | 10 +--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index ed75d1c318e3..504f7fe6711a 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -87,6 +87,17 @@ static notrace inline void account_cpu_user_exit(void) acct->starttime_user = tb; } +static notrace inline void account_stolen_time(void) +{ +#ifdef CONFIG_PPC_SPLPAR + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + struct lppaca *lp = local_paca->lppaca_ptr; + + if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx))) + accumulate_stolen_time(); + } +#endif +} #endif /* __KERNEL__ */ #else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ @@ -96,5 +107,8 @@ static inline void account_cpu_user_entry(void) static inline void account_cpu_user_exit(void) { } +static notrace inline void account_stolen_time(void) +{ +} #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #endif /* __POWERPC_CPUTIME_H */ diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index a2102e7a2713..d6be4f9a67e5 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -69,15 +69,7 @@ notrace long system_call_exception(long r3, long r4, long r5, account_cpu_user_entry(); -#ifdef CONFIG_PPC_SPLPAR - if (IS_ENABLED(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && - firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct lppaca *lp = local_paca->lppaca_ptr; - - if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx))) - accumulate_stolen_time(); - } -#endif + account_stolen_time(); /* * This is not required for the syscall exit path, but makes the From 56acfdd8bf9f75e83a1b2957bd415368f39b67b6 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:48 +1000 Subject: [PATCH 113/188] powerpc/64: entry cpu time accounting in C There is no need for this to be in asm, use the new interrupt entry wrapper. 
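The asm macro removed below already has a C counterpart; roughly (abridged from the existing asm/cputime.h helper):

static notrace inline void account_cpu_user_entry(void)
{
	unsigned long tb = mftb();
	struct cpu_accounting_data *acct = get_accounting(current);

	acct->utime += (tb - acct->starttime_user);	/* user time since last return to user */
	acct->starttime = tb;				/* system time starts now */
}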
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-39-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 6 ++++++ arch/powerpc/include/asm/ppc_asm.h | 24 ------------------------ arch/powerpc/kernel/exceptions-64e.S | 1 - arch/powerpc/kernel/exceptions-64s.S | 5 ----- 4 files changed, 6 insertions(+), 30 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index e96d215f518a..ca8e08b18a16 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -4,6 +4,7 @@ #include #include +#include #include struct interrupt_state { @@ -25,6 +26,9 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup if (user_mode(regs)) { CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit_irqoff(); + + account_cpu_user_entry(); + account_stolen_time(); } else { /* * CT_WARN_ON comes here via program_check_exception, @@ -37,6 +41,8 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup #ifdef CONFIG_PPC_BOOK3E_64 state->ctx_state = exception_enter(); + if (user_mode(regs)) + account_cpu_user_entry(); #endif } diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index cc1bca571332..3dceb64fc9af 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -25,7 +25,6 @@ #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb) #define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb) -#define ACCOUNT_STOLEN_TIME #else #define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb) \ MFTB(ra); /* get timebase */ \ @@ -44,29 +43,6 @@ PPC_LL ra, ACCOUNT_SYSTEM_TIME(ptr); \ add ra,ra,rb; /* add on to system time */ \ PPC_STL ra, ACCOUNT_SYSTEM_TIME(ptr) - -#ifdef CONFIG_PPC_SPLPAR -#define ACCOUNT_STOLEN_TIME \ -BEGIN_FW_FTR_SECTION; \ - beq 33f; \ - /* from user - see if there are any DTL entries to process */ \ - ld r10,PACALPPACAPTR(r13); /* get ptr to VPA */ \ - ld r11,PACA_DTL_RIDX(r13); /* get log read index */ \ - addi r10,r10,LPPACA_DTLIDX; \ - LDX_BE r10,0,r10; /* get log write index */ \ - cmpd cr1,r11,r10; \ - beq+ cr1,33f; \ - bl accumulate_stolen_time; \ - ld r12,_MSR(r1); \ - andi. 
r10,r12,MSR_PR; /* Restore cr0 (coming from user) */ \ -33: \ -END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) - -#else /* CONFIG_PPC_SPLPAR */ -#define ACCOUNT_STOLEN_TIME - -#endif /* CONFIG_PPC_SPLPAR */ - #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ /* diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 003999c7836c..e8eb9992a270 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -398,7 +398,6 @@ exc_##n##_common: \ std r10,_NIP(r1); /* save SRR0 to stackframe */ \ std r11,_MSR(r1); /* save SRR1 to stackframe */ \ beq 2f; /* if from kernel mode */ \ - ACCOUNT_CPU_USER_ENTRY(r13,r10,r11);/* accounting (uses cr0+eq) */ \ 2: ld r3,excf+EX_R10(r13); /* get back r10 */ \ ld r4,excf+EX_R11(r13); /* get back r11 */ \ mfspr r5,scratch; /* get back r13 */ \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 88b489676e89..898698cc9947 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -577,7 +577,6 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) kuap_save_amr_and_lock r9, r10, cr1, cr0 .endif beq 101f /* if from kernel mode */ - ACCOUNT_CPU_USER_ENTRY(r13, r9, r10) BEGIN_FTR_SECTION ld r9,IAREA+EX_PPR(r13) /* Read PPR from paca */ std r9,_PPR(r1) @@ -645,10 +644,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r11,exception_marker@toc(r2) std r10,RESULT(r1) /* clear regs->result */ std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */ - - .if ISTACK - ACCOUNT_STOLEN_TIME - .endif .endm /* From 74c3354bc1d89d53e8da2dcc6f9f6bfc28b2900f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 7 Feb 2021 22:54:12 +1000 Subject: [PATCH 114/188] powerpc/pseries/mce: restore msr before returning from handler The pseries real-mode machine check handler can enable the MMU, and return from the handler with the MMU still enabled. This works, but real-mode handler wrapper exit handlers want to rely on the MMU being in real-mode. So change the pseries handler to restore the MSR after it has finished virtual mode tasks. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1612702361.lm7fqo56re.astroid@bobo.none --- arch/powerpc/platforms/pseries/ras.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 92c08fe893cf..f8b390a9d9fb 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -717,6 +717,7 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) struct pseries_errorlog *pseries_log; struct pseries_mc_errorlog *mce_log = NULL; int disposition = rtas_error_disposition(errp); + unsigned long msr; u8 error_type; if (!rtas_error_extended(errp)) @@ -742,9 +743,21 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) * SLB multihit is done by now. */ out: - mtmsr(mfmsr() | MSR_IR | MSR_DR); + msr = mfmsr(); + mtmsr(msr | MSR_IR | MSR_DR); + disposition = mce_handle_err_virtmode(regs, errp, mce_log, disposition); + + /* + * Queue irq work to log this rtas event later. + * irq_work_queue uses per-cpu variables, so do this in virt + * mode as well. + */ + irq_work_queue(&mce_errlog_process_work); + + mtmsr(msr); + return disposition; } @@ -860,10 +873,8 @@ long pseries_machine_check_realmode(struct pt_regs *regs) * virtual mode. 
*/ disposition = mce_handle_error(regs, errp); - fwnmi_release_errinfo(); - /* Queue irq work to log this rtas event later. */ - irq_work_queue(&mce_errlog_process_work); + fwnmi_release_errinfo(); if (disposition == RTAS_DISP_FULLY_RECOVERED) return 1; From 118178e62e2e0da39b394e812fef7179c8bdb3bc Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:49 +1000 Subject: [PATCH 115/188] powerpc: move NMI entry/exit code into wrapper This moves the common NMI entry and exit code into the interrupt handler wrappers. This changes the behaviour of soft-NMI (watchdog) and HMI interrupts, and also MCE interrupts on 64e, by adding missing parts of the NMI entry to them. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-40-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 28 ++++++++++++++++++++++ arch/powerpc/kernel/mce.c | 11 --------- arch/powerpc/kernel/traps.c | 35 +++++----------------------- arch/powerpc/kernel/watchdog.c | 10 ++++---- 4 files changed, 38 insertions(+), 46 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index ca8e08b18a16..cd819d42573c 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -94,14 +94,42 @@ static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct int } struct interrupt_nmi_state { +#ifdef CONFIG_PPC64 + u8 ftrace_enabled; +#endif }; static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state) { +#ifdef CONFIG_PPC64 + /* Allow DEC and PMI to be traced when they are soft-NMI */ + if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260) { + state->ftrace_enabled = this_cpu_get_ftrace_enabled(); + this_cpu_set_ftrace_enabled(0); + } +#endif + + /* + * Do not use nmi_enter() for pseries hash guest taking a real-mode + * NMI because not everything it touches is within the RMA limit. 
+ */ + if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) || + !firmware_has_feature(FW_FEATURE_LPAR) || + radix_enabled() || (mfmsr() & MSR_DR)) + nmi_enter(); } static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state) { + if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) || + !firmware_has_feature(FW_FEATURE_LPAR) || + radix_enabled() || (mfmsr() & MSR_DR)) + nmi_exit(); + +#ifdef CONFIG_PPC64 + if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260) + this_cpu_set_ftrace_enabled(state->ftrace_enabled); +#endif } /** diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index cfe96cd3f48b..11f0cae086ed 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -587,12 +587,6 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info); DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early) { long handled = 0; - u8 ftrace_enabled = this_cpu_get_ftrace_enabled(); - - this_cpu_set_ftrace_enabled(0); - /* Do not use nmi_enter/exit for pseries hpte guest */ - if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR)) - nmi_enter(); hv_nmi_check_nonrecoverable(regs); @@ -602,11 +596,6 @@ DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early) if (ppc_md.machine_check_early) handled = ppc_md.machine_check_early(regs); - if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR)) - nmi_exit(); - - this_cpu_set_ftrace_enabled(ftrace_enabled); - return handled; } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 13fb93a59556..39c8b7e9b91a 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -435,11 +435,6 @@ DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception) { unsigned long hsrr0, hsrr1; bool saved_hsrrs = false; - u8 ftrace_enabled = this_cpu_get_ftrace_enabled(); - - this_cpu_set_ftrace_enabled(0); - - nmi_enter(); /* * System reset can interrupt code where HSRRs are live and MSR[RI]=1. @@ -514,10 +509,6 @@ out: mtspr(SPRN_HSRR1, hsrr1); } - nmi_exit(); - - this_cpu_set_ftrace_enabled(ftrace_enabled); - /* What should we do here? We could issue a shutdown or hard reset. */ return 0; @@ -809,6 +800,12 @@ void die_mce(const char *str, struct pt_regs *regs, long err) } NOKPROBE_SYMBOL(die_mce); +/* + * BOOK3S_64 does not call this handler as a non-maskable interrupt + * (it uses its own early real-mode handler to handle the MCE proper + * and then raises irq_work to call this handler when interrupts are + * enabled). + */ #ifdef CONFIG_PPC_BOOK3S_64 DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception) #else @@ -817,20 +814,6 @@ DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception) { int recover = 0; - /* - * BOOK3S_64 does not call this handler as a non-maskable interrupt - * (it uses its own early real-mode handler to handle the MCE proper - * and then raises irq_work to call this handler when interrupts are - * enabled). - * - * This is silly. The BOOK3S_64 should just call a different function - * rather than expecting semantics to magically change. Something - * like 'non_nmi_machine_check_exception()', perhaps? 
- */ - const bool nmi = !IS_ENABLED(CONFIG_PPC_BOOK3S_64); - - if (nmi) nmi_enter(); - __this_cpu_inc(irq_stat.mce_exceptions); add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); @@ -862,8 +845,6 @@ bail: if (!(regs->msr & MSR_RI)) die_mce("Unrecoverable Machine check", regs, SIGBUS); - if (nmi) nmi_exit(); - #ifdef CONFIG_PPC_BOOK3S_64 return; #else @@ -1892,14 +1873,10 @@ DEFINE_INTERRUPT_HANDLER(vsx_unavailable_tm) DECLARE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi); DEFINE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi) { - nmi_enter(); - __this_cpu_inc(irq_stat.pmu_irqs); perf_irq(regs); - nmi_exit(); - return 0; } #endif diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index be6b22838555..c9a8f4781a10 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -255,11 +255,12 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) int cpu = raw_smp_processor_id(); u64 tb; + /* should only arrive from kernel, with irqs disabled */ + WARN_ON_ONCE(!arch_irq_disabled_regs(regs)); + if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) return 0; - nmi_enter(); - __this_cpu_inc(irq_stat.soft_nmi_irqs); tb = get_tb(); @@ -267,7 +268,7 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) wd_smp_lock(&flags); if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) { wd_smp_unlock(&flags); - goto out; + return 0; } set_cpu_stuck(cpu, tb); @@ -291,9 +292,6 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) if (wd_panic_timeout_tb < 0x7fffffff) mtspr(SPRN_DEC, wd_panic_timeout_tb); -out: - nmi_exit(); - return 0; } From 6ecbb582b6947f041832fff07c2f38791ae19287 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:50 +1000 Subject: [PATCH 116/188] powerpc/64s: move NMI soft-mask handling to C Saving and restoring soft-mask state can now be done in C using the interrupt handler wrapper functions. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-41-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 25 ++++++++++++ arch/powerpc/kernel/exceptions-64s.S | 60 ---------------------------- 2 files changed, 25 insertions(+), 60 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index cd819d42573c..72a585084066 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -95,6 +95,10 @@ static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct int struct interrupt_nmi_state { #ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 + u8 irq_soft_mask; + u8 irq_happened; +#endif u8 ftrace_enabled; #endif }; @@ -102,6 +106,20 @@ struct interrupt_nmi_state { static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state) { #ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 + state->irq_soft_mask = local_paca->irq_soft_mask; + state->irq_happened = local_paca->irq_happened; + + /* + * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does + * the right thing, and set IRQ_HARD_DIS. We do not want to reconcile + * because that goes through irq tracing which we don't want in NMI. 
+ */ + local_paca->irq_soft_mask = IRQS_ALL_DISABLED; + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + /* Don't do any per-CPU operations until interrupt state is fixed */ +#endif /* Allow DEC and PMI to be traced when they are soft-NMI */ if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260) { state->ftrace_enabled = this_cpu_get_ftrace_enabled(); @@ -129,6 +147,13 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter #ifdef CONFIG_PPC64 if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260) this_cpu_set_ftrace_enabled(state->ftrace_enabled); + +#ifdef CONFIG_PPC_BOOK3S_64 + /* Check we didn't change the pending interrupt mask. */ + WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened); + local_paca->irq_happened = state->irq_happened; + local_paca->irq_soft_mask = state->irq_soft_mask; +#endif #endif } diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 898698cc9947..ec3e8343d9a9 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1008,20 +1008,6 @@ EXC_COMMON_BEGIN(system_reset_common) ld r1,PACA_NMI_EMERG_SP(r13) subi r1,r1,INT_FRAME_SIZE __GEN_COMMON_BODY system_reset - /* - * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does - * the right thing. We do not want to reconcile because that goes - * through irq tracing which we don't want in NMI. - * - * Save PACAIRQHAPPENED to RESULT (otherwise unused), and set HARD_DIS - * as we are running with MSR[EE]=0. - */ - li r10,IRQS_ALL_DISABLED - stb r10,PACAIRQSOFTMASK(r13) - lbz r10,PACAIRQHAPPENED(r13) - std r10,RESULT(r1) - ori r10,r10,PACA_IRQ_HARD_DIS - stb r10,PACAIRQHAPPENED(r13) addi r3,r1,STACK_FRAME_OVERHEAD bl system_reset_exception @@ -1037,14 +1023,6 @@ EXC_COMMON_BEGIN(system_reset_common) subi r10,r10,1 sth r10,PACA_IN_NMI(r13) - /* - * Restore soft mask settings. - */ - ld r10,RESULT(r1) - stb r10,PACAIRQHAPPENED(r13) - ld r10,SOFTE(r1) - stb r10,PACAIRQSOFTMASK(r13) - kuap_kernel_restore r9, r10 EXCEPTION_RESTORE_REGS RFI_TO_USER_OR_KERNEL @@ -1190,30 +1168,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) li r10,MSR_RI mtmsrd r10,1 - /* - * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see - * system_reset_common) - */ - li r10,IRQS_ALL_DISABLED - stb r10,PACAIRQSOFTMASK(r13) - lbz r10,PACAIRQHAPPENED(r13) - std r10,RESULT(r1) - ori r10,r10,PACA_IRQ_HARD_DIS - stb r10,PACAIRQHAPPENED(r13) - addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_early std r3,RESULT(r1) /* Save result */ ld r12,_MSR(r1) - /* - * Restore soft mask settings. - */ - ld r10,RESULT(r1) - stb r10,PACAIRQHAPPENED(r13) - ld r10,SOFTE(r1) - stb r10,PACAIRQSOFTMASK(r13) - #ifdef CONFIG_PPC_P7_NAP /* * Check if thread was in power saving mode. We come here when any @@ -2818,17 +2777,6 @@ EXC_COMMON_BEGIN(soft_nmi_common) subi r1,r1,INT_FRAME_SIZE __GEN_COMMON_BODY soft_nmi - /* - * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see - * system_reset_common) - */ - li r10,IRQS_ALL_DISABLED - stb r10,PACAIRQSOFTMASK(r13) - lbz r10,PACAIRQHAPPENED(r13) - std r10,RESULT(r1) - ori r10,r10,PACA_IRQ_HARD_DIS - stb r10,PACAIRQHAPPENED(r13) - addi r3,r1,STACK_FRAME_OVERHEAD bl soft_nmi_interrupt @@ -2836,14 +2784,6 @@ EXC_COMMON_BEGIN(soft_nmi_common) li r9,0 mtmsrd r9,1 - /* - * Restore soft mask settings. 
- */
-	ld r10,RESULT(r1)
-	stb r10,PACAIRQHAPPENED(r13)
-	ld r10,SOFTE(r1)
-	stb r10,PACAIRQSOFTMASK(r13)
-
 	kuap_kernel_restore r9, r10
 	EXCEPTION_RESTORE_REGS hsrr=0
 	RFI_TO_KERNEL

From 86dbb39416493add2bdf5b7ad39a1276f2107b83 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Sat, 30 Jan 2021 23:08:51 +1000
Subject: [PATCH 117/188] powerpc/64s: runlatch interrupt handling in C

There is no need for this to be in asm; use the new interrupt entry
wrapper.

Signed-off-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20210130130852.2952424-42-npiggin@gmail.com
---
 arch/powerpc/include/asm/interrupt.h |  7 +++++++
 arch/powerpc/kernel/exceptions-64s.S | 18 ------------------
 2 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 72a585084066..4badb3e51c19 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include

 struct interrupt_state {
 #ifdef CONFIG_PPC_BOOK3E_64
@@ -83,6 +84,12 @@ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt

 static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
 {
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (cpu_has_feature(CPU_FTR_CTRL) &&
+	    !test_thread_local_flags(_TLF_RUNLATCH))
+		__ppc64_runlatch_on();
+#endif
+
 	interrupt_enter_prepare(regs, state);
 	irq_enter();
 }

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ec3e8343d9a9..b3793f982b2b 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -692,14 +692,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	ld r1,GPR1(r1)
 .endm

-#define RUNLATCH_ON \
-BEGIN_FTR_SECTION \
-	ld r3, PACA_THREAD_INFO(r13); \
-	ld r4,TI_LOCAL_FLAGS(r3); \
-	andi.
r0,r4,_TLF_RUNLATCH; \ - beql ppc64_runlatch_on_trampoline; \ -END_FTR_SECTION_IFSET(CPU_FTR_CTRL) - /* * When the idle code in power4_idle puts the CPU into NAP mode, * it has to do so in a loop, and relies on the external interrupt @@ -1585,7 +1577,6 @@ EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) EXC_COMMON_BEGIN(hardware_interrupt_common) GEN_COMMON hardware_interrupt FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ b interrupt_return @@ -1771,7 +1762,6 @@ EXC_VIRT_END(decrementer, 0x4900, 0x80) EXC_COMMON_BEGIN(decrementer_common) GEN_COMMON decrementer FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl timer_interrupt b interrupt_return @@ -1857,7 +1847,6 @@ EXC_VIRT_END(doorbell_super, 0x4a00, 0x100) EXC_COMMON_BEGIN(doorbell_super_common) GEN_COMMON doorbell_super FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception @@ -2212,7 +2201,6 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) EXC_COMMON_BEGIN(hmi_exception_common) GEN_COMMON hmi_exception FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl handle_hmi_exception b interrupt_return @@ -2242,7 +2230,6 @@ EXC_VIRT_END(h_doorbell, 0x4e80, 0x20) EXC_COMMON_BEGIN(h_doorbell_common) GEN_COMMON h_doorbell FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception @@ -2276,7 +2263,6 @@ EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20) EXC_COMMON_BEGIN(h_virt_irq_common) GEN_COMMON h_virt_irq FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ b interrupt_return @@ -2323,7 +2309,6 @@ EXC_VIRT_END(performance_monitor, 0x4f00, 0x20) EXC_COMMON_BEGIN(performance_monitor_common) GEN_COMMON performance_monitor FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl performance_monitor_exception b interrupt_return @@ -3038,9 +3023,6 @@ kvmppc_skip_Hinterrupt: * come here. */ -EXC_COMMON_BEGIN(ppc64_runlatch_on_trampoline) - b __ppc64_runlatch_on - USE_FIXED_SECTION(virt_trampolines) /* * All code below __end_interrupts is treated as soft-masked. If From 665d8d58761cba41147fe7e98e2ceed1cbf603a2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 1 Feb 2021 12:25:03 +1100 Subject: [PATCH 118/188] powerpc/akebono: Fix unmet dependency errors The AKEBONO config has various selects under it, including some with user-selectable dependencies, which means those dependencies can be disabled. This leads to warnings from Kconfig. This can be seen with eg: $ make allnoconfig $ ./scripts/config --file build~/.config -k -e CONFIG_44x -k -e CONFIG_PPC_47x -e CONFIG_AKEBONO $ make olddefconfig WARNING: unmet direct dependencies detected for ATA Depends on [n]: HAS_IOMEM [=y] && BLOCK [=n] Selected by [y]: - AKEBONO [=y] && PPC_47x [=y] WARNING: unmet direct dependencies detected for NETDEVICES Depends on [n]: NET [=n] Selected by [y]: - AKEBONO [=y] && PPC_47x [=y] WARNING: unmet direct dependencies detected for ETHERNET Depends on [n]: NETDEVICES [=y] && NET [=n] Selected by [y]: - AKEBONO [=y] && PPC_47x [=y] WARNING: unmet direct dependencies detected for MMC_SDHCI Depends on [n]: MMC [=n] && HAS_DMA [=y] Selected by [y]: - AKEBONO [=y] && PPC_47x [=y] WARNING: unmet direct dependencies detected for MMC_SDHCI_PLTFM Depends on [n]: MMC [=n] && MMC_SDHCI [=y] Selected by [y]: - AKEBONO [=y] && PPC_47x [=y] The problem is that AKEBONO is using select to enable things that are not true dependencies, but rather things you probably want enabled in an AKEBONO kernel. That is what a defconfig is for. 
So drop those selects and instead move those symbols into the defconfig. This fixes all the kconfig warnings, and the result of make 44x/akebono_defconfig is the same before and after the patch. Reported-by: Yury Norov Reported-by: Randy Dunlap Reported-by: Florian Fainelli Signed-off-by: Michael Ellerman Tested-by: Randy Dunlap Reviewed-by: Randy Dunlap Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20210201012503.940145-1-mpe@ellerman.id.au --- arch/powerpc/configs/44x/akebono_defconfig | 5 +++++ arch/powerpc/platforms/44x/Kconfig | 7 ------- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig index 3894ba8f8ffc..ae9b7beefdd6 100644 --- a/arch/powerpc/configs/44x/akebono_defconfig +++ b/arch/powerpc/configs/44x/akebono_defconfig @@ -21,6 +21,7 @@ CONFIG_IRQ_ALL_CPUS=y # CONFIG_COMPACTION is not set # CONFIG_SUSPEND is not set CONFIG_NET=y +CONFIG_NETDEVICES=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y @@ -41,7 +42,9 @@ CONFIG_BLK_DEV_RAM_SIZE=35000 # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y # CONFIG_SATA_PMP is not set +CONFIG_SATA_AHCI_PLATFORM=y # CONFIG_ATA_SFF is not set # CONFIG_NET_VENDOR_3COM is not set # CONFIG_NET_VENDOR_ADAPTEC is not set @@ -98,6 +101,8 @@ CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OHCI_HCD_PCI is not set CONFIG_USB_STORAGE=y CONFIG_MMC=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_M41T80=y CONFIG_EXT2_FS=y diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 78ac6d67a935..7d41e9264510 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -206,17 +206,10 @@ config AKEBONO select PPC4xx_HSTA_MSI select I2C select I2C_IBM_IIC - select NETDEVICES - select ETHERNET - select NET_VENDOR_IBM select IBM_EMAC_EMAC4 if IBM_EMAC select USB if USB_SUPPORT select USB_OHCI_HCD_PLATFORM if USB_OHCI_HCD select USB_EHCI_HCD_PLATFORM if USB_EHCI_HCD - select MMC_SDHCI - select MMC_SDHCI_PLTFM - select ATA - select SATA_AHCI_PLATFORM help This option enables support for the IBM Akebono (476gtr) evaluation board From 6c6fdbb2b7002aa04e418b5d2b26df1c5ba5ab80 Mon Sep 17 00:00:00 2001 From: Chengyang Fan Date: Mon, 25 Jan 2021 17:53:38 +0800 Subject: [PATCH 119/188] powerpc: remove unneeded semicolons Remove superfluous semicolons after function definitions. 
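For illustration, the cleanup is of the following form (a made-up
example; the actual affected declarations are in the diff below):

	/* Before: stray semicolon after an empty inline stub */
	static inline void example_stub(void) { };

	/* After: a function definition is not followed by a semicolon */
	static inline void example_stub(void) { }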
Signed-off-by: Chengyang Fan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210125095338.1719405-1-cy.fan@huawei.com --- arch/powerpc/include/asm/book3s/32/mmu-hash.h | 2 +- arch/powerpc/include/asm/book3s/64/mmu.h | 2 +- arch/powerpc/include/asm/book3s/64/tlbflush-radix.h | 2 +- arch/powerpc/include/asm/book3s/64/tlbflush.h | 2 +- arch/powerpc/include/asm/firmware.h | 2 +- arch/powerpc/include/asm/kvm_ppc.h | 6 +++--- arch/powerpc/include/asm/paca.h | 6 +++--- arch/powerpc/include/asm/rtas.h | 2 +- arch/powerpc/include/asm/setup.h | 6 +++--- arch/powerpc/include/asm/simple_spinlock.h | 4 ++-- arch/powerpc/include/asm/smp.h | 2 +- arch/powerpc/include/asm/xmon.h | 4 ++-- arch/powerpc/kernel/prom.c | 2 +- arch/powerpc/kernel/setup.h | 12 ++++++------ arch/powerpc/platforms/powernv/subcore.h | 2 +- arch/powerpc/platforms/pseries/pseries.h | 2 +- 16 files changed, 29 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index 685c589e723f..b85f8e114a9c 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -94,7 +94,7 @@ typedef struct { } mm_context_t; void update_bats(void); -static inline void cleanup_cpu_mmu_context(void) { }; +static inline void cleanup_cpu_mmu_context(void) { } /* patch sites */ extern s32 patch__hash_page_A0, patch__hash_page_A1, patch__hash_page_A2; diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 995bbcdd0ef8..eace8c3f7b0a 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -239,7 +239,7 @@ static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base, #ifdef CONFIG_PPC_PSERIES extern void radix_init_pseries(void); #else -static inline void radix_init_pseries(void) { }; +static inline void radix_init_pseries(void) { } #endif #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 94439e0cefc9..8b33601cdb9d 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -35,7 +35,7 @@ extern void radix__flush_pwc_lpid(unsigned int lpid); extern void radix__flush_all_lpid(unsigned int lpid); extern void radix__flush_all_lpid_guest(unsigned int lpid); #else -static inline void radix__tlbiel_all(unsigned int action) { WARN_ON(1); }; +static inline void radix__tlbiel_all(unsigned int action) { WARN_ON(1); } static inline void radix__flush_tlb_lpid_page(unsigned int lpid, unsigned long addr, unsigned long page_size) diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index dcb5c3839d2f..215973b4cb26 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -31,7 +31,7 @@ static inline void tlbiel_all(void) hash__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL); } #else -static inline void tlbiel_all(void) { BUG(); }; +static inline void tlbiel_all(void) { BUG(); } #endif static inline void tlbiel_all_lpid(bool radix) diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index aa6a5ef5d483..7604673787d6 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -137,7 +137,7 @@ extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup; #ifdef CONFIG_PPC_PSERIES void 
pseries_probe_fw_features(void); #else -static inline void pseries_probe_fw_features(void) { }; +static inline void pseries_probe_fw_features(void) { } #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 0a056c64c317..259ba4ce9ad3 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -627,9 +627,9 @@ extern int h_ipi_redirect; static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap( struct kvm *kvm) { return NULL; } -static inline void kvmppc_alloc_host_rm_ops(void) {}; -static inline void kvmppc_free_host_rm_ops(void) {}; -static inline void kvmppc_free_pimap(struct kvm *kvm) {}; +static inline void kvmppc_alloc_host_rm_ops(void) {} +static inline void kvmppc_free_host_rm_ops(void) {} +static inline void kvmppc_free_pimap(struct kvm *kvm) {} static inline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) { return 0; } static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 38e0c55e845d..169e687fd479 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -289,9 +289,9 @@ extern void free_unused_pacas(void); #else /* CONFIG_PPC64 */ -static inline void allocate_paca_ptrs(void) { }; -static inline void allocate_paca(int cpu) { }; -static inline void free_unused_pacas(void) { }; +static inline void allocate_paca_ptrs(void) { } +static inline void allocate_paca(int cpu) { } +static inline void free_unused_pacas(void) { } #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 332e1000ca0f..658448ca5b8a 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -369,7 +369,7 @@ void rtas_initialize(void); #else static inline int page_is_rtas_user_buf(unsigned long pfn) { return 0;} static inline void pSeries_coalesce_init(void) { } -static inline void rtas_initialize(void) { }; +static inline void rtas_initialize(void) { } #endif extern int call_rtas(const char *, int, int, unsigned long *, ...); diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index a466749703f1..e89bfebd4e00 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -58,7 +58,7 @@ void do_rfi_flush_fixups(enum l1d_flush_type types); #ifdef CONFIG_PPC_BARRIER_NOSPEC void setup_barrier_nospec(void); #else -static inline void setup_barrier_nospec(void) { }; +static inline void setup_barrier_nospec(void) { } #endif void do_uaccess_flush_fixups(enum l1d_flush_type types); void do_entry_flush_fixups(enum l1d_flush_type types); @@ -68,13 +68,13 @@ extern bool barrier_nospec_enabled; #ifdef CONFIG_PPC_BARRIER_NOSPEC void do_barrier_nospec_fixups_range(bool enable, void *start, void *end); #else -static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void *end) { }; +static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void *end) { } #endif #ifdef CONFIG_PPC_FSL_BOOK3E void setup_spectre_v2(void); #else -static inline void setup_spectre_v2(void) {}; +static inline void setup_spectre_v2(void) {} #endif void do_btb_flush_fixups(void); diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h index 9c3c30534333..5b862de29dff 100644 --- a/arch/powerpc/include/asm/simple_spinlock.h +++ b/arch/powerpc/include/asm/simple_spinlock.h @@ -90,8 +90,8 @@ static 
inline int arch_spin_trylock(arch_spinlock_t *lock) void splpar_spin_yield(arch_spinlock_t *lock); void splpar_rw_yield(arch_rwlock_t *lock); #else /* SPLPAR */ -static inline void splpar_spin_yield(arch_spinlock_t *lock) {}; -static inline void splpar_rw_yield(arch_rwlock_t *lock) {}; +static inline void splpar_spin_yield(arch_spinlock_t *lock) {} +static inline void splpar_rw_yield(arch_rwlock_t *lock) {} #endif static inline void spin_yield(arch_spinlock_t *lock) diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index c4e2d53acd2b..7a13bc20f0a0 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -236,7 +236,7 @@ static inline void set_hard_smp_processor_id(int cpu, int phys) #if defined(CONFIG_PPC64) && (defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)) extern void smp_release_cpus(void); #else -static inline void smp_release_cpus(void) { }; +static inline void smp_release_cpus(void) { } #endif extern int smt_enabled_at_boot; diff --git a/arch/powerpc/include/asm/xmon.h b/arch/powerpc/include/asm/xmon.h index 454a7fc6113b..68bfb2361f03 100644 --- a/arch/powerpc/include/asm/xmon.h +++ b/arch/powerpc/include/asm/xmon.h @@ -17,8 +17,8 @@ struct pt_regs; extern int xmon(struct pt_regs *excp); extern irqreturn_t xmon_irq(int, void *); #else -static inline void xmon_setup(void) { }; -static inline void xmon_register_spus(struct list_head *list) { }; +static inline void xmon_setup(void) { } +static inline void xmon_register_spus(struct list_head *list) { } #endif #if defined(CONFIG_XMON) && defined(CONFIG_SMP) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index ae3c41730367..9a4797d1d40d 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -707,7 +707,7 @@ static void __init save_fscr_to_task(void) init_task.thread.fscr = mfspr(SPRN_FSCR); } #else -static inline void save_fscr_to_task(void) {}; +static inline void save_fscr_to_task(void) {} #endif diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 2dd0d9cb5a20..84058bbc8fe9 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -14,31 +14,31 @@ void irqstack_early_init(void); #ifdef CONFIG_PPC32 void setup_power_save(void); #else -static inline void setup_power_save(void) { }; +static inline void setup_power_save(void) { } #endif #if defined(CONFIG_PPC64) && defined(CONFIG_SMP) void check_smt_enabled(void); #else -static inline void check_smt_enabled(void) { }; +static inline void check_smt_enabled(void) { } #endif #if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) void setup_tlb_core_data(void); #else -static inline void setup_tlb_core_data(void) { }; +static inline void setup_tlb_core_data(void) { } #endif #if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_BOOKE) || defined(CONFIG_40x) void exc_lvl_early_init(void); #else -static inline void exc_lvl_early_init(void) { }; +static inline void exc_lvl_early_init(void) { } #endif #if defined(CONFIG_PPC64) || defined(CONFIG_VMAP_STACK) void emergency_stack_init(void); #else -static inline void emergency_stack_init(void) { }; +static inline void emergency_stack_init(void) { } #endif #ifdef CONFIG_PPC64 @@ -55,7 +55,7 @@ extern unsigned long spr_default_dscr; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE void kvm_cma_reserve(void); #else -static inline void kvm_cma_reserve(void) { }; +static inline void kvm_cma_reserve(void) { } #endif #ifdef CONFIG_TAU diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h index 
c8f574d1c04a..77feee8436d4 100644 --- a/arch/powerpc/platforms/powernv/subcore.h +++ b/arch/powerpc/platforms/powernv/subcore.h @@ -15,7 +15,7 @@ void split_core_secondary_loop(u8 *state); extern void update_subcore_sibling_mask(void); #else -static inline void update_subcore_sibling_mask(void) { }; +static inline void update_subcore_sibling_mask(void) { } #endif /* CONFIG_SMP */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 593840847cd3..4fe48c04c6c2 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -33,7 +33,7 @@ int smp_query_cpu_stopped(unsigned int pcpu); #define QCSS_HARDWARE_ERROR -1 #define QCSS_HARDWARE_BUSY -2 #else -static inline void smp_init_pseries(void) { }; +static inline void smp_init_pseries(void) { } #endif extern void pseries_kexec_cpu_down(int crash_shutdown, int secondary); From ea7826583f5ed7abca97e6e56441caadcbbd957a Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Tue, 2 Feb 2021 15:07:46 +0530 Subject: [PATCH 120/188] powerpc/44x: Fix a spelling mismach to mismatch in head_44x.S s/mismach/mismatch/ Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210202093746.5198-1-unixbhaskar@gmail.com --- arch/powerpc/kernel/head_44x.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 8e36718f3167..813fa305c33b 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -376,7 +376,7 @@ interrupt_base: /* Load the next available TLB index */ lwz r13,tlb_44x_index@l(r10) - bne 2f /* Bail if permission mismach */ + bne 2f /* Bail if permission mismatch */ /* Increment, rollover, and store TLB index */ addi r13,r13,1 @@ -471,7 +471,7 @@ interrupt_base: /* Load the next available TLB index */ lwz r13,tlb_44x_index@l(r10) - bne 2f /* Bail if permission mismach */ + bne 2f /* Bail if permission mismatch */ /* Increment, rollover, and store TLB index */ addi r13,r13,1 From 266d8f7586533a4c473ccb392204e32df99b72b5 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Tue, 2 Feb 2021 20:30:50 +0530 Subject: [PATCH 121/188] powerpc/pkeys: Remove unused code This removes arch_supports_pkeys(), arch_usable_pkeys() and thread_pkey_regs_*() which are remnants from the following: commit 06bb53b33804 ("powerpc: store and restore the pkey state across context switches") commit 2cd4bd192ee9 ("powerpc/pkeys: Fix handling of pkey state across fork()") commit cf43d3b26452 ("powerpc: Enable pkey subsystem") arch_supports_pkeys() and arch_usable_pkeys() were unused since their introduction while thread_pkey_regs_*() became unused after the introduction of the following: commit d5fa30e6993f ("powerpc/book3s64/pkeys: Reset userspace AMR correctly on exec") commit 48a8ab4eeb82 ("powerpc/book3s64/pkeys: Don't update SPRN_AMR when in kernel mode") Signed-off-by: Sandipan Das Reviewed-by: Ram Pai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210202150050.75335-1-sandipan@linux.ibm.com --- arch/powerpc/include/asm/mmu_context.h | 3 --- arch/powerpc/include/asm/pkeys.h | 6 ------ 2 files changed, 9 deletions(-) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index d5821834dba9..652ce85f9410 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -282,9 +282,6 @@ static inline bool 
arch_vma_access_permitted(struct vm_area_struct *vma,
 }

 #define pkey_mm_init(mm)
-#define thread_pkey_regs_save(thread)
-#define thread_pkey_regs_restore(new_thread, old_thread)
-#define thread_pkey_regs_init(thread)
 #define arch_dup_pkeys(oldmm, mm)

 static inline u64 pte_to_hpte_pkey_bits(u64 pteflags, unsigned long flags)
diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h
index a7951049e129..59a2c7dbc78f 100644
--- a/arch/powerpc/include/asm/pkeys.h
+++ b/arch/powerpc/include/asm/pkeys.h
@@ -169,10 +169,4 @@ static inline bool arch_pkeys_enabled(void)
 }

 extern void pkey_mm_init(struct mm_struct *mm);
-extern bool arch_supports_pkeys(int cap);
-extern unsigned int arch_usable_pkeys(void);
-extern void thread_pkey_regs_save(struct thread_struct *thread);
-extern void thread_pkey_regs_restore(struct thread_struct *new_thread,
-				     struct thread_struct *old_thread);
-extern void thread_pkey_regs_init(struct thread_struct *thread);
 #endif /*_ASM_POWERPC_KEYS_H */

From 91f3469a43fd1fb831649c2a2e684bf5ad4818b2 Mon Sep 17 00:00:00 2001
From: Athira Rajeev
Date: Wed, 3 Feb 2021 01:55:35 -0500
Subject: [PATCH 122/188] powerpc/perf: Include PMCs as part of per-cpu
 cpuhw_events struct

To support capturing of PMCs as part of extended registers, the value
of the SPRs PMC1 to PMC6 has to be saved at the start of the PMI
interrupt handler. This is needed since we reset the overflown PMC
before creating the sample, and hence directly reading SPRN_PMCx in
'perf_reg_value' would capture the modified value.

To solve this, add a per-cpu array as part of structure cpu_hw_events
and use this array to capture PMC values in the perf interrupt
handler. The patch also refactors the interrupt handler code to use
this per-cpu array instead of the current local array.
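The shape of the change can be sketched as follows (a simplified
illustration only; the names match the patch below, but the
surrounding handler code is elided):

	/* One snapshot slot per hardware counter, per CPU */
	struct cpu_hw_events {
		/* ... existing fields ... */
		unsigned long pmcs[MAX_HWEVENTS];
	};

	static void __perf_event_interrupt(struct pt_regs *regs)
	{
		struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
		int i;

		/* Snapshot all PMCs before any overflown counter is reset */
		for (i = 0; i < ppmu->n_counter; ++i)
			cpuhw->pmcs[i] = read_pmc(i + 1);

		/* ... overflow processing uses cpuhw->pmcs[] from here on ... */
	}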
Signed-off-by: Athira Rajeev
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/1612335337-1888-2-git-send-email-atrajeev@linux.vnet.ibm.com
---
 arch/powerpc/perf/core-book3s.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 9fd06010e8b6..72bedd812b60 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -54,6 +54,9 @@ struct cpu_hw_events {
 	struct perf_branch_stack bhrb_stack;
 	struct perf_branch_entry bhrb_entries[BHRB_MAX_ENTRIES];
 	u64 ic_init;
+
+	/* Store the PMC values */
+	unsigned long pmcs[MAX_HWEVENTS];
 };

 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
@@ -2264,7 +2267,6 @@ static void __perf_event_interrupt(struct pt_regs *regs)
 	int i, j;
 	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
 	struct perf_event *event;
-	unsigned long val[8];
 	int found, active;

 	if (cpuhw->n_limited)
@@ -2275,12 +2277,12 @@ static void __perf_event_interrupt(struct pt_regs *regs)

 	/* Read all the PMCs since we'll need them a bunch of times */
 	for (i = 0; i < ppmu->n_counter; ++i)
-		val[i] = read_pmc(i + 1);
+		cpuhw->pmcs[i] = read_pmc(i + 1);

 	/* Try to find what caused the IRQ */
 	found = 0;
 	for (i = 0; i < ppmu->n_counter; ++i) {
-		if (!pmc_overflow(val[i]))
+		if (!pmc_overflow(cpuhw->pmcs[i]))
 			continue;
 		if (is_limited_pmc(i + 1))
 			continue; /* these won't generate IRQs */
@@ -2295,7 +2297,7 @@ static void __perf_event_interrupt(struct pt_regs *regs)
 			event = cpuhw->event[j];
 			if (event->hw.idx == (i + 1)) {
 				active = 1;
-				record_and_restart(event, val[i], regs);
+				record_and_restart(event, cpuhw->pmcs[i], regs);
 				break;
 			}
 		}
@@ -2309,11 +2311,11 @@ static void __perf_event_interrupt(struct pt_regs *regs)
 			event = cpuhw->event[i];
 			if (!event->hw.idx || is_limited_pmc(event->hw.idx))
 				continue;
-			if (pmc_overflow_power7(val[event->hw.idx - 1])) {
+			if (pmc_overflow_power7(cpuhw->pmcs[event->hw.idx - 1])) {
 				/* event has overflowed in a buggy way*/
 				found = 1;
 				record_and_restart(event,
-						   val[event->hw.idx - 1],
+						   cpuhw->pmcs[event->hw.idx - 1],
 						   regs);
 			}
 		}
@@ -2329,6 +2331,10 @@ static void __perf_event_interrupt(struct pt_regs *regs)
 	 * we get back out of this interrupt.
 	 */
 	write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0);
+
+	/* Clear the cpuhw->pmcs */
+	memset(&cpuhw->pmcs, 0, sizeof(cpuhw->pmcs));
+
 }

 static void perf_event_interrupt(struct pt_regs *regs)

From e79b76e03b712e42c58d9649c92571e346abc38b Mon Sep 17 00:00:00 2001
From: Athira Rajeev
Date: Wed, 3 Feb 2021 01:55:36 -0500
Subject: [PATCH 123/188] powerpc/perf: Expose Performance Monitor Counter
 SPRs as part of extended regs

Currently the Monitor Mode Control Registers and Sampling registers
are part of extended regs. The patch adds support to include the
Performance Monitor Counter Registers (PMC1 to PMC6) as part of
extended registers.

PMCs are saved in the perf interrupt handler as part of the per-cpu
array 'pmcs' in struct cpu_hw_events. While capturing the register
values for extended regs, fetch these saved PMC values.

Simplify the PERF_REG_PMU_MASK_300/31 definitions to include the PMU
SPRs MMCR0 to PMC6. Exclude the unsupported SPRs (MMCR3, SIER2, SIER3)
from the extended mask value for CPU_FTR_ARCH_300 in the new
definition. PERF_REG_EXTENDED_MAX is used to check if any index beyond
the extended registers is requested in the sample.
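Spelled out, the simplified mask arithmetic reduces to one contiguous
12-bit range starting at MMCR0, with the three unsupported SPRs
subtracted out for ISA v3.0 (a restatement of the definitions in the
diff below):

	/* 12 SPRs, MMCR0..PMC6, as one contiguous bit range */
	#define PERF_REG_PMU_MASK_31	(0xfffULL << PERF_REG_POWERPC_MMCR0)

	/* MMCR3, SIER2, SIER3 occupy 3 consecutive bits: mask them out */
	#define PERF_EXCLUDE_REG_EXT_300	(7ULL << PERF_REG_POWERPC_MMCR3)
	#define PERF_REG_PMU_MASK_300 \
		((0xfffULL << PERF_REG_POWERPC_MMCR0) - PERF_EXCLUDE_REG_EXT_300)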
Have one PERF_REG_EXTENDED_MAX for CPU_FTR_ARCH_300/CPU_FTR_ARCH_31 since perf_reg_validate function already checks the extended mask for the presence of any unsupported register. Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1612335337-1888-3-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/include/asm/perf_event.h | 2 ++ arch/powerpc/include/uapi/asm/perf_regs.h | 28 ++++++++++++++++++----- arch/powerpc/perf/core-book3s.c | 11 +++++++++ arch/powerpc/perf/perf_regs.c | 13 ++++------- 4 files changed, 39 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h index daec64d41b44..164e910bf654 100644 --- a/arch/powerpc/include/asm/perf_event.h +++ b/arch/powerpc/include/asm/perf_event.h @@ -14,6 +14,7 @@ #include #else static inline bool is_sier_available(void) { return false; } +static inline unsigned long get_pmcs_ext_regs(int idx) { return 0; } #endif #ifdef CONFIG_FSL_EMB_PERF_EVENT @@ -40,6 +41,7 @@ static inline bool is_sier_available(void) { return false; } /* To support perf_regs sier update */ extern bool is_sier_available(void); +extern unsigned long get_pmcs_ext_regs(int idx); /* To define perf extended regs mask value */ extern u64 PERF_REG_EXTENDED_MASK; #define PERF_REG_EXTENDED_MASK PERF_REG_EXTENDED_MASK diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h b/arch/powerpc/include/uapi/asm/perf_regs.h index bdf5f10f8b9f..578b3ee86105 100644 --- a/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/arch/powerpc/include/uapi/asm/perf_regs.h @@ -55,17 +55,33 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_MMCR3, PERF_REG_POWERPC_SIER2, PERF_REG_POWERPC_SIER3, + PERF_REG_POWERPC_PMC1, + PERF_REG_POWERPC_PMC2, + PERF_REG_POWERPC_PMC3, + PERF_REG_POWERPC_PMC4, + PERF_REG_POWERPC_PMC5, + PERF_REG_POWERPC_PMC6, /* Max regs without the extended regs */ PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1, }; #define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) -/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */ -#define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK) -/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */ -#define PERF_REG_PMU_MASK_31 (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - PERF_REG_PMU_MASK) +/* Exclude MMCR3, SIER2, SIER3 for CPU_FTR_ARCH_300 */ +#define PERF_EXCLUDE_REG_EXT_300 (7ULL << PERF_REG_POWERPC_MMCR3) -#define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1) -#define PERF_REG_MAX_ISA_31 (PERF_REG_POWERPC_SIER3 + 1) +/* + * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 + * includes 9 SPRS from MMCR0 to PMC6 excluding the + * unsupported SPRS in PERF_EXCLUDE_REG_EXT_300. + */ +#define PERF_REG_PMU_MASK_300 ((0xfffULL << PERF_REG_POWERPC_MMCR0) - PERF_EXCLUDE_REG_EXT_300) + +/* + * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 + * includes 12 SPRs from MMCR0 to PMC6. + */ +#define PERF_REG_PMU_MASK_31 (0xfffULL << PERF_REG_POWERPC_MMCR0) + +#define PERF_REG_EXTENDED_MAX (PERF_REG_POWERPC_PMC6 + 1) #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */ diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 72bedd812b60..278974f3264d 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -146,6 +146,17 @@ bool is_sier_available(void) return false; } +/* + * Return PMC value corresponding to the + * index passed. 
+ */ +unsigned long get_pmcs_ext_regs(int idx) +{ + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + + return cpuhw->pmcs[idx]; +} + static bool regs_use_siar(struct pt_regs *regs) { /* diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index 6f681b105eec..b931eed482c9 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -75,6 +75,8 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = { static u64 get_ext_regs_value(int idx) { switch (idx) { + case PERF_REG_POWERPC_PMC1 ... PERF_REG_POWERPC_PMC6: + return get_pmcs_ext_regs(idx - PERF_REG_POWERPC_PMC1); case PERF_REG_POWERPC_MMCR0: return mfspr(SPRN_MMCR0); case PERF_REG_POWERPC_MMCR1: @@ -95,13 +97,6 @@ static u64 get_ext_regs_value(int idx) u64 perf_reg_value(struct pt_regs *regs, int idx) { - u64 perf_reg_extended_max = PERF_REG_POWERPC_MAX; - - if (cpu_has_feature(CPU_FTR_ARCH_31)) - perf_reg_extended_max = PERF_REG_MAX_ISA_31; - else if (cpu_has_feature(CPU_FTR_ARCH_300)) - perf_reg_extended_max = PERF_REG_MAX_ISA_300; - if (idx == PERF_REG_POWERPC_SIER && (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) || IS_ENABLED(CONFIG_PPC32) || @@ -113,14 +108,14 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) IS_ENABLED(CONFIG_PPC32))) return 0; - if (idx >= PERF_REG_POWERPC_MAX && idx < perf_reg_extended_max) + if (idx >= PERF_REG_POWERPC_MAX && idx < PERF_REG_EXTENDED_MAX) return get_ext_regs_value(idx); /* * If the idx is referring to value beyond the * supported registers, return 0 with a warning */ - if (WARN_ON_ONCE(idx >= perf_reg_extended_max)) + if (WARN_ON_ONCE(idx >= PERF_REG_EXTENDED_MAX)) return 0; return regs_get_register(regs, pt_regs_offset[idx]); From a2496049f1f1006178d0db706a8451dd03bd3ec6 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 17 Dec 2020 23:47:25 +1000 Subject: [PATCH 124/188] powerpc/64s/radix: add warning and comments in mm_cpumask trim Add a comment explaining part of the logic for mm_cpumask trimming, and add a (hopefully graceful) check and warning in case something gets it wrong. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201217134731.488135-2-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index fb66d154b26c..40f496a45cd8 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -653,13 +653,14 @@ static void do_exit_flush_lazy_tlb(void *arg) { struct mm_struct *mm = arg; unsigned long pid = mm->context.id; + int cpu = smp_processor_id(); /* * A kthread could have done a mmget_not_zero() after the flushing CPU - * checked mm_is_singlethreaded, and be in the process of - * kthread_use_mm when interrupted here. In that case, current->mm will - * be set to mm, because kthread_use_mm() setting ->mm and switching to - * the mm is done with interrupts off. + * checked mm_cpumask, and be in the process of kthread_use_mm when + * interrupted here. In that case, current->mm will be set to mm, + * because kthread_use_mm() setting ->mm and switching to the mm is + * done with interrupts off. 
*/ if (current->mm == mm) goto out_flush; @@ -673,8 +674,22 @@ static void do_exit_flush_lazy_tlb(void *arg) mmdrop(mm); } - atomic_dec(&mm->context.active_cpus); - cpumask_clear_cpu(smp_processor_id(), mm_cpumask(mm)); + /* + * This IPI is only initiated from a CPU which is running mm which + * is a single-threaded process, so there will not be another racing + * IPI coming in where we would find our cpumask already clear. + * + * Nothing else clears our bit in the cpumask except CPU offlining, + * in which case we should not be taking IPIs here. However check + * this just in case the logic is wrong somewhere, and don't underflow + * the active_cpus count. + */ + if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { + atomic_dec(&mm->context.active_cpus); + cpumask_clear_cpu(cpu, mm_cpumask(mm)); + } else { + WARN_ON_ONCE(1); + } out_flush: _tlbiel_pid(pid, RIC_FLUSH_ALL); From 26418b36a11f2eaf2556aa8cefe86132907e311f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 17 Dec 2020 23:47:26 +1000 Subject: [PATCH 125/188] powerpc/64s/radix: refactor TLB flush type selection The logic to decide what kind of TLB flush is required (local, global, or IPI) is spread multiple times over the several kinds of TLB flushes. Move it all into a single function which may issue IPIs if necessary, and also returns a flush type that is to be used. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201217134731.488135-3-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 176 ++++++++++++++------------- 1 file changed, 94 insertions(+), 82 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 40f496a45cd8..c7c4552f0cc2 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -627,15 +627,6 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd } EXPORT_SYMBOL(radix__local_flush_tlb_page); -static bool mm_is_singlethreaded(struct mm_struct *mm) -{ - if (atomic_read(&mm->context.copros) > 0) - return false; - if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) - return true; - return false; -} - static bool mm_needs_flush_escalation(struct mm_struct *mm) { /* @@ -707,10 +698,58 @@ static void exit_flush_lazy_tlbs(struct mm_struct *mm) smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb, (void *)mm, 1); } +#else /* CONFIG_SMP */ +static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { } +#endif /* CONFIG_SMP */ +enum tlb_flush_type { + FLUSH_TYPE_LOCAL, + FLUSH_TYPE_GLOBAL, +}; + +static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm) +{ + if (mm_is_thread_local(mm)) + return FLUSH_TYPE_LOCAL; + + /* Coprocessors require TLBIE to invalidate nMMU. */ + if (atomic_read(&mm->context.copros) > 0) + return FLUSH_TYPE_GLOBAL; + + /* + * In the fullmm case there's no point doing the exit_flush_lazy_tlbs + * because the mm is being taken down anyway, and a TLBIE tends to + * be faster than an IPI+TLBIEL. + */ + if (fullmm) + return FLUSH_TYPE_GLOBAL; + + /* + * If we are running the only thread of a single-threaded process, + * then we should almost always be able to trim off the rest of the + * CPU mask (except in the case of use_mm() races), so always try + * trimming the mask. 
+ */ + if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) { + exit_flush_lazy_tlbs(mm); + /* + * use_mm() race could prevent IPIs from being able to clear + * the cpumask here, however those users are established + * after our first check (and so after the PTEs are removed), + * and the TLB still gets flushed by the IPI, so this CPU + * will only require a local flush. + */ + return FLUSH_TYPE_LOCAL; + } + + return FLUSH_TYPE_GLOBAL; +} + +#ifdef CONFIG_SMP void radix__flush_tlb_mm(struct mm_struct *mm) { unsigned long pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) @@ -718,16 +757,13 @@ void radix__flush_tlb_mm(struct mm_struct *mm) preempt_disable(); /* - * Order loads of mm_cpumask vs previous stores to clear ptes before - * the invalidate. See barrier in switch_mm_irqs_off + * Order loads of mm_cpumask (in flush_type_needed) vs previous + * stores to clear ptes before the invalidate. See barrier in + * switch_mm_irqs_off */ smp_mb(); - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - exit_flush_lazy_tlbs(mm); - goto local; - } - + type = flush_type_needed(mm, false); + if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt = H_RPTI_TARGET_CMMU; @@ -744,7 +780,6 @@ void radix__flush_tlb_mm(struct mm_struct *mm) _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB); } } else { -local: _tlbiel_pid(pid, RIC_FLUSH_TLB); } preempt_enable(); @@ -754,6 +789,7 @@ EXPORT_SYMBOL(radix__flush_tlb_mm); static void __flush_all_mm(struct mm_struct *mm, bool fullmm) { unsigned long pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) @@ -761,13 +797,8 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm) preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - if (!fullmm) { - exit_flush_lazy_tlbs(mm); - goto local; - } - } + type = flush_type_needed(mm, fullmm); + if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt = H_RPTI_TARGET_CMMU; unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | @@ -782,7 +813,6 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm) else _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); } else { -local: _tlbiel_pid(pid, RIC_FLUSH_ALL); } preempt_enable(); @@ -798,6 +828,7 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize) { unsigned long pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) @@ -805,11 +836,8 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - exit_flush_lazy_tlbs(mm); - goto local; - } + type = flush_type_needed(mm, false); + if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt, pg_sizes, size; @@ -827,7 +855,6 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, else _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB); } else { -local: _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); } preempt_enable(); @@ -843,8 +870,6 @@ void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) } EXPORT_SYMBOL(radix__flush_tlb_page); -#else /* CONFIG_SMP */ -static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { } #endif /* CONFIG_SMP */ static void 
do_tlbiel_kernel(void *info) @@ -908,7 +933,9 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift; unsigned long page_size = 1UL << page_shift; unsigned long nr_pages = (end - start) >> page_shift; - bool local, full; + bool fullmm = (end == TLB_FLUSH_ALL); + bool flush_pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) @@ -916,24 +943,16 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - if (end != TLB_FLUSH_ALL) { - exit_flush_lazy_tlbs(mm); - goto is_local; - } - } - local = false; - full = (end == TLB_FLUSH_ALL || - nr_pages > tlb_single_page_flush_ceiling); - } else { -is_local: - local = true; - full = (end == TLB_FLUSH_ALL || - nr_pages > tlb_local_single_page_flush_ceiling); - } + type = flush_type_needed(mm, fullmm); - if (!mmu_has_feature(MMU_FTR_GTSE) && !local) { + if (fullmm) + flush_pid = true; + else if (type == FLUSH_TYPE_GLOBAL) + flush_pid = nr_pages > tlb_single_page_flush_ceiling; + else + flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; + + if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) { unsigned long tgt = H_RPTI_TARGET_CMMU; unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); @@ -943,8 +962,8 @@ is_local: tgt |= H_RPTI_TARGET_NMMU; pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes, start, end); - } else if (full) { - if (local) { + } else if (flush_pid) { + if (type == FLUSH_TYPE_LOCAL) { _tlbiel_pid(pid, RIC_FLUSH_TLB); } else { if (cputlb_use_tlbie()) { @@ -967,7 +986,7 @@ is_local: hflush = true; } - if (local) { + if (type == FLUSH_TYPE_LOCAL) { asm volatile("ptesync": : :"memory"); __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize); if (hflush) @@ -1100,32 +1119,28 @@ static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned int page_shift = mmu_psize_defs[psize].shift; unsigned long page_size = 1UL << page_shift; unsigned long nr_pages = (end - start) >> page_shift; - bool local, full; + bool fullmm = (end == TLB_FLUSH_ALL); + bool flush_pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) return; + fullmm = (end == TLB_FLUSH_ALL); + preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - if (end != TLB_FLUSH_ALL) { - exit_flush_lazy_tlbs(mm); - goto is_local; - } - } - local = false; - full = (end == TLB_FLUSH_ALL || - nr_pages > tlb_single_page_flush_ceiling); - } else { -is_local: - local = true; - full = (end == TLB_FLUSH_ALL || - nr_pages > tlb_local_single_page_flush_ceiling); - } + type = flush_type_needed(mm, fullmm); - if (!mmu_has_feature(MMU_FTR_GTSE) && !local) { + if (fullmm) + flush_pid = true; + else if (type == FLUSH_TYPE_GLOBAL) + flush_pid = nr_pages > tlb_single_page_flush_ceiling; + else + flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; + + if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) { unsigned long tgt = H_RPTI_TARGET_CMMU; unsigned long type = H_RPTI_TYPE_TLB; unsigned long pg_sizes = psize_to_rpti_pgsize(psize); @@ -1135,8 +1150,8 @@ is_local: if (atomic_read(&mm->context.copros) > 0) tgt |= H_RPTI_TARGET_NMMU; pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end); - } else if (full) { - if (local) { + } else 
if (flush_pid) {
+		if (type == FLUSH_TYPE_LOCAL) {
 			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
 		} else {
 			if (cputlb_use_tlbie()) {
@@ -1152,7 +1167,7 @@ is_local:
 		}
 	} else {
-		if (local)
+		if (type == FLUSH_TYPE_LOCAL)
 			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
 		else if (cputlb_use_tlbie())
 			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
@@ -1179,6 +1194,7 @@ static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long
 void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
 {
 	unsigned long pid, end;
+	enum tlb_flush_type type;

 	pid = mm->context.id;
 	if (unlikely(pid == MMU_NO_CONTEXT))
@@ -1195,11 +1211,8 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
 	/* Otherwise first do the PWC, then iterate the pages. */
 	preempt_disable();
 	smp_mb(); /* see radix__flush_tlb_mm */
-	if (!mm_is_thread_local(mm)) {
-		if (unlikely(mm_is_singlethreaded(mm))) {
-			exit_flush_lazy_tlbs(mm);
-			goto local;
-		}
+	type = flush_type_needed(mm, false);
+	if (type == FLUSH_TYPE_GLOBAL) {
 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
 			unsigned long tgt, type, pg_sizes;

@@ -1218,7 +1231,6 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
 			_tlbiel_va_range_multicast(mm, addr, end, pid,
 					PAGE_SIZE, mmu_virtual_psize, true);
 	} else {
-local:
 		_tlbiel_va_range(addr, end, pid, PAGE_SIZE,
 				 mmu_virtual_psize, true);
 	}

From 54bb503345b81399575e2b7a3a6497ae212ad827 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Thu, 17 Dec 2020 23:47:27 +1000
Subject: [PATCH 126/188] powerpc/64s/radix: Check for no TLB flush required

If there are no CPUs in mm_cpumask, no TLB flush is required at all.
This patch adds a check for this case. Currently it's not tested for;
in fact mm_is_thread_local() returns false if the current CPU is not
in mm_cpumask, so it's treated as a global flush.

This can come up in some cases like exec failure before the new mm
has ever been switched to. This patch reduces TLBIE instructions
required to build a kernel from about 120,000 to 45,000. Another
situation it could help is page reclaim, KSM, THP, etc. (i.e.,
asynchronous operations external to the process) where the process
is sleeping and has all TLBs flushed out of all CPUs.

Signed-off-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20201217134731.488135-4-npiggin@gmail.com
---
 arch/powerpc/mm/book3s64/radix_tlb.c | 38 ++++++++++++++++++----------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index c7c4552f0cc2..3d8b494c39f6 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -703,13 +703,19 @@ static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
 #endif /* CONFIG_SMP */

 enum tlb_flush_type {
+	FLUSH_TYPE_NONE,
 	FLUSH_TYPE_LOCAL,
 	FLUSH_TYPE_GLOBAL,
 };

 static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
 {
-	if (mm_is_thread_local(mm))
+	int active_cpus = atomic_read(&mm->context.active_cpus);
+	int cpu = smp_processor_id();
+
+	if (active_cpus == 0)
+		return FLUSH_TYPE_NONE;
+	if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm)))
 		return FLUSH_TYPE_LOCAL;

 	/* Coprocessors require TLBIE to invalidate nMMU.
*/ @@ -763,7 +769,9 @@ void radix__flush_tlb_mm(struct mm_struct *mm) */ smp_mb(); type = flush_type_needed(mm, false); - if (type == FLUSH_TYPE_GLOBAL) { + if (type == FLUSH_TYPE_LOCAL) { + _tlbiel_pid(pid, RIC_FLUSH_TLB); + } else if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt = H_RPTI_TARGET_CMMU; @@ -779,8 +787,6 @@ void radix__flush_tlb_mm(struct mm_struct *mm) } else { _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB); } - } else { - _tlbiel_pid(pid, RIC_FLUSH_TLB); } preempt_enable(); } @@ -798,7 +804,9 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm) preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ type = flush_type_needed(mm, fullmm); - if (type == FLUSH_TYPE_GLOBAL) { + if (type == FLUSH_TYPE_LOCAL) { + _tlbiel_pid(pid, RIC_FLUSH_ALL); + } else if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt = H_RPTI_TARGET_CMMU; unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | @@ -812,8 +820,6 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm) _tlbie_pid(pid, RIC_FLUSH_ALL); else _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); - } else { - _tlbiel_pid(pid, RIC_FLUSH_ALL); } preempt_enable(); } @@ -837,7 +843,9 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ type = flush_type_needed(mm, false); - if (type == FLUSH_TYPE_GLOBAL) { + if (type == FLUSH_TYPE_LOCAL) { + _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); + } else if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt, pg_sizes, size; @@ -854,8 +862,6 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB); else _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB); - } else { - _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); } preempt_enable(); } @@ -944,6 +950,8 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ type = flush_type_needed(mm, fullmm); + if (type == FLUSH_TYPE_NONE) + goto out; if (fullmm) flush_pid = true; @@ -1008,6 +1016,7 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false); } } +out: preempt_enable(); } @@ -1132,6 +1141,8 @@ static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ type = flush_type_needed(mm, fullmm); + if (type == FLUSH_TYPE_NONE) + goto out; if (fullmm) flush_pid = true; @@ -1175,6 +1186,7 @@ static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, _tlbiel_va_range_multicast(mm, start, end, pid, page_size, psize, also_pwc); } +out: preempt_enable(); } @@ -1212,7 +1224,9 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ type = flush_type_needed(mm, false); - if (type == FLUSH_TYPE_GLOBAL) { + if (type == FLUSH_TYPE_LOCAL) { + _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); + } else if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt, type, pg_sizes; @@ -1230,8 +1244,6 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) else _tlbiel_va_range_multicast(mm, addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); - } else { - _tlbiel_va_range(addr, end, pid, PAGE_SIZE, 
mmu_virtual_psize, true); } preempt_enable(); From 780de40601aabeca41bc9aa717a329a77aa85e1a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 17 Dec 2020 23:47:28 +1000 Subject: [PATCH 127/188] powerpc/64s/radix: Allow mm_cpumask trimming from external sources mm_cpumask trimming is currently restricted to being issued by the current thread of a single-threaded mm. This patch relaxes that and allows the mask to be trimmed from any context. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201217134731.488135-5-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 3d8b494c39f6..c27ef2ff8950 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -666,20 +666,16 @@ static void do_exit_flush_lazy_tlb(void *arg) } /* - * This IPI is only initiated from a CPU which is running mm which - * is a single-threaded process, so there will not be another racing - * IPI coming in where we would find our cpumask already clear. - * - * Nothing else clears our bit in the cpumask except CPU offlining, - * in which case we should not be taking IPIs here. However check - * this just in case the logic is wrong somewhere, and don't underflow - * the active_cpus count. + * This IPI may be initiated from any source including those not + * running the mm, so there may be a racing IPI that comes after + * this one which finds the cpumask already clear. Check and avoid + * underflowing the active_cpus count in that case. The race should + * not otherwise be a problem, but the TLB must be flushed because + * that's what the caller expects. */ if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { atomic_dec(&mm->context.active_cpus); cpumask_clear_cpu(cpu, mm_cpumask(mm)); - } else { - WARN_ON_ONCE(1); } out_flush: From 9393544842d6c85ebfc387c43a5059f8171d598f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 17 Dec 2020 23:47:29 +1000 Subject: [PATCH 128/188] powerpc/64s/radix: occasionally attempt to trim mm_cpumask A single-threaded process that is flushing its own address space is so far the only case where the mm_cpumask is attempted to be trimmed. This patch expands that to attempt trimming in other situations: multi-threaded processes and external sources. For now it's a relatively simple occasional trim attempt. The main aim is to add the mechanism; tweaking and tuning can come with more data.
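For orientation, the gating heuristic this patch introduces reduces to a per-CPU counter consulted on each flush. This is a condensed restatement of the hunk in the diff below (same names as the patch, comments added), not additional code:

static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock);

static bool tick_and_test_trim_clock(void)
{
        /*
         * A free-running per-CPU tick: roughly one flush in ~1000 pays
         * for a trim attempt, every other flush stays on the fast path.
         */
        if (__this_cpu_inc_return(mm_cpumask_trim_clock) ==
                        tlb_mm_cpumask_trim_timer) {
                __this_cpu_write(mm_cpumask_trim_clock, 0);
                return true;
        }
        return false;
}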
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201217134731.488135-6-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 60 ++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index c27ef2ff8950..1ef5d4e1a50a 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -639,10 +639,8 @@ static bool mm_needs_flush_escalation(struct mm_struct *mm) return false; } -#ifdef CONFIG_SMP -static void do_exit_flush_lazy_tlb(void *arg) +static void exit_lazy_flush_tlb(struct mm_struct *mm) { - struct mm_struct *mm = arg; unsigned long pid = mm->context.id; int cpu = smp_processor_id(); @@ -682,6 +680,13 @@ out_flush: _tlbiel_pid(pid, RIC_FLUSH_ALL); } +#ifdef CONFIG_SMP +static void do_exit_flush_lazy_tlb(void *arg) +{ + struct mm_struct *mm = arg; + exit_lazy_flush_tlb(mm); +} + static void exit_flush_lazy_tlbs(struct mm_struct *mm) { /* @@ -694,10 +699,32 @@ static void exit_flush_lazy_tlbs(struct mm_struct *mm) smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb, (void *)mm, 1); } + #else /* CONFIG_SMP */ static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { } #endif /* CONFIG_SMP */ +static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock); + +/* + * Interval between flushes at which we send out IPIs to check whether the + * mm_cpumask can be trimmed for the case where it's not a single-threaded + * process flushing its own mm. The intent is to reduce the cost of later + * flushes. Don't want this to be so low that it adds noticable cost to TLB + * flushing, or so high that it doesn't help reduce global TLBIEs. + */ +static unsigned long tlb_mm_cpumask_trim_timer = 1073; + +static bool tick_and_test_trim_clock(void) +{ + if (__this_cpu_inc_return(mm_cpumask_trim_clock) == + tlb_mm_cpumask_trim_timer) { + __this_cpu_write(mm_cpumask_trim_clock, 0); + return true; + } + return false; +} + enum tlb_flush_type { FLUSH_TYPE_NONE, FLUSH_TYPE_LOCAL, @@ -711,8 +738,20 @@ static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm) if (active_cpus == 0) return FLUSH_TYPE_NONE; - if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) + if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) { + if (current->mm != mm) { + /* + * Asynchronous flush sources may trim down to nothing + * if the process is not running, so occasionally try + * to trim. + */ + if (tick_and_test_trim_clock()) { + exit_lazy_flush_tlb(mm); + return FLUSH_TYPE_NONE; + } + } return FLUSH_TYPE_LOCAL; + } /* Coprocessors require TLBIE to invalidate nMMU. */ if (atomic_read(&mm->context.copros) > 0) @@ -744,6 +783,19 @@ static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm) return FLUSH_TYPE_LOCAL; } + /* + * Occasionally try to trim down the cpumask. It's possible this can + * bring the mask to zero, which results in no flush. 
+ */ + if (tick_and_test_trim_clock()) { + exit_flush_lazy_tlbs(mm); + if (current->mm == mm) + return FLUSH_TYPE_LOCAL; + if (cpumask_test_cpu(cpu, mm_cpumask(mm))) + exit_lazy_flush_tlb(mm); + return FLUSH_TYPE_NONE; + } + return FLUSH_TYPE_GLOBAL; } From 032b7f08932c9b212952d6d585e45b2941b3e8be Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 17 Dec 2020 23:47:30 +1000 Subject: [PATCH 129/188] powerpc/64s/radix: serialize_against_pte_lookup IPIs trim mm_cpumask serialize_against_pte_lookup() performs IPIs to all CPUs in mm_cpumask. Take this opportunity to try trim the CPU out of mm_cpumask. This can reduce the cost of future serialize_against_pte_lookup() and/or the cost of future TLB flushes. Reported-by: kernel test robot Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201217134731.488135-7-npiggin@gmail.com --- arch/powerpc/mm/book3s64/pgtable.c | 13 ++++++++++--- arch/powerpc/mm/book3s64/radix_tlb.c | 20 +++++++++++++------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 5b3a3bae21aa..78c492e86752 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -79,10 +79,17 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); } -static void do_nothing(void *unused) -{ +void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush); +static void do_serialize(void *arg) +{ + /* We've taken the IPI, so try to trim the mask while here */ + if (radix_enabled()) { + struct mm_struct *mm = arg; + exit_lazy_flush_tlb(mm, false); + } } + /* * Serialize against find_current_mm_pte which does lock-less * lookup in page tables with local interrupts disabled. For huge pages @@ -96,7 +103,7 @@ static void do_nothing(void *unused) void serialize_against_pte_lookup(struct mm_struct *mm) { smp_mb(); - smp_call_function_many(mm_cpumask(mm), do_nothing, NULL, 1); + smp_call_function_many(mm_cpumask(mm), do_serialize, mm, 1); } /* diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 1ef5d4e1a50a..d7f1a6bd08ef 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -639,7 +639,11 @@ static bool mm_needs_flush_escalation(struct mm_struct *mm) return false; } -static void exit_lazy_flush_tlb(struct mm_struct *mm) +/* + * If always_flush is true, then flush even if this CPU can't be removed + * from mm_cpumask. + */ +void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush) { unsigned long pid = mm->context.id; int cpu = smp_processor_id(); @@ -652,7 +656,7 @@ static void exit_lazy_flush_tlb(struct mm_struct *mm) * done with interrupts off. 
*/ if (current->mm == mm) - goto out_flush; + goto out; if (current->active_mm == mm) { WARN_ON_ONCE(current->mm != NULL); @@ -674,17 +678,19 @@ static void exit_lazy_flush_tlb(struct mm_struct *mm) if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { atomic_dec(&mm->context.active_cpus); cpumask_clear_cpu(cpu, mm_cpumask(mm)); + always_flush = true; } -out_flush: - _tlbiel_pid(pid, RIC_FLUSH_ALL); +out: + if (always_flush) + _tlbiel_pid(pid, RIC_FLUSH_ALL); } #ifdef CONFIG_SMP static void do_exit_flush_lazy_tlb(void *arg) { struct mm_struct *mm = arg; - exit_lazy_flush_tlb(mm); + exit_lazy_flush_tlb(mm, true); } static void exit_flush_lazy_tlbs(struct mm_struct *mm) @@ -746,7 +752,7 @@ static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm) * to trim. */ if (tick_and_test_trim_clock()) { - exit_lazy_flush_tlb(mm); + exit_lazy_flush_tlb(mm, true); return FLUSH_TYPE_NONE; } } @@ -792,7 +798,7 @@ static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm) if (current->mm == mm) return FLUSH_TYPE_LOCAL; if (cpumask_test_cpu(cpu, mm_cpumask(mm))) - exit_lazy_flush_tlb(mm); + exit_lazy_flush_tlb(mm, true); return FLUSH_TYPE_NONE; } From 3cb1aa7aa39402f4f2cb847b1f16ade3bce43a97 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 17 Dec 2020 23:47:31 +1000 Subject: [PATCH 130/188] powerpc/64s: Implement ptep_clear_flush_young that does not flush TLBs Similarly to the x86 commit b13b1d2d8692 ("x86/mm: In the PTE swapout page reclaim case clear the accessed bit instead of flushing the TLB"), implement ptep_clear_flush_young that does not actually flush the TLB in the case the referenced bit is cleared. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201217134731.488135-8-npiggin@gmail.com --- arch/powerpc/include/asm/book3s/64/pgtable.h | 23 +++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index a39886681629..058601efbc8a 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -388,11 +388,28 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG #define ptep_test_and_clear_young(__vma, __addr, __ptep) \ ({ \ - int __r; \ - __r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \ - __r; \ + __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \ }) +/* + * On Book3S CPUs, clearing the accessed bit without a TLB flush + * doesn't cause data corruption. [ It could cause incorrect + * page aging and the (mistaken) reclaim of hot pages, but the + * chance of that should be relatively low. ] + * + * So as a performance optimization don't flush the TLB when + * clearing the accessed bit, it will eventually be flushed by + * a context switch or a VM operation anyway. [ In the rare + * event of it not getting flushed for a long time the delay + * shouldn't really matter because there's no real memory + * pressure for swapout to react to. 
] + */ +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +#define ptep_clear_flush_young ptep_test_and_clear_young + +#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH +#define pmdp_clear_flush_young pmdp_test_and_clear_young + static inline int __pte_write(pte_t pte) { return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); From 768d70e19ba525debd571b36e6d0ab19956c63d7 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Wed, 6 Jan 2021 20:59:00 -0600 Subject: [PATCH 131/188] powerpc/pseries/dlpar: handle ibm, configure-connector delay status dlpar_configure_connector() has two problems in its handling of ibm,configure-connector's return status: 1. When the status is -2 (busy, call again), we call ibm,configure-connector again immediately without checking whether to schedule, which can result in monopolizing the CPU. 2. Extended delay status (9900..9905) goes completely unhandled, causing the configuration to unnecessarily terminate. Fix both of these issues by using rtas_busy_delay(). Fixes: ab519a011caa ("powerpc/pseries: Kernel DLPAR Infrastructure") Signed-off-by: Nathan Lynch Reviewed-by: Tyrel Datwyler Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210107025900.410369-1-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/dlpar.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index deb48b41d488..233503fcf8f0 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -127,7 +127,6 @@ void dlpar_free_cc_nodes(struct device_node *dn) #define NEXT_PROPERTY 3 #define PREV_PARENT 4 #define MORE_MEMORY 5 -#define CALL_AGAIN -2 #define ERR_CFG_USE -9003 struct device_node *dlpar_configure_connector(__be32 drc_index, @@ -168,6 +167,9 @@ struct device_node *dlpar_configure_connector(__be32 drc_index, spin_unlock(&rtas_data_buf_lock); + if (rtas_busy_delay(rc)) + continue; + switch (rc) { case COMPLETE: break; @@ -216,9 +218,6 @@ struct device_node *dlpar_configure_connector(__be32 drc_index, last_dn = last_dn->parent; break; - case CALL_AGAIN: - break; - case MORE_MEMORY: case ERR_CFG_USE: default: From d137845c973147a22622cc76c7b0bc16f6206323 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 5 Feb 2021 04:14:52 -0500 Subject: [PATCH 132/188] powerpc/perf: Record counter overflow always if SAMPLE_IP is unset While sampling for marked events, currently we record the sample only if the SIAR valid bit of Sampled Instruction Event Register (SIER) is set. SIAR_VALID bit is used for fetching the instruction address from Sampled Instruction Address Register(SIAR). But there are some usecases, where the user is interested only in the PMU stats at each counter overflow and the exact IP of the overflow event is not required. Dropping SIAR invalid samples will fail to record some of the counter overflows in such cases. Example of such usecase is dumping the PMU stats (event counts) after some regular amount of instructions/events from the userspace (ex: via ptrace). Here counter overflow is indicated to userspace via signal handler, and captured by monitoring and enabling I/O signaling on the event file descriptor. In these cases, we expect to get sample/overflow indication after each specified sample_period. Perf event attribute will not have PERF_SAMPLE_IP set in the sample_type if exact IP of the overflow event is not requested. So while profiling if SAMPLE_IP is not set, just record the counter overflow irrespective of SIAR_VALID check. 
Suggested-by: Michael Ellerman Signed-off-by: Athira Rajeev [mpe: Reflow comment and if formatting] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1612516492-1428-1-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/core-book3s.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 278974f3264d..0e31aaa0a0d2 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2150,7 +2150,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val, left += period; if (left <= 0) left = period; - record = siar_valid(regs); + + /* + * If address is not requested in the sample via + * PERF_SAMPLE_IP, just record that sample irrespective + * of SIAR valid check. + */ + if (event->attr.sample_type & PERF_SAMPLE_IP) + record = siar_valid(regs); + else + record = 1; + event->hw.last_period = event->hw.sample_period; } if (left < 0x80000000LL) @@ -2168,9 +2178,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * MMCR2. Check attr.exclude_kernel and address to drop the sample in * these cases. */ - if (event->attr.exclude_kernel && record) - if (is_kernel_addr(mfspr(SPRN_SIAR))) - record = 0; + if (event->attr.exclude_kernel && + (event->attr.sample_type & PERF_SAMPLE_IP) && + is_kernel_addr(mfspr(SPRN_SIAR))) + record = 0; /* * Finally record data if requested. From 903178d0ce6bb30ef80a3604ab9ee2b57869fbc9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 5 Feb 2021 08:56:13 +0000 Subject: [PATCH 133/188] powerpc/8xx: Fix software emulation interrupt For unimplemented instructions or unimplemented SPRs, the 8xx triggers a "Software Emulation Exception" (0x1000). That interrupt doesn't set reason bits in SRR1 as the "Program Check Exception" does. Go through emulation_assist_interrupt() to set REASON_ILLEGAL. Fixes: fbbcc3bb139e ("powerpc/8xx: Remove SoftwareEmulation()") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ad782af87a222efc79cfb06079b0fd23d4224eaf.1612515180.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_8xx.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 7869db974185..46dff3f9c31f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -165,7 +165,7 @@ SystemCall: /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. */ - EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD) + EXCEPTION(0x1000, SoftEmu, emulation_assist_interrupt, EXC_XFER_STD) . = 0x1100 /* From bbda4b6c7d7c7f79da71f95c92a5d76be22c3efd Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 4 Feb 2021 13:37:43 +0530 Subject: [PATCH 134/188] powerpc/sstep: Fix load-store and update emulation The Power ISA says that the fixed-point load and update instructions must neither use R0 for the base address (RA) nor have the destination (RT) and the base address (RA) as the same register. Similarly, for fixed-point stores and floating-point loads and stores, the instruction is invalid when R0 is used as the base address (RA). This is applicable to the following instructions. 
* Load Byte and Zero with Update (lbzu)
* Load Byte and Zero with Update Indexed (lbzux)
* Load Halfword and Zero with Update (lhzu)
* Load Halfword and Zero with Update Indexed (lhzux)
* Load Halfword Algebraic with Update (lhau)
* Load Halfword Algebraic with Update Indexed (lhaux)
* Load Word and Zero with Update (lwzu)
* Load Word and Zero with Update Indexed (lwzux)
* Load Word Algebraic with Update Indexed (lwaux)
* Load Doubleword with Update (ldu)
* Load Doubleword with Update Indexed (ldux)
* Load Floating Single with Update (lfsu)
* Load Floating Single with Update Indexed (lfsux)
* Load Floating Double with Update (lfdu)
* Load Floating Double with Update Indexed (lfdux)
* Store Byte with Update (stbu)
* Store Byte with Update Indexed (stbux)
* Store Halfword with Update (sthu)
* Store Halfword with Update Indexed (sthux)
* Store Word with Update (stwu)
* Store Word with Update Indexed (stwux)
* Store Doubleword with Update (stdu)
* Store Doubleword with Update Indexed (stdux)
* Store Floating Single with Update (stfsu)
* Store Floating Single with Update Indexed (stfsux)
* Store Floating Double with Update (stfdu)
* Store Floating Double with Update Indexed (stfdux)
E.g. the following behaviour is observed for an invalid load and update instruction having RA = RT. While a userspace program having an instruction word like 0xe9ce0001, i.e. ldu r14, 0(r14), runs without receiving a SIGILL on a Power system (observed on P8 and P9), the outcome of executing that instruction word varies and its behaviour can be considered undefined. Attaching an uprobe at that instruction's address results in emulation which currently performs the load as well as writes the effective address back to the base register. This might not match the outcome from hardware. To remove any inconsistencies, this adds additional checks for the aforementioned instructions to make sure that the emulation infrastructure treats them as unknown. The kernel can then fall back to executing such instructions on hardware. Fixes: 0016a4cf5582 ("powerpc: Emulate most Book I instructions in emulate_step()") Signed-off-by: Sandipan Das Reviewed-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210204080744.135785-1-sandipan@linux.ibm.com --- arch/powerpc/lib/sstep.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index e96cff845ef7..11f14b209d7f 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -3017,6 +3017,20 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, } + if (OP_IS_LOAD_STORE(op->type) && (op->type & UPDATE)) { + switch (GETTYPE(op->type)) { + case LOAD: + if (ra == rd) + goto unknown_opcode; + fallthrough; + case STORE: + case LOAD_FP: + case STORE_FP: + if (ra == 0) + goto unknown_opcode; + } + } + #ifdef CONFIG_VSX if ((GETTYPE(op->type) == LOAD_VSX || GETTYPE(op->type) == STORE_VSX) && From 22b89ba178dd0a66a26699ead014a3e73ff8e044 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 4 Feb 2021 13:37:44 +0530 Subject: [PATCH 135/188] powerpc/sstep: Fix darn emulation Commit 8813ff49607e ("powerpc/sstep: Check instruction validity against ISA version before emulation") introduced a proper way to skip unknown instructions. This makes sure that the same is used for the darn instruction when the range selection bits have a reserved value.
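For context, darn's L field selects the result: 0 is a conditioned 32-bit random number, 1 a conditioned 64-bit one, 2 a raw 64-bit one, and 3 is the reserved encoding the emulator now treats as unknown. A hypothetical user of the valid L = 2 form (needs an ISA 3.0 capable assembler; not part of this patch):

static inline unsigned long darn_raw64(void)
{
        unsigned long val;

        /* L = 2: raw 64-bit random number. An all-ones result
         * indicates a hardware error and the read should be retried. */
        asm volatile("darn %0, 2" : "=r" (val));

        return val;
}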
Fixes: a23987ef267a ("powerpc: sstep: Add support for darn instruction") Signed-off-by: Sandipan Das Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210204080744.135785-2-sandipan@linux.ibm.com --- arch/powerpc/lib/sstep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 11f14b209d7f..683f7c20f74b 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1916,7 +1916,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, goto compute_done; } - return -1; + goto unknown_opcode; #ifdef __powerpc64__ case 777: /* modsd */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) From 8524e2e76441fc615a3b5c1415823e051cc79eae Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 7 Feb 2021 10:08:11 +0000 Subject: [PATCH 136/188] powerpc/uaccess: Perform barrier_nospec() in KUAP allowance helpers barrier_nospec() in uaccess helpers is there to protect against speculative accesses around access_ok(). When using user_access_begin() sequences together with unsafe_get_user()-like macros, barrier_nospec() is called for every single read although we know the access_ok() check is done once. Since all user accesses must be granted by a call to either allow_read_from_user() or allow_read_write_user(), which will always happen after the access_ok() check, move the barrier_nospec() there. Reported-by: Christopher M. Riedl Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c72f014730823b413528e90ab6c4d3bcb79f8497.1612692067.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/kup.h | 2 ++ arch/powerpc/include/asm/uaccess.h | 12 +----------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index bf221a2a523e..7ec21af49a45 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -91,6 +91,7 @@ static __always_inline void setup_kup(void) static inline void allow_read_from_user(const void __user *from, unsigned long size) { + barrier_nospec(); allow_user_access(NULL, from, size, KUAP_READ); } @@ -102,6 +103,7 @@ static inline void allow_write_to_user(void __user *to, unsigned long size) static inline void allow_read_write_user(void __user *to, const void __user *from, unsigned long size) { + barrier_nospec(); allow_user_access(to, from, size, KUAP_READ_WRITE); } diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 501c9a79038c..46123ae6a4c9 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -315,7 +315,6 @@ do { \ __chk_user_ptr(__gu_addr); \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ - barrier_nospec(); \ if (do_allow) \ __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ else \ @@ -333,10 +332,8 @@ do { \ __typeof__(size) __gu_size = (size); \ \ might_fault(); \ - if (access_ok(__gu_addr, __gu_size)) { \ - barrier_nospec(); \ + if (access_ok(__gu_addr, __gu_size)) \ __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ - } \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ \ __gu_err; \ @@ -350,7 +347,6 @@ do { \ __typeof__(size) __gu_size = (size); \ \ __chk_user_ptr(__gu_addr); \ - barrier_nospec(); \ __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ \ @@ -395,7 +391,6 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { unsigned long ret; -
barrier_nospec(); allow_read_write_user(to, from, n); ret = __copy_tofrom_user(to, from, n); prevent_read_write_user(to, from, n); @@ -412,19 +407,15 @@ static inline unsigned long raw_copy_from_user(void *to, switch (n) { case 1: - barrier_nospec(); __get_user_size(*(u8 *)to, from, 1, ret); break; case 2: - barrier_nospec(); __get_user_size(*(u16 *)to, from, 2, ret); break; case 4: - barrier_nospec(); __get_user_size(*(u32 *)to, from, 4, ret); break; case 8: - barrier_nospec(); __get_user_size(*(u64 *)to, from, 8, ret); break; } @@ -432,7 +423,6 @@ static inline unsigned long raw_copy_from_user(void *to, return 0; } - barrier_nospec(); allow_read_from_user(from, n); ret = __copy_tofrom_user((__force void __user *)to, from, n); prevent_read_from_user(from, n); From fd659e8f2c6d1e1e96fd5bdb515518801cd02012 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 6 Feb 2021 11:47:26 +0000 Subject: [PATCH 137/188] powerpc/32s: Change mfsrin() into a static inline function mfsrin() is a macro. Change it into an inline function to avoid conflicts in KVM and make it easier to evolve. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/72c7b9879e2e2e6f5c27dadda6486386c2b50f23.1612612022.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/reg.h | 11 ++++++++--- arch/powerpc/kvm/book3s_emulate.c | 4 ---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index e40a921d78f9..c006f33f68ad 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1413,9 +1413,14 @@ static inline void msr_check_and_clear(unsigned long bits) } #ifdef CONFIG_PPC32 -#define mfsrin(v) ({unsigned int rval; \ asm volatile("mfsrin %0,%1" : "=r" (rval) : "r" (v)); \ rval;}) +static inline u32 mfsrin(u32 idx) +{ + u32 val; + + asm volatile("mfsrin %0, %1" : "=r" (val): "r" (idx)); + + return val; +} static inline void mtsrin(u32 val, u32 idx) { diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index b08cc15f31c7..fdb57be71aa6 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -61,10 +61,6 @@ #define SPRN_GQR6 918 #define SPRN_GQR7 919 -/* Book3S_32 defines mfsrin(v) - but that messes up our abstract * function pointers, so let's just disable the define. */ -#undef mfsrin enum priv_level { PRIV_PROBLEM = 0, PRIV_SUPER = 1, From 179ae57dbad1b9a83eec376aa44d54fc24352e37 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 6 Feb 2021 11:47:27 +0000 Subject: [PATCH 138/188] powerpc/32s: mfsrin()/mtsrin() become mfsr()/mtsr() Function names should tell what the function does, not how. mfsrin() and mtsrin() read and write segment registers. They are called that way because they use the mfsrin and mtsrin instructions, but that doesn't matter to the caller. In preparation for the following patch, change their names to mfsr() and mtsr() in order to make it obvious they manipulate segment registers, without reference to how they do it.
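As an illustration of the new names, a caller reads and updates a segment register without caring which instruction implements the access. This hypothetical helper mirrors the mmu_mark_initmem_nx() hunk below:

static void segment_set_noexec(u32 ea)
{
        /* Fetch the segment register covering ea, set the N
         * (no-execute) bit, and write it back. */
        mtsr(mfsr(ea) | 0x10000000, ea);
}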
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f92d99f4349391b77766745900231aa880a0efb5.1612612022.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 8 ++++---- arch/powerpc/include/asm/reg.h | 4 ++-- arch/powerpc/mm/book3s32/mmu.c | 2 +- arch/powerpc/mm/ptdump/segment_regs.c | 2 +- arch/powerpc/xmon/xmon.c | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index a0117a9d5b06..73bc5d2c431d 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -95,12 +95,12 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) addr &= 0xf0000000; /* align addr to start of segment */ barrier(); /* make sure thread.kuap is updated before playing with SRs */ while (addr < end) { - mtsrin(sr, addr); + mtsr(sr, addr); sr += 0x111; /* next VSID */ sr &= 0xf0ffffff; /* clear VSID overflow */ addr += 0x10000000; /* address of next segment */ } - isync(); /* Context sync required after mtsrin() */ + isync(); /* Context sync required after mtsr() */ } static __always_inline void allow_user_access(void __user *to, const void __user *from, @@ -122,7 +122,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user end = min(addr + size, TASK_SIZE); current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf); - kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */ + kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end); /* Clear Ks */ } static __always_inline void prevent_user_access(void __user *to, const void __user *from, @@ -151,7 +151,7 @@ static __always_inline void prevent_user_access(void __user *to, const void __us } current->thread.kuap = 0; - kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */ + kuap_update_sr(mfsr(addr) | SR_KS, addr, end); /* set Ks */ } static inline unsigned long prevent_user_access_return(void) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c006f33f68ad..83a7fc37d490 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1413,7 +1413,7 @@ static inline void msr_check_and_clear(unsigned long bits) } #ifdef CONFIG_PPC32 -static inline u32 mfsrin(u32 idx) +static inline u32 mfsr(u32 idx) { u32 val; @@ -1422,7 +1422,7 @@ static inline u32 mfsrin(u32 idx) return val; } -static inline void mtsrin(u32 val, u32 idx) +static inline void mtsr(u32 val, u32 idx) { asm volatile("mtsrin %0, %1" : : "r" (val), "r" (idx)); } diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 859e5bd603ac..d7eb266a3f7a 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -234,7 +234,7 @@ void mmu_mark_initmem_nx(void) if (is_module_segment(i << 28)) continue; - mtsrin(mfsrin(i << 28) | 0x10000000, i << 28); + mtsr(mfsr(i << 28) | 0x10000000, i << 28); } } diff --git a/arch/powerpc/mm/ptdump/segment_regs.c b/arch/powerpc/mm/ptdump/segment_regs.c index dde2fe8de4b2..565048a0c9be 100644 --- a/arch/powerpc/mm/ptdump/segment_regs.c +++ b/arch/powerpc/mm/ptdump/segment_regs.c @@ -10,7 +10,7 @@ static void seg_show(struct seq_file *m, int i) { - u32 val = mfsrin(i << 28); + u32 val = mfsr(i << 28); seq_printf(m, "0x%01x0000000-0x%01xfffffff ", i, i); seq_printf(m, "Kern key %d ", (val >> 30) & 1); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index cec432eb9189..3fe37495f63d 100644 --- 
a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3719,7 +3719,7 @@ void dump_segments(void) printf("sr0-15 ="); for (i = 0; i < 16; ++i) - printf(" %x", mfsrin(i << 28)); + printf(" %x", mfsr(i << 28)); printf("\n"); } #endif From b842d131c7983f8f0b9c9572c073130b5f2bcf11 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 6 Feb 2021 11:47:28 +0000 Subject: [PATCH 139/188] powerpc/32s: Allow constant folding in mtsr()/mfsr() On the same way as we did in wrtee(), add an alternative using mtsr/mfsr instructions instead of mtsrin/mfsrin when the segment register can be determined at compile time. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9baed0ff9d76723ec90f1b567ddd4ac1ecc7a190.1612612022.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/reg.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 83a7fc37d490..eb4aa889916d 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1417,14 +1417,20 @@ static inline u32 mfsr(u32 idx) { u32 val; - asm volatile("mfsrin %0, %1" : "=r" (val): "r" (idx)); + if (__builtin_constant_p(idx)) + asm volatile("mfsr %0, %1" : "=r" (val): "i" (idx >> 28)); + else + asm volatile("mfsrin %0, %1" : "=r" (val): "r" (idx)); return val; } static inline void mtsr(u32 val, u32 idx) { - asm volatile("mtsrin %0, %1" : : "r" (val), "r" (idx)); + if (__builtin_constant_p(idx)) + asm volatile("mtsr %1, %0" : : "r" (val), "i" (idx >> 28)); + else + asm volatile("mtsrin %0, %1" : : "r" (val), "r" (idx)); } #endif From 73287caa9210ded6066833195f4335f7f688a46b Mon Sep 17 00:00:00 2001 From: "Christopher M. Riedl" Date: Sat, 6 Feb 2021 01:23:42 -0600 Subject: [PATCH 140/188] powerpc64/idle: Fix SP offsets when saving GPRs The idle entry/exit code saves/restores GPRs in the stack "red zone" (Protected Zone according to PowerPC64 ELF ABI v2). However, the offset used for the first GPR is incorrect and overwrites the back chain - the Protected Zone actually starts below the current SP. In practice this is probably not an issue, but it's still incorrect so fix it. Also expand the comments to explain why using the stack "red zone" instead of creating a new stackframe is appropriate here. Signed-off-by: Christopher M. 
Riedl Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210206072342.5067-1-cmr@codefail.de --- arch/powerpc/kernel/idle_book3s.S | 138 ++++++++++++++++-------------- 1 file changed, 73 insertions(+), 65 deletions(-) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 22f249b6f58d..f9e6d83e6720 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -52,28 +52,32 @@ _GLOBAL(isa300_idle_stop_mayloss) std r1,PACAR1(r13) mflr r4 mfcr r5 - /* use stack red zone rather than a new frame for saving regs */ - std r2,-8*0(r1) - std r14,-8*1(r1) - std r15,-8*2(r1) - std r16,-8*3(r1) - std r17,-8*4(r1) - std r18,-8*5(r1) - std r19,-8*6(r1) - std r20,-8*7(r1) - std r21,-8*8(r1) - std r22,-8*9(r1) - std r23,-8*10(r1) - std r24,-8*11(r1) - std r25,-8*12(r1) - std r26,-8*13(r1) - std r27,-8*14(r1) - std r28,-8*15(r1) - std r29,-8*16(r1) - std r30,-8*17(r1) - std r31,-8*18(r1) - std r4,-8*19(r1) - std r5,-8*20(r1) + /* + * Use the stack red zone rather than a new frame for saving regs since + * in the case of no GPR loss the wakeup code branches directly back to + * the caller without deallocating the stack frame first. + */ + std r2,-8*1(r1) + std r14,-8*2(r1) + std r15,-8*3(r1) + std r16,-8*4(r1) + std r17,-8*5(r1) + std r18,-8*6(r1) + std r19,-8*7(r1) + std r20,-8*8(r1) + std r21,-8*9(r1) + std r22,-8*10(r1) + std r23,-8*11(r1) + std r24,-8*12(r1) + std r25,-8*13(r1) + std r26,-8*14(r1) + std r27,-8*15(r1) + std r28,-8*16(r1) + std r29,-8*17(r1) + std r30,-8*18(r1) + std r31,-8*19(r1) + std r4,-8*20(r1) + std r5,-8*21(r1) /* 168 bytes */ PPC_STOP b . /* catch bugs */ @@ -89,8 +93,8 @@ _GLOBAL(isa300_idle_stop_mayloss) */ _GLOBAL(idle_return_gpr_loss) ld r1,PACAR1(r13) - ld r4,-8*19(r1) - ld r5,-8*20(r1) + ld r4,-8*20(r1) + ld r5,-8*21(r1) mtlr r4 mtcr r5 /* @@ -98,25 +102,25 @@ _GLOBAL(idle_return_gpr_loss) * from PACATOC. This could be avoided for that less common case * if KVM saved its r2. */ - ld r2,-8*0(r1) - ld r14,-8*1(r1) - ld r15,-8*2(r1) - ld r16,-8*3(r1) - ld r17,-8*4(r1) - ld r18,-8*5(r1) - ld r19,-8*6(r1) - ld r20,-8*7(r1) - ld r21,-8*8(r1) - ld r22,-8*9(r1) - ld r23,-8*10(r1) - ld r24,-8*11(r1) - ld r25,-8*12(r1) - ld r26,-8*13(r1) - ld r27,-8*14(r1) - ld r28,-8*15(r1) - ld r29,-8*16(r1) - ld r30,-8*17(r1) - ld r31,-8*18(r1) + ld r2,-8*1(r1) + ld r14,-8*2(r1) + ld r15,-8*3(r1) + ld r16,-8*4(r1) + ld r17,-8*5(r1) + ld r18,-8*6(r1) + ld r19,-8*7(r1) + ld r20,-8*8(r1) + ld r21,-8*9(r1) + ld r22,-8*10(r1) + ld r23,-8*11(r1) + ld r24,-8*12(r1) + ld r25,-8*13(r1) + ld r26,-8*14(r1) + ld r27,-8*15(r1) + ld r28,-8*16(r1) + ld r29,-8*17(r1) + ld r30,-8*18(r1) + ld r31,-8*19(r1) blr /* @@ -154,28 +158,32 @@ _GLOBAL(isa206_idle_insn_mayloss) std r1,PACAR1(r13) mflr r4 mfcr r5 - /* use stack red zone rather than a new frame for saving regs */ - std r2,-8*0(r1) - std r14,-8*1(r1) - std r15,-8*2(r1) - std r16,-8*3(r1) - std r17,-8*4(r1) - std r18,-8*5(r1) - std r19,-8*6(r1) - std r20,-8*7(r1) - std r21,-8*8(r1) - std r22,-8*9(r1) - std r23,-8*10(r1) - std r24,-8*11(r1) - std r25,-8*12(r1) - std r26,-8*13(r1) - std r27,-8*14(r1) - std r28,-8*15(r1) - std r29,-8*16(r1) - std r30,-8*17(r1) - std r31,-8*18(r1) - std r4,-8*19(r1) - std r5,-8*20(r1) + /* + * Use the stack red zone rather than a new frame for saving regs since + * in the case of no GPR loss the wakeup code branches directly back to + * the caller without deallocating the stack frame first. 
+ */ + std r2,-8*1(r1) + std r14,-8*2(r1) + std r15,-8*3(r1) + std r16,-8*4(r1) + std r17,-8*5(r1) + std r18,-8*6(r1) + std r19,-8*7(r1) + std r20,-8*8(r1) + std r21,-8*9(r1) + std r22,-8*10(r1) + std r23,-8*11(r1) + std r24,-8*12(r1) + std r25,-8*13(r1) + std r26,-8*14(r1) + std r27,-8*15(r1) + std r28,-8*16(r1) + std r29,-8*17(r1) + std r30,-8*18(r1) + std r31,-8*19(r1) + std r4,-8*20(r1) + std r5,-8*21(r1) cmpwi r3,PNV_THREAD_NAP bne 1f IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) From 0ecf6a9e47d825b7dddfebca738386b809e59a94 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Feb 2021 00:02:06 +1100 Subject: [PATCH 141/188] powerpc/64: Make stack tracing work during very early boot If we try to stack trace very early during boot, either due to a WARN/BUG or manual dump_stack(), we will oops in valid_emergency_stack() when we try to dereference the paca_ptrs array. The fix is simple, we just return false if paca_ptrs isn't allocated yet. The stack pointer definitely isn't part of any emergency stack because we haven't allocated any yet. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210202130207.1303975-1-mpe@ellerman.id.au --- arch/powerpc/kernel/process.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8520ed5ae144..e296440e9d16 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2047,6 +2047,9 @@ static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, unsigned long stack_page; unsigned long cpu = task_cpu(p); + if (!paca_ptrs) + return 0; + stack_page = (unsigned long)paca_ptrs[cpu]->emergency_sp - THREAD_SIZE; if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) return 1; From e7eb919057c3450cdd9d335e4a23a4da8da58db4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Feb 2021 00:02:07 +1100 Subject: [PATCH 142/188] powerpc/64s: Handle program checks in wrong endian during early boot There's a short window during boot where although the kernel is running little endian, any exceptions will cause the CPU to switch back to big endian. This situation persists until we call configure_exceptions(), which calls either the hypervisor or OPAL to configure the CPU so that exceptions will be taken in little endian (via HID0[HILE]). We don't intend to take exceptions during early boot, but one way we sometimes do is via a WARN/BUG etc. Those all boil down to a trap instruction, which will cause a program check exception. The first instruction of the program check handler is an mtsprg, which when executed in the wrong endian is an lhzu with a ~3GB displacement from r3. The content of r3 is random, so that becomes a load from some random location, and depending on the system (installed RAM etc.) can easily lead to a checkstop, or an infinitely recursive page fault. That prevents whatever the WARN/BUG was complaining about being printed to the console, and the user just sees a dead system. We can fix it by having a trampoline at the beginning of the program check handler that detects we are in the wrong endian, and flips us back to the correct endian. We can't flip MSR[LE] using mtmsr (alas), so we have to use rfid. That requires backing up SRR0/1 as well as a GPR. To do that we use SPRG0/2/3 (SPRG1 is already used for the paca). SPRG3 is user readable, but this trampoline is only active very early in boot, and SPRG3 will be reinitialised in vdso_getcpu_init() before userspace starts. 
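Restated from the patch's own comments, the trampoline added in the diff below behaves conceptually like this (pseudo-assembly; the .long words in the real patch are these operations hand-assembled in the opposite endian):

        tdi     0,0,0x48        /* trap never; read in reverse endian: b . + 8 */
        b       1f              /* endian already correct: skip the trampoline */
        /* encoded reverse-endian: save r11 and SRR0/SRR1 in SPRG0/2/3,
         * invert MSR[LE] in SRR1, set SRR0 to 0x734 (the restore
         * sequence below) and rfid to retake execution correctly */
        mfsprg  r11, 3          /* reached via rfid, now in the right endian */
        mtsrr1  r11             /* restore SRR1 */
        mfsprg  r11, 2
        mtsrr0  r11             /* restore SRR0 */
        mfsprg  r11, 0          /* restore r11 */
1: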
With this trampoline in place we can survive a WARN early in boot and print a stack trace, which is eventually printed to the console once the console is up, eg: [83565.758545] kexec_core: Starting new kernel [ 0.000000] ------------[ cut here ]------------ [ 0.000000] static_key_enable_cpuslocked(): static key '0xc000000000ea6160' used before call to jump_label_init() [ 0.000000] WARNING: CPU: 0 PID: 0 at kernel/jump_label.c:166 static_key_enable_cpuslocked+0xfc/0x120 [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0-gcc-8.2.0-dirty #618 [ 0.000000] NIP: c0000000002fd46c LR: c0000000002fd468 CTR: c000000000170660 [ 0.000000] REGS: c000000001227940 TRAP: 0700 Not tainted (5.10.0-gcc-8.2.0-dirty) [ 0.000000] MSR: 9000000002823003 CR: 24882422 XER: 20040000 [ 0.000000] CFAR: 0000000000000730 IRQMASK: 1 [ 0.000000] GPR00: c0000000002fd468 c000000001227bd0 c000000001228300 0000000000000065 [ 0.000000] GPR04: 0000000000000001 0000000000000065 c0000000010cf970 000000000000000d [ 0.000000] GPR08: 0000000000000000 0000000000000000 0000000000000000 c00000000122763f [ 0.000000] GPR12: 0000000000002000 c000000000f8a980 0000000000000000 0000000000000000 [ 0.000000] GPR16: 0000000000000000 0000000000000000 c000000000f88c8e c000000000f88c9a [ 0.000000] GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 0.000000] GPR24: 0000000000000000 c000000000dea3a8 0000000000000000 c000000000f35114 [ 0.000000] GPR28: 0000002800000000 c000000000f88c9a c000000000f88c8e c000000000ea6160 [ 0.000000] NIP [c0000000002fd46c] static_key_enable_cpuslocked+0xfc/0x120 [ 0.000000] LR [c0000000002fd468] static_key_enable_cpuslocked+0xf8/0x120 [ 0.000000] Call Trace: [ 0.000000] [c000000001227bd0] [c0000000002fd468] static_key_enable_cpuslocked+0xf8/0x120 (unreliable) [ 0.000000] [c000000001227c40] [c0000000002fd4c0] static_key_enable+0x30/0x50 [ 0.000000] [c000000001227c70] [c000000000f6629c] early_page_poison_param+0x58/0x9c [ 0.000000] [c000000001227cb0] [c000000000f351b8] do_early_param+0xa4/0x10c [ 0.000000] [c000000001227d30] [c00000000011e020] parse_args+0x270/0x5e0 [ 0.000000] [c000000001227e20] [c000000000f35864] parse_early_options+0x48/0x5c [ 0.000000] [c000000001227e40] [c000000000f358d0] parse_early_param+0x58/0x84 [ 0.000000] [c000000001227e70] [c000000000f3a368] early_init_devtree+0xc4/0x490 [ 0.000000] [c000000001227f10] [c000000000f3bca0] early_setup+0xc8/0x1c8 [ 0.000000] [c000000001227f90] [000000000000c320] 0xc320 [ 0.000000] Instruction dump: [ 0.000000] 4bfffddd 7c2004ac 39200001 913f0000 4bffffb8 7c651b78 3c82ffac 3c62ffc0 [ 0.000000] 38841b00 3863f310 4bdf03a5 60000000 <0fe00000> 4bffff38 60000000 60000000 [ 0.000000] random: get_random_bytes called from print_oops_end_marker+0x40/0x80 with crng_init=0 [ 0.000000] ---[ end trace 0000000000000000 ]--- [ 0.000000] dt-cpu-ftrs: setup for ISA 3000 Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210202130207.1303975-2-mpe@ellerman.id.au --- arch/powerpc/kernel/exceptions-64s.S | 45 ++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index b3793f982b2b..c51c436d5845 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1630,6 +1630,51 @@ INT_DEFINE_BEGIN(program_check) INT_DEFINE_END(program_check) EXC_REAL_BEGIN(program_check, 0x700, 0x100) + +#ifdef CONFIG_CPU_LITTLE_ENDIAN + /* + * There's a short window during boot where although the 
kernel is + * running little endian, any exceptions will cause the CPU to switch + * back to big endian. For example a WARN() boils down to a trap + * instruction, which will cause a program check, and we end up here but + * with the CPU in big endian mode. The first instruction of the program + * check handler (in GEN_INT_ENTRY below) is an mtsprg, which when + * executed in the wrong endian is an lhzu with a ~3GB displacement from + * r3. The content of r3 is random, so that is a load from some random + * location, and depending on the system can easily lead to a checkstop, + * or an infinitely recursive page fault. + * + * So to handle that case we have a trampoline here that can detect we + * are in the wrong endian and flip us back to the correct endian. We + * can't flip MSR[LE] using mtmsr, so we have to use rfid. That requires + * backing up SRR0/1 as well as a GPR. To do that we use SPRG0/2/3, as + * SPRG1 is already used for the paca. SPRG3 is user readable, but this + * trampoline is only active very early in boot, and SPRG3 will be + * reinitialised in vdso_getcpu_init() before userspace starts. + */ +BEGIN_FTR_SECTION + tdi 0,0,0x48 // Trap never, or in reverse endian: b . + 8 + b 1f // Skip trampoline if endian is correct + .long 0xa643707d // mtsprg 0, r11 Backup r11 + .long 0xa6027a7d // mfsrr0 r11 + .long 0xa643727d // mtsprg 2, r11 Backup SRR0 in SPRG2 + .long 0xa6027b7d // mfsrr1 r11 + .long 0xa643737d // mtsprg 3, r11 Backup SRR1 in SPRG3 + .long 0xa600607d // mfmsr r11 + .long 0x01006b69 // xori r11, r11, 1 Invert MSR[LE] + .long 0xa6037b7d // mtsrr1 r11 + .long 0x34076039 // li r11, 0x734 + .long 0xa6037a7d // mtsrr0 r11 + .long 0x2400004c // rfid + mfsprg r11, 3 + mtsrr1 r11 // Restore SRR1 + mfsprg r11, 2 + mtsrr0 r11 // Restore SRR0 + mfsprg r11, 0 // Restore r11 +1: +END_FTR_SECTION(0, 1) // nop out after boot +#endif /* CONFIG_CPU_LITTLE_ENDIAN */ + GEN_INT_ENTRY program_check, virt=0 EXC_REAL_END(program_check, 0x700, 0x100) EXC_VIRT_BEGIN(program_check, 0x4700, 0x100) From dea6f4c696996b9dff37f4e6690eb30e0878ace9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 11 Feb 2021 22:23:57 +1100 Subject: [PATCH 143/188] powerpc/powernv/pci: Use kzalloc() for phb related allocations As part of commit fbbefb320214 ("powerpc/pci: Move PHB discovery for PCI_DN using platforms"), I switched some allocations from memblock_alloc() to kmalloc(), otherwise memblock would warn that it was being called after slab init. However I missed that the code relied on the allocations being zeroed, without which we could end up crashing: pci_bus 0000:00: busn_res: [bus 00-ff] end is updated to ff BUG: Unable to handle kernel data access on read at 0x6b6b6b6b6b6b6af7 Faulting instruction address: 0xc0000000000dbc90 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV ... NIP pnv_ioda_get_pe_state+0xe0/0x1d0 LR pnv_ioda_get_pe_state+0xb4/0x1d0 Call Trace: pnv_ioda_get_pe_state+0xb4/0x1d0 (unreliable) pnv_pci_config_check_eeh.isra.9+0x78/0x270 pnv_pci_read_config+0xf8/0x160 pci_bus_read_config_dword+0xa4/0x120 pci_bus_generic_read_dev_vendor_id+0x54/0x270 pci_scan_single_device+0xb8/0x140 pci_scan_slot+0x80/0x1b0 pci_scan_child_bus_extend+0x94/0x490 pcibios_scan_phb+0x1f8/0x3c0 pcibios_init+0x8c/0x12c do_one_initcall+0x94/0x510 kernel_init_freeable+0x35c/0x3fc kernel_init+0x2c/0x168 ret_from_kernel_thread+0x5c/0x70 Switch them to kzalloc(). 
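The underlying pitfall, as a generic sketch (names are illustrative, not from this file): any structure whose unset fields are assumed to be zero must come from a zeroing allocator.

struct foo {
        struct list_head list;
        bool initialised;       /* tested before explicit setup */
};

/* kmalloc() leaves the memory uninitialised: f->initialised is
 * garbage, which is exactly the class of crash described above. */
struct foo *f = kmalloc(sizeof(*f), GFP_KERNEL);

/* kzalloc() returns zeroed memory: g->initialised is false and all
 * pointers are NULL, matching what memblock_alloc() used to give. */
struct foo *g = kzalloc(sizeof(*g), GFP_KERNEL);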
Fixes: fbbefb320214 ("powerpc/pci: Move PHB discovery for PCI_DN using platforms") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210211112749.3410771-1-mpe@ellerman.id.au --- arch/powerpc/platforms/powernv/pci-ioda.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 7ee14ac275bd..f0f901683a2f 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2921,7 +2921,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb_id = be64_to_cpup(prop64); pr_debug(" PHB-ID : 0x%016llx\n", phb_id); - phb = kmalloc(sizeof(*phb), GFP_KERNEL); + phb = kzalloc(sizeof(*phb), GFP_KERNEL); if (!phb) panic("%s: Failed to allocate %zu bytes\n", __func__, sizeof(*phb)); @@ -2970,7 +2970,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, else phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE; - phb->diag_data = kmalloc(phb->diag_data_size, GFP_KERNEL); + phb->diag_data = kzalloc(phb->diag_data_size, GFP_KERNEL); if (!phb->diag_data) panic("%s: Failed to allocate %u bytes\n", __func__, phb->diag_data_size); @@ -3032,7 +3032,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, } pemap_off = size; size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe); - aux = kmalloc(size, GFP_KERNEL); + aux = kzalloc(size, GFP_KERNEL); if (!aux) panic("%s: Failed to allocate %lu bytes\n", __func__, size); From e4bb64c7a42e61bcb6f8b70279fc1f7805eaad3f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 11 Feb 2021 16:36:36 +1000 Subject: [PATCH 144/188] powerpc: remove interrupt handler functions from the noinstr section The allyesconfig ppc64 kernel fails to link with relocations unable to fit after commit 3a96570ffceb ("powerpc: convert interrupt handlers to use wrappers"), which is due to the interrupt handler functions being put into the .noinstr.text section, which the linker script places on the opposite side of the main .text section from the interrupt entry asm code which calls the handlers. This results in a lot of linker stubs that overwhelm the 252-byte sized space we allow for them, or in the case of BE a .opd relocation link error for some reason. It's not required to put interrupt handlers in the .noinstr section, previously they used NOKPROBE_SYMBOL, so take them out and replace with a NOKPROBE_SYMBOL in the wrapper macro. Remove the explicit NOKPROBE_SYMBOL macros in the interrupt handler functions. This makes a number of interrupt handlers nokprobe that were not prior to the interrupt wrappers commit, but since that commit they were made nokprobe due to being in .noinstr.text, so this fix does not change that. The fixes tag is different to the commit that first exposes the problem because it is where the wrapper macros were introduced. 
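For reference, the resulting pattern is roughly what the wrapper macros now expand to (condensed; the handler name is hypothetical). NOKPROBE_SYMBOL() records the symbol in a blacklist consulted when a kprobe is registered, so the function stays in regular .text within stub range of the entry asm:

interrupt_handler void example_fault_handler(struct pt_regs *regs)
{
        /* handler body, emitted into .text near its callers */
}
NOKPROBE_SYMBOL(example_fault_handler); /* no kprobes, no relocation */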
Fixes: 8d41fc618ab8 ("powerpc: interrupt handler wrapper functions") Reported-by: Stephen Rothwell Signed-off-by: Nicholas Piggin [mpe: Slightly fix up comment wording] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210211063636.236420-1-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 25 ++++++++++++++++++++----- arch/powerpc/kernel/traps.c | 9 --------- arch/powerpc/mm/fault.c | 1 - 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 4badb3e51c19..5c285592d9b6 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -6,6 +6,7 @@ #include #include #include +#include #include struct interrupt_state { @@ -164,6 +165,15 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter #endif } +/* + * Don't use noinstr here like x86, but rather add NOKPROBE_SYMBOL to each + * function definition. The reason for this is the noinstr section is placed + * after the main text section, i.e., very far away from the interrupt entry + * asm. That creates problems with fitting linker stubs when building large + * kernels. + */ +#define interrupt_handler __visible noinline notrace __no_kcsan __no_sanitize_address + /** * DECLARE_INTERRUPT_HANDLER_RAW - Declare raw interrupt handler function * @func: Function name of the entry point @@ -198,7 +208,7 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter #define DEFINE_INTERRUPT_HANDLER_RAW(func) \ static __always_inline long ____##func(struct pt_regs *regs); \ \ -__visible noinstr long func(struct pt_regs *regs) \ +interrupt_handler long func(struct pt_regs *regs) \ { \ long ret; \ \ @@ -206,6 +216,7 @@ __visible noinstr long func(struct pt_regs *regs) \ \ return ret; \ } \ +NOKPROBE_SYMBOL(func); \ \ static __always_inline long ____##func(struct pt_regs *regs) @@ -228,7 +239,7 @@ static __always_inline long ____##func(struct pt_regs *regs) #define DEFINE_INTERRUPT_HANDLER(func) \ static __always_inline void ____##func(struct pt_regs *regs); \ \ -__visible noinstr void func(struct pt_regs *regs) \ +interrupt_handler void func(struct pt_regs *regs) \ { \ struct interrupt_state state; \ \ @@ -238,6 +249,7 @@ __visible noinstr void func(struct pt_regs *regs) \ \ interrupt_exit_prepare(regs, &state); \ } \ +NOKPROBE_SYMBOL(func); \ \ static __always_inline void ____##func(struct pt_regs *regs) @@ -262,7 +274,7 @@ static __always_inline void ____##func(struct pt_regs *regs) #define DEFINE_INTERRUPT_HANDLER_RET(func) \ static __always_inline long ____##func(struct pt_regs *regs); \ \ -__visible noinstr long func(struct pt_regs *regs) \ +interrupt_handler long func(struct pt_regs *regs) \ { \ struct interrupt_state state; \ long ret; \ @@ -275,6 +287,7 @@ __visible noinstr long func(struct pt_regs *regs) \ \ return ret; \ } \ +NOKPROBE_SYMBOL(func); \ \ static __always_inline long ____##func(struct pt_regs *regs) @@ -297,7 +310,7 @@ static __always_inline long ____##func(struct pt_regs *regs) #define DEFINE_INTERRUPT_HANDLER_ASYNC(func) \ static __always_inline void ____##func(struct pt_regs *regs); \ \ -__visible noinstr void func(struct pt_regs *regs) \ +interrupt_handler void func(struct pt_regs *regs) \ { \ struct interrupt_state state; \ \ @@ -307,6 +320,7 @@ __visible noinstr void func(struct pt_regs *regs) \ \ interrupt_async_exit_prepare(regs, &state); \ } \ +NOKPROBE_SYMBOL(func); \ \ static __always_inline void ____##func(struct pt_regs 
*regs) @@ -331,7 +345,7 @@ static __always_inline void ____##func(struct pt_regs *regs) #define DEFINE_INTERRUPT_HANDLER_NMI(func) \ static __always_inline long ____##func(struct pt_regs *regs); \ \ -__visible noinstr long func(struct pt_regs *regs) \ +interrupt_handler long func(struct pt_regs *regs) \ { \ struct interrupt_nmi_state state; \ long ret; \ @@ -344,6 +358,7 @@ __visible noinstr long func(struct pt_regs *regs) \ \ return ret; \ } \ +NOKPROBE_SYMBOL(func); \ \ static __always_inline long ____##func(struct pt_regs *regs) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 39c8b7e9b91a..1583fd1c6010 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -513,7 +513,6 @@ out: return 0; } -NOKPROBE_SYMBOL(system_reset_exception); /* * I/O accesses can cause machine checks on powermacs. @@ -798,7 +797,6 @@ void die_mce(const char *str, struct pt_regs *regs, long err) nmi_exit(); die(str, regs, err); } -NOKPROBE_SYMBOL(die_mce); /* * BOOK3S_64 does not call this handler as a non-maskable interrupt @@ -851,7 +849,6 @@ bail: return 0; #endif } -NOKPROBE_SYMBOL(machine_check_exception); DEFINE_INTERRUPT_HANDLER(SMIException) /* async? */ { @@ -1113,7 +1110,6 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception) _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); } -NOKPROBE_SYMBOL(single_step_exception); /* * After we have successfully emulated an instruction, we have to @@ -1556,7 +1552,6 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception) { do_program_check(regs); } -NOKPROBE_SYMBOL(program_check_exception); /* * This occurs when running in hypervisor mode on POWER6 or later @@ -1567,7 +1562,6 @@ DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt) regs->msr |= REASON_ILLEGAL; do_program_check(regs); } -NOKPROBE_SYMBOL(emulation_assist_interrupt); DEFINE_INTERRUPT_HANDLER(alignment_exception) { @@ -2034,7 +2028,6 @@ DEFINE_INTERRUPT_HANDLER(DebugException) } else handle_debug(regs, debug_status); } -NOKPROBE_SYMBOL(DebugException); #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ #ifdef CONFIG_ALTIVEC @@ -2183,7 +2176,6 @@ DEFINE_INTERRUPT_HANDLER(unrecoverable_exception) regs->trap, regs->nip, regs->msr); die("Unrecoverable exception", regs, SIGABRT); } -NOKPROBE_SYMBOL(unrecoverable_exception); #if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x) /* @@ -2214,7 +2206,6 @@ DEFINE_INTERRUPT_HANDLER(kernel_bad_stack) regs->gpr[1], regs->nip); die("Bad kernel stack pointer", regs, SIGABRT); } -NOKPROBE_SYMBOL(kernel_bad_stack); void __init trap_init(void) { diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index b26a7643fc6e..bb368257b55c 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -566,7 +566,6 @@ DEFINE_INTERRUPT_HANDLER_RET(do_page_fault) { return __do_page_fault(regs); } -NOKPROBE_SYMBOL(do_page_fault); #ifdef CONFIG_PPC_BOOK3S_64 /* Same as do_page_fault but interrupt entry has already run in do_hash_fault */ From 5c47c44f157f408c862b144bbd1d1e161a521aa2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 11 Feb 2021 00:08:02 +1100 Subject: [PATCH 145/188] powerpc/83xx: Fix build error when CONFIG_PCI=n As reported by lkp: arch/powerpc/platforms/83xx/km83xx.c:183:19: error: 'mpc83xx_setup_pci' undeclared here (not in a function) 183 | .discover_phbs = mpc83xx_setup_pci, | ^~~~~~~~~~~~~~~~~ | mpc83xx_setup_arch There is a stub defined for the CONFIG_PCI=n case, but now that mpc83xx_setup_pci() is being assigned to discover_phbs the correct empty value is NULL. 
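The underlying C issue reduces to the following sketch (structure and field names are simplified stand-ins for the real machdep_calls):

struct machdep_calls_sketch {
	void (*discover_phbs)(void);
};

#ifdef CONFIG_PCI
void mpc83xx_setup_pci(void);
#else
/*
 * The old stub, "#define mpc83xx_setup_pci() do {} while (0)", is a
 * statement: it works at call sites but is not a value, so it cannot
 * appear in the initializer below. NULL can.
 */
#define mpc83xx_setup_pci NULL
#endif

static struct machdep_calls_sketch md_sketch = {
	.discover_phbs = mpc83xx_setup_pci,	/* compiles in both configs */
};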
Fixes: 83f84041ff1c ("powerpc/83xx: Move PHB discovery") Reported-by: kernel test robot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210210130804.3190952-1-mpe@ellerman.id.au --- arch/powerpc/platforms/83xx/mpc83xx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h index f37d04332fc7..a30d30588cf6 100644 --- a/arch/powerpc/platforms/83xx/mpc83xx.h +++ b/arch/powerpc/platforms/83xx/mpc83xx.h @@ -76,7 +76,7 @@ extern void mpc83xx_ipic_init_IRQ(void); #ifdef CONFIG_PCI extern void mpc83xx_setup_pci(void); #else -#define mpc83xx_setup_pci() do {} while (0) +#define mpc83xx_setup_pci NULL #endif extern int mpc83xx_declare_of_platform_devices(void); From 2bb421a3d93601aa81bc39af7aac7280303e0761 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 11 Feb 2021 00:08:03 +1100 Subject: [PATCH 146/188] powerpc/mm/64s: Fix no previous prototype warning As reported by lkp: arch/powerpc/mm/book3s64/radix_tlb.c:646:6: warning: no previous prototype for function 'exit_lazy_flush_tlb' Fix it by moving the prototype into the existing header. Fixes: 032b7f08932c ("powerpc/64s/radix: serialize_against_pte_lookup IPIs trim mm_cpumask") Reported-by: kernel test robot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210210130804.3190952-2-mpe@ellerman.id.au --- arch/powerpc/mm/book3s64/internal.h | 2 ++ arch/powerpc/mm/book3s64/pgtable.c | 4 ++-- arch/powerpc/mm/book3s64/radix_tlb.c | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h index c12d78ee42f5..5045048ce244 100644 --- a/arch/powerpc/mm/book3s64/internal.h +++ b/arch/powerpc/mm/book3s64/internal.h @@ -15,4 +15,6 @@ static inline bool stress_slb(void) void slb_setup_new_exec(void); +void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush); + #endif /* ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H */ diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 78c492e86752..9ffa65074cb0 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -20,6 +20,8 @@ #include #include +#include "internal.h" + unsigned long __pmd_frag_nr; EXPORT_SYMBOL(__pmd_frag_nr); unsigned long __pmd_frag_size_shift; @@ -79,8 +81,6 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); } -void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush); - static void do_serialize(void *arg) { /* We've taken the IPI, so try to trim the mask while here */ diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index d7f1a6bd08ef..409e61210789 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -18,6 +18,8 @@ #include #include +#include "internal.h" + #define RIC_FLUSH_TLB 0 #define RIC_FLUSH_PWC 1 #define RIC_FLUSH_ALL 2 From f30520c64f290589e91461d7326b497c23e7f5fd Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 11 Feb 2021 00:08:04 +1100 Subject: [PATCH 147/188] powerpc/amigaone: Make amigaone_discover_phbs() static It's only used in setup.c, so make it static. 
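This fix and the previous one follow the same linkage rule, sketched here with the identifiers from the two patches (bodies elided):

/* book3s64/internal.h: a function called from another .c file gets a
 * prototype in a shared header rather than an ad-hoc local declaration */
void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush);

/* amigaone/setup.c: a function with no external callers becomes static,
 * which also satisfies -Wmissing-prototypes */
static void __init amigaone_discover_phbs(void)
{
	/* body unchanged, only the linkage changes */
}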
Fixes: 053d58c87029 ("powerpc/amigaone: Move PHB discovery") Reported-by: kernel test robot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210210130804.3190952-3-mpe@ellerman.id.au --- arch/powerpc/platforms/amigaone/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c index b25ddf39dd43..9d252c554f7f 100644 --- a/arch/powerpc/platforms/amigaone/setup.c +++ b/arch/powerpc/platforms/amigaone/setup.c @@ -70,7 +70,7 @@ void __init amigaone_setup_arch(void) ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0); } -void __init amigaone_discover_phbs(void) +static void __init amigaone_discover_phbs(void) { struct device_node *np; int phb = -ENODEV; From de4ffc653f370e56d74994ae5247e39a100b4ff7 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 8 Feb 2021 16:57:01 +1100 Subject: [PATCH 148/188] powerpc/uaccess: Simplify unsafe_put_user() implementation Currently unsafe_put_user() expands to __put_user_goto(), which expands to __put_user_nocheck_goto(). There are no other uses of __put_user_nocheck_goto(), and although there are some other uses of __put_user_goto() those could just use unsafe_put_user(). Every layer of indirection introduces the possibility that some code is calling that layer, and makes keeping track of the required semantics at each point more complicated. So drop __put_user_goto(), and rename __put_user_nocheck_goto() to __unsafe_put_user_goto(). The "nocheck" is implied by "unsafe". Replace the few uses of __put_user_goto() with unsafe_put_user(). Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210208135717.2618798-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/uaccess.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 46123ae6a4c9..9dcb1a658c24 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -52,8 +52,6 @@ static inline bool __access_ok(unsigned long addr, unsigned long size) __get_user_nocheck((x), (ptr), sizeof(*(ptr)), true) #define __put_user(x, ptr) \ __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) -#define __put_user_goto(x, ptr, label) \ - __put_user_nocheck_goto((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label) #define __get_user_allowed(x, ptr) \ __get_user_nocheck((x), (ptr), sizeof(*(ptr)), false) @@ -213,7 +211,7 @@ do { \ } \ } while (0) -#define __put_user_nocheck_goto(x, ptr, size, label) \ +#define __unsafe_put_user_goto(x, ptr, size, label) \ do { \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ if (!is_kernel_addr((unsigned long)__pu_addr)) \ @@ -530,7 +528,8 @@ user_write_access_begin(const void __user *ptr, size_t len) #define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0) #define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e) -#define unsafe_put_user(x, p, e) __put_user_goto(x, p, e) +#define unsafe_put_user(x, p, e) \ + __unsafe_put_user_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e) #define unsafe_copy_to_user(d, s, l, e) \ do { \ @@ -540,17 +539,17 @@ do { \ int _i; \ \ for (_i = 0; _i < (_len & ~(sizeof(long) - 1)); _i += sizeof(long)) \ - __put_user_goto(*(long*)(_src + _i), (long __user *)(_dst + _i), e);\ + unsafe_put_user(*(long*)(_src + _i), (long __user *)(_dst + _i), e); \ if (IS_ENABLED(CONFIG_PPC64) && (_len & 4)) { \ - __put_user_goto(*(u32*)(_src + _i), (u32 __user *)(_dst + 
_i), e); \ + unsafe_put_user(*(u32*)(_src + _i), (u32 __user *)(_dst + _i), e); \ _i += 4; \ } \ if (_len & 2) { \ - __put_user_goto(*(u16*)(_src + _i), (u16 __user *)(_dst + _i), e); \ + unsafe_put_user(*(u16*)(_src + _i), (u16 __user *)(_dst + _i), e); \ _i += 2; \ } \ if (_len & 1) \ - __put_user_goto(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e);\ + unsafe_put_user(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e); \ } while (0) #define HAVE_GET_KERNEL_NOFAULT From 7d506ca97b665b95e698a53697dad99fae813c1a Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 8 Feb 2021 16:57:40 +1100 Subject: [PATCH 149/188] powerpc/uaccess: Avoid might_fault() when user access is enabled The amount of code executed with enabled user space access (unlocked KUAP) should be minimal. However with CONFIG_PROVE_LOCKING or CONFIG_DEBUG_ATOMIC_SLEEP enabled, might_fault() calls into various parts of the kernel, and may even end up replaying interrupts which in turn may access user space and forget to restore the KUAP state. The problem places are: 1. strncpy_from_user (and similar) which unlock KUAP and call unsafe_get_user -> __get_user_allowed -> __get_user_nocheck() with do_allow=false to skip KUAP as the caller took care of it. 2. __unsafe_put_user_goto() which is called with unlocked KUAP. eg: WARNING: CPU: 30 PID: 1 at arch/powerpc/include/asm/book3s/64/kup.h:324 arch_local_irq_restore+0x160/0x190 NIP arch_local_irq_restore+0x160/0x190 LR lock_is_held_type+0x140/0x200 Call Trace: 0xc00000007f392ff8 (unreliable) ___might_sleep+0x180/0x320 __might_fault+0x50/0xe0 filldir64+0x2d0/0x5d0 call_filldir+0xc8/0x180 ext4_readdir+0x948/0xb40 iterate_dir+0x1ec/0x240 sys_getdents64+0x80/0x290 system_call_exception+0x160/0x280 system_call_common+0xf0/0x27c Change __get_user_nocheck() to look at `do_allow` to decide whether to skip might_fault(). Since strncpy_from_user/etc call might_fault() anyway before unlocking KUAP, there should be no visible change. Drop might_fault() in __unsafe_put_user_goto() as it is only called from unsafe_put_user(), which already has KUAP unlocked. Since keeping might_fault() is still desirable for debugging, add calls to it in user_[read|write]_access_begin(). That also allows us to drop the is_kernel_addr() test, because there should be no code using user_[read|write]_access_begin() in order to access a kernel address. 
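For reference, the caller-side pattern these changes tune looks like this sketch (the function name put_two_words is hypothetical; the begin/unsafe/end API is the real one):

static int put_two_words(u64 __user *p, u64 a, u64 b)
{
	/* might_fault() now runs in here, before KUAP is opened */
	if (!user_write_access_begin(p, 2 * sizeof(u64)))
		return -EFAULT;
	/* KUAP open: keep this window short, with no calls that may
	 * sleep or replay interrupts */
	unsafe_put_user(a, &p[0], efault);
	unsafe_put_user(b, &p[1], efault);
	user_write_access_end();
	return 0;
efault:
	user_write_access_end();
	return -EFAULT;
}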
Fixes: de78a9c42a79 ("powerpc: Add a framework for Kernel Userspace Access Protection") Signed-off-by: Alexey Kardashevskiy [mpe: Combine with related patch from myself, merge change logs] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210204121612.32721-1-aik@ozlabs.ru --- arch/powerpc/include/asm/uaccess.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 9dcb1a658c24..85d6712dc8ed 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -214,8 +214,6 @@ do { \ #define __unsafe_put_user_goto(x, ptr, size, label) \ do { \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - if (!is_kernel_addr((unsigned long)__pu_addr)) \ - might_fault(); \ __chk_user_ptr(ptr); \ __put_user_size_goto((x), __pu_addr, (size), label); \ } while (0) @@ -311,7 +309,7 @@ do { \ __typeof__(size) __gu_size = (size); \ \ __chk_user_ptr(__gu_addr); \ - if (!is_kernel_addr((unsigned long)__gu_addr)) \ + if (do_allow && !is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ if (do_allow) \ __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ @@ -496,6 +494,9 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t { if (unlikely(!access_ok(ptr, len))) return false; + + might_fault(); + allow_read_write_user((void __user *)ptr, ptr, len); return true; } @@ -509,6 +510,9 @@ user_read_access_begin(const void __user *ptr, size_t len) { if (unlikely(!access_ok(ptr, len))) return false; + + might_fault(); + allow_read_from_user(ptr, len); return true; } @@ -520,6 +524,9 @@ user_write_access_begin(const void __user *ptr, size_t len) { if (unlikely(!access_ok(ptr, len))) return false; + + might_fault(); + allow_write_to_user((void __user *)ptr, len); return true; } From 60a707d0c99aff4eadb7fd334c5fd21df386723e Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 2 Feb 2021 20:15:41 +1100 Subject: [PATCH 150/188] powerpc/kuap: Restore AMR after replaying soft interrupts Since de78a9c42a79 ("powerpc: Add a framework for Kernel Userspace Access Protection"), user access helpers call user_{read|write}_access_{begin|end} when user space access is allowed. Commit 890274c2dc4c ("powerpc/64s: Implement KUAP for Radix MMU") made the mentioned helpers program an AMR special register to allow such access for a short period of time; most of the time, AMR is expected to block user memory access by the kernel. Since the code accesses the user space memory, unsafe_get_user() calls might_fault() which calls arch_local_irq_restore() if either CONFIG_PROVE_LOCKING or CONFIG_DEBUG_ATOMIC_SLEEP is enabled. arch_local_irq_restore() then attempts to replay pending soft interrupts as KUAP regions have hardware interrupts enabled. If a pending interrupt happens to do user access (performance interrupts do that), it enables access for a short period of time and then blocks it again, so after returning from the replay the user access state remains blocked, and if a user page fault happens, "Bug: Read fault blocked by AMR!" appears and SIGSEGV is sent. An example trace: Bug: Read fault blocked by AMR!
WARNING: CPU: 0 PID: 1603 at /home/aik/p/kernel/arch/powerpc/include/asm/book3s/64/kup-radix.h:145 CPU: 0 PID: 1603 Comm: amr Not tainted 5.10.0-rc6_v5.10-rc6_a+fstn1 #24 NIP: c00000000009ece8 LR: c00000000009ece4 CTR: 0000000000000000 REGS: c00000000dc63560 TRAP: 0700 Not tainted (5.10.0-rc6_v5.10-rc6_a+fstn1) MSR: 8000000000021033 CR: 28002888 XER: 20040000 CFAR: c0000000001fa928 IRQMASK: 1 GPR00: c00000000009ece4 c00000000dc637f0 c000000002397600 000000000000001f GPR04: c0000000020eb318 0000000000000000 c00000000dc63494 0000000000000027 GPR08: c00000007fe4de68 c00000000dfe9180 0000000000000000 0000000000000001 GPR12: 0000000000002000 c0000000030a0000 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 bfffffffffffffff GPR20: 0000000000000000 c0000000134a4020 c0000000019c2218 0000000000000fe0 GPR24: 0000000000000000 0000000000000000 c00000000d106200 0000000040000000 GPR28: 0000000000000000 0000000000000300 c00000000dc63910 c000000001946730 NIP __do_page_fault+0xb38/0xde0 LR __do_page_fault+0xb34/0xde0 Call Trace: __do_page_fault+0xb34/0xde0 (unreliable) handle_page_fault+0x10/0x2c --- interrupt: 300 at strncpy_from_user+0x290/0x440 LR = strncpy_from_user+0x284/0x440 strncpy_from_user+0x2f0/0x440 (unreliable) getname_flags+0x88/0x2c0 do_sys_openat2+0x2d4/0x5f0 do_sys_open+0xcc/0x140 system_call_exception+0x160/0x240 system_call_common+0xf0/0x27c To fix it save/restore the AMR when replaying interrupts, and also add a check if AMR was not blocked prior to replaying interrupts. Originally found by syzkaller. Fixes: 890274c2dc4c ("powerpc/64s: Implement KUAP for Radix MMU") Signed-off-by: Alexey Kardashevskiy Reviewed-by: Nicholas Piggin [mpe: Use normal commit citation format and add full oops log to change log, move kuap_check_amr() into the restore routine to avoid warnings about unreconciled IRQ state] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210202091541.36499-1-aik@ozlabs.ru --- arch/powerpc/kernel/irq.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 681abb7c0507..4788522b2b14 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -266,6 +266,31 @@ again: } } +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP) +static inline void replay_soft_interrupts_irqrestore(void) +{ + unsigned long kuap_state = get_kuap(); + + /* + * Check if anything calls local_irq_enable/restore() when KUAP is + * disabled (user access enabled). We handle that case here by saving + * and re-locking AMR but we shouldn't get here in the first place, + * hence the warning. 
+ */ + kuap_check_amr(); + + if (kuap_state != AMR_KUAP_BLOCKED) + set_kuap(AMR_KUAP_BLOCKED); + + replay_soft_interrupts(); + + if (kuap_state != AMR_KUAP_BLOCKED) + set_kuap(kuap_state); +} +#else +#define replay_soft_interrupts_irqrestore() replay_soft_interrupts() +#endif + notrace void arch_local_irq_restore(unsigned long mask) { unsigned char irq_happened; @@ -329,7 +354,7 @@ notrace void arch_local_irq_restore(unsigned long mask) irq_soft_mask_set(IRQS_ALL_DISABLED); trace_hardirqs_off(); - replay_soft_interrupts(); + replay_soft_interrupts_irqrestore(); local_paca->irq_happened = 0; trace_hardirqs_on(); From 14ad0e7d04f46865775fb010ccd96fb1cc83433a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 8 Feb 2021 16:33:26 +1000 Subject: [PATCH 151/188] powerpc/64s: syscall real mode entry use mtmsrd rather than rfid Have the real mode system call entry handler branch to the kernel 0xc000... address and then use mtmsrd to enable the MMU, rather than use SRRs and rfid. Commit 8729c26e675c ("powerpc/64s/exception: Move real to virt switch into the common handler") implemented this style of real mode entry for other interrupt handlers, so this brings system calls into line with them, which is the main motivation for the change. This tends to be slightly faster due to avoiding the mtsprs, and it also does not clobber the SRR registers, which becomes important in a subsequent change. The real mode entry points don't tend to be too important for performance these days, but it is possible for a hypervisor to run guests in AIL=0 mode for certain reasons. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210208063326.331502-1-npiggin@gmail.com --- arch/powerpc/kernel/entry_64.S | 6 ++++++ arch/powerpc/kernel/exceptions-64s.S | 9 +++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index aa1af139d947..4b2d2823c7a5 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -225,6 +225,12 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate) b system_call_vectored_common #endif + .balign IFETCH_ALIGN_BYTES + .globl system_call_common_real +system_call_common_real: + ld r10,PACAKMSR(r13) /* get MSR value for kernel */ + mtmsrd r10 + .balign IFETCH_ALIGN_BYTES .globl system_call_common system_call_common: diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index c51c436d5845..de3e335bd931 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1973,12 +1973,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) HMT_MEDIUM .if ! \virt - __LOAD_HANDLER(r10, system_call_common) - mtspr SPRN_SRR0,r10 - ld r10,PACAKMSR(r13) - mtspr SPRN_SRR1,r10 - RFI_TO_KERNEL - b . /* prevent speculative execution */ + __LOAD_HANDLER(r10, system_call_common_real) + mtctr r10 + bctr .else li r10,MSR_RI mtmsrd r10,1 /* Set RI (EE=0) */ From ac7c5e9b08acdb54ef3525abcad24bdb3ed05551 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 8 Feb 2021 16:34:06 +1000 Subject: [PATCH 152/188] powerpc/64s: Remove EXSLB interrupt save area SLB faults should not be taken while the PACA save areas are live; before C code is called or any other accesses are made, all memory accesses are fetches from the kernel text, plus accesses to the PACA and the current stack. All of these have pinned SLBs so will not take an SLB fault. Therefore EXSLB is not required.
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210208063406.331655-1-npiggin@gmail.com --- arch/powerpc/include/asm/paca.h | 3 +-- arch/powerpc/kernel/asm-offsets.c | 1 - arch/powerpc/kernel/exceptions-64s.S | 5 ----- 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 169e687fd479..ec18ac818e3a 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -109,8 +109,7 @@ struct paca_struct { */ /* used for most interrupts/exceptions */ u64 exgen[EX_SIZE] __attribute__((aligned(0x80))); - u64 exslb[EX_SIZE]; /* used for SLB/segment table misses - * on the linear mapping */ + /* SLB related definitions */ u16 vmalloc_sllp; u8 slb_cache_ptr; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index b12d7c049bfe..31edd9bbce75 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -255,7 +255,6 @@ int main(void) #endif /* CONFIG_PPC_MM_SLICES */ OFFSET(PACA_EXGEN, paca_struct, exgen); OFFSET(PACA_EXMC, paca_struct, exmc); - OFFSET(PACA_EXSLB, paca_struct, exslb); OFFSET(PACA_EXNMI, paca_struct, exnmi); #ifdef CONFIG_PPC_PSERIES OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index de3e335bd931..a4bd3c114a0a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1412,13 +1412,9 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) * on user-handler data structures. * * KVM: Same as 0x300, DSLB must test for KVM guest. - * - * A dedicated save area EXSLB is used (XXX: but it actually need not be - * these days, we could use EXGEN). */ INT_DEFINE_BEGIN(data_access_slb) IVEC=0x380 - IAREA=PACA_EXSLB IDAR=1 IKVM_SKIP=1 IKVM_REAL=1 @@ -1501,7 +1497,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) */ INT_DEFINE_BEGIN(instruction_access_slb) IVEC=0x480 - IAREA=PACA_EXSLB IISIDE=1 IDAR=1 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE From 3642eb21256a317ac14e9ed560242c6d20cf06d9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 8 Feb 2021 07:17:40 +0000 Subject: [PATCH 153/188] powerpc/32: Preserve cr1 in exception prolog stack check to fix build error THREAD_ALIGN_SHIFT = THREAD_SHIFT + 1 = PAGE_SHIFT + 1 Maximum PAGE_SHIFT is 18 for 256k pages so THREAD_ALIGN_SHIFT is 19 at the maximum. There is no need to clobber cr1: it can be preserved when moving r1 into CR for the stack overflow check. This reduces the number of instructions in the Machine Check Exception prolog and fixes a build failure reported by the kernel test robot on v5.10 stable when building with RTAS + VMAP_STACK + KVM. That build failure is due to too many instructions in the prolog, hence not fitting between 0x200 and 0x300. Although the problem doesn't show up in mainline, it is still worth the change.
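As a worked example of the mtcrf field mask (standard Power ISA semantics, with FXM bit 0 selecting cr0):

/*
 * mtcrf FXM,rS writes CR field i iff FXM bit i (MSB first) is set:
 *   0x7f = 0b01111111 -> writes cr1..cr7, preserves only cr0
 *   0x3f = 0b00111111 -> writes cr2..cr7, preserves cr0 and cr1
 * The stack check only tests CR bit (32 - THREAD_ALIGN_SHIFT); with
 * THREAD_ALIGN_SHIFT <= 19 that bit is >= 13, i.e. within cr3..cr7,
 * so 0x3f is sufficient and cr1 keeps the RTAS_SP comparison result
 * from earlier in the prolog.
 */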
Fixes: 98bf2d3f4970 ("powerpc/32s: Fix RTAS machine check with VMAP stack") Cc: stable@vger.kernel.org Reported-by: kernel test robot Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5ae4d545e3ac58e133d2599e0deb88843cb494fc.1612768623.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 2 +- arch/powerpc/kernel/head_book3s_32.S | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index a2f72c966baf..abc7b603ab65 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -47,7 +47,7 @@ lwz r1,TASK_STACK-THREAD(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE 1: - mtcrf 0x7f, r1 + mtcrf 0x3f, r1 bt 32 - THREAD_ALIGN_SHIFT, stack_overflow #else subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */ diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 1b9c62423b09..a4eaf214e112 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -269,12 +269,6 @@ MachineCheck: 7: EXCEPTION_PROLOG_2 addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_CHRP -#ifdef CONFIG_VMAP_STACK - mfspr r4, SPRN_SPRG_THREAD - tovirt(r4, r4) - lwz r4, RTAS_SP(r4) - cmpwi cr1, r4, 0 -#endif beq cr1, machine_check_tramp twi 31, 0, 0 #else From c9df3f809cc98b196548864f52d3c4e280dd1970 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Sun, 7 Feb 2021 14:43:12 +0800 Subject: [PATCH 154/188] powerpc/xive: Assign boolean values to a bool variable Fix the following coccicheck warnings: ./arch/powerpc/kvm/book3s_xive.c:1856:2-17: WARNING: Assignment of 0/1 to bool variable. ./arch/powerpc/kvm/book3s_xive.c:1854:2-17: WARNING: Assignment of 0/1 to bool variable. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1612680192-43116-1-git-send-email-jiapeng.chong@linux.alibaba.com --- arch/powerpc/kvm/book3s_xive.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 30dfeac731c6..e7219b6f5f9a 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1813,9 +1813,9 @@ int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, return -EINVAL; if ((level == 1 && state->lsi) || level == KVM_INTERRUPT_SET_LEVEL) - state->asserted = 1; + state->asserted = true; else if (level == 0 || level == KVM_INTERRUPT_UNSET) { - state->asserted = 0; + state->asserted = false; return 0; } From c7ba2d636342093cfb842f47640e5b62192adfed Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 3 Feb 2021 10:28:10 +0530 Subject: [PATCH 155/188] powerpc/mm: Enable compound page check for both THP and HugeTLB THP config results in compound pages. Make sure the kernel enables the PageCompound() check even when CONFIG_HUGETLB_PAGE is disabled and CONFIG_TRANSPARENT_HUGEPAGE is enabled. This makes sure we correctly flush the icache with THP pages. flush_dcache_icache_page only matters for platforms that don't support COHERENT_ICACHE.
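The effect is easiest to see as a condensed before/after sketch of the dispatch in flush_dcache_icache_page():

/* Before: the compound path was compiled out when CONFIG_HUGETLB_PAGE=n,
 * so on THP-only configs a huge page had only its head page flushed:
 *
 *	#ifdef CONFIG_HUGETLB_PAGE
 *	if (PageCompound(page)) {
 *		flush_dcache_icache_hugepage(page);
 *		return;
 *	}
 *	#endif
 *	__flush_dcache_icache(page_address(page));
 *
 * After: the check is unconditional, so every subpage of any compound
 * page, hugetlbfs or THP, gets flushed:
 */
if (PageCompound(page))
	return flush_dcache_icache_hugepage(page);
__flush_dcache_icache(page_address(page));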
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210203045812.234439-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/hugetlb.h | 2 -- arch/powerpc/mm/hugetlbpage.c | 18 ------------------ arch/powerpc/mm/mem.c | 28 ++++++++++++++++++++++------ 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 013165e62618..f18c543bc01d 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -17,8 +17,6 @@ extern bool hugetlb_disabled; void hugetlbpage_init_default(void); -void flush_dcache_icache_hugepage(struct page *page); - int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 4e7d9b91f1da..d142b76d507d 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -663,24 +663,6 @@ static int __init hugetlbpage_init(void) arch_initcall(hugetlbpage_init); -void flush_dcache_icache_hugepage(struct page *page) -{ - int i; - void *start; - - BUG_ON(!PageCompound(page)); - - for (i = 0; i < compound_nr(page); i++) { - if (!PageHighMem(page)) { - __flush_dcache_icache(page_address(page+i)); - } else { - start = kmap_atomic(page+i); - __flush_dcache_icache(start); - kunmap_atomic(start); - } - } -} - void __init gigantic_hugetlb_cma_reserve(void) { unsigned long order = 0; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index afab328d0887..ed64ca80d5fd 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -494,14 +494,30 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); +static void flush_dcache_icache_hugepage(struct page *page) +{ + int i; + void *start; + + BUG_ON(!PageCompound(page)); + + for (i = 0; i < compound_nr(page); i++) { + if (!PageHighMem(page)) { + __flush_dcache_icache(page_address(page+i)); + } else { + start = kmap_atomic(page+i); + __flush_dcache_icache(start); + kunmap_atomic(start); + } + } +} + void flush_dcache_icache_page(struct page *page) { -#ifdef CONFIG_HUGETLB_PAGE - if (PageCompound(page)) { - flush_dcache_icache_hugepage(page); - return; - } -#endif + + if (PageCompound(page)) + return flush_dcache_icache_hugepage(page); + #if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64) /* On 8xx there is no need to kmap since highmem is not supported */ __flush_dcache_icache(page_address(page)); From ec94b9b23d620d40ab2ced094a30c22bb8d69b9f Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 3 Feb 2021 10:28:11 +0530 Subject: [PATCH 156/188] powerpc/mm: Add PG_dcache_clean to indicate dcache clean state This just adds a better name for PG_arch_1. No functional change in this patch.
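With its new name, the flag's protocol reads as in this condensed sketch (the helper name sync_icache_once is hypothetical; the real logic lives in set_pte_filter() and friends):

static void sync_icache_once(struct page *pg)
{
	/* PG_dcache_clean (formerly PG_arch_1) records that the dcache
	 * has been written back and the icache invalidated for this
	 * page, so later exec faults can skip the flush */
	if (test_bit(PG_dcache_clean, &pg->flags))
		return;
	flush_dcache_icache_page(pg);
	set_bit(PG_dcache_clean, &pg->flags);
}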
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210203045812.234439-2-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/cacheflush.h | 6 ++++++ arch/powerpc/include/asm/kvm_ppc.h | 4 ++-- arch/powerpc/mm/book3s64/hash_utils.c | 4 ++-- arch/powerpc/mm/mem.c | 4 ++-- arch/powerpc/mm/pgtable.c | 14 +++++++------- 5 files changed, 19 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 138e46d8c04e..f63495109f63 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -8,6 +8,12 @@ #include #include +/* + * This flag is used to indicate that the page pointed to by a pte is clean + * and does not require cleaning before returning it to the user. + */ +#define PG_dcache_clean PG_arch_1 + #ifdef CONFIG_PPC_BOOK3S_64 /* * Book3s has no ptesync after setting a pte, so without this ptesync it's diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 259ba4ce9ad3..46dcc40617ed 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -881,9 +881,9 @@ static inline void kvmppc_mmu_flush_icache(kvm_pfn_t pfn) /* Clear i-cache for new pages */ page = pfn_to_page(pfn); - if (!test_bit(PG_arch_1, &page->flags)) { + if (!test_bit(PG_dcache_clean, &page->flags)) { flush_dcache_icache_page(page); - set_bit(PG_arch_1, &page->flags); + set_bit(PG_dcache_clean, &page->flags); } } diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index fb7c10524bcd..581b20a2feaf 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1144,10 +1144,10 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) page = pte_page(pte); /* page is dirty */ - if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { + if (!test_bit(PG_dcache_clean, &page->flags) && !PageReserved(page)) { if (trap == 0x400) { flush_dcache_icache_page(page); - set_bit(PG_arch_1, &page->flags); + set_bit(PG_dcache_clean, &page->flags); } else pp |= HPTE_R_N; } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index ed64ca80d5fd..883e67d37bbc 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -489,8 +489,8 @@ void flush_dcache_page(struct page *page) if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) return; /* avoid an atomic op if possible */ - if (test_bit(PG_arch_1, &page->flags)) - clear_bit(PG_arch_1, &page->flags); + if (test_bit(PG_dcache_clean, &page->flags)) + clear_bit(PG_dcache_clean, &page->flags); } EXPORT_SYMBOL(flush_dcache_page); diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 3a41545e5c07..354611940118 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -82,9 +82,9 @@ static pte_t set_pte_filter_hash(pte_t pte) struct page *pg = maybe_pte_to_page(pte); if (!pg) return pte; - if (!test_bit(PG_arch_1, &pg->flags)) { + if (!test_bit(PG_dcache_clean, &pg->flags)) { flush_dcache_icache_page(pg); - set_bit(PG_arch_1, &pg->flags); + set_bit(PG_dcache_clean, &pg->flags); } } return pte; @@ -117,13 +117,13 @@ static inline pte_t set_pte_filter(pte_t pte) return pte; /* If the page clean, we move on */ - if (test_bit(PG_arch_1, &pg->flags)) + if (test_bit(PG_dcache_clean, &pg->flags)) return pte; /* If it's an exec fault, we flush the cache and make it clean */ if (is_exec_fault()) { flush_dcache_icache_page(pg); - set_bit(PG_arch_1, 
&pg->flags); + set_bit(PG_dcache_clean, &pg->flags); return pte; } @@ -162,12 +162,12 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, goto bail; /* If the page is already clean, we move on */ - if (test_bit(PG_arch_1, &pg->flags)) + if (test_bit(PG_dcache_clean, &pg->flags)) goto bail; - /* Clean the page and set PG_arch_1 */ + /* Clean the page and set PG_dcache_clean */ flush_dcache_icache_page(pg); - set_bit(PG_arch_1, &pg->flags); + set_bit(PG_dcache_clean, &pg->flags); bail: return pte_mkexec(pte); From 2ac02e5ecec0cc2484d60a73b1bc6394aa2fad28 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 3 Feb 2021 10:28:12 +0530 Subject: [PATCH 157/188] powerpc/mm: Remove dcache flush from memory remove. We added dcache flush on memory add/remove in commit fb5924fddf9e ("powerpc/mm: Flush cache on memory hot(un)plug") to handle crashes on GPU hotplug. Instead of adding a dcache flush to the generic memory add/remove routines, which are used even for regular memory, we should handle this device-specific flush in the device driver code. memtrace did handle this in the driver and that was removed by commit 7fd6641de28f ("powerpc/powernv/memtrace: Let the arch hotunplug code flush cache"). This patch reverts that commit. The dcache flush in memory add was removed by commit ea458effa88e ("powerpc: Don't flush caches when adding memory") which I don't think is correct. The reason why we require a dcache flush in memtrace is to make sure we don't have a dirty cache when we remap a pfn to cache inhibited. We should do that when the memtrace module removes the memory and makes the pfn available for HTM traces to map it as cache inhibited. The other device mentioned in commit fb5924fddf9e ("powerpc/mm: Flush cache on memory hot(un)plug") is the nvlink device with coherent memory. The support for that was removed in commit 7eb3cf761927 ("powerpc/powernv: remove unused NPU DMA code") and commit 25b2995a35b6 ("mm: remove MEMORY_DEVICE_PUBLIC support"). Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210203045812.234439-3-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/mem.c | 22 ----------------- arch/powerpc/platforms/powernv/memtrace.c | 29 +++++++++++++++++++++++ 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 883e67d37bbc..4e8ce6d85232 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -91,27 +91,6 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } -#define FLUSH_CHUNK_SIZE SZ_1G -/** - * flush_dcache_range_chunked(): Write any modified data cache blocks out to - * memory and invalidate them, in chunks of up to FLUSH_CHUNK_SIZE - * Does not invalidate the corresponding instruction cache blocks.
- * - * @start: the start address - * @stop: the stop address (exclusive) - * @chunk: the max size of the chunks - */ -static void flush_dcache_range_chunked(unsigned long start, unsigned long stop, - unsigned long chunk) -{ - unsigned long i; - - for (i = start; i < stop; i += chunk) { - flush_dcache_range(i, min(stop, i + chunk)); - cond_resched(); - } -} - int __ref arch_create_linear_mapping(int nid, u64 start, u64 size, struct mhp_params *params) { @@ -136,7 +115,6 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size) /* Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(start); - flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE); mutex_lock(&linear_mapping_mutex); ret = remove_section_mapping(start, start + size); diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 5fc9408bb0b3..019669eb21d2 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -19,6 +19,7 @@ #include #include #include +#include /* This enables us to keep track of the memory removed from each node. */ struct memtrace_entry { @@ -51,6 +52,27 @@ static const struct file_operations memtrace_fops = { .open = simple_open, }; +#define FLUSH_CHUNK_SIZE SZ_1G +/** + * flush_dcache_range_chunked(): Write any modified data cache blocks out to + * memory and invalidate them, in chunks of up to FLUSH_CHUNK_SIZE + * Does not invalidate the corresponding instruction cache blocks. + * + * @start: the start address + * @stop: the stop address (exclusive) + * @chunk: the max size of the chunks + */ +static void flush_dcache_range_chunked(unsigned long start, unsigned long stop, + unsigned long chunk) +{ + unsigned long i; + + for (i = start; i < stop; i += chunk) { + flush_dcache_range(i, min(stop, i + chunk)); + cond_resched(); + } +} + static void memtrace_clear_range(unsigned long start_pfn, unsigned long nr_pages) { @@ -62,6 +84,13 @@ static void memtrace_clear_range(unsigned long start_pfn, cond_resched(); clear_page(__va(PFN_PHYS(pfn))); } + /* + * Before we go ahead and use this range as cache inhibited range + * flush the cache. + */ + flush_dcache_range_chunked(PFN_PHYS(start_pfn), + PFN_PHYS(start_pfn + nr_pages), + FLUSH_CHUNK_SIZE); } static u64 memtrace_alloc_node(u32 nid, u64 size) From 2377c92e37fe97bc5b365f55cf60f56dfc4849f5 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Thu, 4 Feb 2021 17:01:10 +0530 Subject: [PATCH 158/188] powerpc/kexec_file: fix FDT size estimation for kdump kernel On systems with large amount of memory, loading kdump kernel through kexec_file_load syscall may fail with the below error: "Failed to update fdt with linux,drconf-usable-memory property" This happens because the size estimation for kdump kernel's FDT does not account for the additional space needed to setup usable memory properties. Fix it by accounting for the space needed to include linux,usable-memory & linux,drconf-usable-memory properties while estimating kdump kernel's FDT size. 
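A rough worked example of the new estimate (numbers illustrative, not from the patch): with 1 TiB of RAM, a 256 MiB LMB size and a 512 MiB crashkernel reservation,

  usm_entries = (1 TiB / 256 MiB) + 2 * (512 MiB / 256 MiB)
              = 4096 + 4
              = 4100

so the kdump case adds 4100 * sizeof(u64) = 32800 bytes on top of the base fdt_totalsize(initial_boot_params) + 2 * COMMAND_LINE_SIZE estimate.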
Fixes: 6ecd0163d360 ("powerpc/kexec_file: Add appropriate regions for memory reserve map") Cc: stable@vger.kernel.org # v5.9+ Signed-off-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/161243826811.119001.14083048209224609814.stgit@hbathini --- arch/powerpc/include/asm/kexec.h | 1 + arch/powerpc/kexec/elf_64.c | 2 +- arch/powerpc/kexec/file_load_64.c | 35 +++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 55d6ede30c19..9ab344d29a54 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -136,6 +136,7 @@ int load_crashdump_segments_ppc64(struct kimage *image, int setup_purgatory_ppc64(struct kimage *image, const void *slave_code, const void *fdt, unsigned long kernel_load_addr, unsigned long fdt_load_addr); +unsigned int kexec_fdt_totalsize_ppc64(struct kimage *image); int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, unsigned long initrd_load_addr, unsigned long initrd_len, const char *cmdline); diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c index d0e459bb2f05..9842e33533df 100644 --- a/arch/powerpc/kexec/elf_64.c +++ b/arch/powerpc/kexec/elf_64.c @@ -102,7 +102,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr); } - fdt_size = fdt_totalsize(initial_boot_params) * 2; + fdt_size = kexec_fdt_totalsize_ppc64(image); fdt = kmalloc(fdt_size, GFP_KERNEL); if (!fdt) { pr_err("Not enough memory for the device tree.\n"); diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index c69bcf9b547a..02b9e4d0dc40 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -925,6 +926,40 @@ out: return ret; } +/** + * kexec_fdt_totalsize_ppc64 - Return the estimated size needed to setup FDT + * for kexec/kdump kernel. + * @image: kexec image being loaded. + * + * Returns the estimated size needed for kexec/kdump kernel FDT. + */ +unsigned int kexec_fdt_totalsize_ppc64(struct kimage *image) +{ + unsigned int fdt_size; + u64 usm_entries; + + /* + * The below estimate more than accounts for a typical kexec case where + * the additional space is to accommodate things like kexec cmdline, + * chosen node with properties for initrd start & end addresses and + * a property to indicate kexec boot.. + */ + fdt_size = fdt_totalsize(initial_boot_params) + (2 * COMMAND_LINE_SIZE); + if (image->type != KEXEC_TYPE_CRASH) + return fdt_size; + + /* + * For kdump kernel, also account for linux,usable-memory and + * linux,drconf-usable-memory properties. Get an approximate on the + * number of usable memory entries and use for FDT size estimation. + */ + usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) + + (2 * (resource_size(&crashk_res) / drmem_lmb_size()))); + fdt_size += (unsigned int)(usm_entries * sizeof(u64)); + + return fdt_size; +} + /** * setup_new_fdt_ppc64 - Update the flattend device-tree of the kernel * being loaded. From caccf2ac5c5d085cd35043027b3eb93c4ffead07 Mon Sep 17 00:00:00 2001 From: Joseph J Allen Date: Sun, 3 Jan 2021 01:34:35 +0000 Subject: [PATCH 159/188] powerpc: use kernel endianness in MSR in 32-bit signal handler This mirrors the behavior in handle_rt_signal32, to obey kernel endianness rather than assume a 32-bit process is big-endian. 
Without this change, any 32-bit little-endian process will SIGILL immediately upon handling a signal. Signed-off-by: Joseph J Allen Signed-off-by: Will Springer Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2058876.irdbgypaU6@sheen --- arch/powerpc/kernel/signal_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 934cbdf6dd10..75ee918a120a 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -929,8 +929,9 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, regs->gpr[3] = ksig->sig; regs->gpr[4] = (unsigned long) sc; regs->nip = (unsigned long)ksig->ka.sa.sa_handler; - /* enter the signal handler in big-endian mode */ + /* enter the signal handler in native-endian mode */ regs->msr &= ~MSR_LE; + regs->msr |= (MSR_KERNEL & MSR_LE); return 0; failed: From 57f48b4b74e720e88b6d8674ae5d3804a8d03915 Mon Sep 17 00:00:00 2001 From: Will Springer Date: Sun, 3 Jan 2021 01:35:30 +0000 Subject: [PATCH 160/188] powerpc/compat_sys: swap hi/lo parts of 64-bit syscall args in LE mode Swap upper/lower 32 bits for 64-bit compat syscalls, conditioned on endianness. This is modeled after the same functionality in arch/mips/kernel/linux32.c. This fixes compat_sys on ppc64le, when called by 32-bit little-endian processes. Tested with `file /bin/bash` (pread64) and `truncate -s 5G test` (ftruncate64). Signed-off-by: Will Springer Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2765111.e9J7NaK4W3@sheen --- arch/powerpc/kernel/sys_ppc32.c | 49 +++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index d36c6391eaf5..16ff0399a257 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -59,57 +59,64 @@ unsigned long compat_sys_mmap2(unsigned long addr, size_t len, /* * long long munging: * The 32 bit ABI passes long longs in an odd even register pair. + * High and low parts are swapped depending on endian mode, + * so define a macro (similar to mips linux32) to handle that. 
*/ +#ifdef __LITTLE_ENDIAN__ +#define merge_64(low, high) ((u64)high << 32) | low +#else +#define merge_64(high, low) ((u64)high << 32) | low +#endif compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, - u32 reg6, u32 poshi, u32 poslo) + u32 reg6, u32 pos1, u32 pos2) { - return ksys_pread64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo); + return ksys_pread64(fd, ubuf, count, merge_64(pos1, pos2)); } compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, - u32 reg6, u32 poshi, u32 poslo) + u32 reg6, u32 pos1, u32 pos2) { - return ksys_pwrite64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo); + return ksys_pwrite64(fd, ubuf, count, merge_64(pos1, pos2)); } -compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offhi, u32 offlo, u32 count) +compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count) { - return ksys_readahead(fd, ((loff_t)offhi << 32) | offlo, count); + return ksys_readahead(fd, merge_64(offset1, offset2), count); } asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4, - unsigned long high, unsigned long low) + unsigned long len1, unsigned long len2) { - return ksys_truncate(path, (high << 32) | low); + return ksys_truncate(path, merge_64(len1, len2)); } -asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo, - u32 lenhi, u32 lenlo) +asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2, + u32 len1, u32 len2) { - return ksys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo, - ((loff_t)lenhi << 32) | lenlo); + return ksys_fallocate(fd, mode, ((loff_t)offset1 << 32) | offset2, + merge_64(len1, len2)); } -asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long high, - unsigned long low) +asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1, + unsigned long len2) { - return ksys_ftruncate(fd, (high << 32) | low); + return ksys_ftruncate(fd, merge_64(len1, len2)); } -long ppc32_fadvise64(int fd, u32 unused, u32 offset_high, u32 offset_low, +long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2, size_t len, int advice) { - return ksys_fadvise64_64(fd, (u64)offset_high << 32 | offset_low, len, + return ksys_fadvise64_64(fd, merge_64(offset1, offset2), len, advice); } asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags, - unsigned offset_hi, unsigned offset_lo, - unsigned nbytes_hi, unsigned nbytes_lo) + unsigned offset1, unsigned offset2, + unsigned nbytes1, unsigned nbytes2) { - loff_t offset = ((loff_t)offset_hi << 32) | offset_lo; - loff_t nbytes = ((loff_t)nbytes_hi << 32) | nbytes_lo; + loff_t offset = merge_64(offset1, offset2); + loff_t nbytes = merge_64(nbytes1, nbytes2); return ksys_sync_file_range(fd, offset, nbytes, flags); } From 57fdfbce89137ae85cd5cef48be168040a47dd13 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 8 Feb 2021 15:10:20 +0000 Subject: [PATCH 161/188] powerpc/32s: Add missing call to kuep_lock on syscall entry Userspace Execution protection and fast syscall entry were implemented independently from each other and were both merged in kernel 5.2, leading to syscall entry missing userspace execution protection. On syscall entry, execution of user space memory must be locked in the same way as on exception entry. 
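For background, on book3s/32 the kuep_lock/kuep_unlock primitives toggle the No-execute (N) bit in the user segment registers around kernel execution; a condensed sketch of the invariant (simplified, not the real macro bodies):

/* kernel entry (exception or syscall): kuep_lock  -> user segments NX set
 * kernel exit (return to user):        kuep_unlock -> user segments NX clear
 * Before this patch the syscall path skipped the kuep_lock half, so user
 * memory stayed executable by the kernel for the whole syscall. */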
Fixes: b86fb88855ea ("powerpc/32: implement fast entry for syscalls on non BOOKE") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c65e105b63aaf74f91a14f845bc77192350b84a6.1612796617.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index b102b40c4988..b1e36602c013 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -351,6 +351,9 @@ trace_syscall_entry_irq_off: .globl transfer_to_syscall transfer_to_syscall: +#ifdef CONFIG_PPC_BOOK3S_32 + kuep_lock r11, r12 +#endif #ifdef CONFIG_TRACE_IRQFLAGS andi. r12,r9,MSR_EE beq- trace_syscall_entry_irq_off From eca2411040c1ee15b8882c6427fb4eb5a48ada69 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 8 Feb 2021 15:10:21 +0000 Subject: [PATCH 162/188] powerpc/32: Always enable data translation on syscall entry If the code can use a stack in vm area, it can also use a stack in linear space. Simplify code by removing old non VMAP stack code on PPC32 in syscall. That means the data translation is now re-enabled early in syscall entry in all cases, not only when using VMAP stacks. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/412c6c1786922d991bbb89c2ad2e82cffe8ab112.1612796617.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 23 +---------------------- arch/powerpc/kernel/head_booke.h | 2 -- 2 files changed, 1 insertion(+), 24 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index abc7b603ab65..d481e351f006 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -118,7 +118,6 @@ .macro SYSCALL_ENTRY trapno mfspr r12,SPRN_SPRG_THREAD mfspr r9, SPRN_SRR1 -#ifdef CONFIG_VMAP_STACK mfspr r11, SPRN_SRR0 mtctr r11 andi. r11, r9, MSR_PR @@ -126,30 +125,16 @@ lwz r1,TASK_STACK-THREAD(r12) beq- 99f addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE - li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ + LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR | MSR_RI)) /* can take DTLB miss */ mtmsr r10 isync tovirt(r12, r12) stw r11,GPR1(r1) stw r11,0(r1) mr r11, r1 -#else - andi. r11, r9, MSR_PR - lwz r11,TASK_STACK-THREAD(r12) - beq- 99f - addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE - tophys(r11, r11) - stw r1,GPR1(r11) - stw r1,0(r11) - tovirt(r1, r11) /* set new kernel sp */ -#endif mflr r10 stw r10, _LINK(r11) -#ifdef CONFIG_VMAP_STACK mfctr r10 -#else - mfspr r10,SPRN_SRR0 -#endif stw r10,_NIP(r11) mfcr r10 rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ @@ -157,11 +142,7 @@ #ifdef CONFIG_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) 
*/ #else -#ifdef CONFIG_VMAP_STACK LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~MSR_IR) /* can take exceptions */ -#else - LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */ -#endif mtmsr r10 /* (except for mach check in rtas) */ #endif lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ @@ -190,7 +171,6 @@ li r12,-1 /* clear all pending debug events */ mtspr SPRN_DBSR,r12 lis r11,global_dbcr0@ha - tophys(r11,r11) addi r11,r11,global_dbcr0@l lwz r12,0(r11) mtspr SPRN_DBCR0,r12 @@ -200,7 +180,6 @@ #endif 3: - tovirt_novmstack r2, r2 /* set r2 to current */ lis r11, transfer_to_syscall@h ori r11, r11, transfer_to_syscall@l #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index bf33af714d11..706cd9368992 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -144,7 +144,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) li r12,-1 /* clear all pending debug events */ mtspr SPRN_DBSR,r12 lis r11,global_dbcr0@ha - tophys(r11,r11) addi r11,r11,global_dbcr0@l #ifdef CONFIG_SMP lwz r10, TASK_CPU(r2) @@ -158,7 +157,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) stw r12,4(r11) 3: - tovirt(r2, r2) /* set r2 to current */ lis r11, transfer_to_syscall@h ori r11, r11, transfer_to_syscall@l #ifdef CONFIG_TRACE_IRQFLAGS From 76249ddc27080b6b835a89cedcc4185b3b5a6b23 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 8 Feb 2021 15:10:22 +0000 Subject: [PATCH 163/188] powerpc/32: On syscall entry, enable instruction translation at the same time as data On 40x and 8xx, kernel text is pinned. On book3s/32, kernel text is mapped by BATs. Enable instruction translation at the same time as data translation, it makes things simpler. MSR_RI can also be set at the same time because srr0/srr1 are already saved and r1 is set properly. On booke, translation is always on, so at the end all PPC32 have translation on early. This reduces null_syscall benchmark by 13 cycles on 8xx (296 ==> 283 cycles). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3fe8891c814103a3549efc1d4e7ffc828bba5993.1612796617.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 26 +++++++++----------------- arch/powerpc/kernel/head_booke.h | 7 ++----- 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index d481e351f006..97d8465eb12a 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -125,9 +125,13 @@ lwz r1,TASK_STACK-THREAD(r12) beq- 99f addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE - LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR | MSR_RI)) /* can take DTLB miss */ - mtmsr r10 - isync + LOAD_REG_IMMEDIATE(r10, MSR_KERNEL) /* can take exceptions */ + mtspr SPRN_SRR1, r10 + lis r10, 1f@h + ori r10, r10, 1f@l + mtspr SPRN_SRR0, r10 + rfi +1: tovirt(r12, r12) stw r11,GPR1(r1) stw r11,0(r1) @@ -141,9 +145,6 @@ stw r10,_CCR(r11) /* save registers */ #ifdef CONFIG_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) 
*/ -#else - LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~MSR_IR) /* can take exceptions */ - mtmsr r10 /* (except for mach check in rtas) */ #endif lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ stw r2,GPR2(r11) @@ -180,8 +181,6 @@ #endif 3: - lis r11, transfer_to_syscall@h - ori r11, r11, transfer_to_syscall@l #ifdef CONFIG_TRACE_IRQFLAGS /* * If MSR is changing we need to keep interrupts disabled at this point @@ -193,15 +192,8 @@ #else LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE) #endif -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) - mtspr SPRN_NRI, r0 -#endif - mtspr SPRN_SRR1,r10 - mtspr SPRN_SRR0,r11 - rfi /* jump to handler, enable MMU */ -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif + mtmsr r10 + b transfer_to_syscall /* jump to handler */ 99: b ret_from_kernel_syscall .endm diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 706cd9368992..b3c502c503a0 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -157,8 +157,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) stw r12,4(r11) 3: - lis r11, transfer_to_syscall@h - ori r11, r11, transfer_to_syscall@l #ifdef CONFIG_TRACE_IRQFLAGS /* * If MSR is changing we need to keep interrupts disabled at this point @@ -172,9 +170,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) lis r10, (MSR_KERNEL | MSR_EE)@h ori r10, r10, (MSR_KERNEL | MSR_EE)@l #endif - mtspr SPRN_SRR1,r10 - mtspr SPRN_SRR0,r11 - rfi /* jump to handler, enable MMU */ + mtmsr r10 + b transfer_to_syscall /* jump to handler */ 99: b ret_from_kernel_syscall .endm From 2c59e5104821c5720e88bafa9e522f8bea9ce8fa Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 8 Feb 2021 15:10:23 +0000 Subject: [PATCH 164/188] powerpc/32: Reorder instructions to avoid using CTR in syscall entry Now that we are using rfi instead of mtmsr to reactivate MMU, it is possible to reorder instructions and avoid the need to use CTR for stashing SRR0. null_syscall on 8xx is reduced by 3 cycles (283 => 280 cycles). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8fa13a59f73647e058c95fc7e1c7a98f316bd20a.1612796617.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 97d8465eb12a..930377375dc0 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -116,30 +116,28 @@ .endm .macro SYSCALL_ENTRY trapno - mfspr r12,SPRN_SPRG_THREAD mfspr r9, SPRN_SRR1 - mfspr r11, SPRN_SRR0 - mtctr r11 + mfspr r10, SPRN_SRR0 andi. 
r11, r9, MSR_PR + beq- 99f + LOAD_REG_IMMEDIATE(r11, MSR_KERNEL) /* can take exceptions */ + lis r12, 1f@h + ori r12, r12, 1f@l + mtspr SPRN_SRR1, r11 + mtspr SPRN_SRR0, r12 + mfspr r12,SPRN_SPRG_THREAD mr r11, r1 lwz r1,TASK_STACK-THREAD(r12) - beq- 99f + tovirt(r12, r12) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE - LOAD_REG_IMMEDIATE(r10, MSR_KERNEL) /* can take exceptions */ - mtspr SPRN_SRR1, r10 - lis r10, 1f@h - ori r10, r10, 1f@l - mtspr SPRN_SRR0, r10 rfi 1: - tovirt(r12, r12) stw r11,GPR1(r1) stw r11,0(r1) mr r11, r1 + stw r10,_NIP(r11) mflr r10 stw r10, _LINK(r11) - mfctr r10 - stw r10,_NIP(r11) mfcr r10 rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ stw r10,_CCR(r11) /* save registers */ From fb5608fd117a8b48752d2b5a7e70847c1ed33d33 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 8 Feb 2021 15:10:24 +0000 Subject: [PATCH 165/188] powerpc/irq: Add helper to set regs->softe regs->softe doesn't exist on PPC32. Add irq_soft_mask_regs_set_state() helper to set regs->softe. This helper is a no-op on PPC32. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5f37d1177a751fdbca79df461d283850ca3a34a2.1612796617.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/hw_irq.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 614957f74cee..ed0c3b049dfd 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -38,6 +38,8 @@ #define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE) #endif +#endif /* CONFIG_PPC64 */ + /* * flags for paca->irq_soft_mask */ @@ -46,8 +48,6 @@ #define IRQS_PMI_DISABLED 2 #define IRQS_ALL_DISABLED (IRQS_DISABLED | IRQS_PMI_DISABLED) -#endif /* CONFIG_PPC64 */ - #ifndef __ASSEMBLY__ #ifdef CONFIG_PPC64 @@ -287,6 +287,10 @@ extern void irq_set_pending_from_srr1(unsigned long srr1); extern void force_external_irq_replay(void); +static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val) +{ + regs->softe = val; +} #else /* CONFIG_PPC64 */ static inline unsigned long arch_local_save_flags(void) @@ -355,6 +359,9 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs) static inline void may_hard_irq_enable(void) { } +static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val) +{ +} #endif /* CONFIG_PPC64 */ #define ARCH_IRQ_INIT_FLAGS IRQ_NOREQUEST From 08353779f2889305f64e04de3e46ed59ed60f859 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 8 Feb 2021 15:10:25 +0000 Subject: [PATCH 166/188] powerpc/irq: Rework helpers that manipulate MSR[EE/RI] In preparation for porting PPC32 to C syscall entry/exit, rewrite the following helpers as static inline functions and add support for PPC32 in them: __hard_irq_enable() __hard_irq_disable() __hard_EE_RI_disable() __hard_RI_enable() Then use them in the PPC32 version of arch_local_irq_disable() and arch_local_irq_enable() to avoid code duplication.
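For illustration, a minimal sketch of the IS_ENABLED() dispatch pattern these helpers rely on (the *_SKETCH names are stand-ins, not the real Kconfig plumbing; wrtee()/wrtspr()/mtmsr() are the existing accessors used in the diff below). Because the condition is a compile-time constant, the dead branches are discarded and each platform keeps a single-instruction implementation, so the static inline costs no more than the old per-platform macros:

	/* Sketch only: IS_ENABLED() expands to a constant 0 or 1, so the
	 * untaken branches below are removed at compile time. */
	#define IS_ENABLED_SKETCH(opt)	(opt)
	#define CONFIG_BOOKE_SKETCH	0	/* hypothetical build settings */
	#define CONFIG_PPC_8xx_SKETCH	1

	static inline void hard_irq_disable_sketch(void)
	{
		if (IS_ENABLED_SKETCH(CONFIG_BOOKE_SKETCH))
			wrtee(0);			/* booke: wrtee writes MSR[EE] directly */
		else if (IS_ENABLED_SKETCH(CONFIG_PPC_8xx_SKETCH))
			wrtspr(SPRN_EID);		/* 8xx: dedicated SPR clears EE */
		else
			mtmsr(mfmsr() & ~MSR_EE);	/* generic: read-modify-write MSR */
	}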
Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0e290372a0e7dc2ae657b4a01aec85f8de7fdf77.1612796617.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/hw_irq.h | 75 +++++++++++++++++++++---------- arch/powerpc/include/asm/reg.h | 1 + 2 files changed, 52 insertions(+), 24 deletions(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index ed0c3b049dfd..4739f61e632c 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -50,6 +50,55 @@ #ifndef __ASSEMBLY__ +static inline void __hard_irq_enable(void) +{ + if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) + wrtee(MSR_EE); + else if (IS_ENABLED(CONFIG_PPC_8xx)) + wrtspr(SPRN_EIE); + else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + __mtmsrd(MSR_EE | MSR_RI, 1); + else + mtmsr(mfmsr() | MSR_EE); +} + +static inline void __hard_irq_disable(void) +{ + if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) + wrtee(0); + else if (IS_ENABLED(CONFIG_PPC_8xx)) + wrtspr(SPRN_EID); + else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + __mtmsrd(MSR_RI, 1); + else + mtmsr(mfmsr() & ~MSR_EE); +} + +static inline void __hard_EE_RI_disable(void) +{ + if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) + wrtee(0); + else if (IS_ENABLED(CONFIG_PPC_8xx)) + wrtspr(SPRN_NRI); + else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + __mtmsrd(0, 1); + else + mtmsr(mfmsr() & ~(MSR_EE | MSR_RI)); +} + +static inline void __hard_RI_enable(void) +{ + if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) + return; + + if (IS_ENABLED(CONFIG_PPC_8xx)) + wrtspr(SPRN_EID); + else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + __mtmsrd(MSR_RI, 1); + else + mtmsr(mfmsr() | MSR_RI); +} + #ifdef CONFIG_PPC64 #include @@ -212,18 +261,6 @@ static inline bool arch_irqs_disabled(void) #endif /* CONFIG_PPC_BOOK3S */ -#ifdef CONFIG_PPC_BOOK3E -#define __hard_irq_enable() wrtee(MSR_EE) -#define __hard_irq_disable() wrtee(0) -#define __hard_EE_RI_disable() wrtee(0) -#define __hard_RI_enable() do { } while (0) -#else -#define __hard_irq_enable() __mtmsrd(MSR_EE|MSR_RI, 1) -#define __hard_irq_disable() __mtmsrd(MSR_RI, 1) -#define __hard_EE_RI_disable() __mtmsrd(0, 1) -#define __hard_RI_enable() __mtmsrd(MSR_RI, 1) -#endif - #define hard_irq_disable() do { \ unsigned long flags; \ __hard_irq_disable(); \ @@ -322,22 +359,12 @@ static inline unsigned long arch_local_irq_save(void) static inline void arch_local_irq_disable(void) { - if (IS_ENABLED(CONFIG_BOOKE)) - wrtee(0); - else if (IS_ENABLED(CONFIG_PPC_8xx)) - wrtspr(SPRN_EID); - else - mtmsr(mfmsr() & ~MSR_EE); + __hard_irq_disable(); } static inline void arch_local_irq_enable(void) { - if (IS_ENABLED(CONFIG_BOOKE)) - wrtee(MSR_EE); - else if (IS_ENABLED(CONFIG_PPC_8xx)) - wrtspr(SPRN_EIE); - else - mtmsr(mfmsr() | MSR_EE); + __hard_irq_enable(); } static inline bool arch_irqs_disabled_flags(unsigned long flags) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index eb4aa889916d..da103e92c112 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1375,6 +1375,7 @@ #define mtmsr(v) asm volatile("mtmsr %0" : \ : "r" ((unsigned long)(v)) \ : "memory") +#define __mtmsrd(v, l) BUILD_BUG() #define __MTMSR "mtmsr" #endif From 6650c4782d5788346a25a4f698880d124f2699a0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 8 Feb 2021 15:10:26 +0000 Subject: [PATCH 167/188] powerpc/irq: Add stub irq_soft_mask_return() for PPC32 To allow building 
syscall_64.c smoothly on PPC32, add stub version of irq_soft_mask_return(). Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9b9f62c5e2e63cc121fd749a923aaaee92ee0da4.1612796617.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/hw_irq.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 4739f61e632c..56a98936a6a9 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -330,6 +330,11 @@ static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned l } #else /* CONFIG_PPC64 */ +static inline notrace unsigned long irq_soft_mask_return(void) +{ + return 0; +} + static inline unsigned long arch_local_save_flags(void) { return mfmsr(); From ab1a517d55b01b54ba70f5d54f926f5ab4b18339 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 8 Feb 2021 15:10:27 +0000 Subject: [PATCH 168/188] powerpc/syscall: Rename syscall_64.c into interrupt.c syscall_64.c will be reused almost as is for PPC32. As this file also contains functions to handle other types of interrupts, rename it interrupt.c. Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/cddc2deaa8f049d3ec419738e69804934919b935.1612796617.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/{syscall_64.c => interrupt.c} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename arch/powerpc/kernel/{syscall_64.c => interrupt.c} (100%) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index fe2ef598e2ea..b7a82276d569 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -49,7 +49,7 @@ obj-y := cputable.o syscalls.o \ hw_breakpoint_constraints.o obj-y += ptrace/ obj-$(CONFIG_PPC64) += setup_64.o \ - paca.o nvram_64.o note.o syscall_64.o + paca.o nvram_64.o note.o interrupt.o obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/interrupt.c similarity index 100% rename from arch/powerpc/kernel/syscall_64.c rename to arch/powerpc/kernel/interrupt.c From 344bb20b159dd0996e521c0d4c131a6ae10c322a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 8 Feb 2021 15:10:28 +0000 Subject: [PATCH 169/188] powerpc/syscall: Make interrupt.c buildable on PPC32 To allow building interrupt.c on PPC32, ifdef out specific PPC64 code or use helpers which are available on both PPC32 and PPC64. Modify the Makefile to always build interrupt.o. Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ba073ad67bd971a88ce331b65d6655523b54c794.1612796617.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/Makefile | 4 ++-- arch/powerpc/kernel/interrupt.c | 31 ++++++++++++++++++++++++------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index b7a82276d569..9c4d75a1eed2 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -46,10 +46,10 @@ obj-y := cputable.o syscalls.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ of_platform.o prom_parse.o firmware.o \ - hw_breakpoint_constraints.o + hw_breakpoint_constraints.o interrupt.o obj-y += ptrace/ obj-$(CONFIG_PPC64) += setup_64.o \ -
paca.o nvram_64.o note.o interrupt.o + paca.o nvram_64.o note.o obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index d6be4f9a67e5..2dac4d2bb1cf 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -39,7 +39,7 @@ notrace long system_call_exception(long r3, long r4, long r5, BUG_ON(!(regs->msr & MSR_RI)); BUG_ON(!(regs->msr & MSR_PR)); BUG_ON(!FULL_REGS(regs)); - BUG_ON(regs->softe != IRQS_ENABLED); + BUG_ON(arch_irq_disabled_regs(regs)); #ifdef CONFIG_PPC_PKEY if (mmu_has_feature(MMU_FTR_PKEY)) { @@ -65,7 +65,9 @@ notrace long system_call_exception(long r3, long r4, long r5, isync(); } else #endif +#ifdef CONFIG_PPC64 kuap_check_amr(); +#endif account_cpu_user_entry(); @@ -77,7 +79,7 @@ notrace long system_call_exception(long r3, long r4, long r5, * frame, or if the unwinder was taught the first stack frame always * returns to user with IRQS_ENABLED, this store could be avoided! */ - regs->softe = IRQS_ENABLED; + irq_soft_mask_regs_set_state(regs, IRQS_ENABLED); local_irq_enable(); @@ -151,6 +153,7 @@ static notrace inline bool __prep_irq_for_enabled_exit(bool clear_ri) __hard_EE_RI_disable(); else __hard_irq_disable(); +#ifdef CONFIG_PPC64 if (unlikely(lazy_irq_pending_nocheck())) { /* Took an interrupt, may have more exit work to do. */ if (clear_ri) @@ -162,7 +165,7 @@ static notrace inline bool __prep_irq_for_enabled_exit(bool clear_ri) } local_paca->irq_happened = 0; irq_soft_mask_set(IRQS_ENABLED); - +#endif return true; } @@ -216,7 +219,9 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, CT_WARN_ON(ct_state() == CONTEXT_USER); +#ifdef CONFIG_PPC64 kuap_check_amr(); +#endif regs->result = r3; @@ -309,7 +314,7 @@ again: account_cpu_user_exit(); -#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */ +#ifdef CONFIG_PPC_BOOK3S_64 /* BOOK3E and ppc32 not using this */ /* * We do this at the end so that we do context switch with KERNEL AMR */ @@ -318,7 +323,7 @@ again: return ret; } -#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */ +#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not yet using this */ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr) { #ifdef CONFIG_PPC_BOOK3E @@ -333,14 +338,16 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned BUG_ON(!(regs->msr & MSR_RI)); BUG_ON(!(regs->msr & MSR_PR)); BUG_ON(!FULL_REGS(regs)); - BUG_ON(regs->softe != IRQS_ENABLED); + BUG_ON(arch_irq_disabled_regs(regs)); CT_WARN_ON(ct_state() == CONTEXT_USER); /* * We don't need to restore AMR on the way back to userspace for KUAP. * AMR can only have been unlocked if we interrupted the kernel. 
*/ +#ifdef CONFIG_PPC64 kuap_check_amr(); +#endif local_irq_save(flags); @@ -407,7 +414,9 @@ again: /* * We do this at the end so that we do context switch with KERNEL AMR */ +#ifdef CONFIG_PPC64 kuap_user_restore(regs); +#endif return ret; } @@ -419,7 +428,9 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign unsigned long *ti_flagsp = &current_thread_info()->flags; unsigned long flags; unsigned long ret = 0; +#ifdef CONFIG_PPC64 unsigned long amr; +#endif if (IS_ENABLED(CONFIG_PPC_BOOK3S) && unlikely(!(regs->msr & MSR_RI))) unrecoverable_exception(regs); @@ -432,7 +443,9 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign if (TRAP(regs) != 0x700) CT_WARN_ON(ct_state() == CONTEXT_USER); +#ifdef CONFIG_PPC64 amr = kuap_get_and_check_amr(); +#endif if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) { clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp); @@ -441,7 +454,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign local_irq_save(flags); - if (regs->softe == IRQS_ENABLED) { + if (!arch_irq_disabled_regs(regs)) { /* Returning to a kernel context with local irqs enabled. */ WARN_ON_ONCE(!(regs->msr & MSR_EE)); again: @@ -458,8 +471,10 @@ again: } else { /* Returning to a kernel context with local irqs disabled. */ __hard_EE_RI_disable(); +#ifdef CONFIG_PPC64 if (regs->msr & MSR_EE) local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; +#endif } @@ -472,7 +487,9 @@ again: * which would cause Read-After-Write stalls. Hence, we take the AMR * value from the check above. */ +#ifdef CONFIG_PPC64 kuap_kernel_restore(regs, amr); +#endif return ret; } From 72b7a9e56b25babfe4c90bf3ce88285c7fb62ab9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 8 Feb 2021 15:10:29 +0000 Subject: [PATCH 170/188] powerpc/syscall: Use is_compat_task() Instead of hard comparing task flags with _TIF_32BIT, use is_compat_task(). The advantage is that it returns 0 on PPC32 although _TIF_32BIT is always set. Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c8094662199337a7200fea9f6e1d1f8b1b6d5f69.1612796617.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/interrupt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 2dac4d2bb1cf..46fd195ca659 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -2,6 +2,8 @@ #include #include +#include + #include #include #include @@ -118,7 +120,7 @@ notrace long system_call_exception(long r3, long r4, long r5, /* May be faster to do array_index_nospec?
*/ barrier_nospec(); - if (unlikely(is_32bit_task())) { + if (unlikely(is_compat_task())) { f = (void *)compat_sys_call_table[r0]; r3 &= 0x00000000ffffffffULL; From 8875f47b7681aa4e4484a9b612577b044725f839 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 8 Feb 2021 15:10:30 +0000 Subject: [PATCH 171/188] powerpc/syscall: Save r3 in regs->orig_r3 Save r3 in regs->orig_r3 in system_call_exception(). Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9a90805ab6b9101b46daf56470f457a57acd86fc.1612796617.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_64.S | 2 -- arch/powerpc/kernel/interrupt.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 4b2d2823c7a5..14696faa72f8 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -108,7 +108,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) li r11,\trapnr std r11,_TRAP(r1) std r12,_CCR(r1) - std r3,ORIG_GPR3(r1) addi r10,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) std r11,-16(r10) /* "regshere" marker */ @@ -284,7 +283,6 @@ END_BTB_FLUSH_SECTION std r10,_LINK(r1) std r11,_TRAP(r1) std r12,_CCR(r1) - std r3,ORIG_GPR3(r1) addi r10,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) std r11,-16(r10) /* "regshere" marker */ diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 46fd195ca659..1a2dec49f811 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -29,6 +29,8 @@ notrace long system_call_exception(long r3, long r4, long r5, { syscall_fn f; + regs->orig_gpr3 = r3; + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); From c01b916658150e98f00a4981750c37a3224c8735 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 8 Feb 2021 15:10:31 +0000 Subject: [PATCH 172/188] powerpc/syscall: Change condition to check MSR_RI In system_call_exception(), MSR_RI also needs to be checked on 8xx. Only booke and 40x don't have MSR_RI.
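For clarity, a sketch of what the new guard covers compared to the old one (illustrative only; the actual change is in the diff below):

	/*
	 * platform      has MSR_RI   old guard (BOOK3S)   new guard (!BOOKE && !40x)
	 * book3s/32,64     yes          checked               checked
	 * 8xx              yes          skipped               checked
	 * booke, 40x       no           skipped               skipped
	 */
	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
		BUG_ON(!(regs->msr & MSR_RI));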
Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/67820fada8dd6a8fe9d7b666f175d4cc9d8de87e.1612796617.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/interrupt.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 1a2dec49f811..107ec39f05cb 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -39,7 +39,7 @@ notrace long system_call_exception(long r3, long r4, long r5, trace_hardirqs_off(); /* finish reconciling */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S)) + if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) BUG_ON(!(regs->msr & MSR_RI)); BUG_ON(!(regs->msr & MSR_PR)); BUG_ON(!FULL_REGS(regs)); @@ -338,7 +338,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned unsigned long flags; unsigned long ret = 0; - if (IS_ENABLED(CONFIG_PPC_BOOK3S)) + if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) BUG_ON(!(regs->msr & MSR_RI)); BUG_ON(!(regs->msr & MSR_PR)); BUG_ON(!FULL_REGS(regs)); @@ -436,7 +436,8 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign unsigned long amr; #endif - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && unlikely(!(regs->msr & MSR_RI))) + if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) && + unlikely(!(regs->msr & MSR_RI))) unrecoverable_exception(regs); BUG_ON(regs->msr & MSR_PR); BUG_ON(!FULL_REGS(regs)); From fbcee2ebe8edbb6a93316f0a189ae7fcfaa7094f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 8 Feb 2021 15:10:32 +0000 Subject: [PATCH 173/188] powerpc/32: Always save non volatile GPRs at syscall entry In preparation for porting syscall entry/exit to C, unconditionally save non-volatile general purpose registers. Commit 965dd3ad3076 ("powerpc/64/syscall: Remove non-volatile GPR save optimisation") provides a detailed explanation. This increases the cycle count by 24 on 8xx with the null_syscall benchmark (280 => 304 cycles). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/21c08162b83655195fe9ead78ff2cfd28508d023.1612796617.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 46 +----------------------- arch/powerpc/kernel/head_32.h | 2 +- arch/powerpc/kernel/head_booke.h | 2 +- arch/powerpc/kernel/syscalls/syscall.tbl | 20 +++-------- 4 files changed, 8 insertions(+), 62 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index b1e36602c013..97dc28a68465 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -351,6 +351,7 @@ trace_syscall_entry_irq_off: .globl transfer_to_syscall transfer_to_syscall: + SAVE_NVGPRS(r1) #ifdef CONFIG_PPC_BOOK3S_32 kuep_lock r11, r12 #endif @@ -614,51 +615,6 @@ ret_from_kernel_syscall: #endif _ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall) -/* - * The fork/clone functions need to copy the full register set into - * the child process. Therefore we need to save all the nonvolatile - * registers (r13 - r31) before calling the C code.
- */ - .globl ppc_fork -ppc_fork: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ - stw r0,_TRAP(r1) /* register set saved */ - b sys_fork - - .globl ppc_vfork -ppc_vfork: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ - stw r0,_TRAP(r1) /* register set saved */ - b sys_vfork - - .globl ppc_clone -ppc_clone: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ - stw r0,_TRAP(r1) /* register set saved */ - b sys_clone - - .globl ppc_clone3 -ppc_clone3: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ - stw r0,_TRAP(r1) /* register set saved */ - b sys_clone3 - - .globl ppc_swapcontext -ppc_swapcontext: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ - stw r0,_TRAP(r1) /* register set saved */ - b sys_swapcontext - /* * Top-level page fault handling. * This is in assembler because if do_page_fault tells us that diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 930377375dc0..573c1d570a6a 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -148,7 +148,7 @@ stw r2,GPR2(r11) addi r10,r10,STACK_FRAME_REGS_MARKER@l stw r9,_MSR(r11) - li r2, \trapno + 1 + li r2, \trapno stw r10,8(r11) stw r2,_TRAP(r11) SAVE_GPR(0, r11) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index b3c502c503a0..626e716576ce 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -124,7 +124,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) stw r2,GPR2(r11) addi r12, r12, STACK_FRAME_REGS_MARKER@l stw r9,_MSR(r11) - li r2, \trapno + 1 + li r2, \trapno stw r12, 8(r11) stw r2,_TRAP(r11) SAVE_GPR(0, r11) diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index f744eb5cba88..96b2157f0371 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -9,9 +9,7 @@ # 0 nospu restart_syscall sys_restart_syscall 1 nospu exit sys_exit -2 32 fork ppc_fork sys_fork -2 64 fork sys_fork -2 spu fork sys_ni_syscall +2 nospu fork sys_fork 3 common read sys_read 4 common write sys_write 5 common open sys_open compat_sys_open @@ -160,9 +158,7 @@ 119 32 sigreturn sys_sigreturn compat_sys_sigreturn 119 64 sigreturn sys_ni_syscall 119 spu sigreturn sys_ni_syscall -120 32 clone ppc_clone sys_clone -120 64 clone sys_clone -120 spu clone sys_ni_syscall +120 nospu clone sys_clone 121 common setdomainname sys_setdomainname 122 common uname sys_newuname 123 common modify_ldt sys_ni_syscall @@ -244,9 +240,7 @@ 186 spu sendfile sys_sendfile64 187 common getpmsg sys_ni_syscall 188 common putpmsg sys_ni_syscall -189 32 vfork ppc_vfork sys_vfork -189 64 vfork sys_vfork -189 spu vfork sys_ni_syscall +189 nospu vfork sys_vfork 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 common readahead sys_readahead compat_sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 @@ -322,9 +316,7 @@ 248 32 clock_nanosleep sys_clock_nanosleep_time32 248 64 clock_nanosleep sys_clock_nanosleep 248 spu clock_nanosleep sys_clock_nanosleep -249 32 swapcontext ppc_swapcontext compat_sys_swapcontext -249 64 swapcontext sys_swapcontext -249 spu swapcontext sys_ni_syscall +249 nospu swapcontext sys_swapcontext compat_sys_swapcontext 250 common tgkill sys_tgkill 251 32 utimes sys_utimes_time32 251 64 utimes sys_utimes @@ -522,9 +514,7 @@ 432 common fsmount 
sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open -435 32 clone3 ppc_clone3 sys_clone3 -435 64 clone3 sys_clone3 -435 spu clone3 sys_ni_syscall +435 nospu clone3 sys_clone3 436 common close_range sys_close_range 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd From 6f76a01173ccaa363739f913394d4e138d92d718 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 8 Feb 2021 15:10:33 +0000 Subject: [PATCH 174/188] powerpc/syscall: implement system call entry/exit logic in C for PPC32 That's a port of the PPC64 syscall entry/exit logic in C to PPC32. Performance-wise on 8xx: Before : 304 cycles on null_syscall After : 348 cycles on null_syscall Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a93b08e1275e9d1f0b1c39043d1b827586b2b401.1612796617.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 224 +++++-------------------------- arch/powerpc/kernel/head_32.h | 18 --- arch/powerpc/kernel/head_booke.h | 17 --- 3 files changed, 30 insertions(+), 229 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 97dc28a68465..bbf7ecea6fe0 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -329,117 +329,23 @@ stack_ovf: _ASM_NOKPROBE_SYMBOL(stack_ovf) #endif -#ifdef CONFIG_TRACE_IRQFLAGS -trace_syscall_entry_irq_off: - /* - * Syscall shouldn't happen while interrupts are disabled, - * so let's do a warning here. - */ -0: trap - EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING - bl trace_hardirqs_on - - /* Now enable for real */ - LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE) - mtmsr r10 - - REST_GPR(0, r1) - REST_4GPRS(3, r1) - REST_2GPRS(7, r1) - b DoSyscall -#endif /* CONFIG_TRACE_IRQFLAGS */ - .globl transfer_to_syscall transfer_to_syscall: SAVE_NVGPRS(r1) #ifdef CONFIG_PPC_BOOK3S_32 kuep_lock r11, r12 #endif -#ifdef CONFIG_TRACE_IRQFLAGS - andi. r12,r9,MSR_EE - beq- trace_syscall_entry_irq_off -#endif /* CONFIG_TRACE_IRQFLAGS */ -/* - * Handle a system call. - */ - .stabs "arch/powerpc/kernel/",N_SO,0,0,0f - .stabs "entry_32.S",N_SO,0,0,0f -0: + /* Calling convention has r9 = orig r0, r10 = regs */ + addi r10,r1,STACK_FRAME_OVERHEAD + mr r9,r0 + stw r10,THREAD+PT_REGS(r2) + bl system_call_exception -_GLOBAL(DoSyscall) - stw r3,ORIG_GPR3(r1) - li r12,0 - stw r12,RESULT(r1) -#ifdef CONFIG_TRACE_IRQFLAGS - /* Make sure interrupts are enabled */ - mfmsr r11 - andi. r12,r11,MSR_EE - /* We came in with interrupts disabled, we WARN and mark them enabled - * for lockdep now */ -0: tweqi r12, 0 - EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING -#endif /* CONFIG_TRACE_IRQFLAGS */ - lwz r11,TI_FLAGS(r2) - andi. r11,r11,_TIF_SYSCALL_DOTRACE - bne- syscall_dotrace -syscall_dotrace_cont: - cmplwi 0,r0,NR_syscalls - lis r10,sys_call_table@h - ori r10,r10,sys_call_table@l - slwi r0,r0,2 - bge- 66f - - barrier_nospec_asm - /* - * Prevent the load of the handler below (based on the user-passed - * system call number) being speculatively executed until the test - * against NR_syscalls and branch to .66f above has - * committed.
- */ - - lwzx r10,r10,r0 /* Fetch system call handler [ptr] */ - mtlr r10 - addi r9,r1,STACK_FRAME_OVERHEAD - PPC440EP_ERR42 - blrl /* Call handler */ - .globl ret_from_syscall ret_from_syscall: -#ifdef CONFIG_DEBUG_RSEQ - /* Check whether the syscall is issued inside a restartable sequence */ - stw r3,GPR3(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl rseq_syscall - lwz r3,GPR3(r1) -#endif - mr r6,r3 - /* disable interrupts so current_thread_info()->flags can't change */ - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */ - /* Note: We don't bother telling lockdep about it */ - mtmsr r10 - lwz r9,TI_FLAGS(r2) - li r8,-MAX_ERRNO - andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) - bne- syscall_exit_work - cmplw 0,r3,r8 - blt+ syscall_exit_cont - lwz r11,_CCR(r1) /* Load CR */ - neg r3,r3 - oris r11,r11,0x1000 /* Set SO bit in CR */ - stw r11,_CCR(r1) -syscall_exit_cont: - lwz r8,_MSR(r1) -#ifdef CONFIG_TRACE_IRQFLAGS - /* If we are going to return from the syscall with interrupts - * off, we trace that here. It shouldn't normally happen. - */ - andi. r10,r8,MSR_EE - bne+ 1f - stw r3,GPR3(r1) - bl trace_hardirqs_off - lwz r3,GPR3(r1) -1: -#endif /* CONFIG_TRACE_IRQFLAGS */ + addi r4,r1,STACK_FRAME_OVERHEAD + li r5,0 + bl syscall_exit_prepare #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) /* If the process has its own DBCR0 value, load it up. The internal debug mode bit tells us that dbcr0 should be loaded. */ @@ -453,12 +359,6 @@ syscall_exit_cont: cmplwi cr0,r5,0 bne- 2f #endif /* CONFIG_PPC_47x */ -1: -BEGIN_FTR_SECTION - lwarx r7,0,r1 -END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) - stwcx. r0,0,r1 /* to clear the reservation */ - ACCOUNT_CPU_USER_EXIT(r2, r5, r7) #ifdef CONFIG_PPC_BOOK3S_32 kuep_unlock r5, r7 #endif @@ -466,21 +366,36 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) lwz r4,_LINK(r1) lwz r5,_CCR(r1) mtlr r4 - mtcr r5 lwz r7,_NIP(r1) - lwz r2,GPR2(r1) - lwz r1,GPR1(r1) + lwz r8,_MSR(r1) + cmpwi r3,0 + lwz r3,GPR3(r1) syscall_exit_finish: -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) - mtspr SPRN_NRI, r0 -#endif mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 + + bne 3f + mtcr r5 + +1: lwz r2,GPR2(r1) + lwz r1,GPR1(r1) rfi #ifdef CONFIG_40x b . /* Prevent prefetch past rfi */ #endif -_ASM_NOKPROBE_SYMBOL(syscall_exit_finish) + +3: mtcr r5 + lwz r4,_CTR(r1) + lwz r5,_XER(r1) + REST_NVGPRS(r1) + mtctr r4 + mtxer r5 + lwz r0,GPR0(r1) + lwz r3,GPR3(r1) + REST_8GPRS(4,r1) + lwz r12,GPR12(r1) + b 1b + #ifdef CONFIG_44x 2: li r7,0 iccci r0,r0 @@ -488,9 +403,6 @@ _ASM_NOKPROBE_SYMBOL(syscall_exit_finish) b 1b #endif /* CONFIG_44x */ -66: li r3,-ENOSYS - b ret_from_syscall - .globl ret_from_fork ret_from_fork: REST_NVGPRS(r1) @@ -509,82 +421,6 @@ ret_from_kernel_thread: li r3,0 b ret_from_syscall -/* Traced system call support */ -syscall_dotrace: - SAVE_NVGPRS(r1) - li r0,0xc00 - stw r0,_TRAP(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_syscall_trace_enter - /* - * Restore argument registers possibly just changed. - * We use the return value of do_syscall_trace_enter - * for call number to look up in the table (r0). - */ - mr r0,r3 - lwz r3,GPR3(r1) - lwz r4,GPR4(r1) - lwz r5,GPR5(r1) - lwz r6,GPR6(r1) - lwz r7,GPR7(r1) - lwz r8,GPR8(r1) - REST_NVGPRS(r1) - - cmplwi r0,NR_syscalls - /* Return code is already in r3 thanks to do_syscall_trace_enter() */ - bge- ret_from_syscall - b syscall_dotrace_cont - -syscall_exit_work: - andi. 
r0,r9,_TIF_RESTOREALL - beq+ 0f - REST_NVGPRS(r1) - b 2f -0: cmplw 0,r3,r8 - blt+ 1f - andi. r0,r9,_TIF_NOERROR - bne- 1f - lwz r11,_CCR(r1) /* Load CR */ - neg r3,r3 - oris r11,r11,0x1000 /* Set SO bit in CR */ - stw r11,_CCR(r1) - -1: stw r6,RESULT(r1) /* Save result */ - stw r3,GPR3(r1) /* Update return value */ -2: andi. r0,r9,(_TIF_PERSYSCALL_MASK) - beq 4f - - /* Clear per-syscall TIF flags if any are set. */ - - li r11,_TIF_PERSYSCALL_MASK - addi r12,r2,TI_FLAGS -3: lwarx r8,0,r12 - andc r8,r8,r11 - stwcx. r8,0,r12 - bne- 3b - -4: /* Anything which requires enabling interrupts? */ - andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP) - beq ret_from_except - - /* Re-enable interrupts. There is no need to trace that with - * lockdep as we are supposed to have IRQs on at this point - */ - ori r10,r10,MSR_EE - mtmsr r10 - - /* Save NVGPRS if they're not saved already */ - lwz r4,_TRAP(r1) - andi. r4,r4,1 - beq 5f - SAVE_NVGPRS(r1) - li r4,0xc00 - stw r4,_TRAP(r1) -5: - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_syscall_trace_leave - b ret_from_except_full - /* * System call was called from kernel. We get here with SRR1 in r9. * Mark the exception as recoverable once we have retrieved SRR0, diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 573c1d570a6a..af4978e9d50d 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -154,17 +154,12 @@ SAVE_GPR(0, r11) SAVE_4GPRS(3, r11) SAVE_2GPRS(7, r11) - addi r11,r1,STACK_FRAME_OVERHEAD addi r2,r12,-THREAD - stw r11,PT_REGS(r12) #if defined(CONFIG_40x) /* Check to see if the dbcr0 register is set up to debug. Use the internal debug mode bit to do this. */ lwz r12,THREAD_DBCR0(r12) andis. r12,r12,DBCR0_IDM@h -#endif - ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) -#if defined(CONFIG_40x) beq+ 3f /* From user and task is ptraced - load up global dbcr0 */ li r12,-1 /* clear all pending debug events */ @@ -176,21 +171,8 @@ lwz r12,4(r11) addi r12,r12,-1 stw r12,4(r11) -#endif - 3: -#ifdef CONFIG_TRACE_IRQFLAGS - /* - * If MSR is changing we need to keep interrupts disabled at this point - * otherwise we might risk taking an interrupt before we tell lockdep - * they are enabled. - */ - LOAD_REG_IMMEDIATE(r10, MSR_KERNEL) - rlwimi r10, r9, 0, MSR_EE -#else - LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE) #endif - mtmsr r10 b transfer_to_syscall /* jump to handler */ 99: b ret_from_kernel_syscall .endm diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 626e716576ce..db931f1167aa 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -131,14 +131,11 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) SAVE_4GPRS(3, r11) SAVE_2GPRS(7, r11) - addi r11,r1,STACK_FRAME_OVERHEAD addi r2,r10,-THREAD - stw r11,PT_REGS(r10) /* Check to see if the dbcr0 register is set up to debug. Use the internal debug mode bit to do this. */ lwz r12,THREAD_DBCR0(r10) andis. r12,r12,DBCR0_IDM@h - ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) beq+ 3f /* From user and task is ptraced - load up global dbcr0 */ li r12,-1 /* clear all pending debug events */ @@ -157,20 +154,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) stw r12,4(r11) 3: -#ifdef CONFIG_TRACE_IRQFLAGS - /* - * If MSR is changing we need to keep interrupts disabled at this point - * otherwise we might risk taking an interrupt before we tell lockdep - * they are enabled. 
- */ - lis r10, MSR_KERNEL@h - ori r10, r10, MSR_KERNEL@l - rlwimi r10, r9, 0, MSR_EE -#else - lis r10, (MSR_KERNEL | MSR_EE)@h - ori r10, r10, (MSR_KERNEL | MSR_EE)@l -#endif - mtmsr r10 b transfer_to_syscall /* jump to handler */ 99: b ret_from_kernel_syscall .endm From 4d67facbcbdb3d9e3c9cb82e4ec47fc63d298dd8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 8 Feb 2021 15:10:34 +0000 Subject: [PATCH 175/188] powerpc/32: Remove verification of MSR_PR on syscall in the ASM entry system_call_exception() checks MSR_PR and BUGs if a syscall is issued from kernel mode. No need to handle it anymore from the ASM entry code. null_syscall reduction 2 cycles (348 => 346 cycles) Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1eddb42cb12092b1e3d72608d182c365db3da41d.1612796617.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 30 ------------------------------ arch/powerpc/kernel/head_32.h | 3 --- arch/powerpc/kernel/head_booke.h | 3 --- 3 files changed, 36 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index bbf7ecea6fe0..cffe58e63356 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -421,36 +421,6 @@ ret_from_kernel_thread: li r3,0 b ret_from_syscall - /* - * System call was called from kernel. We get here with SRR1 in r9. - * Mark the exception as recoverable once we have retrieved SRR0, - * trap a warning and return ENOSYS with CR[SO] set. - */ - .globl ret_from_kernel_syscall -ret_from_kernel_syscall: - mfspr r9, SPRN_SRR0 - mfspr r10, SPRN_SRR1 -#if !defined(CONFIG_4xx) && !defined(CONFIG_BOOKE) - LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_IR|MSR_DR)) - mtmsr r11 -#endif - -0: trap - EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING - - li r3, ENOSYS - crset so -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) - mtspr SPRN_NRI, r0 -#endif - mtspr SPRN_SRR0, r9 - mtspr SPRN_SRR1, r10 - rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif -_ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall) - /* * Top-level page fault handling. * This is in assembler because if do_page_fault tells us that diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index af4978e9d50d..0e0e7b5b93ab 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -118,8 +118,6 @@ .macro SYSCALL_ENTRY trapno mfspr r9, SPRN_SRR1 mfspr r10, SPRN_SRR0 - andi. r11, r9, MSR_PR - beq- 99f LOAD_REG_IMMEDIATE(r11, MSR_KERNEL) /* can take exceptions */ lis r12, 1f@h ori r12, r12, 1f@l @@ -174,7 +172,6 @@ 3: #endif b transfer_to_syscall /* jump to handler */ -99: b ret_from_kernel_syscall .endm .macro save_dar_dsisr_on_stack reg1, reg2, sp diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index db931f1167aa..bfbd240cc8a2 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -106,10 +106,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #endif mfspr r9, SPRN_SRR1 BOOKE_CLEAR_BTB(r11) - andi. 
r11, r9, MSR_PR lwz r11, TASK_STACK - THREAD(r10) rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */ - beq- 99f ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE) stw r12, _CCR(r11) /* save various registers */ mflr r12 @@ -155,7 +153,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) 3: b transfer_to_syscall /* jump to handler */ -99: b ret_from_kernel_syscall .endm /* To handle the additional exception priority levels on 40x and Book-E From eb595eca74067b78d36fb188b555e30f28686fc7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 8 Feb 2021 15:10:41 +0000 Subject: [PATCH 176/188] powerpc/32: Remove the counter in global_dbcr0 global_dbcr0 has two parts, 4 bytes to save/restore the value of SPRN_DBCR0, and 4 bytes that are incremented/decremented every time something is saving/loading the above value. This counter is only incremented/decremented; its value is never used and never read. Remove the counter and divide the size of global_dbcr0 by 2. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7e381dc58b3f583556cfab37ba5d813bfd5cce1e.1612796617.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 12 +++--------- arch/powerpc/kernel/head_32.h | 3 --- arch/powerpc/kernel/head_booke.h | 5 +---- 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index cffe58e63356..9dd90be9f8a5 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -175,14 +175,11 @@ transfer_to_handler: addi r11,r11,global_dbcr0@l #ifdef CONFIG_SMP lwz r9,TASK_CPU(r2) - slwi r9,r9,3 + slwi r9,r9,2 add r11,r11,r9 #endif lwz r12,0(r11) mtspr SPRN_DBCR0,r12 - lwz r12,4(r11) - addi r12,r12,-1 - stw r12,4(r11) #endif b 3f @@ -981,14 +978,11 @@ load_dbcr0: addi r11,r11,global_dbcr0@l #ifdef CONFIG_SMP lwz r9,TASK_CPU(r2) - slwi r9,r9,3 + slwi r9,r9,2 add r11,r11,r9 #endif stw r10,0(r11) mtspr SPRN_DBCR0,r0 - lwz r10,4(r11) - addi r10,r10,1 - stw r10,4(r11) li r11,-1 mtspr SPRN_DBSR,r11 /* clear all pending debug events */ blr @@ -997,7 +991,7 @@ load_dbcr0: .align 4 .global global_dbcr0 global_dbcr0: - .space 8*NR_CPUS + .space 4*NR_CPUS .previous #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 0e0e7b5b93ab..1afad7bc3395 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -166,9 +166,6 @@ addi r11,r11,global_dbcr0@l lwz r12,0(r11) mtspr SPRN_DBCR0,r12 - lwz r12,4(r11) - addi r12,r12,-1 - stw r12,4(r11) 3: #endif b transfer_to_syscall /* jump to handler */ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index bfbd240cc8a2..5f565232b99d 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -142,14 +142,11 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) addi r11,r11,global_dbcr0@l #ifdef CONFIG_SMP lwz r10, TASK_CPU(r2) - slwi r10, r10, 3 + slwi r10, r10, 2 add r11, r11, r10 #endif lwz r12,0(r11) mtspr SPRN_DBCR0,r12 - lwz r12,4(r11) - addi r12,r12,-1 - stw r12,4(r11) 3: b transfer_to_syscall /* jump to handler */ From b966f2279048ee9f30d83ef8568b99fa40917c54 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Feb 2021 19:29:27 +0000 Subject: [PATCH 177/188] powerpc/syscall: Do not check unsupported scv vector on PPC32 Only book3s/64 has scv. No need to check the 0x7ff0 trap on 32 or 64e. For that, add a helper trap_is_unsupported_scv() similar to trap_is_scv().
And ignore the scv parameter in syscall_exit_prepare (Save 14 cycles 346 => 332 cycles) Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/fb87b205ae8eb8c623f33bb316801acf95a831e6.1612898425.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ptrace.h | 5 +++++ arch/powerpc/kernel/interrupt.c | 9 +++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 58f9dc060a7b..c6b91e5c2701 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -229,6 +229,11 @@ static inline bool trap_is_scv(struct pt_regs *regs) return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x3000); } +static inline bool trap_is_unsupported_scv(struct pt_regs *regs) +{ + return IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x7ff0; +} + static inline bool trap_is_syscall(struct pt_regs *regs) { return (trap_is_scv(regs) || TRAP(regs) == 0xc00); diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 107ec39f05cb..75d657b63332 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -88,7 +88,7 @@ notrace long system_call_exception(long r3, long r4, long r5, local_irq_enable(); if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) { - if (unlikely(regs->trap == 0x7ff0)) { + if (unlikely(trap_is_unsupported_scv(regs))) { /* Unsupported scv vector */ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); return regs->gpr[3]; @@ -111,7 +111,7 @@ notrace long system_call_exception(long r3, long r4, long r5, r8 = regs->gpr[8]; } else if (unlikely(r0 >= NR_syscalls)) { - if (unlikely(regs->trap == 0x7ff0)) { + if (unlikely(trap_is_unsupported_scv(regs))) { /* Unsupported scv vector */ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); return regs->gpr[3]; @@ -220,6 +220,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, unsigned long *ti_flagsp = ¤t_thread_info()->flags; unsigned long ti_flags; unsigned long ret = 0; + bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv; CT_WARN_ON(ct_state() == CONTEXT_USER); @@ -234,7 +235,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, ti_flags = *ti_flagsp; - if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && !scv) { + if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) { if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { r3 = -r3; regs->ccr |= 0x10000000; /* Set SO bit in CR */ @@ -305,7 +306,7 @@ again: user_enter_irqoff(); /* scv need not set RI=0 because SRRs are not used */ - if (unlikely(!__prep_irq_for_enabled_exit(!scv))) { + if (unlikely(!__prep_irq_for_enabled_exit(is_not_scv))) { user_exit_irqoff(); local_irq_enable(); local_irq_disable(); From d524dda719f06967db4d3ba519edf9267f84c155 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Feb 2021 19:29:28 +0000 Subject: [PATCH 178/188] powerpc/32: Handle bookE debugging in C in syscall entry/exit The handling of SPRN_DBCR0 and other registers can easily be done in C instead of ASM. For that, create booke_load_dbcr0() and booke_restore_dbcr0(). 
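As a rough, self-contained sketch of the save/restore pairing this introduces (the names, the open-coded DBCR0_IDM value and the plain variables standing in for SPRN_DBCR0 are illustrative, not the kernel's):

	#define NR_CPUS_SKETCH	4
	#define IDM_SKETCH	0x40000000UL		/* stand-in for DBCR0_IDM */

	static unsigned long global_dbcr0_sketch[NR_CPUS_SKETCH];
	static unsigned long hw_dbcr0;			/* stand-in for SPRN_DBCR0 */

	/* Entry: if the task is being debugged, park the hardware register
	 * at the per-CPU kernel-safe value so the kernel itself is not
	 * single-stepped with the user's settings. */
	static void restore_dbcr0_sketch(int cpu, unsigned long task_dbcr0)
	{
		if (task_dbcr0 & IDM_SKETCH)
			hw_dbcr0 = global_dbcr0_sketch[cpu];
	}

	/* Exit: stash the current (kernel) value in the per-CPU slot and
	 * re-arm the task's debug settings before returning to user mode. */
	static void load_dbcr0_sketch(int cpu, unsigned long task_dbcr0)
	{
		if (!(task_dbcr0 & IDM_SKETCH))
			return;
		global_dbcr0_sketch[cpu] = hw_dbcr0;
		hw_dbcr0 = task_dbcr0;
	}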
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1a7515f9258b27a9177de88491a8bb79b255ceb7.1612898425.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/interrupt.h | 12 ++++++++ arch/powerpc/include/asm/reg_booke.h | 3 ++ arch/powerpc/kernel/entry_32.S | 7 ----- arch/powerpc/kernel/head_32.h | 15 ---------- arch/powerpc/kernel/head_booke.h | 19 ------------- arch/powerpc/kernel/interrupt.c | 41 ++++++++++++++++++---------- 6 files changed, 42 insertions(+), 55 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 5c285592d9b6..aedfba29e43a 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -15,6 +15,18 @@ struct interrupt_state { #endif }; +static inline void booke_restore_dbcr0(void) +{ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + unsigned long dbcr0 = current->thread.debug.dbcr0; + + if (IS_ENABLED(CONFIG_PPC32) && unlikely(dbcr0 & DBCR0_IDM)) { + mtspr(SPRN_DBSR, -1); + mtspr(SPRN_DBCR0, global_dbcr0[smp_processor_id()]); + } +#endif +} + static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) { /* diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 262782f08fd4..17b8dcd9a40d 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -691,6 +691,9 @@ #define mttmr(rn, v) asm volatile(MTTMR(rn, %0) : \ : "r" ((unsigned long)(v)) \ : "memory") + +extern unsigned long global_dbcr0[]; + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_POWERPC_REG_BOOKE_H__ */ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 9dd90be9f8a5..78c430b7f9d9 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -343,13 +343,6 @@ ret_from_syscall: addi r4,r1,STACK_FRAME_OVERHEAD li r5,0 bl syscall_exit_prepare -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - /* If the process has its own DBCR0 value, load it up. The internal - debug mode bit tells us that dbcr0 should be loaded. */ - lwz r0,THREAD+THREAD_DBCR0(r2) - andis. r10,r0,DBCR0_IDM@h - bnel- load_dbcr0 -#endif #ifdef CONFIG_PPC_47x lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 1afad7bc3395..5d4706c14572 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -153,21 +153,6 @@ SAVE_4GPRS(3, r11) SAVE_2GPRS(7, r11) addi r2,r12,-THREAD -#if defined(CONFIG_40x) - /* Check to see if the dbcr0 register is set up to debug. Use the - internal debug mode bit to do this. */ - lwz r12,THREAD_DBCR0(r12) - andis. r12,r12,DBCR0_IDM@h - beq+ 3f - /* From user and task is ptraced - load up global dbcr0 */ - li r12,-1 /* clear all pending debug events */ - mtspr SPRN_DBSR,r12 - lis r11,global_dbcr0@ha - addi r11,r11,global_dbcr0@l - lwz r12,0(r11) - mtspr SPRN_DBCR0,r12 -3: -#endif b transfer_to_syscall /* jump to handler */ .endm diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 5f565232b99d..47857795f50a 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -130,25 +130,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) SAVE_2GPRS(7, r11) addi r2,r10,-THREAD - /* Check to see if the dbcr0 register is set up to debug. Use the - internal debug mode bit to do this. */ - lwz r12,THREAD_DBCR0(r10) - andis. 
r12,r12,DBCR0_IDM@h - beq+ 3f - /* From user and task is ptraced - load up global dbcr0 */ - li r12,-1 /* clear all pending debug events */ - mtspr SPRN_DBSR,r12 - lis r11,global_dbcr0@ha - addi r11,r11,global_dbcr0@l -#ifdef CONFIG_SMP - lwz r10, TASK_CPU(r2) - slwi r10, r10, 2 - add r11, r11, r10 -#endif - lwz r12,0(r11) - mtspr SPRN_DBCR0,r12 - -3: b transfer_to_syscall /* jump to handler */ .endm diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 75d657b63332..f93664ad4a5e 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -73,6 +73,8 @@ notrace long system_call_exception(long r3, long r4, long r5, kuap_check_amr(); #endif + booke_restore_dbcr0(); + account_cpu_user_entry(); account_stolen_time(); @@ -204,6 +206,28 @@ static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_en return false; } +static notrace void booke_load_dbcr0(void) +{ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + unsigned long dbcr0 = current->thread.debug.dbcr0; + + if (likely(!(dbcr0 & DBCR0_IDM))) + return; + + /* + * Check to see if the dbcr0 register is set up to debug. + * Use the internal debug mode bit to do this. + */ + mtmsr(mfmsr() & ~MSR_DE); + if (IS_ENABLED(CONFIG_PPC32)) { + isync(); + global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0); + } + mtspr(SPRN_DBCR0, dbcr0); + mtspr(SPRN_DBSR, -1); +#endif +} + /* * This should be called after a syscall returns, with r3 the return value * from the syscall. If this function returns non-zero, the system call @@ -317,6 +341,8 @@ again: local_paca->tm_scratch = regs->msr; #endif + booke_load_dbcr0(); + account_cpu_user_exit(); #ifdef CONFIG_PPC_BOOK3S_64 /* BOOK3E and ppc32 not using this */ @@ -357,9 +357,6 @@ again: #ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not yet using this */ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr) { -#ifdef CONFIG_PPC_BOOK3E - struct thread_struct *ts = &current->thread; -#endif unsigned long *ti_flagsp = &current_thread_info()->flags; unsigned long ti_flags; unsigned long flags; @@ -398,17 +421,7 @@ again: goto again; } -#ifdef CONFIG_PPC_BOOK3E - if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) { - /* - * Check to see if the dbcr0 register is set up to debug. - * Use the internal debug mode bit to do this. - */ - mtmsr(mfmsr() & ~MSR_DE); - mtspr(SPRN_DBCR0, ts->debug.dbcr0); - mtspr(SPRN_DBSR, -1); - } -#endif + booke_load_dbcr0(); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM local_paca->tm_scratch = regs->msr; From 5b90b9661a3396e00f6e8bcbb617a0787fb683d0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Feb 2021 08:44:09 +0000 Subject: [PATCH 179/188] powerpc/syscall: Avoid storing 'current' in another pointer By saving the pointer pointing to thread_info.flags, gcc copies r2 into a non-volatile register. We know 'current' doesn't change, so avoid that intermediate pointer. Reduces the null_syscall benchmark by 2 cycles (322 => 320 cycles) On PPC64, gcc seems to know that 'current' is not changing, and it keeps it in a non volatile register to avoid multiple reads of 'current' from the paca.
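A condensed before/after sketch of the pattern being changed (taken from the shape of the code in the diff below, simplified):

	/* Before: caching the address forces gcc to keep ti_flagsp live in a
	 * non-volatile register across the calls that follow. */
	unsigned long *ti_flagsp = &current_thread_info()->flags;
	ti_flags = READ_ONCE(*ti_flagsp);

	/* After: re-derive the address from 'current' (r2 on PPC32) at each
	 * use; the compiler already knows it is invariant, so no extra
	 * register needs to be dedicated to it. */
	ti_flags = READ_ONCE(current_thread_info()->flags);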
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ad0363ff0ff8c125f40e1cdc589a85bbd7e31693.1612946484.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/interrupt.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index f93664ad4a5e..398cd86b6ada 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -241,7 +241,6 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv) { - unsigned long *ti_flagsp = ¤t_thread_info()->flags; unsigned long ti_flags; unsigned long ret = 0; bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv; @@ -257,7 +256,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, /* Check whether the syscall is issued inside a restartable sequence */ rseq_syscall(regs); - ti_flags = *ti_flagsp; + ti_flags = current_thread_info()->flags; if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) { if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { @@ -271,7 +270,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, ret = _TIF_RESTOREALL; else regs->gpr[3] = r3; - clear_bits(_TIF_PERSYSCALL_MASK, ti_flagsp); + clear_bits(_TIF_PERSYSCALL_MASK, ¤t_thread_info()->flags); } else { regs->gpr[3] = r3; } @@ -284,7 +283,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, local_irq_disable(); again: - ti_flags = READ_ONCE(*ti_flagsp); + ti_flags = READ_ONCE(current_thread_info()->flags); while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); if (ti_flags & _TIF_NEED_RESCHED) { @@ -300,7 +299,7 @@ again: do_notify_resume(regs, ti_flags); } local_irq_disable(); - ti_flags = READ_ONCE(*ti_flagsp); + ti_flags = READ_ONCE(current_thread_info()->flags); } if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) { @@ -357,7 +356,6 @@ again: #ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not yet using this */ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr) { - unsigned long *ti_flagsp = ¤t_thread_info()->flags; unsigned long ti_flags; unsigned long flags; unsigned long ret = 0; @@ -380,7 +378,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned local_irq_save(flags); again: - ti_flags = READ_ONCE(*ti_flagsp); + ti_flags = READ_ONCE(current_thread_info()->flags); while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); /* returning to user: may enable */ if (ti_flags & _TIF_NEED_RESCHED) { @@ -391,7 +389,7 @@ again: do_notify_resume(regs, ti_flags); } local_irq_disable(); - ti_flags = READ_ONCE(*ti_flagsp); + ti_flags = READ_ONCE(current_thread_info()->flags); } if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) { @@ -443,7 +441,6 @@ void preempt_schedule_irq(void); notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr) { - unsigned long *ti_flagsp = ¤t_thread_info()->flags; unsigned long flags; unsigned long ret = 0; #ifdef CONFIG_PPC64 @@ -466,8 +463,8 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign amr = kuap_get_and_check_amr(); #endif - if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) { - clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp); + if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) { + clear_bits(_TIF_EMULATE_STACK_STORE, ¤t_thread_info()->flags); ret 
= 1; } @@ -479,7 +476,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign again: if (IS_ENABLED(CONFIG_PREEMPT)) { /* Return to preemptible kernel context */ - if (unlikely(*ti_flagsp & _TIF_NEED_RESCHED)) { + if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) { if (preempt_count() == 0) preempt_schedule_irq(); } From e10656114d32c659768e7ca8aebaaa6ac6e959ab Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Feb 2021 10:26:21 +0000 Subject: [PATCH 180/188] spi: mpc52xx: Avoid using get_tbl() get_tbl() is confusing as it returns the content of the TBL register on PPC32 but the concatenation of TBL and TBU on PPC64. Use mftb() instead. This will allow the removal of get_tbl() in a following patch. Signed-off-by: Christophe Leroy Acked-by: Mark Brown Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/99bf008e2970de7f8ed3225cda69a6d06ae1a644.1612866360.git.christophe.leroy@csgroup.eu --- drivers/spi/spi-mpc52xx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-mpc52xx.c b/drivers/spi/spi-mpc52xx.c index ef2f24420460..e6a30f232370 100644 --- a/drivers/spi/spi-mpc52xx.c +++ b/drivers/spi/spi-mpc52xx.c @@ -120,7 +120,7 @@ static void mpc52xx_spi_start_transfer(struct mpc52xx_spi *ms) ms->cs_change = ms->transfer->cs_change; /* Write out the first byte */ - ms->wcol_tx_timestamp = get_tbl(); + ms->wcol_tx_timestamp = mftb(); if (ms->tx_buf) out_8(ms->regs + SPI_DATA, *ms->tx_buf++); else @@ -221,8 +221,8 @@ static int mpc52xx_spi_fsmstate_transfer(int irq, struct mpc52xx_spi *ms, * but it can also be worked around simply by retrying the * transfer which is what we do here. */ ms->wcol_count++; - ms->wcol_ticks += get_tbl() - ms->wcol_tx_timestamp; - ms->wcol_tx_timestamp = get_tbl(); + ms->wcol_ticks += mftb() - ms->wcol_tx_timestamp; + ms->wcol_tx_timestamp = mftb(); data = 0; if (ms->tx_buf) data = *(ms->tx_buf - 1); @@ -247,14 +247,14 @@ static int mpc52xx_spi_fsmstate_transfer(int irq, struct mpc52xx_spi *ms, /* Is the transfer complete? */ ms->len--; if (ms->len == 0) { - ms->timestamp = get_tbl(); + ms->timestamp = mftb(); ms->timestamp += ms->transfer->delay_usecs * tb_ticks_per_usec; ms->state = mpc52xx_spi_fsmstate_wait; return FSM_CONTINUE; } /* Write out the next byte */ - ms->wcol_tx_timestamp = get_tbl(); + ms->wcol_tx_timestamp = mftb(); if (ms->tx_buf) out_8(ms->regs + SPI_DATA, *ms->tx_buf++); else @@ -276,7 +276,7 @@ mpc52xx_spi_fsmstate_wait(int irq, struct mpc52xx_spi *ms, u8 status, u8 data) dev_err(&ms->master->dev, "spurious irq, status=0x%.2x\n", status); - if (((int)get_tbl()) - ms->timestamp < 0) + if (((int)mftb()) - ms->timestamp < 0) return FSM_POLL; ms->message->actual_length += ms->transfer->len; From 55d68df623eb679cc91f61137f14751e7f369662 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Feb 2021 10:26:22 +0000 Subject: [PATCH 181/188] powerpc/time: Avoid using get_tbl() get_tbl() is confusing as it returns the content of the TBL register on PPC32 but the concatenation of TBL and TBU on PPC64. Use mftb() instead. This will allow the removal of get_tbl() in a following patch.
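For reference, a minimal sketch of the timestamp pattern these call sites use (illustrative; do_work() is a hypothetical stand-in for the code being timed): on PPC32 mftb() reads the 32-bit time base, so deltas must be computed with unsigned subtraction, which stays correct even when the counter wraps between the two reads:

	/* Sketch: measuring elapsed timebase ticks with mftb(). The unsigned
	 * subtraction is well-defined modulo 2^32, so a wrap between the two
	 * reads still yields the correct delta. */
	u32 start = mftb();
	do_work();
	u32 elapsed = mftb() - start;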
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/decefb47c8a2070bf55d20b096b813908c7b3110.1612866360.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c index 05e19470d523..b91ebebd9ff2 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c @@ -229,7 +229,7 @@ static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id) int dma, write, poll_dma; spin_lock_irqsave(&lpbfifo.lock, flags); - ts = get_tbl(); + ts = mftb(); req = lpbfifo.req; if (!req) { @@ -307,7 +307,7 @@ static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id) if (irq != 0) /* don't increment on polled case */ req->irq_count++; - req->irq_ticks += get_tbl() - ts; + req->irq_ticks += mftb() - ts; spin_unlock_irqrestore(&lpbfifo.lock, flags); /* Spinlock is released; it is now safe to call the callback */ @@ -330,7 +330,7 @@ static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id) u32 ts; spin_lock_irqsave(&lpbfifo.lock, flags); - ts = get_tbl(); + ts = mftb(); req = lpbfifo.req; if (!req || (req->flags & MPC52XX_LPBFIFO_FLAG_NO_DMA)) { @@ -361,7 +361,7 @@ static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id) lpbfifo.req = NULL; /* Release the lock before calling out to the callback. */ - req->irq_ticks += get_tbl() - ts; + req->irq_ticks += mftb() - ts; spin_unlock_irqrestore(&lpbfifo.lock, flags); if (req->callback) From 132f94f133961d18af615cb3503368e59529e9a8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Feb 2021 10:26:23 +0000 Subject: [PATCH 182/188] powerpc/time: Remove get_tbl() There are no more users of get_tbl(). Remove it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/dd0368bfd497ffe06b31ee1b5f2ebf7760e30900.1612866360.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso/timebase.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/include/asm/vdso/timebase.h b/arch/powerpc/include/asm/vdso/timebase.h index 881f655caa0a..891c9d5eaabe 100644 --- a/arch/powerpc/include/asm/vdso/timebase.h +++ b/arch/powerpc/include/asm/vdso/timebase.h @@ -43,12 +43,6 @@ #define mttbl(v) asm volatile("mttbl %0":: "r"(v)) #define mttbu(v) asm volatile("mttbu %0":: "r"(v)) -/* For compatibility, get_tbl() is defined as get_tb() on ppc64 */ -static inline unsigned long get_tbl(void) -{ - return mftb(); -} - static __always_inline u64 get_tb(void) { unsigned int tbhi, tblo, tbhi2; From e3de1e291fa58a1ab0f471a4b458eff2514e4b5f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 10 Feb 2021 00:59:20 +1100 Subject: [PATCH 183/188] powerpc/64: Fix stack trace not displaying final frame In commit bf13718bc57a ("powerpc: show registers when unwinding interrupt frames") we changed our stack dumping logic to show the full registers whenever we find an interrupt frame on the stack. However we didn't notice that on 64-bit this doesn't show the final frame, ie. the interrupt that brought us in from userspace, whereas on 32-bit it does. That is due to confusion about the size of that last frame. The code in show_stack() calls validate_sp(), passing it STACK_INT_FRAME_SIZE to check the sp is at least that far below the top of the stack. 
However on 64-bit that size is too large for the final frame, because it includes the red zone, but we don't allocate a red zone for the first frame. So add a new define that encodes the correct size for 32-bit and 64-bit, and use it in show_stack(). This results in the full trace being shown on 64-bit, eg: sysrq: Trigger a crash Kernel panic - not syncing: sysrq triggered crash CPU: 0 PID: 83 Comm: sh Not tainted 5.11.0-rc2-gcc-8.2.0-00188-g571abcb96b10-dirty #649 Call Trace: [c00000000a1c3ac0] [c000000000897b70] dump_stack+0xc4/0x114 (unreliable) [c00000000a1c3b00] [c00000000014334c] panic+0x178/0x41c [c00000000a1c3ba0] [c00000000094e600] sysrq_handle_crash+0x40/0x50 [c00000000a1c3c00] [c00000000094ef98] __handle_sysrq+0xd8/0x210 [c00000000a1c3ca0] [c00000000094f820] write_sysrq_trigger+0x100/0x188 [c00000000a1c3ce0] [c0000000005559dc] proc_reg_write+0x10c/0x1b0 [c00000000a1c3d10] [c000000000479950] vfs_write+0xf0/0x360 [c00000000a1c3d60] [c000000000479d9c] ksys_write+0x7c/0x140 [c00000000a1c3db0] [c00000000002bf5c] system_call_exception+0x19c/0x2c0 [c00000000a1c3e10] [c00000000000d35c] system_call_common+0xec/0x278 --- interrupt: c00 at 0x7fff9fbab428 NIP: 00007fff9fbab428 LR: 000000001000b724 CTR: 0000000000000000 REGS: c00000000a1c3e80 TRAP: 0c00 Not tainted (5.11.0-rc2-gcc-8.2.0-00188-g571abcb96b10-dirty) MSR: 900000000280f033 CR: 22002884 XER: 00000000 IRQMASK: 0 GPR00: 0000000000000004 00007fffc3cb8960 00007fff9fc59900 0000000000000001 GPR04: 000000002a4b32d0 0000000000000002 0000000000000063 0000000000000063 GPR08: 000000002a4b32d0 0000000000000000 0000000000000000 0000000000000000 GPR12: 0000000000000000 00007fff9fcca9a0 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 00000000100b8fd0 GPR20: 000000002a4b3485 00000000100b8f90 0000000000000000 0000000000000000 GPR24: 000000002a4b0440 00000000100e77b8 0000000000000020 000000002a4b32d0 GPR28: 0000000000000001 0000000000000002 000000002a4b32d0 0000000000000001 NIP [00007fff9fbab428] 0x7fff9fbab428 LR [000000001000b724] 0x1000b724 --- interrupt: c00 Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210209141627.2898485-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/ptrace.h | 3 +++ arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kernel/process.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index c6b91e5c2701..975ba260006a 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -70,6 +70,9 @@ struct pt_regs }; #endif + +#define STACK_FRAME_WITH_PT_REGS (STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)) + #ifdef __powerpc64__ /* diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 31edd9bbce75..6109496e5fdf 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -308,7 +308,7 @@ int main(void) /* Interrupt register frame */ DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); - DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); + DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_WITH_PT_REGS); STACK_PT_REGS_OFFSET(GPR0, gpr[0]); STACK_PT_REGS_OFFSET(GPR1, gpr[1]); STACK_PT_REGS_OFFSET(GPR2, gpr[2]); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e296440e9d16..924d023dad0a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2179,7 +2179,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack, * See if 
this is an exception frame. * We look for the "regshere" marker in the current frame. */ - if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE) + if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS) && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); From 6b385d1d7c0a346758e35b128815afa25d4709ee Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Feb 2021 14:02:12 +0000 Subject: [PATCH 184/188] powerpc/uaccess: get rid of small constant size cases in raw_copy_{to,from}_user() Copied from commit 4b842e4e25b1 ("x86: get rid of small constant size cases in raw_copy_{to,from}_user()") Very few call sites where that would be triggered remain, and none of those is anywhere near hot enough to bother. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/99d4ccb58a20d8408d0e19874393655ad5b40822.1612879284.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 41 ------------------------------ 1 file changed, 41 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 85d6712dc8ed..88f5b0fe9a12 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -398,26 +398,6 @@ static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long ret; - if (__builtin_constant_p(n) && (n <= 8)) { - ret = 1; - - switch (n) { - case 1: - __get_user_size(*(u8 *)to, from, 1, ret); - break; - case 2: - __get_user_size(*(u16 *)to, from, 2, ret); - break; - case 4: - __get_user_size(*(u32 *)to, from, 4, ret); - break; - case 8: - __get_user_size(*(u64 *)to, from, 8, ret); - break; - } - if (ret == 0) - return 0; - } allow_read_from_user(from, n); ret = __copy_tofrom_user((__force void __user *)to, from, n); @@ -428,27 +408,6 @@ static inline unsigned long raw_copy_from_user(void *to, static inline unsigned long raw_copy_to_user_allowed(void __user *to, const void *from, unsigned long n) { - if (__builtin_constant_p(n) && (n <= 8)) { - unsigned long ret = 1; - - switch (n) { - case 1: - __put_user_size_allowed(*(u8 *)from, (u8 __user *)to, 1, ret); - break; - case 2: - __put_user_size_allowed(*(u16 *)from, (u16 __user *)to, 2, ret); - break; - case 4: - __put_user_size_allowed(*(u32 *)from, (u32 __user *)to, 4, ret); - break; - case 8: - __put_user_size_allowed(*(u64 *)from, (u64 __user *)to, 8, ret); - break; - } - if (ret == 0) - return 0; - } - return __copy_tofrom_user(to, (__force const void __user *)from, n); } From 95d019e0f9225954e33b6efcad315be9d548a4d7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Feb 2021 14:02:13 +0000 Subject: [PATCH 185/188] powerpc/uaccess: Merge __put_user_size_allowed() into __put_user_size() __put_user_size_allowed() is only called from __put_user_size() now. Merge them together. 
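Seen on its own, the merged macro is the classic do/while(0) plus local-label idiom. A minimal sketch, where open_window(), close_window() and try_put() are hypothetical stand-ins for the allow/prevent helpers and the access primitive that reports a fault:

	#define put_user_sketch(x, ptr, retval)				\
	do {								\
		__label__ failed;  /* label local to each expansion */	\
		retval = 0;						\
		open_window(ptr);					\
		if (try_put(x, ptr))	/* non-zero on fault */		\
			goto failed;					\
		close_window(ptr);					\
		break;		/* success: skip the failure path */	\
	failed:								\
		retval = -EFAULT;					\
		close_window(ptr);					\
	} while (0)

The break exits the do/while(0) on success, so failed: is reached only via the goto, and the user-access window is closed on both paths.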
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b3baeaec1ee2fbdc653bb6fb27b0be5b846163ef.1612879284.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 88f5b0fe9a12..297ee141c33a 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -108,22 +108,18 @@ static inline bool __access_ok(unsigned long addr, unsigned long size) extern long __put_user_bad(void); -#define __put_user_size_allowed(x, ptr, size, retval) \ +#define __put_user_size(x, ptr, size, retval) \ do { \ __label__ __pu_failed; \ \ retval = 0; \ + allow_write_to_user(ptr, size); \ __put_user_size_goto(x, ptr, size, __pu_failed); \ + prevent_write_to_user(ptr, size); \ break; \ \ __pu_failed: \ retval = -EFAULT; \ -} while (0) - -#define __put_user_size(x, ptr, size, retval) \ -do { \ - allow_write_to_user(ptr, size); \ - __put_user_size_allowed(x, ptr, size, retval); \ prevent_write_to_user(ptr, size); \ } while (0) From 052f9d206f6c4b5b512b8c201d375f2dd194be35 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Feb 2021 14:02:14 +0000 Subject: [PATCH 186/188] powerpc/uaccess: Merge raw_copy_to_user_allowed() into raw_copy_to_user() Since commit 17bc43367fc2 ("powerpc/uaccess: Implement unsafe_copy_to_user() as a simple loop"), raw_copy_to_user_allowed() is only used by raw_copy_to_user(). Merge raw_copy_to_user_allowed() into raw_copy_to_user(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3ae114740317187e12edbd5ffa9157cb8c396dea.1612879284.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 297ee141c33a..78e2a3990eab 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -401,19 +401,13 @@ static inline unsigned long raw_copy_from_user(void *to, return ret; } -static inline unsigned long -raw_copy_to_user_allowed(void __user *to, const void *from, unsigned long n) -{ - return __copy_tofrom_user(to, (__force const void __user *)from, n); -} - static inline unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { unsigned long ret; allow_write_to_user(to, n); - ret = raw_copy_to_user_allowed(to, from, n); + ret = __copy_tofrom_user(to, (__force const void __user *)from, n); prevent_write_to_user(to, n); return ret; } From b3abe590c80e0ba55b6fce48762232d90dbc37a5 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Wed, 2 Sep 2020 13:51:38 +1000 Subject: [PATCH 187/188] powerpc/pci: Remove unimplemented prototypes The corresponding definitions were deleted in commit 3d5134ee8341 ("[POWERPC] Rewrite IO allocation & mapping on powerpc64") which was merged a mere 13 years ago. 
Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902035138.1762531-1-oohall@gmail.com --- arch/powerpc/include/asm/ppc-pci.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 583f8b589d10..2b9edbf6e929 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -13,10 +13,6 @@ extern unsigned long isa_io_base; -extern void pci_setup_phb_io(struct pci_controller *hose, int primary); -extern void pci_setup_phb_io_dynamic(struct pci_controller *hose, int primary); - - extern struct list_head hose_list; extern struct pci_dev *isa_bridge_pcidev; /* may be NULL if no ISA bus */ From 82d2c16b350f72aa21ac2a6860c542aa4b43a51e Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Tue, 9 Feb 2021 15:22:34 +0530 Subject: [PATCH 188/188] powerpc/perf: Add support for programming of thresholding in P10 Thresholding, a performance monitoring unit feature, can be used to identify marked instructions which take more than the expected number of cycles between the start event and the end event. Threshold compare (thresh_cmp) bits are programmed in the MMCRA register. In Power9, the thresh_cmp bits were part of the event code. But in P10, thresh_cmp is not part of the event code, due to the inclusion of the MMCR3 bits. This patch adds an option to use the attr.config1 variable to pass the thresh_cmp value to be programmed into the MMCRA register. A new ppmu flag called PPMU_HAS_ATTR_CONFIG1 has been added; this flag indicates that attr.config1 is in use. The patch extends the parameter list of 'compute_mmcr' to include the power_pmu 'flags' element, and the parameter list of get_constraint to include the attr.config1 value. It also extends the parameter list of power_check_constraints in order to pass the perf_event list. As stated by commit ef0e3b650f8d ("powerpc/perf: Fix Threshold Event Counter Multiplier width for P10"), the constraint bits for thresh_cmp also need to be increased to 11 bits, which is handled as part of this patch. We add bit number 53 to the constraint bits of thresh_cmp for power10 to make it an 11 bit field. Updated layout for p10: /* * Layout of constraint bits: * * 60 56 52 48 44 40 36 32 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | * [ fab_match ] [ thresh_cmp ] [ thresh_ctl ] [ ] * | | * [ thresh_cmp bits for p10] thresh_sel -* * * 28 24 20 16 12 8 4 0 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | * [ ] | [ ] | [ sample ] [ ] [6] [5] [4] [3] [2] [1] * | | | | | * BHRB IFM -* | | |*radix_scope | Count of events for each PMC. * EBB -* | | p1, p2, p3, p4, p5, p6. * L1 I/D qualifier -* | * nc - number of counters -* * * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints * we want the low bit of each field to be added to any existing value. * * Everything else is a value field. */ Result: command#: cat /sys/devices/cpu/format/thresh_cmp config1:0-17
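As a worked example of the encoding (derived from the p10_thresh_cmp_val() logic added below, not stated in the original patch): for the thresh_cmp=500 used in the usage example, 500 needs 9 bits, so one round of shifting right by 2 gives mantissa N=125 with exponent E=1; 125 fits in 8 bits and its upper mantissa bits are non-zero, so the value written into MMCRA is (1 << 8) | 125 = 0x17d, which decodes back to 125 * 4^1 = 500. ex.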
usage: command#: perf record -I --weight -d -e cpu/event=0x67340101EC,thresh_cmp=500/ ./ebizzy -S 2 -t 1 -s 4096 1826636 records/s real 2.00 s user 2.00 s sys 0.00 s [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.038 MB perf.data (61 samples) ] Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210209095234.837356-1-kjain@linux.ibm.com --- arch/powerpc/include/asm/perf_event_server.h | 5 +- arch/powerpc/perf/core-book3s.c | 15 +++-- arch/powerpc/perf/isa207-common.c | 67 +++++++++++++++++--- arch/powerpc/perf/isa207-common.h | 15 +++-- arch/powerpc/perf/mpc7450-pmu.c | 5 +- arch/powerpc/perf/power10-pmu.c | 4 +- arch/powerpc/perf/power5+-pmu.c | 5 +- arch/powerpc/perf/power5-pmu.c | 5 +- arch/powerpc/perf/power6-pmu.c | 5 +- arch/powerpc/perf/power7-pmu.c | 5 +- arch/powerpc/perf/ppc970-pmu.c | 5 +- 11 files changed, 102 insertions(+), 34 deletions(-) diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 3b7baba01c92..00e7e671bb4b 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -36,9 +36,9 @@ struct power_pmu { unsigned long test_adder; int (*compute_mmcr)(u64 events[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]); + struct perf_event *pevents[], u32 flags); int (*get_constraint)(u64 event_id, unsigned long *mskp, - unsigned long *valp); + unsigned long *valp, u64 event_config1); int (*get_alternatives)(u64 event_id, unsigned int flags, u64 alt[]); void (*get_mem_data_src)(union perf_mem_data_src *dsrc, @@ -83,6 +83,7 @@ struct power_pmu { #define PPMU_NO_SIAR 0x00000100 /* Do not use SIAR */ #define PPMU_ARCH_31 0x00000200 /* Has MMCR3, SIER2 and SIER3 */ #define PPMU_P10_DD1 0x00000400 /* Is power10 DD1 processor version */ +#define PPMU_HAS_ATTR_CONFIG1 0x00000800 /* Using config1 attribute */ /* * Values for flags to get_alternatives() diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 0e31aaa0a0d2..4b4319d84c54 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -916,7 +916,7 @@ void perf_event_print_debug(void) */ static int power_check_constraints(struct cpu_hw_events *cpuhw, u64 event_id[], unsigned int cflags[], - int n_ev) + int n_ev, struct perf_event **event) { unsigned long mask, value, nv; unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS]; @@ -939,7 +939,7 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw, event_id[i] = cpuhw->alternatives[i][0]; } if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0], - &cpuhw->avalues[i][0])) + &cpuhw->avalues[i][0], event[i]->attr.config1)) return -1; } value = mask = 0; @@ -974,7 +974,8 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw, for (j = 1; j < n_alt[i]; ++j) ppmu->get_constraint(cpuhw->alternatives[i][j], &cpuhw->amasks[i][j], - &cpuhw->avalues[i][j]); + &cpuhw->avalues[i][j], + event[i]->attr.config1); } /* enumerate all possibilities and see if any will work */ @@ -1392,7 +1393,7 @@ static void power_pmu_enable(struct pmu *pmu) memset(&cpuhw->mmcr, 0, sizeof(cpuhw->mmcr)); if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index, - &cpuhw->mmcr, cpuhw->event)) { + &cpuhw->mmcr, cpuhw->event, ppmu->flags)) { /* shouldn't ever get here */ printk(KERN_ERR "oops compute_mmcr failed\n"); goto out; @@ -1580,7 +1581,7 @@ static int power_pmu_add(struct perf_event *event, int 
ef_flags) if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) goto out; - if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) + if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1, cpuhw->event)) goto out; event->hw.config = cpuhw->events[n0]; @@ -1790,7 +1791,7 @@ static int power_pmu_commit_txn(struct pmu *pmu) n = cpuhw->n_events; if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) return -EAGAIN; - i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n); + i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n, cpuhw->event); if (i < 0) return -EAGAIN; @@ -2028,7 +2029,7 @@ static int power_pmu_event_init(struct perf_event *event) local_irq_save(irq_flags); cpuhw = this_cpu_ptr(&cpu_hw_events); - err = power_check_constraints(cpuhw, events, cflags, n + 1); + err = power_check_constraints(cpuhw, events, cflags, n + 1, ctrs); if (has_branch_stack(event)) { u64 bhrb_filter = -1; diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 6ab5b272090a..e4f577da33d8 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -108,12 +108,57 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) *mmcra |= MMCRA_SDAR_MODE_TLB; } +static u64 p10_thresh_cmp_val(u64 value) +{ + int exp = 0; + u64 result = value; + + if (!value) + return value; + + /* + * In case of P10, the thresh_cmp value is not part of the raw event code + * and is provided via the attr.config1 parameter. To program the threshold in MMCRA, + * take an 18 bit number N and shift it right 2 places, incrementing + * the exponent E by 1, until the upper 10 bits of N are zero. + * Write E to the threshold exponent and write the lower 8 bits of N + * to the threshold mantissa. + * The max threshold that can be written is 261120. + */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (value > 261120) + value = 261120; + while ((64 - __builtin_clzl(value)) > 8) { + exp++; + value >>= 2; + } + + /* + * Note that it is invalid to write a mantissa with the + * upper 2 bits of mantissa being zero, unless the + * exponent is also zero. + */ + if (!(value & 0xC0) && exp) + result = 0; + else + result = (exp << 8) | value; + } + return result; +} + static u64 thresh_cmp_val(u64 value) { + if (cpu_has_feature(CPU_FTR_ARCH_31)) + value = p10_thresh_cmp_val(value); + + /* + * Since the location of the threshold compare bits in MMCRA + * is different on p8, use a different shift value. + */ if (cpu_has_feature(CPU_FTR_ARCH_300)) return value << p9_MMCRA_THR_CMP_SHIFT; - - return value << MMCRA_THR_CMP_SHIFT; + else + return value << MMCRA_THR_CMP_SHIFT; } static unsigned long combine_from_event(u64 event) @@ -141,13 +186,13 @@ static bool is_thresh_cmp_valid(u64 event) { unsigned int cmp, exp; + if (cpu_has_feature(CPU_FTR_ARCH_31)) + return p10_thresh_cmp_val(event) != 0; + /* * Check the mantissa upper two bits are not zero, unless the * exponent is also zero. See the THRESH_CMP_MANTISSA doc. - * Power10: thresh_cmp is replaced by l2_l3 event select.
*/ - if (cpu_has_feature(CPU_FTR_ARCH_31)) - return false; cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; exp = cmp >> 7; @@ -256,7 +301,7 @@ void isa207_get_mem_weight(u64 *weight) *weight = mantissa << (2 * exp); } -int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) +int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1) { unsigned int unit, pmc, cache, ebb; unsigned long mask, value; @@ -355,9 +400,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } if (cpu_has_feature(CPU_FTR_ARCH_31)) { - if (event_is_threshold(event)) { + if (event_is_threshold(event) && is_thresh_cmp_valid(event_config1)) { mask |= CNST_THRESH_CTL_SEL_MASK; value |= CNST_THRESH_CTL_SEL_VAL(event >> EVENT_THRESH_SHIFT); + mask |= p10_CNST_THRESH_CMP_MASK; + value |= p10_CNST_THRESH_CMP_VAL(p10_thresh_cmp_val(event_config1)); } } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { if (event_is_threshold(event) && is_thresh_cmp_valid(event)) { @@ -411,7 +458,7 @@ ebb_bhrb: int isa207_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]) + struct perf_event *pevents[], u32 flags) { unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val; unsigned long mmcr3; @@ -504,6 +551,10 @@ int isa207_compute_mmcr(u64 event[], int n_ev, val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; mmcra |= thresh_cmp_val(val); + } else if (flags & PPMU_HAS_ATTR_CONFIG1) { + val = (pevents[i]->attr.config1 >> p10_EVENT_THR_CMP_SHIFT) & + p10_EVENT_THR_CMP_MASK; + mmcra |= thresh_cmp_val(val); } } diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 454b32c31440..1af0e8c97ac7 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -105,6 +105,10 @@ #define p10_EVENT_RADIX_SCOPE_QUAL_MASK 0x1 #define p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT 45 +/* Event Threshold Compare bit constant for power10 in config1 attribute */ +#define p10_EVENT_THR_CMP_SHIFT 0 +#define p10_EVENT_THR_CMP_MASK 0x3FFFFull + #define p10_EVENT_VALID_MASK \ ((p10_SDAR_MODE_MASK << p10_SDAR_MODE_SHIFT | \ (p10_EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \ @@ -124,8 +128,8 @@ * 60 56 52 48 44 40 36 32 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | * [ fab_match ] [ thresh_cmp ] [ thresh_ctl ] [ ] - * | - * thresh_sel -* + * | | + * [ thresh_cmp bits for p10] thresh_sel -* * * 28 24 20 16 12 8 4 0 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | @@ -152,6 +156,9 @@ #define CNST_THRESH_CTL_SEL_VAL(v) (((v) & 0x7ffull) << 32) #define CNST_THRESH_CTL_SEL_MASK CNST_THRESH_CTL_SEL_VAL(0x7ff) +#define p10_CNST_THRESH_CMP_VAL(v) (((v) & 0x7ffull) << 43) +#define p10_CNST_THRESH_CMP_MASK p10_CNST_THRESH_CMP_VAL(0x7ff) + #define CNST_EBB_VAL(v) (((v) & EVENT_EBB_MASK) << 24) #define CNST_EBB_MASK CNST_EBB_VAL(EVENT_EBB_MASK) @@ -262,10 +269,10 @@ #define PH(a, b) (P(LVL, HIT) | P(a, b)) #define PM(a, b) (P(LVL, MISS) | P(a, b)) -int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp); +int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1); int isa207_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]); + struct perf_event *pevents[], u32 flags); void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr); int 
isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags, const unsigned int ev_alt[][MAX_ALT]); diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c index 1919e9df9165..e39b15b79a83 100644 --- a/arch/powerpc/perf/mpc7450-pmu.c +++ b/arch/powerpc/perf/mpc7450-pmu.c @@ -148,7 +148,7 @@ static u32 classbits[N_CLASSES - 1][2] = { }; static int mpc7450_get_constraint(u64 event, unsigned long *maskp, - unsigned long *valp) + unsigned long *valp, u64 event_config1 __maybe_unused) { int pmc, class; u32 mask, value; @@ -258,7 +258,8 @@ static const u32 pmcsel_mask[N_COUNTER] = { */ static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]) + struct perf_event *pevents[], + u32 flags __maybe_unused) { u8 event_index[N_CLASSES][N_COUNTER]; int n_classevent[N_CLASSES]; diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index 79e0206ca454..a901c1348cad 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -216,6 +216,7 @@ PMU_FORMAT_ATTR(invert_bit, "config:47"); PMU_FORMAT_ATTR(src_mask, "config:48-53"); PMU_FORMAT_ATTR(src_match, "config:54-59"); PMU_FORMAT_ATTR(radix_scope, "config:9"); +PMU_FORMAT_ATTR(thresh_cmp, "config1:0-17"); static struct attribute *power10_pmu_format_attr[] = { &format_attr_event.attr, @@ -236,6 +237,7 @@ static struct attribute *power10_pmu_format_attr[] = { &format_attr_src_mask.attr, &format_attr_src_match.attr, &format_attr_radix_scope.attr, + &format_attr_thresh_cmp.attr, NULL, }; @@ -550,7 +552,7 @@ static struct power_pmu power10_pmu = { .get_mem_weight = isa207_get_mem_weight, .disable_pmc = isa207_disable_pmc, .flags = PPMU_HAS_SIER | PPMU_ARCH_207S | - PPMU_ARCH_31, + PPMU_ARCH_31 | PPMU_HAS_ATTR_CONFIG1, .n_generic = ARRAY_SIZE(power10_generic_events), .generic_events = power10_generic_events, .cache_events = &power10_cache_events, diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c index 3e64b4a1511f..18732267993a 100644 --- a/arch/powerpc/perf/power5+-pmu.c +++ b/arch/powerpc/perf/power5+-pmu.c @@ -132,7 +132,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = { }; static int power5p_get_constraint(u64 event, unsigned long *maskp, - unsigned long *valp) + unsigned long *valp, u64 event_config1 __maybe_unused) { int pmc, byte, unit, sh; int bit, fmask; @@ -451,7 +451,8 @@ static int power5p_marked_instr_event(u64 event) static int power5p_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]) + struct perf_event *pevents[], + u32 flags __maybe_unused) { unsigned long mmcr1 = 0; unsigned long mmcra = 0; diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c index 017bb19b73fb..cb611c1e7abe 100644 --- a/arch/powerpc/perf/power5-pmu.c +++ b/arch/powerpc/perf/power5-pmu.c @@ -136,7 +136,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = { }; static int power5_get_constraint(u64 event, unsigned long *maskp, - unsigned long *valp) + unsigned long *valp, u64 event_config1 __maybe_unused) { int pmc, byte, unit, sh; int bit, fmask; @@ -382,7 +382,8 @@ static int power5_marked_instr_event(u64 event) static int power5_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]) + struct perf_event *pevents[], + u32 flags __maybe_unused) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; diff 
--git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c index 189974478e9f..69ef38216418 100644 --- a/arch/powerpc/perf/power6-pmu.c +++ b/arch/powerpc/perf/power6-pmu.c @@ -173,7 +173,8 @@ static int power6_marked_instr_event(u64 event) * Assign PMC numbers and compute MMCR1 value for a set of events */ static int p6_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[], + u32 flags __maybe_unused) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; @@ -266,7 +267,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, * 32-34 select field: nest (subunit) event selector */ static int p6_get_constraint(u64 event, unsigned long *maskp, - unsigned long *valp) + unsigned long *valp, u64 event_config1 __maybe_unused) { int pmc, byte, sh, subunit; unsigned long mask = 0, value = 0; diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index bacfab104a1a..894c17f9a762 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -81,7 +81,7 @@ enum { */ static int power7_get_constraint(u64 event, unsigned long *maskp, - unsigned long *valp) + unsigned long *valp, u64 event_config1 __maybe_unused) { int pmc, sh, unit; unsigned long mask = 0, value = 0; @@ -245,7 +245,8 @@ static int power7_marked_instr_event(u64 event) static int power7_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]) + struct perf_event *pevents[], + u32 flags __maybe_unused) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c index 7d78df97f272..1f8263785286 100644 --- a/arch/powerpc/perf/ppc970-pmu.c +++ b/arch/powerpc/perf/ppc970-pmu.c @@ -190,7 +190,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = { }; static int p970_get_constraint(u64 event, unsigned long *maskp, - unsigned long *valp) + unsigned long *valp, u64 event_config1 __maybe_unused) { int pmc, byte, unit, sh, spcsel; unsigned long mask = 0, value = 0; @@ -256,7 +256,8 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[]) static int p970_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]) + struct perf_event *pevents[], + u32 flags __maybe_unused) { unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; unsigned int pmc, unit, byte, psel;