From 841255544b653cbabe4ee5eda56bbb8b7ad8de8a Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 14 Dec 2021 20:07:33 +0800 Subject: [PATCH 01/23] clocksource/drivers/imx-sysctr: Drop IRQF_IRQPOLL Per the Documentation, IRQF_IRQPOLL is used for polling (only the interrupt that is registered first in a shared interrupt is considered for performance reasons) But this timer is not sharing interrupt line with others, and actually irqpoll not work with this timer with IRQF_IRQPOLL set, so drop the flag. Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20211214120737.1611955-2-peng.fan@oss.nxp.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-imx-sysctr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-imx-sysctr.c b/drivers/clocksource/timer-imx-sysctr.c index 55a8e198d2a1..523e37662a6e 100644 --- a/drivers/clocksource/timer-imx-sysctr.c +++ b/drivers/clocksource/timer-imx-sysctr.c @@ -110,7 +110,7 @@ static struct timer_of to_sysctr = { }, .of_irq = { .handler = sysctr_timer_interrupt, - .flags = IRQF_TIMER | IRQF_IRQPOLL, + .flags = IRQF_TIMER, }, .of_clk = { .name = "per", From 59e2bcd8a95b4cc5a04809b6d2ee337e81b47f57 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 14 Dec 2021 20:07:34 +0800 Subject: [PATCH 02/23] clocksource/drivers/imx-tpm: Drop IRQF_IRQPOLL Per the Documentation, IRQF_IRQPOLL is used for polling (only the interrupt that is registered first in a shared interrupt is considered for performance reasons) The TPM timer is not sharing interrupt with others, and pass irqpoll not make sense for i.MX platform. Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20211214120737.1611955-3-peng.fan@oss.nxp.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-imx-tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index 2cdc077a39f5..2c0b0d4eba9e 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -137,7 +137,7 @@ static struct timer_of to_tpm = { }, .of_irq = { .handler = tpm_timer_interrupt, - .flags = IRQF_TIMER | IRQF_IRQPOLL, + .flags = IRQF_TIMER, }, .of_clk = { .name = "per", From 5b3c267506eba2972d53dafb8b988d5fd28d223d Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 14 Dec 2021 20:07:35 +0800 Subject: [PATCH 03/23] clocksource/drivers/imx-tpm: Mark two variable with __ro_after_init counter_width and timer_base will not be updated after init, so mark as __ro_after_init. Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20211214120737.1611955-4-peng.fan@oss.nxp.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-imx-tpm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index 2c0b0d4eba9e..3afd9b0a668a 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -32,8 +32,8 @@ #define TPM_C0SC_CHF_MASK (0x1 << 7) #define TPM_C0V 0x24 -static int counter_width; -static void __iomem *timer_base; +static int counter_width __ro_after_init; +static void __iomem *timer_base __ro_after_init; static inline void tpm_timer_disable(void) { From 39664b624f6a6518001b2c8f86bac1352c89d0af Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 14 Dec 2021 20:07:36 +0800 Subject: [PATCH 04/23] clocksource/drivers/imx-tpm: Add CLOCK_EVT_FEAT_DYNIRQ Add CLOCK_EVT_FEAT_DYNIRQ to allow the IRQ could be runtime set affinity to the cores that needs wake up, otherwise saying core0 has to send IPI to wakeup core1. With CLOCK_EVT_FEAT_DYNIRQ set, when broadcast timer could wake up the cores, IPI is not needed. Acked-by: Jacky Bai Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20211214120737.1611955-5-peng.fan@oss.nxp.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-imx-tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index 3afd9b0a668a..578fe162fd99 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -129,7 +129,7 @@ static struct timer_of to_tpm = { .clkevt = { .name = "i.MX7ULP TPM Timer", .rating = 200, - .features = CLOCK_EVT_FEAT_ONESHOT, + .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_DYNIRQ, .set_state_shutdown = tpm_set_state_shutdown, .set_state_oneshot = tpm_set_state_oneshot, .set_next_event = tpm_set_next_event, From e547ffe9e6f497e36bde9d86dbcfbc781946752b Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 14 Dec 2021 20:07:37 +0800 Subject: [PATCH 05/23] clocksource/drivers/imx-tpm: Update name of clkevt The tpm driver is not only for i.MX7ULP now, i.MX8ULP also use it. It maybe also used by other i.MX variants, so update name to reflect it. Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20211214120737.1611955-6-peng.fan@oss.nxp.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-imx-tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index 578fe162fd99..df8064122b10 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -127,7 +127,7 @@ static irqreturn_t tpm_timer_interrupt(int irq, void *dev_id) static struct timer_of to_tpm = { .flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK, .clkevt = { - .name = "i.MX7ULP TPM Timer", + .name = "i.MX TPM Timer", .rating = 200, .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_DYNIRQ, .set_state_shutdown = tpm_set_state_shutdown, From fc153c1c58cb8c3bb3b443b4d7dc3211ff5f65fc Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 5 Dec 2021 22:38:15 -0500 Subject: [PATCH 06/23] clocksource: Add a Kconfig option for WATCHDOG_MAX_SKEW A watchdog maximum skew of 100us may still be too small for some systems or archs. It may also be too small when some kernel debug config options are enabled. So add a new Kconfig option CLOCKSOURCE_WATCHDOG_MAX_SKEW_US to allow kernel builders to have more control on the threshold for marking clocksource as unstable. Signed-off-by: Waiman Long Signed-off-by: Paul E. McKenney --- kernel/time/Kconfig | 9 +++++++++ kernel/time/clocksource.c | 8 +++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 04bfd62f5e5c..27b7868b5c30 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -181,5 +181,14 @@ config HIGH_RES_TIMERS hardware is not capable then this option only increases the size of the kernel image. +config CLOCKSOURCE_WATCHDOG_MAX_SKEW_US + int "Clocksource watchdog maximum allowable skew (in μs)" + depends on CLOCKSOURCE_WATCHDOG + range 50 1000 + default 100 + help + Specify the maximum amount of allowable watchdog skew in + microseconds before reporting the clocksource to be unstable. + endmenu endif diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1cf73807b450..95d7ca35bdf2 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -107,7 +107,13 @@ static u64 suspend_start; * This delay could be due to SMIs, NMIs, or to VCPU preemptions. Used as * a lower bound for cs->uncertainty_margin values when registering clocks. */ -#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC) +#ifdef CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US +#define MAX_SKEW_USEC CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US +#else +#define MAX_SKEW_USEC 100 +#endif + +#define WATCHDOG_MAX_SKEW (MAX_SKEW_USEC * NSEC_PER_USEC) #ifdef CONFIG_CLOCKSOURCE_WATCHDOG static void clocksource_watchdog_work(struct work_struct *work); From 10720e120e2b1d66172dccb06eb4f346a665cca6 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 5 Jan 2022 20:43:04 +0800 Subject: [PATCH 07/23] clocksource/drivers/imx-tpm: Exclude sched clock for ARM64 For ARM64 platform such as i.MX8ULP which has ARMv8 generic timer as sched clock, which is much faster compared with tpm sched clock. Reading the tpm count register in i.MX8ULP requires about 290ns, this is slow and introduce scheduler latency. So exclude tpm sched clock for ARM64 platform. Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20220105124304.3567629-1-peng.fan@oss.nxp.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-imx-tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index df8064122b10..60cefc247b71 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -150,10 +150,10 @@ static int __init tpm_clocksource_init(void) tpm_delay_timer.read_current_timer = &tpm_read_current_timer; tpm_delay_timer.freq = timer_of_rate(&to_tpm) >> 3; register_current_timer_delay(&tpm_delay_timer); -#endif sched_clock_register(tpm_read_sched_clock, counter_width, timer_of_rate(&to_tpm) >> 3); +#endif return clocksource_mmio_init(timer_base + TPM_CNT, "imx-tpm", From bceaae3bac0ce27c549bb050336d8d08abc2ee54 Mon Sep 17 00:00:00 2001 From: Drew Fustini Date: Thu, 3 Feb 2022 21:35:05 -0800 Subject: [PATCH 08/23] clocksource/drivers/timer-ti-dm: Fix regression from errata i940 fix The existing fix for errata i940 causes a conflict for IPU2 which is using timer 3 and 4. From arch/arm/boot/dts/dra7-ipu-dsp-common.dtsi: &ipu2 { mboxes = <&mailbox6 &mbox_ipu2_ipc3x>; ti,timers = <&timer3>; ti,watchdog-timers = <&timer4>, <&timer9>; }; The conflict was noticed when booting mainline on the BeagleBoard X15 which has a TI AM5728 SoC: remoteproc remoteproc1: 55020000.ipu is available remoteproc remoteproc1: powering up 55020000.ipu remoteproc remoteproc1: Booting fw image dra7-ipu2-fw.xem4 omap-rproc 55020000.ipu: could not get timer platform device omap-rproc 55020000.ipu: omap_rproc_enable_timers failed: -19 remoteproc remoteproc1: can't start rproc 55020000.ipu: -19 This change modifies the errata fix to instead use timer 15 and 16 which resolves the timer conflict. It does not appear to introduce any latency regression. Results from cyclictest with original errata fix using dmtimer 3 and 4: # cyclictest --mlockall --smp --priority=80 --interval=200 --distance=0 policy: fifo: loadavg: 0.02 0.03 0.05 T: 0 ( 1449) P:80 I:200 C: 800368 Min: 0 Act: 32 Avg: 22 Max: 128 T: 1 ( 1450) P:80 I:200 C: 800301 Min: 0 Act: 12 Avg: 23 Max: 70 The results after the change to dmtimer 15 and 16: # cyclictest --mlockall --smp --priority=80 --interval=200 --distance=0 policy: fifo: loadavg: 0.36 0.19 0.07 T: 0 ( 1711) P:80 I:200 C: 759599 Min: 0 Act: 6 Avg: 22 Max: 108 T: 1 ( 1712) P:80 I:200 C: 759539 Min: 0 Act: 19 Avg: 23 Max: 79 Fixes: 25de4ce5ed02 ("clocksource/drivers/timer-ti-dm: Handle dra7 timer wrap errata i940") Link: https://lore.kernel.org/linux-omap/YfWsG0p6to3IJuvE@x1/ Suggested-by: Suman Anna Reviewed-by: Tony Lindgren Signed-off-by: Drew Fustini Link: https://lore.kernel.org/r/20220204053503.1409162-1-dfustini@baylibre.com Signed-off-by: Daniel Lezcano --- arch/arm/boot/dts/dra7-l4.dtsi | 5 ++--- arch/arm/boot/dts/dra7.dtsi | 8 ++++---- drivers/clocksource/timer-ti-dm-systimer.c | 4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi index 956a26d52a4c..0a11bacffc1f 100644 --- a/arch/arm/boot/dts/dra7-l4.dtsi +++ b/arch/arm/boot/dts/dra7-l4.dtsi @@ -3482,8 +3482,7 @@ ti,timer-pwm; }; }; - - target-module@2c000 { /* 0x4882c000, ap 17 02.0 */ + timer15_target: target-module@2c000 { /* 0x4882c000, ap 17 02.0 */ compatible = "ti,sysc-omap4-timer", "ti,sysc"; reg = <0x2c000 0x4>, <0x2c010 0x4>; @@ -3511,7 +3510,7 @@ }; }; - target-module@2e000 { /* 0x4882e000, ap 19 14.0 */ + timer16_target: target-module@2e000 { /* 0x4882e000, ap 19 14.0 */ compatible = "ti,sysc-omap4-timer", "ti,sysc"; reg = <0x2e000 0x4>, <0x2e010 0x4>; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 6b485cbed8d5..8f7ffe2f66e9 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1339,20 +1339,20 @@ }; /* Local timers, see ARM architected timer wrap erratum i940 */ -&timer3_target { +&timer15_target { ti,no-reset-on-init; ti,no-idle; timer@0 { - assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER3_CLKCTRL 24>; + assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER15_CLKCTRL 24>; assigned-clock-parents = <&timer_sys_clk_div>; }; }; -&timer4_target { +&timer16_target { ti,no-reset-on-init; ti,no-idle; timer@0 { - assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER4_CLKCTRL 24>; + assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER16_CLKCTRL 24>; assigned-clock-parents = <&timer_sys_clk_div>; }; }; diff --git a/drivers/clocksource/timer-ti-dm-systimer.c b/drivers/clocksource/timer-ti-dm-systimer.c index b6f97960d8ee..f52bf81dc1dd 100644 --- a/drivers/clocksource/timer-ti-dm-systimer.c +++ b/drivers/clocksource/timer-ti-dm-systimer.c @@ -695,9 +695,9 @@ static int __init dmtimer_percpu_quirk_init(struct device_node *np, u32 pa) return 0; } - if (pa == 0x48034000) /* dra7 dmtimer3 */ + if (pa == 0x4882c000) /* dra7 dmtimer15 */ return dmtimer_percpu_timer_init(np, 0); - else if (pa == 0x48036000) /* dra7 dmtimer4 */ + else if (pa == 0x4882e000) /* dra7 dmtimer16 */ return dmtimer_percpu_timer_init(np, 1); return 0; From ab8da93dc06d82f464c47ab30e6c75190702f369 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 20 Feb 2022 11:38:15 +0100 Subject: [PATCH 09/23] clocksource/drivers/exynos_mct: Handle DTS with higher number of interrupts The driver statically defines maximum number of interrupts it can handle, however it does not respect that limit when configuring them. When provided with a DTS with more interrupts than assumed, the driver will overwrite static array mct_irqs leading to silent memory corruption. Validate the interrupts coming from DTS to avoid this. This does not change the fact that such DTS might not boot at all, because it is simply incompatible, however at least some warning will be printed. Fixes: 36ba5d527e95 ("ARM: EXYNOS: add device tree support for MCT controller driver") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Alim Akhtar Link: https://lore.kernel.org/r/20220220103815.135380-1-krzysztof.kozlowski@canonical.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/exynos_mct.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index 6db3d5511b0f..03782b399ea1 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -541,6 +541,11 @@ static int __init exynos4_timer_interrupts(struct device_node *np, * irqs are specified. */ nr_irqs = of_irq_count(np); + if (nr_irqs > ARRAY_SIZE(mct_irqs)) { + pr_err("exynos-mct: too many (%d) interrupts configured in DT\n", + nr_irqs); + nr_irqs = ARRAY_SIZE(mct_irqs); + } for (i = MCT_L0_IRQ; i < nr_irqs; i++) mct_irqs[i] = irq_of_parse_and_map(np, i); @@ -553,11 +558,14 @@ static int __init exynos4_timer_interrupts(struct device_node *np, mct_irqs[MCT_L0_IRQ], err); } else { for_each_possible_cpu(cpu) { - int mct_irq = mct_irqs[MCT_L0_IRQ + cpu]; + int mct_irq; struct mct_clock_event_device *pcpu_mevt = per_cpu_ptr(&percpu_mct_tick, cpu); pcpu_mevt->evt.irq = -1; + if (MCT_L0_IRQ + cpu >= ARRAY_SIZE(mct_irqs)) + break; + mct_irq = mct_irqs[MCT_L0_IRQ + cpu]; irq_set_status_flags(mct_irq, IRQ_NOAUTOEN); if (request_irq(mct_irq, From dfc597c9bca9b4447820a59fe86526f016be1458 Mon Sep 17 00:00:00 2001 From: Alim Akhtar Date: Mon, 21 Feb 2022 23:15:45 +0530 Subject: [PATCH 10/23] clocksource/drivers/exynos_mct: Remove mct interrupt index enum MCT driver define an enum which list global and local timer's irq index. Most of them are not used but MCT_G0_IRQ and MCT_L0_IRQ and these two are at a fixed offset/index. Get rid of this enum and use a #define for the used irq index. No functional changes expected. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Alim Akhtar Link: https://lore.kernel.org/r/20220221174547.26176-1-alim.akhtar@samsung.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/exynos_mct.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index 03782b399ea1..341ee47ba35b 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -60,27 +60,17 @@ #define MCT_CLKEVENTS_RATING 350 #endif +/* There are four Global timers starting with 0 offset */ +#define MCT_G0_IRQ 0 +/* Local timers count starts after global timer count */ +#define MCT_L0_IRQ 4 +#define MCT_NR_IRQS 12 + enum { MCT_INT_SPI, MCT_INT_PPI }; -enum { - MCT_G0_IRQ, - MCT_G1_IRQ, - MCT_G2_IRQ, - MCT_G3_IRQ, - MCT_L0_IRQ, - MCT_L1_IRQ, - MCT_L2_IRQ, - MCT_L3_IRQ, - MCT_L4_IRQ, - MCT_L5_IRQ, - MCT_L6_IRQ, - MCT_L7_IRQ, - MCT_NR_IRQS, -}; - static void __iomem *reg_base; static unsigned long clk_rate; static unsigned int mct_int_type; From f49b82a0a54fa85451ed96c35f24679522d59c7a Mon Sep 17 00:00:00 2001 From: Alim Akhtar Date: Mon, 21 Feb 2022 23:15:46 +0530 Subject: [PATCH 11/23] clocksource/drivers/exynos_mct: Bump up mct max irq number Bump-up maximum number of MCT IRQ to match the binding documentation. This make driver scalable for SoC which has more than 12 timer irqs, like recently added FSD SoC. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Alim Akhtar Link: https://lore.kernel.org/r/20220221174547.26176-2-alim.akhtar@samsung.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/exynos_mct.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index 341ee47ba35b..bcf21006ebd8 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -64,7 +64,8 @@ #define MCT_G0_IRQ 0 /* Local timers count starts after global timer count */ #define MCT_L0_IRQ 4 -#define MCT_NR_IRQS 12 +/* Max number of IRQ as per DT binding document */ +#define MCT_NR_IRQS 20 enum { MCT_INT_SPI, From 0a3a4b9d2bb7928f54579421bbadd4aa9c4a94f0 Mon Sep 17 00:00:00 2001 From: Alim Akhtar Date: Mon, 21 Feb 2022 23:15:47 +0530 Subject: [PATCH 12/23] clocksource/drivers/exynos_mct: Increase the size of name array Variable _name_ hold mct_tick number per cpu and it is currently limited to 10. Which restrict the scalability of the MCT driver for the SoC which has more local timers interrupts (>= 12). Increase the length of it to make mct_tick printed correctly for each local timer interrupts per CPU. Signed-off-by: Alim Akhtar Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220221174547.26176-3-alim.akhtar@samsung.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/exynos_mct.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index bcf21006ebd8..f29c812b70c9 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -80,7 +80,11 @@ static int mct_irqs[MCT_NR_IRQS]; struct mct_clock_event_device { struct clock_event_device evt; unsigned long base; - char name[10]; + /** + * The length of the name must be adjusted if number of + * local timer interrupts grow over two digits + */ + char name[11]; }; static void exynos4_mct_write(unsigned int value, unsigned long offset) From 8c4b810a87005eb46564a48a69b5b255e515fa62 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 3 Feb 2022 17:05:02 +0000 Subject: [PATCH 13/23] clocksource/drivers/arm_arch_timer: Use event stream scaling when available With FEAT_ECV and the 1GHz counter, it is pretty likely that the event stream divider doesn't fit in the field that holds the divider value (we only have 4 bits to describe counter bits [15:0] Thankfully, FEAT_ECV also provides a scaling mechanism to switch the field to cover counter bits [23:8] instead. Enable this on arm64 when ECV is available (32bit doesn't have any detection infrastructure and is unlikely to be run on an ARMv8.6 system anyway). Signed-off-by: Marc Zyngier Cc: Mark Rutland Cc: Daniel Lezcano Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220203170502.2694422-1-maz@kernel.org Signed-off-by: Daniel Lezcano --- drivers/clocksource/arm_arch_timer.c | 13 +++++++++++-- include/clocksource/arm_arch_timer.h | 1 + 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 1ecd52f903b8..9ab8221ee3c6 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -880,10 +880,19 @@ static void __arch_timer_setup(unsigned type, clockevents_config_and_register(clk, arch_timer_rate, 0xf, max_delta); } -static void arch_timer_evtstrm_enable(int divider) +static void arch_timer_evtstrm_enable(unsigned int divider) { u32 cntkctl = arch_timer_get_cntkctl(); +#ifdef CONFIG_ARM64 + /* ECV is likely to require a large divider. Use the EVNTIS flag. */ + if (cpus_have_const_cap(ARM64_HAS_ECV) && divider > 15) { + cntkctl |= ARCH_TIMER_EVT_INTERVAL_SCALE; + divider -= 8; + } +#endif + + divider = min(divider, 15U); cntkctl &= ~ARCH_TIMER_EVT_TRIGGER_MASK; /* Set the divider and enable virtual event stream */ cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT) @@ -912,7 +921,7 @@ static void arch_timer_configure_evtstream(void) lsb++; /* enable event stream */ - arch_timer_evtstrm_enable(max(0, min(lsb, 15))); + arch_timer_evtstrm_enable(max(0, lsb)); } static void arch_counter_set_user_access(void) diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index e715bdb720d5..057c8964aefb 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -56,6 +56,7 @@ enum arch_timer_spi_nr { #define ARCH_TIMER_EVT_TRIGGER_MASK (0xF << ARCH_TIMER_EVT_TRIGGER_SHIFT) #define ARCH_TIMER_USR_VT_ACCESS_EN (1 << 8) /* virtual timer registers */ #define ARCH_TIMER_USR_PT_ACCESS_EN (1 << 9) /* physical timer registers */ +#define ARCH_TIMER_EVT_INTERVAL_SCALE (1 << 17) /* EVNTIS in the ARMv8 ARM */ #define ARCH_TIMER_EVT_STREAM_PERIOD_US 100 #define ARCH_TIMER_EVT_STREAM_FREQ \ From bf127df3cceada8693888fc86a3121c38ef25701 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 3 Mar 2022 11:42:12 -0700 Subject: [PATCH 14/23] clocksource/drivers/imx-tpm: Move tpm_read_sched_clock() under CONFIG_ARM When building this driver for an architecture other than ARCH=arm: drivers/clocksource/timer-imx-tpm.c:78:20: error: unused function 'tpm_read_sched_clock' [-Werror,-Wunused-function] static u64 notrace tpm_read_sched_clock(void) ^ 1 error generated. Move the function definition under the existing CONFIG_ARM section so there is no more warning. Fixes: 10720e120e2b ("clocksource/drivers/imx-tpm: Exclude sched clock for ARM64") Signed-off-by: Nathan Chancellor Reviewed-by: Peng Fan Link: https://lore.kernel.org/r/20220303184212.2356245-1-nathan@kernel.org Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-imx-tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index 60cefc247b71..bd64a8a8427f 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -73,12 +73,12 @@ static unsigned long tpm_read_current_timer(void) { return tpm_read_counter(); } -#endif static u64 notrace tpm_read_sched_clock(void) { return tpm_read_counter(); } +#endif static int tpm_set_next_event(unsigned long delta, struct clock_event_device *evt) From cea9ffe0094d468b17814056fcebe7b3840af5f0 Mon Sep 17 00:00:00 2001 From: David Heidelberg Date: Fri, 4 Mar 2022 00:33:06 +0100 Subject: [PATCH 15/23] dt-bindings: timer: Tegra: Convert text bindings to yaml Convert Tegra timer binding into yaml format. This commit also merge 3 text bindings with almost identical content (differens in number of registers). Reviewed-by: Rob Herring Signed-off-by: David Heidelberg Link: https://lore.kernel.org/r/20220303233307.61753-1-david@ixit.cz Signed-off-by: Daniel Lezcano --- .../bindings/timer/nvidia,tegra-timer.yaml | 150 ++++++++++++++++++ .../bindings/timer/nvidia,tegra20-timer.txt | 24 --- .../bindings/timer/nvidia,tegra210-timer.txt | 36 ----- .../bindings/timer/nvidia,tegra30-timer.txt | 28 ---- 4 files changed, 150 insertions(+), 88 deletions(-) create mode 100644 Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml delete mode 100644 Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt delete mode 100644 Documentation/devicetree/bindings/timer/nvidia,tegra210-timer.txt delete mode 100644 Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml b/Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml new file mode 100644 index 000000000000..b78209cd0f28 --- /dev/null +++ b/Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml @@ -0,0 +1,150 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/timer/nvidia,tegra-timer.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: NVIDIA Tegra timer + +maintainers: + - Stephen Warren + +allOf: + - if: + properties: + compatible: + contains: + const: nvidia,tegra210-timer + then: + properties: + interrupts: + # Either a single combined interrupt or up to 14 individual interrupts + minItems: 1 + maxItems: 14 + description: > + A list of 14 interrupts; one per each timer channels 0 through 13 + + - if: + properties: + compatible: + oneOf: + - items: + - enum: + - nvidia,tegra114-timer + - nvidia,tegra124-timer + - nvidia,tegra132-timer + - const: nvidia,tegra30-timer + - items: + - const: nvidia,tegra30-timer + - const: nvidia,tegra20-timer + then: + properties: + interrupts: + # Either a single combined interrupt or up to 6 individual interrupts + minItems: 1 + maxItems: 6 + description: > + A list of 6 interrupts; one per each of timer channels 1 through 5, + and one for the shared interrupt for the remaining channels. + + - if: + properties: + compatible: + const: nvidia,tegra20-timer + then: + properties: + interrupts: + # Either a single combined interrupt or up to 4 individual interrupts + minItems: 1 + maxItems: 4 + description: | + A list of 4 interrupts; one per timer channel. + +properties: + compatible: + oneOf: + - const: nvidia,tegra210-timer + description: > + The Tegra210 timer provides fourteen 29-bit timer counters and one 32-bit + timestamp counter. The TMRs run at either a fixed 1 MHz clock rate derived + from the oscillator clock (TMR0-TMR9) or directly at the oscillator clock + (TMR10-TMR13). Each TMR can be programmed to generate one-shot, periodic, + or watchdog interrupts. + - items: + - enum: + - nvidia,tegra114-timer + - nvidia,tegra124-timer + - nvidia,tegra132-timer + - const: nvidia,tegra30-timer + - items: + - const: nvidia,tegra30-timer + - const: nvidia,tegra20-timer + description: > + The Tegra30 timer provides ten 29-bit timer channels, a single 32-bit free + running counter, and 5 watchdog modules. The first two channels may also + trigger a legacy watchdog reset. + - const: nvidia,tegra20-timer + description: > + The Tegra20 timer provides four 29-bit timer channels and a single 32-bit free + running counter. The first two channels may also trigger a watchdog reset. + + reg: + maxItems: 1 + + interrupts: true + + clocks: + maxItems: 1 + + clock-names: + items: + - const: timer + + +required: + - compatible + - reg + - interrupts + - clocks + +additionalProperties: false + +examples: + - | + #include + timer@60005000 { + compatible = "nvidia,tegra30-timer", "nvidia,tegra20-timer"; + reg = <0x60005000 0x400>; + interrupts = <0 0 IRQ_TYPE_LEVEL_HIGH>, + <0 1 IRQ_TYPE_LEVEL_HIGH>, + <0 41 IRQ_TYPE_LEVEL_HIGH>, + <0 42 IRQ_TYPE_LEVEL_HIGH>, + <0 121 IRQ_TYPE_LEVEL_HIGH>, + <0 122 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&tegra_car 214>; + }; + - | + #include + #include + #include + + timer@60005000 { + compatible = "nvidia,tegra210-timer"; + reg = <0x60005000 0x400>; + interrupts = , + , + , + , + , + , + , + , + , + , + , + , + , + ; + clocks = <&tegra_car TEGRA210_CLK_TIMER>; + clock-names = "timer"; + }; diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt b/Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt deleted file mode 100644 index 4a864bd10d3d..000000000000 --- a/Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt +++ /dev/null @@ -1,24 +0,0 @@ -NVIDIA Tegra20 timer - -The Tegra20 timer provides four 29-bit timer channels and a single 32-bit free -running counter. The first two channels may also trigger a watchdog reset. - -Required properties: - -- compatible : should be "nvidia,tegra20-timer". -- reg : Specifies base physical address and size of the registers. -- interrupts : A list of 4 interrupts; one per timer channel. -- clocks : Must contain one entry, for the module clock. - See ../clocks/clock-bindings.txt for details. - -Example: - -timer { - compatible = "nvidia,tegra20-timer"; - reg = <0x60005000 0x60>; - interrupts = <0 0 0x04 - 0 1 0x04 - 0 41 0x04 - 0 42 0x04>; - clocks = <&tegra_car 132>; -}; diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra210-timer.txt b/Documentation/devicetree/bindings/timer/nvidia,tegra210-timer.txt deleted file mode 100644 index 032cda96fe0d..000000000000 --- a/Documentation/devicetree/bindings/timer/nvidia,tegra210-timer.txt +++ /dev/null @@ -1,36 +0,0 @@ -NVIDIA Tegra210 timer - -The Tegra210 timer provides fourteen 29-bit timer counters and one 32-bit -timestamp counter. The TMRs run at either a fixed 1 MHz clock rate derived -from the oscillator clock (TMR0-TMR9) or directly at the oscillator clock -(TMR10-TMR13). Each TMR can be programmed to generate one-shot, periodic, -or watchdog interrupts. - -Required properties: -- compatible : "nvidia,tegra210-timer". -- reg : Specifies base physical address and size of the registers. -- interrupts : A list of 14 interrupts; one per each timer channels 0 through - 13. -- clocks : Must contain one entry, for the module clock. - See ../clocks/clock-bindings.txt for details. - -timer@60005000 { - compatible = "nvidia,tegra210-timer"; - reg = <0x0 0x60005000 0x0 0x400>; - interrupts = , - , - , - , - , - , - , - , - , - , - , - , - , - ; - clocks = <&tegra_car TEGRA210_CLK_TIMER>; - clock-names = "timer"; -}; diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt b/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt deleted file mode 100644 index 1761f53ee36f..000000000000 --- a/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt +++ /dev/null @@ -1,28 +0,0 @@ -NVIDIA Tegra30 timer - -The Tegra30 timer provides ten 29-bit timer channels, a single 32-bit free -running counter, and 5 watchdog modules. The first two channels may also -trigger a legacy watchdog reset. - -Required properties: - -- compatible : For Tegra30, must contain "nvidia,tegra30-timer". Otherwise, - must contain '"nvidia,-timer", "nvidia,tegra30-timer"' where - is tegra124 or tegra132. -- reg : Specifies base physical address and size of the registers. -- interrupts : A list of 6 interrupts; one per each of timer channels 1 - through 5, and one for the shared interrupt for the remaining channels. -- clocks : Must contain one entry, for the module clock. - See ../clocks/clock-bindings.txt for details. - -timer { - compatible = "nvidia,tegra30-timer", "nvidia,tegra20-timer"; - reg = <0x60005000 0x400>; - interrupts = <0 0 0x04 - 0 1 0x04 - 0 41 0x04 - 0 42 0x04 - 0 121 0x04 - 0 122 0x04>; - clocks = <&tegra_car 214>; -}; From 34f03f7f3e9f79412b5bf8db9f2778c9ed2fd733 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Fri, 4 Mar 2022 15:35:56 +0200 Subject: [PATCH 16/23] clocksource/drivers/timer-microchip-pit64b: Remove mmio selection PIT64B timer driver doesn't depend on CLKSRC_MMIO since commit e85c1d21b16b ("clocksource/drivers/timer-microchip-pit64b: Add clocksource suspend/resume"). Remove the selection. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220304133601.2404086-2-claudiu.beznea@microchip.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index cfb8ea0df3b1..1ea556e75494 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -713,7 +713,6 @@ config INGENIC_OST config MICROCHIP_PIT64B bool "Microchip PIT64B support" depends on OF || COMPILE_TEST - select CLKSRC_MMIO select TIMER_OF help This option enables Microchip PIT64B timer for Atmel From ff10ee97cb203262e88d9c8bc87369cbd4004a0c Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Fri, 4 Mar 2022 15:35:57 +0200 Subject: [PATCH 17/23] clocksource/drivers/timer-microchip-pit64b: Use notrace Use notrace for mchp_pit64b_sched_read_clk() to avoid recursive call of prepare_ftrace_return() when issuing: echo function_graph > /sys/kernel/debug/tracing/current_tracer Fixes: 625022a5f160 ("clocksource/drivers/timer-microchip-pit64b: Add Microchip PIT64B support") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220304133601.2404086-3-claudiu.beznea@microchip.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-microchip-pit64b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-microchip-pit64b.c b/drivers/clocksource/timer-microchip-pit64b.c index cfa4ec7ef396..790d2c9b42a7 100644 --- a/drivers/clocksource/timer-microchip-pit64b.c +++ b/drivers/clocksource/timer-microchip-pit64b.c @@ -165,7 +165,7 @@ static u64 mchp_pit64b_clksrc_read(struct clocksource *cs) return mchp_pit64b_cnt_read(mchp_pit64b_cs_base); } -static u64 mchp_pit64b_sched_read_clk(void) +static u64 notrace mchp_pit64b_sched_read_clk(void) { return mchp_pit64b_cnt_read(mchp_pit64b_cs_base); } From 389e3bff69b4341b42779833063c7b462a6e6d42 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Fri, 4 Mar 2022 15:35:58 +0200 Subject: [PATCH 18/23] clocksource/drivers/timer-microchip-pit64b: Use 5MHz for clockevent Use 5MHz clock for clockevent timers. This increases timer's resolution. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220304133601.2404086-4-claudiu.beznea@microchip.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-microchip-pit64b.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/clocksource/timer-microchip-pit64b.c b/drivers/clocksource/timer-microchip-pit64b.c index 790d2c9b42a7..abce83d2f00b 100644 --- a/drivers/clocksource/timer-microchip-pit64b.c +++ b/drivers/clocksource/timer-microchip-pit64b.c @@ -42,8 +42,7 @@ #define MCHP_PIT64B_LSBMASK GENMASK_ULL(31, 0) #define MCHP_PIT64B_PRES_TO_MODE(p) (MCHP_PIT64B_MR_PRES & ((p) << 8)) #define MCHP_PIT64B_MODE_TO_PRES(m) ((MCHP_PIT64B_MR_PRES & (m)) >> 8) -#define MCHP_PIT64B_DEF_CS_FREQ 5000000UL /* 5 MHz */ -#define MCHP_PIT64B_DEF_CE_FREQ 32768 /* 32 KHz */ +#define MCHP_PIT64B_DEF_FREQ 5000000UL /* 5 MHz */ #define MCHP_PIT64B_NAME "pit64b" @@ -418,7 +417,6 @@ static int __init mchp_pit64b_init_clkevt(struct mchp_pit64b_timer *timer, static int __init mchp_pit64b_dt_init_timer(struct device_node *node, bool clkevt) { - u32 freq = clkevt ? MCHP_PIT64B_DEF_CE_FREQ : MCHP_PIT64B_DEF_CS_FREQ; struct mchp_pit64b_timer timer; unsigned long clk_rate; u32 irq = 0; @@ -446,7 +444,7 @@ static int __init mchp_pit64b_dt_init_timer(struct device_node *node, } /* Initialize mode (prescaler + SGCK bit). To be used at runtime. */ - ret = mchp_pit64b_init_mode(&timer, freq); + ret = mchp_pit64b_init_mode(&timer, MCHP_PIT64B_DEF_FREQ); if (ret) goto irq_unmap; From 4467b8bad2401794fb89a0268c8c8257180bf60f Mon Sep 17 00:00:00 2001 From: Guillaume Ranquet Date: Mon, 7 Mar 2022 18:26:56 +0100 Subject: [PATCH 19/23] clocksource/drivers/timer-of: Check return value of of_iomap in timer_of_base_init() of_base->base can either be iomapped using of_io_request_and_map() or of_iomap() depending whether or not an of_base->name has been set. Thus check of_base->base against NULL as of_iomap() does not return a PTR_ERR() in case of error. Fixes: 9aea417afa6b ("clocksource/drivers/timer-of: Don't request the resource by name") Signed-off-by: Guillaume Ranquet Link: https://lore.kernel.org/r/20220307172656.4836-1-granquet@baylibre.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-of.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c index 529cc6a51cdb..c3f54d9912be 100644 --- a/drivers/clocksource/timer-of.c +++ b/drivers/clocksource/timer-of.c @@ -157,9 +157,9 @@ static __init int timer_of_base_init(struct device_node *np, of_base->base = of_base->name ? of_io_request_and_map(np, of_base->index, of_base->name) : of_iomap(np, of_base->index); - if (IS_ERR(of_base->base)) { - pr_err("Failed to iomap (%s)\n", of_base->name); - return PTR_ERR(of_base->base); + if (IS_ERR_OR_NULL(of_base->base)) { + pr_err("Failed to iomap (%s:%s)\n", np->name, of_base->name); + return of_base->base ? PTR_ERR(of_base->base) : -ENOMEM; } return 0; From a1ff03cd6fb9c501fff63a4a2bface9adcfa81cd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 2 Feb 2022 01:01:07 +0100 Subject: [PATCH 20/23] tick: Detect and fix jiffies update stall On some rare cases, the timekeeper CPU may be delaying its jiffies update duty for a while. Known causes include: * The timekeeper is waiting on stop_machine in a MULTI_STOP_DISABLE_IRQ or MULTI_STOP_RUN state. Disabled interrupts prevent from timekeeping updates while waiting for the target CPU to complete its stop_machine() callback. * The timekeeper vcpu has VMEXIT'ed for a long while due to some overload on the host. Detect and fix these situations with emergency timekeeping catchups. Original-patch-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner --- kernel/time/tick-sched.c | 17 +++++++++++++++++ kernel/time/tick-sched.h | 4 ++++ 2 files changed, 21 insertions(+) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 17a283ce2b20..c89f50a7e690 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -169,6 +169,8 @@ static ktime_t tick_init_jiffy_update(void) return period; } +#define MAX_STALLED_JIFFIES 5 + static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) { int cpu = smp_processor_id(); @@ -196,6 +198,21 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) if (tick_do_timer_cpu == cpu) tick_do_update_jiffies64(now); + /* + * If jiffies update stalled for too long (timekeeper in stop_machine() + * or VMEXIT'ed for several msecs), force an update. + */ + if (ts->last_tick_jiffies != jiffies) { + ts->stalled_jiffies = 0; + ts->last_tick_jiffies = READ_ONCE(jiffies); + } else { + if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) { + tick_do_update_jiffies64(now); + ts->stalled_jiffies = 0; + ts->last_tick_jiffies = READ_ONCE(jiffies); + } + } + if (ts->inidle) ts->got_idle_tick = 1; } diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index d952ae393423..504649513399 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -49,6 +49,8 @@ enum tick_nohz_mode { * @timer_expires_base: Base time clock monotonic for @timer_expires * @next_timer: Expiry time of next expiring timer for debugging purpose only * @tick_dep_mask: Tick dependency mask - is set, if someone needs the tick + * @last_tick_jiffies: Value of jiffies seen on last tick + * @stalled_jiffies: Number of stalled jiffies detected across ticks */ struct tick_sched { struct hrtimer sched_timer; @@ -77,6 +79,8 @@ struct tick_sched { u64 next_timer; ktime_t idle_expires; atomic_t tick_dep_mask; + unsigned long last_tick_jiffies; + unsigned int stalled_jiffies; }; extern struct tick_sched *tick_get_tick_sched(int cpu); From 2984539959dbaf4e65e19bf90c2419304a81a985 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 8 Feb 2022 17:16:33 +0100 Subject: [PATCH 21/23] tick/rcu: Remove obsolete rcu_needs_cpu() parameters With the removal of CONFIG_RCU_FAST_NO_HZ, the parameters in rcu_needs_cpu() are not necessary anymore. Simply remove them. Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Paul E. McKenney Cc: Paul Menzel --- include/linux/rcutiny.h | 3 +-- include/linux/rcutree.h | 2 +- kernel/rcu/tree.c | 3 +-- kernel/time/tick-sched.c | 10 ++++------ 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 858f4d429946..5fed476f977f 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -64,9 +64,8 @@ static inline void rcu_softirq_qs(void) rcu_tasks_qs(current, (preempt)); \ } while (0) -static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) +static inline int rcu_needs_cpu(void) { - *nextevt = KTIME_MAX; return 0; } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 53209d669400..6cc91291d078 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -19,7 +19,7 @@ void rcu_softirq_qs(void); void rcu_note_context_switch(bool preempt); -int rcu_needs_cpu(u64 basem, u64 *nextevt); +int rcu_needs_cpu(void); void rcu_cpu_stall_reset(void); /* diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index a4c25a6283b0..80faf2273ce9 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1086,9 +1086,8 @@ void rcu_irq_enter_irqson(void) * Just check whether or not this CPU has non-offloaded RCU callbacks * queued. */ -int rcu_needs_cpu(u64 basemono, u64 *nextevt) +int rcu_needs_cpu(void) { - *nextevt = KTIME_MAX; return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) && !rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data)); } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index c89f50a7e690..566ad5bd83e9 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -785,7 +785,7 @@ static inline bool local_timer_softirq_pending(void) static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) { - u64 basemono, next_tick, next_tmr, next_rcu, delta, expires; + u64 basemono, next_tick, delta, expires; unsigned long basejiff; unsigned int seq; @@ -808,7 +808,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) * minimal delta which brings us back to this place * immediately. Lather, rinse and repeat... */ - if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() || + if (rcu_needs_cpu() || arch_needs_cpu() || irq_work_needs_cpu() || local_timer_softirq_pending()) { next_tick = basemono + TICK_NSEC; } else { @@ -819,10 +819,8 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) * disabled this also looks at the next expiring * hrtimer. */ - next_tmr = get_next_timer_interrupt(basejiff, basemono); - ts->next_timer = next_tmr; - /* Take the next rcu event into account */ - next_tick = next_rcu < next_tmr ? next_rcu : next_tmr; + next_tick = get_next_timer_interrupt(basejiff, basemono); + ts->next_timer = next_tick; } /* From 0345691b24c076655ce8f0f4bfd24cba3467ccbd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 8 Feb 2022 17:16:34 +0100 Subject: [PATCH 22/23] tick/rcu: Stop allowing RCU_SOFTIRQ in idle RCU_SOFTIRQ used to be special in that it could be raised on purpose within the idle path to prevent from stopping the tick. Some code still prevents from unnecessary warnings related to this specific behaviour while entering in dynticks-idle mode. However the nohz layout has changed quite a bit in ten years, and the removal of CONFIG_RCU_FAST_NO_HZ has been the final straw to this safe-conduct. Now the RCU_SOFTIRQ vector is expected to be raised from sane places. A remaining corner case is admitted though when the vector is invoked in fragile hotplug path. Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Paul E. McKenney Cc: Paul Menzel --- include/linux/interrupt.h | 8 ++++++- kernel/time/tick-sched.c | 50 +++++++++++++++++++++++++++++++-------- 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9367f1cb2e3c..9613326d2f8a 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -579,7 +579,13 @@ enum NR_SOFTIRQS }; -#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ)) +/* + * Ignoring the RCU vector after ksoftirqd is parked is fine + * because: + * 1) rcutree_migrate_callbacks() takes care of the queue. + * 2) rcu_report_dead() reports the final quiescent states. + */ +#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ)) /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 566ad5bd83e9..2d76c91b85de 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -999,6 +999,45 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts) __tick_nohz_full_update_tick(ts, ktime_get()); } +/* + * A pending softirq outside an IRQ (or softirq disabled section) context + * should be waiting for ksoftirqd to handle it. Therefore we shouldn't + * reach here due to the need_resched() early check in can_stop_idle_tick(). + * + * However if we are between CPUHP_AP_SMPBOOT_THREADS and CPU_TEARDOWN_CPU on the + * cpu_down() process, softirqs can still be raised while ksoftirqd is parked, + * triggering the below since wakep_softirqd() is ignored. + * + */ +static bool report_idle_softirq(void) +{ + static int ratelimit; + unsigned int pending = local_softirq_pending(); + + if (likely(!pending)) + return false; + + /* Some softirqs claim to be safe against hotplug and ksoftirqd parking */ + if (!cpu_active(smp_processor_id())) { + pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK; + if (!pending) + return false; + } + + if (ratelimit < 10) + return false; + + /* On RT, softirqs handling may be waiting on some lock */ + if (!local_bh_blocked()) + return false; + + pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n", + pending); + ratelimit++; + + return true; +} + static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) { /* @@ -1025,17 +1064,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) if (need_resched()) return false; - if (unlikely(local_softirq_pending())) { - static int ratelimit; - - if (ratelimit < 10 && !local_bh_blocked() && - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { - pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n", - (unsigned int) local_softirq_pending()); - ratelimit++; - } + if (unlikely(report_idle_softirq())) return false; - } if (tick_nohz_full_enabled()) { /* From f96272a90d9eaea9933aaab704ddbd258feb3841 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 8 Feb 2022 17:16:35 +0100 Subject: [PATCH 23/23] lib/irq_poll: Declare IRQ_POLL softirq vector as ksoftirqd-parking safe The following warning may appear while setting a CPU down: NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #20!!! The IRQ_POLL_SOFTIRQ vector can be raised during the hotplug cpu_down() path after ksoftirqd is parked and before the CPU actually dies. However this is handled afterward at the CPUHP_IRQ_POLL_DEAD stage where the queue gets migrated. Hence this warning can be considered spurious and the vector can join the "hotplug-safe" list. Reported-and-tested-by: Paul Menzel Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Paul E. McKenney Cc: Paul Menzel --- include/linux/interrupt.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9613326d2f8a..f40754caaefa 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -580,12 +580,15 @@ enum }; /* - * Ignoring the RCU vector after ksoftirqd is parked is fine - * because: - * 1) rcutree_migrate_callbacks() takes care of the queue. + * The following vectors can be safely ignored after ksoftirqd is parked: + * + * _ RCU: + * 1) rcutree_migrate_callbacks() migrates the queue. * 2) rcu_report_dead() reports the final quiescent states. + * + * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue */ -#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ)) +#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ)) /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq.