linux/drivers/clocksource/arm_arch_timer.c

1785 lines
46 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/drivers/clocksource/arm_arch_timer.c
*
* Copyright (C) 2011 ARM Ltd.
* All Rights Reserved
*/
clocksource/arm_arch_timer: Force per-CPU interrupt to be level-triggered The ARM architected timer produces level-triggered interrupts (this is mandated by the architecture). Unfortunately, a number of device-trees get this wrong, and expose an edge-triggered interrupt. Until now, this wasn't too much of an issue, as the programming of the trigger would fail (the corresponding PPI cannot be reconfigured), and the kernel would be happy with this. But we're about to change this, and trust DT a lot if the driver doesn't provide its own trigger information. In that context, the timer breaks badly. While we do need to fix the DTs, there is also some userspace out there (kvmtool) that generates the same kind of broken DT on the fly, and that will completely break with newer kernels. As a safety measure, and to keep buggy software alive as well as buying us some time to fix DTs all over the place, let's check what trigger configuration has been given to us by the firmware. If this is not a level configuration, then we know that the DT/ACPI configuration is bust, and we pick some defaults which won't be worse than the existing setup. 
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> Cc: Andrew Lunn <andrew@lunn.ch> Cc: Liu Gang <Gang.Liu@nxp.com> Cc: Mark Rutland <marc.rutland@arm.com> Cc: Masahiro Yamada <yamada.masahiro@socionext.com> Cc: Wenbin Song <Wenbin.Song@freescale.com> Cc: Mingkai Hu <Mingkai.Hu@freescale.com> Cc: Florian Fainelli <f.fainelli@gmail.com> Cc: Kevin Hilman <khilman@baylibre.com> Cc: Daniel Lezcano <daniel.lezcano@linaro.org> Cc: Michal Simek <michal.simek@xilinx.com> Cc: Jon Hunter <jonathanh@nvidia.com> Cc: arm@kernel.org Cc: bcm-kernel-feedback-list@broadcom.com Cc: linux-arm-kernel@lists.infradead.org Cc: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> Cc: Jason Cooper <jason@lakedaemon.net> Cc: Ray Jui <rjui@broadcom.com> Cc: "Hou Zhiqiang" <B48286@freescale.com> Cc: Tirumalesh Chalamarla <tchalamarla@cavium.com> Cc: linux-samsung-soc@vger.kernel.org Cc: Yuan Yao <yao.yuan@nxp.com> Cc: Jan Glauber <jglauber@cavium.com> Cc: Gregory Clement <gregory.clement@free-electrons.com> Cc: linux-amlogic@lists.infradead.org Cc: soren.brinkmann@xilinx.com Cc: Rajesh Bhagat <rajesh.bhagat@freescale.com> Cc: Scott Branden <sbranden@broadcom.com> Cc: Duc Dang <dhdang@apm.com> Cc: Kukjin Kim <kgene@kernel.org> Cc: Carlo Caione <carlo@caione.org> Cc: Dinh Nguyen <dinguyen@opensource.altera.com> Link: http://lkml.kernel.org/r/1470045256-9032-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2016-08-01 12:54:15 +03:00
#define pr_fmt(fmt) "arch_timer: " fmt
clocksource/arm_arch_timer: Force per-CPU interrupt to be level-triggered The ARM architected timer produces level-triggered interrupts (this is mandated by the architecture). Unfortunately, a number of device-trees get this wrong, and expose an edge-triggered interrupt. Until now, this wasn't too much of an issue, as the programming of the trigger would fail (the corresponding PPI cannot be reconfigured), and the kernel would be happy with this. But we're about to change this, and trust DT a lot if the driver doesn't provide its own trigger information. In that context, the timer breaks badly. While we do need to fix the DTs, there is also some userspace out there (kvmtool) that generates the same kind of broken DT on the fly, and that will completely break with newer kernels. As a safety measure, and to keep buggy software alive as well as buying us some time to fix DTs all over the place, let's check what trigger configuration has been given to us by the firmware. If this is not a level configuration, then we know that the DT/ACPI configuration is bust, and we pick some defaults which won't be worse than the existing setup. 
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> Cc: Andrew Lunn <andrew@lunn.ch> Cc: Liu Gang <Gang.Liu@nxp.com> Cc: Mark Rutland <marc.rutland@arm.com> Cc: Masahiro Yamada <yamada.masahiro@socionext.com> Cc: Wenbin Song <Wenbin.Song@freescale.com> Cc: Mingkai Hu <Mingkai.Hu@freescale.com> Cc: Florian Fainelli <f.fainelli@gmail.com> Cc: Kevin Hilman <khilman@baylibre.com> Cc: Daniel Lezcano <daniel.lezcano@linaro.org> Cc: Michal Simek <michal.simek@xilinx.com> Cc: Jon Hunter <jonathanh@nvidia.com> Cc: arm@kernel.org Cc: bcm-kernel-feedback-list@broadcom.com Cc: linux-arm-kernel@lists.infradead.org Cc: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> Cc: Jason Cooper <jason@lakedaemon.net> Cc: Ray Jui <rjui@broadcom.com> Cc: "Hou Zhiqiang" <B48286@freescale.com> Cc: Tirumalesh Chalamarla <tchalamarla@cavium.com> Cc: linux-samsung-soc@vger.kernel.org Cc: Yuan Yao <yao.yuan@nxp.com> Cc: Jan Glauber <jglauber@cavium.com> Cc: Gregory Clement <gregory.clement@free-electrons.com> Cc: linux-amlogic@lists.infradead.org Cc: soren.brinkmann@xilinx.com Cc: Rajesh Bhagat <rajesh.bhagat@freescale.com> Cc: Scott Branden <sbranden@broadcom.com> Cc: Duc Dang <dhdang@apm.com> Cc: Kukjin Kim <kgene@kernel.org> Cc: Carlo Caione <carlo@caione.org> Cc: Dinh Nguyen <dinguyen@opensource.altera.com> Link: http://lkml.kernel.org/r/1470045256-9032-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2016-08-01 12:54:15 +03:00
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/device.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/cpu_pm.h>
#include <linux/clockchips.h>
#include <linux/clocksource.h>
#include <linux/clocksource_ids.h>
#include <linux/interrupt.h>
#include <linux/kstrtox.h>
#include <linux/of_irq.h>
#include <linux/of_address.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/sched/clock.h>
#include <linux/sched_clock.h>
#include <linux/acpi.h>
ptp: arm/arm64: Enable ptp_kvm for arm/arm64 Currently, there is no mechanism to keep time sync between guest and host in arm/arm64 virtualization environment. Time in guest will drift compared with host after boot up as they may both use third party time sources to correct their time respectively. The time deviation will be in order of milliseconds. But in some scenarios,like in cloud environment, we ask for higher time precision. kvm ptp clock, which chooses the host clock source as a reference clock to sync time between guest and host, has been adopted by x86 which takes the time sync order from milliseconds to nanoseconds. This patch enables kvm ptp clock for arm/arm64 and improves clock sync precision significantly. Test result comparisons between with kvm ptp clock and without it in arm/arm64 are as follows. This test derived from the result of command 'chronyc sources'. we should take more care of the last sample column which shows the offset between the local clock and the source at the last measurement. 
no kvm ptp in guest: MS Name/IP address Stratum Poll Reach LastRx Last sample ======================================================================== ^* dns1.synet.edu.cn 2 6 377 13 +1040us[+1581us] +/- 21ms ^* dns1.synet.edu.cn 2 6 377 21 +1040us[+1581us] +/- 21ms ^* dns1.synet.edu.cn 2 6 377 29 +1040us[+1581us] +/- 21ms ^* dns1.synet.edu.cn 2 6 377 37 +1040us[+1581us] +/- 21ms ^* dns1.synet.edu.cn 2 6 377 45 +1040us[+1581us] +/- 21ms ^* dns1.synet.edu.cn 2 6 377 53 +1040us[+1581us] +/- 21ms ^* dns1.synet.edu.cn 2 6 377 61 +1040us[+1581us] +/- 21ms ^* dns1.synet.edu.cn 2 6 377 4 -130us[ +796us] +/- 21ms ^* dns1.synet.edu.cn 2 6 377 12 -130us[ +796us] +/- 21ms ^* dns1.synet.edu.cn 2 6 377 20 -130us[ +796us] +/- 21ms in host: MS Name/IP address Stratum Poll Reach LastRx Last sample ======================================================================== ^* 120.25.115.20 2 7 377 72 -470us[ -603us] +/- 18ms ^* 120.25.115.20 2 7 377 92 -470us[ -603us] +/- 18ms ^* 120.25.115.20 2 7 377 112 -470us[ -603us] +/- 18ms ^* 120.25.115.20 2 7 377 2 +872ns[-6808ns] +/- 17ms ^* 120.25.115.20 2 7 377 22 +872ns[-6808ns] +/- 17ms ^* 120.25.115.20 2 7 377 43 +872ns[-6808ns] +/- 17ms ^* 120.25.115.20 2 7 377 63 +872ns[-6808ns] +/- 17ms ^* 120.25.115.20 2 7 377 83 +872ns[-6808ns] +/- 17ms ^* 120.25.115.20 2 7 377 103 +872ns[-6808ns] +/- 17ms ^* 120.25.115.20 2 7 377 123 +872ns[-6808ns] +/- 17ms The dns1.synet.edu.cn is the network reference clock for guest and 120.25.115.20 is the network reference clock for host. we can't get the clock error between guest and host directly, but a roughly estimated value will be in order of hundreds of us to ms. with kvm ptp in guest: chrony has been disabled in host to remove the disturb by network clock. 
MS Name/IP address Stratum Poll Reach LastRx Last sample ======================================================================== * PHC0 0 3 377 8 -7ns[ +1ns] +/- 3ns * PHC0 0 3 377 8 +1ns[ +16ns] +/- 3ns * PHC0 0 3 377 6 -4ns[ -0ns] +/- 6ns * PHC0 0 3 377 6 -8ns[ -12ns] +/- 5ns * PHC0 0 3 377 5 +2ns[ +4ns] +/- 4ns * PHC0 0 3 377 13 +2ns[ +4ns] +/- 4ns * PHC0 0 3 377 12 -4ns[ -6ns] +/- 4ns * PHC0 0 3 377 11 -8ns[ -11ns] +/- 6ns * PHC0 0 3 377 10 -14ns[ -20ns] +/- 4ns * PHC0 0 3 377 8 +4ns[ +5ns] +/- 4ns The PHC0 is the ptp clock which choose the host clock as its source clock. So we can see that the clock difference between host and guest is in order of ns. Cc: Mark Rutland <mark.rutland@arm.com> Acked-by: Richard Cochran <richardcochran@gmail.com> Signed-off-by: Jianyong Wu <jianyong.wu@arm.com> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20201209060932.212364-8-jianyong.wu@arm.com
2020-12-09 09:09:30 +03:00
#include <linux/arm-smccc.h>
#include <linux/ptp_kvm.h>
#include <asm/arch_timer.h>
#include <asm/virt.h>
#include <clocksource/arm_arch_timer.h>
/*
 * Register offsets and bit definitions for the memory-mapped timer.
 *
 * NOTE(review): CNTTIDR and CNTACR(n) are not referenced in this part of the
 * file; presumably they are offsets into the timer's control frame, with
 * CNTTIDR_VIRT(n) and CNTACR_* being per-frame bits within them - confirm
 * against the MMIO probing code.
 */
#define CNTTIDR 0x08
#define CNTTIDR_VIRT(n) (BIT(1) << ((n) * 4))
#define CNTACR(n) (0x40 + ((n) * 4))
#define CNTACR_RPCT BIT(0)
#define CNTACR_RVCT BIT(1)
#define CNTACR_RFRQ BIT(2)
#define CNTACR_RVOFF BIT(3)
#define CNTACR_RWVT BIT(4)
#define CNTACR_RWPT BIT(5)
/*
 * Offsets relative to struct arch_timer::base. CNTP_CTL/CNTP_CVAL_LO and
 * CNTV_CTL/CNTV_CVAL_LO are the ones added to the base by
 * arch_timer_reg_write() and arch_timer_reg_read() for the physical and
 * virtual memory-mapped access types respectively.
 */
#define CNTPCT_LO 0x00
#define CNTVCT_LO 0x08
#define CNTFRQ 0x10
#define CNTP_CVAL_LO 0x20
#define CNTP_CTL 0x2c
#define CNTV_CVAL_LO 0x30
#define CNTV_CTL 0x3c
clocksource/drivers/arm_arch_timer: Fix masking for high freq counters Unfortunately, the architecture provides no means to determine the bit width of the system counter. However, we do know the following from the specification: - the system counter is at least 56 bits wide - Roll-over time of not less than 40 years To date, the arch timer driver has depended on the first property, assuming any system counter to be 56 bits wide and masking off the rest. However, combining a narrow clocksource mask with a high frequency counter could result in prematurely wrapping the system counter by a significant margin. For example, a 56 bit wide, 1GHz system counter would wrap in a mere 2.28 years! This is a problem for two reasons: v8.6+ implementations are required to provide a 64 bit, 1GHz system counter. Furthermore, before v8.6, implementers may select a counter frequency of their choosing. Fix the issue by deriving a valid clock mask based on the second property from above. Set the floor at 56 bits, since we know no system counter is narrower than that. [maz: fixed width computation not to lose the last bit, added max delta generation for the timer] Suggested-by: Marc Zyngier <maz@kernel.org> Signed-off-by: Oliver Upton <oupton@google.com> Reviewed-by: Linus Walleij <linus.walleij@linaro.org> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20210807191428.3488948-1-oupton@google.com Link: https://lore.kernel.org/r/20211017124225.3018098-13-maz@kernel.org Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
2021-10-17 15:42:20 +03:00
/*
 * The minimum amount of time a generic counter is guaranteed to not roll over
 * (40 years), expressed in seconds. Years are counted as 365 days, so leap
 * days are not included. The ULL suffix keeps the product 64-bit.
 */
#define MIN_ROLLOVER_SECS (40ULL * 365 * 24 * 3600)
/*
 * NOTE(review): presumably records which timer types were discovered during
 * init (it is __initdata, so only usable at init time) - confirm against the
 * probing code.
 */
static unsigned arch_timers_present __initdata;
/*
 * Memory-mapped timer instance.
 *
 * @base: MMIO register frame; the readl/writel accessors add the
 *        CNT{P,V}_CTL and CNT{P,V}_CVAL_LO offsets to it.
 * @evt:  embedded clock event device; to_arch_timer() maps a pointer to this
 *        member back to the enclosing structure.
 */
struct arch_timer {
void __iomem *base;
struct clock_event_device evt;
};
static struct arch_timer *arch_timer_mem __ro_after_init;
/* Recover the enclosing struct arch_timer from a pointer to its evt member. */
#define to_arch_timer(e) container_of(e, struct arch_timer, evt)
/*
 * Counter frequency; arch_counter_get_width() multiplies it by a number of
 * seconds to obtain a cycle count, i.e. it is in cycles per second.
 */
static u32 arch_timer_rate __ro_after_init;
/*
 * Per-PPI values, indexed by the ARCH_TIMER_*_PPI constants.
 * NOTE(review): presumably the Linux IRQ number of each PPI - confirm.
 */
static int arch_timer_ppi[ARCH_TIMER_MAX_TIMER_PPI] __ro_after_init;
/* Name associated with each PPI, indexed by the same constants. */
static const char *arch_timer_ppi_names[ARCH_TIMER_MAX_TIMER_PPI] = {
[ARCH_TIMER_PHYS_SECURE_PPI] = "sec-phys",
[ARCH_TIMER_PHYS_NONSECURE_PPI] = "phys",
[ARCH_TIMER_VIRT_PPI] = "virt",
[ARCH_TIMER_HYP_PPI] = "hyp-phys",
[ARCH_TIMER_HYP_VIRT_PPI] = "hyp-virt",
};
/* Per-CPU clock event device. */
static struct clock_event_device __percpu *arch_timer_evt;
/* Which PPI the driver uses; defaults to the virtual timer PPI. */
static enum arch_timer_ppi_nr arch_timer_uses_ppi __ro_after_init = ARCH_TIMER_VIRT_PPI;
/*
 * Configuration flags, fixed after init (__ro_after_init).
 * NOTE(review): their exact meaning is set by code outside this excerpt;
 * going by the names they describe C3-stop behaviour, use of the virtual
 * memory-mapped timer and the counter stopping in suspend - confirm.
 */
static bool arch_timer_c3stop __ro_after_init;
static bool arch_timer_mem_use_virtual __ro_after_init;
static bool arch_counter_suspend_stop __ro_after_init;
/*
 * Default vDSO clock mode: the arch timer when the generic gettimeofday
 * implementation is configured, none otherwise.
 */
#ifdef CONFIG_GENERIC_GETTIMEOFDAY
static enum vdso_clock_mode vdso_default = VDSO_CLOCKMODE_ARCHTIMER;
#else
static enum vdso_clock_mode vdso_default = VDSO_CLOCKMODE_NONE;
#endif /* CONFIG_GENERIC_GETTIMEOFDAY */
/* CPUs on which the event stream is available; starts out empty. */
static cpumask_t evtstrm_available = CPU_MASK_NONE;
/*
 * Whether the event stream should be enabled. Defaults to the
 * CONFIG_ARM_ARCH_TIMER_EVTSTREAM setting and can be overridden with the
 * "clocksource.arm_arch_timer.evtstrm" early parameter.
 */
static bool evtstrm_enable __ro_after_init = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM);
/*
 * Early parameter handler: parse @buf as a boolean into evtstrm_enable.
 * Returns whatever kstrtobool() returns.
 */
static int __init early_evtstrm_cfg(char *buf)
{
return kstrtobool(buf, &evtstrm_enable);
}
early_param("clocksource.arm_arch_timer.evtstrm", early_evtstrm_cfg);
clocksource/drivers/arm_arch_timer: Fix masking for high freq counters Unfortunately, the architecture provides no means to determine the bit width of the system counter. However, we do know the following from the specification: - the system counter is at least 56 bits wide - Roll-over time of not less than 40 years To date, the arch timer driver has depended on the first property, assuming any system counter to be 56 bits wide and masking off the rest. However, combining a narrow clocksource mask with a high frequency counter could result in prematurely wrapping the system counter by a significant margin. For example, a 56 bit wide, 1GHz system counter would wrap in a mere 2.28 years! This is a problem for two reasons: v8.6+ implementations are required to provide a 64 bit, 1GHz system counter. Furthermore, before v8.6, implementers may select a counter frequency of their choosing. Fix the issue by deriving a valid clock mask based on the second property from above. Set the floor at 56 bits, since we know no system counter is narrower than that. [maz: fixed width computation not to lose the last bit, added max delta generation for the timer] Suggested-by: Marc Zyngier <maz@kernel.org> Signed-off-by: Oliver Upton <oupton@google.com> Reviewed-by: Linus Walleij <linus.walleij@linaro.org> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20210807191428.3488948-1-oupton@google.com Link: https://lore.kernel.org/r/20211017124225.3018098-13-maz@kernel.org Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
2021-10-17 15:42:20 +03:00
/*
 * Estimate the width, in bits, of the system counter.
 *
 * The width cannot be read back from the hardware, so it is derived from two
 * guarantees of the Generic Timer specification:
 *   1) the system counter is at least 56 bits wide
 *   2) it does not roll over in less than 40 years
 *
 * See 'ARM DDI 0487G.a D11.1.2 ("The system counter")' for more details.
 *
 * Returns the number of bits needed to count MIN_ROLLOVER_SECS worth of
 * cycles at arch_timer_rate, limited to the range [56, 64].
 */
static int arch_counter_get_width(void)
{
	u64 cycles_needed = MIN_ROLLOVER_SECS * arch_timer_rate;
	int width = ilog2(cycles_needed - 1) + 1;

	/* never report less than 56 or more than 64 bits */
	return clamp_val(width, 56, 64);
}
/*
 * Architected system timer support.
 */

/*
 * arch_timer_reg_write() - write a timer register using the given access type
 * @access: ARCH_TIMER_MEM_PHYS_ACCESS or ARCH_TIMER_MEM_VIRT_ACCESS select the
 *          memory-mapped physical or virtual timer registers; any other value
 *          is handed to arch_timer_reg_write_cp15().
 * @reg:    register to write. The memory-mapped paths only handle
 *          ARCH_TIMER_REG_CTRL and ARCH_TIMER_REG_CVAL.
 * @val:    value to write; truncated to 32 bits for the control register.
 * @clk:    clock event device embedded in a struct arch_timer. Only used (via
 *          to_arch_timer()) on the memory-mapped paths.
 *
 * Any other @reg on a memory-mapped path reaches BUILD_BUG(), so this function
 * relies on being inlined with @reg (and @access) known at compile time.
 */
static __always_inline
void arch_timer_reg_write(int access, enum arch_timer_reg reg, u64 val,
struct clock_event_device *clk)
{
if (access == ARCH_TIMER_MEM_PHYS_ACCESS) {
struct arch_timer *timer = to_arch_timer(clk);
switch (reg) {
case ARCH_TIMER_REG_CTRL:
writel_relaxed((u32)val, timer->base + CNTP_CTL);
break;
case ARCH_TIMER_REG_CVAL:
/*
* Not guaranteed to be atomic, so the timer
* must be disabled at this point.
*/
writeq_relaxed(val, timer->base + CNTP_CVAL_LO);
break;
default:
BUILD_BUG();
}
} else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) {
struct arch_timer *timer = to_arch_timer(clk);
switch (reg) {
case ARCH_TIMER_REG_CTRL:
writel_relaxed((u32)val, timer->base + CNTV_CTL);
break;
case ARCH_TIMER_REG_CVAL:
/* Same restriction as above */
writeq_relaxed(val, timer->base + CNTV_CVAL_LO);
break;
default:
BUILD_BUG();
}
} else {
/* Not a memory-mapped access type: use the cp15 accessor. */
arch_timer_reg_write_cp15(access, reg, val);
}
}
/*
 * arch_timer_reg_read() - read a timer register using the given access type
 * @access: ARCH_TIMER_MEM_PHYS_ACCESS or ARCH_TIMER_MEM_VIRT_ACCESS select the
 *          memory-mapped physical or virtual timer registers; any other value
 *          is handed to arch_timer_reg_read_cp15().
 * @reg:    register to read. The memory-mapped paths only handle
 *          ARCH_TIMER_REG_CTRL; anything else reaches BUILD_BUG().
 * @clk:    clock event device embedded in a struct arch_timer. Only used (via
 *          to_arch_timer()) on the memory-mapped paths.
 *
 * Returns the register value as a u32. Like arch_timer_reg_write(), this
 * relies on being inlined with @reg known at compile time.
 */
static __always_inline
u32 arch_timer_reg_read(int access, enum arch_timer_reg reg,
struct clock_event_device *clk)
{
u32 val;
if (access == ARCH_TIMER_MEM_PHYS_ACCESS) {
struct arch_timer *timer = to_arch_timer(clk);
switch (reg) {
case ARCH_TIMER_REG_CTRL:
val = readl_relaxed(timer->base + CNTP_CTL);
break;
default:
BUILD_BUG();
}
} else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) {
struct arch_timer *timer = to_arch_timer(clk);
switch (reg) {
case ARCH_TIMER_REG_CTRL:
val = readl_relaxed(timer->base + CNTV_CTL);
break;
default:
BUILD_BUG();
}
} else {
/* Not a memory-mapped access type: use the cp15 accessor. */
val = arch_timer_reg_read_cp15(access, reg);
}
return val;
}
/*
 * notrace wrappers around the __arch_counter_get_*() helpers, one per
 * combination of physical (cntpct) / virtual (cntvct) counter and
 * plain / "_stable" accessor. Each simply returns the helper's value.
 *
 * arch_counter_get_cntvct() is the initial value of the
 * arch_timer_read_counter function pointer.
 * NOTE(review): presumably the other three are installed into that pointer
 * elsewhere in the file - confirm.
 */
static notrace u64 arch_counter_get_cntpct_stable(void)
{
return __arch_counter_get_cntpct_stable();
}
static notrace u64 arch_counter_get_cntpct(void)
{
return __arch_counter_get_cntpct();
}
static notrace u64 arch_counter_get_cntvct_stable(void)
{
return __arch_counter_get_cntvct_stable();
}
static notrace u64 arch_counter_get_cntvct(void)
{
return __arch_counter_get_cntvct();
}
/*
 * Function used to read the system counter; arch_counter_read() and
 * arch_counter_read_cc() both call through it. Exported (GPL) and read-only
 * after init.
 *
 * Default to cp15 based access because arm64 uses this function for
 * sched_clock() before DT is probed and the cp15 method is guaranteed
 * to exist on arm64. arm doesn't use this before DT is probed so even
 * if we don't have the cp15 accessors we won't have a problem.
 */
u64 (*arch_timer_read_counter)(void) __ro_after_init = arch_counter_get_cntvct;
EXPORT_SYMBOL_GPL(arch_timer_read_counter);
/*
 * clocksource .read callback: return the current counter value via
 * arch_timer_read_counter(). @cs is unused.
 */
static u64 arch_counter_read(struct clocksource *cs)
{
return arch_timer_read_counter();
}
/*
 * cyclecounter .read callback: return the current counter value via
 * arch_timer_read_counter(). @cc is unused.
 */
static u64 arch_counter_read_cc(const struct cyclecounter *cc)
{
return arch_timer_read_counter();
}
/*
 * Clocksource backed by the system counter, read through arch_counter_read().
 * Only the fields below are set statically.
 * NOTE(review): no .mask is set here; presumably it is filled in at
 * registration time - confirm.
 */
static struct clocksource clocksource_counter = {
.name = "arch_sys_counter",
.id = CSID_ARM_ARCH_COUNTER,
.rating = 400,
.read = arch_counter_read,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
/* Cycle counter backed by the same counter, read through arch_counter_read_cc(). */
static struct cyclecounter cyclecounter __ro_after_init = {
.read = arch_counter_read_cc,
};
/*
 * ACPI OEM identification triple (OEM ID, OEM table ID, OEM revision).
 * The two string fields are one byte larger than the corresponding ACPI field
 * size. hisi_161010101_oem_info[] is an array of these, terminated by an
 * all-zero entry.
 */
struct ate_acpi_oem_info {
char oem_id[ACPI_OEM_ID_SIZE + 1];
char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
u32 oem_revision;
};
#ifdef CONFIG_FSL_ERRATUM_A008585
/*
 * Read system register @reg twice in a row and repeat until both reads
 * return the same value or the retry budget is used up; evaluates to the
 * second value read.
 *
 * The number of retries is an arbitrary value well beyond the highest number
 * of iterations the loop has been observed to take.
 *
 * NOTE(review): _retries is decremented before the loop condition is tested,
 * so if the two reads first agree on the very last permitted iteration the
 * WARN_ON_ONCE() still fires even though the returned value is consistent.
 */
#define __fsl_a008585_read_reg(reg) ({ \
u64 _old, _new; \
int _retries = 200; \
\
do { \
_old = read_sysreg(reg); \
_new = read_sysreg(reg); \
_retries--; \
} while (unlikely(_old != _new) && _retries); \
\
WARN_ON_ONCE(!_retries); \
_new; \
})
/* Read cntpct_el0 using the double-read sequence above. */
static u64 notrace fsl_a008585_read_cntpct_el0(void)
{
return __fsl_a008585_read_reg(cntpct_el0);
}
/* Read cntvct_el0 using the double-read sequence above. */
static u64 notrace fsl_a008585_read_cntvct_el0(void)
{
return __fsl_a008585_read_reg(cntvct_el0);
}
#endif
#ifdef CONFIG_HISILICON_ERRATUM_161010101
/*
 * Read system register @reg twice in a row and accept the second value only
 * if it is ahead of the first by less than 32 counts: the difference is
 * shifted right by 5 bits and the reads are repeated while anything is left.
 * Checking that difference is the only way to confirm the value is correct.
 *
 * Theoretically the erratum should not occur more than twice in succession
 * when reading the system counter, but interrupts may cause more than two
 * consecutive bad reads and trigger the warning, so the number of retries is
 * set far beyond the number of iterations the loop has been observed to take.
 *
 * NOTE(review): _retries is decremented before the loop condition is tested,
 * so a read pair that first passes on the very last permitted iteration
 * still triggers the WARN_ON_ONCE().
 */
#define __hisi_161010101_read_reg(reg) ({ \
u64 _old, _new; \
int _retries = 50; \
\
do { \
_old = read_sysreg(reg); \
_new = read_sysreg(reg); \
_retries--; \
} while (unlikely((_new - _old) >> 5) && _retries); \
\
WARN_ON_ONCE(!_retries); \
_new; \
})
/* Read cntpct_el0 using the checked double-read sequence above. */
static u64 notrace hisi_161010101_read_cntpct_el0(void)
{
return __hisi_161010101_read_reg(cntpct_el0);
}
/* Read cntvct_el0 using the checked double-read sequence above. */
static u64 notrace hisi_161010101_read_cntvct_el0(void)
{
return __hisi_161010101_read_reg(cntvct_el0);
}
/*
 * ACPI OEM identification of the HiSilicon platforms (HIP05, HIP06, HIP07)
 * listed for erratum 161010101. The array ends with an all-zero sentinel.
 */
static struct ate_acpi_oem_info hisi_161010101_oem_info[] = {
	/*
	 * Note that trailing spaces are required to properly match
	 * the OEM table information: the ACPI OEM ID is a fixed 6-byte
	 * field and the OEM table ID a fixed 8-byte field, so each string
	 * is space-padded to exactly that width.
	 */
	{
		.oem_id		= "HISI  ",
		.oem_table_id	= "HIP05   ",
		.oem_revision	= 0,
	},
	{
		.oem_id		= "HISI  ",
		.oem_table_id	= "HIP06   ",
		.oem_revision	= 0,
	},
	{
		.oem_id		= "HISI  ",
		.oem_table_id	= "HIP07   ",
		.oem_revision	= 0,
	},
	{ /* Sentinel indicating the end of the OEM array */ },
};
#endif
#ifdef CONFIG_ARM64_ERRATUM_858921
/*
 * Read cntpct_el0 twice back to back. If bit 32 differs between the two
 * reads, return the first value; otherwise return the second.
 */
static u64 notrace arm64_858921_read_cntpct_el0(void)
{
	u64 first = read_sysreg(cntpct_el0);
	u64 second = read_sysreg(cntpct_el0);

	if (((first ^ second) >> 32) & 1)
		return first;

	return second;
}
/*
 * Read cntvct_el0 twice back to back. If bit 32 differs between the two
 * reads, return the first value; otherwise return the second.
 */
static u64 notrace arm64_858921_read_cntvct_el0(void)
{
	u64 first = read_sysreg(cntvct_el0);
	u64 second = read_sysreg(cntvct_el0);

	if (((first ^ second) >> 32) & 1)
		return first;

	return second;
}
#endif
clocksource/drivers/arch_timer: Workaround for Allwinner A64 timer instability The Allwinner A64 SoC is known[1] to have an unstable architectural timer, which manifests itself most obviously in the time jumping forward a multiple of 95 years[2][3]. This coincides with 2^56 cycles at a timer frequency of 24 MHz, implying that the time went slightly backward (and this was interpreted by the kernel as it jumping forward and wrapping around past the epoch). Investigation revealed instability in the low bits of CNTVCT at the point a high bit rolls over. This leads to power-of-two cycle forward and backward jumps. (Testing shows that forward jumps are about twice as likely as backward jumps.) Since the counter value returns to normal after an indeterminate read, each "jump" really consists of both a forward and backward jump from the software perspective. Unless the kernel is trapping CNTVCT reads, a userspace program is able to read the register in a loop faster than it changes. A test program running on all 4 CPU cores that reported jumps larger than 100 ms was run for 13.6 hours and reported the following: Count | Event -------+--------------------------- 9940 | jumped backward 699ms 268 | jumped backward 1398ms 1 | jumped backward 2097ms 16020 | jumped forward 175ms 6443 | jumped forward 699ms 2976 | jumped forward 1398ms 9 | jumped forward 356516ms 9 | jumped forward 357215ms 4 | jumped forward 714430ms 1 | jumped forward 3578440ms This works out to a jump larger than 100 ms about every 5.5 seconds on each CPU core. The largest jump (almost an hour!) was the following sequence of reads: 0x0000007fffffffff → 0x00000093feffffff → 0x0000008000000000 Note that the middle bits don't necessarily all read as all zeroes or all ones during the anomalous behavior; however the low 10 bits checked by the function in this patch have never been observed with any other value. 
Also note that smaller jumps are much more common, with backward jumps of 2048 (2^11) cycles observed over 400 times per second on each core. (Of course, this is partially explained by lower bits rolling over more frequently.) Any one of these could have caused the 95 year time skip. Similar anomalies were observed while reading CNTPCT (after patching the kernel to allow reads from userspace). However, the CNTPCT jumps are much less frequent, and only small jumps were observed. The same program as before (except now reading CNTPCT) observed after 72 hours: Count | Event -------+--------------------------- 17 | jumped backward 699ms 52 | jumped forward 175ms 2831 | jumped forward 699ms 5 | jumped forward 1398ms Further investigation showed that the instability in CNTPCT/CNTVCT also affected the respective timer's TVAL register. The following values were observed immediately after writing CNVT_TVAL to 0x10000000: CNTVCT | CNTV_TVAL | CNTV_CVAL | CNTV_TVAL Error --------------------+------------+--------------------+----------------- 0x000000d4a2d8bfff | 0x10003fff | 0x000000d4b2d8bfff | +0x00004000 0x000000d4a2d94000 | 0x0fffffff | 0x000000d4b2d97fff | -0x00004000 0x000000d4a2d97fff | 0x10003fff | 0x000000d4b2d97fff | +0x00004000 0x000000d4a2d9c000 | 0x0fffffff | 0x000000d4b2d9ffff | -0x00004000 The pattern of errors in CNTV_TVAL seemed to depend on exactly which value was written to it. 
For example, after writing 0x10101010: CNTVCT | CNTV_TVAL | CNTV_CVAL | CNTV_TVAL Error --------------------+------------+--------------------+----------------- 0x000001ac3effffff | 0x1110100f | 0x000001ac4f10100f | +0x1000000 0x000001ac40000000 | 0x1010100f | 0x000001ac5110100f | -0x1000000 0x000001ac58ffffff | 0x1110100f | 0x000001ac6910100f | +0x1000000 0x000001ac66000000 | 0x1010100f | 0x000001ac7710100f | -0x1000000 0x000001ac6affffff | 0x1110100f | 0x000001ac7b10100f | +0x1000000 0x000001ac6e000000 | 0x1010100f | 0x000001ac7f10100f | -0x1000000 I was also twice able to reproduce the issue covered by Allwinner's workaround[4], that writing to TVAL sometimes fails, and both CVAL and TVAL are left with entirely bogus values. One was the following values: CNTVCT | CNTV_TVAL | CNTV_CVAL --------------------+------------+-------------------------------------- 0x000000d4a2d6014c | 0x8fbd5721 | 0x000000d132935fff (615s in the past) Reviewed-by: Marc Zyngier <marc.zyngier@arm.com> ======================================================================== Because the CPU can read the CNTPCT/CNTVCT registers faster than they change, performing two reads of the register and comparing the high bits (like other workarounds) is not a workable solution. And because the timer can jump both forward and backward, no pair of reads can distinguish a good value from a bad one. The only way to guarantee a good value from consecutive reads would be to read _three_ times, and take the middle value only if the three values are 1) each unique and 2) increasing. This takes at minimum 3 counter cycles (125 ns), or more if an anomaly is detected. However, since there is a distinct pattern to the bad values, we can optimize the common case (1022/1024 of the time) to a single read by simply ignoring values that match the error pattern. This still takes no more than 3 cycles in the worst case, and requires much less code. 
As an additional safety check, we still limit the loop iteration to the number of max-frequency (1.2 GHz) CPU cycles in three 24 MHz counter periods. For the TVAL registers, the simple solution is to not use them. Instead, read or write the CVAL and calculate the TVAL value in software. Although the manufacturer is aware of at least part of the erratum[4], there is no official name for it. For now, use the kernel-internal name "UNKNOWN1". [1]: https://github.com/armbian/build/commit/a08cd6fe7ae9 [2]: https://forum.armbian.com/topic/3458-a64-datetime-clock-issue/ [3]: https://irclog.whitequark.org/linux-sunxi/2018-01-26 [4]: https://github.com/Allwinner-Homlet/H6-BSP4.9-linux/blob/master/drivers/clocksource/arm_arch_timer.c#L272 Acked-by: Maxime Ripard <maxime.ripard@bootlin.com> Tested-by: Andre Przywara <andre.przywara@arm.com> Signed-off-by: Samuel Holland <samuel@sholland.org> Cc: stable@vger.kernel.org Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
2019-01-13 05:17:18 +03:00
#ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1
/*
* The low bits of the counter registers are indeterminate while bit 10 or
* greater is rolling over. Since the counter value can jump both backward
* (7ff -> 000 -> 800) and forward (7ff -> fff -> 800), ignore register values
* with all ones or all zeros in the low bits. Bound the loop by the maximum
* number of CPU cycles in 3 consecutive 24 MHz counter periods.
*/
#define __sun50i_a64_read_reg(reg) ({ \
u64 _val; \
int _retries = 150; \
\
do { \
_val = read_sysreg(reg); \
_retries--; \
} while (((_val + 1) & GENMASK(8, 0)) <= 1 && _retries); \
clocksource/drivers/arch_timer: Workaround for Allwinner A64 timer instability The Allwinner A64 SoC is known[1] to have an unstable architectural timer, which manifests itself most obviously in the time jumping forward a multiple of 95 years[2][3]. This coincides with 2^56 cycles at a timer frequency of 24 MHz, implying that the time went slightly backward (and this was interpreted by the kernel as it jumping forward and wrapping around past the epoch). Investigation revealed instability in the low bits of CNTVCT at the point a high bit rolls over. This leads to power-of-two cycle forward and backward jumps. (Testing shows that forward jumps are about twice as likely as backward jumps.) Since the counter value returns to normal after an indeterminate read, each "jump" really consists of both a forward and backward jump from the software perspective. Unless the kernel is trapping CNTVCT reads, a userspace program is able to read the register in a loop faster than it changes. A test program running on all 4 CPU cores that reported jumps larger than 100 ms was run for 13.6 hours and reported the following: Count | Event -------+--------------------------- 9940 | jumped backward 699ms 268 | jumped backward 1398ms 1 | jumped backward 2097ms 16020 | jumped forward 175ms 6443 | jumped forward 699ms 2976 | jumped forward 1398ms 9 | jumped forward 356516ms 9 | jumped forward 357215ms 4 | jumped forward 714430ms 1 | jumped forward 3578440ms This works out to a jump larger than 100 ms about every 5.5 seconds on each CPU core. The largest jump (almost an hour!) was the following sequence of reads: 0x0000007fffffffff → 0x00000093feffffff → 0x0000008000000000 Note that the middle bits don't necessarily all read as all zeroes or all ones during the anomalous behavior; however the low 10 bits checked by the function in this patch have never been observed with any other value. 
Also note that smaller jumps are much more common, with backward jumps of 2048 (2^11) cycles observed over 400 times per second on each core. (Of course, this is partially explained by lower bits rolling over more frequently.) Any one of these could have caused the 95 year time skip. Similar anomalies were observed while reading CNTPCT (after patching the kernel to allow reads from userspace). However, the CNTPCT jumps are much less frequent, and only small jumps were observed. The same program as before (except now reading CNTPCT) observed after 72 hours: Count | Event -------+--------------------------- 17 | jumped backward 699ms 52 | jumped forward 175ms 2831 | jumped forward 699ms 5 | jumped forward 1398ms Further investigation showed that the instability in CNTPCT/CNTVCT also affected the respective timer's TVAL register. The following values were observed immediately after writing CNVT_TVAL to 0x10000000: CNTVCT | CNTV_TVAL | CNTV_CVAL | CNTV_TVAL Error --------------------+------------+--------------------+----------------- 0x000000d4a2d8bfff | 0x10003fff | 0x000000d4b2d8bfff | +0x00004000 0x000000d4a2d94000 | 0x0fffffff | 0x000000d4b2d97fff | -0x00004000 0x000000d4a2d97fff | 0x10003fff | 0x000000d4b2d97fff | +0x00004000 0x000000d4a2d9c000 | 0x0fffffff | 0x000000d4b2d9ffff | -0x00004000 The pattern of errors in CNTV_TVAL seemed to depend on exactly which value was written to it. 
For example, after writing 0x10101010: CNTVCT | CNTV_TVAL | CNTV_CVAL | CNTV_TVAL Error --------------------+------------+--------------------+----------------- 0x000001ac3effffff | 0x1110100f | 0x000001ac4f10100f | +0x1000000 0x000001ac40000000 | 0x1010100f | 0x000001ac5110100f | -0x1000000 0x000001ac58ffffff | 0x1110100f | 0x000001ac6910100f | +0x1000000 0x000001ac66000000 | 0x1010100f | 0x000001ac7710100f | -0x1000000 0x000001ac6affffff | 0x1110100f | 0x000001ac7b10100f | +0x1000000 0x000001ac6e000000 | 0x1010100f | 0x000001ac7f10100f | -0x1000000 I was also twice able to reproduce the issue covered by Allwinner's workaround[4], that writing to TVAL sometimes fails, and both CVAL and TVAL are left with entirely bogus values. One was the following values: CNTVCT | CNTV_TVAL | CNTV_CVAL --------------------+------------+-------------------------------------- 0x000000d4a2d6014c | 0x8fbd5721 | 0x000000d132935fff (615s in the past) Reviewed-by: Marc Zyngier <marc.zyngier@arm.com> ======================================================================== Because the CPU can read the CNTPCT/CNTVCT registers faster than they change, performing two reads of the register and comparing the high bits (like other workarounds) is not a workable solution. And because the timer can jump both forward and backward, no pair of reads can distinguish a good value from a bad one. The only way to guarantee a good value from consecutive reads would be to read _three_ times, and take the middle value only if the three values are 1) each unique and 2) increasing. This takes at minimum 3 counter cycles (125 ns), or more if an anomaly is detected. However, since there is a distinct pattern to the bad values, we can optimize the common case (1022/1024 of the time) to a single read by simply ignoring values that match the error pattern. This still takes no more than 3 cycles in the worst case, and requires much less code. 
As an additional safety check, we still limit the loop iteration to the number of max-frequency (1.2 GHz) CPU cycles in three 24 MHz counter periods. For the TVAL registers, the simple solution is to not use them. Instead, read or write the CVAL and calculate the TVAL value in software. Although the manufacturer is aware of at least part of the erratum[4], there is no official name for it. For now, use the kernel-internal name "UNKNOWN1". [1]: https://github.com/armbian/build/commit/a08cd6fe7ae9 [2]: https://forum.armbian.com/topic/3458-a64-datetime-clock-issue/ [3]: https://irclog.whitequark.org/linux-sunxi/2018-01-26 [4]: https://github.com/Allwinner-Homlet/H6-BSP4.9-linux/blob/master/drivers/clocksource/arm_arch_timer.c#L272 Acked-by: Maxime Ripard <maxime.ripard@bootlin.com> Tested-by: Andre Przywara <andre.przywara@arm.com> Signed-off-by: Samuel Holland <samuel@sholland.org> Cc: stable@vger.kernel.org Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
2019-01-13 05:17:18 +03:00
\
WARN_ON_ONCE(!_retries); \
_val; \
})
/*
 * Read the physical counter (CNTPCT_EL0) through the sun50i A64
 * erratum-aware __sun50i_a64_read_reg() helper macro.
 */
static u64 notrace sun50i_a64_read_cntpct_el0(void)
{
	u64 cnt = __sun50i_a64_read_reg(cntpct_el0);

	return cnt;
}
/*
 * Read the virtual counter (CNTVCT_EL0) through the sun50i A64
 * erratum-aware __sun50i_a64_read_reg() helper macro.
 */
static u64 notrace sun50i_a64_read_cntvct_el0(void)
{
	u64 cnt = __sun50i_a64_read_reg(cntvct_el0);

	return cnt;
}
#endif
#ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND
/*
 * Per-CPU pointer to the erratum workaround enabled for that CPU.
 * It is written by arch_timer_enable_workaround() either for the current
 * CPU only or for every possible CPU, and is exported (GPL-only).
 */
DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround);
EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround);
/*
 * Set to 1 as soon as a workaround providing a CNTVCT or CNTPCT read
 * accessor has been enabled; queried via arch_timer_counter_has_wa().
 */
static atomic_t timer_unstable_counter_workaround_in_use = ATOMIC_INIT(0);
clocksource/drivers/arm_arch_timer: Force inlining of erratum_set_next_event_generic() With some specific kernel configuration and Clang, the kernel fails to link with something like: ld.lld: error: undefined symbol: __compiletime_assert_200 >>> referenced by arch_timer.h:156 (./arch/arm64/include/asm/arch_timer.h:156) >>> clocksource/arm_arch_timer.o:(erratum_set_next_event_generic) in archive drivers/built-in.a ld.lld: error: undefined symbol: __compiletime_assert_197 >>> referenced by arch_timer.h:133 (./arch/arm64/include/asm/arch_timer.h:133) >>> clocksource/arm_arch_timer.o:(erratum_set_next_event_generic) in archive drivers/built-in.a make: *** [Makefile:1161: vmlinux] Error 1 These are due to the BUILD_BUG() macros contained in the low-level accessors (arch_timer_reg_{write,read}_cp15) being emitted, as the access type wasn't known at compile time. Fix this by making erratum_set_next_event_generic() __always_inline, so that the 'access' parameter is resolved at compile time, similarly to what is already done for set_next_event(). Fixes: 4775bc63f880 ("Add build-time guards for unhandled register accesses") Reported-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Signed-off-by: Marc Zyngier <maz@kernel.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Daniel Lezcano <daniel.lezcano@linaro.org> Cc: Sami Tolvanen <samitolvanen@google.com> Cc: Nick Desaulniers <ndesaulniers@google.com> Tested-by: Sami Tolvanen <samitolvanen@google.com> Reviewed-by: Nathan Chancellor <nathan@kernel.org> Tested-by: Nathan Chancellor <nathan@kernel.org> Link: https://lore.kernel.org/r/20211117113532.3895208-1-maz@kernel.org Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
2021-11-17 14:35:32 +03:00
/*
 * Program the next timer event without touching TVAL: the deadline is
 * computed in software from the stable counter accessor and written to
 * the matching CVAL system register.
 *
 * This must be inlined so that 'access' is a compile-time constant and
 * the register accessors can themselves be correctly inlined.
 *
 * @access: ARCH_TIMER_PHYS_ACCESS selects the physical timer; any other
 *          value selects the virtual timer.
 * @evt:    number of counter ticks from now until the event.
 * @clk:    clock event device being programmed.
 */
static __always_inline
void erratum_set_next_event_generic(const int access, unsigned long evt,
				    struct clock_event_device *clk)
{
	unsigned long ctl;
	u64 deadline;

	/* Enable the timer and unmask its interrupt in the control value. */
	ctl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
	ctl = (ctl | ARCH_TIMER_CTRL_ENABLE) & ~ARCH_TIMER_CTRL_IT_MASK;

	if (access != ARCH_TIMER_PHYS_ACCESS) {
		deadline = evt + arch_counter_get_cntvct_stable();
		write_sysreg(deadline, cntv_cval_el0);
	} else {
		deadline = evt + arch_counter_get_cntpct_stable();
		write_sysreg(deadline, cntp_cval_el0);
	}

	/* The control register is written last, after the comparator. */
	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctl, clk);
}
/*
 * Erratum-safe set_next_event callback for the virtual timer.
 * Always returns 0.
 */
static __maybe_unused
int erratum_set_next_event_virt(unsigned long evt,
				struct clock_event_device *clk)
{
	erratum_set_next_event_generic(ARCH_TIMER_VIRT_ACCESS, evt, clk);

	return 0;
}
/*
 * Erratum-safe set_next_event callback for the physical timer.
 * Always returns 0.
 */
static __maybe_unused
int erratum_set_next_event_phys(unsigned long evt,
				struct clock_event_device *clk)
{
	erratum_set_next_event_generic(ARCH_TIMER_PHYS_ACCESS, evt, clk);

	return 0;
}
static const struct arch_timer_erratum_workaround ool_workarounds[] = {
#ifdef CONFIG_FSL_ERRATUM_A008585
	{
		/* Matched against the "fsl,erratum-a008585" device-tree property. */
		.match_type = ate_match_dt,
		.id = "fsl,erratum-a008585",
		/*
		 * Description printed when the workaround is enabled; the
		 * erratum number must agree with the property name above.
		 */
		.desc = "Freescale erratum a008585",
		.read_cntpct_el0 = fsl_a008585_read_cntpct_el0,
		.read_cntvct_el0 = fsl_a008585_read_cntvct_el0,
		.set_next_event_phys = erratum_set_next_event_phys,
		.set_next_event_virt = erratum_set_next_event_virt,
	},
#endif
#ifdef CONFIG_HISILICON_ERRATUM_161010101
{
.match_type = ate_match_dt,
.id = "hisilicon,erratum-161010101",
.desc = "HiSilicon erratum 161010101",
.read_cntpct_el0 = hisi_161010101_read_cntpct_el0,
.read_cntvct_el0 = hisi_161010101_read_cntvct_el0,
.set_next_event_phys = erratum_set_next_event_phys,
.set_next_event_virt = erratum_set_next_event_virt,
},
{
.match_type = ate_match_acpi_oem_info,
.id = hisi_161010101_oem_info,
.desc = "HiSilicon erratum 161010101",
.read_cntpct_el0 = hisi_161010101_read_cntpct_el0,
.read_cntvct_el0 = hisi_161010101_read_cntvct_el0,
.set_next_event_phys = erratum_set_next_event_phys,
.set_next_event_virt = erratum_set_next_event_virt,
},
#endif
#ifdef CONFIG_ARM64_ERRATUM_858921
{
.match_type = ate_match_local_cap_id,
.id = (void *)ARM64_WORKAROUND_858921,
.desc = "ARM erratum 858921",
.read_cntpct_el0 = arm64_858921_read_cntpct_el0,
.read_cntvct_el0 = arm64_858921_read_cntvct_el0,
.set_next_event_phys = erratum_set_next_event_phys,
.set_next_event_virt = erratum_set_next_event_virt,
},
#endif
clocksource/drivers/arch_timer: Workaround for Allwinner A64 timer instability The Allwinner A64 SoC is known[1] to have an unstable architectural timer, which manifests itself most obviously in the time jumping forward a multiple of 95 years[2][3]. This coincides with 2^56 cycles at a timer frequency of 24 MHz, implying that the time went slightly backward (and this was interpreted by the kernel as it jumping forward and wrapping around past the epoch). Investigation revealed instability in the low bits of CNTVCT at the point a high bit rolls over. This leads to power-of-two cycle forward and backward jumps. (Testing shows that forward jumps are about twice as likely as backward jumps.) Since the counter value returns to normal after an indeterminate read, each "jump" really consists of both a forward and backward jump from the software perspective. Unless the kernel is trapping CNTVCT reads, a userspace program is able to read the register in a loop faster than it changes. A test program running on all 4 CPU cores that reported jumps larger than 100 ms was run for 13.6 hours and reported the following: Count | Event -------+--------------------------- 9940 | jumped backward 699ms 268 | jumped backward 1398ms 1 | jumped backward 2097ms 16020 | jumped forward 175ms 6443 | jumped forward 699ms 2976 | jumped forward 1398ms 9 | jumped forward 356516ms 9 | jumped forward 357215ms 4 | jumped forward 714430ms 1 | jumped forward 3578440ms This works out to a jump larger than 100 ms about every 5.5 seconds on each CPU core. The largest jump (almost an hour!) was the following sequence of reads: 0x0000007fffffffff → 0x00000093feffffff → 0x0000008000000000 Note that the middle bits don't necessarily all read as all zeroes or all ones during the anomalous behavior; however the low 10 bits checked by the function in this patch have never been observed with any other value. 
Also note that smaller jumps are much more common, with backward jumps of 2048 (2^11) cycles observed over 400 times per second on each core. (Of course, this is partially explained by lower bits rolling over more frequently.) Any one of these could have caused the 95 year time skip. Similar anomalies were observed while reading CNTPCT (after patching the kernel to allow reads from userspace). However, the CNTPCT jumps are much less frequent, and only small jumps were observed. The same program as before (except now reading CNTPCT) observed after 72 hours: Count | Event -------+--------------------------- 17 | jumped backward 699ms 52 | jumped forward 175ms 2831 | jumped forward 699ms 5 | jumped forward 1398ms Further investigation showed that the instability in CNTPCT/CNTVCT also affected the respective timer's TVAL register. The following values were observed immediately after writing 0x10000000 to CNTV_TVAL: CNTVCT | CNTV_TVAL | CNTV_CVAL | CNTV_TVAL Error --------------------+------------+--------------------+----------------- 0x000000d4a2d8bfff | 0x10003fff | 0x000000d4b2d8bfff | +0x00004000 0x000000d4a2d94000 | 0x0fffffff | 0x000000d4b2d97fff | -0x00004000 0x000000d4a2d97fff | 0x10003fff | 0x000000d4b2d97fff | +0x00004000 0x000000d4a2d9c000 | 0x0fffffff | 0x000000d4b2d9ffff | -0x00004000 The pattern of errors in CNTV_TVAL seemed to depend on exactly which value was written to it. 
For example, after writing 0x10101010: CNTVCT | CNTV_TVAL | CNTV_CVAL | CNTV_TVAL Error --------------------+------------+--------------------+----------------- 0x000001ac3effffff | 0x1110100f | 0x000001ac4f10100f | +0x1000000 0x000001ac40000000 | 0x1010100f | 0x000001ac5110100f | -0x1000000 0x000001ac58ffffff | 0x1110100f | 0x000001ac6910100f | +0x1000000 0x000001ac66000000 | 0x1010100f | 0x000001ac7710100f | -0x1000000 0x000001ac6affffff | 0x1110100f | 0x000001ac7b10100f | +0x1000000 0x000001ac6e000000 | 0x1010100f | 0x000001ac7f10100f | -0x1000000 I was also twice able to reproduce the issue covered by Allwinner's workaround[4], that writing to TVAL sometimes fails, and both CVAL and TVAL are left with entirely bogus values. One was the following values: CNTVCT | CNTV_TVAL | CNTV_CVAL --------------------+------------+-------------------------------------- 0x000000d4a2d6014c | 0x8fbd5721 | 0x000000d132935fff (615s in the past) Reviewed-by: Marc Zyngier <marc.zyngier@arm.com> ======================================================================== Because the CPU can read the CNTPCT/CNTVCT registers faster than they change, performing two reads of the register and comparing the high bits (like other workarounds) is not a workable solution. And because the timer can jump both forward and backward, no pair of reads can distinguish a good value from a bad one. The only way to guarantee a good value from consecutive reads would be to read _three_ times, and take the middle value only if the three values are 1) each unique and 2) increasing. This takes at minimum 3 counter cycles (125 ns), or more if an anomaly is detected. However, since there is a distinct pattern to the bad values, we can optimize the common case (1022/1024 of the time) to a single read by simply ignoring values that match the error pattern. This still takes no more than 3 cycles in the worst case, and requires much less code. 
As an additional safety check, we still limit the loop iteration to the number of max-frequency (1.2 GHz) CPU cycles in three 24 MHz counter periods. For the TVAL registers, the simple solution is to not use them. Instead, read or write the CVAL and calculate the TVAL value in software. Although the manufacturer is aware of at least part of the erratum[4], there is no official name for it. For now, use the kernel-internal name "UNKNOWN1". [1]: https://github.com/armbian/build/commit/a08cd6fe7ae9 [2]: https://forum.armbian.com/topic/3458-a64-datetime-clock-issue/ [3]: https://irclog.whitequark.org/linux-sunxi/2018-01-26 [4]: https://github.com/Allwinner-Homlet/H6-BSP4.9-linux/blob/master/drivers/clocksource/arm_arch_timer.c#L272 Acked-by: Maxime Ripard <maxime.ripard@bootlin.com> Tested-by: Andre Przywara <andre.przywara@arm.com> Signed-off-by: Samuel Holland <samuel@sholland.org> Cc: stable@vger.kernel.org Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
2019-01-13 05:17:18 +03:00
#ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1
{
.match_type = ate_match_dt,
.id = "allwinner,erratum-unknown1",
.desc = "Allwinner erratum UNKNOWN1",
.read_cntpct_el0 = sun50i_a64_read_cntpct_el0,
.read_cntvct_el0 = sun50i_a64_read_cntvct_el0,
.set_next_event_phys = erratum_set_next_event_phys,
.set_next_event_virt = erratum_set_next_event_virt,
clocksource/drivers/arch_timer: Workaround for Allwinner A64 timer instability The Allwinner A64 SoC is known[1] to have an unstable architectural timer, which manifests itself most obviously in the time jumping forward a multiple of 95 years[2][3]. This coincides with 2^56 cycles at a timer frequency of 24 MHz, implying that the time went slightly backward (and this was interpreted by the kernel as it jumping forward and wrapping around past the epoch). Investigation revealed instability in the low bits of CNTVCT at the point a high bit rolls over. This leads to power-of-two cycle forward and backward jumps. (Testing shows that forward jumps are about twice as likely as backward jumps.) Since the counter value returns to normal after an indeterminate read, each "jump" really consists of both a forward and backward jump from the software perspective. Unless the kernel is trapping CNTVCT reads, a userspace program is able to read the register in a loop faster than it changes. A test program running on all 4 CPU cores that reported jumps larger than 100 ms was run for 13.6 hours and reported the following: Count | Event -------+--------------------------- 9940 | jumped backward 699ms 268 | jumped backward 1398ms 1 | jumped backward 2097ms 16020 | jumped forward 175ms 6443 | jumped forward 699ms 2976 | jumped forward 1398ms 9 | jumped forward 356516ms 9 | jumped forward 357215ms 4 | jumped forward 714430ms 1 | jumped forward 3578440ms This works out to a jump larger than 100 ms about every 5.5 seconds on each CPU core. The largest jump (almost an hour!) was the following sequence of reads: 0x0000007fffffffff → 0x00000093feffffff → 0x0000008000000000 Note that the middle bits don't necessarily all read as all zeroes or all ones during the anomalous behavior; however the low 10 bits checked by the function in this patch have never been observed with any other value. 
Also note that smaller jumps are much more common, with backward jumps of 2048 (2^11) cycles observed over 400 times per second on each core. (Of course, this is partially explained by lower bits rolling over more frequently.) Any one of these could have caused the 95 year time skip. Similar anomalies were observed while reading CNTPCT (after patching the kernel to allow reads from userspace). However, the CNTPCT jumps are much less frequent, and only small jumps were observed. The same program as before (except now reading CNTPCT) observed after 72 hours: Count | Event -------+--------------------------- 17 | jumped backward 699ms 52 | jumped forward 175ms 2831 | jumped forward 699ms 5 | jumped forward 1398ms Further investigation showed that the instability in CNTPCT/CNTVCT also affected the respective timer's TVAL register. The following values were observed immediately after writing 0x10000000 to CNTV_TVAL: CNTVCT | CNTV_TVAL | CNTV_CVAL | CNTV_TVAL Error --------------------+------------+--------------------+----------------- 0x000000d4a2d8bfff | 0x10003fff | 0x000000d4b2d8bfff | +0x00004000 0x000000d4a2d94000 | 0x0fffffff | 0x000000d4b2d97fff | -0x00004000 0x000000d4a2d97fff | 0x10003fff | 0x000000d4b2d97fff | +0x00004000 0x000000d4a2d9c000 | 0x0fffffff | 0x000000d4b2d9ffff | -0x00004000 The pattern of errors in CNTV_TVAL seemed to depend on exactly which value was written to it. 
For example, after writing 0x10101010: CNTVCT | CNTV_TVAL | CNTV_CVAL | CNTV_TVAL Error --------------------+------------+--------------------+----------------- 0x000001ac3effffff | 0x1110100f | 0x000001ac4f10100f | +0x1000000 0x000001ac40000000 | 0x1010100f | 0x000001ac5110100f | -0x1000000 0x000001ac58ffffff | 0x1110100f | 0x000001ac6910100f | +0x1000000 0x000001ac66000000 | 0x1010100f | 0x000001ac7710100f | -0x1000000 0x000001ac6affffff | 0x1110100f | 0x000001ac7b10100f | +0x1000000 0x000001ac6e000000 | 0x1010100f | 0x000001ac7f10100f | -0x1000000 I was also twice able to reproduce the issue covered by Allwinner's workaround[4], that writing to TVAL sometimes fails, and both CVAL and TVAL are left with entirely bogus values. One was the following values: CNTVCT | CNTV_TVAL | CNTV_CVAL --------------------+------------+-------------------------------------- 0x000000d4a2d6014c | 0x8fbd5721 | 0x000000d132935fff (615s in the past) Reviewed-by: Marc Zyngier <marc.zyngier@arm.com> ======================================================================== Because the CPU can read the CNTPCT/CNTVCT registers faster than they change, performing two reads of the register and comparing the high bits (like other workarounds) is not a workable solution. And because the timer can jump both forward and backward, no pair of reads can distinguish a good value from a bad one. The only way to guarantee a good value from consecutive reads would be to read _three_ times, and take the middle value only if the three values are 1) each unique and 2) increasing. This takes at minimum 3 counter cycles (125 ns), or more if an anomaly is detected. However, since there is a distinct pattern to the bad values, we can optimize the common case (1022/1024 of the time) to a single read by simply ignoring values that match the error pattern. This still takes no more than 3 cycles in the worst case, and requires much less code. 
As an additional safety check, we still limit the loop iteration to the number of max-frequency (1.2 GHz) CPU cycles in three 24 MHz counter periods. For the TVAL registers, the simple solution is to not use them. Instead, read or write the CVAL and calculate the TVAL value in software. Although the manufacturer is aware of at least part of the erratum[4], there is no official name for it. For now, use the kernel-internal name "UNKNOWN1". [1]: https://github.com/armbian/build/commit/a08cd6fe7ae9 [2]: https://forum.armbian.com/topic/3458-a64-datetime-clock-issue/ [3]: https://irclog.whitequark.org/linux-sunxi/2018-01-26 [4]: https://github.com/Allwinner-Homlet/H6-BSP4.9-linux/blob/master/drivers/clocksource/arm_arch_timer.c#L272 Acked-by: Maxime Ripard <maxime.ripard@bootlin.com> Tested-by: Andre Przywara <andre.przywara@arm.com> Signed-off-by: Samuel Holland <samuel@sholland.org> Cc: stable@vger.kernel.org Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
2019-01-13 05:17:18 +03:00
},
#endif
#ifdef CONFIG_ARM64_ERRATUM_1418040
{
.match_type = ate_match_local_cap_id,
.id = (void *)ARM64_WORKAROUND_1418040,
.desc = "ARM erratum 1418040",
.disable_compat_vdso = true,
},
#endif
};
/*
 * Predicate deciding whether a workaround table entry applies.
 * The first argument is the candidate entry; the second is an opaque,
 * match-type specific argument (each matcher casts it as it needs).
 */
typedef bool (*ate_match_fn_t)(const struct arch_timer_erratum_workaround *,
const void *);
/*
 * Device-tree matcher: the workaround applies when the device node
 * passed in @arg carries the boolean property named by wa->id.
 */
static
bool arch_timer_check_dt_erratum(const struct arch_timer_erratum_workaround *wa,
				 const void *arg)
{
	/* @arg is the timer's struct device_node. */
	return of_property_read_bool((const struct device_node *)arg, wa->id);
}
/*
 * Capability matcher: wa->id encodes a CPU capability number, and the
 * workaround applies when this_cpu_has_cap() reports it for the
 * current CPU. @arg is unused.
 */
static
bool arch_timer_check_local_cap_erratum(const struct arch_timer_erratum_workaround *wa,
					const void *arg)
{
	uintptr_t cap = (uintptr_t)wa->id;

	return this_cpu_has_cap(cap);
}
static
bool arch_timer_check_acpi_oem_erratum(const struct arch_timer_erratum_workaround *wa,
const void *arg)
{
static const struct ate_acpi_oem_info empty_oem_info = {};
const struct ate_acpi_oem_info *info = wa->id;
const struct acpi_table_header *table = arg;
/* Iterate over the ACPI OEM info array, looking for a match */
while (memcmp(info, &empty_oem_info, sizeof(*info))) {
if (!memcmp(info->oem_id, table->oem_id, ACPI_OEM_ID_SIZE) &&
!memcmp(info->oem_table_id, table->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) &&
info->oem_revision == table->oem_revision)
return true;
info++;
}
return false;
}
/*
 * Scan ool_workarounds[] in order and return the first entry whose
 * match_type equals @type and for which @match_fn(entry, @arg) is true.
 * Returns NULL when no entry matches.
 */
static const struct arch_timer_erratum_workaround *
arch_timer_iterate_errata(enum arch_timer_erratum_match_type type,
			  ate_match_fn_t match_fn,
			  void *arg)
{
	const struct arch_timer_erratum_workaround *cand = ool_workarounds;
	const struct arch_timer_erratum_workaround *end =
		ool_workarounds + ARRAY_SIZE(ool_workarounds);

	for (; cand < end; cand++) {
		/* The matcher is only invoked for entries of the right type. */
		if (cand->match_type == type && match_fn(cand, arg))
			return cand;
	}

	return NULL;
}
/*
 * Install @wa as the active workaround, either on the current CPU only
 * (@local true) or on every possible CPU, then adjust the global
 * "counter workaround in use" flag and the vDSO clock mode to match.
 */
static
void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa,
				  bool local)
{
	int cpu;

	if (!local) {
		for_each_possible_cpu(cpu)
			per_cpu(timer_unstable_counter_workaround, cpu) = wa;
	} else {
		__this_cpu_write(timer_unstable_counter_workaround, wa);
	}

	/* Any out-of-line counter accessor marks the counter as worked around. */
	if (wa->read_cntpct_el0 || wa->read_cntvct_el0)
		atomic_set(&timer_unstable_counter_workaround_in_use, 1);

	/*
	 * Don't use the vdso fastpath if errata require using the
	 * out-of-line counter accessor. We may change our mind pretty
	 * late in the game (with a per-CPU erratum, for example), so
	 * change both the default value and the vdso itself.
	 */
	if (wa->read_cntvct_el0) {
		vdso_default = VDSO_CLOCKMODE_NONE;
		clocksource_counter.vdso_clock_mode = VDSO_CLOCKMODE_NONE;
		return;
	}

	/* Only downgrade to "no compat vDSO" if the vDSO is still in use. */
	if (wa->disable_compat_vdso && vdso_default != VDSO_CLOCKMODE_NONE) {
		vdso_default = VDSO_CLOCKMODE_ARCHTIMER_NOCOMPAT;
		clocksource_counter.vdso_clock_mode = vdso_default;
	}
}
/*
 * Look up an erratum workaround of the given match type and enable it.
 *
 * @type: how table entries are matched (device tree property, local CPU
 *        capability, or ACPI OEM information).
 * @arg:  opaque argument forwarded to the matcher for @type.
 *
 * Capability matches are enabled for the current CPU only; the other
 * match types are enabled for all possible CPUs. If the current CPU
 * already has a workaround installed, nothing is changed; a warning is
 * printed when the new match differs from the installed one.
 */
static void arch_timer_check_ool_workaround(enum arch_timer_erratum_match_type type,
					    void *arg)
{
	const struct arch_timer_erratum_workaround *wa, *__wa;
	ate_match_fn_t match_fn = NULL;
	bool local = false;

	switch (type) {
	case ate_match_dt:
		match_fn = arch_timer_check_dt_erratum;
		break;
	case ate_match_local_cap_id:
		match_fn = arch_timer_check_local_cap_erratum;
		local = true;
		break;
	case ate_match_acpi_oem_info:
		match_fn = arch_timer_check_acpi_oem_erratum;
		break;
	default:
		WARN_ON(1);
		return;
	}

	wa = arch_timer_iterate_errata(type, match_fn, arg);
	if (!wa)
		return;

	__wa = __this_cpu_read(timer_unstable_counter_workaround);
	if (__wa) {
		/* The first workaround installed on this CPU wins. */
		if (wa != __wa)
			pr_warn("Can't enable workaround for %s (clashes with %s)\n",
				wa->desc, __wa->desc);
		return;
	}

	arch_timer_enable_workaround(wa, local);
	pr_info("Enabling %s workaround for %s\n",
		local ? "local" : "global", wa->desc);
}
/*
 * Report whether has_erratum_handler() finds a read_cntvct_el0 handler.
 */
static bool arch_timer_this_cpu_has_cntvct_wa(void)
{
	bool has_wa = has_erratum_handler(read_cntvct_el0);

	return has_wa;
}
/*
 * Report whether any workaround providing an out-of-line counter
 * accessor has been enabled so far.
 */
static bool arch_timer_counter_has_wa(void)
{
	return atomic_read(&timer_unstable_counter_workaround_in_use) != 0;
}
#else
/*
 * Stubs used when CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND is not set:
 * checking for a workaround does nothing, and both queries report that
 * no workaround is in effect.
 */
#define arch_timer_check_ool_workaround(t,a) do { } while(0)
#define arch_timer_this_cpu_has_cntvct_wa() ({false;})
#define arch_timer_counter_has_wa() ({false;})
#endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */
/*
 * Common interrupt handling for all timer flavours.
 *
 * If the control register reports a pending timer interrupt, mask it in
 * the control register, run the clock event handler and return
 * IRQ_HANDLED. Otherwise return IRQ_NONE.
 */
static __always_inline irqreturn_t timer_handler(const int access,
						 struct clock_event_device *evt)
{
	unsigned long ctl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, evt);

	if (!(ctl & ARCH_TIMER_CTRL_IT_STAT))
		return IRQ_NONE;

	/* Mask the interrupt before handing the event to the handler. */
	ctl |= ARCH_TIMER_CTRL_IT_MASK;
	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctl, evt);
	evt->event_handler(evt);

	return IRQ_HANDLED;
}
/*
 * IRQ handler for the virtual timer; @dev_id is the clock event device.
 */
static irqreturn_t arch_timer_handler_virt(int irq, void *dev_id)
{
	return timer_handler(ARCH_TIMER_VIRT_ACCESS, dev_id);
}
/*
 * IRQ handler for the physical timer; @dev_id is the clock event device.
 */
static irqreturn_t arch_timer_handler_phys(int irq, void *dev_id)
{
	return timer_handler(ARCH_TIMER_PHYS_ACCESS, dev_id);
}
/*
 * IRQ handler for the memory-mapped physical timer; @dev_id is the
 * clock event device.
 */
static irqreturn_t arch_timer_handler_phys_mem(int irq, void *dev_id)
{
	return timer_handler(ARCH_TIMER_MEM_PHYS_ACCESS, dev_id);
}
/*
 * IRQ handler for the memory-mapped virtual timer; @dev_id is the
 * clock event device.
 */
static irqreturn_t arch_timer_handler_virt_mem(int irq, void *dev_id)
{
	return timer_handler(ARCH_TIMER_MEM_VIRT_ACCESS, dev_id);
}
/*
 * Stop the timer selected by @access by clearing the enable bit in its
 * control register. Always returns 0.
 */
static __always_inline int arch_timer_shutdown(const int access,
					       struct clock_event_device *clk)
{
	unsigned long ctl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);

	ctl &= ~ARCH_TIMER_CTRL_ENABLE;
	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctl, clk);

	return 0;
}
/* Shutdown callback for the virtual timer. */
static int arch_timer_shutdown_virt(struct clock_event_device *clk)
{
	int ret = arch_timer_shutdown(ARCH_TIMER_VIRT_ACCESS, clk);

	return ret;
}
/* Shutdown callback for the physical timer. */
static int arch_timer_shutdown_phys(struct clock_event_device *clk)
{
	int ret = arch_timer_shutdown(ARCH_TIMER_PHYS_ACCESS, clk);

	return ret;
}
/* Shutdown callback for the memory-mapped virtual timer. */
static int arch_timer_shutdown_virt_mem(struct clock_event_device *clk)
{
	int ret = arch_timer_shutdown(ARCH_TIMER_MEM_VIRT_ACCESS, clk);

	return ret;
}
/* Shutdown callback for the memory-mapped physical timer. */
static int arch_timer_shutdown_phys_mem(struct clock_event_device *clk)
{
	int ret = arch_timer_shutdown(ARCH_TIMER_MEM_PHYS_ACCESS, clk);

	return ret;
}
/*
 * Program the next event on a system-register timer: the comparator is
 * set to the current counter value plus @evt ticks, then the timer is
 * enabled with its interrupt unmasked.
 *
 * @access: ARCH_TIMER_PHYS_ACCESS reads the physical counter; any other
 *          value reads the virtual counter.
 */
static __always_inline void set_next_event(const int access, unsigned long evt,
					   struct clock_event_device *clk)
{
	unsigned long ctl;
	u64 now;

	ctl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
	ctl = (ctl | ARCH_TIMER_CTRL_ENABLE) & ~ARCH_TIMER_CTRL_IT_MASK;

	now = (access == ARCH_TIMER_PHYS_ACCESS) ? __arch_counter_get_cntpct()
						 : __arch_counter_get_cntvct();

	/* Comparator first, control register last. */
	arch_timer_reg_write(access, ARCH_TIMER_REG_CVAL, evt + now, clk);
	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctl, clk);
}
/* set_next_event callback for the virtual timer; always returns 0. */
static int
arch_timer_set_next_event_virt(unsigned long evt,
			       struct clock_event_device *clk)
{
	set_next_event(ARCH_TIMER_VIRT_ACCESS, evt, clk);

	return 0;
}
/* set_next_event callback for the physical timer; always returns 0. */
static int
arch_timer_set_next_event_phys(unsigned long evt,
			       struct clock_event_device *clk)
{
	set_next_event(ARCH_TIMER_PHYS_ACCESS, evt, clk);

	return 0;
}
static u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo)
{
u32 cnt_lo, cnt_hi, tmp_hi;
do {
cnt_hi = readl_relaxed(t->base + offset_lo + 4);
cnt_lo = readl_relaxed(t->base + offset_lo);
tmp_hi = readl_relaxed(t->base + offset_lo + 4);
} while (cnt_hi != tmp_hi);
return ((u64) cnt_hi << 32) | cnt_lo;
}
/*
 * Program the next event on a memory-mapped timer: the comparator is
 * set to the current memory-mapped counter value plus @evt ticks, then
 * the timer is enabled with its interrupt unmasked.
 *
 * @access: ARCH_TIMER_MEM_VIRT_ACCESS reads the counter at CNTVCT_LO;
 *          any other value reads the counter at CNTPCT_LO.
 */
static __always_inline void set_next_event_mem(const int access, unsigned long evt,
					       struct clock_event_device *clk)
{
	struct arch_timer *timer = to_arch_timer(clk);
	unsigned long ctl;
	u64 now;

	ctl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
	ctl = (ctl | ARCH_TIMER_CTRL_ENABLE) & ~ARCH_TIMER_CTRL_IT_MASK;

	now = arch_counter_get_cnt_mem(timer,
				       access == ARCH_TIMER_MEM_VIRT_ACCESS ?
				       CNTVCT_LO : CNTPCT_LO);

	/* Comparator first, control register last. */
	arch_timer_reg_write(access, ARCH_TIMER_REG_CVAL, evt + now, clk);
	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctl, clk);
}
/*
 * set_next_event callback for the memory-mapped virtual timer;
 * always returns 0.
 */
static int
arch_timer_set_next_event_virt_mem(unsigned long evt,
				   struct clock_event_device *clk)
{
	set_next_event_mem(ARCH_TIMER_MEM_VIRT_ACCESS, evt, clk);

	return 0;
}
/*
 * set_next_event callback for the memory-mapped physical timer;
 * always returns 0.
 */
static int
arch_timer_set_next_event_phys_mem(unsigned long evt,
				   struct clock_event_device *clk)
{
	set_next_event_mem(ARCH_TIMER_MEM_PHYS_ACCESS, evt, clk);

	return 0;
}
static u64 __arch_timer_check_delta(void)
{
#ifdef CONFIG_ARM64
const struct midr_range broken_cval_midrs[] = {
/*
* XGene-1 implements CVAL in terms of TVAL, meaning
* that the maximum timer range is 32bit. Shame on them.
clocksource/drivers/arm_arch_timer: Fix XGene-1 TVAL register math error The TVAL register is 32 bit signed. Thus only the lower 31 bits are available to specify when an interrupt is to occur at some time in the near future. Attempting to specify a larger interval with TVAL results in a negative time delta which means the timer fires immediately upon being programmed, rather than firing at that expected future time. The solution is for Linux to declare that TVAL is a 31 bit register rather than give its true size of 32 bits. This prevents Linux from programming TVAL with a too-large value. Note that, prior to 5.16, this little trick was the standard way to handle TVAL in Linux, so there is nothing new happening here on that front. The softlockup detector hides the issue, because it keeps generating short timer deadlines that are within the scope of the broken timer. Disabling it, it starts using NO_HZ with much longer timer deadlines, which turns into an interrupt flood: 11: 1124855130 949168462 758009394 76417474 104782230 30210281 310890 1734323687 GICv2 29 Level arch_timer And "much longer" isn't that long: it takes less than 43s to underflow TVAL at 50MHz (the frequency of the counter on XGene-1). Some comments on the v1 version of this patch by Marc Zyngier: XGene implements CVAL (a 64bit comparator) in terms of TVAL (a countdown register) instead of the other way around. TVAL being a 32bit register, the width of the counter should equally be 32. However, TVAL is a *signed* value, and keeps counting down in the negative range once the timer fires. It means that any TVAL value with bit 31 set will fire immediately, as it cannot be distinguished from an already expired timer. Reducing the timer range back to a paltry 31 bits papers over the issue. 
Another problem cannot be fixed though, which is that the timer interrupt *must* be handled within the negative countdown period, or the interrupt will be lost (TVAL will rollover to a positive value, indicative of a new timer deadline). Fixes: 012f18850452 ("clocksource/drivers/arm_arch_timer: Work around broken CVAL implementations") Signed-off-by: Joe Korty <joe.korty@concurrent-rt.com> Signed-off-by: Marc Zyngier <maz@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20221024165422.GA51107@zipoli.concurrent-rt.com Link: https://lore.kernel.org/r/20221121145343.896018-1-maz@kernel.org [maz: revamped the commit message]
2022-11-21 17:53:43 +03:00
*
* Note that TVAL is signed, thus has only 31 of its
* 32 bits to express magnitude.
*/
MIDR_ALL_VERSIONS(MIDR_CPU_MODEL(ARM_CPU_IMP_APM,
APM_CPU_PART_POTENZA)),
{},
};
if (is_midr_in_range_list(read_cpuid_id(), broken_cval_midrs)) {
clocksource/drivers/arm_arch_timer: Fix XGene-1 TVAL register math error The TVAL register is 32 bit signed. Thus only the lower 31 bits are available to specify when an interrupt is to occur at some time in the near future. Attempting to specify a larger interval with TVAL results in a negative time delta which means the timer fires immediately upon being programmed, rather than firing at that expected future time. The solution is for Linux to declare that TVAL is a 31 bit register rather than give its true size of 32 bits. This prevents Linux from programming TVAL with a too-large value. Note that, prior to 5.16, this little trick was the standard way to handle TVAL in Linux, so there is nothing new happening here on that front. The softlockup detector hides the issue, because it keeps generating short timer deadlines that are within the scope of the broken timer. Disabling it, it starts using NO_HZ with much longer timer deadlines, which turns into an interrupt flood: 11: 1124855130 949168462 758009394 76417474 104782230 30210281 310890 1734323687 GICv2 29 Level arch_timer And "much longer" isn't that long: it takes less than 43s to underflow TVAL at 50MHz (the frequency of the counter on XGene-1). Some comments on the v1 version of this patch by Marc Zyngier: XGene implements CVAL (a 64bit comparator) in terms of TVAL (a countdown register) instead of the other way around. TVAL being a 32bit register, the width of the counter should equally be 32. However, TVAL is a *signed* value, and keeps counting down in the negative range once the timer fires. It means that any TVAL value with bit 31 set will fire immediately, as it cannot be distinguished from an already expired timer. Reducing the timer range back to a paltry 31 bits papers over the issue. 
Another problem cannot be fixed though, which is that the timer interrupt *must* be handled within the negative countdown period, or the interrupt will be lost (TVAL will rollover to a positive value, indicative of a new timer deadline). Fixes: 012f18850452 ("clocksource/drivers/arm_arch_timer: Work around broken CVAL implementations") Signed-off-by: Joe Korty <joe.korty@concurrent-rt.com> Signed-off-by: Marc Zyngier <maz@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20221024165422.GA51107@zipoli.concurrent-rt.com Link: https://lore.kernel.org/r/20221121145343.896018-1-maz@kernel.org [maz: revamped the commit message]
2022-11-21 17:53:43 +03:00
pr_warn_once("Broken CNTx_CVAL_EL1, using 31 bit TVAL instead.\n");
return CLOCKSOURCE_MASK(31);
}
#endif
clocksource/drivers/arm_arch_timer: Fix masking for high freq counters Unfortunately, the architecture provides no means to determine the bit width of the system counter. However, we do know the following from the specification: - the system counter is at least 56 bits wide - Roll-over time of not less than 40 years To date, the arch timer driver has depended on the first property, assuming any system counter to be 56 bits wide and masking off the rest. However, combining a narrow clocksource mask with a high frequency counter could result in prematurely wrapping the system counter by a significant margin. For example, a 56 bit wide, 1GHz system counter would wrap in a mere 2.28 years! This is a problem for two reasons: v8.6+ implementations are required to provide a 64 bit, 1GHz system counter. Furthermore, before v8.6, implementers may select a counter frequency of their choosing. Fix the issue by deriving a valid clock mask based on the second property from above. Set the floor at 56 bits, since we know no system counter is narrower than that. [maz: fixed width computation not to lose the last bit, added max delta generation for the timer] Suggested-by: Marc Zyngier <maz@kernel.org> Signed-off-by: Oliver Upton <oupton@google.com> Reviewed-by: Linus Walleij <linus.walleij@linaro.org> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20210807191428.3488948-1-oupton@google.com Link: https://lore.kernel.org/r/20211017124225.3018098-13-maz@kernel.org Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
2021-10-17 15:42:20 +03:00
return CLOCKSOURCE_MASK(arch_counter_get_width());
}
static void __arch_timer_setup(unsigned type,
struct clock_event_device *clk)
{
u64 max_delta;
clk->features = CLOCK_EVT_FEAT_ONESHOT;
if (type == ARCH_TIMER_TYPE_CP15) {
typeof(clk->set_next_event) sne;
arch_timer_check_ool_workaround(ate_match_local_cap_id, NULL);
clocksource: arch_arm_timer: Fix age-old arch timer C3STOP detection issue ARM arch timers are tightly coupled with the CPU logic and lose context on platform implementing HW power management when cores are powered down at run-time. Marking the arch timers as C3STOP regardless of power management capabilities causes issues on platforms with no power management, since in that case the arch timers cannot possibly enter states where the timer loses context at runtime and therefore can always be used as a high resolution clockevent device. In order to fix the C3STOP issue in a way compliant with how real HW works, this patch adds a boolean property to the arch timer bindings to define if the arch timer is managed by an always-on power domain. This power domain is present on all ARM platforms to date, and manages HW that must not be turned off, whatever the state of other HW components (eg power controller). On platforms with no power management capabilities, it is the only power domain present, which encompasses and manages power supply for all HW components in the system. If the timer is powered by the always-on power domain, the always-on property must be present in the bindings which means that the timer cannot be shutdown at runtime, so it is not a C3STOP clockevent device. If the timer binding does not contain the always-on property, the timer is assumed to be power-gateable, hence it must be defined as a C3STOP clockevent device. Cc: Daniel Lezcano <daniel.lezcano@linaro.org> Cc: Magnus Damm <damm@opensource.se> Cc: Marc Carino <marc.ceeeee@gmail.com> Cc: Mark Rutland <mark.rutland@arm.com> Acked-by: Marc Zyngier <marc.zyngier@arm.com> Acked-by: Rob Herring <robh@kernel.org> Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
2014-04-08 13:04:32 +04:00
if (arch_timer_c3stop)
clk->features |= CLOCK_EVT_FEAT_C3STOP;
clk->name = "arch_sys_timer";
clk->rating = 450;
clk->cpumask = cpumask_of(smp_processor_id());
clk->irq = arch_timer_ppi[arch_timer_uses_ppi];
switch (arch_timer_uses_ppi) {
case ARCH_TIMER_VIRT_PPI:
clk->set_state_shutdown = arch_timer_shutdown_virt;
clk->set_state_oneshot_stopped = arch_timer_shutdown_virt;
sne = erratum_handler(set_next_event_virt);
break;
case ARCH_TIMER_PHYS_SECURE_PPI:
case ARCH_TIMER_PHYS_NONSECURE_PPI:
case ARCH_TIMER_HYP_PPI:
clk->set_state_shutdown = arch_timer_shutdown_phys;
clk->set_state_oneshot_stopped = arch_timer_shutdown_phys;
sne = erratum_handler(set_next_event_phys);
break;
default:
BUG();
}
clk->set_next_event = sne;
max_delta = __arch_timer_check_delta();
} else {
clk->features |= CLOCK_EVT_FEAT_DYNIRQ;
clk->name = "arch_mem_timer";
clk->rating = 400;
clk->cpumask = cpu_possible_mask;
if (arch_timer_mem_use_virtual) {
clk->set_state_shutdown = arch_timer_shutdown_virt_mem;
clk->set_state_oneshot_stopped = arch_timer_shutdown_virt_mem;
clk->set_next_event =
arch_timer_set_next_event_virt_mem;
} else {
clk->set_state_shutdown = arch_timer_shutdown_phys_mem;
clk->set_state_oneshot_stopped = arch_timer_shutdown_phys_mem;
clk->set_next_event =
arch_timer_set_next_event_phys_mem;
}
max_delta = CLOCKSOURCE_MASK(56);
}
clk->set_state_shutdown(clk);
clockevents_config_and_register(clk, arch_timer_rate, 0xf, max_delta);
}
/*
 * Program the event stream trigger field of CNTKCTL with @divider (capped
 * at 15), turn on the virtual event stream, and record that the event
 * stream is available on the current CPU.
 */
static void arch_timer_evtstrm_enable(unsigned int divider)
{
	u32 ctl = arch_timer_get_cntkctl();

#ifdef CONFIG_ARM64
	/*
	 * With ECV a large divider is likely; switch to the scaled interval
	 * (EVNTIS) and compensate the divider accordingly.
	 */
	if (cpus_have_const_cap(ARM64_HAS_ECV) && divider > 15) {
		ctl |= ARCH_TIMER_EVT_INTERVAL_SCALE;
		divider -= 8;
	}
#endif

	/* The trigger field cannot hold more than 15. */
	if (divider > 15U)
		divider = 15U;

	/* Replace the old trigger value and enable the virtual event stream */
	ctl &= ~ARCH_TIMER_EVT_TRIGGER_MASK;
	ctl |= divider << ARCH_TIMER_EVT_TRIGGER_SHIFT;
	ctl |= ARCH_TIMER_VIRT_EVT_EN;
	arch_timer_set_cntkctl(ctl);

	arch_timer_set_evtstrm_feature();
	cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
}
/*
 * Derive the event stream divider from the counter rate and enable the
 * event stream with it.
 */
static void arch_timer_configure_evtstream(void)
{
	int div, pos;

	/*
	 * The event stream can run at no more than half the counter
	 * frequency, hence the extra division by two.
	 */
	div = arch_timer_rate / ARCH_TIMER_EVT_STREAM_FREQ / 2;

	/*
	 * Round to the nearest power of two: take the index of the top set
	 * bit (counted from 0) and bump it by one when the bit right below
	 * it is set as well.
	 */
	pos = fls(div) - 1;
	if (pos > 0 && (div & BIT(pos - 1)))
		pos++;

	/* fls(0) is 0, so pos may be -1 here; never pass a negative value */
	if (pos < 0)
		pos = 0;

	/* enable event stream */
	arch_timer_evtstrm_enable(pos);
}
/*
 * Reset the userspace access controls in CNTKCTL for the current CPU:
 * everything is disabled, then access to the virtual counter is granted
 * again unless this CPU needs a CNTVCT workaround.
 */
static void arch_counter_set_user_access(void)
{
	/* User access to the timers and both counters, plus the event stream */
	const u32 revoke = ARCH_TIMER_USR_PT_ACCESS_EN |
			   ARCH_TIMER_USR_VT_ACCESS_EN |
			   ARCH_TIMER_USR_VCT_ACCESS_EN |
			   ARCH_TIMER_VIRT_EVT_EN |
			   ARCH_TIMER_USR_PCT_ACCESS_EN;
	u32 ctl = arch_timer_get_cntkctl();

	ctl &= ~revoke;

	/*
	 * Let userspace read the virtual counter only when it does not
	 * need to be worked around on this CPU. The vdso may already have
	 * been disabled regardless.
	 */
	if (!arch_timer_this_cpu_has_cntvct_wa())
		ctl |= ARCH_TIMER_USR_VCT_ACCESS_EN;
	else
		pr_info("CPU%d: Trapping CNTVCT access\n", smp_processor_id());

	arch_timer_set_cntkctl(ctl);
}
static bool arch_timer_has_nonsecure_ppi(void)
{
return (arch_timer_uses_ppi == ARCH_TIMER_PHYS_SECURE_PPI &&
arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]);
}
clocksource/arm_arch_timer: Force per-CPU interrupt to be level-triggered The ARM architected timer produces level-triggered interrupts (this is mandated by the architecture). Unfortunately, a number of device-trees get this wrong, and expose an edge-triggered interrupt. Until now, this wasn't too much an issue, as the programming of the trigger would fail (the corresponding PPI cannot be reconfigured), and the kernel would be happy with this. But we're about to change this, and trust DT a lot if the driver doesn't provide its own trigger information. In that context, the timer breaks badly. While we do need to fix the DTs, there is also some userspace out there (kvmtool) that generates the same kind of broken DT on the fly, and that will completely break with newer kernels. As a safety measure, and to keep buggy software alive as well as buying us some time to fix DTs all over the place, let's check what trigger configuration has been given us by the firmware. If this is not a level configuration, then we know that the DT/ACPI configuration is bust, and we pick some defaults which won't be worse than the existing setup. 
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> Cc: Andrew Lunn <andrew@lunn.ch> Cc: Liu Gang <Gang.Liu@nxp.com> Cc: Mark Rutland <marc.rutland@arm.com> Cc: Masahiro Yamada <yamada.masahiro@socionext.com> Cc: Wenbin Song <Wenbin.Song@freescale.com> Cc: Mingkai Hu <Mingkai.Hu@freescale.com> Cc: Florian Fainelli <f.fainelli@gmail.com> Cc: Kevin Hilman <khilman@baylibre.com> Cc: Daniel Lezcano <daniel.lezcano@linaro.org> Cc: Michal Simek <michal.simek@xilinx.com> Cc: Jon Hunter <jonathanh@nvidia.com> Cc: arm@kernel.org Cc: bcm-kernel-feedback-list@broadcom.com Cc: linux-arm-kernel@lists.infradead.org Cc: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> Cc: Jason Cooper <jason@lakedaemon.net> Cc: Ray Jui <rjui@broadcom.com> Cc: "Hou Zhiqiang" <B48286@freescale.com> Cc: Tirumalesh Chalamarla <tchalamarla@cavium.com> Cc: linux-samsung-soc@vger.kernel.org Cc: Yuan Yao <yao.yuan@nxp.com> Cc: Jan Glauber <jglauber@cavium.com> Cc: Gregory Clement <gregory.clement@free-electrons.com> Cc: linux-amlogic@lists.infradead.org Cc: soren.brinkmann@xilinx.com Cc: Rajesh Bhagat <rajesh.bhagat@freescale.com> Cc: Scott Branden <sbranden@broadcom.com> Cc: Duc Dang <dhdang@apm.com> Cc: Kukjin Kim <kgene@kernel.org> Cc: Carlo Caione <carlo@caione.org> Cc: Dinh Nguyen <dinguyen@opensource.altera.com> Link: http://lkml.kernel.org/r/1470045256-9032-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2016-08-01 12:54:15 +03:00
/*
 * Return the trigger type to use when enabling @irq.
 *
 * A level-high or level-low trigger reported for the interrupt is passed
 * through unchanged. Anything else is treated as a firmware description
 * error: a warning is logged and level-low is returned instead.
 */
static u32 check_ppi_trigger(int irq)
{
	u32 trigger = irq_get_trigger_type(irq);

	switch (trigger) {
	case IRQF_TRIGGER_HIGH:
	case IRQF_TRIGGER_LOW:
		return trigger;
	default:
		pr_warn("WARNING: Invalid trigger for IRQ%d, assuming level low\n", irq);
		pr_warn("WARNING: Please fix your firmware\n");
		return IRQF_TRIGGER_LOW;
	}
}
static int arch_timer_starting_cpu(unsigned int cpu)
{
struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt);
clocksource/arm_arch_timer: Force per-CPU interrupt to be level-triggered The ARM architected timer produces level-triggered interrupts (this is mandated by the architecture). Unfortunately, a number of device-trees get this wrong, and expose an edge-triggered interrupt. Until now, this wasn't too much an issue, as the programming of the trigger would fail (the corresponding PPI cannot be reconfigured), and the kernel would be happy with this. But we're about to change this, and trust DT a lot if the driver doesn't provide its own trigger information. In that context, the timer breaks badly. While we do need to fix the DTs, there is also some userspace out there (kvmtool) that generates the same kind of broken DT on the fly, and that will completely break with newer kernels. As a safety measure, and to keep buggy software alive as well as buying us some time to fix DTs all over the place, let's check what trigger configuration has been given us by the firmware. If this is not a level configuration, then we know that the DT/ACPI configuration is bust, and we pick some defaults which won't be worse than the existing setup. 
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> Cc: Andrew Lunn <andrew@lunn.ch> Cc: Liu Gang <Gang.Liu@nxp.com> Cc: Mark Rutland <marc.rutland@arm.com> Cc: Masahiro Yamada <yamada.masahiro@socionext.com> Cc: Wenbin Song <Wenbin.Song@freescale.com> Cc: Mingkai Hu <Mingkai.Hu@freescale.com> Cc: Florian Fainelli <f.fainelli@gmail.com> Cc: Kevin Hilman <khilman@baylibre.com> Cc: Daniel Lezcano <daniel.lezcano@linaro.org> Cc: Michal Simek <michal.simek@xilinx.com> Cc: Jon Hunter <jonathanh@nvidia.com> Cc: arm@kernel.org Cc: bcm-kernel-feedback-list@broadcom.com Cc: linux-arm-kernel@lists.infradead.org Cc: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> Cc: Jason Cooper <jason@lakedaemon.net> Cc: Ray Jui <rjui@broadcom.com> Cc: "Hou Zhiqiang" <B48286@freescale.com> Cc: Tirumalesh Chalamarla <tchalamarla@cavium.com> Cc: linux-samsung-soc@vger.kernel.org Cc: Yuan Yao <yao.yuan@nxp.com> Cc: Jan Glauber <jglauber@cavium.com> Cc: Gregory Clement <gregory.clement@free-electrons.com> Cc: linux-amlogic@lists.infradead.org Cc: soren.brinkmann@xilinx.com Cc: Rajesh Bhagat <rajesh.bhagat@freescale.com> Cc: Scott Branden <sbranden@broadcom.com> Cc: Duc Dang <dhdang@apm.com> Cc: Kukjin Kim <kgene@kernel.org> Cc: Carlo Caione <carlo@caione.org> Cc: Dinh Nguyen <dinguyen@opensource.altera.com> Link: http://lkml.kernel.org/r/1470045256-9032-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2016-08-01 12:54:15 +03:00
u32 flags;
__arch_timer_setup(ARCH_TIMER_TYPE_CP15, clk);
clocksource/arm_arch_timer: Force per-CPU interrupt to be level-triggered The ARM architected timer produces level-triggered interrupts (this is mandated by the architecture). Unfortunately, a number of device-trees get this wrong, and expose an edge-triggered interrupt. Until now, this wasn't too much an issue, as the programming of the trigger would fail (the corresponding PPI cannot be reconfigured), and the kernel would be happy with this. But we're about to change this, and trust DT a lot if the driver doesn't provide its own trigger information. In that context, the timer breaks badly. While we do need to fix the DTs, there is also some userspace out there (kvmtool) that generates the same kind of broken DT on the fly, and that will completely break with newer kernels. As a safety measure, and to keep buggy software alive as well as buying us some time to fix DTs all over the place, let's check what trigger configuration has been given us by the firmware. If this is not a level configuration, then we know that the DT/ACPI configuration is bust, and we pick some defaults which won't be worse than the existing setup. 
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> Cc: Andrew Lunn <andrew@lunn.ch> Cc: Liu Gang <Gang.Liu@nxp.com> Cc: Mark Rutland <marc.rutland@arm.com> Cc: Masahiro Yamada <yamada.masahiro@socionext.com> Cc: Wenbin Song <Wenbin.Song@freescale.com> Cc: Mingkai Hu <Mingkai.Hu@freescale.com> Cc: Florian Fainelli <f.fainelli@gmail.com> Cc: Kevin Hilman <khilman@baylibre.com> Cc: Daniel Lezcano <daniel.lezcano@linaro.org> Cc: Michal Simek <michal.simek@xilinx.com> Cc: Jon Hunter <jonathanh@nvidia.com> Cc: arm@kernel.org Cc: bcm-kernel-feedback-list@broadcom.com Cc: linux-arm-kernel@lists.infradead.org Cc: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> Cc: Jason Cooper <jason@lakedaemon.net> Cc: Ray Jui <rjui@broadcom.com> Cc: "Hou Zhiqiang" <B48286@freescale.com> Cc: Tirumalesh Chalamarla <tchalamarla@cavium.com> Cc: linux-samsung-soc@vger.kernel.org Cc: Yuan Yao <yao.yuan@nxp.com> Cc: Jan Glauber <jglauber@cavium.com> Cc: Gregory Clement <gregory.clement@free-electrons.com> Cc: linux-amlogic@lists.infradead.org Cc: soren.brinkmann@xilinx.com Cc: Rajesh Bhagat <rajesh.bhagat@freescale.com> Cc: Scott Branden <sbranden@broadcom.com> Cc: Duc Dang <dhdang@apm.com> Cc: Kukjin Kim <kgene@kernel.org> Cc: Carlo Caione <carlo@caione.org> Cc: Dinh Nguyen <dinguyen@opensource.altera.com> Link: http://lkml.kernel.org/r/1470045256-9032-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2016-08-01 12:54:15 +03:00
flags = check_ppi_trigger(arch_timer_ppi[arch_timer_uses_ppi]);
enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], flags);
clocksource/arm_arch_timer: Force per-CPU interrupt to be level-triggered The ARM architected timer produces level-triggered interrupts (this is mandated by the architecture). Unfortunately, a number of device-trees get this wrong, and expose an edge-triggered interrupt. Until now, this wasn't too much an issue, as the programming of the trigger would fail (the corresponding PPI cannot be reconfigured), and the kernel would be happy with this. But we're about to change this, and trust DT a lot if the driver doesn't provide its own trigger information. In that context, the timer breaks badly. While we do need to fix the DTs, there is also some userspace out there (kvmtool) that generates the same kind of broken DT on the fly, and that will completely break with newer kernels. As a safety measure, and to keep buggy software alive as well as buying us some time to fix DTs all over the place, let's check what trigger configuration has been given us by the firmware. If this is not a level configuration, then we know that the DT/ACPI configuration is bust, and we pick some defaults which won't be worse than the existing setup. 
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> Cc: Andrew Lunn <andrew@lunn.ch> Cc: Liu Gang <Gang.Liu@nxp.com> Cc: Mark Rutland <marc.rutland@arm.com> Cc: Masahiro Yamada <yamada.masahiro@socionext.com> Cc: Wenbin Song <Wenbin.Song@freescale.com> Cc: Mingkai Hu <Mingkai.Hu@freescale.com> Cc: Florian Fainelli <f.fainelli@gmail.com> Cc: Kevin Hilman <khilman@baylibre.com> Cc: Daniel Lezcano <daniel.lezcano@linaro.org> Cc: Michal Simek <michal.simek@xilinx.com> Cc: Jon Hunter <jonathanh@nvidia.com> Cc: arm@kernel.org Cc: bcm-kernel-feedback-list@broadcom.com Cc: linux-arm-kernel@lists.infradead.org Cc: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> Cc: Jason Cooper <jason@lakedaemon.net> Cc: Ray Jui <rjui@broadcom.com> Cc: "Hou Zhiqiang" <B48286@freescale.com> Cc: Tirumalesh Chalamarla <tchalamarla@cavium.com> Cc: linux-samsung-soc@vger.kernel.org Cc: Yuan Yao <yao.yuan@nxp.com> Cc: Jan Glauber <jglauber@cavium.com> Cc: Gregory Clement <gregory.clement@free-electrons.com> Cc: linux-amlogic@lists.infradead.org Cc: soren.brinkmann@xilinx.com Cc: Rajesh Bhagat <rajesh.bhagat@freescale.com> Cc: Scott Branden <sbranden@broadcom.com> Cc: Duc Dang <dhdang@apm.com> Cc: Kukjin Kim <kgene@kernel.org> Cc: Carlo Caione <carlo@caione.org> Cc: Dinh Nguyen <dinguyen@opensource.altera.com> Link: http://lkml.kernel.org/r/1470045256-9032-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2016-08-01 12:54:15 +03:00
if (arch_timer_has_nonsecure_ppi()) {
flags = check_ppi_trigger(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]);
enable_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI],
flags);
clocksource/arm_arch_timer: Force per-CPU interrupt to be level-triggered The ARM architected timer produces level-triggered interrupts (this is mandated by the architecture). Unfortunately, a number of device-trees get this wrong, and expose an edge-triggered interrupt. Until now, this wasn't too much an issue, as the programming of the trigger would fail (the corresponding PPI cannot be reconfigured), and the kernel would be happy with this. But we're about to change this, and trust DT a lot if the driver doesn't provide its own trigger information. In that context, the timer breaks badly. While we do need to fix the DTs, there is also some userspace out there (kvmtool) that generates the same kind of broken DT on the fly, and that will completely break with newer kernels. As a safety measure, and to keep buggy software alive as well as buying us some time to fix DTs all over the place, let's check what trigger configuration has been given us by the firmware. If this is not a level configuration, then we know that the DT/ACPI configuration is bust, and we pick some defaults which won't be worse than the existing setup. 
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> Cc: Andrew Lunn <andrew@lunn.ch> Cc: Liu Gang <Gang.Liu@nxp.com> Cc: Mark Rutland <marc.rutland@arm.com> Cc: Masahiro Yamada <yamada.masahiro@socionext.com> Cc: Wenbin Song <Wenbin.Song@freescale.com> Cc: Mingkai Hu <Mingkai.Hu@freescale.com> Cc: Florian Fainelli <f.fainelli@gmail.com> Cc: Kevin Hilman <khilman@baylibre.com> Cc: Daniel Lezcano <daniel.lezcano@linaro.org> Cc: Michal Simek <michal.simek@xilinx.com> Cc: Jon Hunter <jonathanh@nvidia.com> Cc: arm@kernel.org Cc: bcm-kernel-feedback-list@broadcom.com Cc: linux-arm-kernel@lists.infradead.org Cc: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> Cc: Jason Cooper <jason@lakedaemon.net> Cc: Ray Jui <rjui@broadcom.com> Cc: "Hou Zhiqiang" <B48286@freescale.com> Cc: Tirumalesh Chalamarla <tchalamarla@cavium.com> Cc: linux-samsung-soc@vger.kernel.org Cc: Yuan Yao <yao.yuan@nxp.com> Cc: Jan Glauber <jglauber@cavium.com> Cc: Gregory Clement <gregory.clement@free-electrons.com> Cc: linux-amlogic@lists.infradead.org Cc: soren.brinkmann@xilinx.com Cc: Rajesh Bhagat <rajesh.bhagat@freescale.com> Cc: Scott Branden <sbranden@broadcom.com> Cc: Duc Dang <dhdang@apm.com> Cc: Kukjin Kim <kgene@kernel.org> Cc: Carlo Caione <carlo@caione.org> Cc: Dinh Nguyen <dinguyen@opensource.altera.com> Link: http://lkml.kernel.org/r/1470045256-9032-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2016-08-01 12:54:15 +03:00
}
arch_counter_set_user_access();
if (evtstrm_enable)
arch_timer_configure_evtstream();
return 0;
}
/*
 * Sanity-check arch_timer_rate.
 *
 * Returns -EINVAL when no rate has been set, 0 otherwise. A rate below
 * 1MHz is accepted but triggers a WARN.
 */
static int validate_timer_rate(void)
{
	if (arch_timer_rate) {
		/* Arch timer frequency < 1MHz can cause trouble */
		WARN_ON(arch_timer_rate < 1000000);
		return 0;
	}

	return -EINVAL;
}
/*
 * Set arch_timer_rate when probing from device tree.
 *
 * For historical reasons the first non-zero rate that gets probed wins and
 * later ones are not checked against it. A "clock-frequency" property on
 * the first probed node takes precedence over @rate, the value the caller
 * supplies as fallback.
 */
static void __init arch_timer_of_configure_rate(u32 rate, struct device_node *np)
{
	int missing;

	/* Who has more than one independent system counter? */
	if (arch_timer_rate != 0)
		return;

	/* Fall back to the caller's rate when the property cannot be read. */
	missing = of_property_read_u32(np, "clock-frequency", &arch_timer_rate);
	if (missing)
		arch_timer_rate = rate;

	/* Check the timer frequency. */
	if (validate_timer_rate() != 0)
		pr_warn("frequency not available\n");
}
/*
 * Log a one-line summary of the timers described by @type: which kinds are
 * present (cp15 and/or mmio), the counter rate in MHz with two decimals,
 * and whether each kind uses the virtual or the physical timer.
 */
static void __init arch_timer_banner(unsigned type)
{
	const bool has_cp15 = type & ARCH_TIMER_TYPE_CP15;
	const bool has_mem = type & ARCH_TIMER_TYPE_MEM;
	const bool has_both = type == (ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM);
	const char *cp15_mode = "";
	const char *mem_mode = "";

	if (has_cp15)
		cp15_mode = (arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) ?
			    "virt" : "phys";

	if (has_mem)
		mem_mode = arch_timer_mem_use_virtual ? "virt" : "phys";

	pr_info("%s%s%s timer(s) running at %lu.%02luMHz (%s%s%s).\n",
		has_cp15 ? "cp15" : "",
		has_both ? " and " : "",
		has_mem ? "mmio" : "",
		(unsigned long)arch_timer_rate / 1000000,
		(unsigned long)(arch_timer_rate / 10000) % 100,
		cp15_mode,
		has_both ? "/" : "",
		mem_mode);
}
/* Return the currently recorded architected timer rate (arch_timer_rate). */
u32 arch_timer_get_rate(void)
{
return arch_timer_rate;
}
/*
 * Report whether the current CPU is marked in the evtstrm_available mask.
 */
bool arch_timer_evtstrm_available(void)
{
/*
 * We might get called from a preemptible context. This is fine
 * because availability of the event stream should always be the same
 * for a preemptible context and the context where we might resume a
 * task. This is why raw_smp_processor_id() is used below.
 */
return cpumask_test_cpu(raw_smp_processor_id(), &evtstrm_available);
}
/*
 * Read the counter through the memory-mapped timer frame (arch_timer_mem),
 * starting at the CNTVCT_LO register offset.
 */
static u64 arch_counter_get_cntvct_mem(void)
{
return arch_counter_get_cnt_mem(arch_timer_mem, CNTVCT_LO);
}
/* Timer information shared with KVM; handed out by arch_timer_get_kvm_info(). */
static struct arch_timer_kvm_info arch_timer_kvm_info;
/* Return a pointer to the file-local arch_timer_kvm_info instance. */
struct arch_timer_kvm_info *arch_timer_get_kvm_info(void)
{
return &arch_timer_kvm_info;
}
/*
 * Pick the counter read function, then register the counter as a
 * clocksource, as the KVM timecounter's cycle counter and as sched_clock.
 *
 * @type: bitmask of ARCH_TIMER_TYPE_* describing which timers are present.
 */
static void __init arch_counter_register(unsigned type)
{
	u64 start_count;
	int width;

	/* Register the CP15 based counter if we have one */
	if (type & ARCH_TIMER_TYPE_CP15) {
		u64 (*rd)(void);

		/*
		 * Read the virtual counter when the virtual PPI is in use,
		 * or on arm64 when hyp mode is not available; read the
		 * physical counter otherwise. The "_stable" variants are
		 * selected when a counter workaround is active.
		 */
		if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) ||
		    arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) {
			if (arch_timer_counter_has_wa())
				rd = arch_counter_get_cntvct_stable;
			else
				rd = arch_counter_get_cntvct;
		} else {
			if (arch_timer_counter_has_wa())
				rd = arch_counter_get_cntpct_stable;
			else
				rd = arch_counter_get_cntpct;
		}

		arch_timer_read_counter = rd;
		clocksource_counter.vdso_clock_mode = vdso_default;
	} else {
		arch_timer_read_counter = arch_counter_get_cntvct_mem;
	}

	/*
	 * Use the width reported by arch_counter_get_width() for every
	 * mask rather than a hard-coded value, so that the clocksource,
	 * the cyclecounter and sched_clock all agree.
	 */
	width = arch_counter_get_width();
	clocksource_counter.mask = CLOCKSOURCE_MASK(width);
	cyclecounter.mask = CLOCKSOURCE_MASK(width);

	if (!arch_counter_suspend_stop)
		clocksource_counter.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
	start_count = arch_timer_read_counter();
	clocksource_register_hz(&clocksource_counter, arch_timer_rate);

	/* Reuse the mult/shift computed during clocksource registration. */
	cyclecounter.mult = clocksource_counter.mult;
	cyclecounter.shift = clocksource_counter.shift;
	timecounter_init(&arch_timer_kvm_info.timecounter,
			 &cyclecounter, start_count);

	sched_clock_register(arch_timer_read_counter, width, arch_timer_rate);
}
/*
 * Quiesce the per-CPU timer on the current CPU: disable the PPI in use
 * (and the non-secure PPI when one is handled as well), then invoke the
 * shutdown callback of @clk.
 */
static void arch_timer_stop(struct clock_event_device *clk)
{
pr_debug("disable IRQ%d cpu #%d\n", clk->irq, smp_processor_id());
disable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi]);
if (arch_timer_has_nonsecure_ppi())
disable_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]);
/* Interrupts are disabled first; the timer itself is stopped last. */
clk->set_state_shutdown(clk);
}
/*
 * CPU hotplug teardown callback, registered together with
 * arch_timer_starting_cpu(): mark the event stream as unavailable on the
 * current CPU and stop its timer. @cpu is not used. Always returns 0.
 */
static int arch_timer_dying_cpu(unsigned int cpu)
{
struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt);
cpumask_clear_cpu(smp_processor_id(), &evtstrm_available);
arch_timer_stop(clk);
return 0;
}
#ifdef CONFIG_CPU_PM
/* Per-CPU copy of CNTKCTL taken on CPU_PM_ENTER, written back on exit. */
static DEFINE_PER_CPU(unsigned long, saved_cntkctl);

/*
 * CPU PM notifier callback.
 *
 * On CPU_PM_ENTER the current CNTKCTL value is saved and the event stream
 * is marked unavailable for this CPU. On CPU_PM_ENTER_FAILED or CPU_PM_EXIT
 * the saved value is written back and, if the event stream feature is
 * present, the CPU is marked available again. Other actions are ignored.
 *
 * Always returns NOTIFY_OK.
 */
static int arch_timer_cpu_pm_notify(struct notifier_block *self,
				    unsigned long action, void *hcpu)
{
	switch (action) {
	case CPU_PM_ENTER:
		__this_cpu_write(saved_cntkctl, arch_timer_get_cntkctl());
		cpumask_clear_cpu(smp_processor_id(), &evtstrm_available);
		break;
	case CPU_PM_ENTER_FAILED:
	case CPU_PM_EXIT:
		arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl));
		if (arch_timer_have_evtstrm_feature())
			cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}
/* Notifier block that routes CPU PM events to arch_timer_cpu_pm_notify(). */
static struct notifier_block arch_timer_cpu_pm_notifier = {
.notifier_call = arch_timer_cpu_pm_notify,
};
/*
 * Register the CPU PM notifier; returns whatever
 * cpu_pm_register_notifier() returns.
 */
static int __init arch_timer_cpu_pm_init(void)
{
return cpu_pm_register_notifier(&arch_timer_cpu_pm_notifier);
}
/* Unregister the CPU PM notifier, warning if the unregistration fails. */
static void __init arch_timer_cpu_pm_deinit(void)
{
WARN_ON(cpu_pm_unregister_notifier(&arch_timer_cpu_pm_notifier));
}
#else
/* !CONFIG_CPU_PM stub: there is no notifier to register, report success. */
static int __init arch_timer_cpu_pm_init(void)
{
return 0;
}
/* !CONFIG_CPU_PM stub: nothing was registered, so nothing to undo. */
static void __init arch_timer_cpu_pm_deinit(void)
{
}
#endif
/*
 * Register the per-CPU (CP15) timer: allocate the per-CPU clock event
 * devices, request the PPI(s), hook up CPU PM notifications and install
 * the CPU hotplug callbacks.
 *
 * Returns 0 on success or a negative error code; on failure everything
 * acquired so far is released again.
 */
static int __init arch_timer_register(void)
{
int err;
int ppi;
arch_timer_evt = alloc_percpu(struct clock_event_device);
if (!arch_timer_evt) {
err = -ENOMEM;
goto out;
}
ppi = arch_timer_ppi[arch_timer_uses_ppi];
switch (arch_timer_uses_ppi) {
case ARCH_TIMER_VIRT_PPI:
err = request_percpu_irq(ppi, arch_timer_handler_virt,
"arch_timer", arch_timer_evt);
break;
case ARCH_TIMER_PHYS_SECURE_PPI:
case ARCH_TIMER_PHYS_NONSECURE_PPI:
err = request_percpu_irq(ppi, arch_timer_handler_phys,
"arch_timer", arch_timer_evt);
/*
 * When the secure PPI is in use and a non-secure PPI exists, request
 * that one too. From here on "ppi" names the non-secure interrupt, so
 * the error message below reports the interrupt that actually failed.
 */
if (!err && arch_timer_has_nonsecure_ppi()) {
ppi = arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI];
err = request_percpu_irq(ppi, arch_timer_handler_phys,
"arch_timer", arch_timer_evt);
/* Undo the secure PPI request; out_free does not free IRQs. */
if (err)
free_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_SECURE_PPI],
arch_timer_evt);
}
break;
case ARCH_TIMER_HYP_PPI:
err = request_percpu_irq(ppi, arch_timer_handler_phys,
"arch_timer", arch_timer_evt);
break;
default:
BUG();
}
if (err) {
pr_err("can't register interrupt %d (%d)\n", ppi, err);
goto out_free;
}
err = arch_timer_cpu_pm_init();
if (err)
goto out_unreg_notify;
/* Register and immediately configure the timer on the boot CPU */
err = cpuhp_setup_state(CPUHP_AP_ARM_ARCH_TIMER_STARTING,
"clockevents/arm/arch_timer:starting",
arch_timer_starting_cpu, arch_timer_dying_cpu);
if (err)
goto out_unreg_cpupm;
return 0;
/*
 * Error unwinding, in reverse order of acquisition. Note that despite its
 * name, out_unreg_notify is the label that releases the per-CPU IRQs;
 * the CPU PM notifier is dropped under out_unreg_cpupm.
 */
out_unreg_cpupm:
arch_timer_cpu_pm_deinit();
out_unreg_notify:
free_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], arch_timer_evt);
if (arch_timer_has_nonsecure_ppi())
free_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI],
arch_timer_evt);
out_free:
free_percpu(arch_timer_evt);
out:
return err;
}
/*
 * Allocate the global arch_timer_mem instance for the frame mapped at
 * @base, set up its clock event device and request @irq for it.
 *
 * The interrupt handler is the virtual or physical memory-mapped variant,
 * depending on arch_timer_mem_use_virtual.
 *
 * Returns 0 on success, -ENOMEM if the allocation fails, or the error
 * returned by request_irq(); in the latter case the instance is freed and
 * arch_timer_mem is reset to NULL.
 */
static int __init arch_timer_mem_register(void __iomem *base, unsigned int irq)
{
	irq_handler_t handler;
	int err;

	arch_timer_mem = kzalloc(sizeof(*arch_timer_mem), GFP_KERNEL);
	if (!arch_timer_mem)
		return -ENOMEM;

	arch_timer_mem->base = base;
	arch_timer_mem->evt.irq = irq;
	__arch_timer_setup(ARCH_TIMER_TYPE_MEM, &arch_timer_mem->evt);

	handler = arch_timer_mem_use_virtual ? arch_timer_handler_virt_mem
					     : arch_timer_handler_phys_mem;

	err = request_irq(irq, handler, IRQF_TIMER, "arch_mem_timer",
			  &arch_timer_mem->evt);
	if (!err)
		return 0;

	pr_err("Failed to request mem timer irq\n");
	kfree(arch_timer_mem);
	arch_timer_mem = NULL;

	return err;
}
/*
 * Compatible strings of the CP15-type timer node. Used by
 * arch_timer_needs_of_probing() to look for such a node when only the
 * memory-mapped timer has been probed so far.
 */
static const struct of_device_id arch_timer_of_match[] __initconst = {
{ .compatible = "arm,armv7-timer", },
{ .compatible = "arm,armv8-timer", },
{},
};
/*
 * Compatible string of the memory-mapped timer node. Used by
 * arch_timer_needs_of_probing() to look for such a node when only the
 * CP15-type timer has been probed so far.
 */
static const struct of_device_id arch_timer_mem_of_match[] __initconst = {
{ .compatible = "arm,armv7-timer-mem", },
{},
};
/*
 * Tell whether another timer node is still waiting to be probed.
 *
 * Returns false when both timer types are already present. Otherwise the
 * device tree is searched for a node of the type that has not been probed
 * yet, and true is returned only if such a node exists and is available.
 */
static bool __init arch_timer_needs_of_probing(void)
{
	const unsigned int both = ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM;
	const struct of_device_id *other;
	struct device_node *node;
	bool pending;

	/* We have two timers, and both device-tree nodes are probed. */
	if ((arch_timers_present & both) == both)
		return false;

	/* Only one type was probed: look for a node of the other type. */
	other = (arch_timers_present & ARCH_TIMER_TYPE_CP15) ?
		arch_timer_mem_of_match : arch_timer_of_match;

	node = of_find_matching_node(NULL, other);
	pending = node && of_device_is_available(node);
	of_node_put(node);

	return pending;
}
/*
 * Final initialisation step shared by the probe paths: print the banner
 * and register the counter for the timer types in arch_timers_present,
 * then run arch_timer_arch_init() and return its result.
 */
static int __init arch_timer_common_init(void)
{
	int ret;

	arch_timer_banner(arch_timers_present);
	arch_counter_register(arch_timers_present);

	ret = arch_timer_arch_init();
	return ret;
}
/**
 * arch_timer_select_ppi() - Choose which timer PPI this system should use.
 *
 * The choice is made in this order:
 *
 * - The kernel itself runs in HYP mode (ARMv8.1 VHE): CNTP_*_EL1 accesses
 *   are silently redirected to CNTHP_*_EL2, which uses a different PPI,
 *   so the HYP PPI is selected.
 * - HYP mode is not available and a virtual timer interrupt was provided:
 *   the virtual PPI is selected.
 * - Otherwise the physical timer is used. Either HYP mode is available, in
 *   which case the physical timer is known to be accessible from PL1 and
 *   using it leaves the virtual timer to guests, or no virtual interrupt
 *   was provided and the physical timer had better be accessible. arm64
 *   never uses the secure interrupt, so it gets the non-secure PPI; other
 *   configurations get the secure one.
 *
 * Return: a suitable PPI type for the current system.
 */
static enum arch_timer_ppi_nr __init arch_timer_select_ppi(void)
{
	if (is_kernel_in_hyp_mode())
		return ARCH_TIMER_HYP_PPI;

	if (!is_hyp_mode_available() && arch_timer_ppi[ARCH_TIMER_VIRT_PPI])
		return ARCH_TIMER_VIRT_PPI;

	return IS_ENABLED(CONFIG_ARM64) ? ARCH_TIMER_PHYS_NONSECURE_PPI
					: ARCH_TIMER_PHYS_SECURE_PPI;
}
/*
 * Copy the timer interrupt numbers into arch_timer_kvm_info. The virtual
 * interrupt is always recorded; the physical (non-secure) one only when
 * the kernel runs in HYP mode.
 */
static void __init arch_timer_populate_kvm_info(void)
{
	arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI];

	if (!is_kernel_in_hyp_mode())
		return;

	arch_timer_kvm_info.physical_irq =
		arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI];
}
/*
 * Device-tree probe entry for the CP15-type timer node @np.
 *
 * Collects the PPIs (by name when "interrupt-names" is present, by index
 * otherwise), configures the rate, reads the "always-on",
 * "arm,cpu-registers-not-fw-configured" and "arm,no-tick-in-suspend"
 * properties, selects the PPI to use and registers the timer.
 *
 * Returns 0 on success, and also when a CP15 timer was already recorded
 * (the extra node is skipped) or when another timer node still has to be
 * probed before the common initialisation can run. Returns -EINVAL if the
 * selected PPI has no interrupt, or the error from arch_timer_register()
 * or arch_timer_common_init().
 */
static int __init arch_timer_of_init(struct device_node *np)
{
	bool named_irqs;
	int idx, virq, err;
	u32 freq;

	if (arch_timers_present & ARCH_TIMER_TYPE_CP15) {
		pr_warn("multiple nodes in dt, skipping\n");
		return 0;
	}
	arch_timers_present |= ARCH_TIMER_TYPE_CP15;

	named_irqs = of_property_read_bool(np, "interrupt-names");
	for (idx = ARCH_TIMER_PHYS_SECURE_PPI; idx < ARCH_TIMER_MAX_TIMER_PPI; idx++) {
		virq = named_irqs ?
			of_irq_get_byname(np, arch_timer_ppi_names[idx]) :
			of_irq_get(np, idx);
		/* Leave the slot untouched when no interrupt was found. */
		if (virq <= 0)
			continue;
		arch_timer_ppi[idx] = virq;
	}

	arch_timer_populate_kvm_info();

	freq = arch_timer_get_cntfrq();
	arch_timer_of_configure_rate(freq, np);

	arch_timer_c3stop = !of_property_read_bool(np, "always-on");

	/* Check for globally applicable workarounds */
	arch_timer_check_ool_workaround(ate_match_dt, np);

	/*
	 * If firmware cannot be relied upon to initialise the timer
	 * registers, fall back to the physical timers.
	 */
	if (IS_ENABLED(CONFIG_ARM) &&
	    of_property_read_bool(np, "arm,cpu-registers-not-fw-configured"))
		arch_timer_uses_ppi = ARCH_TIMER_PHYS_SECURE_PPI;
	else
		arch_timer_uses_ppi = arch_timer_select_ppi();

	if (!arch_timer_ppi[arch_timer_uses_ppi]) {
		pr_err("No interrupt available, giving up\n");
		return -EINVAL;
	}

	/* On some systems, the counter stops ticking when in suspend. */
	arch_counter_suspend_stop =
		of_property_read_bool(np, "arm,no-tick-in-suspend");

	err = arch_timer_register();
	if (err)
		return err;

	/* Defer the common init until the other timer node was probed too. */
	return arch_timer_needs_of_probing() ? 0 : arch_timer_common_init();
}
/* Tie arch_timer_of_init() to the "arm,armv7-timer" and "arm,armv8-timer" compatibles. */
TIMER_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_of_init);
TIMER_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_of_init);
/*
 * Read the CNTFRQ register of @frame.
 *
 * The frame is mapped only for the duration of the read. Returns the
 * register value, or 0 if the frame could not be mapped.
 */
static u32 __init
arch_timer_mem_frame_get_cntfrq(struct arch_timer_mem_frame *frame)
{
	void __iomem *regs = ioremap(frame->cntbase, frame->size);
	u32 freq;

	if (!regs) {
		pr_err("Unable to map frame @ %pa\n", &frame->cntbase);
		return 0;
	}

	freq = readl_relaxed(regs + CNTFRQ);
	iounmap(regs);

	return freq;
}
/*
 * Pick the frame of @timer_mem that should back the memory-mapped timer.
 *
 * For every valid frame all access bits are written to its CNTACR and
 * read back to learn what the frame really grants. The first frame that
 * is virtual capable (per CNTTIDR) with virtual timer and counter access
 * wins immediately and sets arch_timer_mem_use_virtual. Failing that, the
 * last frame granting physical timer and counter access is returned.
 *
 * Returns the chosen frame, or NULL if CNTCTLBase cannot be mapped or no
 * frame qualifies.
 */
static struct arch_timer_mem_frame * __init
arch_timer_mem_find_best_frame(struct arch_timer_mem *timer_mem)
{
	struct arch_timer_mem_frame *candidate, *chosen = NULL;
	void __iomem *ctl;
	u32 tidr;
	int n;

	ctl = ioremap(timer_mem->cntctlbase, timer_mem->size);
	if (!ctl) {
		pr_err("Can't map CNTCTLBase @ %pa\n",
		       &timer_mem->cntctlbase);
		return NULL;
	}

	tidr = readl_relaxed(ctl + CNTTIDR);

	/* Prefer a virtual capable frame, else settle for a physical one. */
	for (n = 0; n < ARCH_TIMER_MEM_MAX_FRAMES; n++) {
		u32 acr;

		candidate = &timer_mem->frame[n];
		if (!candidate->valid)
			continue;

		/* Request every access right, then see which ones stuck. */
		acr = CNTACR_RFRQ | CNTACR_RWPT | CNTACR_RPCT |
		      CNTACR_RWVT | CNTACR_RVOFF | CNTACR_RVCT;
		writel_relaxed(acr, ctl + CNTACR(n));
		acr = readl_relaxed(ctl + CNTACR(n));

		if ((tidr & CNTTIDR_VIRT(n)) &&
		    (acr & (CNTACR_RWVT | CNTACR_RVCT)) ==
		    (CNTACR_RWVT | CNTACR_RVCT)) {
			chosen = candidate;
			arch_timer_mem_use_virtual = true;
			break;
		}

		if ((acr & (CNTACR_RWPT | CNTACR_RPCT)) ==
		    (CNTACR_RWPT | CNTACR_RPCT))
			chosen = candidate;
	}

	iounmap(ctl);

	return chosen;
}
/*
 * Claim, map and register @frame as the memory-mapped timer.
 *
 * The interrupt used is the frame's virtual or physical one, depending on
 * arch_timer_mem_use_virtual. On success ARCH_TIMER_TYPE_MEM is recorded
 * in arch_timers_present.
 *
 * Returns 0 on success, -EINVAL if the frame lacks the needed interrupt,
 * -EBUSY if the register region cannot be claimed, -ENXIO if it cannot be
 * mapped, or the error from arch_timer_mem_register(). On any failure the
 * mapping and the claimed memory region are released again, so the region
 * is not left marked busy.
 */
static int __init
arch_timer_mem_frame_register(struct arch_timer_mem_frame *frame)
{
	void __iomem *base;
	int ret, irq;

	if (arch_timer_mem_use_virtual)
		irq = frame->virt_irq;
	else
		irq = frame->phys_irq;

	if (!irq) {
		pr_err("Frame missing %s irq.\n",
		       arch_timer_mem_use_virtual ? "virt" : "phys");
		return -EINVAL;
	}

	if (!request_mem_region(frame->cntbase, frame->size,
				"arch_mem_timer"))
		return -EBUSY;

	base = ioremap(frame->cntbase, frame->size);
	if (!base) {
		pr_err("Can't map frame's registers\n");
		ret = -ENXIO;
		goto out_release;
	}

	ret = arch_timer_mem_register(base, irq);
	if (ret)
		goto out_unmap;

	arch_timers_present |= ARCH_TIMER_TYPE_MEM;
	return 0;

out_unmap:
	iounmap(base);
out_release:
	/* Give the region back; it was previously leaked on these paths. */
	release_mem_region(frame->cntbase, frame->size);
	return ret;
}
/*
 * Device-tree probe entry for the memory-mapped timer node @np.
 *
 * Builds a temporary description of the timer from @np and its available
 * child nodes (one per frame, indexed by "frame-number"), picks the best
 * frame, configures the rate from that frame's CNTFRQ and registers the
 * frame. The common initialisation runs afterwards unless another timer
 * node still has to be probed.
 *
 * Returns 0 on success, -ENOMEM if the temporary description cannot be
 * allocated, -EINVAL on a malformed node or when no frame is suitable, or
 * the error from frame registration / common initialisation.
 */
static int __init arch_timer_mem_of_init(struct device_node *np)
{
	struct arch_timer_mem *timer_mem;
	struct arch_timer_mem_frame *frame;
	struct device_node *child;
	struct resource res;
	int ret = -EINVAL;
	u32 freq;

	timer_mem = kzalloc(sizeof(*timer_mem), GFP_KERNEL);
	if (!timer_mem)
		return -ENOMEM;

	if (of_address_to_resource(np, 0, &res))
		goto out;
	timer_mem->cntctlbase = res.start;
	timer_mem->size = resource_size(&res);

	for_each_available_child_of_node(np, child) {
		u32 nr;

		if (of_property_read_u32(child, "frame-number", &nr)) {
			pr_err(FW_BUG "Missing frame-number.\n");
			goto out_put_child;
		}
		if (nr >= ARCH_TIMER_MEM_MAX_FRAMES) {
			pr_err(FW_BUG "Wrong frame-number, only 0-%u are permitted.\n",
			       ARCH_TIMER_MEM_MAX_FRAMES - 1);
			goto out_put_child;
		}

		frame = &timer_mem->frame[nr];
		if (frame->valid) {
			pr_err(FW_BUG "Duplicated frame-number.\n");
			goto out_put_child;
		}

		if (of_address_to_resource(child, 0, &res))
			goto out_put_child;

		frame->cntbase = res.start;
		frame->size = resource_size(&res);
		frame->virt_irq = irq_of_parse_and_map(child,
						       ARCH_TIMER_VIRT_SPI);
		frame->phys_irq = irq_of_parse_and_map(child,
						       ARCH_TIMER_PHYS_SPI);
		frame->valid = true;
	}

	frame = arch_timer_mem_find_best_frame(timer_mem);
	if (!frame) {
		pr_err("Unable to find a suitable frame in timer @ %pa\n",
		       &timer_mem->cntctlbase);
		ret = -EINVAL;
		goto out;
	}

	freq = arch_timer_mem_frame_get_cntfrq(frame);
	arch_timer_of_configure_rate(freq, np);

	ret = arch_timer_mem_frame_register(frame);
	if (ret)
		goto out;
	if (!arch_timer_needs_of_probing())
		ret = arch_timer_common_init();
	goto out;

out_put_child:
	/* Leaving the child iteration early: drop the reference we hold. */
	of_node_put(child);
out:
	kfree(timer_mem);
	return ret;
}
/* Tie arch_timer_mem_of_init() to the "arm,armv7-timer-mem" compatible. */
TIMER_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem",
arch_timer_mem_of_init);
#ifdef CONFIG_ACPI_GTDT
/*
 * Check that the CNTFRQ of every valid frame in @timer_mem equals
 * arch_timer_rate.
 *
 * Returns 0 if all valid frames agree, or -EINVAL (after logging the
 * offending frame) on the first mismatch.
 */
static int __init
arch_timer_mem_verify_cntfrq(struct arch_timer_mem *timer_mem)
{
	int idx;

	for (idx = 0; idx < ARCH_TIMER_MEM_MAX_FRAMES; idx++) {
		struct arch_timer_mem_frame *frame = &timer_mem->frame[idx];
		u32 freq;

		if (!frame->valid)
			continue;

		freq = arch_timer_mem_frame_get_cntfrq(frame);
		if (freq != arch_timer_rate) {
			pr_err(FW_BUG "CNTFRQ mismatch: frame @ %pa: (0x%08lx), CPU: (0x%08lx)\n",
			       &frame->cntbase,
			       (unsigned long)freq,
			       (unsigned long)arch_timer_rate);
			return -EINVAL;
		}
	}

	return 0;
}
/*
 * Register a memory-mapped timer frame described by ACPI.
 *
 * Up to @platform_timer_count timer descriptions are obtained from
 * acpi_arch_timer_mem_init(). Each timer is scanned for its best frame
 * and has the CNTFRQ of all its frames verified; the best frame of the
 * first timer that offers one is the frame that gets registered.
 *
 * Returns 0 on success or when there is nothing to register, -ENOMEM if
 * the temporary array cannot be allocated, or the error reported by the
 * ACPI parsing, the CNTFRQ verification or the frame registration.
 */
static int __init arch_timer_mem_acpi_init(int platform_timer_count)
{
	struct arch_timer_mem *timers, *timer;
	struct arch_timer_mem_frame *frame, *best_frame = NULL;
	int timer_count, i, ret = 0;

	timers = kcalloc(platform_timer_count, sizeof(*timers),
			 GFP_KERNEL);
	if (!timers)
		return -ENOMEM;

	ret = acpi_arch_timer_mem_init(timers, &timer_count);
	if (ret || !timer_count)
		goto out;

	/*
	 * While unlikely, it's theoretically possible that none of the frames
	 * in a timer expose the combination of feature we want.
	 */
	for (i = 0; i < timer_count; i++) {
		bool use_virtual = arch_timer_mem_use_virtual;

		timer = &timers[i];

		frame = arch_timer_mem_find_best_frame(timer);
		if (!best_frame)
			best_frame = frame;
		else
			/*
			 * The frame to register was already chosen from an
			 * earlier timer. Scanning this timer may have set
			 * arch_timer_mem_use_virtual for a frame we will not
			 * use; restore it so the virtual/physical choice
			 * still matches best_frame.
			 */
			arch_timer_mem_use_virtual = use_virtual;

		ret = arch_timer_mem_verify_cntfrq(timer);
		if (ret) {
			pr_err("Disabling MMIO timers due to CNTFRQ mismatch\n");
			goto out;
		}

		if (!best_frame) /* implies !frame */
			/*
			 * Only complain about missing suitable frames if we
			 * haven't already found one in a previous iteration.
			 */
			pr_err("Unable to find a suitable frame in timer @ %pa\n",
			       &timer->cntctlbase);
	}

	if (best_frame)
		ret = arch_timer_mem_frame_register(best_frame);
out:
	kfree(timers);
	return ret;
}
/*
 * ACPI (GTDT) probe entry: initialise the per-processor generic timer and,
 * if the table lists platform timers, the memory-mapped timer as well.
 *
 * Returns -EINVAL if a CP15 timer was already recorded or if the selected
 * PPI has no interrupt, the error from GTDT parsing, rate validation or
 * arch_timer_register(), or otherwise the result of the common
 * initialisation. A failure to set up the memory-mapped timer is only
 * logged.
 */
static int __init arch_timer_acpi_init(struct acpi_table_header *table)
{
	int nr_platform_timers, err;

	if (arch_timers_present & ARCH_TIMER_TYPE_CP15) {
		pr_warn("already initialized, skipping\n");
		return -EINVAL;
	}
	arch_timers_present |= ARCH_TIMER_TYPE_CP15;

	err = acpi_gtdt_init(table, &nr_platform_timers);
	if (err)
		return err;

	arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI] =
		acpi_gtdt_map_ppi(ARCH_TIMER_PHYS_NONSECURE_PPI);
	arch_timer_ppi[ARCH_TIMER_VIRT_PPI] =
		acpi_gtdt_map_ppi(ARCH_TIMER_VIRT_PPI);
	arch_timer_ppi[ARCH_TIMER_HYP_PPI] =
		acpi_gtdt_map_ppi(ARCH_TIMER_HYP_PPI);

	arch_timer_populate_kvm_info();

	/*
	 * ACPI offers no way to override the sysreg CNTFRQ value, so it
	 * *must* be correct.
	 */
	arch_timer_rate = arch_timer_get_cntfrq();
	err = validate_timer_rate();
	if (err) {
		pr_err(FW_BUG "frequency not available.\n");
		return err;
	}

	arch_timer_uses_ppi = arch_timer_select_ppi();
	if (!arch_timer_ppi[arch_timer_uses_ppi]) {
		pr_err("No interrupt available, giving up\n");
		return -EINVAL;
	}

	/* Always-on capability */
	arch_timer_c3stop = acpi_gtdt_c3stop(arch_timer_uses_ppi);

	/* Check for globally applicable workarounds */
	arch_timer_check_ool_workaround(ate_match_acpi_oem_info, table);

	err = arch_timer_register();
	if (err)
		return err;

	/* The memory-mapped timer is optional: complain, but carry on. */
	if (nr_platform_timers) {
		err = arch_timer_mem_acpi_init(nr_platform_timers);
		if (err)
			pr_err("Failed to initialize memory-mapped timer.\n");
	}

	return arch_timer_common_init();
}
/* Tie arch_timer_acpi_init() to the ACPI table identified by ACPI_SIG_GTDT. */
TIMER_ACPI_DECLARE(arch_timer, ACPI_SIG_GTDT, arch_timer_acpi_init);
#endif
ptp: arm/arm64: Enable ptp_kvm for arm/arm64 Currently, there is no mechanism to keep time sync between guest and host in arm/arm64 virtualization environment. Time in guest will drift compared with host after boot up as they may both use third party time sources to correct their time respectively. The time deviation will be in order of milliseconds. But in some scenarios,like in cloud environment, we ask for higher time precision. kvm ptp clock, which chooses the host clock source as a reference clock to sync time between guest and host, has been adopted by x86 which takes the time sync order from milliseconds to nanoseconds. This patch enables kvm ptp clock for arm/arm64 and improves clock sync precision significantly. Test result comparisons between with kvm ptp clock and without it in arm/arm64 are as follows. This test derived from the result of command 'chronyc sources'. we should take more care of the last sample column which shows the offset between the local clock and the source at the last measurement. 
no kvm ptp in guest: MS Name/IP address Stratum Poll Reach LastRx Last sample ======================================================================== ^* dns1.synet.edu.cn 2 6 377 13 +1040us[+1581us] +/- 21ms ^* dns1.synet.edu.cn 2 6 377 21 +1040us[+1581us] +/- 21ms ^* dns1.synet.edu.cn 2 6 377 29 +1040us[+1581us] +/- 21ms ^* dns1.synet.edu.cn 2 6 377 37 +1040us[+1581us] +/- 21ms ^* dns1.synet.edu.cn 2 6 377 45 +1040us[+1581us] +/- 21ms ^* dns1.synet.edu.cn 2 6 377 53 +1040us[+1581us] +/- 21ms ^* dns1.synet.edu.cn 2 6 377 61 +1040us[+1581us] +/- 21ms ^* dns1.synet.edu.cn 2 6 377 4 -130us[ +796us] +/- 21ms ^* dns1.synet.edu.cn 2 6 377 12 -130us[ +796us] +/- 21ms ^* dns1.synet.edu.cn 2 6 377 20 -130us[ +796us] +/- 21ms in host: MS Name/IP address Stratum Poll Reach LastRx Last sample ======================================================================== ^* 120.25.115.20 2 7 377 72 -470us[ -603us] +/- 18ms ^* 120.25.115.20 2 7 377 92 -470us[ -603us] +/- 18ms ^* 120.25.115.20 2 7 377 112 -470us[ -603us] +/- 18ms ^* 120.25.115.20 2 7 377 2 +872ns[-6808ns] +/- 17ms ^* 120.25.115.20 2 7 377 22 +872ns[-6808ns] +/- 17ms ^* 120.25.115.20 2 7 377 43 +872ns[-6808ns] +/- 17ms ^* 120.25.115.20 2 7 377 63 +872ns[-6808ns] +/- 17ms ^* 120.25.115.20 2 7 377 83 +872ns[-6808ns] +/- 17ms ^* 120.25.115.20 2 7 377 103 +872ns[-6808ns] +/- 17ms ^* 120.25.115.20 2 7 377 123 +872ns[-6808ns] +/- 17ms The dns1.synet.edu.cn is the network reference clock for guest and 120.25.115.20 is the network reference clock for host. we can't get the clock error between guest and host directly, but a roughly estimated value will be in order of hundreds of us to ms. with kvm ptp in guest: chrony has been disabled in host to remove the disturb by network clock. 
MS Name/IP address Stratum Poll Reach LastRx Last sample ======================================================================== * PHC0 0 3 377 8 -7ns[ +1ns] +/- 3ns * PHC0 0 3 377 8 +1ns[ +16ns] +/- 3ns * PHC0 0 3 377 6 -4ns[ -0ns] +/- 6ns * PHC0 0 3 377 6 -8ns[ -12ns] +/- 5ns * PHC0 0 3 377 5 +2ns[ +4ns] +/- 4ns * PHC0 0 3 377 13 +2ns[ +4ns] +/- 4ns * PHC0 0 3 377 12 -4ns[ -6ns] +/- 4ns * PHC0 0 3 377 11 -8ns[ -11ns] +/- 6ns * PHC0 0 3 377 10 -14ns[ -20ns] +/- 4ns * PHC0 0 3 377 8 +4ns[ +5ns] +/- 4ns The PHC0 is the ptp clock which choose the host clock as its source clock. So we can see that the clock difference between host and guest is in order of ns. Cc: Mark Rutland <mark.rutland@arm.com> Acked-by: Richard Cochran <richardcochran@gmail.com> Signed-off-by: Jianyong Wu <jianyong.wu@arm.com> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20201209060932.212364-8-jianyong.wu@arm.com
2020-12-09 09:09:30 +03:00
/*
 * Obtain a time/counter pair through the KVM PTP vendor hypercall.
 *
 * @cycle: if non-NULL, receives the counter value assembled from the
 *         third and fourth result registers.
 * @ts:    receives the time assembled from the first two result
 *         registers, converted to a timespec64. Must not be NULL.
 * @cs:    if non-NULL, receives a pointer to clocksource_counter.
 *
 * The virtual counter is requested when the timer uses the virtual PPI,
 * the physical counter otherwise.
 *
 * Returns 0 on success, or -EOPNOTSUPP when
 * CONFIG_HAVE_ARM_SMCCC_DISCOVERY is disabled or the first result
 * register, taken as a signed int, is negative.
 */
int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *ts,
				 struct clocksource **cs)
{
	struct arm_smccc_res res;
	u32 counter_id;
	ktime_t now;

	if (!IS_ENABLED(CONFIG_HAVE_ARM_SMCCC_DISCOVERY))
		return -EOPNOTSUPP;

	counter_id = (arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) ?
			KVM_PTP_VIRT_COUNTER : KVM_PTP_PHYS_COUNTER;

	arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID,
			     counter_id, &res);
	if ((int)res.a0 < 0)
		return -EOPNOTSUPP;

	/* Each 64-bit value arrives split across two result registers. */
	now = ((u64)res.a0 << 32) | res.a1;
	*ts = ktime_to_timespec64(now);

	if (cycle)
		*cycle = ((u64)res.a2 << 32) | res.a3;
	if (cs)
		*cs = &clocksource_counter;

	return 0;
}
EXPORT_SYMBOL_GPL(kvm_arch_ptp_get_crosststamp);