9d7f29cdb4
calculate_min_delta() may incorrectly access a 4th element of buf2[]
which only has 3 elements. This may trigger undefined behaviour and has
been reported to cause strange crashes in start_kernel() sometime after
timer initialization when built with GCC 5.3, possibly due to
register/stack corruption:
sched_clock: 32 bits at 200MHz, resolution 5ns, wraps every 10737418237ns
CPU 0 Unable to handle kernel paging request at virtual address ffffb0aa, epc == 8067daa8, ra == 8067da84
Oops[#1]:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.9.18 #51
task: 8065e3e0 task.stack: 80644000
$ 0 : 00000000 00000001 00000000 00000000
$ 4 : 8065b4d0 00000000 805d0000 00000010
$ 8 : 00000010 80321400 fffff000 812de408
$12 : 00000000 00000000 00000000 ffffffff
$16 : 00000002 ffffffff 80660000 806a666c
$20 : 806c0000 00000000 00000000 00000000
$24 : 00000000 00000010
$28 : 80644000 80645ed0 00000000 8067da84
Hi : 00000000
Lo : 00000000
epc : 8067daa8 start_kernel+0x33c/0x500
ra : 8067da84 start_kernel+0x318/0x500
Status: 11000402 KERNEL EXL
Cause : 4080040c (ExcCode 03)
BadVA : ffffb0aa
PrId : 0501992c (MIPS 1004Kc)
Modules linked in:
Process swapper/0 (pid: 0, threadinfo=80644000, task=8065e3e0, tls=00000000)
Call Trace:
[<8067daa8>] start_kernel+0x33c/0x500
Code: 24050240 0c0131f9 24849c64 <a200b0a8> 41606020 000000c0 0c1a45e6 00000000 0c1a5f44
UBSAN also detects the same issue:
================================================================
UBSAN: Undefined behaviour in arch/mips/kernel/cevt-r4k.c:85:41
load of address 80647e4c with insufficient space
for an object of type 'unsigned int'
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.9.18 #47
Call Trace:
[<80028f70>] show_stack+0x88/0xa4
[<80312654>] dump_stack+0x84/0xc0
[<8034163c>] ubsan_epilogue+0x14/0x50
[<803417d8>] __ubsan_handle_type_mismatch+0x160/0x168
[<8002dab0>] r4k_clockevent_init+0x544/0x764
[<80684d34>] time_init+0x18/0x90
[<8067fa5c>] start_kernel+0x2f0/0x500
=================================================================
buf2[] is intentionally only 3 elements so that the last element is the
median once 5 samples have been inserted, so explicitly prevent the
possibility of comparing against the 4th element rather than extending
the array.
Fixes: 1fa405552e
("MIPS: cevt-r4k: Dynamically calculate min_delta_ns")
Reported-by: Rabin Vincent <rabinv@axis.com>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Tested-by: Rabin Vincent <rabinv@axis.com>
Cc: linux-mips@linux-mips.org
Cc: <stable@vger.kernel.org> # 4.7.x-
Patchwork: https://patchwork.linux-mips.org/patch/15892/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
299 lines
7.5 KiB
C
299 lines
7.5 KiB
C
/*
|
|
* This file is subject to the terms and conditions of the GNU General Public
|
|
* License. See the file "COPYING" in the main directory of this archive
|
|
* for more details.
|
|
*
|
|
* Copyright (C) 2007 MIPS Technologies, Inc.
|
|
* Copyright (C) 2007 Ralf Baechle <ralf@linux-mips.org>
|
|
*/
|
|
#include <linux/clockchips.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/irq.h>
|
|
|
|
#include <asm/time.h>
|
|
#include <asm/cevt-r4k.h>
|
|
|
|
static int mips_next_event(unsigned long delta,
|
|
struct clock_event_device *evt)
|
|
{
|
|
unsigned int cnt;
|
|
int res;
|
|
|
|
cnt = read_c0_count();
|
|
cnt += delta;
|
|
write_c0_compare(cnt);
|
|
res = ((int)(read_c0_count() - cnt) >= 0) ? -ETIME : 0;
|
|
return res;
|
|
}
|
|
|
|
/**
|
|
* calculate_min_delta() - Calculate a good minimum delta for mips_next_event().
|
|
*
|
|
* Running under virtualisation can introduce overhead into mips_next_event() in
|
|
* the form of hypervisor emulation of CP0_Count/CP0_Compare registers,
|
|
* potentially with an unnatural frequency, which makes a fixed min_delta_ns
|
|
* value inappropriate as it may be too small.
|
|
*
|
|
* It can also introduce occasional latency from the guest being descheduled.
|
|
*
|
|
* This function calculates a good minimum delta based roughly on the 75th
|
|
* percentile of the time taken to do the mips_next_event() sequence, in order
|
|
* to handle potentially higher overhead while also eliminating outliers due to
|
|
* unpredictable hypervisor latency (which can be handled by retries).
|
|
*
|
|
* Return: An appropriate minimum delta for the clock event device.
|
|
*/
|
|
static unsigned int calculate_min_delta(void)
|
|
{
|
|
unsigned int cnt, i, j, k, l;
|
|
unsigned int buf1[4], buf2[3];
|
|
unsigned int min_delta;
|
|
|
|
/*
|
|
* Calculate the median of 5 75th percentiles of 5 samples of how long
|
|
* it takes to set CP0_Compare = CP0_Count + delta.
|
|
*/
|
|
for (i = 0; i < 5; ++i) {
|
|
for (j = 0; j < 5; ++j) {
|
|
/*
|
|
* This is like the code in mips_next_event(), and
|
|
* directly measures the borderline "safe" delta.
|
|
*/
|
|
cnt = read_c0_count();
|
|
write_c0_compare(cnt);
|
|
cnt = read_c0_count() - cnt;
|
|
|
|
/* Sorted insert into buf1 */
|
|
for (k = 0; k < j; ++k) {
|
|
if (cnt < buf1[k]) {
|
|
l = min_t(unsigned int,
|
|
j, ARRAY_SIZE(buf1) - 1);
|
|
for (; l > k; --l)
|
|
buf1[l] = buf1[l - 1];
|
|
break;
|
|
}
|
|
}
|
|
if (k < ARRAY_SIZE(buf1))
|
|
buf1[k] = cnt;
|
|
}
|
|
|
|
/* Sorted insert of 75th percentile into buf2 */
|
|
for (k = 0; k < i && k < ARRAY_SIZE(buf2); ++k) {
|
|
if (buf1[ARRAY_SIZE(buf1) - 1] < buf2[k]) {
|
|
l = min_t(unsigned int,
|
|
i, ARRAY_SIZE(buf2) - 1);
|
|
for (; l > k; --l)
|
|
buf2[l] = buf2[l - 1];
|
|
break;
|
|
}
|
|
}
|
|
if (k < ARRAY_SIZE(buf2))
|
|
buf2[k] = buf1[ARRAY_SIZE(buf1) - 1];
|
|
}
|
|
|
|
/* Use 2 * median of 75th percentiles */
|
|
min_delta = buf2[ARRAY_SIZE(buf2) - 1] * 2;
|
|
|
|
/* Don't go too low */
|
|
if (min_delta < 0x300)
|
|
min_delta = 0x300;
|
|
|
|
pr_debug("%s: median 75th percentile=%#x, min_delta=%#x\n",
|
|
__func__, buf2[ARRAY_SIZE(buf2) - 1], min_delta);
|
|
return min_delta;
|
|
}
|
|
|
|
DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device);
|
|
int cp0_timer_irq_installed;
|
|
|
|
/*
|
|
* Possibly handle a performance counter interrupt.
|
|
* Return true if the timer interrupt should not be checked
|
|
*/
|
|
static inline int handle_perf_irq(int r2)
|
|
{
|
|
/*
|
|
* The performance counter overflow interrupt may be shared with the
|
|
* timer interrupt (cp0_perfcount_irq < 0). If it is and a
|
|
* performance counter has overflowed (perf_irq() == IRQ_HANDLED)
|
|
* and we can't reliably determine if a counter interrupt has also
|
|
* happened (!r2) then don't check for a timer interrupt.
|
|
*/
|
|
return (cp0_perfcount_irq < 0) &&
|
|
perf_irq() == IRQ_HANDLED &&
|
|
!r2;
|
|
}
|
|
|
|
irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
|
|
{
|
|
const int r2 = cpu_has_mips_r2_r6;
|
|
struct clock_event_device *cd;
|
|
int cpu = smp_processor_id();
|
|
|
|
/*
|
|
* Suckage alert:
|
|
* Before R2 of the architecture there was no way to see if a
|
|
* performance counter interrupt was pending, so we have to run
|
|
* the performance counter interrupt handler anyway.
|
|
*/
|
|
if (handle_perf_irq(r2))
|
|
return IRQ_HANDLED;
|
|
|
|
/*
|
|
* The same applies to performance counter interrupts. But with the
|
|
* above we now know that the reason we got here must be a timer
|
|
* interrupt. Being the paranoiacs we are we check anyway.
|
|
*/
|
|
if (!r2 || (read_c0_cause() & CAUSEF_TI)) {
|
|
/* Clear Count/Compare Interrupt */
|
|
write_c0_compare(read_c0_compare());
|
|
cd = &per_cpu(mips_clockevent_device, cpu);
|
|
cd->event_handler(cd);
|
|
|
|
return IRQ_HANDLED;
|
|
}
|
|
|
|
return IRQ_NONE;
|
|
}
|
|
|
|
struct irqaction c0_compare_irqaction = {
|
|
.handler = c0_compare_interrupt,
|
|
/*
|
|
* IRQF_SHARED: The timer interrupt may be shared with other interrupts
|
|
* such as perf counter and FDC interrupts.
|
|
*/
|
|
.flags = IRQF_PERCPU | IRQF_TIMER | IRQF_SHARED,
|
|
.name = "timer",
|
|
};
|
|
|
|
|
|
void mips_event_handler(struct clock_event_device *dev)
|
|
{
|
|
}
|
|
|
|
/*
|
|
* FIXME: This doesn't hold for the relocated E9000 compare interrupt.
|
|
*/
|
|
static int c0_compare_int_pending(void)
|
|
{
|
|
/* When cpu_has_mips_r2, this checks Cause.TI instead of Cause.IP7 */
|
|
return (read_c0_cause() >> cp0_compare_irq_shift) & (1ul << CAUSEB_IP);
|
|
}
|
|
|
|
/*
|
|
* Compare interrupt can be routed and latched outside the core,
|
|
* so wait up to worst case number of cycle counter ticks for timer interrupt
|
|
* changes to propagate to the cause register.
|
|
*/
|
|
#define COMPARE_INT_SEEN_TICKS 50
|
|
|
|
int c0_compare_int_usable(void)
|
|
{
|
|
unsigned int delta;
|
|
unsigned int cnt;
|
|
|
|
#ifdef CONFIG_KVM_GUEST
|
|
return 1;
|
|
#endif
|
|
|
|
/*
|
|
* IP7 already pending? Try to clear it by acking the timer.
|
|
*/
|
|
if (c0_compare_int_pending()) {
|
|
cnt = read_c0_count();
|
|
write_c0_compare(cnt);
|
|
back_to_back_c0_hazard();
|
|
while (read_c0_count() < (cnt + COMPARE_INT_SEEN_TICKS))
|
|
if (!c0_compare_int_pending())
|
|
break;
|
|
if (c0_compare_int_pending())
|
|
return 0;
|
|
}
|
|
|
|
for (delta = 0x10; delta <= 0x400000; delta <<= 1) {
|
|
cnt = read_c0_count();
|
|
cnt += delta;
|
|
write_c0_compare(cnt);
|
|
back_to_back_c0_hazard();
|
|
if ((int)(read_c0_count() - cnt) < 0)
|
|
break;
|
|
/* increase delta if the timer was already expired */
|
|
}
|
|
|
|
while ((int)(read_c0_count() - cnt) <= 0)
|
|
; /* Wait for expiry */
|
|
|
|
while (read_c0_count() < (cnt + COMPARE_INT_SEEN_TICKS))
|
|
if (c0_compare_int_pending())
|
|
break;
|
|
if (!c0_compare_int_pending())
|
|
return 0;
|
|
cnt = read_c0_count();
|
|
write_c0_compare(cnt);
|
|
back_to_back_c0_hazard();
|
|
while (read_c0_count() < (cnt + COMPARE_INT_SEEN_TICKS))
|
|
if (!c0_compare_int_pending())
|
|
break;
|
|
if (c0_compare_int_pending())
|
|
return 0;
|
|
|
|
/*
|
|
* Feels like a real count / compare timer.
|
|
*/
|
|
return 1;
|
|
}
|
|
|
|
unsigned int __weak get_c0_compare_int(void)
|
|
{
|
|
return MIPS_CPU_IRQ_BASE + cp0_compare_irq;
|
|
}
|
|
|
|
int r4k_clockevent_init(void)
|
|
{
|
|
unsigned int cpu = smp_processor_id();
|
|
struct clock_event_device *cd;
|
|
unsigned int irq, min_delta;
|
|
|
|
if (!cpu_has_counter || !mips_hpt_frequency)
|
|
return -ENXIO;
|
|
|
|
if (!c0_compare_int_usable())
|
|
return -ENXIO;
|
|
|
|
/*
|
|
* With vectored interrupts things are getting platform specific.
|
|
* get_c0_compare_int is a hook to allow a platform to return the
|
|
* interrupt number of its liking.
|
|
*/
|
|
irq = get_c0_compare_int();
|
|
|
|
cd = &per_cpu(mips_clockevent_device, cpu);
|
|
|
|
cd->name = "MIPS";
|
|
cd->features = CLOCK_EVT_FEAT_ONESHOT |
|
|
CLOCK_EVT_FEAT_C3STOP |
|
|
CLOCK_EVT_FEAT_PERCPU;
|
|
|
|
min_delta = calculate_min_delta();
|
|
|
|
cd->rating = 300;
|
|
cd->irq = irq;
|
|
cd->cpumask = cpumask_of(cpu);
|
|
cd->set_next_event = mips_next_event;
|
|
cd->event_handler = mips_event_handler;
|
|
|
|
clockevents_config_and_register(cd, mips_hpt_frequency, min_delta, 0x7fffffff);
|
|
|
|
if (cp0_timer_irq_installed)
|
|
return 0;
|
|
|
|
cp0_timer_irq_installed = 1;
|
|
|
|
setup_irq(irq, &c0_compare_irqaction);
|
|
|
|
return 0;
|
|
}
|
|
|