e63d6fb563
Enabling CONFIG_TAU_INT causes random crashes:
Unrecoverable exception 1700 at c0009414 (msr=1000)
Oops: Unrecoverable exception, sig: 6 [#1]
BE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2 PowerMac
Modules linked in:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.7.0-pmac-00043-gd5f545e1a8593 #5
NIP: c0009414 LR: c0009414 CTR: c00116fc
REGS: c0799eb8 TRAP: 1700 Not tainted (5.7.0-pmac-00043-gd5f545e1a8593)
MSR: 00001000 <ME> CR: 22000228 XER: 00000100
GPR00: 00000000 c0799f70 c076e300 00800000 0291c0ac 00e00000 c076e300 00049032
GPR08: 00000001 c00116fc 00000000 dfbd3200 ffffffff 007f80a8 00000000 00000000
GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 c075ce04
GPR24: c075ce04 dfff8880 c07b0000 c075ce04 00080000 00000001 c079ef98 c079ef5c
NIP [c0009414] arch_cpu_idle+0x24/0x6c
LR [c0009414] arch_cpu_idle+0x24/0x6c
Call Trace:
[c0799f70] [00000001] 0x1 (unreliable)
[c0799f80] [c0060990] do_idle+0xd8/0x17c
[c0799fa0] [c0060ba4] cpu_startup_entry+0x20/0x28
[c0799fb0] [c072d220] start_kernel+0x434/0x44c
[c0799ff0] [00003860] 0x3860
Instruction dump:
XXXXXXXX XXXXXXXX XXXXXXXX 3d20c07b XXXXXXXX XXXXXXXX XXXXXXXX 7c0802a6
XXXXXXXX XXXXXXXX XXXXXXXX 4e800421 XXXXXXXX XXXXXXXX XXXXXXXX 7d2000a6
---[ end trace 3a0c9b5cb216db6b ]---
Resolve this problem by disabling each THRMn comparator when handling
the associated THRMn interrupt and by disabling the TAU entirely when
updating THRMn thresholds.
Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/5a0ba3dc5612c7aac596727331284a3676c08472.1599260540.git.fthain@telegraphics.com.au
241 lines
5.6 KiB
C
241 lines
5.6 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* temp.c Thermal management for cpu's with Thermal Assist Units
|
|
*
|
|
* Written by Troy Benjegerdes <hozer@drgw.net>
|
|
*
|
|
* TODO:
|
|
* dynamic power management to limit peak CPU temp (using ICTC)
|
|
* calibration???
|
|
*
|
|
* Silly, crazy ideas: use cpu load (from scheduler) and ICTC to extend battery
|
|
* life in portables, and add a 'performance/watt' metric somewhere in /proc
|
|
*/
|
|
|
|
#include <linux/errno.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/param.h>
|
|
#include <linux/string.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/init.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/workqueue.h>
|
|
|
|
#include <asm/io.h>
|
|
#include <asm/reg.h>
|
|
#include <asm/nvram.h>
|
|
#include <asm/cache.h>
|
|
#include <asm/8xx_immap.h>
|
|
#include <asm/machdep.h>
|
|
#include <asm/asm-prototypes.h>
|
|
|
|
#include "setup.h"
|
|
|
|
/*
 * Per-CPU thermal state: the temperature window currently programmed into
 * the threshold comparators, plus bookkeeping for the shrink logic.
 */
static struct tau_temp {
	int interrupts;		/* incremented on every TAUException() on this CPU */
	unsigned char low;	/* lower bound of the window (written to THRM1) */
	unsigned char high;	/* upper bound of the window (written to THRM2) */
	unsigned char grew;	/* set by TAUupdate(), cleared by tau_timeout() */
} tau[NR_CPUS];
|
|
|
|
/*
 * True when threshold-crossing interrupts are in use.  TAU_init() sets it
 * only if CONFIG_TAU_INT is enabled and the platform string is "ppc750";
 * when false, tau_timeout() checks the comparators itself via TAUupdate().
 */
static bool tau_int_enable;
|
|
|
|
/*
 * Tunables for the temperature window.
 *
 * TODO: put these in a /proc interface, with some sanity checks, and maybe
 * dynamic adjustment to minimize # of interrupts
 */

/*
 * Step size and how much to expand the window when we get an interrupt.
 * These are based on the limit that was out of range.
 */
#define step_size		2	/* step size when temp goes out of range */
#define window_expand		1	/* expand the window by this much */

/* Values controlling how the window is shrunk again. */
#define shrink_timer	2000	/* period between shrinking the window (ms) */
#define min_window	2	/* minimum window size, degrees C */
|
|
|
|
static void set_thresholds(unsigned long cpu)
|
|
{
|
|
u32 maybe_tie = tau_int_enable ? THRM1_TIE : 0;
|
|
|
|
/* setup THRM1, threshold, valid bit, interrupt when below threshold */
|
|
mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | maybe_tie | THRM1_TID);
|
|
|
|
/* setup THRM2, threshold, valid bit, interrupt when above threshold */
|
|
mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | maybe_tie);
|
|
}
|
|
|
|
static void TAUupdate(int cpu)
|
|
{
|
|
u32 thrm;
|
|
u32 bits = THRM1_TIV | THRM1_TIN | THRM1_V;
|
|
|
|
/* if both thresholds are crossed, the step_sizes cancel out
|
|
* and the window winds up getting expanded twice. */
|
|
thrm = mfspr(SPRN_THRM1);
|
|
if ((thrm & bits) == bits) {
|
|
mtspr(SPRN_THRM1, 0);
|
|
|
|
if (tau[cpu].low >= step_size) {
|
|
tau[cpu].low -= step_size;
|
|
tau[cpu].high -= (step_size - window_expand);
|
|
}
|
|
tau[cpu].grew = 1;
|
|
pr_debug("%s: low threshold crossed\n", __func__);
|
|
}
|
|
thrm = mfspr(SPRN_THRM2);
|
|
if ((thrm & bits) == bits) {
|
|
mtspr(SPRN_THRM2, 0);
|
|
|
|
if (tau[cpu].high <= 127 - step_size) {
|
|
tau[cpu].low += (step_size - window_expand);
|
|
tau[cpu].high += step_size;
|
|
}
|
|
tau[cpu].grew = 1;
|
|
pr_debug("%s: high threshold crossed\n", __func__);
|
|
}
|
|
}
|
|
|
|
#ifdef CONFIG_TAU_INT
/*
 * TAU interrupts - called when we have a thermal assist unit interrupt
 * with interrupts disabled
 *
 * Counts the interrupt for the current CPU and lets TAUupdate() adjust the
 * temperature window.  @regs is not used.
 */
void TAUException(struct pt_regs *regs)
{
	int this_cpu;

	this_cpu = smp_processor_id();

	irq_enter();

	tau[this_cpu].interrupts += 1;
	TAUupdate(this_cpu);

	irq_exit();
}
#endif /* CONFIG_TAU_INT */
|
|
|
|
static void tau_timeout(void * info)
|
|
{
|
|
int cpu;
|
|
int size;
|
|
int shrink;
|
|
|
|
cpu = smp_processor_id();
|
|
|
|
if (!tau_int_enable)
|
|
TAUupdate(cpu);
|
|
|
|
/* Stop thermal sensor comparisons and interrupts */
|
|
mtspr(SPRN_THRM3, 0);
|
|
|
|
size = tau[cpu].high - tau[cpu].low;
|
|
if (size > min_window && ! tau[cpu].grew) {
|
|
/* do an exponential shrink of half the amount currently over size */
|
|
shrink = (2 + size - min_window) / 4;
|
|
if (shrink) {
|
|
tau[cpu].low += shrink;
|
|
tau[cpu].high -= shrink;
|
|
} else { /* size must have been min_window + 1 */
|
|
tau[cpu].low += 1;
|
|
#if 1 /* debug */
|
|
if ((tau[cpu].high - tau[cpu].low) != min_window){
|
|
printk(KERN_ERR "temp.c: line %d, logic error\n", __LINE__);
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
|
|
tau[cpu].grew = 0;
|
|
|
|
set_thresholds(cpu);
|
|
|
|
/* Restart thermal sensor comparisons and interrupts.
|
|
* The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet"
|
|
* recommends that "the maximum value be set in THRM3 under all
|
|
* conditions."
|
|
*/
|
|
mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E);
|
|
}
|
|
|
|
/* Workqueue that runs tau_work; allocated in TAU_init(). */
static struct workqueue_struct *tau_workq;
|
|
|
|
static void tau_work_func(struct work_struct *work)
|
|
{
|
|
msleep(shrink_timer);
|
|
on_each_cpu(tau_timeout, NULL, 0);
|
|
/* schedule ourselves to be run again */
|
|
queue_work(tau_workq, work);
|
|
}
|
|
|
|
/* Work item bound to tau_work_func(); first queued by TAU_init(). */
DECLARE_WORK(tau_work, tau_work_func);
|
|
|
|
/*
|
|
* setup the TAU
|
|
*
|
|
* Set things up to use THRM1 as a temperature lower bound, and THRM2 as an upper bound.
|
|
* Start off at zero
|
|
*/
|
|
|
|
/* Set to 1 by TAU_init() once the TAU has been set up; 0 otherwise. */
int tau_initialized = 0;
|
|
|
|
/*
 * Per-CPU part of TAU setup, run on each CPU by TAU_init(); @info is unused.
 *
 * Seeds the window with a wide range and programs the thresholds; the
 * periodic tau_timeout() is left to shrink the window afterwards.
 */
static void __init TAU_init_smp(void *info)
{
	unsigned long cpu = smp_processor_id();
	struct tau_temp *win = &tau[cpu];

	/* set these to a reasonable value and let the timer shrink the
	 * window */
	win->low = 5;
	win->high = 120;

	set_thresholds(cpu);
}
|
|
|
|
/*
 * Initcall that sets up the Thermal Assist Unit.
 *
 * Decides whether interrupts are used, allocates the workqueue, seeds the
 * window on every CPU and queues the first periodic work item.
 *
 * Return: 0 on success, 1 if the CPU has no TAU, -ENOMEM if the workqueue
 * cannot be allocated.
 */
static int __init TAU_init(void)
{
	/* We assume in SMP that if one CPU has TAU support, they
	 * all have it --BenH
	 */
	if (!cpu_has_feature(CPU_FTR_TAU)) {
		/* use pr_info() so the message carries an explicit log level,
		 * matching the other message printed by this function */
		pr_info("Thermal assist unit not available\n");
		tau_initialized = 0;
		return 1;
	}

	/* Interrupts are used only when configured in and on a "ppc750". */
	tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) &&
			 !strcmp(cur_cpu_spec->platform, "ppc750");

	tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0);
	if (!tau_workq)
		return -ENOMEM;

	on_each_cpu(TAU_init_smp, NULL, 0);

	/* Start the periodic update; tau_work_func() re-queues itself. */
	queue_work(tau_workq, &tau_work);

	pr_info("Thermal assist unit using %s, shrink_timer: %d ms\n",
		tau_int_enable ? "interrupts" : "workqueue", shrink_timer);
	tau_initialized = 1;

	return 0;
}
|
|
|
|
/* Register TAU_init() as an initcall. */
__initcall(TAU_init);
|
|
|
|
/*
|
|
* return current temp
|
|
*/
|
|
|
|
u32 cpu_temp_both(unsigned long cpu)
|
|
{
|
|
return ((tau[cpu].high << 16) | tau[cpu].low);
|
|
}
|
|
|
|
u32 cpu_temp(unsigned long cpu)
|
|
{
|
|
return ((tau[cpu].high + tau[cpu].low) / 2);
|
|
}
|
|
|
|
u32 tau_interrupts(unsigned long cpu)
|
|
{
|
|
return (tau[cpu].interrupts);
|
|
}
|