bd54522466
BACKGROUND ========== When multiple work items are queued to a workqueue, their execution order doesn't necessarily match the queueing order. They may get executed in any order and simultaneously. When fully serialized execution - one by one in the queueing order - is needed, an ordered workqueue should be used which can be created with alloc_ordered_workqueue(). However, alloc_ordered_workqueue() was a later addition. Before it, an ordered workqueue could be obtained by creating an UNBOUND workqueue with @max_active==1. This originally was an implementation side-effect which was broken by 4c16bd327c74 ("workqueue: implement NUMA affinity for unbound workqueues"). Because there were users that depended on the ordered execution, 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") made the workqueue allocation path implicitly promote UNBOUND workqueues w/ @max_active==1 to ordered workqueues. While this has worked okay, overloading the UNBOUND allocation interface this way creates other issues. It's difficult to tell whether a given workqueue actually needs to be ordered, and users that legitimately want a min concurrency level wq unexpectedly get an ordered one instead. With planned UNBOUND workqueue updates to improve execution locality and more prevalence of chiplet designs which can benefit from such improvements, this isn't a state we wanna be in forever. This patch series audits all callsites that create an UNBOUND workqueue w/ @max_active==1 and converts them to alloc_ordered_workqueue() as necessary. WHAT TO LOOK FOR ================ The conversions are from alloc_workqueue(WQ_UNBOUND | flags, 1, args...) to alloc_ordered_workqueue(flags, args...) which don't cause any functional changes. If you know that fully ordered execution is not necessary, please let me know. I'll drop the conversion and instead add a comment noting the fact to reduce confusion while conversion is in progress. 
If you aren't fully sure, it's completely fine to let the conversion through. The behavior will stay exactly the same and we can always reconsider later. As there are follow-up workqueue core changes, I'd really appreciate if the patch can be routed through the workqueue tree w/ your acks. Thanks. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Michael Ellerman <mpe@ellerman.id.au> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: Christophe Leroy <christophe.leroy@csgroup.eu> Cc: Nathan Lynch <nathanl@linux.ibm.com> Cc: linuxppc-dev@lists.ozlabs.org
238 lines
5.6 KiB
C
238 lines
5.6 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* temp.c Thermal management for cpu's with Thermal Assist Units
|
|
*
|
|
* Written by Troy Benjegerdes <hozer@drgw.net>
|
|
*
|
|
* TODO:
|
|
* dynamic power management to limit peak CPU temp (using ICTC)
|
|
* calibration???
|
|
*
|
|
* Silly, crazy ideas: use cpu load (from scheduler) and ICTC to extend battery
|
|
* life in portables, and add a 'performance/watt' metric somewhere in /proc
|
|
*/
|
|
|
|
#include <linux/errno.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/param.h>
|
|
#include <linux/string.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/init.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/workqueue.h>
|
|
|
|
#include <asm/interrupt.h>
|
|
#include <asm/io.h>
|
|
#include <asm/reg.h>
|
|
#include <asm/nvram.h>
|
|
#include <asm/cache.h>
|
|
#include <asm/8xx_immap.h>
|
|
#include <asm/machdep.h>
|
|
|
|
#include "setup.h"
|
|
|
|
/*
 * Per-CPU thermal window state.
 *
 * low/high are the threshold values programmed into THRM1/THRM2 by
 * set_thresholds(). TAUupdate() moves and widens the window when a
 * threshold is crossed; tau_timeout() narrows it again.
 */
struct tau_temp {
	int interrupts;		/* incremented by TAUException() */
	unsigned char low;	/* lower threshold, written to THRM1 */
	unsigned char high;	/* upper threshold, written to THRM2 */
	unsigned char grew;	/* set by TAUupdate(), cleared by tau_timeout() */
};

/* One entry per possible CPU, indexed by CPU number. */
static struct tau_temp tau[NR_CPUS];
|
|
|
|
/*
 * True when the thresholds are programmed with the THRM1_TIE bit set (see
 * set_thresholds()). Set once in TAU_init(): requires CONFIG_TAU_INT and a
 * "ppc750" platform. When false, tau_timeout() calls TAUupdate() itself.
 */
static bool tau_int_enable;
|
|
|
|
/*
 * Window tuning values.
 *
 * TODO: put these in a /proc interface, with some sanity checks, and maybe
 * dynamic adjustment to minimize # of interrupts
 */

/*
 * Step size and window expansion applied when a threshold is reported as
 * crossed. These are based on the limit that was out of range.
 */
#define step_size 2 /* step size when temp goes out of range */
#define window_expand 1 /* expand the window by this much */

/* Values used by the periodic pass that shrinks the window again. */
#define shrink_timer 2000 /* period between shrinking the window (ms, passed to msleep()) */
#define min_window 2 /* minimum window size, degrees C */
|
|
|
|
static void set_thresholds(unsigned long cpu)
|
|
{
|
|
u32 maybe_tie = tau_int_enable ? THRM1_TIE : 0;
|
|
|
|
/* setup THRM1, threshold, valid bit, interrupt when below threshold */
|
|
mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | maybe_tie | THRM1_TID);
|
|
|
|
/* setup THRM2, threshold, valid bit, interrupt when above threshold */
|
|
mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | maybe_tie);
|
|
}
|
|
|
|
static void TAUupdate(int cpu)
|
|
{
|
|
u32 thrm;
|
|
u32 bits = THRM1_TIV | THRM1_TIN | THRM1_V;
|
|
|
|
/* if both thresholds are crossed, the step_sizes cancel out
|
|
* and the window winds up getting expanded twice. */
|
|
thrm = mfspr(SPRN_THRM1);
|
|
if ((thrm & bits) == bits) {
|
|
mtspr(SPRN_THRM1, 0);
|
|
|
|
if (tau[cpu].low >= step_size) {
|
|
tau[cpu].low -= step_size;
|
|
tau[cpu].high -= (step_size - window_expand);
|
|
}
|
|
tau[cpu].grew = 1;
|
|
pr_debug("%s: low threshold crossed\n", __func__);
|
|
}
|
|
thrm = mfspr(SPRN_THRM2);
|
|
if ((thrm & bits) == bits) {
|
|
mtspr(SPRN_THRM2, 0);
|
|
|
|
if (tau[cpu].high <= 127 - step_size) {
|
|
tau[cpu].low += (step_size - window_expand);
|
|
tau[cpu].high += step_size;
|
|
}
|
|
tau[cpu].grew = 1;
|
|
pr_debug("%s: high threshold crossed\n", __func__);
|
|
}
|
|
}
|
|
|
|
#ifdef CONFIG_TAU_INT
/*
 * TAU interrupt handler - called when we have a thermal assist unit
 * interrupt, with interrupts disabled.
 *
 * Counts the interrupt for the current CPU and lets TAUupdate() adjust
 * that CPU's window.
 */
DEFINE_INTERRUPT_HANDLER_ASYNC(TAUException)
{
	const int this_cpu = smp_processor_id();

	tau[this_cpu].interrupts += 1;
	TAUupdate(this_cpu);
}
#endif /* CONFIG_TAU_INT */
|
|
|
|
static void tau_timeout(void * info)
|
|
{
|
|
int cpu;
|
|
int size;
|
|
int shrink;
|
|
|
|
cpu = smp_processor_id();
|
|
|
|
if (!tau_int_enable)
|
|
TAUupdate(cpu);
|
|
|
|
/* Stop thermal sensor comparisons and interrupts */
|
|
mtspr(SPRN_THRM3, 0);
|
|
|
|
size = tau[cpu].high - tau[cpu].low;
|
|
if (size > min_window && ! tau[cpu].grew) {
|
|
/* do an exponential shrink of half the amount currently over size */
|
|
shrink = (2 + size - min_window) / 4;
|
|
if (shrink) {
|
|
tau[cpu].low += shrink;
|
|
tau[cpu].high -= shrink;
|
|
} else { /* size must have been min_window + 1 */
|
|
tau[cpu].low += 1;
|
|
#if 1 /* debug */
|
|
if ((tau[cpu].high - tau[cpu].low) != min_window){
|
|
printk(KERN_ERR "temp.c: line %d, logic error\n", __LINE__);
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
|
|
tau[cpu].grew = 0;
|
|
|
|
set_thresholds(cpu);
|
|
|
|
/* Restart thermal sensor comparisons and interrupts.
|
|
* The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet"
|
|
* recommends that "the maximum value be set in THRM3 under all
|
|
* conditions."
|
|
*/
|
|
mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E);
|
|
}
|
|
|
|
/*
 * Workqueue that tau_work runs on. Allocated in TAU_init() with
 * alloc_ordered_workqueue().
 */
static struct workqueue_struct *tau_workq;
|
|
|
|
static void tau_work_func(struct work_struct *work)
|
|
{
|
|
msleep(shrink_timer);
|
|
on_each_cpu(tau_timeout, NULL, 0);
|
|
/* schedule ourselves to be run again */
|
|
queue_work(tau_workq, work);
|
|
}
|
|
|
|
/*
 * The single work item driving tau_work_func(). Queued once by TAU_init();
 * after that tau_work_func() requeues it on every run.
 */
static DECLARE_WORK(tau_work, tau_work_func);
|
|
|
|
/*
|
|
* setup the TAU
|
|
*
|
|
* Set things up to use THRM1 as a temperature lower bound, and THRM2 as an upper bound.
|
|
* Start off at zero
|
|
*/
|
|
|
|
/*
 * Set to 1 at the end of a successful TAU_init(); left at 0 when the CPU has
 * no TAU feature or the workqueue allocation fails.
 * NOTE(review): not static, so presumably read outside this file - confirm.
 */
int tau_initialized = 0;
|
|
|
|
/*
 * Per-CPU initialisation, run on each CPU via on_each_cpu() from TAU_init().
 *
 * @info: unused.
 */
static void __init TAU_init_smp(void *info)
{
	const unsigned long cpu = smp_processor_id();
	struct tau_temp *t = &tau[cpu];

	/*
	 * set these to a reasonable value and let the timer shrink the
	 * window
	 */
	t->low = 5;
	t->high = 120;

	set_thresholds(cpu);
}
|
|
|
|
/*
 * Initcall: set up TAU monitoring.
 *
 * Chooses interrupt or polling mode, allocates the ordered workqueue,
 * initialises the thresholds on every CPU and queues the periodic work item.
 *
 * Returns 0 on success, 1 when the CPU has no TAU feature, or -ENOMEM when
 * the workqueue cannot be allocated.
 */
static int __init TAU_init(void)
{
	bool use_irq = false;
	const char *mode;

	/*
	 * We assume in SMP that if one CPU has TAU support, they
	 * all have it --BenH
	 */
	if (!cpu_has_feature(CPU_FTR_TAU)) {
		printk("Thermal assist unit not available\n");
		tau_initialized = 0;
		return 1;
	}

	/* the platform name is only examined when CONFIG_TAU_INT is enabled */
	if (IS_ENABLED(CONFIG_TAU_INT) &&
	    strcmp(cur_cpu_spec->platform, "ppc750") == 0)
		use_irq = true;
	tau_int_enable = use_irq;

	tau_workq = alloc_ordered_workqueue("tau", 0);
	if (!tau_workq)
		return -ENOMEM;

	on_each_cpu(TAU_init_smp, NULL, 0);

	queue_work(tau_workq, &tau_work);

	mode = tau_int_enable ? "interrupts" : "workqueue";
	pr_info("Thermal assist unit using %s, shrink_timer: %d ms\n",
		mode, shrink_timer);
	tau_initialized = 1;

	return 0;
}
|
|
|
|
/* Register TAU_init() as an initcall. */
__initcall(TAU_init);
|
|
|
|
/*
|
|
* return current temp
|
|
*/
|
|
|
|
u32 cpu_temp_both(unsigned long cpu)
|
|
{
|
|
return ((tau[cpu].high << 16) | tau[cpu].low);
|
|
}
|
|
|
|
u32 cpu_temp(unsigned long cpu)
|
|
{
|
|
return ((tau[cpu].high + tau[cpu].low) / 2);
|
|
}
|
|
|
|
u32 tau_interrupts(unsigned long cpu)
|
|
{
|
|
return (tau[cpu].interrupts);
|
|
}
|