2013-01-21 04:37:57 -08:00
/*
* intel_powerclamp . c - package c - state idle injection
*
* Copyright ( c ) 2012 , Intel Corporation .
*
* Authors :
* Arjan van de Ven < arjan @ linux . intel . com >
* Jacob Pan < jacob . jun . pan @ linux . intel . com >
*
* This program is free software ; you can redistribute it and / or modify it
* under the terms and conditions of the GNU General Public License ,
* version 2 , as published by the Free Software Foundation .
*
* This program is distributed in the hope it will be useful , but WITHOUT
* ANY WARRANTY ; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE . See the GNU General Public License for
* more details .
*
* You should have received a copy of the GNU General Public License along with
* this program ; if not , write to the Free Software Foundation , Inc . ,
* 51 Franklin St - Fifth Floor , Boston , MA 02110 - 1301 USA .
*
*
* TODO :
* 1. better handle wakeups from external interrupts. currently a fixed
* compensation is added to the clamping duration when an excessive number
* of wakeups is observed during idle time. the reason is that, in the
* case of external interrupts that need no ack, clamping down the
* cpu in non-irq context does not reduce irqs. for the majority of
* cases, clamping down the cpu does help reduce irqs as well, so we should
* be able to differentiate the two cases and give a quantitative
* solution for the irqs that we can control, perhaps based on
* get_cpu_iowait_time_us()
*
* 2. synchronization with other hw blocks
*
*
*/
# define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
# include <linux/module.h>
# include <linux/kernel.h>
# include <linux/delay.h>
# include <linux/kthread.h>
# include <linux/cpu.h>
# include <linux/thermal.h>
# include <linux/slab.h>
# include <linux/tick.h>
# include <linux/debugfs.h>
# include <linux/seq_file.h>
2013-02-28 20:23:09 -08:00
# include <linux/sched/rt.h>
2017-02-01 18:07:51 +01:00
# include <uapi/linux/sched/types.h>
2013-01-21 04:37:57 -08:00
# include <asm/nmi.h>
# include <asm/msr.h>
# include <asm/mwait.h>
# include <asm/cpu_device_id.h>
# include <asm/hardirq.h>
/*
 * Exclusive upper bound of the target idle ratio (in percent): requested
 * ratios are clamped to MAX_TARGET_RATIO - 1 and cal_data[] holds one
 * calibration slot per ratio below this bound.
 */
# define MAX_TARGET_RATIO (50U)
/*
 * For each undisturbed clamping period (no extra wakeups during idle time),
 * the confidence counter of the given target ratio is incremented.
 * CONFIDENCE_OK is the level from which runtime calibration results are
 * considered valid.
 */
# define CONFIDENCE_OK (3)
/*
 * Default idle injection duration in jiffies; used by powerclamp_init()
 * when no duration has been configured. The driver adjusts the sleep time
 * between injections to meet the target idle ratio, similar to frequency
 * modulation.
 */
# define DEFAULT_DURATION_JIFFIES (6)
/* MWAIT hint computed by find_target_mwait() at probe time */
static unsigned int target_mwait ;
/* debugfs directory created by powerclamp_create_debug_files() */
static struct dentry * debug_dir ;
/* user selected target idle ratio in percent, 0 means clamping is off */
static unsigned int set_target_ratio ;
/* ratio measured for the last window by powerclamp_adjust_controls() */
static unsigned int current_ratio ;
/*
 * result of the last powerclamp_adjust_controls() call; when true the
 * workers skip the idle injection for this round
 */
static bool should_skip ;
/*
 * set by powerclamp_adjust_controls() when idle_wakeup_counter reached
 * 2 * window * online cpus; makes get_compensation() use the ratio itself
 * as compensation
 */
static bool reduce_irq ;
/*
 * wakeup counter that is read and reset by powerclamp_adjust_controls();
 * NOTE(review): no increment of this counter is visible in this file
 */
static atomic_t idle_wakeup_counter ;
static unsigned int control_cpu ; /* The cpu assigned to collect stats and
* update control parameters. Defaults to the BSP,
* but the BSP can be offlined.
*/
/* global on/off switch, set in start_power_clamp() / end_power_clamp() */
static bool clamping ;
2016-11-28 13:44:50 -08:00
/*
 * Scheduling parameters handed to sched_setscheduler() together with
 * SCHED_FIFO for every idle injection worker, see
 * start_power_clamp_worker().
 */
static const struct sched_param sparam = {
. sched_priority = MAX_USER_RT_PRIO / 2 ,
} ;
/* Per-CPU bookkeeping of one idle injection kthread worker. */
struct powerclamp_worker_data {
/* worker created by start_power_clamp_worker(), NULL after it was stopped */
struct kthread_worker * worker ;
/* computes the next injection slot, see clamp_balancing_func() */
struct kthread_work balancing_work ;
/* performs the idle injection, see clamp_idle_injection_func() */
struct kthread_delayed_work idle_injection_work ;
/* CPU this worker was created on */
unsigned int cpu ;
/* number of balancing rounds since the worker was started */
unsigned int count ;
/* guard band, 1 + target_ratio / 20, refreshed every balancing round */
unsigned int guard ;
/* snapshot of window_size taken every balancing round */
unsigned int window_size_now ;
/* snapshot of set_target_ratio taken every balancing round */
unsigned int target_ratio ;
/* module parameter "duration" converted to jiffies */
unsigned int duration_jiffies ;
/* per-worker enable flag; cleared first when the worker is stopped */
bool clamping ;
} ;
2013-01-21 04:37:57 -08:00
2016-11-28 13:44:50 -08:00
/* per-CPU worker state, allocated in powerclamp_init() */
static struct powerclamp_worker_data * __percpu worker_data;

/* handle returned by thermal_cooling_device_register() */
static struct thermal_cooling_device *cooling_dev;

/* bitmap tracking which CPUs currently own a clamping kthread worker */
static unsigned long *cpu_clamping_mask;

/* module parameter: forced idle time of each injection in msec */
static unsigned int duration;

/* package C-state ratio as last sampled by poll_pkg_cstate() */
static unsigned int pkg_cstate_ratio_cur;

/* module parameter: number of clamping cycles per control window */
static unsigned int window_size;
/*
 * duration_set - setter of the "duration" module parameter
 * @arg: user supplied string, parsed as base-10 unsigned long
 * @kp:  kernel parameter descriptor (not used)
 *
 * Only values within the recommended 6-25 msec range are accepted. A value
 * outside of that range is rejected and leaves the current duration
 * untouched; previously the error was reported but a clamped value was
 * stored anyway, so a failed write still changed the parameter.
 *
 * Return: 0 on success, the kstrtoul() error code when @arg cannot be
 * parsed, -EINVAL when the value is out of range.
 */
static int duration_set(const char *arg, const struct kernel_param *kp)
{
	unsigned long new_duration;
	int ret;

	ret = kstrtoul(arg, 10, &new_duration);
	if (ret)
		return ret;

	if (new_duration > 25 || new_duration < 6) {
		pr_err(" Out of recommended range %lu, between 6-25ms \n ",
		       new_duration);
		/* a rejected write must not modify the parameter */
		return -EINVAL;
	}

	duration = new_duration;
	/* full barrier kept from the original: order the store before later accesses */
	smp_mb();

	return 0;
}
2015-05-27 11:09:38 +09:30
static const struct kernel_param_ops duration_ops = {
2013-01-21 04:37:57 -08:00
. set = duration_set ,
. get = param_get_int ,
} ;
module_param_cb ( duration , & duration_ops , & duration , 0644 ) ;
MODULE_PARM_DESC ( duration , " forced idle time for each attempt in msec. " ) ;
/* Runtime calibration record, one per target ratio (see cal_data[]). */
struct powerclamp_calibration_data {
unsigned long confidence ; /* calibration counter: incremented by
* adjust_compensation() each time a clamping
* window completes without excessive wakeups.
* Once it reaches CONFIDENCE_OK the
* compensation is deemed usable.
*/
unsigned long steady_comp ; /* steady state compensation used when
* no extra wakeups occurred.
*/
unsigned long dynamic_comp ; /* compensation for excessive wakeups from
* idle, mostly caused by external interrupts.
* NOTE(review): only printed via debugfs in the
* visible code, never written.
*/
} ;
/* calibration table indexed by target ratio (0 .. MAX_TARGET_RATIO - 1) */
static struct powerclamp_calibration_data cal_data [ MAX_TARGET_RATIO ] ;
/*
 * window_size_set - setter of the "window_size" module parameter
 * @arg: user supplied string, parsed as base-10 unsigned long
 * @kp:  kernel parameter descriptor (not used)
 *
 * Only window sizes within the recommended 2-10 range are accepted. A value
 * outside of that range is rejected and leaves the current window size
 * untouched; previously the error was reported but a clamped value was
 * stored anyway, so a failed write still changed the parameter.
 *
 * Return: 0 on success, the kstrtoul() error code when @arg cannot be
 * parsed, -EINVAL when the value is out of range.
 */
static int window_size_set(const char *arg, const struct kernel_param *kp)
{
	unsigned long new_window_size;
	int ret;

	ret = kstrtoul(arg, 10, &new_window_size);
	if (ret)
		return ret;

	if (new_window_size > 10 || new_window_size < 2) {
		pr_err(" Out of recommended window size %lu, between 2-10 \n ",
		       new_window_size);
		/* a rejected write must not modify the parameter */
		return -EINVAL;
	}

	window_size = new_window_size;
	/* full barrier kept from the original: order the store before later accesses */
	smp_mb();

	return 0;
}
2015-05-27 11:09:38 +09:30
static const struct kernel_param_ops window_size_ops = {
2013-01-21 04:37:57 -08:00
. set = window_size_set ,
. get = param_get_int ,
} ;
module_param_cb ( window_size , & window_size_ops , & window_size , 0644 ) ;
MODULE_PARM_DESC ( window_size , " sliding window in number of clamping cycles \n "
" \t powerclamp controls idle ratio within this window. larger \n "
" \t window size results in slower response time but more smooth \n "
" \t clamping results. default to 2. " ) ;
static void find_target_mwait ( void )
{
unsigned int eax , ebx , ecx , edx ;
unsigned int highest_cstate = 0 ;
unsigned int highest_subcstate = 0 ;
int i ;
if ( boot_cpu_data . cpuid_level < CPUID_MWAIT_LEAF )
return ;
cpuid ( CPUID_MWAIT_LEAF , & eax , & ebx , & ecx , & edx ) ;
if ( ! ( ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED ) | |
! ( ecx & CPUID5_ECX_INTERRUPT_BREAK ) )
return ;
edx > > = MWAIT_SUBSTATE_SIZE ;
for ( i = 0 ; i < 7 & & edx ; i + + , edx > > = MWAIT_SUBSTATE_SIZE ) {
if ( edx & MWAIT_SUBSTATE_MASK ) {
highest_cstate = i ;
highest_subcstate = edx & MWAIT_SUBSTATE_MASK ;
}
}
target_mwait = ( highest_cstate < < MWAIT_SUBSTATE_SIZE ) |
( highest_subcstate - 1 ) ;
}
2015-05-07 09:03:59 -07:00
struct pkg_cstate_info {
bool skip ;
int msr_index ;
int cstate_id ;
} ;
# define PKG_CSTATE_INIT(id) { \
. msr_index = MSR_PKG_C # # id # # _RESIDENCY , \
. cstate_id = id \
}
static struct pkg_cstate_info pkg_cstates [ ] = {
PKG_CSTATE_INIT ( 2 ) ,
PKG_CSTATE_INIT ( 3 ) ,
PKG_CSTATE_INIT ( 6 ) ,
PKG_CSTATE_INIT ( 7 ) ,
PKG_CSTATE_INIT ( 8 ) ,
PKG_CSTATE_INIT ( 9 ) ,
PKG_CSTATE_INIT ( 10 ) ,
{ NULL } ,
} ;
2013-11-18 15:06:35 +08:00
static bool has_pkg_state_counter ( void )
{
2015-05-07 09:03:59 -07:00
u64 val ;
struct pkg_cstate_info * info = pkg_cstates ;
/* check if any one of the counter msrs exists */
while ( info - > msr_index ) {
if ( ! rdmsrl_safe ( info - > msr_index , & val ) )
return true ;
info + + ;
}
return false ;
2013-11-18 15:06:35 +08:00
}
2013-01-21 04:37:57 -08:00
static u64 pkg_state_counter ( void )
{
u64 val ;
u64 count = 0 ;
2015-05-07 09:03:59 -07:00
struct pkg_cstate_info * info = pkg_cstates ;
while ( info - > msr_index ) {
if ( ! info - > skip ) {
if ( ! rdmsrl_safe ( info - > msr_index , & val ) )
count + = val ;
else
info - > skip = true ;
}
info + + ;
2013-01-21 04:37:57 -08:00
}
return count ;
}
/*
 * get_compensation - extra idle ratio needed to reach the requested target
 * @ratio: target idle ratio in percent, used as index into cal_data[]
 *
 * The steady state compensation is the average over @ratio and two
 * neighbouring calibration slots and is only used when all three slots
 * reached CONFIDENCE_OK:
 *   - @ratio == 1:                    slots ratio, ratio + 1, ratio + 2
 *   - @ratio == MAX_TARGET_RATIO - 1: slots ratio, ratio - 1, ratio - 2
 *   - otherwise:                      slots ratio - 1, ratio, ratio + 1
 * As before, the first two cases fall back to the third one when their own
 * window is not trusted. The third window is now only evaluated when both
 * neighbours exist; it used to read cal_data[MAX_TARGET_RATIO] for the
 * highest ratio and cal_data[-1] for ratio 0.
 *
 * When reduce_irq is set the compensation is @ratio itself. The result is
 * capped such that @ratio plus compensation stays below MAX_TARGET_RATIO.
 *
 * Return: compensation in percent.
 */
static unsigned int get_compensation(int ratio)
{
	const int last = MAX_TARGET_RATIO - 1;
	unsigned int comp = 0;

	/* we only use compensation if all adjacent ones are good */
	if (ratio == 1 &&
	    cal_data[ratio].confidence >= CONFIDENCE_OK &&
	    cal_data[ratio + 1].confidence >= CONFIDENCE_OK &&
	    cal_data[ratio + 2].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio + 1].steady_comp +
			cal_data[ratio + 2].steady_comp) / 3;
	} else if (ratio == last &&
		   cal_data[ratio].confidence >= CONFIDENCE_OK &&
		   cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
		   cal_data[ratio - 2].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio - 1].steady_comp +
			cal_data[ratio - 2].steady_comp) / 3;
	} else if (ratio > 0 && ratio < last &&
		   cal_data[ratio].confidence >= CONFIDENCE_OK &&
		   cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
		   cal_data[ratio + 1].confidence >= CONFIDENCE_OK) {
		/* both neighbours are inside cal_data[] here */
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio - 1].steady_comp +
			cal_data[ratio + 1].steady_comp) / 3;
	}

	/* REVISIT: simple penalty of double idle injection */
	if (reduce_irq)
		comp = ratio;

	/* do not exceed limit */
	if (comp + ratio >= MAX_TARGET_RATIO)
		comp = MAX_TARGET_RATIO - ratio - 1;

	return comp;
}
static void adjust_compensation ( int target_ratio , unsigned int win )
{
int delta ;
struct powerclamp_calibration_data * d = & cal_data [ target_ratio ] ;
/*
* adjust compensations if confidence level has not been reached or
* there are too many wakeups during the last idle injection period , we
* cannot trust the data for compensation .
*/
if ( d - > confidence > = CONFIDENCE_OK | |
atomic_read ( & idle_wakeup_counter ) >
win * num_online_cpus ( ) )
return ;
delta = set_target_ratio - current_ratio ;
/* filter out bad data */
if ( delta > = 0 & & delta < = ( 1 + target_ratio / 10 ) ) {
if ( d - > steady_comp )
d - > steady_comp =
roundup ( delta + d - > steady_comp , 2 ) / 2 ;
else
d - > steady_comp = delta ;
d - > confidence + + ;
}
}
/*
 * powerclamp_adjust_controls - evaluate the last window and update controls
 * @target_ratio: ratio passed on to adjust_compensation()
 * @guard:        guard band added to the user target
 * @win:          window size in clamping cycles
 *
 * Samples the package C-state counter and the TSC, derives current_ratio
 * as percentage of counter delta over TSC delta (1 on the very first
 * sample, unchanged when the TSC did not advance), feeds the calibration,
 * recomputes reduce_irq from the wakeup counter and resets that counter.
 *
 * Return: true when the measured ratio is at or above set_target_ratio +
 * @guard, i.e. the next injection should be skipped.
 */
static bool powerclamp_adjust_controls(unsigned int target_ratio,
				       unsigned int guard, unsigned int win)
{
	static u64 msr_last, tsc_last;
	u64 msr_now = pkg_state_counter();
	u64 tsc_now = rdtsc();
	u64 pct;

	/* package C-state residency relative to elapsed TSC cycles */
	if (!msr_last || !tsc_last) {
		current_ratio = 1;
	} else if (tsc_now != tsc_last) {
		pct = 100 * (msr_now - msr_last);
		do_div(pct, (tsc_now - tsc_last));
		current_ratio = pct;
	}

	/* remember this sample for the next window */
	msr_last = msr_now;
	tsc_last = tsc_now;

	adjust_compensation(target_ratio, win);

	/*
	 * Too many external interrupts: raise the flag so that
	 * get_compensation() can react later on.
	 */
	reduce_irq = atomic_read(&idle_wakeup_counter) >=
		     2 * win * num_online_cpus();

	atomic_set(&idle_wakeup_counter, 0);

	/* above target + guard means skip */
	return set_target_ratio + guard <= current_ratio;
}
2016-11-28 13:44:50 -08:00
static void clamp_balancing_func ( struct kthread_work * work )
2013-01-21 04:37:57 -08:00
{
2016-11-28 13:44:50 -08:00
struct powerclamp_worker_data * w_data ;
int sleeptime ;
unsigned long target_jiffies ;
unsigned int compensated_ratio ;
int interval ; /* jiffies to sleep for each attempt */
2013-01-21 04:37:57 -08:00
2016-11-28 13:44:50 -08:00
w_data = container_of ( work , struct powerclamp_worker_data ,
balancing_work ) ;
2013-01-21 04:37:57 -08:00
2016-11-28 13:44:50 -08:00
/*
* make sure user selected ratio does not take effect until
* the next round . adjust target_ratio if user has changed
* target such that we can converge quickly .
*/
w_data - > target_ratio = READ_ONCE ( set_target_ratio ) ;
w_data - > guard = 1 + w_data - > target_ratio / 20 ;
w_data - > window_size_now = window_size ;
w_data - > duration_jiffies = msecs_to_jiffies ( duration ) ;
w_data - > count + + ;
/*
* systems may have different ability to enter package level
* c - states , thus we need to compensate the injected idle ratio
* to achieve the actual target reported by the HW .
*/
compensated_ratio = w_data - > target_ratio +
get_compensation ( w_data - > target_ratio ) ;
if ( compensated_ratio < = 0 )
compensated_ratio = 1 ;
interval = w_data - > duration_jiffies * 100 / compensated_ratio ;
/* align idle time */
target_jiffies = roundup ( jiffies , interval ) ;
sleeptime = target_jiffies - jiffies ;
if ( sleeptime < = 0 )
sleeptime = 1 ;
if ( clamping & & w_data - > clamping & & cpu_online ( w_data - > cpu ) )
kthread_queue_delayed_work ( w_data - > worker ,
& w_data - > idle_injection_work ,
sleeptime ) ;
}
static void clamp_idle_injection_func ( struct kthread_work * work )
{
struct powerclamp_worker_data * w_data ;
w_data = container_of ( work , struct powerclamp_worker_data ,
idle_injection_work . work ) ;
/*
* only elected controlling cpu can collect stats and update
* control parameters .
*/
if ( w_data - > cpu = = control_cpu & &
! ( w_data - > count % w_data - > window_size_now ) ) {
should_skip =
powerclamp_adjust_controls ( w_data - > target_ratio ,
w_data - > guard ,
w_data - > window_size_now ) ;
smp_mb ( ) ;
2013-01-21 04:37:57 -08:00
}
2016-11-28 13:44:50 -08:00
if ( should_skip )
goto balance ;
2016-11-28 13:44:52 -08:00
play_idle ( jiffies_to_msecs ( w_data - > duration_jiffies ) ) ;
2013-01-21 04:37:57 -08:00
2016-11-28 13:44:50 -08:00
balance :
if ( clamping & & w_data - > clamping & & cpu_online ( w_data - > cpu ) )
kthread_queue_work ( w_data - > worker , & w_data - > balancing_work ) ;
2013-01-21 04:37:57 -08:00
}
/*
 * Polling of the package C-state ratio once per HZ jiffies while clamping
 * is active, so that userspace can monitor the actual idle ratio.
 */
static void poll_pkg_cstate(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);

/*
 * poll_pkg_cstate - refresh pkg_cstate_ratio_cur
 * @dummy: work item (not used)
 *
 * The ratio is the package C-state counter delta as a percentage of the TSC
 * delta since the previous sample; it is 1 on the very first sample and
 * left unchanged when the TSC did not advance. The work re-arms itself as
 * long as clamping is enabled.
 */
static void poll_pkg_cstate(struct work_struct *dummy)
{
	static u64 msr_last;
	static u64 tsc_last;
	u64 msr_now = pkg_state_counter();
	u64 tsc_now = rdtsc();
	u64 pct;

	/* package C-state residency relative to elapsed TSC cycles */
	if (!msr_last || !tsc_last) {
		pkg_cstate_ratio_cur = 1;
	} else if (tsc_now != tsc_last) {
		pct = 100 * (msr_now - msr_last);
		do_div(pct, (tsc_now - tsc_last));
		pkg_cstate_ratio_cur = pct;
	}

	/* remember this sample for the next poll */
	msr_last = msr_now;
	tsc_last = tsc_now;

	if (clamping)
		schedule_delayed_work(&poll_pkg_cstate_work, HZ);
}
2016-11-28 13:44:50 -08:00
static void start_power_clamp_worker ( unsigned long cpu )
2016-11-28 13:44:49 -08:00
{
2016-11-28 13:44:50 -08:00
struct powerclamp_worker_data * w_data = per_cpu_ptr ( worker_data , cpu ) ;
struct kthread_worker * worker ;
2016-11-28 13:44:51 -08:00
worker = kthread_create_worker_on_cpu ( cpu , 0 , " kidle_inject/%ld " , cpu ) ;
2016-11-28 13:44:50 -08:00
if ( IS_ERR ( worker ) )
2016-11-28 13:44:49 -08:00
return ;
2016-11-28 13:44:50 -08:00
w_data - > worker = worker ;
w_data - > count = 0 ;
w_data - > cpu = cpu ;
w_data - > clamping = true ;
set_bit ( cpu , cpu_clamping_mask ) ;
sched_setscheduler ( worker - > task , SCHED_FIFO , & sparam ) ;
kthread_init_work ( & w_data - > balancing_work , clamp_balancing_func ) ;
kthread_init_delayed_work ( & w_data - > idle_injection_work ,
clamp_idle_injection_func ) ;
kthread_queue_work ( w_data - > worker , & w_data - > balancing_work ) ;
}
static void stop_power_clamp_worker ( unsigned long cpu )
{
struct powerclamp_worker_data * w_data = per_cpu_ptr ( worker_data , cpu ) ;
if ( ! w_data - > worker )
return ;
w_data - > clamping = false ;
/*
* Make sure that all works that get queued after this point see
* the clamping disabled . The counter part is not needed because
* there is an implicit memory barrier when the queued work
* is proceed .
*/
smp_wmb ( ) ;
kthread_cancel_work_sync ( & w_data - > balancing_work ) ;
kthread_cancel_delayed_work_sync ( & w_data - > idle_injection_work ) ;
/*
* The balancing work still might be queued here because
* the handling of the " clapming " variable , cancel , and queue
* operations are not synchronized via a lock . But it is not
* a big deal . The balancing work is fast and destroy kthread
* will wait for it .
*/
clear_bit ( w_data - > cpu , cpu_clamping_mask ) ;
kthread_destroy_worker ( w_data - > worker ) ;
w_data - > worker = NULL ;
2016-11-28 13:44:49 -08:00
}
2013-01-21 04:37:57 -08:00
static int start_power_clamp ( void )
{
unsigned long cpu ;
2013-01-24 08:51:22 +00:00
set_target_ratio = clamp ( set_target_ratio , 0U , MAX_TARGET_RATIO - 1 ) ;
2013-01-21 04:37:57 -08:00
/* prevent cpu hotplug */
get_online_cpus ( ) ;
/* prefer BSP */
control_cpu = 0 ;
if ( ! cpu_online ( control_cpu ) )
control_cpu = smp_processor_id ( ) ;
clamping = true ;
schedule_delayed_work ( & poll_pkg_cstate_work , 0 ) ;
2016-11-28 13:44:50 -08:00
/* start one kthread worker per online cpu */
2013-01-21 04:37:57 -08:00
for_each_online_cpu ( cpu ) {
2016-11-28 13:44:50 -08:00
start_power_clamp_worker ( cpu ) ;
2013-01-21 04:37:57 -08:00
}
put_online_cpus ( ) ;
return 0 ;
}
static void end_power_clamp ( void )
{
int i ;
/*
2016-11-28 13:44:50 -08:00
* Block requeuing in all the kthread workers . They will flush and
* stop faster .
2013-01-21 04:37:57 -08:00
*/
2016-11-28 13:44:50 -08:00
clamping = false ;
2013-01-21 04:37:57 -08:00
if ( bitmap_weight ( cpu_clamping_mask , num_possible_cpus ( ) ) ) {
for_each_set_bit ( i , cpu_clamping_mask , num_possible_cpus ( ) ) {
2016-11-28 13:44:50 -08:00
pr_debug ( " clamping worker for cpu %d alive, destroy \n " ,
i ) ;
stop_power_clamp_worker ( i ) ;
2013-01-21 04:37:57 -08:00
}
}
}
2016-11-28 13:44:51 -08:00
static int powerclamp_cpu_online ( unsigned int cpu )
2013-01-21 04:37:57 -08:00
{
2016-11-28 13:44:51 -08:00
if ( clamping = = false )
return 0 ;
start_power_clamp_worker ( cpu ) ;
/* prefer BSP as controlling CPU */
if ( cpu = = 0 ) {
control_cpu = 0 ;
smp_mb ( ) ;
2013-01-21 04:37:57 -08:00
}
2016-11-28 13:44:51 -08:00
return 0 ;
2013-01-21 04:37:57 -08:00
}
2016-11-28 13:44:51 -08:00
/*
 * powerclamp_cpu_predown - CPU hotplug callback run before a CPU goes down
 * @cpu: CPU that is about to go offline
 *
 * While clamping is active the worker of @cpu is stopped. If @cpu was the
 * controlling CPU, the first online CPU other than @cpu takes over.
 *
 * Return: always 0.
 */
static int powerclamp_cpu_predown(unsigned int cpu)
{
	if (!clamping)
		return 0;

	stop_power_clamp_worker(cpu);

	if (cpu == control_cpu) {
		/* elect a new controlling CPU, skipping the outgoing one */
		control_cpu = cpumask_first(cpu_online_mask);
		if (control_cpu == cpu)
			control_cpu = cpumask_next(cpu, cpu_online_mask);
		smp_mb();
	}

	return 0;
}
/*
 * powerclamp_get_max_state - cooling device callback reporting the maximum
 * state
 * @cdev:  cooling device (not used)
 * @state: receives MAX_TARGET_RATIO
 *
 * Note that powerclamp_set_cur_state() clamps requests to
 * MAX_TARGET_RATIO - 1.
 *
 * Return: always 0.
 */
static int powerclamp_get_max_state ( struct thermal_cooling_device * cdev ,
unsigned long * state )
{
* state = MAX_TARGET_RATIO ;
return 0 ;
}
/*
 * powerclamp_get_cur_state - cooling device callback reporting the current
 * state
 * @cdev:  cooling device (not used)
 * @state: receives the last polled package C-state ratio while clamping,
 *         otherwise -1 converted to unsigned long
 *
 * Return: always 0.
 */
static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
				    unsigned long *state)
{
	if (!clamping) {
		/*
		 * To save power the idle ratio is not polled while not
		 * clamping; report an invalid state instead.
		 */
		*state = -1;
		return 0;
	}

	*state = pkg_cstate_ratio_cur;
	return 0;
}
/*
 * powerclamp_set_cur_state - cooling device callback setting the target
 * ratio
 * @cdev:             cooling device (not used)
 * @new_target_ratio: requested idle ratio, clamped to
 *                    0 .. MAX_TARGET_RATIO - 1
 *
 * A change from zero to non-zero starts idle injection, a change from
 * non-zero to zero stops it, anything else only updates the target of the
 * running (or stopped) clamping.
 *
 * Return: the result of start_power_clamp() when injection is started,
 * 0 otherwise.
 */
static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
				    unsigned long new_target_ratio)
{
	new_target_ratio = clamp(new_target_ratio, 0UL,
				 (unsigned long)(MAX_TARGET_RATIO - 1));

	if (!set_target_ratio && new_target_ratio) {
		pr_info(" Start idle injection to reduce power \n ");
		set_target_ratio = new_target_ratio;
		return start_power_clamp();
	}

	if (set_target_ratio && !new_target_ratio) {
		pr_info(" Stop forced idle injection \n ");
		end_power_clamp();
		set_target_ratio = 0;
		return 0;
	}

	/* adjust currently running */
	set_target_ratio = new_target_ratio;
	/* make new set_target_ratio visible to other cpus */
	smp_mb();

	return 0;
}
/*
 * Callbacks binding the driver to the generic thermal layer as a cooling
 * device; registered in powerclamp_init().
 */
static struct thermal_cooling_device_ops powerclamp_cooling_ops = {
. get_max_state = powerclamp_get_max_state ,
. get_cur_state = powerclamp_get_cur_state ,
. set_cur_state = powerclamp_set_cur_state ,
} ;
2016-11-14 11:08:45 -08:00
/*
 * CPU match table used by powerclamp_probe(): any Intel family/model that
 * has X86_FEATURE_MWAIT. The empty entry terminates the table.
 */
static const struct x86_cpu_id __initconst intel_powerclamp_ids [ ] = {
{ X86_VENDOR_INTEL , X86_FAMILY_ANY , X86_MODEL_ANY , X86_FEATURE_MWAIT } ,
{ }
} ;
MODULE_DEVICE_TABLE ( x86cpu , intel_powerclamp_ids ) ;
2015-03-25 22:16:24 +01:00
/*
 * powerclamp_probe - check that the CPU can be used for power clamping
 *
 * Requires a CPU matching intel_powerclamp_ids and at least one readable
 * package C-state residency counter, then determines the MWAIT hint.
 *
 * Return: 0 on success, -ENODEV when a requirement is not met.
 */
static int __init powerclamp_probe(void)
{
	const struct x86_cpu_id *match = x86_match_cpu(intel_powerclamp_ids);

	if (!match) {
		pr_err(" CPU does not support MWAIT \n ");
		return -ENODEV;
	}

	/* The goal for idle time alignment is to achieve package cstate. */
	if (!has_pkg_state_counter()) {
		pr_info(" No package C-state available \n ");
		return -ENODEV;
	}

	/* find the deepest mwait value */
	find_target_mwait();

	return 0;
}
static int powerclamp_debug_show ( struct seq_file * m , void * unused )
{
int i = 0 ;
seq_printf ( m , " controlling cpu: %d \n " , control_cpu ) ;
seq_printf ( m , " pct confidence steady dynamic (compensation) \n " ) ;
for ( i = 0 ; i < MAX_TARGET_RATIO ; i + + ) {
seq_printf ( m , " %d \t %lu \t %lu \t %lu \n " ,
i ,
cal_data [ i ] . confidence ,
cal_data [ i ] . steady_comp ,
cal_data [ i ] . dynamic_comp ) ;
}
return 0 ;
}
/*
 * powerclamp_debug_open - open callback of the debugfs calibration file
 * @inode: inode of the file; its i_private is handed to single_open()
 * @file:  file being opened
 *
 * Return: the result of single_open() with powerclamp_debug_show() as show
 * callback.
 */
static int powerclamp_debug_open ( struct inode * inode ,
struct file * file )
{
return single_open ( file , powerclamp_debug_show , inode - > i_private ) ;
}
/*
 * File operations of the debugfs calibration file: opened through
 * powerclamp_debug_open(), everything else uses the seq_file helpers.
 */
static const struct file_operations powerclamp_debug_fops = {
. open = powerclamp_debug_open ,
. read = seq_read ,
. llseek = seq_lseek ,
. release = single_release ,
. owner = THIS_MODULE ,
} ;
/*
 * powerclamp_create_debug_files - create the debugfs directory and the
 * calibration file
 *
 * Failures are not reported to the caller. When the file cannot be created
 * the directory is removed again.
 */
static inline void powerclamp_create_debug_files(void)
{
	debug_dir = debugfs_create_dir(" intel_powerclamp ", NULL);
	if (!debug_dir)
		return;

	if (debugfs_create_file(" powerclamp_calib ", S_IRUGO, debug_dir,
				cal_data, &powerclamp_debug_fops))
		return;

	/* file creation failed: do not leave an empty directory behind */
	debugfs_remove_recursive(debug_dir);
}
2016-11-28 13:44:51 -08:00
/*
 * hotplug state returned by cpuhp_setup_state_nocalls() in
 * powerclamp_init(); needed to remove the callbacks again
 */
static enum cpuhp_state hp_state ;
2015-03-25 22:16:24 +01:00
/*
 * powerclamp_init - module entry point
 *
 * Allocates the clamping bitmap, probes the CPU, installs the CPU hotplug
 * callbacks, allocates the per-CPU worker data, registers the cooling
 * device and creates the debugfs files. Everything set up so far is
 * released again on failure.
 *
 * window_size and duration only get their defaults when they are still
 * zero, so a value supplied as module parameter at load time is kept. The
 * window size used to be overwritten with 2 unconditionally.
 *
 * Return: 0 on success, -ENOMEM when an allocation fails, the error of
 * powerclamp_probe() or cpuhp_setup_state_nocalls(), or -ENODEV when the
 * cooling device cannot be registered.
 */
static int __init powerclamp_init(void)
{
	int retval;
	int bitmap_size;

	bitmap_size = BITS_TO_LONGS(num_possible_cpus()) * sizeof(long);
	cpu_clamping_mask = kzalloc(bitmap_size, GFP_KERNEL);
	if (!cpu_clamping_mask)
		return -ENOMEM;

	/* probe cpu features and ids here */
	retval = powerclamp_probe();
	if (retval)
		goto exit_free;

	/*
	 * set default limit unless one was given at load time, maybe
	 * adjusted during runtime based on feedback
	 */
	if (!window_size)
		window_size = 2;

	retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					   " thermal/intel_powerclamp:online ",
					   powerclamp_cpu_online,
					   powerclamp_cpu_predown);
	if (retval < 0)
		goto exit_free;

	/* a dynamic state returns the allocated state number on success */
	hp_state = retval;

	worker_data = alloc_percpu(struct powerclamp_worker_data);
	if (!worker_data) {
		retval = -ENOMEM;
		goto exit_unregister;
	}

	cooling_dev = thermal_cooling_device_register(" intel_powerclamp ", NULL,
						      &powerclamp_cooling_ops);
	if (IS_ERR(cooling_dev)) {
		retval = -ENODEV;
		goto exit_free_thread;
	}

	if (!duration)
		duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES);

	powerclamp_create_debug_files();

	return 0;

exit_free_thread:
	free_percpu(worker_data);
exit_unregister:
	cpuhp_remove_state_nocalls(hp_state);
exit_free:
	kfree(cpu_clamping_mask);
	return retval;
}
module_init(powerclamp_init);
2015-03-25 22:16:24 +01:00
/*
 * powerclamp_exit - module exit point
 *
 * Teardown order matters: the cooling device is unregistered first so that
 * the thermal layer should no longer be able to call
 * powerclamp_set_cur_state() and restart clamping. Then the workers are
 * stopped, the hotplug callbacks removed and the polling work cancelled.
 * Only after nothing can reference them anymore are the per-CPU worker data
 * and the clamping bitmap freed. Previously both were freed while the
 * cooling device was still registered.
 */
static void __exit powerclamp_exit(void)
{
	/* stop accepting new requests before tearing anything down */
	thermal_cooling_device_unregister(cooling_dev);

	end_power_clamp();
	cpuhp_remove_state_nocalls(hp_state);

	/* the poll work re-arms itself only while clamping is set */
	cancel_delayed_work_sync(&poll_pkg_cstate_work);
	debugfs_remove_recursive(debug_dir);

	/* no worker, hotplug callback or cooling callback is left */
	free_percpu(worker_data);
	kfree(cpu_clamping_mask);
}
module_exit(powerclamp_exit);
/* module metadata: license, authors and description */
MODULE_LICENSE ( " GPL " ) ;
MODULE_AUTHOR ( " Arjan van de Ven <arjan@linux.intel.com> " ) ;
MODULE_AUTHOR ( " Jacob Pan <jacob.jun.pan@linux.intel.com> " ) ;
MODULE_DESCRIPTION ( " Package Level C-state Idle Injection for Intel CPUs " ) ;