2019-05-20 10:18:57 +03:00
// SPDX-License-Identifier: GPL-2.0-or-later
2011-05-25 22:43:31 +04:00
/*
 * fam15h_power.c - AMD Family 15h processor power monitoring
 *
 * Copyright (c) 2011-2016 Advanced Micro Devices, Inc.
 * Author: Andreas Herrmann <herrmann.der.user@googlemail.com>
 */
# include <linux/err.h>
# include <linux/hwmon.h>
# include <linux/hwmon-sysfs.h>
# include <linux/init.h>
# include <linux/module.h>
# include <linux/pci.h>
# include <linux/bitops.h>
2016-04-06 10:44:11 +03:00
# include <linux/cpu.h>
# include <linux/cpumask.h>
2016-04-06 10:44:13 +03:00
# include <linux/time.h>
# include <linux/sched.h>
2011-05-25 22:43:31 +04:00
# include <asm/processor.h>
2015-10-30 12:56:57 +03:00
# include <asm/msr.h>
2011-05-25 22:43:31 +04:00
MODULE_DESCRIPTION ( " AMD Family 15h CPU processor power monitor " ) ;
2012-10-29 21:50:47 +04:00
MODULE_AUTHOR ( " Andreas Herrmann <herrmann.der.user@googlemail.com> " ) ;
2011-05-25 22:43:31 +04:00
MODULE_LICENSE ( " GPL " ) ;
/* D18F3 */
# define REG_NORTHBRIDGE_CAP 0xe8
/* D18F4 */
# define REG_PROCESSOR_TDP 0x1b8
/* D18F5 */
# define REG_TDP_RUNNING_AVERAGE 0xe0
# define REG_TDP_LIMIT3 0xe8
2015-10-30 12:56:55 +03:00
# define FAM15H_MIN_NUM_ATTRS 2
# define FAM15H_NUM_GROUPS 2
2016-04-06 10:44:11 +03:00
# define MAX_CUS 8
2015-10-30 12:56:55 +03:00
2016-04-06 10:44:13 +03:00
/* set maximum interval as 1 second */
# define MAX_INTERVAL 1000
2015-12-10 06:56:10 +03:00
# define PCI_DEVICE_ID_AMD_15H_M70H_NB_F4 0x15b4
2011-05-25 22:43:31 +04:00
struct fam15h_power_data {
2014-06-19 19:29:11 +04:00
struct pci_dev * pdev ;
2011-05-25 22:43:31 +04:00
unsigned int tdp_to_watts ;
unsigned int base_tdp ;
unsigned int processor_pwr_watts ;
2015-08-27 11:07:38 +03:00
unsigned int cpu_pwr_sample_ratio ;
2015-10-30 12:56:55 +03:00
const struct attribute_group * groups [ FAM15H_NUM_GROUPS ] ;
struct attribute_group group ;
2015-10-30 12:56:57 +03:00
/* maximum accumulated power of a compute unit */
u64 max_cu_acc_power ;
2016-04-06 10:44:11 +03:00
/* accumulated power of the compute units */
u64 cu_acc_power [ MAX_CUS ] ;
2016-04-06 10:44:12 +03:00
/* performance timestamp counter */
u64 cpu_sw_pwr_ptsc [ MAX_CUS ] ;
2016-04-06 10:44:13 +03:00
/* online/offline status of current compute unit */
int cu_on [ MAX_CUS ] ;
unsigned long power_period ;
2011-05-25 22:43:31 +04:00
} ;
2016-04-06 10:44:15 +03:00
static bool is_carrizo_or_later ( void )
{
return boot_cpu_data . x86 = = 0x15 & & boot_cpu_data . x86_model > = 0x60 ;
}
2016-12-22 15:05:34 +03:00
static ssize_t power1_input_show ( struct device * dev ,
struct device_attribute * attr , char * buf )
2011-05-25 22:43:31 +04:00
{
u32 val , tdp_limit , running_avg_range ;
s32 running_avg_capture ;
u64 curr_pwr_watts ;
struct fam15h_power_data * data = dev_get_drvdata ( dev ) ;
2014-06-19 19:29:11 +04:00
struct pci_dev * f4 = data - > pdev ;
2011-05-25 22:43:31 +04:00
pci_bus_read_config_dword ( f4 - > bus , PCI_DEVFN ( PCI_SLOT ( f4 - > devfn ) , 5 ) ,
REG_TDP_RUNNING_AVERAGE , & val ) ;
2015-08-27 11:07:35 +03:00
/*
* On Carrizo and later platforms , TdpRunAvgAccCap bit field
* is extended to 4 : 31 from 4 : 25.
*/
2016-04-06 10:44:15 +03:00
if ( is_carrizo_or_later ( ) ) {
2015-08-27 11:07:35 +03:00
running_avg_capture = val > > 4 ;
running_avg_capture = sign_extend32 ( running_avg_capture , 27 ) ;
} else {
running_avg_capture = ( val > > 4 ) & 0x3fffff ;
running_avg_capture = sign_extend32 ( running_avg_capture , 21 ) ;
}
2012-03-23 13:02:17 +04:00
running_avg_range = ( val & 0xf ) + 1 ;
2011-05-25 22:43:31 +04:00
pci_bus_read_config_dword ( f4 - > bus , PCI_DEVFN ( PCI_SLOT ( f4 - > devfn ) , 5 ) ,
REG_TDP_LIMIT3 , & val ) ;
2016-01-27 14:02:09 +03:00
/*
* On Carrizo and later platforms , ApmTdpLimit bit field
* is extended to 16 : 31 from 16 : 28.
*/
2016-04-06 10:44:15 +03:00
if ( is_carrizo_or_later ( ) )
2016-01-27 14:02:09 +03:00
tdp_limit = val > > 16 ;
else
tdp_limit = ( val > > 16 ) & 0x1fff ;
2012-06-21 17:26:12 +04:00
curr_pwr_watts = ( ( u64 ) ( tdp_limit +
data - > base_tdp ) ) < < running_avg_range ;
2012-03-23 13:02:17 +04:00
curr_pwr_watts - = running_avg_capture ;
2011-05-25 22:43:31 +04:00
curr_pwr_watts * = data - > tdp_to_watts ;
/*
* Convert to microWatt
*
* power is in Watt provided as fixed point integer with
* scaling factor 1 / ( 2 ^ 16 ) . For conversion we use
* ( 10 ^ 6 ) / ( 2 ^ 16 ) = 15625 / ( 2 ^ 10 )
*/
2012-03-23 13:02:17 +04:00
curr_pwr_watts = ( curr_pwr_watts * 15625 ) > > ( 10 + running_avg_range ) ;
2011-05-25 22:43:31 +04:00
return sprintf ( buf , " %u \n " , ( unsigned int ) curr_pwr_watts ) ;
}
2016-12-22 15:05:34 +03:00
static DEVICE_ATTR_RO ( power1_input ) ;
2011-05-25 22:43:31 +04:00
2016-12-22 15:05:34 +03:00
static ssize_t power1_crit_show ( struct device * dev ,
struct device_attribute * attr , char * buf )
2011-05-25 22:43:31 +04:00
{
struct fam15h_power_data * data = dev_get_drvdata ( dev ) ;
return sprintf ( buf , " %u \n " , data - > processor_pwr_watts ) ;
}
2016-12-22 15:05:34 +03:00
static DEVICE_ATTR_RO ( power1_crit ) ;
2011-05-25 22:43:31 +04:00
2016-04-06 10:44:11 +03:00
static void do_read_registers_on_cu ( void * _data )
{
struct fam15h_power_data * data = _data ;
int cpu , cu ;
cpu = smp_processor_id ( ) ;
/*
* With the new x86 topology modelling , cpu core id actually
* is compute unit id .
*/
cu = cpu_data ( cpu ) . cpu_core_id ;
rdmsrl_safe ( MSR_F15H_CU_PWR_ACCUMULATOR , & data - > cu_acc_power [ cu ] ) ;
2016-04-06 10:44:12 +03:00
rdmsrl_safe ( MSR_F15H_PTSC , & data - > cpu_sw_pwr_ptsc [ cu ] ) ;
2016-04-06 10:44:13 +03:00
data - > cu_on [ cu ] = 1 ;
2016-04-06 10:44:11 +03:00
}
/*
* This function is only able to be called when CPUID
* Fn8000_0007 : EDX [ 12 ] is set .
*/
static int read_registers ( struct fam15h_power_data * data )
{
int core , this_core ;
cpumask_var_t mask ;
2016-06-01 12:36:13 +03:00
int ret , cpu ;
2016-04-06 10:44:11 +03:00
ret = zalloc_cpumask_var ( & mask , GFP_KERNEL ) ;
if ( ! ret )
return - ENOMEM ;
2016-04-06 10:44:13 +03:00
memset ( data - > cu_on , 0 , sizeof ( int ) * MAX_CUS ) ;
2021-08-03 17:15:56 +03:00
cpus_read_lock ( ) ;
2016-04-06 10:44:11 +03:00
/*
* Choose the first online core of each compute unit , and then
* read their MSR value of power and ptsc in a single IPI ,
* because the MSR value of CPU core represent the compute
* unit ' s .
*/
core = - 1 ;
for_each_online_cpu ( cpu ) {
this_core = topology_core_id ( cpu ) ;
if ( this_core = = core )
continue ;
core = this_core ;
/* get any CPU on this compute unit */
cpumask_set_cpu ( cpumask_any ( topology_sibling_cpumask ( cpu ) ) , mask ) ;
}
2016-06-01 12:36:13 +03:00
on_each_cpu_mask ( mask , do_read_registers_on_cu , data , true ) ;
2016-04-06 10:44:11 +03:00
2021-08-03 17:15:56 +03:00
cpus_read_unlock ( ) ;
2016-04-06 10:44:11 +03:00
free_cpumask_var ( mask ) ;
return 0 ;
}
2016-12-22 15:05:34 +03:00
static ssize_t power1_average_show ( struct device * dev ,
struct device_attribute * attr , char * buf )
2016-04-06 10:44:13 +03:00
{
struct fam15h_power_data * data = dev_get_drvdata ( dev ) ;
u64 prev_cu_acc_power [ MAX_CUS ] , prev_ptsc [ MAX_CUS ] ,
jdelta [ MAX_CUS ] ;
u64 tdelta , avg_acc ;
int cu , cu_num , ret ;
signed long leftover ;
/*
* With the new x86 topology modelling , x86_max_cores is the
* compute unit number .
*/
cu_num = boot_cpu_data . x86_max_cores ;
ret = read_registers ( data ) ;
if ( ret )
return 0 ;
for ( cu = 0 ; cu < cu_num ; cu + + ) {
prev_cu_acc_power [ cu ] = data - > cu_acc_power [ cu ] ;
prev_ptsc [ cu ] = data - > cpu_sw_pwr_ptsc [ cu ] ;
}
leftover = schedule_timeout_interruptible ( msecs_to_jiffies ( data - > power_period ) ) ;
if ( leftover )
return 0 ;
ret = read_registers ( data ) ;
if ( ret )
return 0 ;
for ( cu = 0 , avg_acc = 0 ; cu < cu_num ; cu + + ) {
/* check if current compute unit is online */
if ( data - > cu_on [ cu ] = = 0 )
continue ;
if ( data - > cu_acc_power [ cu ] < prev_cu_acc_power [ cu ] ) {
jdelta [ cu ] = data - > max_cu_acc_power + data - > cu_acc_power [ cu ] ;
jdelta [ cu ] - = prev_cu_acc_power [ cu ] ;
} else {
jdelta [ cu ] = data - > cu_acc_power [ cu ] - prev_cu_acc_power [ cu ] ;
}
tdelta = data - > cpu_sw_pwr_ptsc [ cu ] - prev_ptsc [ cu ] ;
jdelta [ cu ] * = data - > cpu_pwr_sample_ratio * 1000 ;
do_div ( jdelta [ cu ] , tdelta ) ;
/* the unit is microWatt */
avg_acc + = jdelta [ cu ] ;
}
return sprintf ( buf , " %llu \n " , ( unsigned long long ) avg_acc ) ;
}
2016-12-22 15:05:34 +03:00
static DEVICE_ATTR_RO ( power1_average ) ;
2016-04-06 10:44:13 +03:00
2016-12-22 15:05:34 +03:00
static ssize_t power1_average_interval_show ( struct device * dev ,
struct device_attribute * attr ,
char * buf )
2016-04-06 10:44:13 +03:00
{
struct fam15h_power_data * data = dev_get_drvdata ( dev ) ;
return sprintf ( buf , " %lu \n " , data - > power_period ) ;
}
2016-12-22 15:05:34 +03:00
/*
 * sysfs store handler for power1_average_interval.
 *
 * Parses @buf as a decimal number and stores it as the new sampling
 * interval. Only values from 1 to MAX_INTERVAL are accepted.
 *
 * Returns @count on success, the kstrtoul() error for unparsable input, or
 * -EINVAL for an out-of-range value.
 */
static ssize_t power1_average_interval_store(struct device *dev,
					     struct device_attribute *attr,
					     const char *buf, size_t count)
{
	struct fam15h_power_data *data = dev_get_drvdata(dev);
	unsigned long interval;
	int err = kstrtoul(buf, 10, &interval);

	if (err)
		return err;

	/* the interval must be non-zero and must not exceed the maximum */
	if (interval == 0 || interval > MAX_INTERVAL)
		return -EINVAL;

	data->power_period = interval;

	return count;
}
static DEVICE_ATTR_RW(power1_average_interval);
2016-04-06 10:44:13 +03:00
2015-10-30 12:56:55 +03:00
static int fam15h_power_init_attrs ( struct pci_dev * pdev ,
struct fam15h_power_data * data )
2014-09-16 23:58:04 +04:00
{
2015-10-30 12:56:55 +03:00
int n = FAM15H_MIN_NUM_ATTRS ;
struct attribute * * fam15h_power_attrs ;
2015-10-30 12:56:56 +03:00
struct cpuinfo_x86 * c = & boot_cpu_data ;
2014-09-16 23:58:04 +04:00
2015-10-30 12:56:56 +03:00
if ( c - > x86 = = 0x15 & &
( c - > x86_model < = 0xf | |
2015-12-10 06:56:10 +03:00
( c - > x86_model > = 0x60 & & c - > x86_model < = 0x7f ) ) )
2015-10-30 12:56:55 +03:00
n + = 1 ;
2014-09-16 23:58:04 +04:00
2016-04-06 10:44:13 +03:00
/* check if processor supports accumulated power */
if ( boot_cpu_has ( X86_FEATURE_ACC_POWER ) )
n + = 2 ;
2015-10-30 12:56:55 +03:00
fam15h_power_attrs = devm_kcalloc ( & pdev - > dev , n ,
sizeof ( * fam15h_power_attrs ) ,
GFP_KERNEL ) ;
2011-05-25 22:43:31 +04:00
2015-10-30 12:56:55 +03:00
if ( ! fam15h_power_attrs )
return - ENOMEM ;
n = 0 ;
fam15h_power_attrs [ n + + ] = & dev_attr_power1_crit . attr ;
2015-10-30 12:56:56 +03:00
if ( c - > x86 = = 0x15 & &
( c - > x86_model < = 0xf | |
2015-12-10 06:56:10 +03:00
( c - > x86_model > = 0x60 & & c - > x86_model < = 0x7f ) ) )
2015-10-30 12:56:55 +03:00
fam15h_power_attrs [ n + + ] = & dev_attr_power1_input . attr ;
2016-04-06 10:44:13 +03:00
if ( boot_cpu_has ( X86_FEATURE_ACC_POWER ) ) {
fam15h_power_attrs [ n + + ] = & dev_attr_power1_average . attr ;
fam15h_power_attrs [ n + + ] = & dev_attr_power1_average_interval . attr ;
}
2015-10-30 12:56:55 +03:00
data - > group . attrs = fam15h_power_attrs ;
return 0 ;
}
2011-05-25 22:43:31 +04:00
2015-08-27 11:07:33 +03:00
static bool should_load_on_this_node ( struct pci_dev * f4 )
2011-05-25 22:43:31 +04:00
{
u32 val ;
pci_bus_read_config_dword ( f4 - > bus , PCI_DEVFN ( PCI_SLOT ( f4 - > devfn ) , 3 ) ,
REG_NORTHBRIDGE_CAP , & val ) ;
if ( ( val & BIT ( 29 ) ) & & ( ( val > > 30 ) & 3 ) )
return false ;
return true ;
}
2012-04-10 02:16:34 +04:00
/*
* Newer BKDG versions have an updated recommendation on how to properly
* initialize the running average range ( was : 0xE , now : 0x9 ) . This avoids
* counter saturations resulting in bogus power readings .
* We correct this value ourselves to cope with older BIOSes .
*/
2012-09-23 22:27:32 +04:00
static const struct pci_device_id affected_device [ ] = {
2012-04-26 00:44:20 +04:00
{ PCI_VDEVICE ( AMD , PCI_DEVICE_ID_AMD_15H_NB_F4 ) } ,
{ 0 }
} ;
2012-09-23 22:27:32 +04:00
static void tweak_runavg_range ( struct pci_dev * pdev )
2012-04-10 02:16:34 +04:00
{
u32 val ;
/*
* let this quirk apply only to the current version of the
* northbridge , since future versions may change the behavior
*/
2012-04-26 00:44:20 +04:00
if ( ! pci_match_id ( affected_device , pdev ) )
2012-04-10 02:16:34 +04:00
return ;
pci_bus_read_config_dword ( pdev - > bus ,
PCI_DEVFN ( PCI_SLOT ( pdev - > devfn ) , 5 ) ,
REG_TDP_RUNNING_AVERAGE , & val ) ;
if ( ( val & 0xf ) ! = 0xe )
return ;
val & = ~ 0xf ;
val | = 0x9 ;
pci_bus_write_config_dword ( pdev - > bus ,
PCI_DEVFN ( PCI_SLOT ( pdev - > devfn ) , 5 ) ,
REG_TDP_RUNNING_AVERAGE , val ) ;
}
2012-09-23 22:27:32 +04:00
#ifndef CONFIG_PM
#define fam15h_power_resume NULL
#else
/* PCI resume callback: apply the running average range correction again */
static int fam15h_power_resume(struct pci_dev *pdev)
{
	tweak_runavg_range(pdev);

	return 0;
}
#endif
2015-10-30 12:56:55 +03:00
static int fam15h_power_init_data ( struct pci_dev * f4 ,
struct fam15h_power_data * data )
2011-05-25 22:43:31 +04:00
{
2016-04-06 10:44:13 +03:00
u32 val ;
2011-05-25 22:43:31 +04:00
u64 tmp ;
2015-10-30 12:56:55 +03:00
int ret ;
2011-05-25 22:43:31 +04:00
pci_read_config_dword ( f4 , REG_PROCESSOR_TDP , & val ) ;
data - > base_tdp = val > > 16 ;
tmp = val & 0xffff ;
pci_bus_read_config_dword ( f4 - > bus , PCI_DEVFN ( PCI_SLOT ( f4 - > devfn ) , 5 ) ,
REG_TDP_LIMIT3 , & val ) ;
data - > tdp_to_watts = ( ( val & 0x3ff ) < < 6 ) | ( ( val > > 10 ) & 0x3f ) ;
tmp * = data - > tdp_to_watts ;
/* result not allowed to be >= 256W */
if ( ( tmp > > 16 ) > = 256 )
2013-01-10 22:01:24 +04:00
dev_warn ( & f4 - > dev ,
" Bogus value for ProcessorPwrWatts (processor_pwr_watts>=%u) \n " ,
2011-05-25 22:43:31 +04:00
( unsigned int ) ( tmp > > 16 ) ) ;
/* convert to microWatt */
data - > processor_pwr_watts = ( tmp * 15625 ) > > 10 ;
2015-08-27 11:07:38 +03:00
2015-10-30 12:56:55 +03:00
ret = fam15h_power_init_attrs ( f4 , data ) ;
if ( ret )
return ret ;
2015-08-27 11:07:38 +03:00
/* CPUID Fn8000_0007:EDX[12] indicates to support accumulated power */
2016-04-06 10:44:13 +03:00
if ( ! boot_cpu_has ( X86_FEATURE_ACC_POWER ) )
2015-10-30 12:56:55 +03:00
return 0 ;
2015-08-27 11:07:38 +03:00
/*
* determine the ratio of the compute unit power accumulator
* sample period to the PTSC counter period by executing CPUID
* Fn8000_0007 : ECX
*/
2016-04-06 10:44:13 +03:00
data - > cpu_pwr_sample_ratio = cpuid_ecx ( 0x80000007 ) ;
2015-10-30 12:56:55 +03:00
2015-10-30 12:56:57 +03:00
if ( rdmsrl_safe ( MSR_F15H_CU_MAX_PWR_ACCUMULATOR , & tmp ) ) {
pr_err ( " Failed to read max compute unit power accumulator MSR \n " ) ;
return - ENODEV ;
}
data - > max_cu_acc_power = tmp ;
2016-04-06 10:44:13 +03:00
/*
* Milliseconds are a reasonable interval for the measurement .
* But it shouldn ' t set too long here , because several seconds
* would cause the read function to hang . So set default
* interval as 10 ms .
*/
data - > power_period = 10 ;
2016-04-06 10:44:11 +03:00
return read_registers ( data ) ;
2011-05-25 22:43:31 +04:00
}
2012-11-19 22:22:35 +04:00
static int fam15h_power_probe ( struct pci_dev * pdev ,
2015-10-30 12:56:55 +03:00
const struct pci_device_id * id )
2011-05-25 22:43:31 +04:00
{
struct fam15h_power_data * data ;
2012-06-02 20:58:06 +04:00
struct device * dev = & pdev - > dev ;
2014-06-19 19:29:11 +04:00
struct device * hwmon_dev ;
2015-10-30 12:56:55 +03:00
int ret ;
2011-05-25 22:43:31 +04:00
2012-04-10 02:16:34 +04:00
/*
* though we ignore every other northbridge , we still have to
* do the tweaking on _each_ node in MCM processors as the counters
* are working hand - in - hand
*/
tweak_runavg_range ( pdev ) ;
2015-08-27 11:07:33 +03:00
if ( ! should_load_on_this_node ( pdev ) )
2012-06-02 20:58:06 +04:00
return - ENODEV ;
data = devm_kzalloc ( dev , sizeof ( struct fam15h_power_data ) , GFP_KERNEL ) ;
if ( ! data )
return - ENOMEM ;
2011-05-25 22:43:31 +04:00
2015-10-30 12:56:55 +03:00
ret = fam15h_power_init_data ( pdev , data ) ;
if ( ret )
return ret ;
2014-06-19 19:29:11 +04:00
data - > pdev = pdev ;
2011-05-25 22:43:31 +04:00
2015-10-30 12:56:55 +03:00
data - > groups [ 0 ] = & data - > group ;
2014-06-19 19:29:11 +04:00
hwmon_dev = devm_hwmon_device_register_with_groups ( dev , " fam15h_power " ,
data ,
2015-10-30 12:56:55 +03:00
& data - > groups [ 0 ] ) ;
2014-06-19 19:29:11 +04:00
return PTR_ERR_OR_ZERO ( hwmon_dev ) ;
2011-05-25 22:43:31 +04:00
}
2013-12-03 11:10:29 +04:00
static const struct pci_device_id fam15h_power_id_table [ ] = {
2011-05-25 22:43:31 +04:00
{ PCI_VDEVICE ( AMD , PCI_DEVICE_ID_AMD_15H_NB_F4 ) } ,
2014-09-16 23:58:16 +04:00
{ PCI_VDEVICE ( AMD , PCI_DEVICE_ID_AMD_15H_M30H_NB_F4 ) } ,
2015-08-27 11:07:32 +03:00
{ PCI_VDEVICE ( AMD , PCI_DEVICE_ID_AMD_15H_M60H_NB_F4 ) } ,
2015-12-10 06:56:10 +03:00
{ PCI_VDEVICE ( AMD , PCI_DEVICE_ID_AMD_15H_M70H_NB_F4 ) } ,
2012-12-05 15:12:42 +04:00
{ PCI_VDEVICE ( AMD , PCI_DEVICE_ID_AMD_16H_NB_F4 ) } ,
2014-11-04 20:49:02 +03:00
{ PCI_VDEVICE ( AMD , PCI_DEVICE_ID_AMD_16H_M30H_NB_F4 ) } ,
2011-05-25 22:43:31 +04:00
{ }
} ;
MODULE_DEVICE_TABLE ( pci , fam15h_power_id_table ) ;
static struct pci_driver fam15h_power_driver = {
. name = " fam15h_power " ,
. id_table = fam15h_power_id_table ,
. probe = fam15h_power_probe ,
2012-09-23 22:27:32 +04:00
. resume = fam15h_power_resume ,
2011-05-25 22:43:31 +04:00
} ;
2012-04-03 05:25:46 +04:00
module_pci_driver ( fam15h_power_driver ) ;