2013-02-06 21:02:13 +04:00
/*
2013-04-10 02:38:18 +04:00
* intel_pstate . c : Native P state management for Intel processors
2013-02-06 21:02:13 +04:00
*
* ( C ) Copyright 2012 Intel Corporation
* Author : Dirk Brandewie < dirk . j . brandewie @ intel . com >
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation ; version 2
* of the License .
*/
# include <linux/kernel.h>
# include <linux/kernel_stat.h>
# include <linux/module.h>
# include <linux/ktime.h>
# include <linux/hrtimer.h>
# include <linux/tick.h>
# include <linux/slab.h>
# include <linux/sched.h>
# include <linux/list.h>
# include <linux/cpu.h>
# include <linux/cpufreq.h>
# include <linux/sysfs.h>
# include <linux/types.h>
# include <linux/fs.h>
# include <linux/debugfs.h>
2013-10-31 19:24:05 +04:00
# include <linux/acpi.h>
2015-06-02 12:01:38 +03:00
# include <linux/vmalloc.h>
2013-02-06 21:02:13 +04:00
# include <trace/events/power.h>
# include <asm/div64.h>
# include <asm/msr.h>
# include <asm/cpu_device_id.h>
2015-04-03 16:19:53 +03:00
# include <asm/cpufeature.h>
2013-02-06 21:02:13 +04:00
2015-10-15 02:12:01 +03:00
# if IS_ENABLED(CONFIG_ACPI)
# include <acpi/processor.h>
# endif
2014-02-12 22:01:07 +04:00
/*
 * BYT_* register numbers.
 * NOTE(review): presumably MSR addresses; their users are outside this
 * section, confirm there.
 */
#define BYT_RATIOS		0x66a
#define BYT_VIDS		0x66b
#define BYT_TURBO_RATIOS	0x66c
#define BYT_TURBO_VIDS		0x66d
2014-02-12 22:01:07 +04:00
2014-05-29 20:32:23 +04:00
/*
 * Fixed-point helpers: a value carries FRAC_BITS fractional bits, so the
 * integer 1 is represented as (1 << FRAC_BITS).
 */
#define FRAC_BITS	8
#define int_tofp(X)	((int64_t)(X) << FRAC_BITS)
#define fp_toint(X)	((X) >> FRAC_BITS)

/*
 * Multiply two fixed-point values.  The product is formed in 64 bits and
 * shifted back down by FRAC_BITS before being narrowed to int32_t.
 */
static inline int32_t mul_fp(int32_t x, int32_t y)
{
	int64_t wide = (int64_t)x * (int64_t)y;

	return wide >> FRAC_BITS;
}
intel_pstate: Fix overflow in busy_scaled due to long delay
The kernel may delay interrupts for a long time which can result in timers
being delayed. If this occurs the intel_pstate driver will crash with a
divide by zero error:
divide error: 0000 [#1] SMP
Modules linked in: btrfs zlib_deflate raid6_pq xor msdos ext4 mbcache jbd2 binfmt_misc arc4 md4 nls_utf8 cifs dns_resolver tcp_lp bnep bluetooth rfkill fuse dm_service_time iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi nf_conntrack_netbios_ns nf_conntrack_broadcast nf_conntrack_ftp ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter ip_tables intel_powerclamp coretemp vfat fat kvm_intel iTCO_wdt iTCO_vendor_support ipmi_devintf sr_mod kvm crct10dif_pclmul
crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel cdc_ether lrw usbnet cdrom mii gf128mul glue_helper ablk_helper cryptd lpc_ich mfd_core pcspkr sb_edac edac_core ipmi_si ipmi_msghandler ioatdma wmi shpchp acpi_pad nfsd auth_rpcgss nfs_acl lockd uinput dm_multipath sunrpc xfs libcrc32c usb_storage sd_mod crc_t10dif crct10dif_common ixgbe mgag200 syscopyarea sysfillrect sysimgblt mdio drm_kms_helper ttm igb drm ptp pps_core dca i2c_algo_bit megaraid_sas i2c_core dm_mirror dm_region_hash dm_log dm_mod
CPU: 113 PID: 0 Comm: swapper/113 Tainted: G W -------------- 3.10.0-229.1.2.el7.x86_64 #1
Hardware name: IBM x3950 X6 -[3837AC2]-/00FN827, BIOS -[A8E112BUS-1.00]- 08/27/2014
task: ffff880fe8abe660 ti: ffff880fe8ae4000 task.ti: ffff880fe8ae4000
RIP: 0010:[<ffffffff814a9279>] [<ffffffff814a9279>] intel_pstate_timer_func+0x179/0x3d0
RSP: 0018:ffff883fff4e3db8 EFLAGS: 00010206
RAX: 0000000027100000 RBX: ffff883fe6965100 RCX: 0000000000000000
RDX: 0000000000000000 RSI: 0000000000000010 RDI: 000000002e53632d
RBP: ffff883fff4e3e20 R08: 000e6f69a5a125c0 R09: ffff883fe84ec001
R10: 0000000000000002 R11: 0000000000000005 R12: 00000000000049f5
R13: 0000000000271000 R14: 00000000000049f5 R15: 0000000000000246
FS: 0000000000000000(0000) GS:ffff883fff4e0000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f7668601000 CR3: 000000000190a000 CR4: 00000000001407e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Stack:
ffff883fff4e3e58 ffffffff81099dc1 0000000000000086 0000000000000071
ffff883fff4f3680 0000000000000071 fbdc8a965e33afee ffffffff810b69dd
ffff883fe84ec000 ffff883fe6965108 0000000000000100 ffffffff814a9100
Call Trace:
<IRQ>
[<ffffffff81099dc1>] ? run_posix_cpu_timers+0x51/0x840
[<ffffffff810b69dd>] ? trigger_load_balance+0x5d/0x200
[<ffffffff814a9100>] ? pid_param_set+0x130/0x130
[<ffffffff8107df56>] call_timer_fn+0x36/0x110
[<ffffffff814a9100>] ? pid_param_set+0x130/0x130
[<ffffffff8107fdcf>] run_timer_softirq+0x21f/0x320
[<ffffffff81077b2f>] __do_softirq+0xef/0x280
[<ffffffff816156dc>] call_softirq+0x1c/0x30
[<ffffffff81015d95>] do_softirq+0x65/0xa0
[<ffffffff81077ec5>] irq_exit+0x115/0x120
[<ffffffff81616355>] smp_apic_timer_interrupt+0x45/0x60
[<ffffffff81614a1d>] apic_timer_interrupt+0x6d/0x80
<EOI>
[<ffffffff814a9c32>] ? cpuidle_enter_state+0x52/0xc0
[<ffffffff814a9c28>] ? cpuidle_enter_state+0x48/0xc0
[<ffffffff814a9d65>] cpuidle_idle_call+0xc5/0x200
[<ffffffff8101d14e>] arch_cpu_idle+0xe/0x30
[<ffffffff810c67c1>] cpu_startup_entry+0xf1/0x290
[<ffffffff8104228a>] start_secondary+0x1ba/0x230
Code: 42 0f 00 45 89 e6 48 01 c2 43 8d 44 6d 00 39 d0 73 26 49 c1 e5 08 89 d2 4d 63 f4 49 63 c5 48 c1 e2 08 48 c1 e0 08 48 63 ca 48 99 <48> f7 f9 48 98 4c 0f af f0 49 c1 ee 08 8b 43 78 c1 e0 08 44 29
RIP [<ffffffff814a9279>] intel_pstate_timer_func+0x179/0x3d0
RSP <ffff883fff4e3db8>
The kernel values for cpudata for CPU 113 were:
struct cpudata {
cpu = 113,
timer = {
entry = {
next = 0x0,
prev = 0xdead000000200200
},
expires = 8357799745,
base = 0xffff883fe84ec001,
function = 0xffffffff814a9100 <intel_pstate_timer_func>,
data = 18446612406765768960,
<snip>
i_gain = 0,
d_gain = 0,
deadband = 0,
last_err = 22489
},
last_sample_time = {
tv64 = 4063132438017305
},
prev_aperf = 287326796397463,
prev_mperf = 251427432090198,
sample = {
core_pct_busy = 23081,
aperf = 2937407,
mperf = 3257884,
freq = 2524484,
time = {
tv64 = 4063149215234118
}
}
}
which results in the time between samples = sample.time - last_sample_time
= 4063149215234118 - 4063132438017305 = 16777216813 ns, which is 16.777 seconds.
The duration between reads of the APERF and MPERF registers overflowed a s32
sized integer in intel_pstate_get_scaled_busy()'s call to div_fp(). The result
is that int_tofp(duration_us) == 0, and the kernel attempts to divide by 0.
While the kernel shouldn't be delaying for a long time, it can and does
happen and the intel_pstate driver should not panic in this situation. This
patch changes the div_fp() function to use div64_s64() to allow for "long"
division. This will avoid the overflow condition on long delays.
[v2]: use div64_s64() in div_fp()
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2015-06-15 20:43:29 +03:00
/*
 * Fixed-point division: returns (x << FRAC_BITS) / y.
 *
 * The operands are 64 bits wide and the division goes through div64_s64()
 * so that a long interval between samples (a 16.777 s delay was observed)
 * no longer overflows a 32-bit value; with the previous 32-bit operands
 * int_tofp(duration_us) became 0 and the kernel hit a divide error.
 */
static inline int32_t div_fp(s64 x, s64 y)
{
	s64 scaled = (int64_t)x << FRAC_BITS;

	return div64_s64(scaled, y);
}
2014-10-13 19:37:44 +04:00
static inline int ceiling_fp ( int32_t x )
{
int mask , ret ;
ret = fp_toint ( x ) ;
mask = ( 1 < < FRAC_BITS ) - 1 ;
if ( x & mask )
ret + = 1 ;
return ret ;
}
2013-02-06 21:02:13 +04:00
struct sample {
2013-10-21 20:20:32 +04:00
int32_t core_pct_busy ;
2013-02-06 21:02:13 +04:00
u64 aperf ;
u64 mperf ;
2015-04-12 07:10:26 +03:00
u64 tsc ;
2013-02-06 21:02:13 +04:00
int freq ;
2014-05-29 20:32:24 +04:00
ktime_t time ;
2013-02-06 21:02:13 +04:00
} ;
/*
 * P-state bookkeeping for one CPU.  min_pstate, max_pstate and
 * turbo_pstate are compared against ACPI _PSS control values, and
 * scaling multiplies a P-state number into a cpufreq frequency.
 */
struct pstate_data {
	int current_pstate;
	int min_pstate;
	int max_pstate;
	int max_pstate_physical;
	int scaling;
	int turbo_pstate;
};
2013-12-18 22:32:39 +04:00
/*
 * Per-CPU VID values.
 * NOTE(review): the users of this structure are outside this section;
 * ratio is presumably a fixed-point value, confirm against get_vid().
 */
struct vid_data {
	int min;
	int max;
	int turbo;
	int32_t ratio;
};
2013-02-06 21:02:13 +04:00
/*
 * State of one PID controller.  setpoint and deadband are plain integers;
 * integral, the three gains and last_err are fixed-point values (see
 * pid_reset(), pid_*_gain_set() and pid_calc()).
 */
struct _pid {
	int setpoint;
	int32_t integral;
	int32_t p_gain;
	int32_t i_gain;
	int32_t d_gain;
	int deadband;
	int32_t last_err;
};
struct cpudata {
int cpu ;
struct timer_list timer ;
struct pstate_data pstate ;
2013-12-18 22:32:39 +04:00
struct vid_data vid ;
2013-02-06 21:02:13 +04:00
struct _pid pid ;
2014-05-29 20:32:24 +04:00
ktime_t last_sample_time ;
2013-02-06 21:02:13 +04:00
u64 prev_aperf ;
u64 prev_mperf ;
2015-04-12 07:10:26 +03:00
u64 prev_tsc ;
2014-02-12 22:01:04 +04:00
struct sample sample ;
2015-10-15 02:12:01 +03:00
# if IS_ENABLED(CONFIG_ACPI)
struct acpi_processor_performance acpi_perf_data ;
# endif
2013-02-06 21:02:13 +04:00
} ;
static struct cpudata * * all_cpu_data ;
/*
 * Tunables of the PID-based adjustment policy.  The *_gain_pct values are
 * percentages that pid_*_gain_set() converts to fixed-point fractions;
 * every field is also exported through debugfs (see pid_files[]).
 */
struct pstate_adjust_policy {
	int sample_rate_ms;
	int deadband;
	int setpoint;
	int p_gain_pct;
	int d_gain_pct;
	int i_gain_pct;
};
2013-10-21 20:20:34 +04:00
/*
 * CPU-model specific callbacks: five getters that return an int, a setter
 * taking the target P-state, and a hook that receives the per-CPU data.
 */
struct pstate_funcs {
	int (*get_max)(void);
	int (*get_max_physical)(void);
	int (*get_min)(void);
	int (*get_turbo)(void);
	int (*get_scaling)(void);
	void (*set)(struct cpudata *, int pstate);
	void (*get_vid)(struct cpudata *);
};
2013-10-21 20:20:34 +04:00
/* Bundles default PID tunables with the callbacks for one CPU model. */
struct cpu_defaults {
	struct pstate_adjust_policy pid_policy;
	struct pstate_funcs funcs;
};
2013-10-21 20:20:34 +04:00
/* Active PID tunables; each field is writable through debugfs. */
static struct pstate_adjust_policy pid_params;

/* Active set of CPU-model callbacks. */
static struct pstate_funcs pstate_funcs;

/*
 * Non-zero when HWP is in use: limit changes are then pushed through
 * intel_pstate_hwp_set() and the PID debugfs files are not created.
 */
static int hwp_active;

/* Non-zero when ACPI performance (_PSS) data must not be used. */
static int no_acpi_perf;
2013-10-21 20:20:34 +04:00
2013-02-06 21:02:13 +04:00
/*
 * Performance limits.  The *_pct members are percentages in 0..100;
 * max_perf and min_perf hold the same limits as fixed-point fractions
 * (pct / 100).  The effective max/min percentage is derived from the
 * policy and sysfs values in store_max_perf_pct()/store_min_perf_pct().
 */
struct perf_limits {
	int no_turbo;
	int turbo_disabled;

	int max_perf_pct;
	int min_perf_pct;
	int32_t max_perf;
	int32_t min_perf;

	int max_policy_pct;
	int max_sysfs_pct;
	int min_policy_pct;
	int min_sysfs_pct;

	int max_perf_ctl;
	int min_perf_ctl;
};
2015-10-15 14:34:15 +03:00
static struct perf_limits performance_limits = {
. no_turbo = 0 ,
. turbo_disabled = 0 ,
. max_perf_pct = 100 ,
. max_perf = int_tofp ( 1 ) ,
. min_perf_pct = 100 ,
. min_perf = int_tofp ( 1 ) ,
. max_policy_pct = 100 ,
. max_sysfs_pct = 100 ,
. min_policy_pct = 0 ,
. min_sysfs_pct = 0 ,
} ;
static struct perf_limits powersave_limits = {
2013-02-06 21:02:13 +04:00
. no_turbo = 0 ,
2014-10-13 19:37:41 +04:00
. turbo_disabled = 0 ,
2013-02-06 21:02:13 +04:00
. max_perf_pct = 100 ,
. max_perf = int_tofp ( 1 ) ,
. min_perf_pct = 0 ,
. min_perf = 0 ,
2013-05-07 19:20:26 +04:00
. max_policy_pct = 100 ,
. max_sysfs_pct = 100 ,
2015-01-30 00:03:52 +03:00
. min_policy_pct = 0 ,
. min_sysfs_pct = 0 ,
2015-10-15 02:12:03 +03:00
. max_perf_ctl = 0 ,
. min_perf_ctl = 0 ,
2013-02-06 21:02:13 +04:00
} ;
2015-10-15 14:34:15 +03:00
/*
 * Limits currently in effect.  Starts out at the performance set when the
 * default cpufreq governor is "performance", at the powersave set otherwise.
 */
static struct perf_limits *limits =
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE
	&performance_limits;
#else
	&powersave_limits;
#endif
2015-10-15 02:12:01 +03:00
# if IS_ENABLED(CONFIG_ACPI)
/*
* The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and
* in TURBO_RATIO_LIMIT MSR , which pstate driver stores in max_pstate and
* max_turbo_pstate fields . The PERF_CTL MSR contains 16 bit value for P state
* ratio , out of it only high 8 bits are used . For example 0x1700 is setting
* target ratio 0x17 . The _PSS control value stores in a format which can be
* directly written to PERF_CTL MSR . But in intel_pstate driver this shift
* occurs during write to PERF_CTL ( E . g . for cores core_set_pstate ( ) ) .
* This function converts the _PSS control value to intel pstate driver format
* for comparison and assignment .
*/
static int convert_to_native_pstate_format ( struct cpudata * cpu , int index )
{
return cpu - > acpi_perf_data . states [ index ] . control > > 8 ;
}
static int intel_pstate_init_perf_limits ( struct cpufreq_policy * policy )
{
struct cpudata * cpu ;
int ret ;
bool turbo_absent = false ;
int max_pstate_index ;
int min_pss_ctl , max_pss_ctl , turbo_pss_ctl ;
int i ;
cpu = all_cpu_data [ policy - > cpu ] ;
pr_debug ( " intel_pstate: default limits 0x%x 0x%x 0x%x \n " ,
cpu - > pstate . min_pstate , cpu - > pstate . max_pstate ,
cpu - > pstate . turbo_pstate ) ;
if ( ! cpu - > acpi_perf_data . shared_cpu_map & &
zalloc_cpumask_var_node ( & cpu - > acpi_perf_data . shared_cpu_map ,
GFP_KERNEL , cpu_to_node ( policy - > cpu ) ) ) {
return - ENOMEM ;
}
ret = acpi_processor_register_performance ( & cpu - > acpi_perf_data ,
policy - > cpu ) ;
if ( ret )
return ret ;
/*
* Check if the control value in _PSS is for PERF_CTL MSR , which should
* guarantee that the states returned by it map to the states in our
* list directly .
*/
if ( cpu - > acpi_perf_data . control_register . space_id ! =
ACPI_ADR_SPACE_FIXED_HARDWARE )
return - EIO ;
pr_debug ( " intel_pstate: CPU%u - ACPI _PSS perf data \n " , policy - > cpu ) ;
for ( i = 0 ; i < cpu - > acpi_perf_data . state_count ; i + + )
pr_debug ( " %cP%d: %u MHz, %u mW, 0x%x \n " ,
( i = = cpu - > acpi_perf_data . state ? ' * ' : ' ' ) , i ,
( u32 ) cpu - > acpi_perf_data . states [ i ] . core_frequency ,
( u32 ) cpu - > acpi_perf_data . states [ i ] . power ,
( u32 ) cpu - > acpi_perf_data . states [ i ] . control ) ;
/*
* If there is only one entry _PSS , simply ignore _PSS and continue as
* usual without taking _PSS into account
*/
if ( cpu - > acpi_perf_data . state_count < 2 )
return 0 ;
turbo_pss_ctl = convert_to_native_pstate_format ( cpu , 0 ) ;
min_pss_ctl = convert_to_native_pstate_format ( cpu ,
cpu - > acpi_perf_data . state_count - 1 ) ;
/* Check if there is a turbo freq in _PSS */
if ( turbo_pss_ctl < = cpu - > pstate . max_pstate & &
turbo_pss_ctl > cpu - > pstate . min_pstate ) {
pr_debug ( " intel_pstate: no turbo range exists in _PSS \n " ) ;
2015-10-15 14:34:15 +03:00
limits - > no_turbo = limits - > turbo_disabled = 1 ;
2015-10-15 02:12:01 +03:00
cpu - > pstate . turbo_pstate = cpu - > pstate . max_pstate ;
turbo_absent = true ;
}
/* Check if the max non turbo p state < Intel P state max */
max_pstate_index = turbo_absent ? 0 : 1 ;
max_pss_ctl = convert_to_native_pstate_format ( cpu , max_pstate_index ) ;
if ( max_pss_ctl < cpu - > pstate . max_pstate & &
max_pss_ctl > cpu - > pstate . min_pstate )
cpu - > pstate . max_pstate = max_pss_ctl ;
/* check If min perf > Intel P State min */
if ( min_pss_ctl > cpu - > pstate . min_pstate & &
min_pss_ctl < cpu - > pstate . max_pstate ) {
cpu - > pstate . min_pstate = min_pss_ctl ;
policy - > cpuinfo . min_freq = min_pss_ctl * cpu - > pstate . scaling ;
}
if ( turbo_absent )
policy - > cpuinfo . max_freq = cpu - > pstate . max_pstate *
cpu - > pstate . scaling ;
else {
policy - > cpuinfo . max_freq = cpu - > pstate . turbo_pstate *
cpu - > pstate . scaling ;
/*
* The _PSS table doesn ' t contain whole turbo frequency range .
* This just contains + 1 MHZ above the max non turbo frequency ,
* with control value corresponding to max turbo ratio . But
* when cpufreq set policy is called , it will call with this
* max frequency , which will cause a reduced performance as
* this driver uses real max turbo frequency as the max
* frequeny . So correct this frequency in _PSS table to
* correct max turbo frequency based on the turbo ratio .
* Also need to convert to MHz as _PSS freq is in MHz .
*/
cpu - > acpi_perf_data . states [ 0 ] . core_frequency =
turbo_pss_ctl * 100 ;
}
pr_debug ( " intel_pstate: Updated limits using _PSS 0x%x 0x%x 0x%x \n " ,
cpu - > pstate . min_pstate , cpu - > pstate . max_pstate ,
cpu - > pstate . turbo_pstate ) ;
pr_debug ( " intel_pstate: policy max_freq=%d Khz min_freq = %d KHz \n " ,
policy - > cpuinfo . max_freq , policy - > cpuinfo . min_freq ) ;
return 0 ;
}
static int intel_pstate_exit_perf_limits ( struct cpufreq_policy * policy )
{
struct cpudata * cpu ;
if ( ! no_acpi_perf )
return 0 ;
cpu = all_cpu_data [ policy - > cpu ] ;
acpi_processor_unregister_performance ( policy - > cpu ) ;
return 0 ;
}
# else
/* Without CONFIG_ACPI there is no _PSS data to apply; always succeeds. */
static int intel_pstate_init_perf_limits(struct cpufreq_policy *policy)
{
	return 0;
}
/* Without CONFIG_ACPI nothing was registered; always succeeds. */
static int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
{
	return 0;
}
# endif
2013-02-06 21:02:13 +04:00
static inline void pid_reset ( struct _pid * pid , int setpoint , int busy ,
2014-07-18 19:37:23 +04:00
int deadband , int integral ) {
2013-02-06 21:02:13 +04:00
pid - > setpoint = setpoint ;
pid - > deadband = deadband ;
pid - > integral = int_tofp ( integral ) ;
2014-02-12 22:01:05 +04:00
pid - > last_err = int_tofp ( setpoint ) - int_tofp ( busy ) ;
2013-02-06 21:02:13 +04:00
}
static inline void pid_p_gain_set ( struct _pid * pid , int percent )
{
pid - > p_gain = div_fp ( int_tofp ( percent ) , int_tofp ( 100 ) ) ;
}
static inline void pid_i_gain_set ( struct _pid * pid , int percent )
{
pid - > i_gain = div_fp ( int_tofp ( percent ) , int_tofp ( 100 ) ) ;
}
static inline void pid_d_gain_set ( struct _pid * pid , int percent )
{
pid - > d_gain = div_fp ( int_tofp ( percent ) , int_tofp ( 100 ) ) ;
}
2013-10-21 20:20:32 +04:00
static signed int pid_calc ( struct _pid * pid , int32_t busy )
2013-02-06 21:02:13 +04:00
{
2013-10-21 20:20:32 +04:00
signed int result ;
2013-02-06 21:02:13 +04:00
int32_t pterm , dterm , fp_error ;
int32_t integral_limit ;
2013-10-21 20:20:32 +04:00
fp_error = int_tofp ( pid - > setpoint ) - busy ;
2013-02-06 21:02:13 +04:00
2013-10-21 20:20:32 +04:00
if ( abs ( fp_error ) < = int_tofp ( pid - > deadband ) )
2013-02-06 21:02:13 +04:00
return 0 ;
pterm = mul_fp ( pid - > p_gain , fp_error ) ;
pid - > integral + = fp_error ;
2014-12-10 23:39:38 +03:00
/*
* We limit the integral here so that it will never
* get higher than 30. This prevents it from becoming
* too large an input over long periods of time and allows
* it to get factored out sooner .
*
* The value of 30 was chosen through experimentation .
*/
2013-02-06 21:02:13 +04:00
integral_limit = int_tofp ( 30 ) ;
if ( pid - > integral > integral_limit )
pid - > integral = integral_limit ;
if ( pid - > integral < - integral_limit )
pid - > integral = - integral_limit ;
2013-10-21 20:20:32 +04:00
dterm = mul_fp ( pid - > d_gain , fp_error - pid - > last_err ) ;
pid - > last_err = fp_error ;
2013-02-06 21:02:13 +04:00
result = pterm + mul_fp ( pid - > integral , pid - > i_gain ) + dterm ;
2014-06-18 00:36:10 +04:00
result = result + ( 1 < < ( FRAC_BITS - 1 ) ) ;
2013-02-06 21:02:13 +04:00
return ( signed int ) fp_toint ( result ) ;
}
static inline void intel_pstate_busy_pid_reset ( struct cpudata * cpu )
{
2013-10-21 20:20:34 +04:00
pid_p_gain_set ( & cpu - > pid , pid_params . p_gain_pct ) ;
pid_d_gain_set ( & cpu - > pid , pid_params . d_gain_pct ) ;
pid_i_gain_set ( & cpu - > pid , pid_params . i_gain_pct ) ;
2013-02-06 21:02:13 +04:00
2014-07-18 19:37:20 +04:00
pid_reset ( & cpu - > pid , pid_params . setpoint , 100 , pid_params . deadband , 0 ) ;
2013-02-06 21:02:13 +04:00
}
static inline void intel_pstate_reset_all_pid ( void )
{
unsigned int cpu ;
2014-07-18 19:37:19 +04:00
2013-02-06 21:02:13 +04:00
for_each_online_cpu ( cpu ) {
if ( all_cpu_data [ cpu ] )
intel_pstate_busy_pid_reset ( all_cpu_data [ cpu ] ) ;
}
}
2014-10-13 19:37:41 +04:00
static inline void update_turbo_state ( void )
{
u64 misc_en ;
struct cpudata * cpu ;
cpu = all_cpu_data [ 0 ] ;
rdmsrl ( MSR_IA32_MISC_ENABLE , misc_en ) ;
2015-10-15 14:34:15 +03:00
limits - > turbo_disabled =
2014-10-13 19:37:41 +04:00
( misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE | |
cpu - > pstate . max_pstate = = cpu - > pstate . turbo_pstate ) ;
}
2014-11-06 20:40:47 +03:00
/*
 * Program the HWP request MSR of every online CPU from the current limits.
 *
 * The min/max percentages are mapped onto the lowest..highest performance
 * range read from MSR_HWP_CAPABILITIES on the calling CPU.  With no_turbo
 * set, the maximum is additionally capped at the guaranteed performance
 * level.
 */
static void intel_pstate_hwp_set(void)
{
	int lowest, highest, span, cpu;
	int floor, ceiling, guaranteed;
	u64 request, cap;

	rdmsrl(MSR_HWP_CAPABILITIES, cap);
	lowest = HWP_LOWEST_PERF(cap);
	highest = HWP_HIGHEST_PERF(cap);
	span = highest - lowest;

	get_online_cpus();

	for_each_online_cpu(cpu) {
		rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &request);

		/* Minimum: replace the MIN_PERF field. */
		floor = lowest + limits->min_perf_pct * span / 100;
		request &= ~HWP_MIN_PERF(~0L);
		request |= HWP_MIN_PERF(floor);

		/* Maximum: replace the MAX_PERF field. */
		ceiling = lowest + limits->max_perf_pct * span / 100;
		if (limits->no_turbo) {
			guaranteed = HWP_GUARANTEED_PERF(cap);
			if (guaranteed < ceiling)
				ceiling = guaranteed;
		}
		request &= ~HWP_MAX_PERF(~0L);
		request |= HWP_MAX_PERF(ceiling);

		wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, request);
	}

	put_online_cpus();
}
2013-02-06 21:02:13 +04:00
/************************** debugfs begin ************************/
/*
 * debugfs write handler: store @val in the 32-bit tunable that @data points
 * to, then reset every CPU's PID controller so the new value takes effect.
 * Always returns 0.
 */
static int pid_param_set(void *data, u64 val)
{
	u32 *param = data;

	*param = val;
	intel_pstate_reset_all_pid();

	return 0;
}
2014-07-18 19:37:19 +04:00
2013-02-06 21:02:13 +04:00
/*
 * debugfs read handler: report the 32-bit tunable that @data points to
 * through @val.  Always returns 0.
 */
static int pid_param_get(void *data, u64 *val)
{
	const u32 *param = data;

	*val = *param;

	return 0;
}
2014-07-18 19:37:20 +04:00
/* File operations for the PID tunables, shown as unsigned decimal. */
DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set,
			"%llu\n");
2013-02-06 21:02:13 +04:00
/* Describes one debugfs tunable: its file name and the variable behind it. */
struct pid_param {
	char *name;
	void *value;
};
static struct pid_param pid_files [ ] = {
2013-10-21 20:20:34 +04:00
{ " sample_rate_ms " , & pid_params . sample_rate_ms } ,
{ " d_gain_pct " , & pid_params . d_gain_pct } ,
{ " i_gain_pct " , & pid_params . i_gain_pct } ,
{ " deadband " , & pid_params . deadband } ,
{ " setpoint " , & pid_params . setpoint } ,
{ " p_gain_pct " , & pid_params . p_gain_pct } ,
2013-02-06 21:02:13 +04:00
{ NULL , NULL }
} ;
2014-07-18 19:37:17 +04:00
/*
 * Create the "pstate_snb" debugfs directory with one file per entry of
 * pid_files[].  Does nothing when HWP is active or when the directory
 * cannot be created.
 */
static void __init intel_pstate_debug_expose_params(void)
{
	struct dentry *debugfs_parent;
	const struct pid_param *entry;

	if (hwp_active)
		return;

	debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
	if (IS_ERR_OR_NULL(debugfs_parent))
		return;

	for (entry = pid_files; entry->name; entry++)
		debugfs_create_file(entry->name, 0660, debugfs_parent,
				    entry->value, &fops_pid_param);
}
/************************** debugfs end ************************/
/************************** sysfs begin ************************/
/*
 * Generate show_<file_name>(), a sysfs show handler that prints the
 * member <object> of the active limits as an unsigned decimal.
 */
#define show_one(file_name, object)					\
	static ssize_t show_##file_name					\
	(struct kobject *kobj, struct attribute *attr, char *buf)	\
	{								\
		return sprintf(buf, "%u\n", limits->object);		\
	}
2015-01-29 02:03:27 +03:00
static ssize_t show_turbo_pct ( struct kobject * kobj ,
struct attribute * attr , char * buf )
{
struct cpudata * cpu ;
int total , no_turbo , turbo_pct ;
uint32_t turbo_fp ;
cpu = all_cpu_data [ 0 ] ;
total = cpu - > pstate . turbo_pstate - cpu - > pstate . min_pstate + 1 ;
no_turbo = cpu - > pstate . max_pstate - cpu - > pstate . min_pstate + 1 ;
turbo_fp = div_fp ( int_tofp ( no_turbo ) , int_tofp ( total ) ) ;
turbo_pct = 100 - fp_toint ( mul_fp ( turbo_fp , int_tofp ( 100 ) ) ) ;
return sprintf ( buf , " %u \n " , turbo_pct ) ;
}
2015-01-29 02:03:28 +03:00
static ssize_t show_num_pstates ( struct kobject * kobj ,
struct attribute * attr , char * buf )
{
struct cpudata * cpu ;
int total ;
cpu = all_cpu_data [ 0 ] ;
total = cpu - > pstate . turbo_pstate - cpu - > pstate . min_pstate + 1 ;
return sprintf ( buf , " %u \n " , total ) ;
}
2014-10-13 19:37:41 +04:00
static ssize_t show_no_turbo ( struct kobject * kobj ,
struct attribute * attr , char * buf )
{
ssize_t ret ;
update_turbo_state ( ) ;
2015-10-15 14:34:15 +03:00
if ( limits - > turbo_disabled )
ret = sprintf ( buf , " %u \n " , limits - > turbo_disabled ) ;
2014-10-13 19:37:41 +04:00
else
2015-10-15 14:34:15 +03:00
ret = sprintf ( buf , " %u \n " , limits - > no_turbo ) ;
2014-10-13 19:37:41 +04:00
return ret ;
}
2013-02-06 21:02:13 +04:00
/*
 * sysfs store handler for no_turbo.
 *
 * Parses an unsigned decimal from @buf and stores it, clamped to 0..1, in
 * limits->no_turbo; with HWP active the new limit is pushed to hardware.
 *
 * Returns @count on success, -EINVAL if no number can be parsed, or -EPERM
 * when turbo is currently reported as disabled.
 */
static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
			      const char *buf, size_t count)
{
	unsigned int input;

	if (sscanf(buf, "%u", &input) != 1)
		return -EINVAL;

	update_turbo_state();
	if (limits->turbo_disabled) {
		pr_warn("intel_pstate: Turbo disabled by BIOS or unavailable on processor\n");
		return -EPERM;
	}

	limits->no_turbo = clamp_t(int, input, 0, 1);

	if (hwp_active)
		intel_pstate_hwp_set();

	return count;
}
/*
 * sysfs store handler for max_perf_pct.
 *
 * The parsed value is clamped to 0..100 and kept as max_sysfs_pct.  The
 * effective max_perf_pct is the smaller of the policy and sysfs maxima,
 * raised to at least min_policy_pct and min_perf_pct; max_perf is the same
 * limit as a fixed-point fraction.  With HWP active the new limit is
 * pushed to hardware.
 *
 * Returns @count on success or -EINVAL if no number can be parsed.
 */
static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;

	if (sscanf(buf, "%u", &input) != 1)
		return -EINVAL;

	limits->max_sysfs_pct = clamp_t(int, input, 0, 100);

	limits->max_perf_pct = min(limits->max_policy_pct,
				   limits->max_sysfs_pct);
	limits->max_perf_pct = max(limits->min_policy_pct,
				   limits->max_perf_pct);
	limits->max_perf_pct = max(limits->min_perf_pct,
				   limits->max_perf_pct);

	limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
				  int_tofp(100));

	if (hwp_active)
		intel_pstate_hwp_set();

	return count;
}
/*
 * sysfs store handler for min_perf_pct.
 *
 * The parsed value is clamped to 0..100 and kept as min_sysfs_pct.  The
 * effective min_perf_pct is the larger of the policy and sysfs minima,
 * capped at max_policy_pct and max_perf_pct; min_perf is the same limit as
 * a fixed-point fraction.  With HWP active the new limit is pushed to
 * hardware.
 *
 * Returns @count on success or -EINVAL if no number can be parsed.
 */
static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;

	if (sscanf(buf, "%u", &input) != 1)
		return -EINVAL;

	limits->min_sysfs_pct = clamp_t(int, input, 0, 100);

	limits->min_perf_pct = max(limits->min_policy_pct,
				   limits->min_sysfs_pct);
	limits->min_perf_pct = min(limits->max_policy_pct,
				   limits->min_perf_pct);
	limits->min_perf_pct = min(limits->max_perf_pct,
				   limits->min_perf_pct);

	limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
				  int_tofp(100));

	if (hwp_active)
		intel_pstate_hwp_set();

	return count;
}
/*
 * Attribute objects listed in intel_pstate_attributes[].  show_one() and
 * define_one_global_{rw,ro}() are macros provided outside this section.
 */
show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);
define_one_global_ro(turbo_pct);
define_one_global_ro(num_pstates);
2013-02-06 21:02:13 +04:00
static struct attribute * intel_pstate_attributes [ ] = {
& no_turbo . attr ,
& max_perf_pct . attr ,
& min_perf_pct . attr ,
2015-01-29 02:03:27 +03:00
& turbo_pct . attr ,
2015-01-29 02:03:28 +03:00
& num_pstates . attr ,
2013-02-06 21:02:13 +04:00
NULL
} ;
/* Attribute group registered by intel_pstate_sysfs_expose_params(). */
static struct attribute_group intel_pstate_attr_group = {
	.attrs = intel_pstate_attributes,
};
2014-07-18 19:37:17 +04:00
/*
 * Create the "intel_pstate" kobject under the cpu subsystem root device and
 * attach the driver's attribute group to it.  Either step failing is treated
 * as fatal (BUG_ON).
 */
static void __init intel_pstate_sysfs_expose_params(void)
{
	struct kobject *kobj;
	int err;

	kobj = kobject_create_and_add("intel_pstate",
				      &cpu_subsys.dev_root->kobj);
	BUG_ON(!kobj);

	err = sysfs_create_group(kobj, &intel_pstate_attr_group);
	BUG_ON(err);
}
/************************** sysfs end ************************/
2014-11-06 20:40:47 +03:00
2015-07-14 19:46:23 +03:00
static void intel_pstate_hwp_enable ( struct cpudata * cpudata )
2014-11-06 20:40:47 +03:00
{
2015-07-14 19:46:23 +03:00
wrmsrl_on_cpu ( cpudata - > cpu , MSR_PM_ENABLE , 0x1 ) ;
2014-11-06 20:40:47 +03:00
}
2013-10-21 20:20:35 +04:00
static int byt_get_min_pstate ( void )
{
u64 value ;
2014-07-18 19:37:19 +04:00
2013-10-21 20:20:35 +04:00
rdmsrl ( BYT_RATIOS , value ) ;
2014-06-20 18:27:58 +04:00
return ( value > > 8 ) & 0x7F ;
2013-10-21 20:20:35 +04:00
}
static int byt_get_max_pstate ( void )
{
u64 value ;
2014-07-18 19:37:19 +04:00
2013-10-21 20:20:35 +04:00
rdmsrl ( BYT_RATIOS , value ) ;
2014-06-20 18:27:58 +04:00
return ( value > > 16 ) & 0x7F ;
2013-10-21 20:20:35 +04:00
}
2013-02-06 21:02:13 +04:00
2014-02-12 22:01:07 +04:00
static int byt_get_turbo_pstate ( void )
{
u64 value ;
2014-07-18 19:37:19 +04:00
2014-02-12 22:01:07 +04:00
rdmsrl ( BYT_TURBO_RATIOS , value ) ;
2014-06-20 18:27:58 +04:00
return value & 0x7F ;
2014-02-12 22:01:07 +04:00
}
2013-12-18 22:32:39 +04:00
static void byt_set_pstate ( struct cpudata * cpudata , int pstate )
{
u64 val ;
int32_t vid_fp ;
u32 vid ;
2015-07-29 18:53:10 +03:00
val = ( u64 ) pstate < < 8 ;
2015-10-15 14:34:15 +03:00
if ( limits - > no_turbo & & ! limits - > turbo_disabled )
2013-12-18 22:32:39 +04:00
val | = ( u64 ) 1 < < 32 ;
vid_fp = cpudata - > vid . min + mul_fp (
int_tofp ( pstate - cpudata - > pstate . min_pstate ) ,
cpudata - > vid . ratio ) ;
vid_fp = clamp_t ( int32_t , vid_fp , cpudata - > vid . min , cpudata - > vid . max ) ;
2014-10-13 19:37:44 +04:00
vid = ceiling_fp ( vid_fp ) ;
2013-12-18 22:32:39 +04:00
2014-05-08 23:57:23 +04:00
if ( pstate > cpudata - > pstate . max_pstate )
vid = cpudata - > vid . turbo ;
2013-12-18 22:32:39 +04:00
val | = vid ;
2015-05-12 17:59:42 +03:00
wrmsrl_on_cpu ( cpudata - > cpu , MSR_IA32_PERF_CTL , val ) ;
2013-12-18 22:32:39 +04:00
}
2014-10-13 19:37:43 +04:00
#define BYT_BCLK_FREQS 5
static int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800 };

/*
 * Return the frequency scaling factor for this CPU: the byt_freq_table[]
 * entry selected by the low bits of MSR_FSB_FREQ, multiplied by 100.
 *
 * The selector is taken from bits 2:0 so that all five table entries are
 * reachable (a two-bit mask can never select the last one), and the bounds
 * check rejects every index outside the table, including index
 * BYT_BCLK_FREQS itself.
 *
 * NOTE(review): the 3-bit field width follows the Intel SDM description of
 * MSR_FSB_FREQ for this CPU family - confirm against the target SDM table.
 */
static int byt_get_scaling(void)
{
	u64 value;
	int i;

	rdmsrl(MSR_FSB_FREQ, value);
	i = value & 0x7;

	BUG_ON(i >= BYT_BCLK_FREQS);

	return byt_freq_table[i] * 100;
}
2013-12-18 22:32:39 +04:00
static void byt_get_vid ( struct cpudata * cpudata )
{
u64 value ;
rdmsrl ( BYT_VIDS , value ) ;
2014-06-20 18:27:58 +04:00
cpudata - > vid . min = int_tofp ( ( value > > 8 ) & 0x7f ) ;
cpudata - > vid . max = int_tofp ( ( value > > 16 ) & 0x7f ) ;
2013-12-18 22:32:39 +04:00
cpudata - > vid . ratio = div_fp (
cpudata - > vid . max - cpudata - > vid . min ,
int_tofp ( cpudata - > pstate . max_pstate -
cpudata - > pstate . min_pstate ) ) ;
2014-05-08 23:57:23 +04:00
rdmsrl ( BYT_TURBO_VIDS , value ) ;
cpudata - > vid . turbo = value & 0x7f ;
2013-12-18 22:32:39 +04:00
}
2013-10-21 20:20:34 +04:00
static int core_get_min_pstate ( void )
2013-02-06 21:02:13 +04:00
{
u64 value ;
2014-07-18 19:37:19 +04:00
2013-03-20 18:21:10 +04:00
rdmsrl ( MSR_PLATFORM_INFO , value ) ;
2013-02-06 21:02:13 +04:00
return ( value > > 40 ) & 0xFF ;
}
2015-10-15 02:12:00 +03:00
static int core_get_max_pstate_physical ( void )
2013-02-06 21:02:13 +04:00
{
u64 value ;
2014-07-18 19:37:19 +04:00
2013-03-20 18:21:10 +04:00
rdmsrl ( MSR_PLATFORM_INFO , value ) ;
2013-02-06 21:02:13 +04:00
return ( value > > 8 ) & 0xFF ;
}
2013-10-21 20:20:34 +04:00
static int core_get_max_pstate ( void )
2013-02-06 21:02:13 +04:00
{
2015-10-15 02:11:59 +03:00
u64 tar ;
u64 plat_info ;
int max_pstate ;
int err ;
rdmsrl ( MSR_PLATFORM_INFO , plat_info ) ;
max_pstate = ( plat_info > > 8 ) & 0xFF ;
err = rdmsrl_safe ( MSR_TURBO_ACTIVATION_RATIO , & tar ) ;
if ( ! err ) {
/* Do some sanity checking for safety */
if ( plat_info & 0x600000000 ) {
u64 tdp_ctrl ;
u64 tdp_ratio ;
int tdp_msr ;
err = rdmsrl_safe ( MSR_CONFIG_TDP_CONTROL , & tdp_ctrl ) ;
if ( err )
goto skip_tar ;
tdp_msr = MSR_CONFIG_TDP_NOMINAL + tdp_ctrl ;
err = rdmsrl_safe ( tdp_msr , & tdp_ratio ) ;
if ( err )
goto skip_tar ;
if ( tdp_ratio - 1 = = tar ) {
max_pstate = tar ;
pr_debug ( " max_pstate=TAC %x \n " , max_pstate ) ;
} else {
goto skip_tar ;
}
}
}
2014-07-18 19:37:19 +04:00
2015-10-15 02:11:59 +03:00
skip_tar :
return max_pstate ;
2013-02-06 21:02:13 +04:00
}
2013-10-21 20:20:34 +04:00
static int core_get_turbo_pstate ( void )
2013-02-06 21:02:13 +04:00
{
u64 value ;
int nont , ret ;
2014-07-18 19:37:19 +04:00
2013-03-20 18:21:10 +04:00
rdmsrl ( MSR_NHM_TURBO_RATIO_LIMIT , value ) ;
2013-10-21 20:20:34 +04:00
nont = core_get_max_pstate ( ) ;
2014-07-18 19:37:21 +04:00
ret = ( value ) & 255 ;
2013-02-06 21:02:13 +04:00
if ( ret < = nont )
ret = nont ;
return ret ;
}
2014-10-13 19:37:43 +04:00
/* Fixed scaling factor used for the core_params and knl_params CPUs. */
static inline int core_get_scaling(void)
{
	const int scaling = 100000;

	return scaling;
}
2013-12-18 22:32:39 +04:00
static void core_set_pstate ( struct cpudata * cpudata , int pstate )
2013-10-21 20:20:34 +04:00
{
u64 val ;
2015-07-29 18:53:10 +03:00
val = ( u64 ) pstate < < 8 ;
2015-10-15 14:34:15 +03:00
if ( limits - > no_turbo & & ! limits - > turbo_disabled )
2013-10-21 20:20:34 +04:00
val | = ( u64 ) 1 < < 32 ;
2014-03-19 19:45:54 +04:00
wrmsrl_on_cpu ( cpudata - > cpu , MSR_IA32_PERF_CTL , val ) ;
2013-10-21 20:20:34 +04:00
}
2015-04-10 20:22:18 +03:00
/*
 * Turbo P-state for the knl_params CPUs: bits 15:8 of
 * MSR_NHM_TURBO_RATIO_LIMIT, but never lower than the value returned by
 * core_get_max_pstate().
 */
static int knl_get_turbo_pstate(void)
{
	u64 ratio_limit;
	int base, turbo;

	rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, ratio_limit);
	base = core_get_max_pstate();
	turbo = (ratio_limit >> 8) & 0xFF;

	return turbo > base ? turbo : base;
}
2013-10-21 20:20:34 +04:00
static struct cpu_defaults core_params = {
. pid_policy = {
. sample_rate_ms = 10 ,
. deadband = 0 ,
. setpoint = 97 ,
. p_gain_pct = 20 ,
. d_gain_pct = 0 ,
. i_gain_pct = 0 ,
} ,
. funcs = {
. get_max = core_get_max_pstate ,
2015-10-15 02:12:00 +03:00
. get_max_physical = core_get_max_pstate_physical ,
2013-10-21 20:20:34 +04:00
. get_min = core_get_min_pstate ,
. get_turbo = core_get_turbo_pstate ,
2014-10-13 19:37:43 +04:00
. get_scaling = core_get_scaling ,
2013-10-21 20:20:34 +04:00
. set = core_set_pstate ,
} ,
} ;
2013-10-21 20:20:35 +04:00
static struct cpu_defaults byt_params = {
. pid_policy = {
. sample_rate_ms = 10 ,
. deadband = 0 ,
2015-04-10 21:06:43 +03:00
. setpoint = 60 ,
2013-10-21 20:20:35 +04:00
. p_gain_pct = 14 ,
. d_gain_pct = 0 ,
. i_gain_pct = 4 ,
} ,
. funcs = {
. get_max = byt_get_max_pstate ,
2015-10-15 02:12:00 +03:00
. get_max_physical = byt_get_max_pstate ,
2013-10-21 20:20:35 +04:00
. get_min = byt_get_min_pstate ,
2014-02-12 22:01:07 +04:00
. get_turbo = byt_get_turbo_pstate ,
2013-12-18 22:32:39 +04:00
. set = byt_set_pstate ,
2014-10-13 19:37:43 +04:00
. get_scaling = byt_get_scaling ,
2013-12-18 22:32:39 +04:00
. get_vid = byt_get_vid ,
2013-10-21 20:20:35 +04:00
} ,
} ;
2015-04-10 20:22:18 +03:00
static struct cpu_defaults knl_params = {
. pid_policy = {
. sample_rate_ms = 10 ,
. deadband = 0 ,
. setpoint = 97 ,
. p_gain_pct = 20 ,
. d_gain_pct = 0 ,
. i_gain_pct = 0 ,
} ,
. funcs = {
. get_max = core_get_max_pstate ,
2015-10-15 02:12:00 +03:00
. get_max_physical = core_get_max_pstate_physical ,
2015-04-10 20:22:18 +03:00
. get_min = core_get_min_pstate ,
. get_turbo = knl_get_turbo_pstate ,
2015-07-21 11:41:13 +03:00
. get_scaling = core_get_scaling ,
2015-04-10 20:22:18 +03:00
. set = core_set_pstate ,
} ,
} ;
2013-02-06 21:02:13 +04:00
static void intel_pstate_get_min_max ( struct cpudata * cpu , int * min , int * max )
{
int max_perf = cpu - > pstate . turbo_pstate ;
2013-10-21 20:20:33 +04:00
int max_perf_adj ;
2013-02-06 21:02:13 +04:00
int min_perf ;
2014-07-18 19:37:19 +04:00
2015-10-15 14:34:15 +03:00
if ( limits - > no_turbo | | limits - > turbo_disabled )
2013-02-06 21:02:13 +04:00
max_perf = cpu - > pstate . max_pstate ;
2014-12-10 23:39:38 +03:00
/*
* performance can be limited by user through sysfs , by cpufreq
* policy , or by cpu specific default values determined through
* experimentation .
*/
2015-10-15 14:34:15 +03:00
if ( limits - > max_perf_ctl & & limits - > max_sysfs_pct > =
limits - > max_policy_pct ) {
* max = limits - > max_perf_ctl ;
2015-10-15 02:12:03 +03:00
} else {
max_perf_adj = fp_toint ( mul_fp ( int_tofp ( max_perf ) ,
2015-10-15 14:34:15 +03:00
limits - > max_perf ) ) ;
2015-10-15 02:12:03 +03:00
* max = clamp_t ( int , max_perf_adj , cpu - > pstate . min_pstate ,
cpu - > pstate . turbo_pstate ) ;
}
2013-02-06 21:02:13 +04:00
2015-10-15 14:34:15 +03:00
if ( limits - > min_perf_ctl ) {
* min = limits - > min_perf_ctl ;
2015-10-15 02:12:03 +03:00
} else {
min_perf = fp_toint ( mul_fp ( int_tofp ( max_perf ) ,
2015-10-15 14:34:15 +03:00
limits - > min_perf ) ) ;
2015-10-15 02:12:03 +03:00
* min = clamp_t ( int , min_perf , cpu - > pstate . min_pstate , max_perf ) ;
}
2013-02-06 21:02:13 +04:00
}
2015-06-02 07:12:34 +03:00
/*
 * Switch @cpu to @pstate.
 *
 * When @force is true the turbo state is refreshed, @pstate is clamped to
 * the currently allowed range, and nothing is written if the result equals
 * the current P-state.  When @force is false @pstate is applied as given.
 * In both cases a cpu_frequency trace event is emitted before the P-state
 * is recorded and programmed through pstate_funcs.set().
 */
static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force)
{
	int lo, hi;

	if (force) {
		update_turbo_state();
		intel_pstate_get_min_max(cpu, &lo, &hi);

		pstate = clamp_t(int, pstate, lo, hi);
		if (cpu->pstate.current_pstate == pstate)
			return;
	}

	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);

	cpu->pstate.current_pstate = pstate;
	pstate_funcs.set(cpu, pstate);
}
static void intel_pstate_get_cpu_pstates ( struct cpudata * cpu )
{
2013-10-21 20:20:34 +04:00
cpu - > pstate . min_pstate = pstate_funcs . get_min ( ) ;
cpu - > pstate . max_pstate = pstate_funcs . get_max ( ) ;
2015-10-15 02:12:00 +03:00
cpu - > pstate . max_pstate_physical = pstate_funcs . get_max_physical ( ) ;
2013-10-21 20:20:34 +04:00
cpu - > pstate . turbo_pstate = pstate_funcs . get_turbo ( ) ;
2014-10-13 19:37:43 +04:00
cpu - > pstate . scaling = pstate_funcs . get_scaling ( ) ;
2013-02-06 21:02:13 +04:00
2013-12-18 22:32:39 +04:00
if ( pstate_funcs . get_vid )
pstate_funcs . get_vid ( cpu ) ;
2015-06-02 07:12:34 +03:00
intel_pstate_set_pstate ( cpu , cpu - > pstate . min_pstate , false ) ;
2013-02-06 21:02:13 +04:00
}
2014-04-29 21:53:49 +04:00
static inline void intel_pstate_calc_busy ( struct cpudata * cpu )
2013-02-06 21:02:13 +04:00
{
2014-04-29 21:53:49 +04:00
struct sample * sample = & cpu - > sample ;
2014-05-30 21:10:57 +04:00
int64_t core_pct ;
2013-02-06 21:02:13 +04:00
2014-05-30 21:10:57 +04:00
core_pct = int_tofp ( sample - > aperf ) * int_tofp ( 100 ) ;
2014-07-18 19:37:27 +04:00
core_pct = div64_u64 ( core_pct , int_tofp ( sample - > mperf ) ) ;
2014-02-25 22:35:37 +04:00
2014-02-03 20:55:31 +04:00
sample - > freq = fp_toint (
2014-10-13 19:37:43 +04:00
mul_fp ( int_tofp (
2015-10-15 02:12:00 +03:00
cpu - > pstate . max_pstate_physical *
cpu - > pstate . scaling / 100 ) ,
2014-10-13 19:37:43 +04:00
core_pct ) ) ;
2014-02-03 20:55:31 +04:00
2014-05-30 21:10:57 +04:00
sample - > core_pct_busy = ( int32_t ) core_pct ;
2013-02-06 21:02:13 +04:00
}
static inline void intel_pstate_sample ( struct cpudata * cpu )
{
u64 aperf , mperf ;
2014-07-18 19:37:24 +04:00
unsigned long flags ;
2015-04-12 07:10:26 +03:00
u64 tsc ;
2013-02-06 21:02:13 +04:00
2014-07-18 19:37:24 +04:00
local_irq_save ( flags ) ;
2013-02-06 21:02:13 +04:00
rdmsrl ( MSR_IA32_APERF , aperf ) ;
rdmsrl ( MSR_IA32_MPERF , mperf ) ;
2015-10-15 22:34:21 +03:00
if ( cpu - > prev_mperf = = mperf ) {
local_irq_restore ( flags ) ;
return ;
}
2015-06-25 19:44:07 +03:00
tsc = rdtsc ( ) ;
2014-07-18 19:37:24 +04:00
local_irq_restore ( flags ) ;
2014-01-16 22:32:25 +04:00
2014-05-29 20:32:24 +04:00
cpu - > last_sample_time = cpu - > sample . time ;
cpu - > sample . time = ktime_get ( ) ;
2014-02-12 22:01:04 +04:00
cpu - > sample . aperf = aperf ;
cpu - > sample . mperf = mperf ;
2015-04-12 07:10:26 +03:00
cpu - > sample . tsc = tsc ;
2014-02-12 22:01:04 +04:00
cpu - > sample . aperf - = cpu - > prev_aperf ;
cpu - > sample . mperf - = cpu - > prev_mperf ;
2015-04-12 07:10:26 +03:00
cpu - > sample . tsc - = cpu - > prev_tsc ;
2013-05-07 19:20:25 +04:00
2014-04-29 21:53:49 +04:00
intel_pstate_calc_busy ( cpu ) ;
2013-02-06 21:02:13 +04:00
cpu - > prev_aperf = aperf ;
cpu - > prev_mperf = mperf ;
2015-04-12 07:10:26 +03:00
cpu - > prev_tsc = tsc ;
2013-02-06 21:02:13 +04:00
}
2014-11-06 20:40:47 +03:00
static inline void intel_hwp_set_sample_time ( struct cpudata * cpu )
{
int delay ;
delay = msecs_to_jiffies ( 50 ) ;
mod_timer_pinned ( & cpu - > timer , jiffies + delay ) ;
}
2013-02-06 21:02:13 +04:00
static inline void intel_pstate_set_sample_time ( struct cpudata * cpu )
{
2014-07-18 19:37:22 +04:00
int delay ;
2013-02-06 21:02:13 +04:00
2014-07-18 19:37:22 +04:00
delay = msecs_to_jiffies ( pid_params . sample_rate_ms ) ;
2013-02-06 21:02:13 +04:00
mod_timer_pinned ( & cpu - > timer , jiffies + delay ) ;
}
2013-10-21 20:20:32 +04:00
static inline int32_t intel_pstate_get_scaled_busy ( struct cpudata * cpu )
2013-02-06 21:02:13 +04:00
{
2014-05-29 20:32:24 +04:00
int32_t core_busy , max_pstate , current_pstate , sample_ratio ;
intel_pstate: Fix overflow in busy_scaled due to long delay
The kernel may delay interrupts for a long time which can result in timers
being delayed. If this occurs the intel_pstate driver will crash with a
divide by zero error:
divide error: 0000 [#1] SMP
Modules linked in: btrfs zlib_deflate raid6_pq xor msdos ext4 mbcache jbd2 binfmt_misc arc4 md4 nls_utf8 cifs dns_resolver tcp_lp bnep bluetooth rfkill fuse dm_service_time iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi nf_conntrack_netbios_ns nf_conntrack_broadcast nf_conntrack_ftp ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter ip_tables intel_powerclamp coretemp vfat fat kvm_intel iTCO_wdt iTCO_vendor_support ipmi_devintf sr_mod kvm crct10dif_pclmul
crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel cdc_ether lrw usbnet cdrom mii gf128mul glue_helper ablk_helper cryptd lpc_ich mfd_core pcspkr sb_edac edac_core ipmi_si ipmi_msghandler ioatdma wmi shpchp acpi_pad nfsd auth_rpcgss nfs_acl lockd uinput dm_multipath sunrpc xfs libcrc32c usb_storage sd_mod crc_t10dif crct10dif_common ixgbe mgag200 syscopyarea sysfillrect sysimgblt mdio drm_kms_helper ttm igb drm ptp pps_core dca i2c_algo_bit megaraid_sas i2c_core dm_mirror dm_region_hash dm_log dm_mod
CPU: 113 PID: 0 Comm: swapper/113 Tainted: G W -------------- 3.10.0-229.1.2.el7.x86_64 #1
Hardware name: IBM x3950 X6 -[3837AC2]-/00FN827, BIOS -[A8E112BUS-1.00]- 08/27/2014
task: ffff880fe8abe660 ti: ffff880fe8ae4000 task.ti: ffff880fe8ae4000
RIP: 0010:[<ffffffff814a9279>] [<ffffffff814a9279>] intel_pstate_timer_func+0x179/0x3d0
RSP: 0018:ffff883fff4e3db8 EFLAGS: 00010206
RAX: 0000000027100000 RBX: ffff883fe6965100 RCX: 0000000000000000
RDX: 0000000000000000 RSI: 0000000000000010 RDI: 000000002e53632d
RBP: ffff883fff4e3e20 R08: 000e6f69a5a125c0 R09: ffff883fe84ec001
R10: 0000000000000002 R11: 0000000000000005 R12: 00000000000049f5
R13: 0000000000271000 R14: 00000000000049f5 R15: 0000000000000246
FS: 0000000000000000(0000) GS:ffff883fff4e0000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f7668601000 CR3: 000000000190a000 CR4: 00000000001407e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Stack:
ffff883fff4e3e58 ffffffff81099dc1 0000000000000086 0000000000000071
ffff883fff4f3680 0000000000000071 fbdc8a965e33afee ffffffff810b69dd
ffff883fe84ec000 ffff883fe6965108 0000000000000100 ffffffff814a9100
Call Trace:
<IRQ>
[<ffffffff81099dc1>] ? run_posix_cpu_timers+0x51/0x840
[<ffffffff810b69dd>] ? trigger_load_balance+0x5d/0x200
[<ffffffff814a9100>] ? pid_param_set+0x130/0x130
[<ffffffff8107df56>] call_timer_fn+0x36/0x110
[<ffffffff814a9100>] ? pid_param_set+0x130/0x130
[<ffffffff8107fdcf>] run_timer_softirq+0x21f/0x320
[<ffffffff81077b2f>] __do_softirq+0xef/0x280
[<ffffffff816156dc>] call_softirq+0x1c/0x30
[<ffffffff81015d95>] do_softirq+0x65/0xa0
[<ffffffff81077ec5>] irq_exit+0x115/0x120
[<ffffffff81616355>] smp_apic_timer_interrupt+0x45/0x60
[<ffffffff81614a1d>] apic_timer_interrupt+0x6d/0x80
<EOI>
[<ffffffff814a9c32>] ? cpuidle_enter_state+0x52/0xc0
[<ffffffff814a9c28>] ? cpuidle_enter_state+0x48/0xc0
[<ffffffff814a9d65>] cpuidle_idle_call+0xc5/0x200
[<ffffffff8101d14e>] arch_cpu_idle+0xe/0x30
[<ffffffff810c67c1>] cpu_startup_entry+0xf1/0x290
[<ffffffff8104228a>] start_secondary+0x1ba/0x230
Code: 42 0f 00 45 89 e6 48 01 c2 43 8d 44 6d 00 39 d0 73 26 49 c1 e5 08 89 d2 4d 63 f4 49 63 c5 48 c1 e2 08 48 c1 e0 08 48 63 ca 48 99 <48> f7 f9 48 98 4c 0f af f0 49 c1 ee 08 8b 43 78 c1 e0 08 44 29
RIP [<ffffffff814a9279>] intel_pstate_timer_func+0x179/0x3d0
RSP <ffff883fff4e3db8>
The kernel values for cpudata for CPU 113 were:
struct cpudata {
cpu = 113,
timer = {
entry = {
next = 0x0,
prev = 0xdead000000200200
},
expires = 8357799745,
base = 0xffff883fe84ec001,
function = 0xffffffff814a9100 <intel_pstate_timer_func>,
data = 18446612406765768960,
<snip>
i_gain = 0,
d_gain = 0,
deadband = 0,
last_err = 22489
},
last_sample_time = {
tv64 = 4063132438017305
},
prev_aperf = 287326796397463,
prev_mperf = 251427432090198,
sample = {
core_pct_busy = 23081,
aperf = 2937407,
mperf = 3257884,
freq = 2524484,
time = {
tv64 = 4063149215234118
}
}
}
which results in the time between samples = last_sample_time - sample.time
= 4063149215234118 - 4063132438017305 = 16777216813 which is 16.777 seconds.
The duration between reads of the APERF and MPERF registers overflowed a s32
sized integer in intel_pstate_get_scaled_busy()'s call to div_fp(). The result
is that int_tofp(duration_us) == 0, and the kernel attempts to divide by 0.
While the kernel shouldn't be delaying for a long time, it can and does
happen and the intel_pstate driver should not panic in this situation. This
patch changes the div_fp() function to use div64_s64() to allow for "long"
division. This will avoid the overflow condition on long delays.
[v2]: use div64_s64() in div_fp()
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2015-06-15 20:43:29 +03:00
s64 duration_us ;
2014-05-29 20:32:24 +04:00
u32 sample_time ;
2013-02-06 21:02:13 +04:00
2014-12-10 23:39:38 +03:00
/*
* core_busy is the ratio of actual performance to max
* max_pstate is the max non turbo pstate available
* current_pstate was the pstate that was requested during
* the last sample period .
*
* We normalize core_busy , which was our actual percent
* performance to what we requested during the last sample
* period . The result will be a percentage of busy at a
* specified pstate .
*/
2014-02-12 22:01:04 +04:00
core_busy = cpu - > sample . core_pct_busy ;
2015-10-15 02:12:00 +03:00
max_pstate = int_tofp ( cpu - > pstate . max_pstate_physical ) ;
2013-02-06 21:02:13 +04:00
current_pstate = int_tofp ( cpu - > pstate . current_pstate ) ;
2014-02-25 22:35:37 +04:00
core_busy = mul_fp ( core_busy , div_fp ( max_pstate , current_pstate ) ) ;
2014-05-29 20:32:24 +04:00
2014-12-10 23:39:38 +03:00
/*
* Since we have a deferred timer , it will not fire unless
* we are in C0 . So , determine if the actual elapsed time
* is significantly greater ( 3 x ) than our sample interval . If it
* is , then we were idle for a long enough period of time
* to adjust our busyness .
*/
2014-07-18 19:37:21 +04:00
sample_time = pid_params . sample_rate_ms * USEC_PER_MSEC ;
intel_pstate: Fix overflow in busy_scaled due to long delay
The kernel may delay interrupts for a long time which can result in timers
being delayed. If this occurs the intel_pstate driver will crash with a
divide by zero error:
divide error: 0000 [#1] SMP
Modules linked in: btrfs zlib_deflate raid6_pq xor msdos ext4 mbcache jbd2 binfmt_misc arc4 md4 nls_utf8 cifs dns_resolver tcp_lp bnep bluetooth rfkill fuse dm_service_time iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi nf_conntrack_netbios_ns nf_conntrack_broadcast nf_conntrack_ftp ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter ip_tables intel_powerclamp coretemp vfat fat kvm_intel iTCO_wdt iTCO_vendor_support ipmi_devintf sr_mod kvm crct10dif_pclmul
crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel cdc_ether lrw usbnet cdrom mii gf128mul glue_helper ablk_helper cryptd lpc_ich mfd_core pcspkr sb_edac edac_core ipmi_si ipmi_msghandler ioatdma wmi shpchp acpi_pad nfsd auth_rpcgss nfs_acl lockd uinput dm_multipath sunrpc xfs libcrc32c usb_storage sd_mod crc_t10dif crct10dif_common ixgbe mgag200 syscopyarea sysfillrect sysimgblt mdio drm_kms_helper ttm igb drm ptp pps_core dca i2c_algo_bit megaraid_sas i2c_core dm_mirror dm_region_hash dm_log dm_mod
CPU: 113 PID: 0 Comm: swapper/113 Tainted: G W -------------- 3.10.0-229.1.2.el7.x86_64 #1
Hardware name: IBM x3950 X6 -[3837AC2]-/00FN827, BIOS -[A8E112BUS-1.00]- 08/27/2014
task: ffff880fe8abe660 ti: ffff880fe8ae4000 task.ti: ffff880fe8ae4000
RIP: 0010:[<ffffffff814a9279>] [<ffffffff814a9279>] intel_pstate_timer_func+0x179/0x3d0
RSP: 0018:ffff883fff4e3db8 EFLAGS: 00010206
RAX: 0000000027100000 RBX: ffff883fe6965100 RCX: 0000000000000000
RDX: 0000000000000000 RSI: 0000000000000010 RDI: 000000002e53632d
RBP: ffff883fff4e3e20 R08: 000e6f69a5a125c0 R09: ffff883fe84ec001
R10: 0000000000000002 R11: 0000000000000005 R12: 00000000000049f5
R13: 0000000000271000 R14: 00000000000049f5 R15: 0000000000000246
FS: 0000000000000000(0000) GS:ffff883fff4e0000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f7668601000 CR3: 000000000190a000 CR4: 00000000001407e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Stack:
ffff883fff4e3e58 ffffffff81099dc1 0000000000000086 0000000000000071
ffff883fff4f3680 0000000000000071 fbdc8a965e33afee ffffffff810b69dd
ffff883fe84ec000 ffff883fe6965108 0000000000000100 ffffffff814a9100
Call Trace:
<IRQ>
[<ffffffff81099dc1>] ? run_posix_cpu_timers+0x51/0x840
[<ffffffff810b69dd>] ? trigger_load_balance+0x5d/0x200
[<ffffffff814a9100>] ? pid_param_set+0x130/0x130
[<ffffffff8107df56>] call_timer_fn+0x36/0x110
[<ffffffff814a9100>] ? pid_param_set+0x130/0x130
[<ffffffff8107fdcf>] run_timer_softirq+0x21f/0x320
[<ffffffff81077b2f>] __do_softirq+0xef/0x280
[<ffffffff816156dc>] call_softirq+0x1c/0x30
[<ffffffff81015d95>] do_softirq+0x65/0xa0
[<ffffffff81077ec5>] irq_exit+0x115/0x120
[<ffffffff81616355>] smp_apic_timer_interrupt+0x45/0x60
[<ffffffff81614a1d>] apic_timer_interrupt+0x6d/0x80
<EOI>
[<ffffffff814a9c32>] ? cpuidle_enter_state+0x52/0xc0
[<ffffffff814a9c28>] ? cpuidle_enter_state+0x48/0xc0
[<ffffffff814a9d65>] cpuidle_idle_call+0xc5/0x200
[<ffffffff8101d14e>] arch_cpu_idle+0xe/0x30
[<ffffffff810c67c1>] cpu_startup_entry+0xf1/0x290
[<ffffffff8104228a>] start_secondary+0x1ba/0x230
Code: 42 0f 00 45 89 e6 48 01 c2 43 8d 44 6d 00 39 d0 73 26 49 c1 e5 08 89 d2 4d 63 f4 49 63 c5 48 c1 e2 08 48 c1 e0 08 48 63 ca 48 99 <48> f7 f9 48 98 4c 0f af f0 49 c1 ee 08 8b 43 78 c1 e0 08 44 29
RIP [<ffffffff814a9279>] intel_pstate_timer_func+0x179/0x3d0
RSP <ffff883fff4e3db8>
The kernel values for cpudata for CPU 113 were:
struct cpudata {
cpu = 113,
timer = {
entry = {
next = 0x0,
prev = 0xdead000000200200
},
expires = 8357799745,
base = 0xffff883fe84ec001,
function = 0xffffffff814a9100 <intel_pstate_timer_func>,
data = 18446612406765768960,
<snip>
i_gain = 0,
d_gain = 0,
deadband = 0,
last_err = 22489
},
last_sample_time = {
tv64 = 4063132438017305
},
prev_aperf = 287326796397463,
prev_mperf = 251427432090198,
sample = {
core_pct_busy = 23081,
aperf = 2937407,
mperf = 3257884,
freq = 2524484,
time = {
tv64 = 4063149215234118
}
}
}
which results in the time between samples = last_sample_time - sample.time
= 4063149215234118 - 4063132438017305 = 16777216813 which is 16.777 seconds.
The duration between reads of the APERF and MPERF registers overflowed a s32
sized integer in intel_pstate_get_scaled_busy()'s call to div_fp(). The result
is that int_tofp(duration_us) == 0, and the kernel attempts to divide by 0.
While the kernel shouldn't be delaying for a long time, it can and does
happen and the intel_pstate driver should not panic in this situation. This
patch changes the div_fp() function to use div64_s64() to allow for "long"
division. This will avoid the overflow condition on long delays.
[v2]: use div64_s64() in div_fp()
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2015-06-15 20:43:29 +03:00
duration_us = ktime_us_delta ( cpu - > sample . time ,
cpu - > last_sample_time ) ;
2014-05-29 20:32:24 +04:00
if ( duration_us > sample_time * 3 ) {
sample_ratio = div_fp ( int_tofp ( sample_time ) ,
2014-07-18 19:37:23 +04:00
int_tofp ( duration_us ) ) ;
2014-05-29 20:32:24 +04:00
core_busy = mul_fp ( core_busy , sample_ratio ) ;
}
2014-05-29 20:32:23 +04:00
return core_busy ;
2013-02-06 21:02:13 +04:00
}
static inline void intel_pstate_adjust_busy_pstate ( struct cpudata * cpu )
{
2013-10-21 20:20:32 +04:00
int32_t busy_scaled ;
2013-02-06 21:02:13 +04:00
struct _pid * pid ;
2014-07-18 19:37:26 +04:00
signed int ctl ;
2015-04-12 07:10:26 +03:00
int from ;
struct sample * sample ;
from = cpu - > pstate . current_pstate ;
2013-02-06 21:02:13 +04:00
pid = & cpu - > pid ;
busy_scaled = intel_pstate_get_scaled_busy ( cpu ) ;
ctl = pid_calc ( pid , busy_scaled ) ;
2014-07-18 19:37:26 +04:00
/* Negative values of ctl increase the pstate and vice versa */
2015-06-02 07:12:34 +03:00
intel_pstate_set_pstate ( cpu , cpu - > pstate . current_pstate - ctl , true ) ;
2015-04-12 07:10:26 +03:00
sample = & cpu - > sample ;
trace_pstate_sample ( fp_toint ( sample - > core_pct_busy ) ,
fp_toint ( busy_scaled ) ,
from ,
cpu - > pstate . current_pstate ,
sample - > mperf ,
sample - > aperf ,
sample - > tsc ,
sample - > freq ) ;
2013-02-06 21:02:13 +04:00
}
2014-11-06 20:40:47 +03:00
/*
 * Timer callback installed when hwp_active is set: take a sample and re-arm
 * the timer.  @__data is the struct cpudata pointer stored in timer.data.
 */
static void intel_hwp_timer_func(unsigned long __data)
{
	struct cpudata *cpudata = (struct cpudata *)__data;

	intel_pstate_sample(cpudata);
	intel_hwp_set_sample_time(cpudata);
}
2013-02-06 21:02:13 +04:00
/*
 * Timer callback installed when hwp_active is not set: take a sample, let
 * the PID controller adjust the P-state, and re-arm the timer.  @__data is
 * the struct cpudata pointer stored in timer.data.
 */
static void intel_pstate_timer_func(unsigned long __data)
{
	struct cpudata *cpudata = (struct cpudata *)__data;

	intel_pstate_sample(cpudata);
	intel_pstate_adjust_busy_pstate(cpudata);
	intel_pstate_set_sample_time(cpudata);
}
/*
 * Positional x86_cpu_id initializer: Intel vendor, family 6, the given
 * model, the APERFMPERF feature flag, and the address of the parameter set
 * as the last member.
 */
#define ICPU(model, policy)						\
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,		\
	  (unsigned long)&policy }
2013-02-06 21:02:13 +04:00
static const struct x86_cpu_id intel_pstate_cpu_ids [ ] = {
2013-10-21 20:20:34 +04:00
ICPU ( 0x2a , core_params ) ,
ICPU ( 0x2d , core_params ) ,
2013-10-21 20:20:35 +04:00
ICPU ( 0x37 , byt_params ) ,
2013-10-21 20:20:34 +04:00
ICPU ( 0x3a , core_params ) ,
ICPU ( 0x3c , core_params ) ,
2014-05-08 23:57:27 +04:00
ICPU ( 0x3d , core_params ) ,
2013-10-21 20:20:34 +04:00
ICPU ( 0x3e , core_params ) ,
ICPU ( 0x3f , core_params ) ,
ICPU ( 0x45 , core_params ) ,
ICPU ( 0x46 , core_params ) ,
2014-11-06 20:50:45 +03:00
ICPU ( 0x47 , core_params ) ,
2014-08-22 14:05:44 +04:00
ICPU ( 0x4c , byt_params ) ,
2015-01-29 00:53:28 +03:00
ICPU ( 0x4e , core_params ) ,
2014-05-08 23:57:27 +04:00
ICPU ( 0x4f , core_params ) ,
2015-08-05 22:47:14 +03:00
ICPU ( 0x5e , core_params ) ,
2014-05-08 23:57:27 +04:00
ICPU ( 0x56 , core_params ) ,
2015-04-10 20:22:18 +03:00
ICPU ( 0x57 , knl_params ) ,
2013-02-06 21:02:13 +04:00
{ }
} ;
MODULE_DEVICE_TABLE ( x86cpu , intel_pstate_cpu_ids ) ;
2014-11-06 20:40:47 +03:00
/* Separate single-model ID list (model 0x56); its user is outside this section. */
static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = {
	ICPU(0x56, core_params),
	{}
};
2013-02-06 21:02:13 +04:00
static int intel_pstate_init_cpu ( unsigned int cpunum )
{
struct cpudata * cpu ;
2014-10-13 19:37:42 +04:00
if ( ! all_cpu_data [ cpunum ] )
all_cpu_data [ cpunum ] = kzalloc ( sizeof ( struct cpudata ) ,
GFP_KERNEL ) ;
2013-02-06 21:02:13 +04:00
if ( ! all_cpu_data [ cpunum ] )
return - ENOMEM ;
cpu = all_cpu_data [ cpunum ] ;
cpu - > cpu = cpunum ;
2015-07-14 19:46:23 +03:00
if ( hwp_active )
intel_pstate_hwp_enable ( cpu ) ;
2014-07-05 03:51:33 +04:00
intel_pstate_get_cpu_pstates ( cpu ) ;
2013-10-21 20:20:34 +04:00
2013-02-06 21:02:13 +04:00
init_timer_deferrable ( & cpu - > timer ) ;
2014-07-18 19:37:20 +04:00
cpu - > timer . data = ( unsigned long ) cpu ;
2013-02-06 21:02:13 +04:00
cpu - > timer . expires = jiffies + HZ / 100 ;
2014-11-06 20:40:47 +03:00
if ( ! hwp_active )
cpu - > timer . function = intel_pstate_timer_func ;
else
cpu - > timer . function = intel_hwp_timer_func ;
2013-02-06 21:02:13 +04:00
intel_pstate_busy_pid_reset ( cpu ) ;
intel_pstate_sample ( cpu ) ;
add_timer_on ( & cpu - > timer , cpunum ) ;
2015-05-31 17:46:47 +03:00
pr_debug ( " intel_pstate: controlling: cpu %d \n " , cpunum ) ;
2013-02-06 21:02:13 +04:00
return 0 ;
}
static unsigned int intel_pstate_get ( unsigned int cpu_num )
{
struct sample * sample ;
struct cpudata * cpu ;
cpu = all_cpu_data [ cpu_num ] ;
if ( ! cpu )
return 0 ;
2014-02-12 22:01:04 +04:00
sample = & cpu - > sample ;
2013-02-06 21:02:13 +04:00
return sample - > freq ;
}
static int intel_pstate_set_policy ( struct cpufreq_policy * policy )
{
2015-10-15 02:12:03 +03:00
# if IS_ENABLED(CONFIG_ACPI)
struct cpudata * cpu ;
int i ;
# endif
pr_debug ( " intel_pstate: %s max %u policy->max %u \n " , __func__ ,
policy - > cpuinfo . max_freq , policy - > max ) ;
2013-03-06 02:15:26 +04:00
if ( ! policy - > cpuinfo . max_freq )
return - ENODEV ;
2015-01-29 23:17:13 +03:00
if ( policy - > policy = = CPUFREQ_POLICY_PERFORMANCE & &
policy - > max > = policy - > cpuinfo . max_freq ) {
2015-10-15 14:34:15 +03:00
pr_debug ( " intel_pstate: set performance \n " ) ;
limits = & performance_limits ;
2013-04-10 02:38:18 +04:00
return 0 ;
2013-02-06 21:02:13 +04:00
}
2014-11-06 20:40:47 +03:00
2015-10-15 14:34:15 +03:00
pr_debug ( " intel_pstate: set powersave \n " ) ;
limits = & powersave_limits ;
limits - > min_policy_pct = ( policy - > min * 100 ) / policy - > cpuinfo . max_freq ;
limits - > min_policy_pct = clamp_t ( int , limits - > min_policy_pct , 0 , 100 ) ;
limits - > max_policy_pct = ( policy - > max * 100 ) / policy - > cpuinfo . max_freq ;
limits - > max_policy_pct = clamp_t ( int , limits - > max_policy_pct , 0 , 100 ) ;
2015-09-09 13:27:31 +03:00
/* Normalize user input to [min_policy_pct, max_policy_pct] */
2015-10-15 14:34:15 +03:00
limits - > min_perf_pct = max ( limits - > min_policy_pct ,
limits - > min_sysfs_pct ) ;
limits - > min_perf_pct = min ( limits - > max_policy_pct ,
limits - > min_perf_pct ) ;
limits - > max_perf_pct = min ( limits - > max_policy_pct ,
limits - > max_sysfs_pct ) ;
limits - > max_perf_pct = max ( limits - > min_policy_pct ,
limits - > max_perf_pct ) ;
2015-09-09 13:27:31 +03:00
/* Make sure min_perf_pct <= max_perf_pct */
2015-10-15 14:34:15 +03:00
limits - > min_perf_pct = min ( limits - > max_perf_pct , limits - > min_perf_pct ) ;
2015-09-09 13:27:31 +03:00
2015-10-15 14:34:15 +03:00
limits - > min_perf = div_fp ( int_tofp ( limits - > min_perf_pct ) ,
int_tofp ( 100 ) ) ;
limits - > max_perf = div_fp ( int_tofp ( limits - > max_perf_pct ) ,
int_tofp ( 100 ) ) ;
2013-02-06 21:02:13 +04:00
2015-10-15 02:12:03 +03:00
# if IS_ENABLED(CONFIG_ACPI)
cpu = all_cpu_data [ policy - > cpu ] ;
for ( i = 0 ; i < cpu - > acpi_perf_data . state_count ; i + + ) {
int control ;
control = convert_to_native_pstate_format ( cpu , i ) ;
if ( control * cpu - > pstate . scaling = = policy - > max )
2015-10-15 14:34:15 +03:00
limits - > max_perf_ctl = control ;
2015-10-15 02:12:03 +03:00
if ( control * cpu - > pstate . scaling = = policy - > min )
2015-10-15 14:34:15 +03:00
limits - > min_perf_ctl = control ;
2015-10-15 02:12:03 +03:00
}
pr_debug ( " intel_pstate: max %u policy_max %u perf_ctl [0x%x-0x%x] \n " ,
2015-10-15 14:34:15 +03:00
policy - > cpuinfo . max_freq , policy - > max , limits - > min_perf_ctl ,
limits - > max_perf_ctl ) ;
2015-10-15 02:12:03 +03:00
# endif
2014-11-06 20:40:47 +03:00
if ( hwp_active )
intel_pstate_hwp_set ( ) ;
2013-02-06 21:02:13 +04:00
return 0 ;
}
static int intel_pstate_verify_policy ( struct cpufreq_policy * policy )
{
2013-10-02 12:43:19 +04:00
cpufreq_verify_within_cpu_limits ( policy ) ;
2013-02-06 21:02:13 +04:00
2014-07-18 19:37:21 +04:00
if ( policy - > policy ! = CPUFREQ_POLICY_POWERSAVE & &
2014-07-18 19:37:23 +04:00
policy - > policy ! = CPUFREQ_POLICY_PERFORMANCE )
2013-02-06 21:02:13 +04:00
return - EINVAL ;
return 0 ;
}
2014-03-19 19:45:54 +04:00
static void intel_pstate_stop_cpu ( struct cpufreq_policy * policy )
2013-02-06 21:02:13 +04:00
{
2014-03-19 19:45:54 +04:00
int cpu_num = policy - > cpu ;
struct cpudata * cpu = all_cpu_data [ cpu_num ] ;
2013-02-06 21:02:13 +04:00
2015-05-31 17:46:47 +03:00
pr_debug ( " intel_pstate: CPU %d exiting \n " , cpu_num ) ;
2014-03-19 19:45:54 +04:00
2014-03-24 18:41:29 +04:00
del_timer_sync ( & all_cpu_data [ cpu_num ] - > timer ) ;
2014-11-06 20:40:47 +03:00
if ( hwp_active )
return ;
2015-06-02 07:12:34 +03:00
intel_pstate_set_pstate ( cpu , cpu - > pstate . min_pstate , false ) ;
2013-02-06 21:02:13 +04:00
}
2013-06-19 21:54:04 +04:00
static int intel_pstate_cpu_init ( struct cpufreq_policy * policy )
2013-02-06 21:02:13 +04:00
{
struct cpudata * cpu ;
2013-10-15 22:06:14 +04:00
int rc ;
2013-02-06 21:02:13 +04:00
rc = intel_pstate_init_cpu ( policy - > cpu ) ;
if ( rc )
return rc ;
cpu = all_cpu_data [ policy - > cpu ] ;
2015-10-15 14:34:15 +03:00
if ( limits - > min_perf_pct = = 100 & & limits - > max_perf_pct = = 100 )
2013-02-06 21:02:13 +04:00
policy - > policy = CPUFREQ_POLICY_PERFORMANCE ;
else
policy - > policy = CPUFREQ_POLICY_POWERSAVE ;
2014-10-13 19:37:43 +04:00
policy - > min = cpu - > pstate . min_pstate * cpu - > pstate . scaling ;
policy - > max = cpu - > pstate . turbo_pstate * cpu - > pstate . scaling ;
2013-02-06 21:02:13 +04:00
/* cpuinfo and default policy values */
2014-10-13 19:37:43 +04:00
policy - > cpuinfo . min_freq = cpu - > pstate . min_pstate * cpu - > pstate . scaling ;
policy - > cpuinfo . max_freq =
cpu - > pstate . turbo_pstate * cpu - > pstate . scaling ;
2015-10-15 02:12:01 +03:00
if ( ! no_acpi_perf )
intel_pstate_init_perf_limits ( policy ) ;
/*
* If there is no acpi perf data or error , we ignore and use Intel P
* state calculated limits , So this is not fatal error .
*/
2013-02-06 21:02:13 +04:00
policy - > cpuinfo . transition_latency = CPUFREQ_ETERNAL ;
cpumask_set_cpu ( policy - > cpu , policy - > cpus ) ;
return 0 ;
}
2015-10-15 02:12:01 +03:00
/*
 * cpufreq ->exit() callback: hands @policy to
 * intel_pstate_exit_perf_limits() and returns its result unchanged.
 */
static int intel_pstate_cpu_exit ( struct cpufreq_policy * policy )
{
return intel_pstate_exit_perf_limits ( policy ) ;
}
2013-02-06 21:02:13 +04:00
static struct cpufreq_driver intel_pstate_driver = {
. flags = CPUFREQ_CONST_LOOPS ,
. verify = intel_pstate_verify_policy ,
. setpolicy = intel_pstate_set_policy ,
. get = intel_pstate_get ,
. init = intel_pstate_cpu_init ,
2015-10-15 02:12:01 +03:00
. exit = intel_pstate_cpu_exit ,
2014-03-19 19:45:54 +04:00
. stop_cpu = intel_pstate_stop_cpu ,
2013-02-06 21:02:13 +04:00
. name = " intel_pstate " ,
} ;
2013-02-16 01:55:10 +04:00
/* Boot-time switches; each is set to 1 by intel_pstate_setup(). */
static int __initdata no_load ; /* "disable": intel_pstate_init() bails out with -ENODEV */
2014-11-06 20:40:47 +03:00
static int __initdata no_hwp ; /* "no_hwp": do not activate HWP even if the CPU has it */
2015-02-07 00:41:55 +03:00
static int __initdata hwp_only ; /* "hwp_only": give up in intel_pstate_init() unless HWP is active */
2014-12-09 04:43:19 +03:00
static unsigned int force_load ; /* "force": overrides the _PPC-based vendor check */
2013-02-16 01:55:10 +04:00
2013-03-22 04:29:28 +04:00
static int intel_pstate_msrs_not_valid ( void )
{
2013-10-21 20:20:34 +04:00
if ( ! pstate_funcs . get_max ( ) | |
2014-07-18 19:37:23 +04:00
! pstate_funcs . get_min ( ) | |
! pstate_funcs . get_turbo ( ) )
2013-03-22 04:29:28 +04:00
return - ENODEV ;
return 0 ;
}
2013-10-21 20:20:34 +04:00
2013-10-30 19:38:32 +04:00
static void copy_pid_params ( struct pstate_adjust_policy * policy )
2013-10-21 20:20:34 +04:00
{
pid_params . sample_rate_ms = policy - > sample_rate_ms ;
pid_params . p_gain_pct = policy - > p_gain_pct ;
pid_params . i_gain_pct = policy - > i_gain_pct ;
pid_params . d_gain_pct = policy - > d_gain_pct ;
pid_params . deadband = policy - > deadband ;
pid_params . setpoint = policy - > setpoint ;
}
2013-10-30 19:38:32 +04:00
static void copy_cpu_funcs ( struct pstate_funcs * funcs )
2013-10-21 20:20:34 +04:00
{
pstate_funcs . get_max = funcs - > get_max ;
2015-10-15 02:12:00 +03:00
pstate_funcs . get_max_physical = funcs - > get_max_physical ;
2013-10-21 20:20:34 +04:00
pstate_funcs . get_min = funcs - > get_min ;
pstate_funcs . get_turbo = funcs - > get_turbo ;
2014-10-13 19:37:43 +04:00
pstate_funcs . get_scaling = funcs - > get_scaling ;
2013-10-21 20:20:34 +04:00
pstate_funcs . set = funcs - > set ;
2013-12-18 22:32:39 +04:00
pstate_funcs . get_vid = funcs - > get_vid ;
2013-10-21 20:20:34 +04:00
}
2013-10-31 19:24:05 +04:00
# if IS_ENABLED(CONFIG_ACPI)
/*
 * intel_pstate_no_acpi_pss() - check whether any possible CPU has a _PSS
 * object that evaluates to a package.
 *
 * CPUs with no ACPI processor object, or whose evaluation fails, are
 * skipped.
 *
 * Return: false as soon as one CPU yields a package, true if none does.
 */
static bool intel_pstate_no_acpi_pss(void)
{
	int i;

	for_each_possible_cpu(i) {
		struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
		struct acpi_processor *pr = per_cpu(processors, i);
		union acpi_object *pss;
		acpi_status status;
		bool is_package;

		if (!pr)
			continue;

		status = acpi_evaluate_object(pr->handle, " _PSS ", NULL,
					      &buffer);
		if (ACPI_FAILURE(status))
			continue;

		pss = buffer.pointer;
		is_package = pss && pss->type == ACPI_TYPE_PACKAGE;

		/* The result buffer is released on every path. */
		kfree(pss);

		if (is_package)
			return false;
	}

	return true;
}
2014-12-01 05:32:08 +03:00
/*
 * intel_pstate_has_acpi_ppc() - check whether any possible CPU that has an
 * ACPI processor object also has a _PPC method.
 *
 * Return: true on the first such CPU, false if there is none.
 */
static bool intel_pstate_has_acpi_ppc(void)
{
	int i;

	for_each_possible_cpu(i) {
		struct acpi_processor *pr = per_cpu(processors, i);

		if (pr && acpi_has_method(pr->handle, " _PPC "))
			return true;
	}

	return false;
}
/*
 * Values for hw_vendor_info.oem_pwr_table: select which ACPI check
 * intel_pstate_platform_pwr_mgmt_exists() runs for a matching vendor entry.
 */
enum {
PSS ,
PPC ,
} ;
2013-10-31 19:24:05 +04:00
/*
 * One entry of the vendor_info table; matched against the FADT header in
 * intel_pstate_platform_pwr_mgmt_exists().
 */
struct hw_vendor_info {
u16 valid ; /* non-zero for a real entry; the table scan stops at the first zero */
char oem_id [ ACPI_OEM_ID_SIZE ] ; /* compared with the header's oem_id */
char oem_table_id [ ACPI_OEM_TABLE_ID_SIZE ] ; /* compared with the header's oem_table_id */
2014-12-01 05:32:08 +03:00
int oem_pwr_table ; /* PSS or PPC: which ACPI check to apply on a match */
2013-10-31 19:24:05 +04:00
} ;
/*
 * Hardware vendor-specific info for platforms that have their own power
 * management modes.  Scanned in order by
 * intel_pstate_platform_pwr_mgmt_exists(); the final entry has valid == 0
 * and ends the scan.
 */
static struct hw_vendor_info vendor_info [ ] = {
2014-12-01 05:32:08 +03:00
{ 1 , " HP " , " ProLiant " , PSS } ,
{ 1 , " ORACLE " , " X4-2 " , PPC } ,
{ 1 , " ORACLE " , " X4-2L " , PPC } ,
{ 1 , " ORACLE " , " X4-2B " , PPC } ,
{ 1 , " ORACLE " , " X3-2 " , PPC } ,
{ 1 , " ORACLE " , " X3-2L " , PPC } ,
{ 1 , " ORACLE " , " X3-2B " , PPC } ,
{ 1 , " ORACLE " , " X4470M2 " , PPC } ,
{ 1 , " ORACLE " , " X4270M3 " , PPC } ,
{ 1 , " ORACLE " , " X4270M2 " , PPC } ,
{ 1 , " ORACLE " , " X4170M2 " , PPC } ,
2015-08-05 03:28:50 +03:00
{ 1 , " ORACLE " , " X4170 M3 " , PPC } ,
{ 1 , " ORACLE " , " X4275 M3 " , PPC } ,
{ 1 , " ORACLE " , " X6-2 " , PPC } ,
{ 1 , " ORACLE " , " Sudbury " , PPC } ,
2013-10-31 19:24:05 +04:00
{ 0 , " " , " " } ,
} ;
static bool intel_pstate_platform_pwr_mgmt_exists ( void )
{
struct acpi_table_header hdr ;
struct hw_vendor_info * v_info ;
2014-11-06 20:40:47 +03:00
const struct x86_cpu_id * id ;
u64 misc_pwr ;
id = x86_match_cpu ( intel_pstate_cpu_oob_ids ) ;
if ( id ) {
rdmsrl ( MSR_MISC_PWR_MGMT , misc_pwr ) ;
if ( misc_pwr & ( 1 < < 8 ) )
return true ;
}
2013-10-31 19:24:05 +04:00
2014-07-18 19:37:23 +04:00
if ( acpi_disabled | |
ACPI_FAILURE ( acpi_get_table_header ( ACPI_SIG_FADT , 0 , & hdr ) ) )
2013-10-31 19:24:05 +04:00
return false ;
for ( v_info = vendor_info ; v_info - > valid ; v_info + + ) {
2014-07-18 19:37:23 +04:00
if ( ! strncmp ( hdr . oem_id , v_info - > oem_id , ACPI_OEM_ID_SIZE ) & &
2014-12-01 05:32:08 +03:00
! strncmp ( hdr . oem_table_id , v_info - > oem_table_id ,
ACPI_OEM_TABLE_ID_SIZE ) )
switch ( v_info - > oem_pwr_table ) {
case PSS :
return intel_pstate_no_acpi_pss ( ) ;
case PPC :
2014-12-09 04:43:19 +03:00
return intel_pstate_has_acpi_ppc ( ) & &
( ! force_load ) ;
2014-12-01 05:32:08 +03:00
}
2013-10-31 19:24:05 +04:00
}
return false ;
}
# else /* CONFIG_ACPI not enabled */
/* Stub for builds without CONFIG_ACPI: always reports false. */
static inline bool intel_pstate_platform_pwr_mgmt_exists ( void ) { return false ; }
2014-12-01 05:32:08 +03:00
/* Stub for builds without CONFIG_ACPI: always reports false. */
static inline bool intel_pstate_has_acpi_ppc ( void ) { return false ; }
2013-10-31 19:24:05 +04:00
# endif /* CONFIG_ACPI */
2013-02-06 21:02:13 +04:00
/*
 * intel_pstate_init() - driver entry point, run as a device initcall.
 *
 * Bails out with -ENODEV when loading was disabled on the command line,
 * the CPU model is not in intel_pstate_cpu_ids, the platform does its own
 * power management, or the P-state accessors return zero.  Otherwise it
 * installs the model's PID parameters and callbacks, allocates
 * all_cpu_data, activates HWP when available and not disabled, and
 * registers the cpufreq driver.
 *
 * Return: 0 on success, -ENOMEM if all_cpu_data cannot be allocated,
 * -ENODEV on any other failure.
 */
static int __init intel_pstate_init(void)
{
	const struct x86_cpu_id *id;
	struct cpu_defaults *cpu_def;
	int cpu;

	if (no_load)
		return -ENODEV;

	id = x86_match_cpu(intel_pstate_cpu_ids);
	if (!id)
		return -ENODEV;

	/* Stay out of the way when the firmware manages power itself. */
	if (intel_pstate_platform_pwr_mgmt_exists())
		return -ENODEV;

	cpu_def = (struct cpu_defaults *)id->driver_data;
	copy_pid_params(&cpu_def->pid_policy);
	copy_cpu_funcs(&cpu_def->funcs);

	if (intel_pstate_msrs_not_valid())
		return -ENODEV;

	pr_info(" Intel P-state driver initializing. \n ");

	all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
	if (!all_cpu_data)
		return -ENOMEM;

	if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) {
		pr_info(" intel_pstate: HWP enabled \n ");
		hwp_active++;
	}

	/* With "hwp_only", registration is attempted only if HWP is active. */
	if (hwp_active || !hwp_only) {
		if (!cpufreq_register_driver(&intel_pstate_driver)) {
			intel_pstate_debug_expose_params();
			intel_pstate_sysfs_expose_params();
			return 0;
		}
	}

	/* Failure: stop timers and free whatever per-CPU state exists. */
	get_online_cpus();
	for_each_online_cpu(cpu) {
		if (!all_cpu_data[cpu])
			continue;

		del_timer_sync(&all_cpu_data[cpu]->timer);
		kfree(all_cpu_data[cpu]);
	}
	put_online_cpus();
	vfree(all_cpu_data);

	return -ENODEV;
}
device_initcall(intel_pstate_init);
2013-02-16 01:55:10 +04:00
/*
 * intel_pstate_setup() - early_param handler for the driver's boot option.
 *
 * Compares @str with each known keyword and sets the matching flag;
 * an unrecognized value is ignored.
 *
 * Return: 0, or -EINVAL when @str is NULL.
 */
static int __init intel_pstate_setup(char *str)
{
	if (!str)
		return -EINVAL;

	/* @str can equal at most one keyword, so the first match wins. */
	if (!strcmp(str, " disable ")) {
		no_load = 1;
	} else if (!strcmp(str, " no_hwp ")) {
		pr_info(" intel_pstate: HWP disabled \n ");
		no_hwp = 1;
	} else if (!strcmp(str, " force ")) {
		force_load = 1;
	} else if (!strcmp(str, " hwp_only ")) {
		hwp_only = 1;
	} else if (!strcmp(str, " no_acpi ")) {
		no_acpi_perf = 1;
	}

	return 0;
}
early_param(" intel_pstate ", intel_pstate_setup);
2013-02-06 21:02:13 +04:00
/* Module metadata: author, description and license. */
MODULE_AUTHOR ( " Dirk Brandewie <dirk.j.brandewie@intel.com> " ) ;
MODULE_DESCRIPTION ( " 'intel_pstate' - P state driver Intel Core processors " ) ;
MODULE_LICENSE ( " GPL " ) ;