2019-05-29 07:18:02 -07:00
// SPDX-License-Identifier: GPL-2.0-only
2015-06-10 13:34:24 -07:00
/* intel_pch_thermal.c - Intel PCH Thermal driver
 *
 * Copyright (c) 2015, Intel Corporation.
 *
 * Authors:
 *     Tushar Dave <tushar.n.dave@intel.com>
 */
2020-11-06 22:36:33 +05:30
# include <linux/acpi.h>
# include <linux/delay.h>
2015-06-10 13:34:24 -07:00
# include <linux/module.h>
# include <linux/init.h>
# include <linux/pci.h>
2020-11-06 22:36:33 +05:30
# include <linux/pm.h>
# include <linux/suspend.h>
2015-06-10 13:34:24 -07:00
# include <linux/thermal.h>
2020-11-06 22:36:33 +05:30
# include <linux/types.h>
2020-01-30 22:15:48 -08:00
# include <linux/units.h>
2015-06-10 13:34:24 -07:00
/* Intel PCH thermal Device IDs (PCI device IDs matched by this driver) */
#define PCH_THERMAL_DID_HSW_1	0x9C24 /* Haswell PCH */
#define PCH_THERMAL_DID_HSW_2	0x8C24 /* Haswell PCH */
#define PCH_THERMAL_DID_WPT	0x9CA4 /* Wildcat Point */
#define PCH_THERMAL_DID_SKL	0x9D31 /* Skylake PCH */
#define PCH_THERMAL_DID_SKL_H	0xA131 /* Skylake PCH 100 series */
#define PCH_THERMAL_DID_CNL	0x9DF9 /* CNL PCH */
#define PCH_THERMAL_DID_CNL_H	0xA379 /* CNL-H PCH */
#define PCH_THERMAL_DID_CNL_LP	0x02F9 /* CNL-LP PCH */
#define PCH_THERMAL_DID_CML_H	0x06F9 /* CML-H PCH */
#define PCH_THERMAL_DID_LWB	0xA1B1 /* Lewisburg PCH */
2015-06-10 13:34:24 -07:00
/*
 * Wildcat Point-LP PCH Thermal registers.
 * Values are byte offsets added to the mapped register base (hw_base),
 * listed in ascending offset order.
 */
#define WPT_TEMP	0x00	/* Temperature */
#define WPT_TSC		0x04	/* Thermal Sensor Control */
#define WPT_TSS		0x06	/* Thermal Sensor Status */
#define WPT_TSEL	0x08	/* Thermal Sensor Enable and Lock */
#define WPT_TSREL	0x0A	/* Thermal Sensor Report Enable and Lock */
#define WPT_TSMIC	0x0C	/* Thermal Sensor SMI Control */
#define WPT_CTT		0x10	/* Catastrophic Trip Point */
#define WPT_TAHV	0x14	/* Thermal Alert High Value */
#define WPT_TALV	0x18	/* Thermal Alert Low Value */
#define WPT_TSPM	0x1C	/* Thermal Sensor Power Management */
#define WPT_TL		0x40	/* Throttle Value */
#define WPT_PHL		0x60	/* PCH Hot Level */
#define WPT_PHLC	0x62	/* PHL Control */
#define WPT_TAS		0x80	/* Thermal Alert Status */
#define WPT_TSPIEN	0x82	/* PCI Interrupt Event Enables */
#define WPT_TSGPEN	0x84	/* General Purpose Event Enables */
/* Wildcat Point-LP PCH Thermal Register bit definitions */
#define WPT_TEMP_TSR	0x01FF		/* Temp TS Reading */
#define WPT_TSC_CPDE	0x01		/* Catastrophic Power-Down Enable */
#define WPT_TSS_TSDSS	0x10		/* Thermal Sensor Dynamic Shutdown Status */
#define WPT_TSS_GPES	0x08		/* GPE status */
#define WPT_TSEL_ETS	0x01		/* Enable TS */
#define WPT_TSEL_PLDB	0x80		/* TSEL Policy Lock-Down Bit */
#define WPT_TL_TOL	0x000001FF	/* T0 Level */
#define WPT_TL_T1L	0x1FF00000	/* T1 Level */
#define WPT_TL_TTEN	0x20000000	/* TT Enable */
2020-11-06 22:36:33 +05:30
/*
 * Raw sensor readings have a resolution of 1/2 degree C and an offset
 * of -50C.
 *
 * GET_PCH_TEMP() converts a raw reading to whole degrees C;
 * GET_WPT_TEMP() scales the same reading by MILLIDEGREE_PER_DEGREE.
 */
#define PCH_TEMP_OFFSET		(-50)
#define WPT_TEMP_OFFSET		(PCH_TEMP_OFFSET * MILLIDEGREE_PER_DEGREE)
#define GET_PCH_TEMP(x)		(PCH_TEMP_OFFSET + ((x) / 2))
#define GET_WPT_TEMP(x)		(WPT_TEMP_OFFSET + (x) * MILLIDEGREE_PER_DEGREE / 2)
/* Amount of time for each cooling delay, 100ms by default for now */
static unsigned int delay_timeout = 100 ;
module_param ( delay_timeout , int , 0644 ) ;
MODULE_PARM_DESC ( delay_timeout , " amount of time delay for each iteration. " ) ;
thermal: intel: pch: enhance overheat handling
Commit ef63b043ac86 ("thermal: intel: pch: fix S0ix failure due to PCH
temperature above threshold") introduces delay loop mechanism that allows
PCH temperature to go down below threshold during suspend so it won't
block S0ix. And the default overall delay timeout is 1 second.
However, in practice, we found that the time it takes to cool the PCH down
below threshold highly depends on the initial PCH temperature when the
delay starts, as well as the ambient temperature.
And in some cases, the 1 second delay is not sufficient. As a result, the
system stays in a shallower power state like PCx instead of S0ix, and
drains the battery without the user noticing.
To make sure S0ix is not blocked by the PCH overheating, we
1. expand the default overall timeout to 60 seconds.
2. make sure the temperature is below threshold rather than equal to it.
At the same time, as the cooling delay can be much longer and many wakeup
events (ACPI Power Button press, USB mouse move, etc) become valid in the
suspend_noirq phase, add detection of wakeup events so that the driver
does not delay blindly when the system suspend is likely to abort soon.
This patch may introduce longer suspend time, but only in the cases when
the system overheats and Linux used to enter a shallower S2idle state,
say, PCx instead of S0ix.
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2022-05-19 22:35:07 +08:00
/* Number of iterations for cooling delay, 600 counts by default for now */
static unsigned int delay_cnt = 600 ;
2020-11-06 22:36:33 +05:30
module_param ( delay_cnt , int , 0644 ) ;
MODULE_PARM_DESC ( delay_cnt , " total number of iterations for time delay. " ) ;
2015-06-10 13:34:24 -07:00
/* Name passed to pci_request_regions() to label the claimed PCI regions. */
static const char driver_name[] = "Intel PCH thermal driver";
/* Per-device state, allocated in probe and stored as PCI driver data. */
struct pch_thermal_device {
	void __iomem *hw_base;			/* mapping of PCI BAR 0 */
	const struct pch_dev_ops *ops;		/* board-specific callbacks */
	struct pci_dev *pdev;
	struct thermal_zone_device *tzd;
	int crt_trip_id;			/* critical trip index, -1 if absent */
	unsigned long crt_temp;			/* from WPT_CTT via GET_WPT_TEMP() */
	int hot_trip_id;			/* hot trip index, -1 if absent */
	unsigned long hot_temp;			/* from WPT_PHL via GET_WPT_TEMP() */
	int psv_trip_id;			/* passive trip index, -1 if absent */
	unsigned long psv_temp;			/* from ACPI _PSV, in millicelsius */
	bool bios_enabled;			/* sensor was already enabled at init */
};
2016-10-03 12:36:24 -07:00
# ifdef CONFIG_ACPI
/*
* On some platforms , there is a companion ACPI device , which adds
* passive trip temperature using _PSV method . There is no specific
* passive temperature setting in MMIO interface of this PCI device .
*/
static void pch_wpt_add_acpi_psv_trip ( struct pch_thermal_device * ptd ,
int * nr_trips )
{
struct acpi_device * adev ;
ptd - > psv_trip_id = - 1 ;
adev = ACPI_COMPANION ( & ptd - > pdev - > dev ) ;
if ( adev ) {
unsigned long long r ;
acpi_status status ;
status = acpi_evaluate_integer ( adev - > handle , " _PSV " , NULL ,
& r ) ;
if ( ACPI_SUCCESS ( status ) ) {
unsigned long trip_temp ;
2020-01-30 22:15:48 -08:00
trip_temp = deci_kelvin_to_millicelsius ( r ) ;
2016-10-03 12:36:24 -07:00
if ( trip_temp ) {
ptd - > psv_temp = trip_temp ;
ptd - > psv_trip_id = * nr_trips ;
+ + ( * nr_trips ) ;
}
}
}
}
# else
static void pch_wpt_add_acpi_psv_trip ( struct pch_thermal_device * ptd ,
int * nr_trips )
{
ptd - > psv_trip_id = - 1 ;
}
# endif
2015-06-10 13:34:24 -07:00
static int pch_wpt_init ( struct pch_thermal_device * ptd , int * nr_trips )
{
u8 tsel ;
u16 trip_temp ;
* nr_trips = 0 ;
/* Check if BIOS has already enabled thermal sensor */
2017-07-19 17:44:40 -07:00
if ( WPT_TSEL_ETS & readb ( ptd - > hw_base + WPT_TSEL ) ) {
2016-06-23 15:02:46 -07:00
ptd - > bios_enabled = true ;
2015-06-10 13:34:24 -07:00
goto read_trips ;
2016-06-23 15:02:46 -07:00
}
2015-06-10 13:34:24 -07:00
tsel = readb ( ptd - > hw_base + WPT_TSEL ) ;
/*
* When TSEL ' s Policy Lock - Down bit is 1 , TSEL become RO .
* If so , thermal sensor cannot enable . Bail out .
*/
if ( tsel & WPT_TSEL_PLDB ) {
dev_err ( & ptd - > pdev - > dev , " Sensor can't be enabled \n " ) ;
return - ENODEV ;
}
writeb ( tsel | WPT_TSEL_ETS , ptd - > hw_base + WPT_TSEL ) ;
2017-07-19 17:44:40 -07:00
if ( ! ( WPT_TSEL_ETS & readb ( ptd - > hw_base + WPT_TSEL ) ) ) {
2015-06-10 13:34:24 -07:00
dev_err ( & ptd - > pdev - > dev , " Sensor can't be enabled \n " ) ;
return - ENODEV ;
}
read_trips :
ptd - > crt_trip_id = - 1 ;
trip_temp = readw ( ptd - > hw_base + WPT_CTT ) ;
trip_temp & = 0x1FF ;
if ( trip_temp ) {
2020-12-10 18:18:01 +05:30
ptd - > crt_temp = GET_WPT_TEMP ( trip_temp ) ;
2015-06-10 13:34:24 -07:00
ptd - > crt_trip_id = 0 ;
+ + ( * nr_trips ) ;
}
ptd - > hot_trip_id = - 1 ;
trip_temp = readw ( ptd - > hw_base + WPT_PHL ) ;
trip_temp & = 0x1FF ;
if ( trip_temp ) {
2020-12-10 18:18:01 +05:30
ptd - > hot_temp = GET_WPT_TEMP ( trip_temp ) ;
2015-06-10 13:34:24 -07:00
ptd - > hot_trip_id = * nr_trips ;
+ + ( * nr_trips ) ;
}
2016-10-03 12:36:24 -07:00
pch_wpt_add_acpi_psv_trip ( ptd , nr_trips ) ;
2015-06-10 13:34:24 -07:00
return 0 ;
}
2015-09-11 20:06:59 -07:00
static int pch_wpt_get_temp ( struct pch_thermal_device * ptd , int * temp )
2015-06-10 13:34:24 -07:00
{
2020-12-10 18:18:01 +05:30
* temp = GET_WPT_TEMP ( WPT_TEMP_TSR & readw ( ptd - > hw_base + WPT_TEMP ) ) ;
2015-06-10 13:34:24 -07:00
return 0 ;
}
2022-05-19 22:35:06 +08:00
/* Cool the PCH when it's overheat in .suspend_noirq phase */
2016-06-23 15:02:46 -07:00
static int pch_wpt_suspend ( struct pch_thermal_device * ptd )
{
u8 tsel ;
thermal: intel: pch: improve the cooling delay log
Previously, during suspend, intel_pch_thermal driver logs for every
cooling iteration, about the current PCH temperature and number of cooling
iterations that have been tried, like below
[ 100.955526] intel_pch_thermal 0000:00:14.2: CPU-PCH current temp [53C] higher than the threshold temp [50C], sleep 1 times for 100 ms duration
[ 101.064156] intel_pch_thermal 0000:00:14.2: CPU-PCH current temp [53C] higher than the threshold temp [50C], sleep 2 times for 100 ms duration
After changing the default delay_cnt to 600, in practice, it is common to
see tens of the above messages if the system is suspended when PCH
overheats. Thus, change this log message from dev_warn to dev_dbg because
it is only useful when we want to check the temperature trend.
At the same time, there is always a one-line message given by the driver
with the patch applied, with below four possibilities.
1. PCH is cool, no cooling delay needed
[ 1791.902853] intel_pch_thermal 0000:00:12.0: CPU-PCH is cool [48C]
2. PCH overheats and becomes cool after the cooling delays
[ 1475.511617] intel_pch_thermal 0000:00:12.0: CPU-PCH is cool [49C] after 30700 ms delay
3. PCH still overheats after the overall cooling timeout
[ 2250.157487] intel_pch_thermal 0000:00:12.0: CPU-PCH is hot [60C] after 60000 ms delay. S0ix might fail
4. PCH aborts cooling because of wakeup event detected during the delay
[ 1933.639509] intel_pch_thermal 0000:00:12.0: Wakeup event detected, abort cooling
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2022-05-19 22:35:08 +08:00
int pch_delay_cnt = 0 ;
2020-11-06 22:36:33 +05:30
u16 pch_thr_temp , pch_cur_temp ;
2016-06-23 15:02:46 -07:00
2020-11-06 22:36:33 +05:30
/* Shutdown the thermal sensor if it is not enabled by BIOS */
if ( ! ptd - > bios_enabled ) {
tsel = readb ( ptd - > hw_base + WPT_TSEL ) ;
writeb ( tsel & 0xFE , ptd - > hw_base + WPT_TSEL ) ;
2016-06-23 15:02:46 -07:00
return 0 ;
2020-11-06 22:36:33 +05:30
}
2016-06-23 15:02:46 -07:00
2020-11-06 22:36:33 +05:30
/* Do not check temperature if it is not a S0ix capable platform */
2020-11-16 18:38:07 -08:00
# ifdef CONFIG_ACPI
2020-11-06 22:36:33 +05:30
if ( ! ( acpi_gbl_FADT . flags & ACPI_FADT_LOW_POWER_S0 ) )
return 0 ;
2020-11-16 18:38:07 -08:00
# else
return 0 ;
# endif
2020-11-06 22:36:33 +05:30
/* Do not check temperature if it is not s2idle */
if ( pm_suspend_via_firmware ( ) )
return 0 ;
/* Get the PCH temperature threshold value */
pch_thr_temp = GET_PCH_TEMP ( WPT_TEMP_TSR & readw ( ptd - > hw_base + WPT_TSPM ) ) ;
/* Get the PCH current temperature value */
pch_cur_temp = GET_PCH_TEMP ( WPT_TEMP_TSR & readw ( ptd - > hw_base + WPT_TEMP ) ) ;
/*
* If current PCH temperature is higher than configured PCH threshold
* value , run some delay loop with sleep to let the current temperature
* go down below the threshold value which helps to allow system enter
* lower power S0ix suspend state . Even after delay loop if PCH current
* temperature stays above threshold , notify the warning message
* which helps to indentify the reason why S0ix entry was rejected .
*/
thermal: intel: pch: improve the cooling delay log
Previously, during suspend, intel_pch_thermal driver logs for every
cooling iteration, about the current PCH temperature and number of cooling
iterations that have been tried, like below
[ 100.955526] intel_pch_thermal 0000:00:14.2: CPU-PCH current temp [53C] higher than the threshold temp [50C], sleep 1 times for 100 ms duration
[ 101.064156] intel_pch_thermal 0000:00:14.2: CPU-PCH current temp [53C] higher than the threshold temp [50C], sleep 2 times for 100 ms duration
After changing the default delay_cnt to 600, in practice, it is common to
see tens of the above messages if the system is suspended when PCH
overheats. Thus, change this log message from dev_warn to dev_dbg because
it is only useful when we want to check the temperature trend.
At the same time, there is always a one-line message given by the driver
with the patch applied, with below four possibilities.
1. PCH is cool, no cooling delay needed
[ 1791.902853] intel_pch_thermal 0000:00:12.0: CPU-PCH is cool [48C]
2. PCH overheats and becomes cool after the cooling delays
[ 1475.511617] intel_pch_thermal 0000:00:12.0: CPU-PCH is cool [49C] after 30700 ms delay
3. PCH still overheats after the overall cooling timeout
[ 2250.157487] intel_pch_thermal 0000:00:12.0: CPU-PCH is hot [60C] after 60000 ms delay. S0ix might fail
4. PCH aborts cooling because of wakeup event detected during the delay
[ 1933.639509] intel_pch_thermal 0000:00:12.0: Wakeup event detected, abort cooling
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2022-05-19 22:35:08 +08:00
while ( pch_delay_cnt < delay_cnt ) {
thermal: intel: pch: enhance overheat handling
Commit ef63b043ac86 ("thermal: intel: pch: fix S0ix failure due to PCH
temperature above threshold") introduces delay loop mechanism that allows
PCH temperature to go down below threshold during suspend so it won't
block S0ix. And the default overall delay timeout is 1 second.
However, in practice, we found that the time it takes to cool the PCH down
below threshold highly depends on the initial PCH temperature when the
delay starts, as well as the ambient temperature.
And in some cases, the 1 second delay is not sufficient. As a result, the
system stays in a shallower power state like PCx instead of S0ix, and
drains the battery power, without user' notice.
To make sure S0ix is not blocked by the PCH overheating, we
1. expand the default overall timeout to 60 seconds.
2. make sure the temperature is below threshold rather than equal to it.
At the same time, as the cooling delay can be much longer and many wakeup
events (ACPI Power Button press, USB mouse move, etc) becomes valid in the
suspend_noirq phase, add detection of wakeup event so that the driver
does not delay blindly when the system suspend is likely to abort soon.
This patch may introduce longer suspend time, but only in the cases when
the system overheats and Linux used to enter a shallower S2idle state,
say, PCx instead of S0ix.
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2022-05-19 22:35:07 +08:00
if ( pch_cur_temp < pch_thr_temp )
break ;
thermal: intel: pch: improve the cooling delay log
Previously, during suspend, intel_pch_thermal driver logs for every
cooling iteration, about the current PCH temperature and number of cooling
iterations that have been tried, like below
[ 100.955526] intel_pch_thermal 0000:00:14.2: CPU-PCH current temp [53C] higher than the threshold temp [50C], sleep 1 times for 100 ms duration
[ 101.064156] intel_pch_thermal 0000:00:14.2: CPU-PCH current temp [53C] higher than the threshold temp [50C], sleep 2 times for 100 ms duration
After changing the default delay_cnt to 600, in practice, it is common to
see tens of the above messages if the system is suspended when PCH
overheats. Thus, change this log message from dev_warn to dev_dbg because
it is only useful when we want to check the temperature trend.
At the same time, there is always a one-line message given by the driver
with the patch applied, with below four possibilities.
1. PCH is cool, no cooling delay needed
[ 1791.902853] intel_pch_thermal 0000:00:12.0: CPU-PCH is cool [48C]
2. PCH overheats and becomes cool after the cooling delays
[ 1475.511617] intel_pch_thermal 0000:00:12.0: CPU-PCH is cool [49C] after 30700 ms delay
3. PCH still overheats after the overall cooling timeout
[ 2250.157487] intel_pch_thermal 0000:00:12.0: CPU-PCH is hot [60C] after 60000 ms delay. S0ix might fail
4. PCH aborts cooling because of wakeup event detected during the delay
[ 1933.639509] intel_pch_thermal 0000:00:12.0: Wakeup event detected, abort cooling
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2022-05-19 22:35:08 +08:00
if ( pm_wakeup_pending ( ) ) {
dev_warn ( & ptd - > pdev - > dev , " Wakeup event detected, abort cooling \n " ) ;
return 0 ;
}
2020-11-06 22:36:33 +05:30
thermal: intel: pch: improve the cooling delay log
Previously, during suspend, intel_pch_thermal driver logs for every
cooling iteration, about the current PCH temperature and number of cooling
iterations that have been tried, like below
[ 100.955526] intel_pch_thermal 0000:00:14.2: CPU-PCH current temp [53C] higher than the threshold temp [50C], sleep 1 times for 100 ms duration
[ 101.064156] intel_pch_thermal 0000:00:14.2: CPU-PCH current temp [53C] higher than the threshold temp [50C], sleep 2 times for 100 ms duration
After changing the default delay_cnt to 600, in practice, it is common to
see tens of the above messages if the system is suspended when PCH
overheats. Thus, change this log message from dev_warn to dev_dbg because
it is only useful when we want to check the temperature trend.
At the same time, there is always a one-line message given by the driver
with the patch applied, with below four possibilities.
1. PCH is cool, no cooling delay needed
[ 1791.902853] intel_pch_thermal 0000:00:12.0: CPU-PCH is cool [48C]
2. PCH overheats and becomes cool after the cooling delays
[ 1475.511617] intel_pch_thermal 0000:00:12.0: CPU-PCH is cool [49C] after 30700 ms delay
3. PCH still overheats after the overall cooling timeout
[ 2250.157487] intel_pch_thermal 0000:00:12.0: CPU-PCH is hot [60C] after 60000 ms delay. S0ix might fail
4. PCH aborts cooling because of wakeup event detected during the delay
[ 1933.639509] intel_pch_thermal 0000:00:12.0: Wakeup event detected, abort cooling
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2022-05-19 22:35:08 +08:00
pch_delay_cnt + + ;
dev_dbg ( & ptd - > pdev - > dev ,
2020-11-06 22:36:33 +05:30
" CPU-PCH current temp [%dC] higher than the threshold temp [%dC], sleep %d times for %d ms duration \n " ,
pch_cur_temp , pch_thr_temp , pch_delay_cnt , delay_timeout ) ;
msleep ( delay_timeout ) ;
/* Read the PCH current temperature for next cycle. */
pch_cur_temp = GET_PCH_TEMP ( WPT_TEMP_TSR & readw ( ptd - > hw_base + WPT_TEMP ) ) ;
}
2016-06-23 15:02:46 -07:00
thermal: intel: pch: enhance overheat handling
Commit ef63b043ac86 ("thermal: intel: pch: fix S0ix failure due to PCH
temperature above threshold") introduces delay loop mechanism that allows
PCH temperature to go down below threshold during suspend so it won't
block S0ix. And the default overall delay timeout is 1 second.
However, in practice, we found that the time it takes to cool the PCH down
below threshold highly depends on the initial PCH temperature when the
delay starts, as well as the ambient temperature.
And in some cases, the 1 second delay is not sufficient. As a result, the
system stays in a shallower power state like PCx instead of S0ix, and
drains the battery power, without user' notice.
To make sure S0ix is not blocked by the PCH overheating, we
1. expand the default overall timeout to 60 seconds.
2. make sure the temperature is below threshold rather than equal to it.
At the same time, as the cooling delay can be much longer and many wakeup
events (ACPI Power Button press, USB mouse move, etc) becomes valid in the
suspend_noirq phase, add detection of wakeup event so that the driver
does not delay blindly when the system suspend is likely to abort soon.
This patch may introduce longer suspend time, but only in the cases when
the system overheats and Linux used to enter a shallower S2idle state,
say, PCx instead of S0ix.
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2022-05-19 22:35:07 +08:00
if ( pch_cur_temp > = pch_thr_temp )
2020-11-06 22:36:33 +05:30
dev_warn ( & ptd - > pdev - > dev ,
thermal: intel: pch: improve the cooling delay log
Previously, during suspend, intel_pch_thermal driver logs for every
cooling iteration, about the current PCH temperature and number of cooling
iterations that have been tried, like below
[ 100.955526] intel_pch_thermal 0000:00:14.2: CPU-PCH current temp [53C] higher than the threshold temp [50C], sleep 1 times for 100 ms duration
[ 101.064156] intel_pch_thermal 0000:00:14.2: CPU-PCH current temp [53C] higher than the threshold temp [50C], sleep 2 times for 100 ms duration
After changing the default delay_cnt to 600, in practice, it is common to
see tens of the above messages if the system is suspended when PCH
overheats. Thus, change this log message from dev_warn to dev_dbg because
it is only useful when we want to check the temperature trend.
At the same time, there is always a one-line message given by the driver
with the patch applied, with below four possibilities.
1. PCH is cool, no cooling delay needed
[ 1791.902853] intel_pch_thermal 0000:00:12.0: CPU-PCH is cool [48C]
2. PCH overheats and becomes cool after the cooling delays
[ 1475.511617] intel_pch_thermal 0000:00:12.0: CPU-PCH is cool [49C] after 30700 ms delay
3. PCH still overheats after the overall cooling timeout
[ 2250.157487] intel_pch_thermal 0000:00:12.0: CPU-PCH is hot [60C] after 60000 ms delay. S0ix might fail
4. PCH aborts cooling because of wakeup event detected during the delay
[ 1933.639509] intel_pch_thermal 0000:00:12.0: Wakeup event detected, abort cooling
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2022-05-19 22:35:08 +08:00
" CPU-PCH is hot [%dC] after %d ms delay. S0ix might fail \n " ,
pch_cur_temp , pch_delay_cnt * delay_timeout ) ;
else {
if ( pch_delay_cnt )
dev_info ( & ptd - > pdev - > dev ,
" CPU-PCH is cool [%dC] after %d ms delay \n " ,
pch_cur_temp , pch_delay_cnt * delay_timeout ) ;
else
dev_info ( & ptd - > pdev - > dev ,
" CPU-PCH is cool [%dC] \n " ,
pch_cur_temp ) ;
}
2016-06-23 15:02:46 -07:00
return 0 ;
}
static int pch_wpt_resume ( struct pch_thermal_device * ptd )
{
u8 tsel ;
if ( ptd - > bios_enabled )
return 0 ;
tsel = readb ( ptd - > hw_base + WPT_TSEL ) ;
writeb ( tsel | WPT_TSEL_ETS , ptd - > hw_base + WPT_TSEL ) ;
return 0 ;
}
2015-06-10 13:34:24 -07:00
/* Board-specific hardware callbacks, selected per board in board_info[]. */
struct pch_dev_ops {
	/* Set up the sensor and report the number of trip points found. */
	int (*hw_init)(struct pch_thermal_device *ptd, int *nr_trips);
	/* Read the current temperature into *temp. */
	int (*get_temp)(struct pch_thermal_device *ptd, int *temp);
	/* Power-management hooks called from the driver's PM callbacks. */
	int (*suspend)(struct pch_thermal_device *ptd);
	int (*resume)(struct pch_thermal_device *ptd);
};
/* dev ops for Wildcat Point */
2015-10-11 13:01:28 +02:00
static const struct pch_dev_ops pch_dev_ops_wpt = {
2015-06-10 13:34:24 -07:00
. hw_init = pch_wpt_init ,
. get_temp = pch_wpt_get_temp ,
2016-06-23 15:02:46 -07:00
. suspend = pch_wpt_suspend ,
. resume = pch_wpt_resume ,
2015-06-10 13:34:24 -07:00
} ;
2015-09-11 20:06:59 -07:00
static int pch_thermal_get_temp ( struct thermal_zone_device * tzd , int * temp )
2015-06-10 13:34:24 -07:00
{
struct pch_thermal_device * ptd = tzd - > devdata ;
return ptd - > ops - > get_temp ( ptd , temp ) ;
}
static int pch_get_trip_type ( struct thermal_zone_device * tzd , int trip ,
enum thermal_trip_type * type )
{
struct pch_thermal_device * ptd = tzd - > devdata ;
if ( ptd - > crt_trip_id = = trip )
* type = THERMAL_TRIP_CRITICAL ;
else if ( ptd - > hot_trip_id = = trip )
* type = THERMAL_TRIP_HOT ;
2016-10-03 12:36:24 -07:00
else if ( ptd - > psv_trip_id = = trip )
* type = THERMAL_TRIP_PASSIVE ;
2015-06-10 13:34:24 -07:00
else
return - EINVAL ;
return 0 ;
}
2015-09-11 20:06:59 -07:00
static int pch_get_trip_temp ( struct thermal_zone_device * tzd , int trip , int * temp )
2015-06-10 13:34:24 -07:00
{
struct pch_thermal_device * ptd = tzd - > devdata ;
if ( ptd - > crt_trip_id = = trip )
* temp = ptd - > crt_temp ;
else if ( ptd - > hot_trip_id = = trip )
* temp = ptd - > hot_temp ;
2016-10-03 12:36:24 -07:00
else if ( ptd - > psv_trip_id = = trip )
* temp = ptd - > psv_temp ;
2015-06-10 13:34:24 -07:00
else
return - EINVAL ;
return 0 ;
}
2020-12-22 01:23:44 +08:00
static void pch_critical ( struct thermal_zone_device * tzd )
{
dev_dbg ( & tzd - > device , " %s: critical temperature reached \n " , tzd - > type ) ;
}
2015-06-10 13:34:24 -07:00
static struct thermal_zone_device_ops tzd_ops = {
. get_temp = pch_thermal_get_temp ,
. get_trip_type = pch_get_trip_type ,
. get_trip_temp = pch_get_trip_temp ,
2020-12-22 01:23:44 +08:00
. critical = pch_critical ,
2015-06-10 13:34:24 -07:00
} ;
2016-10-19 05:59:29 +09:00
/*
 * Board identifiers. Used as the pci_device_id driver_data value and as
 * the index into board_info[].
 */
enum board_ids {
	board_hsw,
	board_wpt,
	board_skl,
	board_cnl,
	board_cml,
	board_lwb,
};
static const struct board_info {
const char * name ;
const struct pch_dev_ops * ops ;
} board_info [ ] = {
[ board_hsw ] = {
. name = " pch_haswell " ,
. ops = & pch_dev_ops_wpt ,
} ,
[ board_wpt ] = {
. name = " pch_wildcat_point " ,
. ops = & pch_dev_ops_wpt ,
} ,
[ board_skl ] = {
. name = " pch_skylake " ,
. ops = & pch_dev_ops_wpt ,
} ,
2017-11-01 09:39:51 -07:00
[ board_cnl ] = {
. name = " pch_cannonlake " ,
. ops = & pch_dev_ops_wpt ,
} ,
2019-12-11 12:00:43 -08:00
[ board_cml ] = {
. name = " pch_cometlake " ,
. ops = & pch_dev_ops_wpt ,
2020-11-13 12:49:16 -08:00
} ,
[ board_lwb ] = {
. name = " pch_lewisburg " ,
. ops = & pch_dev_ops_wpt ,
} ,
2016-10-19 05:59:29 +09:00
} ;
2015-06-10 13:34:24 -07:00
static int intel_pch_thermal_probe ( struct pci_dev * pdev ,
const struct pci_device_id * id )
{
2016-10-19 05:59:29 +09:00
enum board_ids board_id = id - > driver_data ;
const struct board_info * bi = & board_info [ board_id ] ;
2015-06-10 13:34:24 -07:00
struct pch_thermal_device * ptd ;
int err ;
int nr_trips ;
ptd = devm_kzalloc ( & pdev - > dev , sizeof ( * ptd ) , GFP_KERNEL ) ;
if ( ! ptd )
return - ENOMEM ;
2016-10-19 05:59:29 +09:00
ptd - > ops = bi - > ops ;
2015-06-10 13:34:24 -07:00
pci_set_drvdata ( pdev , ptd ) ;
ptd - > pdev = pdev ;
err = pci_enable_device ( pdev ) ;
if ( err ) {
dev_err ( & pdev - > dev , " failed to enable pci device \n " ) ;
return err ;
}
err = pci_request_regions ( pdev , driver_name ) ;
if ( err ) {
dev_err ( & pdev - > dev , " failed to request pci region \n " ) ;
goto error_disable ;
}
ptd - > hw_base = pci_ioremap_bar ( pdev , 0 ) ;
if ( ! ptd - > hw_base ) {
err = - ENOMEM ;
dev_err ( & pdev - > dev , " failed to map mem base \n " ) ;
goto error_release ;
}
err = ptd - > ops - > hw_init ( ptd , & nr_trips ) ;
if ( err )
goto error_cleanup ;
2016-10-19 05:59:29 +09:00
ptd - > tzd = thermal_zone_device_register ( bi - > name , nr_trips , 0 , ptd ,
2015-06-10 13:34:24 -07:00
& tzd_ops , NULL , 0 , 0 ) ;
if ( IS_ERR ( ptd - > tzd ) ) {
dev_err ( & pdev - > dev , " Failed to register thermal zone %s \n " ,
2016-10-19 05:59:29 +09:00
bi - > name ) ;
2015-06-10 13:34:24 -07:00
err = PTR_ERR ( ptd - > tzd ) ;
goto error_cleanup ;
}
2020-06-29 14:29:22 +02:00
err = thermal_zone_device_enable ( ptd - > tzd ) ;
if ( err )
goto err_unregister ;
2015-06-10 13:34:24 -07:00
return 0 ;
2020-06-29 14:29:22 +02:00
err_unregister :
thermal_zone_device_unregister ( ptd - > tzd ) ;
2015-06-10 13:34:24 -07:00
error_cleanup :
iounmap ( ptd - > hw_base ) ;
error_release :
pci_release_regions ( pdev ) ;
error_disable :
pci_disable_device ( pdev ) ;
dev_err ( & pdev - > dev , " pci device failed to probe \n " ) ;
return err ;
}
static void intel_pch_thermal_remove ( struct pci_dev * pdev )
{
struct pch_thermal_device * ptd = pci_get_drvdata ( pdev ) ;
thermal_zone_device_unregister ( ptd - > tzd ) ;
iounmap ( ptd - > hw_base ) ;
pci_set_drvdata ( pdev , NULL ) ;
2019-12-06 15:55:31 +08:00
pci_release_regions ( pdev ) ;
2015-06-10 13:34:24 -07:00
pci_disable_device ( pdev ) ;
}
2022-05-19 22:35:06 +08:00
static int intel_pch_thermal_suspend_noirq ( struct device * device )
2016-06-23 15:02:46 -07:00
{
2019-07-24 20:23:37 +08:00
struct pch_thermal_device * ptd = dev_get_drvdata ( device ) ;
2016-06-23 15:02:46 -07:00
return ptd - > ops - > suspend ( ptd ) ;
}
static int intel_pch_thermal_resume ( struct device * device )
{
2019-07-24 20:23:37 +08:00
struct pch_thermal_device * ptd = dev_get_drvdata ( device ) ;
2016-06-23 15:02:46 -07:00
return ptd - > ops - > resume ( ptd ) ;
}
2017-08-02 23:28:40 +05:30
static const struct pci_device_id intel_pch_thermal_id [ ] = {
2016-10-19 05:59:29 +09:00
{ PCI_DEVICE ( PCI_VENDOR_ID_INTEL , PCH_THERMAL_DID_HSW_1 ) ,
. driver_data = board_hsw , } ,
{ PCI_DEVICE ( PCI_VENDOR_ID_INTEL , PCH_THERMAL_DID_HSW_2 ) ,
. driver_data = board_hsw , } ,
{ PCI_DEVICE ( PCI_VENDOR_ID_INTEL , PCH_THERMAL_DID_WPT ) ,
. driver_data = board_wpt , } ,
{ PCI_DEVICE ( PCI_VENDOR_ID_INTEL , PCH_THERMAL_DID_SKL ) ,
. driver_data = board_skl , } ,
{ PCI_DEVICE ( PCI_VENDOR_ID_INTEL , PCH_THERMAL_DID_SKL_H ) ,
. driver_data = board_skl , } ,
2017-11-01 09:39:51 -07:00
{ PCI_DEVICE ( PCI_VENDOR_ID_INTEL , PCH_THERMAL_DID_CNL ) ,
. driver_data = board_cnl , } ,
{ PCI_DEVICE ( PCI_VENDOR_ID_INTEL , PCH_THERMAL_DID_CNL_H ) ,
. driver_data = board_cnl , } ,
2020-07-30 13:55:03 +05:30
{ PCI_DEVICE ( PCI_VENDOR_ID_INTEL , PCH_THERMAL_DID_CNL_LP ) ,
. driver_data = board_cnl , } ,
2019-12-11 12:00:43 -08:00
{ PCI_DEVICE ( PCI_VENDOR_ID_INTEL , PCH_THERMAL_DID_CML_H ) ,
. driver_data = board_cml , } ,
2020-11-13 12:49:16 -08:00
{ PCI_DEVICE ( PCI_VENDOR_ID_INTEL , PCH_THERMAL_DID_LWB ) ,
. driver_data = board_lwb , } ,
2015-06-10 13:34:24 -07:00
{ 0 , } ,
} ;
MODULE_DEVICE_TABLE ( pci , intel_pch_thermal_id ) ;
2016-06-23 15:02:46 -07:00
static const struct dev_pm_ops intel_pch_pm_ops = {
2022-05-19 22:35:06 +08:00
. suspend_noirq = intel_pch_thermal_suspend_noirq ,
2016-06-23 15:02:46 -07:00
. resume = intel_pch_thermal_resume ,
} ;
2015-06-10 13:34:24 -07:00
static struct pci_driver intel_pch_thermal_driver = {
. name = " intel_pch_thermal " ,
. id_table = intel_pch_thermal_id ,
. probe = intel_pch_thermal_probe ,
. remove = intel_pch_thermal_remove ,
2016-06-23 15:02:46 -07:00
. driver . pm = & intel_pch_pm_ops ,
2015-06-10 13:34:24 -07:00
} ;
module_pci_driver ( intel_pch_thermal_driver ) ;
/* Module metadata; the licence string must match a recognised identifier. */
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Intel PCH Thermal driver");