2018-01-26 12:50:27 -06:00
// SPDX-License-Identifier: GPL-2.0
2005-04-16 15:20:36 -07:00
/*
2007-11-28 12:23:18 -08:00
* ( C ) Copyright 2002 - 2004 , 2007 Greg Kroah - Hartman < greg @ kroah . com >
* ( C ) Copyright 2007 Novell Inc .
2005-04-16 15:20:36 -07:00
*/
# include <linux/pci.h>
# include <linux/module.h>
# include <linux/init.h>
# include <linux/device.h>
2005-07-06 19:56:03 +02:00
# include <linux/mempolicy.h>
2005-10-30 15:03:48 -08:00
# include <linux/string.h>
# include <linux/slab.h>
2005-11-07 00:59:43 -08:00
# include <linux/sched.h>
2020-06-25 18:34:42 -04:00
# include <linux/sched/isolation.h>
2008-12-31 23:54:56 +10:30
# include <linux/cpu.h>
2010-02-17 23:44:58 +01:00
# include <linux/pm_runtime.h>
2011-07-06 10:51:40 +02:00
# include <linux/suspend.h>
2013-11-27 15:19:25 -07:00
# include <linux/kexec.h>
2018-04-28 08:21:58 +05:30
# include <linux/of_device.h>
# include <linux/acpi.h>
2020-09-11 10:12:44 +02:00
# include <linux/dma-map-ops.h>
bus: platform,amba,fsl-mc,PCI: Add device DMA ownership management
The devices on platform/amba/fsl-mc/PCI buses could be bound to drivers
with the device DMA managed by kernel drivers or user-space applications.
Unfortunately, multiple devices may be placed in the same IOMMU group
because they cannot be isolated from each other. The DMA on these devices
must either be entirely under kernel control or userspace control, never
a mixture. Otherwise the driver integrity is not guaranteed because they
could access each other through the peer-to-peer accesses which by-pass
the IOMMU protection.
This checks and sets the default DMA mode during driver binding, and
cleans up during driver unbinding. In the default mode, the device DMA is
managed by the device driver which handles DMA operations through the
kernel DMA APIs (see Documentation/core-api/dma-api.rst).
For cases where the devices are assigned for userspace control through the
userspace driver framework (i.e. VFIO), the drivers (for example, vfio_pci,
vfio_platform, etc.) may set a new flag (driver_managed_dma) to skip this
default setting, on the assumption that the drivers know what they are
doing with the device DMA.
Calling iommu_device_use_default_domain() before {of,acpi}_dma_configure()
is currently a problem. As things stand, the IOMMU driver ignores the
initial iommu_probe_device() call when the device is added, since at
that point it has no fwspec yet. In this situation,
{of,acpi}_iommu_configure() retrigger iommu_probe_device() after
the IOMMU driver has seen the firmware data via .of_xlate and learned that
it is actually responsible for the given device. As a result, until
that gets fixed, iommu_device_use_default_domain() goes at the end, and
arch_teardown_dma_ops() is called if it fails.
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Stuart Yoder <stuyoder@gmail.com>
Cc: Laurentiu Tudor <laurentiu.tudor@nxp.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Link: https://lore.kernel.org/r/20220418005000.897664-5-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
2022-04-18 08:49:53 +08:00
# include <linux/iommu.h>
2005-04-16 15:20:36 -07:00
# include "pci.h"
2018-03-09 11:06:56 -06:00
# include "pcie/portdrv.h"
2005-04-16 15:20:36 -07:00
2005-06-30 02:18:12 -07:00
/* One dynamically added PCI device ID, kept on a driver's dynids list. */
struct pci_dynid {
/* linkage into drv->dynids.list (see pci_add_dynid()) */
struct list_head node ;
/* the device ID that is matched against PCI devices */
struct pci_device_id id ;
} ;
2005-04-16 15:20:36 -07:00
2009-09-03 15:26:36 +09:00
/**
* pci_add_dynid - add a new PCI device ID to this driver and re - probe devices
* @ drv : target pci driver
* @ vendor : PCI vendor ID
* @ device : PCI device ID
* @ subvendor : PCI subvendor ID
* @ subdevice : PCI subdevice ID
* @ class : PCI class
* @ class_mask : PCI class mask
* @ driver_data : private driver data
*
* Adds a new dynamic pci device ID to this driver and causes the
* driver to probe for all devices again . @ drv must have been
* registered prior to calling this function .
*
* CONTEXT :
* Does GFP_KERNEL allocation .
*
* RETURNS :
* 0 on success , - errno on failure .
*/
int pci_add_dynid ( struct pci_driver * drv ,
unsigned int vendor , unsigned int device ,
unsigned int subvendor , unsigned int subdevice ,
unsigned int class , unsigned int class_mask ,
unsigned long driver_data )
{
struct pci_dynid * dynid ;
dynid = kzalloc ( sizeof ( * dynid ) , GFP_KERNEL ) ;
if ( ! dynid )
return - ENOMEM ;
dynid - > id . vendor = vendor ;
dynid - > id . device = device ;
dynid - > id . subvendor = subvendor ;
dynid - > id . subdevice = subdevice ;
dynid - > id . class = class ;
dynid - > id . class_mask = class_mask ;
dynid - > id . driver_data = driver_data ;
2005-07-06 09:09:38 -07:00
2009-09-03 15:26:36 +09:00
spin_lock ( & drv - > dynids . lock ) ;
list_add_tail ( & dynid - > node , & drv - > dynids . list ) ;
spin_unlock ( & drv - > dynids . lock ) ;
2014-07-25 10:36:14 +02:00
return driver_attach ( & drv - > driver ) ;
2009-09-03 15:26:36 +09:00
}
2014-04-25 14:32:25 -06:00
EXPORT_SYMBOL_GPL ( pci_add_dynid ) ;
2009-09-03 15:26:36 +09:00
static void pci_free_dynids ( struct pci_driver * drv )
{
struct pci_dynid * dynid , * n ;
2005-07-06 09:09:38 -07:00
2009-09-03 15:26:36 +09:00
spin_lock ( & drv - > dynids . lock ) ;
list_for_each_entry_safe ( dynid , n , & drv - > dynids . list , node ) {
list_del ( & dynid - > node ) ;
kfree ( dynid ) ;
}
spin_unlock ( & drv - > dynids . lock ) ;
}
2020-11-17 13:44:08 +08:00
/**
* pci_match_id - See if a PCI device matches a given pci_id table
* @ ids : array of PCI device ID structures to search in
* @ dev : the PCI device structure to match against .
*
* Used by a driver to check whether a PCI device is in its list of
* supported devices . Returns the matching pci_device_id structure or
* % NULL if there is no match .
*
* Deprecated ; don ' t use this as it will not catch any dynamic IDs
* that a driver might want to check for .
*/
const struct pci_device_id * pci_match_id ( const struct pci_device_id * ids ,
struct pci_dev * dev )
{
if ( ids ) {
while ( ids - > vendor | | ids - > subvendor | | ids - > class_mask ) {
if ( pci_match_one_device ( ids , dev ) )
return ids ;
ids + + ;
}
}
return NULL ;
}
EXPORT_SYMBOL ( pci_match_id ) ;
/*
 * Wildcard ID: vendor, device, subvendor and subdevice are all PCI_ANY_ID.
 * pci_match_device() returns a pointer to it when driver_override is set
 * and no dynamic or static table entry matched.
 */
static const struct pci_device_id pci_device_id_any = {
. vendor = PCI_ANY_ID ,
. device = PCI_ANY_ID ,
. subvendor = PCI_ANY_ID ,
. subdevice = PCI_ANY_ID ,
} ;
/**
* pci_match_device - See if a device matches a driver ' s list of IDs
* @ drv : the PCI driver to match against
* @ dev : the PCI device structure to match against
*
* Used by a driver to check whether a PCI device is in its list of
* supported devices or in the dynids list , which may have been augmented
* via the sysfs " new_id " file . Returns the matching pci_device_id
* structure or % NULL if there is no match .
*/
static const struct pci_device_id * pci_match_device ( struct pci_driver * drv ,
struct pci_dev * dev )
{
struct pci_dynid * dynid ;
2021-08-26 13:39:08 +03:00
const struct pci_device_id * found_id = NULL , * ids ;
2020-11-17 13:44:08 +08:00
/* When driver_override is set, only bind to the matching driver */
if ( dev - > driver_override & & strcmp ( dev - > driver_override , drv - > name ) )
return NULL ;
/* Look at the dynamic ids first, before the static ones */
spin_lock ( & drv - > dynids . lock ) ;
list_for_each_entry ( dynid , & drv - > dynids . list , node ) {
if ( pci_match_one_device ( & dynid - > id , dev ) ) {
found_id = & dynid - > id ;
break ;
}
}
spin_unlock ( & drv - > dynids . lock ) ;
2021-08-26 13:39:08 +03:00
if ( found_id )
return found_id ;
for ( ids = drv - > id_table ; ( found_id = pci_match_id ( ids , dev ) ) ;
ids = found_id + 1 ) {
/*
* The match table is split based on driver_override .
* In case override_only was set , enforce driver_override
* matching .
*/
if ( found_id - > override_only ) {
if ( dev - > driver_override )
return found_id ;
} else {
return found_id ;
}
}
2020-11-17 13:44:08 +08:00
/* driver_override will always match, send a dummy id */
2021-08-26 13:39:08 +03:00
if ( dev - > driver_override )
return & pci_device_id_any ;
return NULL ;
2020-11-17 13:44:08 +08:00
}
2005-04-16 15:20:36 -07:00
/**
2020-10-23 18:33:10 +02:00
* new_id_store - sysfs frontend to pci_add_dynid ( )
2005-10-23 11:57:38 -07:00
* @ driver : target device driver
* @ buf : buffer for scanning device ID data
* @ count : input size
2005-04-16 15:20:36 -07:00
*
2009-09-03 15:26:36 +09:00
* Allow PCI IDs to be added to an existing driver via sysfs .
2005-04-16 15:20:36 -07:00
*/
2017-06-09 11:03:06 +02:00
static ssize_t new_id_store ( struct device_driver * driver , const char * buf ,
2014-04-18 20:13:49 -04:00
size_t count )
2005-04-16 15:20:36 -07:00
{
struct pci_driver * pdrv = to_pci_driver ( driver ) ;
2008-08-17 21:06:59 +02:00
const struct pci_device_id * ids = pdrv - > id_table ;
2019-02-08 09:54:39 -07:00
u32 vendor , device , subvendor = PCI_ANY_ID ,
2014-04-18 20:13:49 -04:00
subdevice = PCI_ANY_ID , class = 0 , class_mask = 0 ;
unsigned long driver_data = 0 ;
2020-10-06 16:54:39 -05:00
int fields ;
2014-04-01 21:32:59 -04:00
int retval = 0 ;
2005-04-16 15:20:36 -07:00
2008-08-17 21:06:59 +02:00
fields = sscanf ( buf , " %x %x %x %x %x %x %lx " ,
2005-04-16 15:20:36 -07:00
& vendor , & device , & subvendor , & subdevice ,
& class , & class_mask , & driver_data ) ;
2007-04-07 17:21:28 +02:00
if ( fields < 2 )
2005-04-16 15:20:36 -07:00
return - EINVAL ;
2014-04-01 21:32:59 -04:00
if ( fields ! = 7 ) {
struct pci_dev * pdev = kzalloc ( sizeof ( * pdev ) , GFP_KERNEL ) ;
if ( ! pdev )
return - ENOMEM ;
pdev - > vendor = vendor ;
pdev - > device = device ;
pdev - > subsystem_vendor = subvendor ;
pdev - > subsystem_device = subdevice ;
pdev - > class = class ;
2020-11-17 13:44:09 +08:00
if ( pci_match_device ( pdrv , pdev ) )
2014-04-01 21:32:59 -04:00
retval = - EEXIST ;
kfree ( pdev ) ;
if ( retval )
return retval ;
}
2008-08-17 21:06:59 +02:00
/* Only accept driver_data values that match an existing id_table
entry */
2008-11-25 19:36:10 -08:00
if ( ids ) {
retval = - EINVAL ;
while ( ids - > vendor | | ids - > subvendor | | ids - > class_mask ) {
if ( driver_data = = ids - > driver_data ) {
retval = 0 ;
break ;
}
ids + + ;
2008-08-17 21:06:59 +02:00
}
2008-11-25 19:36:10 -08:00
if ( retval ) /* No match */
return retval ;
2008-08-17 21:06:59 +02:00
}
2009-09-03 15:26:36 +09:00
retval = pci_add_dynid ( pdrv , vendor , device , subvendor , subdevice ,
class , class_mask , driver_data ) ;
2006-08-28 11:43:25 -07:00
if ( retval )
return retval ;
2005-04-16 15:20:36 -07:00
return count ;
}
2017-06-09 11:03:06 +02:00
static DRIVER_ATTR_WO ( new_id ) ;
2005-04-16 15:20:36 -07:00
2009-02-23 21:52:23 -08:00
/**
2020-10-23 18:33:10 +02:00
* remove_id_store - remove a PCI device ID from this driver
2009-02-23 21:52:23 -08:00
* @ driver : target device driver
* @ buf : buffer for scanning device ID data
* @ count : input size
*
* Removes a dynamic pci device ID to this driver .
*/
2017-06-09 11:03:06 +02:00
static ssize_t remove_id_store ( struct device_driver * driver , const char * buf ,
2014-04-18 20:13:49 -04:00
size_t count )
2009-02-23 21:52:23 -08:00
{
struct pci_dynid * dynid , * n ;
struct pci_driver * pdrv = to_pci_driver ( driver ) ;
2019-02-08 09:54:39 -07:00
u32 vendor , device , subvendor = PCI_ANY_ID ,
2009-02-23 21:52:23 -08:00
subdevice = PCI_ANY_ID , class = 0 , class_mask = 0 ;
2020-10-06 16:54:39 -05:00
int fields ;
2015-09-10 18:40:31 +08:00
size_t retval = - ENODEV ;
2009-02-23 21:52:23 -08:00
fields = sscanf ( buf , " %x %x %x %x %x %x " ,
& vendor , & device , & subvendor , & subdevice ,
& class , & class_mask ) ;
if ( fields < 2 )
return - EINVAL ;
spin_lock ( & pdrv - > dynids . lock ) ;
list_for_each_entry_safe ( dynid , n , & pdrv - > dynids . list , node ) {
struct pci_device_id * id = & dynid - > id ;
if ( ( id - > vendor = = vendor ) & &
( id - > device = = device ) & &
( subvendor = = PCI_ANY_ID | | id - > subvendor = = subvendor ) & &
( subdevice = = PCI_ANY_ID | | id - > subdevice = = subdevice ) & &
! ( ( id - > class ^ class ) & class_mask ) ) {
list_del ( & dynid - > node ) ;
kfree ( dynid ) ;
2015-09-10 18:40:31 +08:00
retval = count ;
2009-02-23 21:52:23 -08:00
break ;
}
}
spin_unlock ( & pdrv - > dynids . lock ) ;
2015-09-10 18:40:31 +08:00
return retval ;
2009-02-23 21:52:23 -08:00
}
2017-06-09 11:03:06 +02:00
static DRIVER_ATTR_WO ( remove_id ) ;
2009-02-23 21:52:23 -08:00
2013-10-07 14:51:20 -06:00
/*
 * NULL-terminated table of the two driver attributes declared in this
 * file with DRIVER_ATTR_WO(): new_id and remove_id.
 */
static struct attribute * pci_drv_attrs [ ] = {
& driver_attr_new_id . attr ,
& driver_attr_remove_id . attr ,
NULL ,
2012-08-08 14:47:51 +04:00
} ;
2013-10-07 14:51:20 -06:00
/* NOTE(review): presumably defines pci_drv_groups from pci_drv_attrs -- confirm against the macro definition. */
ATTRIBUTE_GROUPS ( pci_drv ) ;
2009-02-23 21:52:23 -08:00
2008-12-31 23:54:56 +10:30
/*
 * Argument bundle for local_pci_probe(), which takes a single void pointer
 * so that pci_call_probe() can also run it through work_on_cpu().
 */
struct drv_dev_and_id {
/* driver whose ->probe() is invoked */
struct pci_driver * drv ;
/* device being probed */
struct pci_dev * dev ;
/* matching ID handed to ->probe() */
const struct pci_device_id * id ;
} ;
static long local_pci_probe ( void * _ddi )
{
struct drv_dev_and_id * ddi = _ddi ;
2012-11-20 16:08:22 +08:00
struct pci_dev * pci_dev = ddi - > dev ;
struct pci_driver * pci_drv = ddi - > drv ;
struct device * dev = & pci_dev - > dev ;
2010-06-08 15:23:51 -04:00
int rc ;
2012-11-20 16:08:22 +08:00
/*
* Unbound PCI devices are always put in D0 , regardless of
* runtime PM status . During probe , the device is set to
* active and the usage count is incremented . If the driver
2015-09-18 03:08:40 +02:00
* supports runtime PM , it should call pm_runtime_put_noidle ( ) ,
* or any other runtime PM helper function decrementing the usage
* count , in its probe routine and pm_runtime_get_noresume ( ) in
* its remove routine .
2010-06-08 15:23:51 -04:00
*/
2012-11-20 16:08:22 +08:00
pm_runtime_get_sync ( dev ) ;
2021-11-10 12:01:14 -06:00
pci_dev - > driver = pci_drv ;
2012-11-20 16:08:22 +08:00
rc = pci_drv - > probe ( pci_dev , ddi - > id ) ;
2013-11-01 14:34:55 -05:00
if ( ! rc )
return rc ;
if ( rc < 0 ) {
2021-11-10 12:01:14 -06:00
pci_dev - > driver = NULL ;
2012-11-20 16:08:22 +08:00
pm_runtime_put_sync ( dev ) ;
2013-11-01 14:34:55 -05:00
return rc ;
2010-06-08 15:23:51 -04:00
}
2013-11-01 14:34:55 -05:00
/*
* Probe function should return < 0 for failure , 0 for success
* Treat values > 0 as success , but warn .
*/
2019-10-07 07:55:18 -05:00
pci_warn ( pci_dev , " Driver probe function unexpectedly returned %d \n " ,
rc ) ;
2013-11-01 14:34:55 -05:00
return 0 ;
2008-12-31 23:54:56 +10:30
}
2017-05-24 10:15:32 +02:00
/*
 * Report whether @dev is a Virtual Function whose Physical Function
 * currently has its is_probed flag set.  Always false when the kernel is
 * built without CONFIG_PCI_IOV.
 */
#ifdef CONFIG_PCI_IOV
static bool pci_physfn_is_probed(struct pci_dev *dev)
{
	if (!dev->is_virtfn)
		return false;

	return dev->physfn->is_probed;
}
#else
static bool pci_physfn_is_probed(struct pci_dev *dev)
{
	return false;
}
#endif
2005-07-06 19:56:03 +02:00
static int pci_call_probe ( struct pci_driver * drv , struct pci_dev * dev ,
const struct pci_device_id * id )
{
2017-05-24 10:15:32 +02:00
int error , node , cpu ;
2008-12-31 23:54:56 +10:30
struct drv_dev_and_id ddi = { drv , dev , id } ;
2013-11-18 10:59:59 -07:00
/*
* Execute driver initialization on node where the device is
* attached . This way the driver likely allocates its local memory
* on the right node .
*/
2008-12-31 23:54:56 +10:30
node = dev_to_node ( & dev - > dev ) ;
2017-05-24 10:15:32 +02:00
dev - > is_probed = 1 ;
cpu_hotplug_disable ( ) ;
2013-11-18 10:59:59 -07:00
/*
2017-05-24 10:15:32 +02:00
* Prevent nesting work_on_cpu ( ) for the case where a Virtual Function
* device is probed from work_on_cpu ( ) of the Physical device .
2013-11-18 10:59:59 -07:00
*/
2017-05-24 10:15:32 +02:00
if ( node < 0 | | node > = MAX_NUMNODES | | ! node_online ( node ) | |
2022-02-07 16:59:03 +01:00
pci_physfn_is_probed ( dev ) ) {
2017-05-24 10:15:32 +02:00
cpu = nr_cpu_ids ;
2022-02-07 16:59:03 +01:00
} else {
cpumask_var_t wq_domain_mask ;
if ( ! zalloc_cpumask_var ( & wq_domain_mask , GFP_KERNEL ) ) {
error = - ENOMEM ;
goto out ;
}
cpumask_and ( wq_domain_mask ,
2022-02-07 16:59:06 +01:00
housekeeping_cpumask ( HK_TYPE_WQ ) ,
housekeeping_cpumask ( HK_TYPE_DOMAIN ) ) ;
2022-02-07 16:59:03 +01:00
2020-06-25 18:34:42 -04:00
cpu = cpumask_any_and ( cpumask_of_node ( node ) ,
2022-02-07 16:59:03 +01:00
wq_domain_mask ) ;
free_cpumask_var ( wq_domain_mask ) ;
}
2017-05-24 10:15:32 +02:00
if ( cpu < nr_cpu_ids )
error = work_on_cpu ( cpu , local_pci_probe , & ddi ) ;
else
2008-12-31 23:54:56 +10:30
error = local_pci_probe ( & ddi ) ;
2022-02-07 16:59:03 +01:00
out :
2017-05-24 10:15:32 +02:00
dev - > is_probed = 0 ;
cpu_hotplug_enable ( ) ;
2005-07-06 19:56:03 +02:00
return error ;
}
2005-04-16 15:20:36 -07:00
/**
2010-11-18 15:02:31 -08:00
* __pci_device_probe - check if a driver wants to claim a specific PCI device
2005-10-23 11:57:38 -07:00
* @ drv : driver to call to check if it wants the PCI device
* @ pci_dev : PCI device being probed
2013-11-14 11:28:18 -07:00
*
2005-10-23 11:57:38 -07:00
* returns 0 on success , else error .
2021-11-10 12:01:14 -06:00
* side - effect : pci_dev - > driver is set to drv when drv claims pci_dev .
2005-04-16 15:20:36 -07:00
*/
2014-04-18 20:13:49 -04:00
static int __pci_device_probe ( struct pci_driver * drv , struct pci_dev * pci_dev )
2005-06-30 02:18:12 -07:00
{
const struct pci_device_id * id ;
2005-04-16 15:20:36 -07:00
int error = 0 ;
2021-10-04 14:59:26 +02:00
if ( drv - > probe ) {
2005-06-30 02:18:12 -07:00
error = - ENODEV ;
id = pci_match_device ( drv , pci_dev ) ;
if ( id )
2005-07-06 19:56:03 +02:00
error = pci_call_probe ( drv , pci_dev , id ) ;
2005-04-16 15:20:36 -07:00
}
return error ;
}
2015-06-10 16:54:58 +08:00
/*
 * Weak default used by pci_device_probe(): does nothing and reports
 * success.  Being __weak, it can be replaced by another definition of
 * the same symbol.
 */
int __weak pcibios_alloc_irq ( struct pci_dev * dev )
{
return 0 ;
}
/*
 * Weak default counterpart of pcibios_alloc_irq(): intentionally empty.
 * Called on probe failure and on driver removal.
 */
void __weak pcibios_free_irq ( struct pci_dev * dev )
{
}
2017-04-13 01:51:40 +03:00
#ifdef CONFIG_PCI_IOV
/*
 * A device may be probed unless it is a Virtual Function whose Physical
 * Function has drivers_autoprobe cleared and no driver_override is set.
 */
static inline bool pci_device_can_probe(struct pci_dev *pdev)
{
	if (!pdev->is_virtfn)
		return true;

	if (pdev->physfn->sriov->drivers_autoprobe)
		return true;

	return pdev->driver_override != NULL;
}
#else
/* Without SR-IOV support every device may be probed. */
static inline bool pci_device_can_probe(struct pci_dev *pdev)
{
	return true;
}
#endif
2014-04-18 20:13:49 -04:00
static int pci_device_probe ( struct device * dev )
2005-04-16 15:20:36 -07:00
{
2015-06-10 16:54:58 +08:00
int error ;
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
struct pci_driver * drv = to_pci_driver ( dev - > driver ) ;
2019-05-01 11:00:16 -06:00
if ( ! pci_device_can_probe ( pci_dev ) )
return - ENODEV ;
2017-06-28 15:14:04 -05:00
pci_assign_irq ( pci_dev ) ;
2015-06-10 16:54:58 +08:00
error = pcibios_alloc_irq ( pci_dev ) ;
if ( error < 0 )
return error ;
2005-04-16 15:20:36 -07:00
pci_dev_get ( pci_dev ) ;
2019-05-01 11:00:16 -06:00
error = __pci_device_probe ( drv , pci_dev ) ;
if ( error ) {
pcibios_free_irq ( pci_dev ) ;
pci_dev_put ( pci_dev ) ;
2015-06-10 16:54:58 +08:00
}
2005-04-16 15:20:36 -07:00
return error ;
}
2021-07-13 21:35:22 +02:00
static void pci_device_remove ( struct device * dev )
2005-04-16 15:20:36 -07:00
{
2014-04-18 20:13:49 -04:00
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
2021-11-10 12:03:34 -06:00
struct pci_driver * drv = pci_dev - > driver ;
2005-04-16 15:20:36 -07:00
2021-10-04 14:59:25 +02:00
if ( drv - > remove ) {
pm_runtime_get_sync ( dev ) ;
drv - > remove ( pci_dev ) ;
pm_runtime_put_noidle ( dev ) ;
2005-04-16 15:20:36 -07:00
}
2021-10-04 14:59:25 +02:00
pcibios_free_irq ( pci_dev ) ;
2021-11-10 12:01:14 -06:00
pci_dev - > driver = NULL ;
2021-10-04 14:59:25 +02:00
pci_iov_remove ( pci_dev ) ;
2005-04-16 15:20:36 -07:00
2010-06-08 15:23:51 -04:00
/* Undo the runtime PM settings in local_pci_probe() */
2012-11-20 16:08:22 +08:00
pm_runtime_put_sync ( dev ) ;
2010-06-08 15:23:51 -04:00
2006-10-20 14:45:32 -07:00
/*
* If the device is still on , set the power state as " unknown " ,
* since it might change by the next time we load the driver .
*/
if ( pci_dev - > current_state = = PCI_D0 )
pci_dev - > current_state = PCI_UNKNOWN ;
2005-04-16 15:20:36 -07:00
/*
* We would love to complain here if pci_dev - > is_enabled is set , that
* the driver should have called pci_disable_device ( ) , but the
* unfortunate fact is there are too many odd BIOS and bridge setups
2013-11-14 11:28:18 -07:00
* that don ' t like drivers doing that all of the time .
2005-04-16 15:20:36 -07:00
* Oh well , we can dream of sane hardware when we sleep , no matter how
* horrible the crap we have to deal with is when we are awake . . .
*/
pci_dev_put ( pci_dev ) ;
}
2008-05-20 00:49:04 +02:00
static void pci_device_shutdown ( struct device * dev )
{
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
2021-11-10 12:03:34 -06:00
struct pci_driver * drv = pci_dev - > driver ;
2008-05-20 00:49:04 +02:00
2012-10-24 14:54:14 +08:00
pm_runtime_resume ( dev ) ;
2008-05-20 00:49:04 +02:00
if ( drv & & drv - > shutdown )
drv - > shutdown ( pci_dev ) ;
2012-02-07 00:50:35 +01:00
2012-04-27 13:00:33 -06:00
/*
2013-11-27 15:19:25 -07:00
* If this is a kexec reboot , turn off Bus Master bit on the
* device to tell it to not continue to do DMA . Don ' t touch
* devices in D3cold or unknown states .
* If it is not a kexec reboot , firmware will hit the PCI
* devices with big hammer and stop their DMA any way .
2012-04-27 13:00:33 -06:00
*/
2013-11-27 15:19:25 -07:00
if ( kexec_in_progress & & ( pci_dev - > current_state < = PCI_D3hot ) )
2013-03-14 18:49:37 +04:00
pci_clear_master ( pci_dev ) ;
2008-05-20 00:49:04 +02:00
}
2022-04-20 16:11:35 +02:00
# ifdef CONFIG_PM_SLEEP
2010-02-17 23:44:58 +01:00
2022-04-20 16:11:35 +02:00
/* Auxiliary functions used for system resume */
2010-02-17 23:44:58 +01:00
/**
* pci_restore_standard_config - restore standard config registers of PCI device
* @ pci_dev : PCI device to handle
*/
static int pci_restore_standard_config ( struct pci_dev * pci_dev )
{
pci_update_current_state ( pci_dev , PCI_UNKNOWN ) ;
if ( pci_dev - > current_state ! = PCI_D0 ) {
int error = pci_set_power_state ( pci_dev , PCI_D0 ) ;
if ( error )
return error ;
}
2010-11-30 17:43:26 -06:00
pci_restore_state ( pci_dev ) ;
2017-07-12 03:05:39 +02:00
pci_pme_restore ( pci_dev ) ;
2010-11-30 17:43:26 -06:00
return 0 ;
2010-02-17 23:44:58 +01:00
}
2022-04-20 16:11:35 +02:00
# endif /* CONFIG_PM_SLEEP */
# ifdef CONFIG_PM
/* Auxiliary functions used for system resume and run-time resume */
2010-02-17 23:44:58 +01:00
PCI/PM: Run resume fixups before disabling wakeup events
pci_pm_resume() and pci_pm_restore() call pci_pm_default_resume(), which
runs resume fixups before disabling wakeup events:
static void pci_pm_default_resume(struct pci_dev *pci_dev)
{
pci_fixup_device(pci_fixup_resume, pci_dev);
pci_enable_wake(pci_dev, PCI_D0, false);
}
pci_pm_runtime_resume() does both of these, but in the opposite order:
pci_enable_wake(pci_dev, PCI_D0, false);
pci_fixup_device(pci_fixup_resume, pci_dev);
We should always use the same ordering unless there's a reason to do
otherwise. Change pci_pm_runtime_resume() to call pci_pm_default_resume()
instead of open-coding this, so the fixups are always done before disabling
wakeup events.
pci_pm_default_resume() is called from pci_pm_runtime_resume(), which is
under #ifdef CONFIG_PM. If SUSPEND and HIBERNATION are disabled, PM_SLEEP
is disabled also, so move pci_pm_default_resume() from #ifdef
CONFIG_PM_SLEEP to #ifdef CONFIG_PM.
Link: https://lore.kernel.org/r/20191014230016.240912-5-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2019-10-12 17:15:57 -05:00
/*
 * Default resume step: run the pci_fixup_resume fixups first, then call
 * pci_enable_wake() for PCI_D0 with enable == false.  The order (fixups
 * before the wakeup change) is deliberate and should be kept.
 */
static void pci_pm_default_resume ( struct pci_dev * pci_dev )
{
pci_fixup_device ( pci_fixup_resume , pci_dev ) ;
pci_enable_wake ( pci_dev , PCI_D0 , false ) ;
}
2022-05-05 20:18:09 +02:00
/*
 * Power up @pci_dev with pci_power_up() and then call
 * pci_update_current_state() with PCI_D0 as the state argument.
 * NOTE(review): presumably the second call re-reads the real power state
 * so that current_state reflects the hardware -- confirm against its
 * definition.
 */
static void pci_pm_power_up_and_verify_state ( struct pci_dev * pci_dev )
2010-02-17 23:44:58 +01:00
{
PCI / PM: restore the original behavior of pci_set_power_state()
Commit cc2893b6 (PCI: Ensure we re-enable devices on resume)
addressed the problem with USB not being powered after resume on
recent Lenovo machines, but it did that in a suboptimal way.
Namely, it should have changed the relevant code paths only,
which are pci_pm_resume_noirq() and pci_pm_restore_noirq() supposed
to restore the device's power and standard configuration registers
after system resume from suspend or hibernation. Instead, however,
it modified pci_set_power_state() which is executed in several
other situations too. That resulted in some undesirable effects,
like attempting to change a device's power state in the same way
multiple times in a row (up to as many as 4 times in a row in the
snd_hda_intel driver).
Fix the bug addressed by commit cc2893b6 in an alternative way,
by forcibly powering up all devices in pci_pm_default_resume_early(),
which is called by pci_pm_resume_noirq() and pci_pm_restore_noirq()
to restore the device's power and standard configuration registers,
and modifying pci_pm_runtime_resume() to avoid the forcible power-up
if not necessary. Then, revert the changes made by commit cc2893b6
to make the confusion introduced by it go away.
Acked-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2012-07-05 15:20:00 -06:00
/* Power-up is done unconditionally here; no current-state check precedes it. */
pci_power_up ( pci_dev ) ;
2019-11-05 11:13:43 +01:00
pci_update_current_state ( pci_dev , PCI_D0 ) ;
2022-05-05 20:18:09 +02:00
}
/*
 * Early default resume: power the device up (via
 * pci_pm_power_up_and_verify_state()), then restore its saved state with
 * pci_restore_state() and its PME setting with pci_pme_restore().
 */
static void pci_pm_default_resume_early ( struct pci_dev * pci_dev )
{
pci_pm_power_up_and_verify_state ( pci_dev ) ;
PCI / PM: restore the original behavior of pci_set_power_state()
Commit cc2893b6 (PCI: Ensure we re-enable devices on resume)
addressed the problem with USB not being powered after resume on
recent Lenovo machines, but it did that in a suboptimal way.
Namely, it should have changed the relevant code paths only,
which are pci_pm_resume_noirq() and pci_pm_restore_noirq() supposed
to restore the device's power and standard configuration registers
after system resume from suspend or hibernation. Instead, however,
it modified pci_set_power_state() which is executed in several
other situations too. That resulted in some undesirable effects,
like attempting to change a device's power state in the same way
multiple times in a row (up to as many as 4 times in a row in the
snd_hda_intel driver).
Fix the bug addressed by commit cc2893b6 in an alternative way,
by forcibly powering up all devices in pci_pm_default_resume_early(),
which is called by pci_pm_resume_noirq() and pci_pm_restore_noirq()
to restore the device's power and standard configuration registers,
and modifying pci_pm_runtime_resume() to avoid the forcible power-up
if not necessary. Then, revert the changes made by commit cc2893b6
to make the confusion introduced by it go away.
Acked-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2012-07-05 15:20:00 -06:00
/* State is restored only after the power-up step above has run. */
pci_restore_state ( pci_dev ) ;
2017-07-12 03:05:39 +02:00
pci_pme_restore ( pci_dev ) ;
2010-02-17 23:44:58 +01:00
}
2022-04-14 15:04:13 +02:00
static void pci_pm_bridge_power_up_actions ( struct pci_dev * pci_dev )
{
2023-09-18 08:30:41 +03:00
int ret ;
ret = pci_bridge_wait_for_secondary_bus ( pci_dev , " resume " ) ;
if ( ret ) {
/*
* The downstream link failed to come up , so mark the
* devices below as disconnected to make sure we don ' t
* attempt to resume them .
*/
pci_walk_bus ( pci_dev - > subordinate , pci_dev_set_disconnected ,
NULL ) ;
return ;
}
2023-04-04 15:32:55 -05:00
2022-04-14 15:04:13 +02:00
/*
* When powering on a bridge from D3cold , the whole hierarchy may be
* powered on into D0uninitialized state , resume them to give them a
* chance to suspend again
*/
pci_resume_bus ( pci_dev - > subordinate ) ;
}
2022-04-20 16:11:35 +02:00
# endif /* CONFIG_PM */
# ifdef CONFIG_PM_SLEEP
2009-01-07 13:03:42 +01:00
/*
* Default " suspend " method for devices that have no driver provided suspend ,
* or not even a driver at all ( second part ) .
2008-05-20 00:49:04 +02:00
*/
2009-01-07 14:15:17 +01:00
static void pci_pm_set_unknown_state ( struct pci_dev * pci_dev )
2008-05-20 00:49:04 +02:00
{
/*
* mark its power state as " unknown " , since we don ' t know if
* e . g . the BIOS will change its device state when we suspend .
*/
if ( pci_dev - > current_state = = PCI_D0 )
pci_dev - > current_state = PCI_UNKNOWN ;
}
2008-12-08 00:34:57 +01:00
/*
* Default " resume " method for devices that have no driver provided resume ,
* or not even a driver at all ( second part ) .
*/
2009-01-07 14:15:17 +01:00
static int pci_pm_reenable_device ( struct pci_dev * pci_dev )
2008-12-08 00:34:57 +01:00
{
int retval ;
2021-10-06 23:38:27 +00:00
/* if the device was enabled before suspend, re-enable */
2008-05-20 00:49:04 +02:00
retval = pci_reenable_device ( pci_dev ) ;
/*
* if the device was busmaster before the suspend , make it busmaster
* again
*/
if ( pci_dev - > is_busmaster )
pci_set_master ( pci_dev ) ;
return retval ;
}
/*
 * Legacy suspend: call the bound driver's ->suspend() (if any), report its
 * result, and warn once if it changed the power state without saving the
 * device state.  The suspend fixups then run.  Returns the driver's error
 * or 0.
 */
static int pci_legacy_suspend(struct device *dev, pm_message_t state)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_driver *pdrv = pdev->driver;
	pci_power_t prev;
	int error;

	if (!pdrv || !pdrv->suspend)
		goto fixup;

	/* Remember the state seen before the driver callback runs. */
	prev = pdev->current_state;

	error = pdrv->suspend(pdev, state);
	suspend_report_result(dev, pdrv->suspend, error);
	if (error)
		return error;

	if (!pdev->state_saved &&
	    pdev->current_state != PCI_D0 &&
	    pdev->current_state != PCI_UNKNOWN) {
		pci_WARN_ONCE(pdev, pdev->current_state != prev,
			      " PCI PM: Device state not saved by %pS \n ",
			      pdrv->suspend);
	}

fixup:
	pci_fixup_device(pci_fixup_suspend, pdev);

	return 0;
}
2022-10-25 14:35:02 -05:00
static int pci_legacy_suspend_late ( struct device * dev )
2006-06-24 14:50:29 -07:00
{
2014-04-18 20:13:49 -04:00
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
2009-03-16 22:40:26 +01:00
if ( ! pci_dev - > state_saved )
pci_save_state ( pci_dev ) ;
pci_pm_set_unknown_state ( pci_dev ) ;
2014-06-03 22:04:09 +02:00
pci_fixup_device ( pci_fixup_suspend_late , pci_dev ) ;
2009-03-16 22:40:26 +01:00
return 0 ;
2006-06-24 14:50:29 -07:00
}
2005-04-16 15:20:36 -07:00
2008-05-20 00:49:04 +02:00
/*
 * pci_legacy_resume - resume a PCI device through the legacy driver hook
 * @dev: generic device; converted to its struct pci_dev via to_pci_dev()
 *
 * Runs the pci_fixup_resume fixups first.  Returns the result of the
 * ->resume() hook when a driver providing one is bound, otherwise the result
 * of pci_pm_reenable_device().
 */
static int pci_legacy_resume ( struct device * dev )
2005-04-16 15:20:36 -07:00
{
2014-04-18 20:13:49 -04:00
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
2021-11-10 12:03:34 -06:00
struct pci_driver * drv = pci_dev - > driver ;
2005-04-16 15:20:36 -07:00
2009-01-07 13:09:37 +01:00
pci_fixup_device ( pci_fixup_resume , pci_dev ) ;
2009-01-16 21:54:43 +01:00
/* Driver hook if available, core fallback otherwise */
return drv & & drv - > resume ?
drv - > resume ( pci_dev ) : pci_pm_reenable_device ( pci_dev ) ;
2005-04-16 15:20:36 -07:00
}
2009-01-07 13:05:05 +01:00
/* Auxiliary functions used by the new power management framework */
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
/*
 * pci_pm_default_suspend - default suspend action of the PCI PM core
 * @pci_dev: device to handle
 *
 * Calls pci_disable_enabled_device() for a device for which
 * pci_has_subordinate() is false; does nothing for any other device.
 */
static void pci_pm_default_suspend ( struct pci_dev * pci_dev )
PCI PM: Avoid touching devices behind bridges in unknown state
It generally is better to avoid accessing devices behind bridges that
may not be in the D0 power state, because in that case the bridges'
secondary buses may not be accessible. For this reason, during the
early phase of resume (ie. with interrupts disabled), before
restoring the standard config registers of a device, check the power
state of the bridge the device is behind and postpone the restoration
of the device's config space, as well as any other operations that
would involve accessing the device, if that state is not D0.
In such cases the restoration of the device's config space will be
retried during the "normal" phase of resume (ie. with interrupts
enabled), so that the bridge can be put into D0 before that happens.
Also, save standard configuration registers of PCI devices during the
"normal" phase of suspend (ie. with interrupts enabled), so that the
bridges the devices are behind can be put into low power states (we
don't put bridges into low power states at the moment, but we may
want to do it in the future and it seems reasonable to design for
that).
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-01-07 13:07:15 +01:00
{
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
/* Disable non-bridge devices without PM support */
2014-05-04 12:23:36 +08:00
if ( ! pci_has_subordinate ( pci_dev ) )
2009-02-04 02:01:15 +01:00
pci_disable_enabled_device ( pci_dev ) ;
PCI PM: Avoid touching devices behind bridges in unknown state
It generally is better to avoid accessing devices behind bridges that
may not be in the D0 power state, because in that case the bridges'
secondary buses may not be accessible. For this reason, during the
early phase of resume (ie. with interrupts disabled), before
restoring the standard config registers of a device, check the power
state of the bridge the device is behind and postpone the restoration
of the device's config space, as well as any other operations that
would involve accessing the device, if that state is not D0.
In such cases the restoration of the device's config space will be
retried during the "normal" phase of resume (ie. with interrupts
enabled), so that the bridge can be put into D0 before that happens.
Also, save standard configuration registers of PCI devices during the
"normal" phase of suspend (ie. with interrupts enabled), so that the
bridges the devices are behind can be put into low power states (we
don't put bridges into low power states at the moment, but we may
want to do it in the future and it seems reasonable to design for
that).
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-01-07 13:07:15 +01:00
}
2009-01-07 13:06:10 +01:00
/*
 * pci_has_legacy_pm_support - test whether the bound driver uses legacy PM
 * @pci_dev: device whose driver is inspected
 *
 * Returns true when a driver is bound and it provides a legacy ->suspend()
 * or ->resume() hook, false otherwise.  As a side effect, pci_WARN() is
 * given the condition that such a driver also sets driver.pm, together with
 * the vendor and device IDs for the message.
 */
static bool pci_has_legacy_pm_support ( struct pci_dev * pci_dev )
{
2021-11-10 12:03:34 -06:00
struct pci_driver * drv = pci_dev - > driver ;
2019-10-31 17:37:54 -05:00
bool ret = drv & & ( drv - > suspend | | drv - > resume ) ;
2009-01-07 14:15:17 +01:00
/*
* Legacy PM support is used by default , so warn if the new framework is
* supported as well . Drivers are supposed to support either the
* former , or the latter , but not both at the same time .
*/
2019-10-07 07:52:28 -05:00
pci_WARN ( pci_dev , ret & & drv - > driver . pm , " device %04x:%04x \n " ,
pci_dev - > vendor , pci_dev - > device ) ;
2009-01-07 14:15:17 +01:00
return ret ;
2009-01-07 13:06:10 +01:00
}
2009-01-07 13:05:05 +01:00
/* New power management framework */
2008-05-20 00:49:04 +02:00
/*
 * pci_pm_prepare - bus-level ->prepare() callback for PCI devices
 * @dev: generic device; converted to its struct pci_dev via to_pci_dev()
 *
 * Return values, in the order they are decided:
 *   - a negative error returned by the ->prepare() hook of the driver;
 *   - 0 when that hook returned 0 and DPM_FLAG_SMART_PREPARE is set;
 *   - 0 when pci_dev_need_resume() reports true;
 *   - 1 otherwise, after pci_dev_adjust_pme() has been called.
 *
 * A positive return value is how a ->prepare() callback asks the PM core to
 * use the direct-complete mechanism for the device.
 */
static int pci_pm_prepare ( struct device * dev )
{
2019-06-07 00:32:31 +02:00
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
2019-10-14 13:46:50 -05:00
/* PM operations of the bound driver, or NULL when no driver is bound */
const struct dev_pm_ops * pm = dev - > driver ? dev - > driver - > pm : NULL ;
2008-05-20 00:49:04 +02:00
2019-10-14 13:46:50 -05:00
if ( pm & & pm - > prepare ) {
int error = pm - > prepare ( dev ) ;
PM / core: Add NEVER_SKIP and SMART_PREPARE driver flags
The motivation for this change is to provide a way to work around
a problem with the direct-complete mechanism used for avoiding
system suspend/resume handling for devices in runtime suspend.
The problem is that some middle layer code (the PCI bus type and
the ACPI PM domain in particular) returns positive values from its
system suspend ->prepare callbacks regardless of whether the driver's
->prepare returns a positive value or 0, which effectively prevents
drivers from being able to control the direct-complete feature.
Some drivers need that control, however, and the PCI bus type has
grown its own flag to deal with this issue, but since it is not
limited to PCI, it is better to address it by adding driver flags at
the core level.
To that end, add a driver_flags field to struct dev_pm_info for flags
that can be set by device drivers at the probe time to inform the PM
core and/or bus types, PM domains and so on on the capabilities and/or
preferences of device drivers. Also add two static inline helpers
for setting that field and testing it against a given set of flags
and make the driver core clear it automatically on driver remove
and probe failures.
Define and document two PM driver flags related to the direct-
complete feature: NEVER_SKIP and SMART_PREPARE that can be used,
respectively, to indicate to the PM core that the direct-complete
mechanism should never be used for the device and to inform the
middle layer code (bus types, PM domains etc) that it can only
request the PM core to use the direct-complete mechanism for
the device (by returning a positive value from its ->prepare
callback) if it also has been requested by the driver.
While at it, make the core check pm_runtime_suspended() when
setting power.direct_complete so that it doesn't need to be
checked by ->prepare callbacks.
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
2017-10-25 14:12:29 +02:00
if ( error < 0 )
2015-01-21 02:17:42 +01:00
return error ;
PM / core: Add NEVER_SKIP and SMART_PREPARE driver flags
The motivation for this change is to provide a way to work around
a problem with the direct-complete mechanism used for avoiding
system suspend/resume handling for devices in runtime suspend.
The problem is that some middle layer code (the PCI bus type and
the ACPI PM domain in particular) returns positive values from its
system suspend ->prepare callbacks regardless of whether the driver's
->prepare returns a positive value or 0, which effectively prevents
drivers from being able to control the direct-complete feature.
Some drivers need that control, however, and the PCI bus type has
grown its own flag to deal with this issue, but since it is not
limited to PCI, it is better to address it by adding driver flags at
the core level.
To that end, add a driver_flags field to struct dev_pm_info for flags
that can be set by device drivers at the probe time to inform the PM
core and/or bus types, PM domains and so on on the capabilities and/or
preferences of device drivers. Also add two static inline helpers
for setting that field and testing it against a given set of flags
and make the driver core clear it automatically on driver remove
and probe failures.
Define and document two PM driver flags related to the direct-
complete feature: NEVER_SKIP and SMART_PREPARE that can be used,
respectively, to indicate to the PM core that the direct-complete
mechanism should never be used for the device and to inform the
middle layer code (bus types, PM domains etc) that it can only
request the PM core to use the direct-complete mechanism for
the device (by returning a positive value from its ->prepare
callback) if it also has been requested by the driver.
While at it, make the core check pm_runtime_suspended() when
setting power.direct_complete so that it doesn't need to be
checked by ->prepare callbacks.
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
2017-10-25 14:12:29 +02:00
if ( ! error & & dev_pm_test_driver_flags ( dev , DPM_FLAG_SMART_PREPARE ) )
return 0 ;
2015-01-21 02:17:42 +01:00
}
2019-06-07 00:32:31 +02:00
if ( pci_dev_need_resume ( pci_dev ) )
return 0 ;
/*
* The PME setting needs to be adjusted here in case the direct - complete
* optimization is used with respect to this device .
*/
pci_dev_adjust_pme ( pci_dev ) ;
return 1 ;
2008-05-20 00:49:04 +02:00
}
2015-09-30 01:10:24 +02:00
/*
 * pci_pm_complete - bus-level ->complete() callback for PCI devices
 * @dev: generic device; converted to its struct pci_dev via to_pci_dev()
 *
 * Calls pci_dev_complete_resume() and pm_generic_complete().  Then, only when
 * pm_runtime_suspended() and pm_resume_via_firmware() are both true, the
 * recorded power state is refreshed and pm_request_resume() is called if the
 * refreshed state compares lower than the state recorded before the refresh.
 */
static void pci_pm_complete ( struct device * dev )
{
2016-09-18 05:39:20 +02:00
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
pci_dev_complete_resume ( pci_dev ) ;
pm_generic_complete ( dev ) ;
/* Resume device if platform firmware has put it in reset-power-on */
2017-11-18 15:33:52 +01:00
if ( pm_runtime_suspended ( dev ) & & pm_resume_via_firmware ( ) ) {
2016-09-18 05:39:20 +02:00
/* Snapshot taken before the refresh so the two can be compared */
pci_power_t pre_sleep_state = pci_dev - > current_state ;
2019-06-25 14:09:12 +02:00
pci_refresh_power_state ( pci_dev ) ;
/*
* On platforms with ACPI this check may also trigger for
* devices sharing power resources if one of those power
* resources has been activated as a result of a change of the
* power state of another device sharing it . However , in that
* case it is also better to resume the device , in general .
*/
2016-09-18 05:39:20 +02:00
if ( pci_dev - > current_state < pre_sleep_state )
pm_request_resume ( dev ) ;
}
2015-09-30 01:10:24 +02:00
}
2008-05-20 00:49:04 +02:00
2010-02-17 23:44:58 +01:00
# else /* !CONFIG_PM_SLEEP */
/* In this branch both callback names simply expand to NULL. */
# define pci_pm_prepare NULL
2015-09-30 01:10:24 +02:00
# define pci_pm_complete NULL
2010-02-17 23:44:58 +01:00
# endif /* !CONFIG_PM_SLEEP */
2008-05-20 00:49:04 +02:00
# ifdef CONFIG_SUSPEND
2018-03-09 11:06:54 -06:00
/*
 * pcie_pme_root_status_cleanup - clear Root PME Status on root-level devices
 * @pci_dev: device to check
 *
 * Calls pcie_clear_root_pme_status() when the device is PCIe and its type is
 * PCI_EXP_TYPE_ROOT_PORT or PCI_EXP_TYPE_RC_EC; does nothing otherwise.
 */
static void pcie_pme_root_status_cleanup ( struct pci_dev * pci_dev )
{
/*
* Some BIOSes forget to clear Root PME Status bits after system
* wakeup , which breaks ACPI - based runtime wakeup on PCI Express .
* Clear those bits now just in case ( shouldn ' t hurt ) .
*/
if ( pci_is_pcie ( pci_dev ) & &
2018-03-09 11:06:55 -06:00
( pci_pcie_type ( pci_dev ) = = PCI_EXP_TYPE_ROOT_PORT | |
pci_pcie_type ( pci_dev ) = = PCI_EXP_TYPE_RC_EC ) )
2018-03-09 11:06:54 -06:00
pcie_clear_root_pme_status ( pci_dev ) ;
}
2008-05-20 00:49:04 +02:00
static int pci_pm_suspend ( struct device * dev )
{
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
2009-07-24 22:11:32 -07:00
const struct dev_pm_ops * pm = dev - > driver ? dev - > driver - > pm : NULL ;
2008-05-20 00:49:04 +02:00
PCI: PM: Avoid possible suspend-to-idle issue
If a PCI driver leaves the device handled by it in D0 and calls
pci_save_state() on the device in its ->suspend() or ->suspend_late()
callback, it can expect the device to stay in D0 over the whole
s2idle cycle. However, that may not be the case if there is a
spurious wakeup while the system is suspended, because in that case
pci_pm_suspend_noirq() will run again after pci_pm_resume_noirq()
which calls pci_restore_state(), via pci_pm_default_resume_early(),
so state_saved is cleared and the second iteration of
pci_pm_suspend_noirq() will invoke pci_prepare_to_sleep() which
may change the power state of the device.
To avoid that, add a new internal flag, skip_bus_pm, that will be set
by pci_pm_suspend_noirq() when it runs for the first time during the
given system suspend-resume cycle if the state of the device has
been saved already and the device is still in D0. Setting that flag
will cause the next iterations of pci_pm_suspend_noirq() to set
state_saved for pci_pm_resume_noirq(), so that it always restores the
device state from the originally saved data, and avoid calling
pci_prepare_to_sleep() for the device.
Fixes: 33e4f80ee69b ("ACPI / PM: Ignore spurious SCI wakeups from suspend-to-idle")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2019-05-17 11:08:50 +02:00
pci_dev - > skip_bus_pm = false ;
PCI/PM: Always disable PTM for all devices during suspend
We want to disable PTM on Root Ports because that allows some chips, e.g.,
Intel mobile chips since Coffee Lake, to enter a lower-power PM state.
That means we also have to disable PTM on downstream devices. PCIe r6.0,
sec 2.2.8, recommends that functions support generation of messages in
non-D0 states, so we have to assume Switch Upstream Ports or Endpoints may
send PTM Requests while in D1, D2, and D3hot. A PTM message received by a
Downstream Port (including a Root Port) with PTM disabled must be treated
as an Unsupported Request (sec 6.21.3).
PTM was previously disabled only for Root Ports, and it was disabled in
pci_prepare_to_sleep(), which is not called at all if a driver supports
legacy PM or does its own state saving.
Instead, disable PTM early in pci_pm_suspend() and pci_pm_runtime_suspend()
so we do it in all cases.
Previously PTM was disabled *after* saving device state, so the state
restore on resume automatically re-enabled it. Since we now disable PTM
*before* saving state, we must explicitly re-enable it in pci_pm_resume()
and pci_pm_runtime_resume().
Here's a sample of errors that occur when PTM is disabled only on the Root
Port. With this topology:
0000:00:1d.0 Root Port to [bus 08-71]
0000:08:00.0 Switch Upstream Port to [bus 09-71]
Kai-Heng reported errors like this:
pcieport 0000:00:1d.0: [20] UnsupReq (First)
pcieport 0000:00:1d.0: AER: TLP Header: 34000000 08000052 00000000 00000000
Decoding TLP header 0x34...... (0011 0100b) and 0x08000052:
Fmt 001b 4 DW header, no data
Type 1 0100b Msg (Local - Terminate at Receiver)
Requester ID 0x0800 Bus 08 Devfn 00.0
Message Code 0x52 0101 0010b PTM Request
The 00:1d.0 Root Port logged an Unsupported Request error when it received
a PTM Request with Requester ID 08:00.0.
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=215453
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=216210
Fixes: a697f072f5da ("PCI: Disable PTM during suspend to save power")
Link: https://lore.kernel.org/r/20220909202505.314195-10-helgaas@kernel.org
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Tested-by: Rajvi Jingar <rajvi.jingar@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2022-09-09 15:25:05 -05:00
/*
* Disabling PTM allows some systems , e . g . , Intel mobile chips
* since Coffee Lake , to enter a lower - power PM state .
*/
pci_suspend_ptm ( pci_dev ) ;
2009-01-07 13:09:37 +01:00
if ( pci_has_legacy_pm_support ( pci_dev ) )
return pci_legacy_suspend ( dev , PMSG_SUSPEND ) ;
2009-01-07 14:15:17 +01:00
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
if ( ! pm ) {
pci_pm_default_suspend ( pci_dev ) ;
PCI / PM: Take SMART_SUSPEND driver flag into account
Make the PCI bus type take DPM_FLAG_SMART_SUSPEND into account in its
system-wide PM callbacks and make sure that all code that should not
run in parallel with pci_pm_runtime_resume() is executed in the "late"
phases of system suspend, freeze and poweroff transitions.
[Note that the pm_runtime_suspended() check in pci_dev_keep_suspended()
is an optimization, because if it is not passed, all of the subsequent
checks may be skipped and some of them carry much more overhead in
general.]
Also use the observation that if the device is in runtime suspend
at the beginning of the "late" phase of a system-wide suspend-like
transition, its state cannot change going forward (runtime PM is
disabled for it at that time) until the transition is over and the
subsequent system-wide PM callbacks should be skipped for it (as
they generally assume the device to not be suspended), so add checks
for that in pci_pm_suspend_late/noirq(), pci_pm_freeze_late/noirq()
and pci_pm_poweroff_late/noirq().
Moreover, if pci_pm_resume_noirq() or pci_pm_restore_noirq() is
called during the subsequent system-wide resume transition and if
the device was left in runtime suspend previously, its runtime PM
status needs to be changed to "active" as it is going to be put
into the full-power state, so add checks for that too to these
functions.
In turn, if pci_pm_thaw_noirq() runs after the device has been
left in runtime suspend, the subsequent "thaw" callbacks need
to be skipped for it (as they may not work correctly with a
suspended device), so set the power.direct_complete flag for the
device then to make the PM core skip those callbacks.
In addition to the above add a core helper for checking if
DPM_FLAG_SMART_SUSPEND is set and the device runtime PM status is
"suspended" at the same time, which is done quite often in the new
code (and will be done elsewhere going forward too).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2017-10-26 12:12:22 +02:00
return 0 ;
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
}
PCI / PM: Resume runtime-suspended devices later during system suspend
Runtime-suspended devices are resumed during system suspend by
pci_pm_prepare() for two reasons: First, because they may need
to be reprogrammed in order to change their wakeup settings and,
second, because they may need to be operational for their children
to be successfully suspended. That is a problem, though, if there
are many runtime-suspended devices that need to be resumed this
way during system suspend, because the .prepare() PM callbacks of
devices are executed sequentially and the times taken by them
accumulate, which may increase the total system suspend time quite
a bit.
For this reason, move the resume of runtime-suspended devices up
to the next phase of device suspend (during system suspend), except
for the ones that have power.ignore_children set. The exception is
made, because the devices with power.ignore_children set may still
be necessary for their children to be successfully suspended (during
system suspend) and they won't be resumed automatically as a result
of the runtime resume of their children.
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2014-02-26 01:00:30 +01:00
/*
PCI / PM: Take SMART_SUSPEND driver flag into account
Make the PCI bus type take DPM_FLAG_SMART_SUSPEND into account in its
system-wide PM callbacks and make sure that all code that should not
run in parallel with pci_pm_runtime_resume() is executed in the "late"
phases of system suspend, freeze and poweroff transitions.
[Note that the pm_runtime_suspended() check in pci_dev_keep_suspended()
is an optimization, because if it is not passed, all of the subsequent
checks may be skipped and some of them carry much more overhead in
general.]
Also use the observation that if the device is in runtime suspend
at the beginning of the "late" phase of a system-wide suspend-like
transition, its state cannot change going forward (runtime PM is
disabled for it at that time) until the transition is over and the
subsequent system-wide PM callbacks should be skipped for it (as
they generally assume the device to not be suspended), so add checks
for that in pci_pm_suspend_late/noirq(), pci_pm_freeze_late/noirq()
and pci_pm_poweroff_late/noirq().
Moreover, if pci_pm_resume_noirq() or pci_pm_restore_noirq() is
called during the subsequent system-wide resume transition and if
the device was left in runtime suspend previously, its runtime PM
status needs to be changed to "active" as it is going to be put
into the full-power state, so add checks for that too to these
functions.
In turn, if pci_pm_thaw_noirq() runs after the device has been
left in runtime suspend, the subsequent "thaw" callbacks need
to be skipped for it (as they may not work correctly with a
suspended device), so set the power.direct_complete flag for the
device then to make the PM core skip those callbacks.
In addition to the above add a core helper for checking if
DPM_FLAG_SMART_SUSPEND is set and the device runtime PM status is
"suspended" at the same time, which is done quite often in the new
code (and will be done elsewhere going forward too).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2017-10-26 12:12:22 +02:00
* PCI devices suspended at run time may need to be resumed at this
* point , because in general it may be necessary to reconfigure them for
* system suspend . Namely , if the device is expected to wake up the
* system from the sleep state , it may have to be reconfigured for this
* purpose , or if the device is not expected to wake up the system from
* the sleep state , it should be prevented from signaling wakeup events
* going forward .
*
* Also if the driver of the device does not indicate that its system
* suspend callbacks can cope with runtime - suspended devices , it is
* better to resume the device from runtime suspend here .
PCI / PM: Resume runtime-suspended devices later during system suspend
Runtime-suspended devices are resumed during system suspend by
pci_pm_prepare() for two reasons: First, because they may need
to be reprogrammed in order to change their wakeup settings and,
second, because they may need to be operational for their children
to be successfully suspended. That is a problem, though, if there
are many runtime-suspended devices that need to be resumed this
way during system suspend, because the .prepare() PM callbacks of
devices are executed sequentially and the times taken by them
accumulate, which may increase the total system suspend time quite
a bit.
For this reason, move the resume of runtime-suspended devices up
to the next phase of device suspend (during system suspend), except
for the ones that have power.ignore_children set. The exception is
made, because the devices with power.ignore_children set may still
be necessary for their children to be successfully suspended (during
system suspend) and they won't be resumed automatically as a result
of the runtime resume of their children.
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2014-02-26 01:00:30 +01:00
*/
PCI / PM: Take SMART_SUSPEND driver flag into account
Make the PCI bus type take DPM_FLAG_SMART_SUSPEND into account in its
system-wide PM callbacks and make sure that all code that should not
run in parallel with pci_pm_runtime_resume() is executed in the "late"
phases of system suspend, freeze and poweroff transitions.
[Note that the pm_runtime_suspended() check in pci_dev_keep_suspended()
is an optimization, because if it is not passed, all of the subsequent
checks may be skipped and some of them carry much more overhead in
general.]
Also use the observation that if the device is in runtime suspend
at the beginning of the "late" phase of a system-wide suspend-like
transition, its state cannot change going forward (runtime PM is
disabled for it at that time) until the transition is over and the
subsequent system-wide PM callbacks should be skipped for it (as
they generally assume the device to not be suspended), so add checks
for that in pci_pm_suspend_late/noirq(), pci_pm_freeze_late/noirq()
and pci_pm_poweroff_late/noirq().
Moreover, if pci_pm_resume_noirq() or pci_pm_restore_noirq() is
called during the subsequent system-wide resume transition and if
the device was left in runtime suspend previously, its runtime PM
status needs to be changed to "active" as it is going to be put
into the full-power state, so add checks for that too to these
functions.
In turn, if pci_pm_thaw_noirq() runs after the device has been
left in runtime suspend, the subsequent "thaw" callbacks need
to be skipped for it (as they may not work correctly with a
suspended device), so set the power.direct_complete flag for the
device then to make the PM core skip those callbacks.
In addition to the above add a core helper for checking if
DPM_FLAG_SMART_SUSPEND is set and the device runtime PM status is
"suspended" at the same time, which is done quite often in the new
code (and will be done elsewhere going forward too).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2017-10-26 12:12:22 +02:00
if ( ! dev_pm_test_driver_flags ( dev , DPM_FLAG_SMART_SUSPEND ) | |
2019-06-07 00:32:31 +02:00
pci_dev_need_resume ( pci_dev ) ) {
PCI / PM: Take SMART_SUSPEND driver flag into account
Make the PCI bus type take DPM_FLAG_SMART_SUSPEND into account in its
system-wide PM callbacks and make sure that all code that should not
run in parallel with pci_pm_runtime_resume() is executed in the "late"
phases of system suspend, freeze and poweroff transitions.
[Note that the pm_runtime_suspended() check in pci_dev_keep_suspended()
is an optimization, because if it is not passed, all of the subsequent
checks may be skipped and some of them incur much more overhead in
general.]
Also use the observation that if the device is in runtime suspend
at the beginning of the "late" phase of a system-wide suspend-like
transition, its state cannot change going forward (runtime PM is
disabled for it at that time) until the transition is over and the
subsequent system-wide PM callbacks should be skipped for it (as
they generally assume the device to not be suspended), so add checks
for that in pci_pm_suspend_late/noirq(), pci_pm_freeze_late/noirq()
and pci_pm_poweroff_late/noirq().
Moreover, if pci_pm_resume_noirq() or pci_pm_restore_noirq() is
called during the subsequent system-wide resume transition and if
the device was left in runtime suspend previously, its runtime PM
status needs to be changed to "active" as it is going to be put
into the full-power state, so add checks for that too to these
functions.
In turn, if pci_pm_thaw_noirq() runs after the device has been
left in runtime suspend, the subsequent "thaw" callbacks need
to be skipped for it (as they may not work correctly with a
suspended device), so set the power.direct_complete flag for the
device then to make the PM core skip those callbacks.
In addition to the above add a core helper for checking if
DPM_FLAG_SMART_SUSPEND is set and the device runtime PM status is
"suspended" at the same time, which is done quite often in the new
code (and will be done elsewhere going forward too).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2017-10-26 12:12:22 +02:00
pm_runtime_resume ( dev ) ;
2018-05-18 10:17:42 +02:00
pci_dev - > state_saved = false ;
2019-06-07 00:32:31 +02:00
} else {
pci_dev_adjust_pme ( pci_dev ) ;
2018-05-18 10:17:42 +02:00
}
PCI / PM: Resume runtime-suspended devices later during system suspend
Runtime-suspended devices are resumed during system suspend by
pci_pm_prepare() for two reasons: First, because they may need
to be reprogrammed in order to change their wakeup settings and,
second, because they may need to be operational for their children
to be successfully suspended. That is a problem, though, if there
are many runtime-suspended devices that need to be resumed this
way during system suspend, because the .prepare() PM callbacks of
devices are executed sequentially and the times taken by them
accumulate, which may increase the total system suspend time quite
a bit.
For this reason, move the resume of runtime-suspended devices up
to the next phase of device suspend (during system suspend), except
for the ones that have power.ignore_children set. The exception is
made, because the devices with power.ignore_children set may still
be necessary for their children to be successfully suspended (during
system suspend) and they won't be resumed automatically as a result
of the runtime resume of their children.
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2014-02-26 01:00:30 +01:00
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
if ( pm - > suspend ) {
pci_power_t prev = pci_dev - > current_state ;
int error ;
2009-02-04 01:56:14 +01:00
error = pm - > suspend ( dev ) ;
2022-03-08 04:07:39 +09:00
suspend_report_result ( dev , pm - > suspend , error ) ;
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
if ( error )
return error ;
2009-03-16 22:40:26 +01:00
if ( ! pci_dev - > state_saved & & pci_dev - > current_state ! = PCI_D0
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
& & pci_dev - > current_state ! = PCI_UNKNOWN ) {
2019-10-07 07:52:28 -05:00
pci_WARN_ONCE ( pci_dev , pci_dev - > current_state ! = prev ,
" PCI PM: State of device not saved by %pS \n " ,
pm - > suspend ) ;
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
}
2008-05-20 00:49:04 +02:00
}
2009-01-07 13:03:42 +01:00
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
return 0 ;
2008-05-20 00:49:04 +02:00
}
PCI / PM: Take SMART_SUSPEND driver flag into account
Make the PCI bus type take DPM_FLAG_SMART_SUSPEND into account in its
system-wide PM callbacks and make sure that all code that should not
run in parallel with pci_pm_runtime_resume() is executed in the "late"
phases of system suspend, freeze and poweroff transitions.
[Note that the pm_runtime_suspended() check in pci_dev_keep_suspended()
is an optimization, because if it is not passed, all of the subsequent
checks may be skipped and some of them incur much more overhead in
general.]
Also use the observation that if the device is in runtime suspend
at the beginning of the "late" phase of a system-wide suspend-like
transition, its state cannot change going forward (runtime PM is
disabled for it at that time) until the transition is over and the
subsequent system-wide PM callbacks should be skipped for it (as
they generally assume the device to not be suspended), so add checks
for that in pci_pm_suspend_late/noirq(), pci_pm_freeze_late/noirq()
and pci_pm_poweroff_late/noirq().
Moreover, if pci_pm_resume_noirq() or pci_pm_restore_noirq() is
called during the subsequent system-wide resume transition and if
the device was left in runtime suspend previously, its runtime PM
status needs to be changed to "active" as it is going to be put
into the full-power state, so add checks for that too to these
functions.
In turn, if pci_pm_thaw_noirq() runs after the device has been
left in runtime suspend, the subsequent "thaw" callbacks need
to be skipped for it (as they may not work correctly with a
suspended device), so set the power.direct_complete flag for the
device then to make the PM core skip those callbacks.
In addition to the above add a core helper for checking if
DPM_FLAG_SMART_SUSPEND is set and the device runtime PM status is
"suspended" at the same time, which is done quite often in the new
code (and will be done elsewhere going forward too).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2017-10-26 12:12:22 +02:00
/*
 * pci_pm_suspend_late - late phase system suspend callback for a PCI device.
 * @dev: device being suspended.
 *
 * Does nothing and returns 0 when dev_pm_skip_suspend(dev) is true.
 * Otherwise runs the pci_fixup_suspend fixups for the PCI device obtained
 * via to_pci_dev(dev) and then hands over to pm_generic_suspend_late().
 *
 * Return: 0 if the device is skipped, else the value returned by
 * pm_generic_suspend_late(dev).
 */
static int pci_pm_suspend_late ( struct device * dev )
{
2020-04-18 18:52:48 +02:00
if ( dev_pm_skip_suspend ( dev ) )
PCI / PM: Take SMART_SUSPEND driver flag into account
Make the PCI bus type take DPM_FLAG_SMART_SUSPEND into account in its
system-wide PM callbacks and make sure that all code that should not
run in parallel with pci_pm_runtime_resume() is executed in the "late"
phases of system suspend, freeze and poweroff transitions.
[Note that the pm_runtime_suspended() check in pci_dev_keep_suspended()
is an optimization, because if is not passed, all of the subsequent
checks may be skipped and some of them are much more overhead in
general.]
Also use the observation that if the device is in runtime suspend
at the beginning of the "late" phase of a system-wide suspend-like
transition, its state cannot change going forward (runtime PM is
disabled for it at that time) until the transition is over and the
subsequent system-wide PM callbacks should be skipped for it (as
they generally assume the device to not be suspended), so add checks
for that in pci_pm_suspend_late/noirq(), pci_pm_freeze_late/noirq()
and pci_pm_poweroff_late/noirq().
Moreover, if pci_pm_resume_noirq() or pci_pm_restore_noirq() is
called during the subsequent system-wide resume transition and if
the device was left in runtime suspend previously, its runtime PM
status needs to be changed to "active" as it is going to be put
into the full-power state, so add checks for that too to these
functions.
In turn, if pci_pm_thaw_noirq() runs after the device has been
left in runtime suspend, the subsequent "thaw" callbacks need
to be skipped for it (as they may not work correctly with a
suspended device), so set the power.direct_complete flag for the
device then to make the PM core skip those callbacks.
In addition to the above add a core helper for checking if
DPM_FLAG_SMART_SUSPEND is set and the device runtime PM status is
"suspended" at the same time, which is done quite often in the new
code (and will be done elsewhere going forward too).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2017-10-26 12:12:22 +02:00
return 0 ;
pci_fixup_device ( pci_fixup_suspend , to_pci_dev ( dev ) ) ;
return pm_generic_suspend_late ( dev ) ;
}
2008-05-20 00:49:04 +02:00
/*
 * pci_pm_suspend_noirq - noirq phase system suspend callback for a PCI device.
 * @dev: device being suspended.
 *
 * Flow, in order:
 *  - Return 0 at once when dev_pm_skip_suspend(dev) is true.
 *  - Devices with legacy PM support are delegated to
 *    pci_legacy_suspend_late() and its result is returned.
 *  - With no dev_pm_ops available (no driver, or driver->pm is NULL) the
 *    device state is saved and control jumps to the Fixup label.
 *  - If the driver provides ->suspend_noirq(), it is called and reported via
 *    suspend_report_result(); a non-zero result is returned as is.  If
 *    afterwards state_saved is not set and current_state is neither PCI_D0
 *    nor PCI_UNKNOWN, a one-time warning is issued (only when the power
 *    state differs from the one seen before the callback) and control jumps
 *    to Fixup without saving state or changing the power state here.
 *  - If state_saved is still not set, the state is saved and
 *    pci_prepare_to_sleep() is called, unless skip_bus_pm is set or
 *    pci_power_manageable() is false.
 *  - A device found in PCI_D0 gets skip_bus_pm set, and so does the bridge
 *    above it (bus->self) when there is one.
 *  - If skip_bus_pm is set and pm_suspend_no_platform() is true, the rest is
 *    skipped and control jumps to Fixup.
 *  - Otherwise pci_pm_set_unknown_state() is called and, for devices whose
 *    class is PCI_CLASS_SERIAL_USB_EHCI, PCI_COMMAND is written as 0.
 *  - Fixup: the pci_fixup_suspend_late fixups run, and
 *    power.may_skip_resume is cleared when device_can_wakeup(dev) is true
 *    but device_may_wakeup(dev) is false.
 *
 * Return: 0 on success, the error from the driver ->suspend_noirq()
 * callback, or whatever pci_legacy_suspend_late() returns for legacy devices.
 */
static int pci_pm_suspend_noirq ( struct device * dev )
2005-04-08 14:53:31 +09:00
{
2008-12-08 00:34:57 +01:00
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
2009-07-24 22:11:32 -07:00
const struct dev_pm_ops * pm = dev - > driver ? dev - > driver - > pm : NULL ;
2005-04-08 14:53:31 +09:00
2020-04-18 18:52:48 +02:00
if ( dev_pm_skip_suspend ( dev ) )
PCI / PM: Take SMART_SUSPEND driver flag into account
Make the PCI bus type take DPM_FLAG_SMART_SUSPEND into account in its
system-wide PM callbacks and make sure that all code that should not
run in parallel with pci_pm_runtime_resume() is executed in the "late"
phases of system suspend, freeze and poweroff transitions.
[Note that the pm_runtime_suspended() check in pci_dev_keep_suspended()
is an optimization, because if is not passed, all of the subsequent
checks may be skipped and some of them are much more overhead in
general.]
Also use the observation that if the device is in runtime suspend
at the beginning of the "late" phase of a system-wide suspend-like
transition, its state cannot change going forward (runtime PM is
disabled for it at that time) until the transition is over and the
subsequent system-wide PM callbacks should be skipped for it (as
they generally assume the device to not be suspended), so add checks
for that in pci_pm_suspend_late/noirq(), pci_pm_freeze_late/noirq()
and pci_pm_poweroff_late/noirq().
Moreover, if pci_pm_resume_noirq() or pci_pm_restore_noirq() is
called during the subsequent system-wide resume transition and if
the device was left in runtime suspend previously, its runtime PM
status needs to be changed to "active" as it is going to be put
into the full-power state, so add checks for that too to these
functions.
In turn, if pci_pm_thaw_noirq() runs after the device has been
left in runtime suspend, the subsequent "thaw" callbacks need
to be skipped for it (as they may not work correctly with a
suspended device), so set the power.direct_complete flag for the
device then to make the PM core skip those callbacks.
In addition to the above add a core helper for checking if
DPM_FLAG_SMART_SUSPEND is set and the device runtime PM status is
"suspended" at the same time, which is done quite often in the new
code (and will be done elsewhere going forward too).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2017-10-26 12:12:22 +02:00
return 0 ;
2009-01-07 14:15:17 +01:00
if ( pci_has_legacy_pm_support ( pci_dev ) )
2022-10-25 14:35:02 -05:00
return pci_legacy_suspend_late ( dev ) ;
2009-01-07 14:15:17 +01:00
2009-03-16 22:40:50 +01:00
if ( ! pm ) {
pci_save_state ( pci_dev ) ;
2014-06-03 22:04:09 +02:00
goto Fixup ;
2009-03-16 22:40:50 +01:00
}
2009-03-16 22:40:26 +01:00
if ( pm - > suspend_noirq ) {
pci_power_t prev = pci_dev - > current_state ;
int error ;
error = pm - > suspend_noirq ( dev ) ;
2022-03-08 04:07:39 +09:00
suspend_report_result ( dev , pm - > suspend_noirq , error ) ;
2009-03-16 22:40:26 +01:00
if ( error )
return error ;
if ( ! pci_dev - > state_saved & & pci_dev - > current_state ! = PCI_D0
& & pci_dev - > current_state ! = PCI_UNKNOWN ) {
2019-10-07 07:52:28 -05:00
pci_WARN_ONCE ( pci_dev , pci_dev - > current_state ! = prev ,
" PCI PM: State of device not saved by %pS \n " ,
pm - > suspend_noirq ) ;
2014-06-03 22:04:09 +02:00
goto Fixup ;
2009-03-16 22:40:26 +01:00
}
2008-05-20 00:49:04 +02:00
}
2022-08-30 03:49:12 -07:00
if ( ! pci_dev - > state_saved ) {
pci_save_state ( pci_dev ) ;
PCI: PM: Avoid possible suspend-to-idle issue
If a PCI driver leaves the device handled by it in D0 and calls
pci_save_state() on the device in its ->suspend() or ->suspend_late()
callback, it can expect the device to stay in D0 over the whole
s2idle cycle. However, that may not be the case if there is a
spurious wakeup while the system is suspended, because in that case
pci_pm_suspend_noirq() will run again after pci_pm_resume_noirq()
which calls pci_restore_state(), via pci_pm_default_resume_early(),
so state_saved is cleared and the second iteration of
pci_pm_suspend_noirq() will invoke pci_prepare_to_sleep() which
may change the power state of the device.
To avoid that, add a new internal flag, skip_bus_pm, that will be set
by pci_pm_suspend_noirq() when it runs for the first time during the
given system suspend-resume cycle if the state of the device has
been saved already and the device is still in D0. Setting that flag
will cause the next iterations of pci_pm_suspend_noirq() to set
state_saved for pci_pm_resume_noirq(), so that it always restores the
device state from the originally saved data, and avoid calling
pci_prepare_to_sleep() for the device.
Fixes: 33e4f80ee69b ("ACPI / PM: Ignore spurious SCI wakeups from suspend-to-idle")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2019-05-17 11:08:50 +02:00
/*
2022-08-30 03:49:12 -07:00
* If the device is a bridge with a child in D0 below it ,
* it needs to stay in D0 , so check skip_bus_pm to avoid
* putting it into a low - power state in that case .
PCI: PM: Avoid possible suspend-to-idle issue
If a PCI driver leaves the device handled by it in D0 and calls
pci_save_state() on the device in its ->suspend() or ->suspend_late()
callback, it can expect the device to stay in D0 over the whole
s2idle cycle. However, that may not be the case if there is a
spurious wakeup while the system is suspended, because in that case
pci_pm_suspend_noirq() will run again after pci_pm_resume_noirq()
which calls pci_restore_state(), via pci_pm_default_resume_early(),
so state_saved is cleared and the second iteration of
pci_pm_suspend_noirq() will invoke pci_prepare_to_sleep() which
may change the power state of the device.
To avoid that, add a new internal flag, skip_bus_pm, that will be set
by pci_pm_suspend_noirq() when it runs for the first time during the
given system suspend-resume cycle if the state of the device has
been saved already and the device is still in D0. Setting that flag
will cause the next iterations of pci_pm_suspend_noirq() to set
state_saved for pci_pm_resume_noirq(), so that it always restores the
device state from the originally saved data, and avoid calling
pci_prepare_to_sleep() for the device.
Fixes: 33e4f80ee69b ("ACPI / PM: Ignore spurious SCI wakeups from suspend-to-idle")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2019-05-17 11:08:50 +02:00
*/
2022-08-30 03:49:12 -07:00
if ( ! pci_dev - > skip_bus_pm & & pci_power_manageable ( pci_dev ) )
2009-03-16 22:40:26 +01:00
pci_prepare_to_sleep ( pci_dev ) ;
}
2009-01-07 13:11:28 +01:00
2019-10-07 07:55:18 -05:00
pci_dbg ( pci_dev , " PCI PM: Suspend power state: %s \n " ,
2017-09-30 01:31:15 +02:00
pci_power_name ( pci_dev - > current_state ) ) ;
PCI: PM: Skip devices in D0 for suspend-to-idle
Commit d491f2b75237 ("PCI: PM: Avoid possible suspend-to-idle issue")
attempted to avoid a problem with devices whose drivers want them to
stay in D0 over suspend-to-idle and resume, but it did not go as far
as it should with that.
Namely, first of all, the power state of a PCI bridge with a
downstream device in D0 must be D0 (based on the PCI PM spec r1.2,
sec 6, table 6-1, if the bridge is not in D0, there can be no PCI
transactions on its secondary bus), but that is not actively enforced
during system-wide PM transitions, so use the skip_bus_pm flag
introduced by commit d491f2b75237 for that.
Second, the configuration of devices left in D0 (whatever the reason)
during suspend-to-idle need not be changed and attempting to put them
into D0 again by force is pointless, so explicitly avoid doing that.
Fixes: d491f2b75237 ("PCI: PM: Avoid possible suspend-to-idle issue")
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
2019-06-13 23:59:45 +02:00
if ( pci_dev - > current_state = = PCI_D0 ) {
pci_dev - > skip_bus_pm = true ;
/*
* Per PCI PM r1 .2 , table 6 - 1 , a bridge must be in D0 if any
* downstream device is in D0 , so avoid changing the power state
* of the parent bridge by setting the skip_bus_pm flag for it .
*/
if ( pci_dev - > bus - > self )
pci_dev - > bus - > self - > skip_bus_pm = true ;
}
2019-06-26 00:20:23 +02:00
if ( pci_dev - > skip_bus_pm & & pm_suspend_no_platform ( ) ) {
2019-10-07 07:55:18 -05:00
pci_dbg ( pci_dev , " PCI PM: Skipped \n " ) ;
PCI: PM: Skip devices in D0 for suspend-to-idle
Commit d491f2b75237 ("PCI: PM: Avoid possible suspend-to-idle issue")
attempted to avoid a problem with devices whose drivers want them to
stay in D0 over suspend-to-idle and resume, but it did not go as far
as it should with that.
Namely, first of all, the power state of a PCI bridge with a
downstream device in D0 must be D0 (based on the PCI PM spec r1.2,
sec 6, table 6-1, if the bridge is not in D0, there can be no PCI
transactions on its secondary bus), but that is not actively enforced
during system-wide PM transitions, so use the skip_bus_pm flag
introduced by commit d491f2b75237 for that.
Second, the configuration of devices left in D0 (whatever the reason)
during suspend-to-idle need not be changed and attempting to put them
into D0 again by force is pointless, so explicitly avoid doing that.
Fixes: d491f2b75237 ("PCI: PM: Avoid possible suspend-to-idle issue")
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
2019-06-13 23:59:45 +02:00
goto Fixup ;
}
2009-03-16 22:40:26 +01:00
pci_pm_set_unknown_state ( pci_dev ) ;
PCI: EHCI: fix crash during suspend on ASUS computers
Quite a few ASUS computers experience a nasty problem, related to the
EHCI controllers, when going into system suspend. It was observed
that the problem didn't occur if the controllers were not put into the
D3 power state before starting the suspend, and commit
151b61284776be2d6f02d48c23c3625678960b97 (USB: EHCI: fix crash during
suspend on ASUS computers) was created to do this.
It turned out this approach messed up other computers that didn't have
the problem -- it prevented USB wakeup from working. Consequently
commit c2fb8a3fa25513de8fedb38509b1f15a5bbee47b (USB: add
NO_D3_DURING_SLEEP flag and revert 151b61284776be2) was merged; it
reverted the earlier commit and added a whitelist of known good board
names.
Now we know the actual cause of the problem. Thanks to AceLan Kao for
tracking it down.
According to him, an engineer at ASUS explained that some of their
BIOSes contain a bug that was added in an attempt to work around a
problem in early versions of Windows. When the computer goes into S3
suspend, the BIOS tries to verify that the EHCI controllers were first
quiesced by the OS. Nothing's wrong with this, but the BIOS does it
by checking that the PCI COMMAND registers contain 0 without checking
the controllers' power state. If the register isn't 0, the BIOS
assumes the controller needs to be quiesced and tries to do so. This
involves making various MMIO accesses to the controller, which don't
work very well if the controller is already in D3. The end result is
a system hang or memory corruption.
Since the value in the PCI COMMAND register doesn't matter once the
controller has been suspended, and since the value will be restored
anyway when the controller is resumed, we can work around the BIOS bug
simply by setting the register to 0 during system suspend. This patch
(as1590) does so and also reverts the second commit mentioned above,
which is now unnecessary.
In theory we could do this for every PCI device. However to avoid
introducing new problems, the patch restricts itself to EHCI host
controllers.
Finally the affected systems can suspend with USB wakeup working
properly.
Reference: https://bugzilla.kernel.org/show_bug.cgi?id=37632
Reference: https://bugzilla.kernel.org/show_bug.cgi?id=42728
Based-on-patch-by: AceLan Kao <acelan.kao@canonical.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Tested-by: Dâniel Fraga <fragabr@gmail.com>
Tested-by: Javier Marcet <jmarcet@gmail.com>
Tested-by: Andrey Rahmatullin <wrar@wrar.name>
Tested-by: Oleksij Rempel <bug-track@fisher-privat.net>
Tested-by: Pavel Pisa <pisa@cmp.felk.cvut.cz>
Cc: stable <stable@vger.kernel.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2012-07-09 11:09:21 -04:00
/*
* Some BIOSes from ASUS have a bug : If a USB EHCI host controller ' s
* PCI COMMAND register isn ' t 0 , the BIOS assumes that the controller
* hasn ' t been quiesced and tries to turn it off . If the controller
* is already in D3 , this can hang or cause memory corruption .
*
* Since the value of the COMMAND register doesn ' t matter once the
* device has been suspended , we can safely set it to 0 here .
*/
if ( pci_dev - > class = = PCI_CLASS_SERIAL_USB_EHCI )
pci_write_config_word ( pci_dev , PCI_COMMAND , 0 ) ;
2014-06-03 22:04:09 +02:00
Fixup :
pci_fixup_device ( pci_fixup_suspend_late , pci_dev ) ;
2017-11-18 15:33:52 +01:00
/*
* If the target system sleep state is suspend - to - idle , it is sufficient
* to check whether or not the device ' s wakeup settings are good for
* runtime PM . Otherwise , the pm_resume_via_firmware ( ) check will cause
* pci_pm_complete ( ) to take care of fixing up the device ' s state
* anyway , if need be .
*/
2020-04-18 18:52:19 +02:00
if ( device_can_wakeup ( dev ) & & ! device_may_wakeup ( dev ) )
dev - > power . may_skip_resume = false ;
2017-11-18 15:33:52 +01:00
2009-03-16 22:40:26 +01:00
return 0 ;
2005-04-08 14:53:31 +09:00
}
2005-04-16 15:20:36 -07:00
2009-01-07 13:12:22 +01:00
static int pci_pm_resume_noirq ( struct device * dev )
2008-05-20 00:49:04 +02:00
{
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
2019-10-14 13:46:50 -05:00
const struct dev_pm_ops * pm = dev - > driver ? dev - > driver - > pm : NULL ;
PCI/PM: Add missing link delays required by the PCIe spec
Currently Linux does not follow PCIe spec regarding the required delays
after reset. A concrete example is a Thunderbolt add-in-card that consists
of a PCIe switch and two PCIe endpoints:
+-1b.0-[01-6b]----00.0-[02-6b]--+-00.0-[03]----00.0 TBT controller
+-01.0-[04-36]-- DS hotplug port
+-02.0-[37]----00.0 xHCI controller
\-04.0-[38-6b]-- DS hotplug port
The root port (1b.0) and the PCIe switch downstream ports are all PCIe Gen3
so they support 8GT/s link speeds.
We wait for the PCIe hierarchy to enter D3cold (runtime):
pcieport 0000:00:1b.0: power state changed by ACPI to D3cold
When it wakes up from D3cold, according to the PCIe 5.0 section 5.8 the
PCIe switch is put to reset and its power is re-applied. This means that we
must follow the rules in PCIe 5.0 section 6.6.1.
For the PCIe Gen3 ports we are dealing with here, the following applies:
With a Downstream Port that supports Link speeds greater than 5.0 GT/s,
software must wait a minimum of 100 ms after Link training completes
before sending a Configuration Request to the device immediately below
that Port. Software can determine when Link training completes by polling
the Data Link Layer Link Active bit or by setting up an associated
interrupt (see Section 6.7.3.3).
Translating this into the above topology we would need to do this (DLLLA
stands for Data Link Layer Link Active):
0000:00:1b.0: wait for 100 ms after DLLLA is set before access to 0000:01:00.0
0000:02:00.0: wait for 100 ms after DLLLA is set before access to 0000:03:00.0
0000:02:02.0: wait for 100 ms after DLLLA is set before access to 0000:37:00.0
I've instrumented the kernel with some additional logging so we can see the
actual delays performed:
pcieport 0000:00:1b.0: power state changed by ACPI to D0
pcieport 0000:00:1b.0: waiting for D3cold delay of 100 ms
pcieport 0000:00:1b.0: waiting for D3hot delay of 10 ms
pcieport 0000:02:01.0: waiting for D3hot delay of 10 ms
pcieport 0000:02:04.0: waiting for D3hot delay of 10 ms
For the switch upstream port (01:00.0 reachable through 00:1b.0 root port)
we wait for 100 ms but not taking into account the DLLLA requirement. We
then wait 10 ms for D3hot -> D0 transition of the root port and the two
downstream hotplug ports. This means that we deviate from what the spec
requires.
Performing the same check for system sleep (s2idle) transitions it turns
out to be even worse. None of the mandatory delays are performed. If this
would be S3 instead of s2idle then according to PCI FW spec 3.2 section
4.6.8. there is a specific _DSM that allows the OS to skip the delays but
this platform does not provide the _DSM and does not go to S3 anyway so no
firmware is involved that could already handle these delays.
On this particular platform these delays are not actually needed because
there is an additional delay as part of the ACPI power resource that is
used to turn on power to the hierarchy but since that additional delay is
not required by any of standards (PCIe, ACPI) it is not present in the
Intel Ice Lake, for example where missing the mandatory delays causes
pciehp to start tearing down the stack too early (links are not yet
trained). Below is an example how it looks like when this happens:
pcieport 0000:83:04.0: pciehp: Slot(4): Card not present
pcieport 0000:87:04.0: PME# disabled
pcieport 0000:83:04.0: pciehp: pciehp_unconfigure_device: domain:bus:dev = 0000:86:00
pcieport 0000:86:00.0: Refused to change power state, currently in D3
pcieport 0000:86:00.0: restoring config space at offset 0x3c (was 0xffffffff, writing 0x201ff)
pcieport 0000:86:00.0: restoring config space at offset 0x38 (was 0xffffffff, writing 0x0)
...
There is also one reported case (see the bugzilla link below) where the
missing delay causes xHCI on a Titan Ridge controller fail to runtime
resume when USB-C dock is plugged. This does not involve pciehp but instead
the PCI core fails to runtime resume the xHCI device:
pcieport 0000:04:02.0: restoring config space at offset 0xc (was 0x10000, writing 0x10020)
pcieport 0000:04:02.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100406)
xhci_hcd 0000:39:00.0: Refused to change power state, currently in D3
xhci_hcd 0000:39:00.0: restoring config space at offset 0x3c (was 0xffffffff, writing 0x1ff)
xhci_hcd 0000:39:00.0: restoring config space at offset 0x38 (was 0xffffffff, writing 0x0)
...
Add a new function pci_bridge_wait_for_secondary_bus() that is called on
PCI core resume and runtime resume paths accordingly if the bridge entered
D3cold (and thus went through reset).
This is second attempt to add the missing delays. The previous solution in
c2bf1fc212f7 ("PCI: Add missing link delays required by the PCIe spec") was
reverted because of two issues it caused:
1. One system become unresponsive after S3 resume due to PME service
spinning in pcie_pme_work_fn(). The root port in question reports that
the xHCI sent PME but the xHCI device itself does not have PME status
set. The PME status bit is never cleared in the root port resulting
the indefinite loop in pcie_pme_work_fn().
2. Slows down resume if the root/downstream port does not support Data
Link Layer Active Reporting because pcie_wait_for_link_delay() waits
1100 ms in that case.
This version should avoid the above issues because we restrict the delay to
happen only if the port went into D3cold.
Link: https://lore.kernel.org/linux-pci/SL2P216MB01878BBCD75F21D882AEEA2880C60@SL2P216MB0187.KORP216.PROD.OUTLOOK.COM/
Link: https://bugzilla.kernel.org/show_bug.cgi?id=203885
Link: https://lore.kernel.org/r/20191112091617.70282-3-mika.westerberg@linux.intel.com
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2019-11-12 12:16:17 +03:00
pci_power_t prev_state = pci_dev - > current_state ;
bool skip_bus_pm = pci_dev - > skip_bus_pm ;
2008-05-20 00:49:04 +02:00
2020-04-18 18:52:30 +02:00
if ( dev_pm_skip_resume ( dev ) )
2017-11-18 15:33:52 +01:00
return 0 ;
PCI: PM: Skip devices in D0 for suspend-to-idle
Commit d491f2b75237 ("PCI: PM: Avoid possible suspend-to-idle issue")
attempted to avoid a problem with devices whose drivers want them to
stay in D0 over suspend-to-idle and resume, but it did not go as far
as it should with that.
Namely, first of all, the power state of a PCI bridge with a
downstream device in D0 must be D0 (based on the PCI PM spec r1.2,
sec 6, table 6-1, if the bridge is not in D0, there can be no PCI
transactions on its secondary bus), but that is not actively enforced
during system-wide PM transitions, so use the skip_bus_pm flag
introduced by commit d491f2b75237 for that.
Second, the configuration of devices left in D0 (whatever the reason)
during suspend-to-idle need not be changed and attempting to put them
into D0 again by force is pointless, so explicitly avoid doing that.
Fixes: d491f2b75237 ("PCI: PM: Avoid possible suspend-to-idle issue")
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
2019-06-13 23:59:45 +02:00
/*
* In the suspend - to - idle case , devices left in D0 during suspend will
* stay in D0 , so it is not necessary to restore or update their
2019-06-26 00:20:23 +02:00
* configuration here and attempting to put them into D0 again is
* pointless , so avoid doing that .
PCI: PM: Skip devices in D0 for suspend-to-idle
Commit d491f2b75237 ("PCI: PM: Avoid possible suspend-to-idle issue")
attempted to avoid a problem with devices whose drivers want them to
stay in D0 over suspend-to-idle and resume, but it did not go as far
as it should with that.
Namely, first of all, the power state of a PCI bridge with a
downstream device in D0 must be D0 (based on the PCI PM spec r1.2,
sec 6, table 6-1, if the bridge is not in D0, there can be no PCI
transactions on its secondary bus), but that is not actively enforced
during system-wide PM transitions, so use the skip_bus_pm flag
introduced by commit d491f2b75237 for that.
Second, the configuration of devices left in D0 (whatever the reason)
during suspend-to-idle need not be changed and attempting to put them
into D0 again by force is pointless, so explicitly avoid doing that.
Fixes: d491f2b75237 ("PCI: PM: Avoid possible suspend-to-idle issue")
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
2019-06-13 23:59:45 +02:00
*/
PCI/PM: Add missing link delays required by the PCIe spec
Currently Linux does not follow PCIe spec regarding the required delays
after reset. A concrete example is a Thunderbolt add-in-card that consists
of a PCIe switch and two PCIe endpoints:
+-1b.0-[01-6b]----00.0-[02-6b]--+-00.0-[03]----00.0 TBT controller
+-01.0-[04-36]-- DS hotplug port
+-02.0-[37]----00.0 xHCI controller
\-04.0-[38-6b]-- DS hotplug port
The root port (1b.0) and the PCIe switch downstream ports are all PCIe Gen3
so they support 8GT/s link speeds.
We wait for the PCIe hierarchy to enter D3cold (runtime):
pcieport 0000:00:1b.0: power state changed by ACPI to D3cold
When it wakes up from D3cold, according to the PCIe 5.0 section 5.8 the
PCIe switch is put to reset and its power is re-applied. This means that we
must follow the rules in PCIe 5.0 section 6.6.1.
For the PCIe Gen3 ports we are dealing with here, the following applies:
With a Downstream Port that supports Link speeds greater than 5.0 GT/s,
software must wait a minimum of 100 ms after Link training completes
before sending a Configuration Request to the device immediately below
that Port. Software can determine when Link training completes by polling
the Data Link Layer Link Active bit or by setting up an associated
interrupt (see Section 6.7.3.3).
Translating this into the above topology we would need to do this (DLLLA
stands for Data Link Layer Link Active):
0000:00:1b.0: wait for 100 ms after DLLLA is set before access to 0000:01:00.0
0000:02:00.0: wait for 100 ms after DLLLA is set before access to 0000:03:00.0
0000:02:02.0: wait for 100 ms after DLLLA is set before access to 0000:37:00.0
I've instrumented the kernel with some additional logging so we can see the
actual delays performed:
pcieport 0000:00:1b.0: power state changed by ACPI to D0
pcieport 0000:00:1b.0: waiting for D3cold delay of 100 ms
pcieport 0000:00:1b.0: waiting for D3hot delay of 10 ms
pcieport 0000:02:01.0: waiting for D3hot delay of 10 ms
pcieport 0000:02:04.0: waiting for D3hot delay of 10 ms
For the switch upstream port (01:00.0 reachable through 00:1b.0 root port)
we wait for 100 ms but not taking into account the DLLLA requirement. We
then wait 10 ms for D3hot -> D0 transition of the root port and the two
downstream hotplug ports. This means that we deviate from what the spec
requires.
Performing the same check for system sleep (s2idle) transitions it turns
out to be even worse. None of the mandatory delays are performed. If this
would be S3 instead of s2idle then according to PCI FW spec 3.2 section
4.6.8. there is a specific _DSM that allows the OS to skip the delays but
this platform does not provide the _DSM and does not go to S3 anyway so no
firmware is involved that could already handle these delays.
On this particular platform these delays are not actually needed because
there is an additional delay as part of the ACPI power resource that is
used to turn on power to the hierarchy but since that additional delay is
not required by any of standards (PCIe, ACPI) it is not present in the
Intel Ice Lake, for example where missing the mandatory delays causes
pciehp to start tearing down the stack too early (links are not yet
trained). Below is an example how it looks like when this happens:
pcieport 0000:83:04.0: pciehp: Slot(4): Card not present
pcieport 0000:87:04.0: PME# disabled
pcieport 0000:83:04.0: pciehp: pciehp_unconfigure_device: domain:bus:dev = 0000:86:00
pcieport 0000:86:00.0: Refused to change power state, currently in D3
pcieport 0000:86:00.0: restoring config space at offset 0x3c (was 0xffffffff, writing 0x201ff)
pcieport 0000:86:00.0: restoring config space at offset 0x38 (was 0xffffffff, writing 0x0)
...
There is also one reported case (see the bugzilla link below) where the
missing delay causes xHCI on a Titan Ridge controller fail to runtime
resume when USB-C dock is plugged. This does not involve pciehp but instead
the PCI core fails to runtime resume the xHCI device:
pcieport 0000:04:02.0: restoring config space at offset 0xc (was 0x10000, writing 0x10020)
pcieport 0000:04:02.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100406)
xhci_hcd 0000:39:00.0: Refused to change power state, currently in D3
xhci_hcd 0000:39:00.0: restoring config space at offset 0x3c (was 0xffffffff, writing 0x1ff)
xhci_hcd 0000:39:00.0: restoring config space at offset 0x38 (was 0xffffffff, writing 0x0)
...
Add a new function pci_bridge_wait_for_secondary_bus() that is called on
PCI core resume and runtime resume paths accordingly if the bridge entered
D3cold (and thus went through reset).
This is second attempt to add the missing delays. The previous solution in
c2bf1fc212f7 ("PCI: Add missing link delays required by the PCIe spec") was
reverted because of two issues it caused:
1. One system become unresponsive after S3 resume due to PME service
spinning in pcie_pme_work_fn(). The root port in question reports that
the xHCI sent PME but the xHCI device itself does not have PME status
set. The PME status bit is never cleared in the root port resulting
the indefinite loop in pcie_pme_work_fn().
2. Slows down resume if the root/downstream port does not support Data
Link Layer Active Reporting because pcie_wait_for_link_delay() waits
1100 ms in that case.
This version should avoid the above issues because we restrict the delay to
happen only if the port went into D3cold.
Link: https://lore.kernel.org/linux-pci/SL2P216MB01878BBCD75F21D882AEEA2880C60@SL2P216MB0187.KORP216.PROD.OUTLOOK.COM/
Link: https://bugzilla.kernel.org/show_bug.cgi?id=203885
Link: https://lore.kernel.org/r/20191112091617.70282-3-mika.westerberg@linux.intel.com
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2019-11-12 12:16:17 +03:00
if ( ! ( skip_bus_pm & & pm_suspend_no_platform ( ) ) )
PCI: PM: Skip devices in D0 for suspend-to-idle
Commit d491f2b75237 ("PCI: PM: Avoid possible suspend-to-idle issue")
attempted to avoid a problem with devices whose drivers want them to
stay in D0 over suspend-to-idle and resume, but it did not go as far
as it should with that.
Namely, first of all, the power state of a PCI bridge with a
downstream device in D0 must be D0 (based on the PCI PM spec r1.2,
sec 6, table 6-1, if the bridge is not in D0, there can be no PCI
transactions on its secondary bus), but that is not actively enforced
during system-wide PM transitions, so use the skip_bus_pm flag
introduced by commit d491f2b75237 for that.
Second, the configuration of devices left in D0 (whatever the reason)
during suspend-to-idle need not be changed and attempting to put them
into D0 again by force is pointless, so explicitly avoid doing that.
Fixes: d491f2b75237 ("PCI: PM: Avoid possible suspend-to-idle issue")
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
2019-06-13 23:59:45 +02:00
pci_pm_default_resume_early ( pci_dev ) ;
pci_fixup_device ( pci_fixup_resume_early , pci_dev ) ;
2019-10-10 16:54:36 -05:00
pcie_pme_root_status_cleanup ( pci_dev ) ;
2009-01-16 21:54:43 +01:00
PCI/PM: Add missing link delays required by the PCIe spec
Currently Linux does not follow PCIe spec regarding the required delays
after reset. A concrete example is a Thunderbolt add-in-card that consists
of a PCIe switch and two PCIe endpoints:
+-1b.0-[01-6b]----00.0-[02-6b]--+-00.0-[03]----00.0 TBT controller
+-01.0-[04-36]-- DS hotplug port
+-02.0-[37]----00.0 xHCI controller
\-04.0-[38-6b]-- DS hotplug port
The root port (1b.0) and the PCIe switch downstream ports are all PCIe Gen3
so they support 8GT/s link speeds.
We wait for the PCIe hierarchy to enter D3cold (runtime):
pcieport 0000:00:1b.0: power state changed by ACPI to D3cold
When it wakes up from D3cold, according to the PCIe 5.0 section 5.8 the
PCIe switch is put to reset and its power is re-applied. This means that we
must follow the rules in PCIe 5.0 section 6.6.1.
For the PCIe Gen3 ports we are dealing with here, the following applies:
With a Downstream Port that supports Link speeds greater than 5.0 GT/s,
software must wait a minimum of 100 ms after Link training completes
before sending a Configuration Request to the device immediately below
that Port. Software can determine when Link training completes by polling
the Data Link Layer Link Active bit or by setting up an associated
interrupt (see Section 6.7.3.3).
Translating this into the above topology we would need to do this (DLLLA
stands for Data Link Layer Link Active):
0000:00:1b.0: wait for 100 ms after DLLLA is set before access to 0000:01:00.0
0000:02:00.0: wait for 100 ms after DLLLA is set before access to 0000:03:00.0
0000:02:02.0: wait for 100 ms after DLLLA is set before access to 0000:37:00.0
I've instrumented the kernel with some additional logging so we can see the
actual delays performed:
pcieport 0000:00:1b.0: power state changed by ACPI to D0
pcieport 0000:00:1b.0: waiting for D3cold delay of 100 ms
pcieport 0000:00:1b.0: waiting for D3hot delay of 10 ms
pcieport 0000:02:01.0: waiting for D3hot delay of 10 ms
pcieport 0000:02:04.0: waiting for D3hot delay of 10 ms
For the switch upstream port (01:00.0 reachable through 00:1b.0 root port)
we wait for 100 ms but not taking into account the DLLLA requirement. We
then wait 10 ms for D3hot -> D0 transition of the root port and the two
downstream hotplug ports. This means that we deviate from what the spec
requires.
Performing the same check for system sleep (s2idle) transitions it turns
out to be even worse. None of the mandatory delays are performed. If this
would be S3 instead of s2idle then according to PCI FW spec 3.2 section
4.6.8. there is a specific _DSM that allows the OS to skip the delays but
this platform does not provide the _DSM and does not go to S3 anyway so no
firmware is involved that could already handle these delays.
On this particular platform these delays are not actually needed because
there is an additional delay as part of the ACPI power resource that is
used to turn on power to the hierarchy but since that additional delay is
not required by any of standards (PCIe, ACPI) it is not present in the
Intel Ice Lake, for example where missing the mandatory delays causes
pciehp to start tearing down the stack too early (links are not yet
trained). Below is an example how it looks like when this happens:
pcieport 0000:83:04.0: pciehp: Slot(4): Card not present
pcieport 0000:87:04.0: PME# disabled
pcieport 0000:83:04.0: pciehp: pciehp_unconfigure_device: domain:bus:dev = 0000:86:00
pcieport 0000:86:00.0: Refused to change power state, currently in D3
pcieport 0000:86:00.0: restoring config space at offset 0x3c (was 0xffffffff, writing 0x201ff)
pcieport 0000:86:00.0: restoring config space at offset 0x38 (was 0xffffffff, writing 0x0)
...
There is also one reported case (see the bugzilla link below) where the
missing delay causes xHCI on a Titan Ridge controller fail to runtime
resume when USB-C dock is plugged. This does not involve pciehp but instead
the PCI core fails to runtime resume the xHCI device:
pcieport 0000:04:02.0: restoring config space at offset 0xc (was 0x10000, writing 0x10020)
pcieport 0000:04:02.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100406)
xhci_hcd 0000:39:00.0: Refused to change power state, currently in D3
xhci_hcd 0000:39:00.0: restoring config space at offset 0x3c (was 0xffffffff, writing 0x1ff)
xhci_hcd 0000:39:00.0: restoring config space at offset 0x38 (was 0xffffffff, writing 0x0)
...
Add a new function pci_bridge_wait_for_secondary_bus() that is called on
PCI core resume and runtime resume paths accordingly if the bridge entered
D3cold (and thus went through reset).
This is second attempt to add the missing delays. The previous solution in
c2bf1fc212f7 ("PCI: Add missing link delays required by the PCIe spec") was
reverted because of two issues it caused:
1. One system become unresponsive after S3 resume due to PME service
spinning in pcie_pme_work_fn(). The root port in question reports that
the xHCI sent PME but the xHCI device itself does not have PME status
set. The PME status bit is never cleared in the root port resulting
the indefinite loop in pcie_pme_work_fn().
2. Slows down resume if the root/downstream port does not support Data
Link Layer Active Reporting because pcie_wait_for_link_delay() waits
1100 ms in that case.
This version should avoid the above issues because we restrict the delay to
happen only if the port went into D3cold.
Link: https://lore.kernel.org/linux-pci/SL2P216MB01878BBCD75F21D882AEEA2880C60@SL2P216MB0187.KORP216.PROD.OUTLOOK.COM/
Link: https://bugzilla.kernel.org/show_bug.cgi?id=203885
Link: https://lore.kernel.org/r/20191112091617.70282-3-mika.westerberg@linux.intel.com
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2019-11-12 12:16:17 +03:00
if ( ! skip_bus_pm & & prev_state = = PCI_D3cold )
2022-04-14 15:04:13 +02:00
pci_pm_bridge_power_up_actions ( pci_dev ) ;
PCI/PM: Add missing link delays required by the PCIe spec
Currently Linux does not follow PCIe spec regarding the required delays
after reset. A concrete example is a Thunderbolt add-in-card that consists
of a PCIe switch and two PCIe endpoints:
+-1b.0-[01-6b]----00.0-[02-6b]--+-00.0-[03]----00.0 TBT controller
+-01.0-[04-36]-- DS hotplug port
+-02.0-[37]----00.0 xHCI controller
\-04.0-[38-6b]-- DS hotplug port
The root port (1b.0) and the PCIe switch downstream ports are all PCIe Gen3
so they support 8GT/s link speeds.
We wait for the PCIe hierarchy to enter D3cold (runtime):
pcieport 0000:00:1b.0: power state changed by ACPI to D3cold
When it wakes up from D3cold, according to the PCIe 5.0 section 5.8 the
PCIe switch is put to reset and its power is re-applied. This means that we
must follow the rules in PCIe 5.0 section 6.6.1.
For the PCIe Gen3 ports we are dealing with here, the following applies:
With a Downstream Port that supports Link speeds greater than 5.0 GT/s,
software must wait a minimum of 100 ms after Link training completes
before sending a Configuration Request to the device immediately below
that Port. Software can determine when Link training completes by polling
the Data Link Layer Link Active bit or by setting up an associated
interrupt (see Section 6.7.3.3).
Translating this into the above topology we would need to do this (DLLLA
stands for Data Link Layer Link Active):
0000:00:1b.0: wait for 100 ms after DLLLA is set before access to 0000:01:00.0
0000:02:00.0: wait for 100 ms after DLLLA is set before access to 0000:03:00.0
0000:02:02.0: wait for 100 ms after DLLLA is set before access to 0000:37:00.0
I've instrumented the kernel with some additional logging so we can see the
actual delays performed:
pcieport 0000:00:1b.0: power state changed by ACPI to D0
pcieport 0000:00:1b.0: waiting for D3cold delay of 100 ms
pcieport 0000:00:1b.0: waiting for D3hot delay of 10 ms
pcieport 0000:02:01.0: waiting for D3hot delay of 10 ms
pcieport 0000:02:04.0: waiting for D3hot delay of 10 ms
For the switch upstream port (01:00.0 reachable through 00:1b.0 root port)
we wait for 100 ms but not taking into account the DLLLA requirement. We
then wait 10 ms for D3hot -> D0 transition of the root port and the two
downstream hotplug ports. This means that we deviate from what the spec
requires.
Performing the same check for system sleep (s2idle) transitions it turns
out to be even worse. None of the mandatory delays are performed. If this
would be S3 instead of s2idle then according to PCI FW spec 3.2 section
4.6.8. there is a specific _DSM that allows the OS to skip the delays but
this platform does not provide the _DSM and does not go to S3 anyway so no
firmware is involved that could already handle these delays.
On this particular platform these delays are not actually needed because
there is an additional delay as part of the ACPI power resource that is
used to turn on power to the hierarchy but since that additional delay is
not required by any of standards (PCIe, ACPI) it is not present in the
Intel Ice Lake, for example where missing the mandatory delays causes
pciehp to start tearing down the stack too early (links are not yet
trained). Below is an example how it looks like when this happens:
pcieport 0000:83:04.0: pciehp: Slot(4): Card not present
pcieport 0000:87:04.0: PME# disabled
pcieport 0000:83:04.0: pciehp: pciehp_unconfigure_device: domain:bus:dev = 0000:86:00
pcieport 0000:86:00.0: Refused to change power state, currently in D3
pcieport 0000:86:00.0: restoring config space at offset 0x3c (was 0xffffffff, writing 0x201ff)
pcieport 0000:86:00.0: restoring config space at offset 0x38 (was 0xffffffff, writing 0x0)
...
There is also one reported case (see the bugzilla link below) where the
missing delay causes xHCI on a Titan Ridge controller fail to runtime
resume when USB-C dock is plugged. This does not involve pciehp but instead
the PCI core fails to runtime resume the xHCI device:
pcieport 0000:04:02.0: restoring config space at offset 0xc (was 0x10000, writing 0x10020)
pcieport 0000:04:02.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100406)
xhci_hcd 0000:39:00.0: Refused to change power state, currently in D3
xhci_hcd 0000:39:00.0: restoring config space at offset 0x3c (was 0xffffffff, writing 0x1ff)
xhci_hcd 0000:39:00.0: restoring config space at offset 0x38 (was 0xffffffff, writing 0x0)
...
Add a new function pci_bridge_wait_for_secondary_bus() that is called on
PCI core resume and runtime resume paths accordingly if the bridge entered
D3cold (and thus went through reset).
This is second attempt to add the missing delays. The previous solution in
c2bf1fc212f7 ("PCI: Add missing link delays required by the PCIe spec") was
reverted because of two issues it caused:
1. One system become unresponsive after S3 resume due to PME service
spinning in pcie_pme_work_fn(). The root port in question reports that
the xHCI sent PME but the xHCI device itself does not have PME status
set. The PME status bit is never cleared in the root port resulting
the indefinite loop in pcie_pme_work_fn().
2. Slows down resume if the root/downstream port does not support Data
Link Layer Active Reporting because pcie_wait_for_link_delay() waits
1100 ms in that case.
This version should avoid the above issues because we restrict the delay to
happen only if the port went into D3cold.
Link: https://lore.kernel.org/linux-pci/SL2P216MB01878BBCD75F21D882AEEA2880C60@SL2P216MB0187.KORP216.PROD.OUTLOOK.COM/
Link: https://bugzilla.kernel.org/show_bug.cgi?id=203885
Link: https://lore.kernel.org/r/20191112091617.70282-3-mika.westerberg@linux.intel.com
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2019-11-12 12:16:17 +03:00
2009-01-07 13:09:37 +01:00
if ( pci_has_legacy_pm_support ( pci_dev ) )
2019-10-31 17:53:04 -05:00
return 0 ;
2009-01-07 14:15:17 +01:00
2019-10-14 13:46:50 -05:00
if ( pm & & pm - > resume_noirq )
return pm - > resume_noirq ( dev ) ;
2008-05-20 00:49:04 +02:00
2019-10-14 13:46:50 -05:00
return 0 ;
2008-05-20 00:49:04 +02:00
}
PM: sleep: core: Do not skip callbacks in the resume phase
The current code in device_resume_noirq() causes the entire early
resume and resume phases of device suspend to be skipped for
devices for which the noirq resume phase has been skipped (due
to the LEAVE_SUSPENDED flag being set) on the premise that those
devices should stay in runtime-suspend after system-wide resume.
However, that may not be correct in two situations. First, the
middle layer (subsystem) noirq resume callback may be missing for
a given device, but its early resume callback may be present and it
may need to do something even if it decides to skip the driver
callback. Second, if the device's wakeup settings were adjusted
in the suspend phase without resuming the device (that was in
runtime suspend at that time), they most likely need to be
adjusted again in the resume phase and so the driver callback
in that phase needs to be run.
For the above reason, modify the core to allow the middle layer
->resume_early callback to run even if its ->resume_noirq callback
is missing (and the core has skipped the driver-level callback
in that phase) and to allow all device callbacks to run in the
resume phase. Also make the core set the PM-runtime status of
devices with SMART_SUSPEND set whose resume callbacks are not
skipped to "active" in the "noirq" resume phase and update the
affected subsystems (PCI and ACPI) accordingly.
After this change, middle-layer (subsystem) callbacks will always
be invoked in all phases of system suspend and resume and driver
callbacks will always run in the prepare, suspend, resume, and
complete phases for all devices.
For devices with SMART_SUSPEND set, driver callbacks will be
skipped in the late and noirq phases of system suspend if those
devices remain in runtime suspend in __device_suspend_late().
Driver callbacks will also be skipped for them during the
noirq and early phases of the "thaw" transition related to
hibernation in that case.
Setting LEAVE_SUSPENDED means that the driver allows its callbacks
to be skipped in the noirq and early phases of system resume, but
some additional conditions need to be met for that to happen (among
other things, the power.may_skip_resume flag needs to be set for the
device during system suspend for the driver callbacks to be skipped
during the subsequent resume transition).
For all devices with SMART_SUSPEND set whose driver callbacks are
invoked during system resume, the PM-runtime status will be set to
"active" (by the core).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2020-04-18 18:52:08 +02:00
/*
 * pci_pm_resume_early - "early" resume callback used by the PCI bus type.
 * @dev: device being resumed.
 *
 * If dev_pm_skip_resume() reports that resume is to be skipped for @dev,
 * nothing is done and 0 is returned.  Otherwise the work is delegated to
 * pm_generic_resume_early() and its return value is passed back unchanged.
 */
static int pci_pm_resume_early ( struct device * dev )
{
2020-04-18 18:52:30 +02:00
if ( dev_pm_skip_resume ( dev ) )
PM: sleep: core: Do not skip callbacks in the resume phase
The current code in device_resume_noirq() causes the entire early
resume and resume phases of device suspend to be skipped for
devices for which the noirq resume phase have been skipped (due
to the LEAVE_SUSPENDED flag being set) on the premise that those
devices should stay in runtime-suspend after system-wide resume.
However, that may not be correct in two situations. First, the
middle layer (subsystem) noirq resume callback may be missing for
a given device, but its early resume callback may be present and it
may need to do something even if it decides to skip the driver
callback. Second, if the device's wakeup settings were adjusted
in the suspend phase without resuming the device (that was in
runtime suspend at that time), they most likely need to be
adjusted again in the resume phase and so the driver callback
in that phase needs to be run.
For the above reason, modify the core to allow the middle layer
->resume_late callback to run even if its ->resume_noirq callback
is missing (and the core has skipped the driver-level callback
in that phase) and to allow all device callbacks to run in the
resume phase. Also make the core set the PM-runtime status of
devices with SMART_SUSPEND set whose resume callbacks are not
skipped to "active" in the "noirq" resume phase and update the
affected subsystems (PCI and ACPI) accordingly.
After this change, middle-layer (subsystem) callbacks will always
be invoked in all phases of system suspend and resume and driver
callbacks will always run in the prepare, suspend, resume, and
complete phases for all devices.
For devices with SMART_SUSPEND set, driver callbacks will be
skipped in the late and noirq phases of system suspend if those
devices remain in runtime suspend in __device_suspend_late().
Driver callbacks will also be skipped for them during the
noirq and early phases of the "thaw" transition related to
hibernation in that case.
Setting LEAVE_SUSPENDED means that the driver allows its callbacks
to be skipped in the noirq and early phases of system resume, but
some additional conditions need to be met for that to happen (among
other things, the power.may_skip_resume flag needs to be set for the
device during system suspend for the driver callbacks to be skipped
during the subsequent resume transition).
For all devices with SMART_SUSPEND set whose driver callbacks are
invoked during system resume, the PM-runtime status will be set to
"active" (by the core).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2020-04-18 18:52:08 +02:00
return 0 ;
return pm_generic_resume_early ( dev ) ;
}
2009-01-07 13:12:22 +01:00
/*
 * pci_pm_resume - resume callback used by the PCI bus type.
 * @dev: device being resumed; converted to its pci_dev with to_pci_dev().
 *
 * Steps, in order:
 *  1. If pci_dev->state_saved is still set, restore the standard config
 *     registers (covers the suspend error path, where resume is called
 *     without that restoration having happened).
 *  2. Call pci_resume_ptm().  PTM is disabled before the device state is
 *     saved during suspend, so the state restore does not bring it back and
 *     it has to be re-enabled explicitly here.
 *  3. If the driver uses legacy PM, hand over to pci_legacy_resume() and
 *     return its result.
 *  4. Otherwise run pci_pm_default_resume(), then:
 *     - with driver PM ops present, call ->resume() if it is implemented and
 *       return its result;
 *     - with no driver or no PM ops, re-enable the device through
 *       pci_pm_reenable_device().
 *
 * Returns the value of pci_legacy_resume() or of the driver's ->resume()
 * callback when one of them is invoked, 0 in every other case.
 */
static int pci_pm_resume ( struct device * dev )
2008-05-20 00:49:04 +02:00
{
2008-12-08 00:34:57 +01:00
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
2009-07-24 22:11:32 -07:00
const struct dev_pm_ops * pm = dev - > driver ? dev - > driver - > pm : NULL ;
2008-05-20 00:49:04 +02:00
2009-01-26 21:43:08 +01:00
/*
* This is necessary for the suspend error path in which resume is
* called without restoring the standard config registers of the device .
*/
if ( pci_dev - > state_saved )
pci_restore_standard_config ( pci_dev ) ;
PCI/PM: Always disable PTM for all devices during suspend
We want to disable PTM on Root Ports because that allows some chips, e.g.,
Intel mobile chips since Coffee Lake, to enter a lower-power PM state.
That means we also have to disable PTM on downstream devices. PCIe r6.0,
sec 2.2.8, recommends that functions support generation of messages in
non-D0 states, so we have to assume Switch Upstream Ports or Endpoints may
send PTM Requests while in D1, D2, and D3hot. A PTM message received by a
Downstream Port (including a Root Port) with PTM disabled must be treated
as an Unsupported Request (sec 6.21.3).
PTM was previously disabled only for Root Ports, and it was disabled in
pci_prepare_to_sleep(), which is not called at all if a driver supports
legacy PM or does its own state saving.
Instead, disable PTM early in pci_pm_suspend() and pci_pm_runtime_suspend()
so we do it in all cases.
Previously PTM was disabled *after* saving device state, so the state
restore on resume automatically re-enabled it. Since we now disable PTM
*before* saving state, we must explicitly re-enable it in pci_pm_resume()
and pci_pm_runtime_resume().
Here's a sample of errors that occur when PTM is disabled only on the Root
Port. With this topology:
0000:00:1d.0 Root Port to [bus 08-71]
0000:08:00.0 Switch Upstream Port to [bus 09-71]
Kai-Heng reported errors like this:
pcieport 0000:00:1d.0: [20] UnsupReq (First)
pcieport 0000:00:1d.0: AER: TLP Header: 34000000 08000052 00000000 00000000
Decoding TLP header 0x34...... (0011 0100b) and 0x08000052:
Fmt 001b 4 DW header, no data
Type 1 0100b Msg (Local - Terminate at Receiver)
Requester ID 0x0800 Bus 08 Devfn 00.0
Message Code 0x52 0101 0010b PTM Request
The 00:1d.0 Root Port logged an Unsupported Request error when it received
a PTM Request with Requester ID 08:00.0.
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=215453
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=216210
Fixes: a697f072f5da ("PCI: Disable PTM during suspend to save power")
Link: https://lore.kernel.org/r/20220909202505.314195-10-helgaas@kernel.org
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Tested-by: Rajvi Jingar <rajvi.jingar@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2022-09-09 15:25:05 -05:00
pci_resume_ptm ( pci_dev ) ;
2009-01-07 13:09:37 +01:00
if ( pci_has_legacy_pm_support ( pci_dev ) )
2009-01-07 13:12:22 +01:00
return pci_legacy_resume ( dev ) ;
2009-01-07 14:15:17 +01:00
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
pci_pm_default_resume ( pci_dev ) ;
PCI PM: Avoid touching devices behind bridges in unknown state
It generally is better to avoid accessing devices behind bridges that
may not be in the D0 power state, because in that case the bridges'
secondary buses may not be accessible. For this reason, during the
early phase of resume (ie. with interrupts disabled), before
restoring the standard config registers of a device, check the power
state of the bridge the device is behind and postpone the restoration
of the device's config space, as well as any other operations that
would involve accessing the device, if that state is not D0.
In such cases the restoration of the device's config space will be
retried during the "normal" phase of resume (ie. with interrupts
enabled), so that the bridge can be put into D0 before that happens.
Also, save standard configuration registers of PCI devices during the
"normal" phase of suspend (ie. with interrupts enabled), so that the
bridges the devices are behind can be put into low power states (we
don't put bridges into low power states at the moment, but we may
want to do it in the future and it seems reasonable to design for
that).
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-01-07 13:07:15 +01:00
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
if ( pm ) {
if ( pm - > resume )
2019-10-14 13:46:50 -05:00
return pm - > resume ( dev ) ;
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
} else {
pci_pm_reenable_device ( pci_dev ) ;
}
2008-05-20 00:49:04 +02:00
2019-10-14 13:46:50 -05:00
return 0 ;
2008-05-20 00:49:04 +02:00
}
# else /* !CONFIG_SUSPEND */
# define pci_pm_suspend NULL
PCI / PM: Take SMART_SUSPEND driver flag into account
Make the PCI bus type take DPM_FLAG_SMART_SUSPEND into account in its
system-wide PM callbacks and make sure that all code that should not
run in parallel with pci_pm_runtime_resume() is executed in the "late"
phases of system suspend, freeze and poweroff transitions.
[Note that the pm_runtime_suspended() check in pci_dev_keep_suspended()
is an optimization, because if it is not passed, all of the subsequent
checks may be skipped and some of them are much more overhead in
general.]
Also use the observation that if the device is in runtime suspend
at the beginning of the "late" phase of a system-wide suspend-like
transition, its state cannot change going forward (runtime PM is
disabled for it at that time) until the transition is over and the
subsequent system-wide PM callbacks should be skipped for it (as
they generally assume the device to not be suspended), so add checks
for that in pci_pm_suspend_late/noirq(), pci_pm_freeze_late/noirq()
and pci_pm_poweroff_late/noirq().
Moreover, if pci_pm_resume_noirq() or pci_pm_restore_noirq() is
called during the subsequent system-wide resume transition and if
the device was left in runtime suspend previously, its runtime PM
status needs to be changed to "active" as it is going to be put
into the full-power state, so add checks for that too to these
functions.
In turn, if pci_pm_thaw_noirq() runs after the device has been
left in runtime suspend, the subsequent "thaw" callbacks need
to be skipped for it (as they may not work correctly with a
suspended device), so set the power.direct_complete flag for the
device then to make the PM core skip those callbacks.
In addition to the above add a core helper for checking if
DPM_FLAG_SMART_SUSPEND is set and the device runtime PM status is
"suspended" at the same time, which is done quite often in the new
code (and will be done elsewhere going forward too).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2017-10-26 12:12:22 +02:00
# define pci_pm_suspend_late NULL
2008-05-20 00:49:04 +02:00
# define pci_pm_suspend_noirq NULL
# define pci_pm_resume NULL
PM: sleep: core: Do not skip callbacks in the resume phase
The current code in device_resume_noirq() causes the entire early
resume and resume phases of device suspend to be skipped for
devices for which the noirq resume phase have been skipped (due
to the LEAVE_SUSPENDED flag being set) on the premise that those
devices should stay in runtime-suspend after system-wide resume.
However, that may not be correct in two situations. First, the
middle layer (subsystem) noirq resume callback may be missing for
a given device, but its early resume callback may be present and it
may need to do something even if it decides to skip the driver
callback. Second, if the device's wakeup settings were adjusted
in the suspend phase without resuming the device (that was in
runtime suspend at that time), they most likely need to be
adjusted again in the resume phase and so the driver callback
in that phase needs to be run.
For the above reason, modify the core to allow the middle layer
->resume_early callback to run even if its ->resume_noirq callback
is missing (and the core has skipped the driver-level callback
in that phase) and to allow all device callbacks to run in the
resume phase. Also make the core set the PM-runtime status of
devices with SMART_SUSPEND set whose resume callbacks are not
skipped to "active" in the "noirq" resume phase and update the
affected subsystems (PCI and ACPI) accordingly.
After this change, middle-layer (subsystem) callbacks will always
be invoked in all phases of system suspend and resume and driver
callbacks will always run in the prepare, suspend, resume, and
complete phases for all devices.
For devices with SMART_SUSPEND set, driver callbacks will be
skipped in the late and noirq phases of system suspend if those
devices remain in runtime suspend in __device_suspend_late().
Driver callbacks will also be skipped for them during the
noirq and early phases of the "thaw" transition related to
hibernation in that case.
Setting LEAVE_SUSPENDED means that the driver allows its callbacks
to be skipped in the noirq and early phases of system resume, but
some additional conditions need to be met for that to happen (among
other things, the power.may_skip_resume flag needs to be set for the
device during system suspend for the driver callbacks to be skipped
during the subsequent resume transition).
For all devices with SMART_SUSPEND set whose driver callbacks are
invoked during system resume, the PM-runtime status will be set to
"active" (by the core).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2020-04-18 18:52:08 +02:00
# define pci_pm_resume_early NULL
2008-05-20 00:49:04 +02:00
# define pci_pm_resume_noirq NULL
# endif /* !CONFIG_SUSPEND */
2011-04-11 22:54:42 +02:00
# ifdef CONFIG_HIBERNATE_CALLBACKS
2008-05-20 00:49:04 +02:00
/*
 * pci_pm_freeze - freeze callback used by the PCI bus type (hibernation).
 * @dev: device being frozen; converted to its pci_dev with to_pci_dev().
 *
 * Behaviour by driver type:
 *  - legacy PM driver: delegate to pci_legacy_suspend() with PMSG_FREEZE and
 *    return its result;
 *  - no driver, or driver without PM ops: run pci_pm_default_suspend() and
 *    return 0;
 *  - driver with PM ops: runtime-resume the device first (see the comment in
 *    the body for the rationale), clear pci_dev->state_saved, then call the
 *    driver's ->freeze() callback if it is implemented.  The callback result
 *    is reported through suspend_report_result() and, when nonzero,
 *    returned to the caller.
 *
 * The return value of pm_runtime_resume() is not checked.
 *
 * Returns 0 on success or when there is no ->freeze() callback, otherwise
 * the error produced by the legacy path or by the driver callback.
 */
static int pci_pm_freeze ( struct device * dev )
{
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
2009-07-24 22:11:32 -07:00
const struct dev_pm_ops * pm = dev - > driver ? dev - > driver - > pm : NULL ;
2008-05-20 00:49:04 +02:00
2009-01-07 13:09:37 +01:00
if ( pci_has_legacy_pm_support ( pci_dev ) )
return pci_legacy_suspend ( dev , PMSG_FREEZE ) ;
2009-01-07 14:15:17 +01:00
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
if ( ! pm ) {
pci_pm_default_suspend ( pci_dev ) ;
return 0 ;
2008-05-20 00:49:04 +02:00
}
PCI / PM: Resume runtime-suspended devices later during system suspend
Runtime-suspended devices are resumed during system suspend by
pci_pm_prepare() for two reasons: First, because they may need
to be reprogrammed in order to change their wakeup settings and,
second, because they may need to be operatonal for their children
to be successfully suspended. That is a problem, though, if there
are many runtime-suspended devices that need to be resumed this
way during system suspend, because the .prepare() PM callbacks of
devices are executed sequentially and the times taken by them
accumulate, which may increase the total system suspend time quite
a bit.
For this reason, move the resume of runtime-suspended devices up
to the next phase of device suspend (during system suspend), except
for the ones that have power.ignore_children set. The exception is
made, because the devices with power.ignore_children set may still
be necessary for their children to be successfully suspended (during
system suspend) and they won't be resumed automatically as a result
of the runtime resume of their children.
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2014-02-26 01:00:30 +01:00
/*
2019-07-01 12:44:25 +02:00
* Resume all runtime - suspended devices before creating a snapshot
* image of system memory , because the restore kernel generally cannot
* be expected to always handle them consistently and they need to be
* put into the runtime - active metastate during system resume anyway ,
* so it is better to ensure that the state saved in the image will be
* always consistent with that .
PCI / PM: Resume runtime-suspended devices later during system suspend
Runtime-suspended devices are resumed during system suspend by
pci_pm_prepare() for two reasons: First, because they may need
to be reprogrammed in order to change their wakeup settings and,
second, because they may need to be operatonal for their children
to be successfully suspended. That is a problem, though, if there
are many runtime-suspended devices that need to be resumed this
way during system suspend, because the .prepare() PM callbacks of
devices are executed sequentially and the times taken by them
accumulate, which may increase the total system suspend time quite
a bit.
For this reason, move the resume of runtime-suspended devices up
to the next phase of device suspend (during system suspend), except
for the ones that have power.ignore_children set. The exception is
made, because the devices with power.ignore_children set may still
be necessary for their children to be successfully suspended (during
system suspend) and they won't be resumed automatically as a result
of the runtime resume of their children.
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2014-02-26 01:00:30 +01:00
*/
2019-07-01 12:44:25 +02:00
pm_runtime_resume ( dev ) ;
pci_dev - > state_saved = false ;
PCI / PM: Resume runtime-suspended devices later during system suspend
Runtime-suspended devices are resumed during system suspend by
pci_pm_prepare() for two reasons: First, because they may need
to be reprogrammed in order to change their wakeup settings and,
second, because they may need to be operatonal for their children
to be successfully suspended. That is a problem, though, if there
are many runtime-suspended devices that need to be resumed this
way during system suspend, because the .prepare() PM callbacks of
devices are executed sequentially and the times taken by them
accumulate, which may increase the total system suspend time quite
a bit.
For this reason, move the resume of runtime-suspended devices up
to the next phase of device suspend (during system suspend), except
for the ones that have power.ignore_children set. The exception is
made, because the devices with power.ignore_children set may still
be necessary for their children to be successfully suspended (during
system suspend) and they won't be resumed automatically as a result
of the runtime resume of their children.
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2014-02-26 01:00:30 +01:00
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
if ( pm - > freeze ) {
int error ;
error = pm - > freeze ( dev ) ;
2022-03-08 04:07:39 +09:00
suspend_report_result ( dev , pm - > freeze , error ) ;
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
if ( error )
return error ;
}
return 0 ;
2008-05-20 00:49:04 +02:00
}
/*
 * pci_pm_freeze_noirq - "noirq" freeze callback used by the PCI bus type.
 * @dev: device being frozen; converted to its pci_dev with to_pci_dev().
 *
 * A driver using legacy PM is handled entirely by pci_legacy_suspend_late(),
 * whose result is returned.  Otherwise the driver's ->freeze_noirq()
 * callback is invoked when the driver has PM ops implementing it; its result
 * is reported through suspend_report_result() and a nonzero value aborts
 * the function with that error.
 *
 * On the non-legacy success path the config state is saved with
 * pci_save_state() unless pci_dev->state_saved is already set, and
 * pci_pm_set_unknown_state() is then called on the device.
 *
 * Returns 0 on success, or the error from the legacy path or from the
 * driver callback.
 */
static int pci_pm_freeze_noirq ( struct device * dev )
{
2008-12-08 00:34:57 +01:00
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
2019-10-14 13:46:50 -05:00
const struct dev_pm_ops * pm = dev - > driver ? dev - > driver - > pm : NULL ;
2008-05-20 00:49:04 +02:00
2009-01-07 14:15:17 +01:00
if ( pci_has_legacy_pm_support ( pci_dev ) )
2022-10-25 14:35:02 -05:00
return pci_legacy_suspend_late ( dev ) ;
2009-01-07 14:15:17 +01:00
2019-10-14 13:46:50 -05:00
if ( pm & & pm - > freeze_noirq ) {
2009-03-16 22:40:26 +01:00
int error ;
2019-10-14 13:46:50 -05:00
error = pm - > freeze_noirq ( dev ) ;
2022-03-08 04:07:39 +09:00
suspend_report_result ( dev , pm - > freeze_noirq , error ) ;
2009-03-16 22:40:26 +01:00
if ( error )
return error ;
2008-05-20 00:49:04 +02:00
}
2009-03-16 22:40:26 +01:00
if ( ! pci_dev - > state_saved )
pci_save_state ( pci_dev ) ;
2009-01-07 13:11:28 +01:00
2009-03-16 22:40:26 +01:00
pci_pm_set_unknown_state ( pci_dev ) ;
return 0 ;
2008-05-20 00:49:04 +02:00
}
2009-01-07 13:12:22 +01:00
/*
 * pci_pm_thaw_noirq - PCI handler for the "thaw_noirq" phase.
 * @dev: generic device embedded in the PCI device being thawed.
 *
 * For every device, regardless of which PM model its driver uses, this
 * first calls pci_pm_power_up_and_verify_state() and then
 * pci_restore_state().  Only after that does it check for legacy PM
 * support: legacy drivers get no further callback here and 0 is returned.
 * For other drivers the optional pm->thaw_noirq() callback is invoked and
 * its return value is passed back to the caller.
 *
 * Return: 0 if there is no driver callback to run (or the driver uses
 * legacy PM), otherwise the value returned by pm->thaw_noirq().
 */
static int pci_pm_thaw_noirq ( struct device * dev )
2008-05-20 00:49:04 +02:00
{
2008-12-08 00:34:57 +01:00
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
2019-10-14 13:46:50 -05:00
const struct dev_pm_ops * pm = dev - > driver ? dev - > driver - > pm : NULL ;
2013-08-20 16:41:02 +02:00
2017-12-15 03:07:18 +01:00
/*
2019-10-31 17:53:04 -05:00
* The pm - > thaw_noirq ( ) callback assumes the device has been
* returned to D0 and its config state has been restored .
PCI/PM: Always return devices to D0 when thawing
pci_pm_thaw_noirq() is supposed to return the device to D0 and restore its
configuration registers, but previously it only did that for devices whose
drivers implemented the new power management ops.
Hibernation, e.g., via "echo disk > /sys/power/state", involves freezing
devices, creating a hibernation image, thawing devices, writing the image,
and powering off. The fact that thawing did not return devices with legacy
power management to D0 caused errors, e.g., in this path:
pci_pm_thaw_noirq
if (pci_has_legacy_pm_support(pci_dev)) # true for Mellanox VF driver
return pci_legacy_resume_early(dev) # ... legacy PM skips the rest
pci_set_power_state(pci_dev, PCI_D0)
pci_restore_state(pci_dev)
pci_pm_thaw
if (pci_has_legacy_pm_support(pci_dev))
pci_legacy_resume
drv->resume
mlx4_resume
...
pci_enable_msix_range
...
if (dev->current_state != PCI_D0) # <---
return -EINVAL;
which caused these warnings:
mlx4_core a6d1:00:02.0: INTx is not supported in multi-function mode, aborting
PM: dpm_run_callback(): pci_pm_thaw+0x0/0xd7 returns -95
PM: Device a6d1:00:02.0 failed to thaw: error -95
Return devices to D0 and restore config registers for all devices, not just
those whose drivers support new power management.
[bhelgaas: also call pci_restore_state() before pci_legacy_resume_early(),
update comment, add stable tag, commit log]
Link: https://lore.kernel.org/r/KU1P153MB016637CAEAD346F0AA8E3801BFAD0@KU1P153MB0166.APCP153.PROD.OUTLOOK.COM
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: stable@vger.kernel.org # v4.13+
2019-08-14 01:06:55 +00:00
*
* In addition , pci_restore_state ( ) restores MSI - X state in MMIO
* space , which requires the device to be in D0 , so return it to D0
* in case the driver ' s " freeze " callbacks put it into a low - power
* state .
2017-12-15 03:07:18 +01:00
*/
2022-05-05 20:18:09 +02:00
pci_pm_power_up_and_verify_state ( pci_dev ) ;
PCI/PM: Restore the status of PCI devices across hibernation
Currently we saw a lot of "No irq handler" errors during hibernation, which
caused the system hang finally:
ata4.00: qc timeout (cmd 0xec)
ata4.00: failed to IDENTIFY (I/O error, err_mask=0x4)
ata4.00: revalidation failed (errno=-5)
ata4: SATA link up 6.0 Gbps (SStatus 133 SControl 300)
do_IRQ: 31.151 No irq handler for vector
According to above logs, there is an interrupt triggered and it is
dispatched to CPU31 with a vector number 151, but there is no handler for
it, thus this IRQ will not get acked and will cause an IRQ flood which
kills the system. To be more specific, the 31.151 is an interrupt from the
AHCI host controller.
After some investigation, the reason why this issue is triggered is because
the thaw_noirq() function does not restore the MSI/MSI-X settings across
hibernation.
The scenario is illustrated below:
1. Before hibernation, IRQ 34 is the handler for the AHCI device, which
is bound to CPU31.
2. Hibernation starts, the AHCI device is put into low power state.
3. All the nonboot CPUs are put offline, so IRQ 34 has to be migrated to
the last alive one - CPU0.
4. After the snapshot has been created, all the nonboot CPUs are brought
up again; IRQ 34 remains bound to CPU0.
5. AHCI devices are put into D0.
6. The snapshot is written to the disk.
The issue is triggered in step 6. The AHCI interrupt should be delivered
to CPU0, however it is delivered to the original CPU31 instead, which
causes the "No irq handler" issue.
Ying Huang has provided a clue that, in step 3 it is possible that writing
to the register might not take effect as the PCI devices have been
suspended.
In step 3, the IRQ 34 affinity should be modified from CPU31 to CPU0, but
in fact it is not. In __pci_write_msi_msg(), if the device is already in
low power state, the low level MSI message entry will not be updated but
cached. During the device restore process after a normal suspend/resume,
pci_restore_msi_state() writes the cached MSI back to the hardware.
But this is not the case for hibernation. pci_restore_msi_state() is not
currently called in pci_pm_thaw_noirq(), although pci_save_state() has
saved the necessary PCI cached information in pci_pm_freeze_noirq().
Restore the PCI status for the device during hibernation. Otherwise the
status might be lost across hibernation (for example, settings for MSI,
MSI-X, ATS, ACS, IOV, etc.), which might cause problems during hibernation.
Suggested-by: Ying Huang <ying.huang@intel.com>
Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
[bhelgaas: changelog]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: stable@vger.kernel.org
Cc: Len Brown <len.brown@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Rui Zhang <rui.zhang@intel.com>
Cc: Ying Huang <ying.huang@intel.com>
2017-05-25 16:49:07 +08:00
pci_restore_state ( pci_dev ) ;
2009-01-07 13:11:28 +01:00
PCI/PM: Always return devices to D0 when thawing
pci_pm_thaw_noirq() is supposed to return the device to D0 and restore its
configuration registers, but previously it only did that for devices whose
drivers implemented the new power management ops.
Hibernation, e.g., via "echo disk > /sys/power/state", involves freezing
devices, creating a hibernation image, thawing devices, writing the image,
and powering off. The fact that thawing did not return devices with legacy
power management to D0 caused errors, e.g., in this path:
pci_pm_thaw_noirq
if (pci_has_legacy_pm_support(pci_dev)) # true for Mellanox VF driver
return pci_legacy_resume_early(dev) # ... legacy PM skips the rest
pci_set_power_state(pci_dev, PCI_D0)
pci_restore_state(pci_dev)
pci_pm_thaw
if (pci_has_legacy_pm_support(pci_dev))
pci_legacy_resume
drv->resume
mlx4_resume
...
pci_enable_msix_range
...
if (dev->current_state != PCI_D0) # <---
return -EINVAL;
which caused these warnings:
mlx4_core a6d1:00:02.0: INTx is not supported in multi-function mode, aborting
PM: dpm_run_callback(): pci_pm_thaw+0x0/0xd7 returns -95
PM: Device a6d1:00:02.0 failed to thaw: error -95
Return devices to D0 and restore config registers for all devices, not just
those whose drivers support new power management.
[bhelgaas: also call pci_restore_state() before pci_legacy_resume_early(),
update comment, add stable tag, commit log]
Link: https://lore.kernel.org/r/KU1P153MB016637CAEAD346F0AA8E3801BFAD0@KU1P153MB0166.APCP153.PROD.OUTLOOK.COM
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: stable@vger.kernel.org # v4.13+
2019-08-14 01:06:55 +00:00
if ( pci_has_legacy_pm_support ( pci_dev ) )
2019-10-31 17:53:04 -05:00
return 0 ;
PCI/PM: Always return devices to D0 when thawing
pci_pm_thaw_noirq() is supposed to return the device to D0 and restore its
configuration registers, but previously it only did that for devices whose
drivers implemented the new power management ops.
Hibernation, e.g., via "echo disk > /sys/power/state", involves freezing
devices, creating a hibernation image, thawing devices, writing the image,
and powering off. The fact that thawing did not return devices with legacy
power management to D0 caused errors, e.g., in this path:
pci_pm_thaw_noirq
if (pci_has_legacy_pm_support(pci_dev)) # true for Mellanox VF driver
return pci_legacy_resume_early(dev) # ... legacy PM skips the rest
pci_set_power_state(pci_dev, PCI_D0)
pci_restore_state(pci_dev)
pci_pm_thaw
if (pci_has_legacy_pm_support(pci_dev))
pci_legacy_resume
drv->resume
mlx4_resume
...
pci_enable_msix_range
...
if (dev->current_state != PCI_D0) # <---
return -EINVAL;
which caused these warnings:
mlx4_core a6d1:00:02.0: INTx is not supported in multi-function mode, aborting
PM: dpm_run_callback(): pci_pm_thaw+0x0/0xd7 returns -95
PM: Device a6d1:00:02.0 failed to thaw: error -95
Return devices to D0 and restore config registers for all devices, not just
those whose drivers support new power management.
[bhelgaas: also call pci_restore_state() before pci_legacy_resume_early(),
update comment, add stable tag, commit log]
Link: https://lore.kernel.org/r/KU1P153MB016637CAEAD346F0AA8E3801BFAD0@KU1P153MB0166.APCP153.PROD.OUTLOOK.COM
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: stable@vger.kernel.org # v4.13+
2019-08-14 01:06:55 +00:00
2019-10-14 13:46:50 -05:00
if ( pm & & pm - > thaw_noirq )
return pm - > thaw_noirq ( dev ) ;
2008-05-20 00:49:04 +02:00
2019-10-14 13:46:50 -05:00
return 0 ;
2008-05-20 00:49:04 +02:00
}
2009-01-07 13:12:22 +01:00
/*
 * pci_pm_thaw - PCI handler for the "thaw" phase.
 * @dev: generic device embedded in the PCI device being thawed.
 *
 * Devices whose driver uses legacy PM support are handed to
 * pci_legacy_resume() and its result is returned directly; the code below
 * the legacy check (including the state_saved reset) is not reached for
 * them.
 *
 * Otherwise, if the driver provides dev_pm_ops, its optional pm->thaw()
 * callback is run and its result recorded; if there are no dev_pm_ops at
 * all, pci_pm_reenable_device() is called instead.  In both of these cases
 * pci_dev->state_saved is cleared before returning.
 *
 * Return: 0 when no driver callback ran, otherwise the value returned by
 * pm->thaw() or by pci_legacy_resume().
 */
static int pci_pm_thaw ( struct device * dev )
2008-05-20 00:49:04 +02:00
{
2008-12-08 00:34:57 +01:00
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
2009-07-24 22:11:32 -07:00
const struct dev_pm_ops * pm = dev - > driver ? dev - > driver - > pm : NULL ;
2008-05-20 00:49:04 +02:00
int error = 0 ;
2009-01-07 13:09:37 +01:00
if ( pci_has_legacy_pm_support ( pci_dev ) )
2009-01-07 13:12:22 +01:00
return pci_legacy_resume ( dev ) ;
2009-01-07 14:15:17 +01:00
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
if ( pm ) {
if ( pm - > thaw )
error = pm - > thaw ( dev ) ;
} else {
pci_pm_reenable_device ( pci_dev ) ;
}
2008-05-20 00:49:04 +02:00
2009-09-09 23:49:59 +02:00
pci_dev - > state_saved = false ;
2008-05-20 00:49:04 +02:00
return error ;
}
/*
 * pci_pm_poweroff - PCI handler for the "poweroff" phase.
 * @dev: generic device embedded in the PCI device being powered off.
 *
 * The function takes one of three paths:
 *  - Legacy PM drivers: delegate to pci_legacy_suspend() with
 *    PMSG_HIBERNATE and return its result.
 *  - No dev_pm_ops (including no driver bound): call
 *    pci_pm_default_suspend() and return 0.
 *  - Driver with dev_pm_ops: unless the driver sets DPM_FLAG_SMART_SUSPEND
 *    and pci_dev_need_resume() reports false, runtime-resume the device via
 *    pm_runtime_resume() and clear pci_dev->state_saved; otherwise only
 *    call pci_dev_adjust_pme().  Then run the optional pm->poweroff()
 *    callback, report its result through suspend_report_result(), and
 *    return a non-zero result to the caller.
 *
 * Return: 0 on success, or the error code from the legacy path or from
 * pm->poweroff().
 */
static int pci_pm_poweroff ( struct device * dev )
{
2008-12-08 00:34:57 +01:00
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
2009-07-24 22:11:32 -07:00
const struct dev_pm_ops * pm = dev - > driver ? dev - > driver - > pm : NULL ;
2008-05-20 00:49:04 +02:00
2009-01-07 13:09:37 +01:00
if ( pci_has_legacy_pm_support ( pci_dev ) )
return pci_legacy_suspend ( dev , PMSG_HIBERNATE ) ;
2009-01-07 14:15:17 +01:00
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
if ( ! pm ) {
pci_pm_default_suspend ( pci_dev ) ;
PCI / PM: Take SMART_SUSPEND driver flag into account
Make the PCI bus type take DPM_FLAG_SMART_SUSPEND into account in its
system-wide PM callbacks and make sure that all code that should not
run in parallel with pci_pm_runtime_resume() is executed in the "late"
phases of system suspend, freeze and poweroff transitions.
[Note that the pm_runtime_suspended() check in pci_dev_keep_suspended()
is an optimization, because if is not passed, all of the subsequent
checks may be skipped and some of them are much more overhead in
general.]
Also use the observation that if the device is in runtime suspend
at the beginning of the "late" phase of a system-wide suspend-like
transition, its state cannot change going forward (runtime PM is
disabled for it at that time) until the transition is over and the
subsequent system-wide PM callbacks should be skipped for it (as
they generally assume the device to not be suspended), so add checks
for that in pci_pm_suspend_late/noirq(), pci_pm_freeze_late/noirq()
and pci_pm_poweroff_late/noirq().
Moreover, if pci_pm_resume_noirq() or pci_pm_restore_noirq() is
called during the subsequent system-wide resume transition and if
the device was left in runtime suspend previously, its runtime PM
status needs to be changed to "active" as it is going to be put
into the full-power state, so add checks for that too to these
functions.
In turn, if pci_pm_thaw_noirq() runs after the device has been
left in runtime suspend, the subsequent "thaw" callbacks need
to be skipped for it (as they may not work correctly with a
suspended device), so set the power.direct_complete flag for the
device then to make the PM core skip those callbacks.
In addition to the above add a core helper for checking if
DPM_FLAG_SMART_SUSPEND is set and the device runtime PM status is
"suspended" at the same time, which is done quite often in the new
code (and will be done elsewhere going forward too).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2017-10-26 12:12:22 +02:00
return 0 ;
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
}
PCI / PM: Resume runtime-suspended devices later during system suspend
Runtime-suspended devices are resumed during system suspend by
pci_pm_prepare() for two reasons: First, because they may need
to be reprogrammed in order to change their wakeup settings and,
second, because they may need to be operatonal for their children
to be successfully suspended. That is a problem, though, if there
are many runtime-suspended devices that need to be resumed this
way during system suspend, because the .prepare() PM callbacks of
devices are executed sequentially and the times taken by them
accumulate, which may increase the total system suspend time quite
a bit.
For this reason, move the resume of runtime-suspended devices up
to the next phase of device suspend (during system suspend), except
for the ones that have power.ignore_children set. The exception is
made, because the devices with power.ignore_children set may still
be necessary for their children to be successfully suspended (during
system suspend) and they won't be resumed automatically as a result
of the runtime resume of their children.
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2014-02-26 01:00:30 +01:00
/* The reason to do that is the same as in pci_pm_suspend(). */
PCI / PM: Take SMART_SUSPEND driver flag into account
Make the PCI bus type take DPM_FLAG_SMART_SUSPEND into account in its
system-wide PM callbacks and make sure that all code that should not
run in parallel with pci_pm_runtime_resume() is executed in the "late"
phases of system suspend, freeze and poweroff transitions.
[Note that the pm_runtime_suspended() check in pci_dev_keep_suspended()
is an optimization, because if is not passed, all of the subsequent
checks may be skipped and some of them are much more overhead in
general.]
Also use the observation that if the device is in runtime suspend
at the beginning of the "late" phase of a system-wide suspend-like
transition, its state cannot change going forward (runtime PM is
disabled for it at that time) until the transition is over and the
subsequent system-wide PM callbacks should be skipped for it (as
they generally assume the device to not be suspended), so add checks
for that in pci_pm_suspend_late/noirq(), pci_pm_freeze_late/noirq()
and pci_pm_poweroff_late/noirq().
Moreover, if pci_pm_resume_noirq() or pci_pm_restore_noirq() is
called during the subsequent system-wide resume transition and if
the device was left in runtime suspend previously, its runtime PM
status needs to be changed to "active" as it is going to be put
into the full-power state, so add checks for that too to these
functions.
In turn, if pci_pm_thaw_noirq() runs after the device has been
left in runtime suspend, the subsequent "thaw" callbacks need
to be skipped for it (as they may not work correctly with a
suspended device), so set the power.direct_complete flag for the
device then to make the PM core skip those callbacks.
In addition to the above add a core helper for checking if
DPM_FLAG_SMART_SUSPEND is set and the device runtime PM status is
"suspended" at the same time, which is done quite often in the new
code (and will be done elsewhere going forward too).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2017-10-26 12:12:22 +02:00
if ( ! dev_pm_test_driver_flags ( dev , DPM_FLAG_SMART_SUSPEND ) | |
2019-06-07 00:32:31 +02:00
pci_dev_need_resume ( pci_dev ) ) {
PCI / PM: Take SMART_SUSPEND driver flag into account
Make the PCI bus type take DPM_FLAG_SMART_SUSPEND into account in its
system-wide PM callbacks and make sure that all code that should not
run in parallel with pci_pm_runtime_resume() is executed in the "late"
phases of system suspend, freeze and poweroff transitions.
[Note that the pm_runtime_suspended() check in pci_dev_keep_suspended()
is an optimization, because if is not passed, all of the subsequent
checks may be skipped and some of them are much more overhead in
general.]
Also use the observation that if the device is in runtime suspend
at the beginning of the "late" phase of a system-wide suspend-like
transition, its state cannot change going forward (runtime PM is
disabled for it at that time) until the transition is over and the
subsequent system-wide PM callbacks should be skipped for it (as
they generally assume the device to not be suspended), so add checks
for that in pci_pm_suspend_late/noirq(), pci_pm_freeze_late/noirq()
and pci_pm_poweroff_late/noirq().
Moreover, if pci_pm_resume_noirq() or pci_pm_restore_noirq() is
called during the subsequent system-wide resume transition and if
the device was left in runtime suspend previously, its runtime PM
status needs to be changed to "active" as it is going to be put
into the full-power state, so add checks for that too to these
functions.
In turn, if pci_pm_thaw_noirq() runs after the device has been
left in runtime suspend, the subsequent "thaw" callbacks need
to be skipped for it (as they may not work correctly with a
suspended device), so set the power.direct_complete flag for the
device then to make the PM core skip those callbacks.
In addition to the above add a core helper for checking if
DPM_FLAG_SMART_SUSPEND is set and the device runtime PM status is
"suspended" at the same time, which is done quite often in the new
code (and will be done elsewhere going forward too).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2017-10-26 12:12:22 +02:00
pm_runtime_resume ( dev ) ;
2019-06-07 00:32:31 +02:00
pci_dev - > state_saved = false ;
} else {
pci_dev_adjust_pme ( pci_dev ) ;
}
PCI / PM: Resume runtime-suspended devices later during system suspend
Runtime-suspended devices are resumed during system suspend by
pci_pm_prepare() for two reasons: First, because they may need
to be reprogrammed in order to change their wakeup settings and,
second, because they may need to be operatonal for their children
to be successfully suspended. That is a problem, though, if there
are many runtime-suspended devices that need to be resumed this
way during system suspend, because the .prepare() PM callbacks of
devices are executed sequentially and the times taken by them
accumulate, which may increase the total system suspend time quite
a bit.
For this reason, move the resume of runtime-suspended devices up
to the next phase of device suspend (during system suspend), except
for the ones that have power.ignore_children set. The exception is
made, because the devices with power.ignore_children set may still
be necessary for their children to be successfully suspended (during
system suspend) and they won't be resumed automatically as a result
of the runtime resume of their children.
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2014-02-26 01:00:30 +01:00
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
if ( pm - > poweroff ) {
2009-03-16 22:40:26 +01:00
int error ;
2009-02-04 01:56:14 +01:00
error = pm - > poweroff ( dev ) ;
2022-03-08 04:07:39 +09:00
suspend_report_result ( dev , pm - > poweroff , error ) ;
2009-03-16 22:40:26 +01:00
if ( error )
return error ;
2008-05-20 00:49:04 +02:00
}
2009-03-16 22:40:26 +01:00
return 0 ;
2008-05-20 00:49:04 +02:00
}
2009-01-07 13:02:36 +01:00
PCI / PM: Take SMART_SUSPEND driver flag into account
Make the PCI bus type take DPM_FLAG_SMART_SUSPEND into account in its
system-wide PM callbacks and make sure that all code that should not
run in parallel with pci_pm_runtime_resume() is executed in the "late"
phases of system suspend, freeze and poweroff transitions.
[Note that the pm_runtime_suspended() check in pci_dev_keep_suspended()
is an optimization, because if it is not passed, all of the subsequent
checks may be skipped and some of them have much more overhead in
general.]
Also use the observation that if the device is in runtime suspend
at the beginning of the "late" phase of a system-wide suspend-like
transition, its state cannot change going forward (runtime PM is
disabled for it at that time) until the transition is over and the
subsequent system-wide PM callbacks should be skipped for it (as
they generally assume the device to not be suspended), so add checks
for that in pci_pm_suspend_late/noirq(), pci_pm_freeze_late/noirq()
and pci_pm_poweroff_late/noirq().
Moreover, if pci_pm_resume_noirq() or pci_pm_restore_noirq() is
called during the subsequent system-wide resume transition and if
the device was left in runtime suspend previously, its runtime PM
status needs to be changed to "active" as it is going to be put
into the full-power state, so add checks for that too to these
functions.
In turn, if pci_pm_thaw_noirq() runs after the device has been
left in runtime suspend, the subsequent "thaw" callbacks need
to be skipped for it (as they may not work correctly with a
suspended device), so set the power.direct_complete flag for the
device then to make the PM core skip those callbacks.
In addition to the above add a core helper for checking if
DPM_FLAG_SMART_SUSPEND is set and the device runtime PM status is
"suspended" at the same time, which is done quite often in the new
code (and will be done elsewhere going forward too).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2017-10-26 12:12:22 +02:00
static int pci_pm_poweroff_late ( struct device * dev )
{
2020-04-18 18:52:48 +02:00
if ( dev_pm_skip_suspend ( dev ) )
PCI / PM: Take SMART_SUSPEND driver flag into account
Make the PCI bus type take DPM_FLAG_SMART_SUSPEND into account in its
system-wide PM callbacks and make sure that all code that should not
run in parallel with pci_pm_runtime_resume() is executed in the "late"
phases of system suspend, freeze and poweroff transitions.
[Note that the pm_runtime_suspended() check in pci_dev_keep_suspended()
is an optimization, because if is not passed, all of the subsequent
checks may be skipped and some of them are much more overhead in
general.]
Also use the observation that if the device is in runtime suspend
at the beginning of the "late" phase of a system-wide suspend-like
transition, its state cannot change going forward (runtime PM is
disabled for it at that time) until the transition is over and the
subsequent system-wide PM callbacks should be skipped for it (as
they generally assume the device to not be suspended), so add checks
for that in pci_pm_suspend_late/noirq(), pci_pm_freeze_late/noirq()
and pci_pm_poweroff_late/noirq().
Moreover, if pci_pm_resume_noirq() or pci_pm_restore_noirq() is
called during the subsequent system-wide resume transition and if
the device was left in runtime suspend previously, its runtime PM
status needs to be changed to "active" as it is going to be put
into the full-power state, so add checks for that too to these
functions.
In turn, if pci_pm_thaw_noirq() runs after the device has been
left in runtime suspend, the subsequent "thaw" callbacks need
to be skipped for it (as they may not work correctly with a
suspended device), so set the power.direct_complete flag for the
device then to make the PM core skip those callbacks.
In addition to the above add a core helper for checking if
DPM_FLAG_SMART_SUSPEND is set and the device runtime PM status is
"suspended" at the same time, which is done quite often in the new
code (and will be done elsewhere going forward too).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2017-10-26 12:12:22 +02:00
return 0 ;
2013-08-20 16:41:02 +02:00
PCI / PM: Take SMART_SUSPEND driver flag into account
Make the PCI bus type take DPM_FLAG_SMART_SUSPEND into account in its
system-wide PM callbacks and make sure that all code that should not
run in parallel with pci_pm_runtime_resume() is executed in the "late"
phases of system suspend, freeze and poweroff transitions.
[Note that the pm_runtime_suspended() check in pci_dev_keep_suspended()
is an optimization, because if is not passed, all of the subsequent
checks may be skipped and some of them are much more overhead in
general.]
Also use the observation that if the device is in runtime suspend
at the beginning of the "late" phase of a system-wide suspend-like
transition, its state cannot change going forward (runtime PM is
disabled for it at that time) until the transition is over and the
subsequent system-wide PM callbacks should be skipped for it (as
they generally assume the device to not be suspended), so add checks
for that in pci_pm_suspend_late/noirq(), pci_pm_freeze_late/noirq()
and pci_pm_poweroff_late/noirq().
Moreover, if pci_pm_resume_noirq() or pci_pm_restore_noirq() is
called during the subsequent system-wide resume transition and if
the device was left in runtime suspend previously, its runtime PM
status needs to be changed to "active" as it is going to be put
into the full-power state, so add checks for that too to these
functions.
In turn, if pci_pm_thaw_noirq() runs after the device has been
left in runtime suspend, the subsequent "thaw" callbacks need
to be skipped for it (as they may not work correctly with a
suspended device), so set the power.direct_complete flag for the
device then to make the PM core skip those callbacks.
In addition to the above add a core helper for checking if
DPM_FLAG_SMART_SUSPEND is set and the device runtime PM status is
"suspended" at the same time, which is done quite often in the new
code (and will be done elsewhere going forward too).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2017-10-26 12:12:22 +02:00
pci_fixup_device ( pci_fixup_suspend , to_pci_dev ( dev ) ) ;
return pm_generic_poweroff_late ( dev ) ;
2008-05-20 00:49:04 +02:00
}
static int pci_pm_poweroff_noirq ( struct device * dev )
{
2009-03-16 22:40:26 +01:00
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
2019-10-14 13:46:50 -05:00
const struct dev_pm_ops * pm = dev - > driver ? dev - > driver - > pm : NULL ;
2008-05-20 00:49:04 +02:00
2020-04-18 18:52:48 +02:00
if ( dev_pm_skip_suspend ( dev ) )
PCI / PM: Take SMART_SUSPEND driver flag into account
Make the PCI bus type take DPM_FLAG_SMART_SUSPEND into account in its
system-wide PM callbacks and make sure that all code that should not
run in parallel with pci_pm_runtime_resume() is executed in the "late"
phases of system suspend, freeze and poweroff transitions.
[Note that the pm_runtime_suspended() check in pci_dev_keep_suspended()
is an optimization, because if is not passed, all of the subsequent
checks may be skipped and some of them are much more overhead in
general.]
Also use the observation that if the device is in runtime suspend
at the beginning of the "late" phase of a system-wide suspend-like
transition, its state cannot change going forward (runtime PM is
disabled for it at that time) until the transition is over and the
subsequent system-wide PM callbacks should be skipped for it (as
they generally assume the device to not be suspended), so add checks
for that in pci_pm_suspend_late/noirq(), pci_pm_freeze_late/noirq()
and pci_pm_poweroff_late/noirq().
Moreover, if pci_pm_resume_noirq() or pci_pm_restore_noirq() is
called during the subsequent system-wide resume transition and if
the device was left in runtime suspend previously, its runtime PM
status needs to be changed to "active" as it is going to be put
into the full-power state, so add checks for that too to these
functions.
In turn, if pci_pm_thaw_noirq() runs after the device has been
left in runtime suspend, the subsequent "thaw" callbacks need
to be skipped for it (as they may not work correctly with a
suspended device), so set the power.direct_complete flag for the
device then to make the PM core skip those callbacks.
In addition to the above add a core helper for checking if
DPM_FLAG_SMART_SUSPEND is set and the device runtime PM status is
"suspended" at the same time, which is done quite often in the new
code (and will be done elsewhere going forward too).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2017-10-26 12:12:22 +02:00
return 0 ;
2019-10-14 13:46:50 -05:00
if ( pci_has_legacy_pm_support ( pci_dev ) )
2022-10-25 14:35:02 -05:00
return pci_legacy_suspend_late ( dev ) ;
2009-01-07 14:15:17 +01:00
2019-10-14 13:46:50 -05:00
if ( ! pm ) {
2014-06-03 22:04:09 +02:00
pci_fixup_device ( pci_fixup_suspend_late , pci_dev ) ;
2009-03-16 22:40:26 +01:00
return 0 ;
2014-06-03 22:04:09 +02:00
}
2009-03-16 22:40:26 +01:00
2019-10-14 13:46:50 -05:00
if ( pm - > poweroff_noirq ) {
2009-03-16 22:40:26 +01:00
int error ;
2019-10-14 13:46:50 -05:00
error = pm - > poweroff_noirq ( dev ) ;
2022-03-08 04:07:39 +09:00
suspend_report_result ( dev , pm - > poweroff_noirq , error ) ;
2009-03-16 22:40:26 +01:00
if ( error )
return error ;
2008-05-20 00:49:04 +02:00
}
2014-05-04 12:23:36 +08:00
if ( ! pci_dev - > state_saved & & ! pci_has_subordinate ( pci_dev ) )
2009-03-16 22:40:26 +01:00
pci_prepare_to_sleep ( pci_dev ) ;
2012-08-12 23:26:07 +02:00
/*
* The reason for doing this here is the same as for the analogous code
* in pci_pm_suspend_noirq ( ) .
*/
if ( pci_dev - > class = = PCI_CLASS_SERIAL_USB_EHCI )
pci_write_config_word ( pci_dev , PCI_COMMAND , 0 ) ;
2014-06-03 22:04:09 +02:00
pci_fixup_device ( pci_fixup_suspend_late , pci_dev ) ;
2009-03-16 22:40:26 +01:00
return 0 ;
2008-05-20 00:49:04 +02:00
}
2009-01-07 13:12:22 +01:00
static int pci_pm_restore_noirq ( struct device * dev )
2008-05-20 00:49:04 +02:00
{
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
2019-10-14 13:46:50 -05:00
const struct dev_pm_ops * pm = dev - > driver ? dev - > driver - > pm : NULL ;
2013-08-20 16:41:02 +02:00
2010-02-17 23:44:58 +01:00
pci_pm_default_resume_early ( pci_dev ) ;
PCI: PM: Skip devices in D0 for suspend-to-idle
Commit d491f2b75237 ("PCI: PM: Avoid possible suspend-to-idle issue")
attempted to avoid a problem with devices whose drivers want them to
stay in D0 over suspend-to-idle and resume, but it did not go as far
as it should with that.
Namely, first of all, the power state of a PCI bridge with a
downstream device in D0 must be D0 (based on the PCI PM spec r1.2,
sec 6, table 6-1, if the bridge is not in D0, there can be no PCI
transactions on its secondary bus), but that is not actively enforced
during system-wide PM transitions, so use the skip_bus_pm flag
introduced by commit d491f2b75237 for that.
Second, the configuration of devices left in D0 (whatever the reason)
during suspend-to-idle need not be changed and attempting to put them
into D0 again by force is pointless, so explicitly avoid doing that.
Fixes: d491f2b75237 ("PCI: PM: Avoid possible suspend-to-idle issue")
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
2019-06-13 23:59:45 +02:00
pci_fixup_device ( pci_fixup_resume_early , pci_dev ) ;
2009-01-16 21:54:43 +01:00
2009-01-07 13:09:37 +01:00
if ( pci_has_legacy_pm_support ( pci_dev ) )
2019-10-31 17:53:04 -05:00
return 0 ;
2009-01-07 14:15:17 +01:00
2019-10-14 13:46:50 -05:00
if ( pm & & pm - > restore_noirq )
return pm - > restore_noirq ( dev ) ;
2008-05-20 00:49:04 +02:00
2019-10-14 13:46:50 -05:00
return 0 ;
2008-05-20 00:49:04 +02:00
}
2009-01-07 13:12:22 +01:00
static int pci_pm_restore ( struct device * dev )
2008-05-20 00:49:04 +02:00
{
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
2009-07-24 22:11:32 -07:00
const struct dev_pm_ops * pm = dev - > driver ? dev - > driver - > pm : NULL ;
2008-05-20 00:49:04 +02:00
2009-01-26 21:43:08 +01:00
/*
* This is necessary for the hibernation error path in which restore is
* called without restoring the standard config registers of the device .
*/
if ( pci_dev - > state_saved )
pci_restore_standard_config ( pci_dev ) ;
2009-01-07 13:09:37 +01:00
if ( pci_has_legacy_pm_support ( pci_dev ) )
2009-01-07 13:12:22 +01:00
return pci_legacy_resume ( dev ) ;
2009-01-07 14:15:17 +01:00
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
pci_pm_default_resume ( pci_dev ) ;
PCI PM: Avoid touching devices behind bridges in unknown state
It generally is better to avoid accessing devices behind bridges that
may not be in the D0 power state, because in that case the bridges'
secondary buses may not be accessible. For this reason, during the
early phase of resume (ie. with interrupts disabled), before
restoring the standard config registers of a device, check the power
state of the bridge the device is behind and postpone the restoration
of the device's config space, as well as any other operations that
would involve accessing the device, if that state is not D0.
In such cases the restoration of the device's config space will be
retried during the "normal" phase of resume (ie. with interrupts
enabled), so that the bridge can be put into D0 before that happens.
Also, save standard configuration registers of PCI devices during the
"normal" phase of suspend (ie. with interrupts enabled), so that the
bridges the devices are behind can be put into low power states (we
don't put bridges into low power states at the moment, but we may
want to do it in the future and it seems reasonable to design for
that).
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-01-07 13:07:15 +01:00
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
if ( pm ) {
if ( pm - > restore )
2019-10-14 13:46:50 -05:00
return pm - > restore ( dev ) ;
PCI PM: make the PM core more careful with drivers using the new PM framework
Currently, the PM core always attempts to manage devices with drivers
that use the new PM framework. In particular, it attempts to disable
the devices (which is unnecessary), to save their state (which may be
undesirable if the driver has done that already) and to put them into
low power states (again, this may be undesirable if the driver has
already put the device into a low power state). That need not be
the right thing to do, so make the core be more careful in this
respect.
Generally, there are the following categories of devices to consider:
* bridge devices without drivers
* non-bridge devices without drivers
* bridge devices with drivers
* non-bridge devices with drivers
and each of them should be handled differently.
For bridge devices without drivers the PCI PM core will save their
state on suspend and restore it (early) during resume, after putting
them into D0 if necessary. It will not attempt to do anything else
to these devices.
For non-bridge devices without drivers the PCI PM core will disable
them and save their state on suspend. During resume, it will put
them into D0, if necessary, restore their state (early) and reenable
them.
For bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already.
Still, the core will restore their state (early) during resume,
after putting them into D0, if necessary.
For non-bridge devices with drivers the PCI PM core will only save
their state on suspend if the driver hasn't done that already. Also,
if the state of the device hasn't been saved by the driver, the core
will attempt to put the device into a low power state. During
resume the core will restore the state of the device (early), after
putting it into D0, if necessary.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-02-04 02:09:07 +01:00
} else {
pci_pm_reenable_device ( pci_dev ) ;
}
2008-05-20 00:49:04 +02:00
2019-10-14 13:46:50 -05:00
return 0 ;
2005-04-08 14:53:31 +09:00
}
2005-04-16 15:20:36 -07:00
2011-04-11 22:54:42 +02:00
# else /* !CONFIG_HIBERNATE_CALLBACKS */
2008-05-20 00:49:04 +02:00
/*
 * This is the !CONFIG_HIBERNATE_CALLBACKS branch: the hibernation callback
 * names are defined as NULL here rather than naming functions.
 */
# define pci_pm_freeze NULL
# define pci_pm_freeze_noirq NULL
# define pci_pm_thaw NULL
# define pci_pm_thaw_noirq NULL
# define pci_pm_poweroff NULL
PCI / PM: Take SMART_SUSPEND driver flag into account
Make the PCI bus type take DPM_FLAG_SMART_SUSPEND into account in its
system-wide PM callbacks and make sure that all code that should not
run in parallel with pci_pm_runtime_resume() is executed in the "late"
phases of system suspend, freeze and poweroff transitions.
[Note that the pm_runtime_suspended() check in pci_dev_keep_suspended()
is an optimization, because if it is not passed, all of the subsequent
checks may be skipped and some of them involve much more overhead in
general.]
Also use the observation that if the device is in runtime suspend
at the beginning of the "late" phase of a system-wide suspend-like
transition, its state cannot change going forward (runtime PM is
disabled for it at that time) until the transition is over and the
subsequent system-wide PM callbacks should be skipped for it (as
they generally assume the device to not be suspended), so add checks
for that in pci_pm_suspend_late/noirq(), pci_pm_freeze_late/noirq()
and pci_pm_poweroff_late/noirq().
Moreover, if pci_pm_resume_noirq() or pci_pm_restore_noirq() is
called during the subsequent system-wide resume transition and if
the device was left in runtime suspend previously, its runtime PM
status needs to be changed to "active" as it is going to be put
into the full-power state, so add checks for that too to these
functions.
In turn, if pci_pm_thaw_noirq() runs after the device has been
left in runtime suspend, the subsequent "thaw" callbacks need
to be skipped for it (as they may not work correctly with a
suspended device), so set the power.direct_complete flag for the
device then to make the PM core skip those callbacks.
In addition to the above add a core helper for checking if
DPM_FLAG_SMART_SUSPEND is set and the device runtime PM status is
"suspended" at the same time, which is done quite often in the new
code (and will be done elsewhere going forward too).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2017-10-26 12:12:22 +02:00
# define pci_pm_poweroff_late NULL
2008-05-20 00:49:04 +02:00
# define pci_pm_poweroff_noirq NULL
# define pci_pm_restore NULL
# define pci_pm_restore_noirq NULL
2011-04-11 22:54:42 +02:00
# endif /* !CONFIG_HIBERNATE_CALLBACKS */
2008-05-20 00:49:04 +02:00
2014-11-27 23:16:57 +01:00
# ifdef CONFIG_PM
2010-02-17 23:44:58 +01:00
static int pci_pm_runtime_suspend ( struct device * dev )
{
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
const struct dev_pm_ops * pm = dev - > driver ? dev - > driver - > pm : NULL ;
pci_power_t prev = pci_dev - > current_state ;
int error ;
PCI/PM: Always disable PTM for all devices during suspend
We want to disable PTM on Root Ports because that allows some chips, e.g.,
Intel mobile chips since Coffee Lake, to enter a lower-power PM state.
That means we also have to disable PTM on downstream devices. PCIe r6.0,
sec 2.2.8, recommends that functions support generation of messages in
non-D0 states, so we have to assume Switch Upstream Ports or Endpoints may
send PTM Requests while in D1, D2, and D3hot. A PTM message received by a
Downstream Port (including a Root Port) with PTM disabled must be treated
as an Unsupported Request (sec 6.21.3).
PTM was previously disabled only for Root Ports, and it was disabled in
pci_prepare_to_sleep(), which is not called at all if a driver supports
legacy PM or does its own state saving.
Instead, disable PTM early in pci_pm_suspend() and pci_pm_runtime_suspend()
so we do it in all cases.
Previously PTM was disabled *after* saving device state, so the state
restore on resume automatically re-enabled it. Since we now disable PTM
*before* saving state, we must explicitly re-enable it in pci_pm_resume()
and pci_pm_runtime_resume().
Here's a sample of errors that occur when PTM is disabled only on the Root
Port. With this topology:
0000:00:1d.0 Root Port to [bus 08-71]
0000:08:00.0 Switch Upstream Port to [bus 09-71]
Kai-Heng reported errors like this:
pcieport 0000:00:1d.0: [20] UnsupReq (First)
pcieport 0000:00:1d.0: AER: TLP Header: 34000000 08000052 00000000 00000000
Decoding TLP header 0x34...... (0011 0100b) and 0x08000052:
Fmt 001b 4 DW header, no data
Type 1 0100b Msg (Local - Terminate at Receiver)
Requester ID 0x0800 Bus 08 Devfn 00.0
Message Code 0x52 0101 0010b PTM Request
The 00:1d.0 Root Port logged an Unsupported Request error when it received
a PTM Request with Requester ID 08:00.0.
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=215453
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=216210
Fixes: a697f072f5da ("PCI: Disable PTM during suspend to save power")
Link: https://lore.kernel.org/r/20220909202505.314195-10-helgaas@kernel.org
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Tested-by: Rajvi Jingar <rajvi.jingar@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2022-09-09 15:25:05 -05:00
pci_suspend_ptm ( pci_dev ) ;
2012-11-20 16:08:22 +08:00
/*
2021-11-10 12:03:34 -06:00
* If pci_dev - > driver is not set ( unbound ) , we leave the device in D0 ,
* but it may go to D3cold when the bridge above it runtime suspends .
* Save its config space in case that happens .
2012-11-20 16:08:22 +08:00
*/
2021-11-10 12:03:34 -06:00
if ( ! pci_dev - > driver ) {
PCI: Restore config space on runtime resume despite being unbound
We leave PCI devices not bound to a driver in D0 during runtime suspend.
But they may have a parent which is bound and can be transitioned to
D3cold at runtime. Once the parent goes to D3cold, the unbound child
may go to D3cold as well. When the child goes to D3cold, its internal
state, including configuration of BARs, MSI, ASPM, MPS, etc., is lost.
One example are recent hybrid graphics laptops which cut power to the
discrete GPU when the root port above it goes to ACPI power state D3.
Users may provoke this by unbinding the GPU driver and allowing runtime
PM on the GPU via sysfs: The PM core will then treat the GPU as
"suspended", which in turn allows the root port to runtime suspend,
causing the power resources listed in its _PR3 object to be powered off.
The GPU's BARs will be uninitialized when a driver later probes it.
Another example are hybrid graphics laptops where the GPU itself (rather
than the root port) is capable of runtime suspending to D3cold. If the
GPU's integrated HDA controller is not bound and the GPU's driver
decides to runtime suspend to D3cold, the HDA controller's BARs will be
uninitialized when a driver later probes it.
Fix by saving and restoring config space over a runtime suspend cycle
even if the device is not bound.
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Peter Wu <peter@lekensteyn.nl> # Nvidia Optimus
Tested-by: Lukas Wunner <lukas@wunner.de> # MacBook Pro
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
[lukas: add commit message, bikeshed code comments for clarity]
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Link: https://patchwork.freedesktop.org/patch/msgid/92fb6e6ae2730915eb733c08e2f76c6a313e3860.1520068884.git.lukas@wunner.de
2018-03-03 10:53:24 +01:00
pci_save_state ( pci_dev ) ;
2012-11-20 16:08:22 +08:00
return 0 ;
PCI: Restore config space on runtime resume despite being unbound
We leave PCI devices not bound to a driver in D0 during runtime suspend.
But they may have a parent which is bound and can be transitioned to
D3cold at runtime. Once the parent goes to D3cold, the unbound child
may go to D3cold as well. When the child goes to D3cold, its internal
state, including configuration of BARs, MSI, ASPM, MPS, etc., is lost.
One example are recent hybrid graphics laptops which cut power to the
discrete GPU when the root port above it goes to ACPI power state D3.
Users may provoke this by unbinding the GPU driver and allowing runtime
PM on the GPU via sysfs: The PM core will then treat the GPU as
"suspended", which in turn allows the root port to runtime suspend,
causing the power resources listed in its _PR3 object to be powered off.
The GPU's BARs will be uninitialized when a driver later probes it.
Another example are hybrid graphics laptops where the GPU itself (rather
than the root port) is capable of runtime suspending to D3cold. If the
GPU's integrated HDA controller is not bound and the GPU's driver
decides to runtime suspend to D3cold, the HDA controller's BARs will be
uninitialized when a driver later probes it.
Fix by saving and restoring config space over a runtime suspend cycle
even if the device is not bound.
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Peter Wu <peter@lekensteyn.nl> # Nvidia Optimus
Tested-by: Lukas Wunner <lukas@wunner.de> # MacBook Pro
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
[lukas: add commit message, bikeshed code comments for clarity]
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Link: https://patchwork.freedesktop.org/patch/msgid/92fb6e6ae2730915eb733c08e2f76c6a313e3860.1520068884.git.lukas@wunner.de
2018-03-03 10:53:24 +01:00
}
2012-11-20 16:08:22 +08:00
2013-02-04 15:56:05 +04:00
pci_dev - > state_saved = false ;
2018-10-23 14:45:52 +03:00
if ( pm & & pm - > runtime_suspend ) {
error = pm - > runtime_suspend ( dev ) ;
2015-11-30 21:02:55 +02:00
/*
* - EBUSY and - EAGAIN is used to request the runtime PM core
* to schedule a new suspend , so log the event only with debug
* log level .
*/
2018-10-23 14:45:52 +03:00
if ( error = = - EBUSY | | error = = - EAGAIN ) {
2019-10-07 07:55:18 -05:00
pci_dbg ( pci_dev , " can't suspend now (%ps returned %d) \n " ,
2015-11-30 21:02:55 +02:00
pm - > runtime_suspend , error ) ;
2018-10-23 14:45:52 +03:00
return error ;
} else if ( error ) {
2019-10-07 07:55:18 -05:00
pci_err ( pci_dev , " can't suspend (%ps returned %d) \n " ,
2015-11-30 21:02:55 +02:00
pm - > runtime_suspend , error ) ;
2018-10-23 14:45:52 +03:00
return error ;
}
2015-11-30 21:02:55 +02:00
}
2010-02-17 23:44:58 +01:00
pci_fixup_device ( pci_fixup_suspend , pci_dev ) ;
2018-10-23 14:45:52 +03:00
if ( pm & & pm - > runtime_suspend
& & ! pci_dev - > state_saved & & pci_dev - > current_state ! = PCI_D0
2010-02-17 23:44:58 +01:00
& & pci_dev - > current_state ! = PCI_UNKNOWN ) {
2019-10-07 07:52:28 -05:00
pci_WARN_ONCE ( pci_dev , pci_dev - > current_state ! = prev ,
" PCI PM: State of device not saved by %pS \n " ,
pm - > runtime_suspend ) ;
2010-02-17 23:44:58 +01:00
return 0 ;
}
2012-10-29 17:26:54 -06:00
if ( ! pci_dev - > state_saved ) {
2010-02-17 23:44:58 +01:00
pci_save_state ( pci_dev ) ;
2012-10-29 17:26:54 -06:00
pci_finish_runtime_suspend ( pci_dev ) ;
}
2010-02-17 23:44:58 +01:00
return 0 ;
}
/*
 * pci_pm_runtime_resume - runtime-resume handling for a PCI device.
 * @dev: generic device; converted to its struct pci_dev with to_pci_dev().
 *
 * Order of operations, as written below:
 *   1. Record pci_dev->current_state in prev_state before anything else runs.
 *   2. Call pci_pm_default_resume_early() and pci_resume_ptm() unconditionally,
 *      i.e. also when no driver is bound.
 *   3. Return 0 immediately if pci_dev->driver is not set.
 *   4. Run the pci_fixup_resume_early fixups, then pci_pm_default_resume().
 *   5. If prev_state was PCI_D3cold, call pci_pm_bridge_power_up_actions().
 *   6. If the driver has dev_pm_ops with a runtime_resume callback, call it.
 *
 * Return: 0, or the value returned by the driver runtime_resume callback.
 */
static int pci_pm_runtime_resume ( struct device * dev )
{
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
const struct dev_pm_ops * pm = dev - > driver ? dev - > driver - > pm : NULL ;
PCI/PM: Add missing link delays required by the PCIe spec
Currently Linux does not follow PCIe spec regarding the required delays
after reset. A concrete example is a Thunderbolt add-in-card that consists
of a PCIe switch and two PCIe endpoints:
+-1b.0-[01-6b]----00.0-[02-6b]--+-00.0-[03]----00.0 TBT controller
+-01.0-[04-36]-- DS hotplug port
+-02.0-[37]----00.0 xHCI controller
\-04.0-[38-6b]-- DS hotplug port
The root port (1b.0) and the PCIe switch downstream ports are all PCIe Gen3
so they support 8GT/s link speeds.
We wait for the PCIe hierarchy to enter D3cold (runtime):
pcieport 0000:00:1b.0: power state changed by ACPI to D3cold
When it wakes up from D3cold, according to the PCIe 5.0 section 5.8 the
PCIe switch is put to reset and its power is re-applied. This means that we
must follow the rules in PCIe 5.0 section 6.6.1.
For the PCIe Gen3 ports we are dealing with here, the following applies:
With a Downstream Port that supports Link speeds greater than 5.0 GT/s,
software must wait a minimum of 100 ms after Link training completes
before sending a Configuration Request to the device immediately below
that Port. Software can determine when Link training completes by polling
the Data Link Layer Link Active bit or by setting up an associated
interrupt (see Section 6.7.3.3).
Translating this into the above topology we would need to do this (DLLLA
stands for Data Link Layer Link Active):
0000:00:1b.0: wait for 100 ms after DLLLA is set before access to 0000:01:00.0
0000:02:00.0: wait for 100 ms after DLLLA is set before access to 0000:03:00.0
0000:02:02.0: wait for 100 ms after DLLLA is set before access to 0000:37:00.0
I've instrumented the kernel with some additional logging so we can see the
actual delays performed:
pcieport 0000:00:1b.0: power state changed by ACPI to D0
pcieport 0000:00:1b.0: waiting for D3cold delay of 100 ms
pcieport 0000:00:1b.0: waiting for D3hot delay of 10 ms
pcieport 0000:02:01.0: waiting for D3hot delay of 10 ms
pcieport 0000:02:04.0: waiting for D3hot delay of 10 ms
For the switch upstream port (01:00.0 reachable through 00:1b.0 root port)
we wait for 100 ms but not taking into account the DLLLA requirement. We
then wait 10 ms for D3hot -> D0 transition of the root port and the two
downstream hotplug ports. This means that we deviate from what the spec
requires.
Performing the same check for system sleep (s2idle) transitions it turns
out to be even worse. None of the mandatory delays are performed. If this
would be S3 instead of s2idle then according to PCI FW spec 3.2 section
4.6.8. there is a specific _DSM that allows the OS to skip the delays but
this platform does not provide the _DSM and does not go to S3 anyway so no
firmware is involved that could already handle these delays.
On this particular platform these delays are not actually needed because
there is an additional delay as part of the ACPI power resource that is
used to turn on power to the hierarchy but since that additional delay is
not required by any of standards (PCIe, ACPI) it is not present in the
Intel Ice Lake, for example where missing the mandatory delays causes
pciehp to start tearing down the stack too early (links are not yet
trained). Below is an example how it looks like when this happens:
pcieport 0000:83:04.0: pciehp: Slot(4): Card not present
pcieport 0000:87:04.0: PME# disabled
pcieport 0000:83:04.0: pciehp: pciehp_unconfigure_device: domain:bus:dev = 0000:86:00
pcieport 0000:86:00.0: Refused to change power state, currently in D3
pcieport 0000:86:00.0: restoring config space at offset 0x3c (was 0xffffffff, writing 0x201ff)
pcieport 0000:86:00.0: restoring config space at offset 0x38 (was 0xffffffff, writing 0x0)
...
There is also one reported case (see the bugzilla link below) where the
missing delay causes xHCI on a Titan Ridge controller fail to runtime
resume when USB-C dock is plugged. This does not involve pciehp but instead
the PCI core fails to runtime resume the xHCI device:
pcieport 0000:04:02.0: restoring config space at offset 0xc (was 0x10000, writing 0x10020)
pcieport 0000:04:02.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100406)
xhci_hcd 0000:39:00.0: Refused to change power state, currently in D3
xhci_hcd 0000:39:00.0: restoring config space at offset 0x3c (was 0xffffffff, writing 0x1ff)
xhci_hcd 0000:39:00.0: restoring config space at offset 0x38 (was 0xffffffff, writing 0x0)
...
Add a new function pci_bridge_wait_for_secondary_bus() that is called on
PCI core resume and runtime resume paths accordingly if the bridge entered
D3cold (and thus went through reset).
This is second attempt to add the missing delays. The previous solution in
c2bf1fc212f7 ("PCI: Add missing link delays required by the PCIe spec") was
reverted because of two issues it caused:
1. One system become unresponsive after S3 resume due to PME service
spinning in pcie_pme_work_fn(). The root port in question reports that
the xHCI sent PME but the xHCI device itself does not have PME status
set. The PME status bit is never cleared in the root port resulting
the indefinite loop in pcie_pme_work_fn().
2. Slows down resume if the root/downstream port does not support Data
Link Layer Active Reporting because pcie_wait_for_link_delay() waits
1100 ms in that case.
This version should avoid the above issues because we restrict the delay to
happen only if the port went into D3cold.
Link: https://lore.kernel.org/linux-pci/SL2P216MB01878BBCD75F21D882AEEA2880C60@SL2P216MB0187.KORP216.PROD.OUTLOOK.COM/
Link: https://bugzilla.kernel.org/show_bug.cgi?id=203885
Link: https://lore.kernel.org/r/20191112091617.70282-3-mika.westerberg@linux.intel.com
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2019-11-12 12:16:17 +03:00
/* Snapshot of current_state taken on entry; compared against PCI_D3cold further down. */
pci_power_t prev_state = pci_dev - > current_state ;
2019-10-14 13:46:50 -05:00
int error = 0 ;
2010-02-17 23:44:58 +01:00
2012-11-20 16:08:22 +08:00
/*
PCI: Restore config space on runtime resume despite being unbound
We leave PCI devices not bound to a driver in D0 during runtime suspend.
But they may have a parent which is bound and can be transitioned to
D3cold at runtime. Once the parent goes to D3cold, the unbound child
may go to D3cold as well. When the child goes to D3cold, its internal
state, including configuration of BARs, MSI, ASPM, MPS, etc., is lost.
One example are recent hybrid graphics laptops which cut power to the
discrete GPU when the root port above it goes to ACPI power state D3.
Users may provoke this by unbinding the GPU driver and allowing runtime
PM on the GPU via sysfs: The PM core will then treat the GPU as
"suspended", which in turn allows the root port to runtime suspend,
causing the power resources listed in its _PR3 object to be powered off.
The GPU's BARs will be uninitialized when a driver later probes it.
Another example are hybrid graphics laptops where the GPU itself (rather
than the root port) is capable of runtime suspending to D3cold. If the
GPU's integrated HDA controller is not bound and the GPU's driver
decides to runtime suspend to D3cold, the HDA controller's BARs will be
uninitialized when a driver later probes it.
Fix by saving and restoring config space over a runtime suspend cycle
even if the device is not bound.
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Peter Wu <peter@lekensteyn.nl> # Nvidia Optimus
Tested-by: Lukas Wunner <lukas@wunner.de> # MacBook Pro
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
[lukas: add commit message, bikeshed code comments for clarity]
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Link: https://patchwork.freedesktop.org/patch/msgid/92fb6e6ae2730915eb733c08e2f76c6a313e3860.1520068884.git.lukas@wunner.de
2018-03-03 10:53:24 +01:00
* Restoring config space is necessary even if the device is not bound
* to a driver because although we left it in D0 , it may have gone to
* D3cold when the bridge above it runtime suspended .
2012-11-20 16:08:22 +08:00
*/
2022-04-08 20:29:01 +02:00
pci_pm_default_resume_early ( pci_dev ) ;
PCI/PM: Always disable PTM for all devices during suspend
We want to disable PTM on Root Ports because that allows some chips, e.g.,
Intel mobile chips since Coffee Lake, to enter a lower-power PM state.
That means we also have to disable PTM on downstream devices. PCIe r6.0,
sec 2.2.8, recommends that functions support generation of messages in
non-D0 states, so we have to assume Switch Upstream Ports or Endpoints may
send PTM Requests while in D1, D2, and D3hot. A PTM message received by a
Downstream Port (including a Root Port) with PTM disabled must be treated
as an Unsupported Request (sec 6.21.3).
PTM was previously disabled only for Root Ports, and it was disabled in
pci_prepare_to_sleep(), which is not called at all if a driver supports
legacy PM or does its own state saving.
Instead, disable PTM early in pci_pm_suspend() and pci_pm_runtime_suspend()
so we do it in all cases.
Previously PTM was disabled *after* saving device state, so the state
restore on resume automatically re-enabled it. Since we now disable PTM
*before* saving state, we must explicitly re-enable it in pci_pm_resume()
and pci_pm_runtime_resume().
Here's a sample of errors that occur when PTM is disabled only on the Root
Port. With this topology:
0000:00:1d.0 Root Port to [bus 08-71]
0000:08:00.0 Switch Upstream Port to [bus 09-71]
Kai-Heng reported errors like this:
pcieport 0000:00:1d.0: [20] UnsupReq (First)
pcieport 0000:00:1d.0: AER: TLP Header: 34000000 08000052 00000000 00000000
Decoding TLP header 0x34...... (0011 0100b) and 0x08000052:
Fmt 001b 4 DW header, no data
Type 1 0100b Msg (Local - Terminate at Receiver)
Requester ID 0x0800 Bus 08 Devfn 00.0
Message Code 0x52 0101 0010b PTM Request
The 00:1d.0 Root Port logged an Unsupported Request error when it received
a PTM Request with Requester ID 08:00.0.
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=215453
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=216210
Fixes: a697f072f5da ("PCI: Disable PTM during suspend to save power")
Link: https://lore.kernel.org/r/20220909202505.314195-10-helgaas@kernel.org
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Tested-by: Rajvi Jingar <rajvi.jingar@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2022-09-09 15:25:05 -05:00
pci_resume_ptm ( pci_dev ) ;
PCI: Restore config space on runtime resume despite being unbound
We leave PCI devices not bound to a driver in D0 during runtime suspend.
But they may have a parent which is bound and can be transitioned to
D3cold at runtime. Once the parent goes to D3cold, the unbound child
may go to D3cold as well. When the child goes to D3cold, its internal
state, including configuration of BARs, MSI, ASPM, MPS, etc., is lost.
One example are recent hybrid graphics laptops which cut power to the
discrete GPU when the root port above it goes to ACPI power state D3.
Users may provoke this by unbinding the GPU driver and allowing runtime
PM on the GPU via sysfs: The PM core will then treat the GPU as
"suspended", which in turn allows the root port to runtime suspend,
causing the power resources listed in its _PR3 object to be powered off.
The GPU's BARs will be uninitialized when a driver later probes it.
Another example are hybrid graphics laptops where the GPU itself (rather
than the root port) is capable of runtime suspending to D3cold. If the
GPU's integrated HDA controller is not bound and the GPU's driver
decides to runtime suspend to D3cold, the HDA controller's BARs will be
uninitialized when a driver later probes it.
Fix by saving and restoring config space over a runtime suspend cycle
even if the device is not bound.
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Peter Wu <peter@lekensteyn.nl> # Nvidia Optimus
Tested-by: Lukas Wunner <lukas@wunner.de> # MacBook Pro
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
[lukas: add commit message, bikeshed code comments for clarity]
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Link: https://patchwork.freedesktop.org/patch/msgid/92fb6e6ae2730915eb733c08e2f76c6a313e3860.1520068884.git.lukas@wunner.de
2018-03-03 10:53:24 +01:00
2021-11-10 12:03:34 -06:00
/* No driver bound: the remaining steps are skipped and success is reported. */
if ( ! pci_dev - > driver )
2012-11-20 16:08:22 +08:00
return 0 ;
PCI / PM: restore the original behavior of pci_set_power_state()
Commit cc2893b6 (PCI: Ensure we re-enable devices on resume)
addressed the problem with USB not being powered after resume on
recent Lenovo machines, but it did that in a suboptimal way.
Namely, it should have changed the relevant code paths only,
which are pci_pm_resume_noirq() and pci_pm_restore_noirq() supposed
to restore the device's power and standard configuration registers
after system resume from suspend or hibernation. Instead, however,
it modified pci_set_power_state() which is executed in several
other situations too. That resulted in some undesirable effects,
like attempting to change a device's power state in the same way
multiple times in a row (up to as many as 4 times in a row in the
snd_hda_intel driver).
Fix the bug addressed by commit cc2893b6 in an alternative way,
by forcibly powering up all devices in pci_pm_default_resume_early(),
which is called by pci_pm_resume_noirq() and pci_pm_restore_noirq()
to restore the device's power and standard configuration registers,
and modifying pci_pm_runtime_resume() to avoid the forcible power-up
if not necessary. Then, revert the changes made by commit cc2893b6
to make the confusion introduced by it go away.
Acked-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2012-07-05 15:20:00 -06:00
pci_fixup_device ( pci_fixup_resume_early , pci_dev ) ;
PCI/PM: Run resume fixups before disabling wakeup events
pci_pm_resume() and pci_pm_restore() call pci_pm_default_resume(), which
runs resume fixups before disabling wakeup events:
static void pci_pm_default_resume(struct pci_dev *pci_dev)
{
pci_fixup_device(pci_fixup_resume, pci_dev);
pci_enable_wake(pci_dev, PCI_D0, false);
}
pci_pm_runtime_resume() does both of these, but in the opposite order:
pci_enable_wake(pci_dev, PCI_D0, false);
pci_fixup_device(pci_fixup_resume, pci_dev);
We should always use the same ordering unless there's a reason to do
otherwise. Change pci_pm_runtime_resume() to call pci_pm_default_resume()
instead of open-coding this, so the fixups are always done before disabling
wakeup events.
pci_pm_default_resume() is called from pci_pm_runtime_resume(), which is
under #ifdef CONFIG_PM. If SUSPEND and HIBERNATION are disabled, PM_SLEEP
is disabled also, so move pci_pm_default_resume() from #ifdef
CONFIG_PM_SLEEP to #ifdef CONFIG_PM.
Link: https://lore.kernel.org/r/20191014230016.240912-5-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2019-10-12 17:15:57 -05:00
pci_pm_default_resume ( pci_dev ) ;
2010-02-17 23:44:58 +01:00
PCI/PM: Add missing link delays required by the PCIe spec
Currently Linux does not follow PCIe spec regarding the required delays
after reset. A concrete example is a Thunderbolt add-in-card that consists
of a PCIe switch and two PCIe endpoints:
+-1b.0-[01-6b]----00.0-[02-6b]--+-00.0-[03]----00.0 TBT controller
+-01.0-[04-36]-- DS hotplug port
+-02.0-[37]----00.0 xHCI controller
\-04.0-[38-6b]-- DS hotplug port
The root port (1b.0) and the PCIe switch downstream ports are all PCIe Gen3
so they support 8GT/s link speeds.
We wait for the PCIe hierarchy to enter D3cold (runtime):
pcieport 0000:00:1b.0: power state changed by ACPI to D3cold
When it wakes up from D3cold, according to the PCIe 5.0 section 5.8 the
PCIe switch is put to reset and its power is re-applied. This means that we
must follow the rules in PCIe 5.0 section 6.6.1.
For the PCIe Gen3 ports we are dealing with here, the following applies:
With a Downstream Port that supports Link speeds greater than 5.0 GT/s,
software must wait a minimum of 100 ms after Link training completes
before sending a Configuration Request to the device immediately below
that Port. Software can determine when Link training completes by polling
the Data Link Layer Link Active bit or by setting up an associated
interrupt (see Section 6.7.3.3).
Translating this into the above topology we would need to do this (DLLLA
stands for Data Link Layer Link Active):
0000:00:1b.0: wait for 100 ms after DLLLA is set before access to 0000:01:00.0
0000:02:00.0: wait for 100 ms after DLLLA is set before access to 0000:03:00.0
0000:02:02.0: wait for 100 ms after DLLLA is set before access to 0000:37:00.0
I've instrumented the kernel with some additional logging so we can see the
actual delays performed:
pcieport 0000:00:1b.0: power state changed by ACPI to D0
pcieport 0000:00:1b.0: waiting for D3cold delay of 100 ms
pcieport 0000:00:1b.0: waiting for D3hot delay of 10 ms
pcieport 0000:02:01.0: waiting for D3hot delay of 10 ms
pcieport 0000:02:04.0: waiting for D3hot delay of 10 ms
For the switch upstream port (01:00.0 reachable through 00:1b.0 root port)
we wait for 100 ms but not taking into account the DLLLA requirement. We
then wait 10 ms for D3hot -> D0 transition of the root port and the two
downstream hotplug ports. This means that we deviate from what the spec
requires.
Performing the same check for system sleep (s2idle) transitions it turns
out to be even worse. None of the mandatory delays are performed. If this
would be S3 instead of s2idle then according to PCI FW spec 3.2 section
4.6.8. there is a specific _DSM that allows the OS to skip the delays but
this platform does not provide the _DSM and does not go to S3 anyway so no
firmware is involved that could already handle these delays.
On this particular platform these delays are not actually needed because
there is an additional delay as part of the ACPI power resource that is
used to turn on power to the hierarchy but since that additional delay is
not required by any of standards (PCIe, ACPI) it is not present in the
Intel Ice Lake, for example where missing the mandatory delays causes
pciehp to start tearing down the stack too early (links are not yet
trained). Below is an example how it looks like when this happens:
pcieport 0000:83:04.0: pciehp: Slot(4): Card not present
pcieport 0000:87:04.0: PME# disabled
pcieport 0000:83:04.0: pciehp: pciehp_unconfigure_device: domain:bus:dev = 0000:86:00
pcieport 0000:86:00.0: Refused to change power state, currently in D3
pcieport 0000:86:00.0: restoring config space at offset 0x3c (was 0xffffffff, writing 0x201ff)
pcieport 0000:86:00.0: restoring config space at offset 0x38 (was 0xffffffff, writing 0x0)
...
There is also one reported case (see the bugzilla link below) where the
missing delay causes xHCI on a Titan Ridge controller fail to runtime
resume when USB-C dock is plugged. This does not involve pciehp but instead
the PCI core fails to runtime resume the xHCI device:
pcieport 0000:04:02.0: restoring config space at offset 0xc (was 0x10000, writing 0x10020)
pcieport 0000:04:02.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100406)
xhci_hcd 0000:39:00.0: Refused to change power state, currently in D3
xhci_hcd 0000:39:00.0: restoring config space at offset 0x3c (was 0xffffffff, writing 0x1ff)
xhci_hcd 0000:39:00.0: restoring config space at offset 0x38 (was 0xffffffff, writing 0x0)
...
Add a new function pci_bridge_wait_for_secondary_bus() that is called on
PCI core resume and runtime resume paths accordingly if the bridge entered
D3cold (and thus went through reset).
This is second attempt to add the missing delays. The previous solution in
c2bf1fc212f7 ("PCI: Add missing link delays required by the PCIe spec") was
reverted because of two issues it caused:
1. One system become unresponsive after S3 resume due to PME service
spinning in pcie_pme_work_fn(). The root port in question reports that
the xHCI sent PME but the xHCI device itself does not have PME status
set. The PME status bit is never cleared in the root port resulting
the indefinite loop in pcie_pme_work_fn().
2. Slows down resume if the root/downstream port does not support Data
Link Layer Active Reporting because pcie_wait_for_link_delay() waits
1100 ms in that case.
This version should avoid the above issues because we restrict the delay to
happen only if the port went into D3cold.
Link: https://lore.kernel.org/linux-pci/SL2P216MB01878BBCD75F21D882AEEA2880C60@SL2P216MB0187.KORP216.PROD.OUTLOOK.COM/
Link: https://bugzilla.kernel.org/show_bug.cgi?id=203885
Link: https://lore.kernel.org/r/20191112091617.70282-3-mika.westerberg@linux.intel.com
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2019-11-12 12:16:17 +03:00
/* Uses the state recorded on entry, not the current value of pci_dev->current_state. */
if ( prev_state = = PCI_D3cold )
2022-04-14 15:04:13 +02:00
pci_pm_bridge_power_up_actions ( pci_dev ) ;
PCI/PM: Add missing link delays required by the PCIe spec
Currently Linux does not follow PCIe spec regarding the required delays
after reset. A concrete example is a Thunderbolt add-in-card that consists
of a PCIe switch and two PCIe endpoints:
+-1b.0-[01-6b]----00.0-[02-6b]--+-00.0-[03]----00.0 TBT controller
+-01.0-[04-36]-- DS hotplug port
+-02.0-[37]----00.0 xHCI controller
\-04.0-[38-6b]-- DS hotplug port
The root port (1b.0) and the PCIe switch downstream ports are all PCIe Gen3
so they support 8GT/s link speeds.
We wait for the PCIe hierarchy to enter D3cold (runtime):
pcieport 0000:00:1b.0: power state changed by ACPI to D3cold
When it wakes up from D3cold, according to the PCIe 5.0 section 5.8 the
PCIe switch is put to reset and its power is re-applied. This means that we
must follow the rules in PCIe 5.0 section 6.6.1.
For the PCIe Gen3 ports we are dealing with here, the following applies:
With a Downstream Port that supports Link speeds greater than 5.0 GT/s,
software must wait a minimum of 100 ms after Link training completes
before sending a Configuration Request to the device immediately below
that Port. Software can determine when Link training completes by polling
the Data Link Layer Link Active bit or by setting up an associated
interrupt (see Section 6.7.3.3).
Translating this into the above topology we would need to do this (DLLLA
stands for Data Link Layer Link Active):
0000:00:1b.0: wait for 100 ms after DLLLA is set before access to 0000:01:00.0
0000:02:00.0: wait for 100 ms after DLLLA is set before access to 0000:03:00.0
0000:02:02.0: wait for 100 ms after DLLLA is set before access to 0000:37:00.0
I've instrumented the kernel with some additional logging so we can see the
actual delays performed:
pcieport 0000:00:1b.0: power state changed by ACPI to D0
pcieport 0000:00:1b.0: waiting for D3cold delay of 100 ms
pcieport 0000:00:1b.0: waiting for D3hot delay of 10 ms
pcieport 0000:02:01.0: waiting for D3hot delay of 10 ms
pcieport 0000:02:04.0: waiting for D3hot delay of 10 ms
For the switch upstream port (01:00.0 reachable through 00:1b.0 root port)
we wait for 100 ms but not taking into account the DLLLA requirement. We
then wait 10 ms for D3hot -> D0 transition of the root port and the two
downstream hotplug ports. This means that we deviate from what the spec
requires.
Performing the same check for system sleep (s2idle) transitions it turns
out to be even worse. None of the mandatory delays are performed. If this
would be S3 instead of s2idle then according to PCI FW spec 3.2 section
4.6.8. there is a specific _DSM that allows the OS to skip the delays but
this platform does not provide the _DSM and does not go to S3 anyway so no
firmware is involved that could already handle these delays.
On this particular platform these delays are not actually needed because
there is an additional delay as part of the ACPI power resource that is
used to turn on power to the hierarchy but since that additional delay is
not required by any of standards (PCIe, ACPI) it is not present in the
Intel Ice Lake, for example where missing the mandatory delays causes
pciehp to start tearing down the stack too early (links are not yet
trained). Below is an example how it looks like when this happens:
pcieport 0000:83:04.0: pciehp: Slot(4): Card not present
pcieport 0000:87:04.0: PME# disabled
pcieport 0000:83:04.0: pciehp: pciehp_unconfigure_device: domain:bus:dev = 0000:86:00
pcieport 0000:86:00.0: Refused to change power state, currently in D3
pcieport 0000:86:00.0: restoring config space at offset 0x3c (was 0xffffffff, writing 0x201ff)
pcieport 0000:86:00.0: restoring config space at offset 0x38 (was 0xffffffff, writing 0x0)
...
There is also one reported case (see the bugzilla link below) where the
missing delay causes xHCI on a Titan Ridge controller fail to runtime
resume when USB-C dock is plugged. This does not involve pciehp but instead
the PCI core fails to runtime resume the xHCI device:
pcieport 0000:04:02.0: restoring config space at offset 0xc (was 0x10000, writing 0x10020)
pcieport 0000:04:02.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100406)
xhci_hcd 0000:39:00.0: Refused to change power state, currently in D3
xhci_hcd 0000:39:00.0: restoring config space at offset 0x3c (was 0xffffffff, writing 0x1ff)
xhci_hcd 0000:39:00.0: restoring config space at offset 0x38 (was 0xffffffff, writing 0x0)
...
Add a new function pci_bridge_wait_for_secondary_bus() that is called on
PCI core resume and runtime resume paths accordingly if the bridge entered
D3cold (and thus went through reset).
This is second attempt to add the missing delays. The previous solution in
c2bf1fc212f7 ("PCI: Add missing link delays required by the PCIe spec") was
reverted because of two issues it caused:
1. One system become unresponsive after S3 resume due to PME service
spinning in pcie_pme_work_fn(). The root port in question reports that
the xHCI sent PME but the xHCI device itself does not have PME status
set. The PME status bit is never cleared in the root port resulting
the indefinite loop in pcie_pme_work_fn().
2. Slows down resume if the root/downstream port does not support Data
Link Layer Active Reporting because pcie_wait_for_link_delay() waits
1100 ms in that case.
This version should avoid the above issues because we restrict the delay to
happen only if the port went into D3cold.
Link: https://lore.kernel.org/linux-pci/SL2P216MB01878BBCD75F21D882AEEA2880C60@SL2P216MB0187.KORP216.PROD.OUTLOOK.COM/
Link: https://bugzilla.kernel.org/show_bug.cgi?id=203885
Link: https://lore.kernel.org/r/20191112091617.70282-3-mika.westerberg@linux.intel.com
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2019-11-12 12:16:17 +03:00
2018-10-23 14:45:52 +03:00
/* The driver callback is optional; error keeps its initial 0 when it is absent. */
if ( pm & & pm - > runtime_resume )
2019-10-14 13:46:50 -05:00
error = pm - > runtime_resume ( dev ) ;
PCI/PM: add PCIe runtime D3cold support
This patch adds runtime D3cold support and corresponding ACPI platform
support. This patch only enables runtime D3cold support; it does not
enable D3cold support during system suspend/hibernate.
D3cold is the deepest power saving state for a PCIe device, where its main
power is removed. While it is in D3cold, you can't access the device at
all, not even its configuration space (which is still accessible in D3hot).
Therefore the PCI PM registers can not be used to transition into/out of
the D3cold state; that must be done by platform logic such as ACPI _PR3.
To support wakeup from D3cold, a system may provide auxiliary power, which
allows a device to request wakeup using a Beacon or the sideband WAKE#
signal. WAKE# is usually connected to platform logic such as ACPI GPE.
This is quite different from other power saving states, where devices
request wakeup via a PME message on the PCIe link.
Some devices, such as those in plug-in slots, have no direct platform
logic. For example, there is usually no ACPI _PR3 for them. D3cold
support for these devices can be done via the PCIe Downstream Port leading
to the device. When the PCIe port is powered on/off, the device is powered
on/off too. Wakeup events from the device will be notified to the
corresponding PCIe port.
For more information about PCIe D3cold and corresponding ACPI support,
please refer to:
- PCI Express Base Specification Revision 2.0
- Advanced Configuration and Power Interface Specification Revision 5.0
[bhelgaas: changelog]
Reviewed-by: Rafael J. Wysocki <rjw@sisk.pl>
Originally-by: Zheng Yan <zheng.z.yan@intel.com>
Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2012-06-23 10:23:51 +08:00
2019-10-14 13:46:50 -05:00
return error ;
2010-02-17 23:44:58 +01:00
}
/*
 * pci_pm_runtime_idle - runtime-idle handling for a PCI device.
 * @dev: generic device; converted to its struct pci_dev with to_pci_dev().
 *
 * Return:
 *   0        if pci_dev->driver is not set, or if the driver has dev_pm_ops
 *            but no runtime_idle callback;
 *   -ENOSYS  if pci_dev->driver is set but no dev_pm_ops could be obtained
 *            through dev->driver;
 *   otherwise the value returned by the driver runtime_idle callback.
 */
static int pci_pm_runtime_idle ( struct device * dev )
{
2021-11-10 12:03:34 -06:00
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
2010-02-17 23:44:58 +01:00
const struct dev_pm_ops * pm = dev - > driver ? dev - > driver - > pm : NULL ;
2012-11-20 16:08:22 +08:00
/*
2021-11-10 12:03:34 -06:00
* If pci_dev - > driver is not set ( unbound ) , the device should
* always remain in D0 regardless of the runtime PM status
2012-11-20 16:08:22 +08:00
*/
2021-11-10 12:03:34 -06:00
if ( ! pci_dev - > driver )
PM / Runtime: Rework the "runtime idle" helper routine
The "runtime idle" helper routine, rpm_idle(), currently ignores
return values from .runtime_idle() callbacks executed by it.
However, it turns out that many subsystems use
pm_generic_runtime_idle() which checks the return value of the
driver's callback and executes pm_runtime_suspend() for the device
unless that value is not 0. If that logic is moved to rpm_idle()
instead, pm_generic_runtime_idle() can be dropped and its users
will not need any .runtime_idle() callbacks any more.
Moreover, the PCI, SCSI, and SATA subsystems' .runtime_idle()
routines, pci_pm_runtime_idle(), scsi_runtime_idle(), and
ata_port_runtime_idle(), respectively, as well as a few drivers'
ones may be simplified if rpm_idle() calls rpm_suspend() after 0 has
been returned by the .runtime_idle() callback executed by it.
To reduce overall code bloat, make the changes described above.
Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Kevin Hilman <khilman@linaro.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
2013-06-03 21:49:52 +02:00
return 0 ;
2012-11-20 16:08:22 +08:00
2010-02-17 23:44:58 +01:00
/* A driver is bound but no dev_pm_ops were found via dev->driver. */
if ( ! pm )
return - ENOSYS ;
PM / Runtime: Rework the "runtime idle" helper routine
The "runtime idle" helper routine, rpm_idle(), currently ignores
return values from .runtime_idle() callbacks executed by it.
However, it turns out that many subsystems use
pm_generic_runtime_idle() which checks the return value of the
driver's callback and executes pm_runtime_suspend() for the device
unless that value is not 0. If that logic is moved to rpm_idle()
instead, pm_generic_runtime_idle() can be dropped and its users
will not need any .runtime_idle() callbacks any more.
Moreover, the PCI, SCSI, and SATA subsystems' .runtime_idle()
routines, pci_pm_runtime_idle(), scsi_runtime_idle(), and
ata_port_runtime_idle(), respectively, as well as a few drivers'
ones may be simplified if rpm_idle() calls rpm_suspend() after 0 has
been returned by the .runtime_idle() callback executed by it.
To reduce overall code bloat, make the changes described above.
Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Kevin Hilman <khilman@linaro.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
2013-06-03 21:49:52 +02:00
/* Defer to the driver callback when present; its result is returned unchanged. */
if ( pm - > runtime_idle )
2019-10-14 13:46:50 -05:00
return pm - > runtime_idle ( dev ) ;
2010-02-17 23:44:58 +01:00
2019-10-14 13:46:50 -05:00
return 0 ;
2010-02-17 23:44:58 +01:00
}
2013-10-04 12:04:44 -06:00
static const struct dev_pm_ops pci_dev_pm_ops = {
2008-10-06 22:46:05 +02:00
. prepare = pci_pm_prepare ,
2015-09-30 01:10:24 +02:00
. complete = pci_pm_complete ,
2008-10-06 22:46:05 +02:00
. suspend = pci_pm_suspend ,
PCI / PM: Take SMART_SUSPEND driver flag into account
Make the PCI bus type take DPM_FLAG_SMART_SUSPEND into account in its
system-wide PM callbacks and make sure that all code that should not
run in parallel with pci_pm_runtime_resume() is executed in the "late"
phases of system suspend, freeze and poweroff transitions.
[Note that the pm_runtime_suspended() check in pci_dev_keep_suspended()
is an optimization, because if it is not passed, all of the subsequent
checks may be skipped and some of them incur much more overhead in
general.]
Also use the observation that if the device is in runtime suspend
at the beginning of the "late" phase of a system-wide suspend-like
transition, its state cannot change going forward (runtime PM is
disabled for it at that time) until the transition is over and the
subsequent system-wide PM callbacks should be skipped for it (as
they generally assume the device to not be suspended), so add checks
for that in pci_pm_suspend_late/noirq(), pci_pm_freeze_late/noirq()
and pci_pm_poweroff_late/noirq().
Moreover, if pci_pm_resume_noirq() or pci_pm_restore_noirq() is
called during the subsequent system-wide resume transition and if
the device was left in runtime suspend previously, its runtime PM
status needs to be changed to "active" as it is going to be put
into the full-power state, so add checks for that too to these
functions.
In turn, if pci_pm_thaw_noirq() runs after the device has been
left in runtime suspend, the subsequent "thaw" callbacks need
to be skipped for it (as they may not work correctly with a
suspended device), so set the power.direct_complete flag for the
device then to make the PM core skip those callbacks.
In addition to the above add a core helper for checking if
DPM_FLAG_SMART_SUSPEND is set and the device runtime PM status is
"suspended" at the same time, which is done quite often in the new
code (and will be done elsewhere going forward too).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2017-10-26 12:12:22 +02:00
. suspend_late = pci_pm_suspend_late ,
2008-10-06 22:46:05 +02:00
. resume = pci_pm_resume ,
PM: sleep: core: Do not skip callbacks in the resume phase
The current code in device_resume_noirq() causes the entire early
resume and resume phases of device suspend to be skipped for
devices for which the noirq resume phase has been skipped (due
to the LEAVE_SUSPENDED flag being set) on the premise that those
devices should stay in runtime-suspend after system-wide resume.
However, that may not be correct in two situations. First, the
middle layer (subsystem) noirq resume callback may be missing for
a given device, but its early resume callback may be present and it
may need to do something even if it decides to skip the driver
callback. Second, if the device's wakeup settings were adjusted
in the suspend phase without resuming the device (that was in
runtime suspend at that time), they most likely need to be
adjusted again in the resume phase and so the driver callback
in that phase needs to be run.
For the above reason, modify the core to allow the middle layer
->resume_late callback to run even if its ->resume_noirq callback
is missing (and the core has skipped the driver-level callback
in that phase) and to allow all device callbacks to run in the
resume phase. Also make the core set the PM-runtime status of
devices with SMART_SUSPEND set whose resume callbacks are not
skipped to "active" in the "noirq" resume phase and update the
affected subsystems (PCI and ACPI) accordingly.
After this change, middle-layer (subsystem) callbacks will always
be invoked in all phases of system suspend and resume and driver
callbacks will always run in the prepare, suspend, resume, and
complete phases for all devices.
For devices with SMART_SUSPEND set, driver callbacks will be
skipped in the late and noirq phases of system suspend if those
devices remain in runtime suspend in __device_suspend_late().
Driver callbacks will also be skipped for them during the
noirq and early phases of the "thaw" transition related to
hibernation in that case.
Setting LEAVE_SUSPENDED means that the driver allows its callbacks
to be skipped in the noirq and early phases of system resume, but
some additional conditions need to be met for that to happen (among
other things, the power.may_skip_resume flag needs to be set for the
device during system suspend for the driver callbacks to be skipped
during the subsequent resume transition).
For all devices with SMART_SUSPEND set whose driver callbacks are
invoked during system resume, the PM-runtime status will be set to
"active" (by the core).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2020-04-18 18:52:08 +02:00
. resume_early = pci_pm_resume_early ,
2008-10-06 22:46:05 +02:00
. freeze = pci_pm_freeze ,
. thaw = pci_pm_thaw ,
. poweroff = pci_pm_poweroff ,
PCI / PM: Take SMART_SUSPEND driver flag into account
Make the PCI bus type take DPM_FLAG_SMART_SUSPEND into account in its
system-wide PM callbacks and make sure that all code that should not
run in parallel with pci_pm_runtime_resume() is executed in the "late"
phases of system suspend, freeze and poweroff transitions.
[Note that the pm_runtime_suspended() check in pci_dev_keep_suspended()
is an optimization, because if it is not passed, all of the subsequent
checks may be skipped and some of them incur much more overhead in
general.]
Also use the observation that if the device is in runtime suspend
at the beginning of the "late" phase of a system-wide suspend-like
transition, its state cannot change going forward (runtime PM is
disabled for it at that time) until the transition is over and the
subsequent system-wide PM callbacks should be skipped for it (as
they generally assume the device to not be suspended), so add checks
for that in pci_pm_suspend_late/noirq(), pci_pm_freeze_late/noirq()
and pci_pm_poweroff_late/noirq().
Moreover, if pci_pm_resume_noirq() or pci_pm_restore_noirq() is
called during the subsequent system-wide resume transition and if
the device was left in runtime suspend previously, its runtime PM
status needs to be changed to "active" as it is going to be put
into the full-power state, so add checks for that too to these
functions.
In turn, if pci_pm_thaw_noirq() runs after the device has been
left in runtime suspend, the subsequent "thaw" callbacks need
to be skipped for it (as they may not work correctly with a
suspended device), so set the power.direct_complete flag for the
device then to make the PM core skip those callbacks.
In addition to the above add a core helper for checking if
DPM_FLAG_SMART_SUSPEND is set and the device runtime PM status is
"suspended" at the same time, which is done quite often in the new
code (and will be done elsewhere going forward too).
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
2017-10-26 12:12:22 +02:00
. poweroff_late = pci_pm_poweroff_late ,
2008-10-06 22:46:05 +02:00
. restore = pci_pm_restore ,
2008-05-20 00:49:04 +02:00
. suspend_noirq = pci_pm_suspend_noirq ,
. resume_noirq = pci_pm_resume_noirq ,
. freeze_noirq = pci_pm_freeze_noirq ,
. thaw_noirq = pci_pm_thaw_noirq ,
. poweroff_noirq = pci_pm_poweroff_noirq ,
. restore_noirq = pci_pm_restore_noirq ,
2010-02-17 23:44:58 +01:00
. runtime_suspend = pci_pm_runtime_suspend ,
. runtime_resume = pci_pm_runtime_resume ,
. runtime_idle = pci_pm_runtime_idle ,
2008-05-20 00:49:04 +02:00
} ;
2008-10-06 22:46:05 +02:00
# define PCI_PM_OPS_PTR (&pci_dev_pm_ops)
2008-05-20 00:49:04 +02:00
2014-11-27 23:16:57 +01:00
# else /* !CONFIG_PM */
# define pci_pm_runtime_suspend NULL
# define pci_pm_runtime_resume NULL
# define pci_pm_runtime_idle NULL
2008-05-20 00:49:04 +02:00
# define PCI_PM_OPS_PTR NULL
2014-11-27 23:16:57 +01:00
# endif /* !CONFIG_PM */
2008-05-20 00:49:04 +02:00
2005-04-16 15:20:36 -07:00
/**
2005-10-27 23:12:54 +02:00
* __pci_register_driver - register a new PCI driver
2005-04-16 15:20:36 -07:00
* @drv: the driver structure to register
2005-10-27 23:12:54 +02:00
* @owner: owner module of @drv
2007-02-10 14:41:56 -08:00
* @mod_name: module name string
2013-11-14 11:28:18 -07:00
*
2005-04-16 15:20:36 -07:00
* Fills in the struct device_driver embedded in @drv (name, bus, owner,
* mod_name, groups and dev_groups), initialises the dynids lock and list,
* and then passes the embedded driver to driver_register(), which adds
* the driver structure to the list of registered drivers.
2013-11-14 11:28:18 -07:00
* Returns a negative value on error, otherwise 0.
* If no error occurred, the driver remains registered even if
2005-04-16 15:20:36 -07:00
* no device was claimed during registration.
*/
2007-01-15 11:50:02 -08:00
int __pci_register_driver ( struct pci_driver * drv , struct module * owner ,
const char * mod_name )
2005-04-16 15:20:36 -07:00
{
/* initialize common driver fields */
drv - > driver . name = drv - > name ;
drv - > driver . bus = & pci_bus_type ;
2005-10-27 23:12:54 +02:00
drv - > driver . owner = owner ;
2007-01-15 11:50:02 -08:00
drv - > driver . mod_name = mod_name ;
2017-07-19 15:01:06 +02:00
drv - > driver . groups = drv - > groups ;
2021-05-12 10:26:39 -04:00
drv - > driver . dev_groups = drv - > dev_groups ;
2006-08-16 17:42:18 +01:00
2005-06-30 02:18:12 -07:00
/* initialise the dynids lock and an empty dynids list */
spin_lock_init ( & drv - > dynids . lock ) ;
INIT_LIST_HEAD ( & drv - > dynids . list ) ;
2005-04-16 15:20:36 -07:00
/* register with core; its return value is passed straight back */
2012-08-08 14:47:51 +04:00
return driver_register ( & drv - > driver ) ;
2005-04-16 15:20:36 -07:00
}
2014-04-25 14:32:25 -06:00
EXPORT_SYMBOL ( __pci_register_driver ) ;
2005-04-16 15:20:36 -07:00
/**
* pci_unregister_driver - unregister a PCI driver
* @drv: the driver structure to unregister
2013-11-14 11:28:18 -07:00
*
2005-04-16 15:20:36 -07:00
* Deletes the driver structure from the list of registered PCI drivers,
* gives it a chance to clean up by calling its remove() function for
* each device it was responsible for, and marks those devices as
* driverless. Once driver_unregister() has returned, pci_free_dynids()
* is called on @drv (presumably releasing the dynamic ID list set up at
* registration time; confirm against pci_free_dynids()).
*/
2014-04-18 20:13:49 -04:00
void pci_unregister_driver ( struct pci_driver * drv )
2005-04-16 15:20:36 -07:00
{
/* unregister from the driver core first, then drop the dynids */
driver_unregister ( & drv - > driver ) ;
pci_free_dynids ( drv ) ;
}
2014-04-25 14:32:25 -06:00
EXPORT_SYMBOL ( pci_unregister_driver ) ;
2005-04-16 15:20:36 -07:00
/*
* Placeholder driver: only its name is set. pci_dev_driver() returns a
* pointer to it for a device that has no driver pointer of its own but
* has at least one resource flagged IORESOURCE_BUSY.
*/
static struct pci_driver pci_compat_driver = {
. name = " compat "
} ;
/**
* pci_dev_driver - get the pci_driver of a device
* @dev: the device to query
*
2013-11-14 11:28:18 -07:00
* Returns @dev->driver when it is set. Otherwise returns the address of
* pci_compat_driver if any resource of @dev with an index from 0 up to
* and including PCI_ROM_RESOURCE has IORESOURCE_BUSY set, or %NULL if
2005-04-16 15:20:36 -07:00
* none of them has.
*/
2014-04-18 20:13:49 -04:00
struct pci_driver * pci_dev_driver ( const struct pci_dev * dev )
2005-04-16 15:20:36 -07:00
{
2020-10-06 16:49:17 -05:00
int i ;
2021-11-10 12:03:34 -06:00
/* a driver pointer stored in the device always wins */
if ( dev - > driver )
return dev - > driver ;
2020-10-06 16:49:17 -05:00
/* no driver pointer: a busy resource selects the compat placeholder */
for ( i = 0 ; i < = PCI_ROM_RESOURCE ; i + + )
if ( dev - > resource [ i ] . flags & IORESOURCE_BUSY )
return & pci_compat_driver ;
2005-04-16 15:20:36 -07:00
return NULL ;
}
2014-04-25 14:32:25 -06:00
EXPORT_SYMBOL ( pci_dev_driver ) ;
2005-04-16 15:20:36 -07:00
/**
* pci_bus_match - Tell if a PCI device structure has a matching PCI device id structure
* @dev: the PCI device structure to match against
2005-10-23 11:57:38 -07:00
* @drv: the device driver to search for matching PCI device id structures
2013-11-14 11:28:18 -07:00
*
2005-04-16 15:20:36 -07:00
* Used to check whether a PCI device present in the
2005-10-23 11:57:38 -07:00
* system is in the list of devices supported by @drv. Returns 1 when
2005-04-16 15:20:36 -07:00
* pci_match_device() finds a matching pci_device_id structure, and 0
* when it finds none or when the match_driver flag of the device is
* clear (in which case no lookup is attempted).
*/
2005-06-30 02:18:12 -07:00
static int pci_bus_match ( struct device * dev , struct device_driver * drv )
2005-04-16 15:20:36 -07:00
{
2005-06-30 02:18:12 -07:00
struct pci_dev * pci_dev = to_pci_dev ( dev ) ;
2013-01-21 13:20:51 -08:00
struct pci_driver * pci_drv ;
2005-04-16 15:20:36 -07:00
const struct pci_device_id * found_id ;
2013-01-21 13:20:51 -08:00
/* devices with match_driver clear never match any driver */
if ( ! pci_dev - > match_driver )
return 0 ;
pci_drv = to_pci_driver ( drv ) ;
2005-06-30 02:18:12 -07:00
found_id = pci_match_device ( pci_drv , pci_dev ) ;
2005-04-16 15:20:36 -07:00
/* only the existence of a match is reported, not the id itself */
if ( found_id )
return 1 ;
2005-06-30 02:18:12 -07:00
return 0 ;
2005-04-16 15:20:36 -07:00
}
/**
* pci_dev_get - increments the reference count of the pci device structure
* @ dev : the device being referenced
*
* Each live reference to a device should be refcounted .
*
* Drivers for PCI devices should normally record such references in
* their probe ( ) methods , when they bind to a device , and release
* them by calling pci_dev_put ( ) , in their disconnect ( ) methods .
*
* A pointer to the device with the incremented reference counter is returned .
*/
struct pci_dev * pci_dev_get ( struct pci_dev * dev )
{
if ( dev )
get_device ( & dev - > dev ) ;
return dev ;
}
2014-04-25 14:32:25 -06:00
EXPORT_SYMBOL ( pci_dev_get ) ;
2005-04-16 15:20:36 -07:00
/**
* pci_dev_put - release a use of the pci device structure
* @ dev : device that ' s been disconnected
*
* Must be called when a user of a device is finished with it . When the last
* user of the device calls this function , the memory of the device is freed .
*/
void pci_dev_put ( struct pci_dev * dev )
{
if ( dev )
put_device ( & dev - > dev ) ;
}
2014-04-25 14:32:25 -06:00
EXPORT_SYMBOL ( pci_dev_put ) ;
2005-04-16 15:20:36 -07:00
2023-01-11 12:30:17 +01:00
/*
* pci_uevent - add PCI identification variables to a uevent environment
* @dev: device the uevent is being generated for
* @env: uevent environment the variables are appended to
*
* Appends PCI_CLASS, PCI_ID, PCI_SUBSYS_ID, PCI_SLOT_NAME and MODALIAS,
* in that order, through add_uevent_var(). The MODALIAS value is built
* from vendor, device, subsystem vendor, subsystem device and the three
* low bytes of the class field, all printed as uppercase hex.
*
* Returns -ENODEV if @dev is NULL, -ENOMEM as soon as one of the
* add_uevent_var() calls returns non-zero, and 0 otherwise.
*/
static int pci_uevent ( const struct device * dev , struct kobj_uevent_env * env )
2012-11-21 15:34:58 -05:00
{
2023-01-11 12:30:17 +01:00
const struct pci_dev * pdev ;
2012-11-21 15:34:58 -05:00
if ( ! dev )
return - ENODEV ;
pdev = to_pci_dev ( dev ) ;
if ( add_uevent_var ( env , " PCI_CLASS=%04X " , pdev - > class ) )
return - ENOMEM ;
if ( add_uevent_var ( env , " PCI_ID=%04X:%04X " , pdev - > vendor , pdev - > device ) )
return - ENOMEM ;
if ( add_uevent_var ( env , " PCI_SUBSYS_ID=%04X:%04X " , pdev - > subsystem_vendor ,
pdev - > subsystem_device ) )
return - ENOMEM ;
if ( add_uevent_var ( env , " PCI_SLOT_NAME=%s " , pci_name ( pdev ) ) )
return - ENOMEM ;
PCI: Generate uppercase hex for modalias var in uevent
Some implementations of modprobe fail to load the driver for a PCI device
automatically because the "interface" part of the modalias from the kernel
is lowercase, and the modalias from file2alias is uppercase.
The "interface" is the low-order byte of the Class Code, defined in PCI
r3.0, Appendix D. Most interface types defined in the spec do not use
alpha characters, so they won't be affected. For example, 00h, 01h, 10h,
20h, etc. are unaffected.
Print the "interface" byte of the Class Code in uppercase hex, as we
already do for the Vendor ID, Device ID, Class, etc.
Commit 89ec3dcf17fd ("PCI: Generate uppercase hex for modalias interface
class") fixed only half of the problem. Some udev implementations rely on
the uevent file and not the modalias file.
Fixes: d1ded203adf1 ("PCI: add MODALIAS to hotplug event for pci devices")
Fixes: 89ec3dcf17fd ("PCI: Generate uppercase hex for modalias interface class")
Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
CC: stable@vger.kernel.org
2014-12-02 17:35:04 +01:00
if ( add_uevent_var ( env , " MODALIAS=pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02X " ,
2012-11-21 15:34:58 -05:00
pdev - > vendor , pdev - > device ,
pdev - > subsystem_vendor , pdev - > subsystem_device ,
( u8 ) ( pdev - > class > > 16 ) , ( u8 ) ( pdev - > class > > 8 ) ,
( u8 ) ( pdev - > class ) ) )
return - ENOMEM ;
2014-04-05 15:08:22 -06:00
2012-11-21 15:34:58 -05:00
return 0 ;
}
2021-07-31 14:39:04 +02:00
# if defined(CONFIG_PCIEAER) || defined(CONFIG_EEH)
2018-02-08 23:20:35 +11:00
/**
* pci_uevent_ers - emit a uevent during recovery path of PCI device
* @pdev: PCI device undergoing error recovery
* @err_type: type of error event
*
* Sends a KOBJ_CHANGE uevent on the kobject of @pdev carrying an
* ERROR_EVENT and a DEVICE_ONLINE variable chosen from @err_type.
* For any @err_type not listed in the switch below no uevent is sent.
*/
void pci_uevent_ers ( struct pci_dev * pdev , enum pci_ers_result err_type )
{
int idx = 0 ;
/* room for the two variables plus the terminating NULL entry */
char * envp [ 3 ] ;
switch ( err_type ) {
case PCI_ERS_RESULT_NONE :
case PCI_ERS_RESULT_CAN_RECOVER :
envp [ idx + + ] = " ERROR_EVENT=BEGIN_RECOVERY " ;
envp [ idx + + ] = " DEVICE_ONLINE=0 " ;
break ;
case PCI_ERS_RESULT_RECOVERED :
envp [ idx + + ] = " ERROR_EVENT=SUCCESSFUL_RECOVERY " ;
envp [ idx + + ] = " DEVICE_ONLINE=1 " ;
break ;
case PCI_ERS_RESULT_DISCONNECT :
envp [ idx + + ] = " ERROR_EVENT=FAILED_RECOVERY " ;
envp [ idx + + ] = " DEVICE_ONLINE=0 " ;
break ;
default :
/* idx stays 0, so nothing is emitted below */
break ;
}
/* idx is non-zero only when one of the cases above filled envp */
if ( idx > 0 ) {
envp [ idx + + ] = NULL ;
kobject_uevent_env ( & pdev - > dev . kobj , KOBJ_CHANGE , envp ) ;
}
}
# endif
2017-01-18 14:04:38 +01:00
/*
 * pci_bus_num_vf - num_vf callback of the PCI bus type
 * @dev: generic device embedded in a struct pci_dev
 *
 * Converts @dev to its struct pci_dev and returns whatever pci_num_vf()
 * reports for it.
 */
static int pci_bus_num_vf(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);

	return pci_num_vf(pdev);
}
2018-04-28 08:21:58 +05:30
/**
* pci_dma_configure - Setup DMA configuration
* @ dev : ptr to dev structure
*
* Function to update a PCI device's DMA configuration using the same
* info from the OF node or ACPI node of the host bridge's parent (if any).
*/
static int pci_dma_configure ( struct device * dev )
{
bus: platform,amba,fsl-mc,PCI: Add device DMA ownership management
The devices on platform/amba/fsl-mc/PCI buses could be bound to drivers
with the device DMA managed by kernel drivers or user-space applications.
Unfortunately, multiple devices may be placed in the same IOMMU group
because they cannot be isolated from each other. The DMA on these devices
must either be entirely under kernel control or userspace control, never
a mixture. Otherwise the driver integrity is not guaranteed because they
could access each other through the peer-to-peer accesses which by-pass
the IOMMU protection.
This checks and sets the default DMA mode during driver binding, and
cleanups during driver unbinding. In the default mode, the device DMA is
managed by the device driver which handles DMA operations through the
kernel DMA APIs (see Documentation/core-api/dma-api.rst).
For cases where the devices are assigned for userspace control through the
userspace driver framework (i.e. VFIO), the drivers (for example, vfio_pci/
vfio_platform etc.) may set a new flag (driver_managed_dma) to skip this
default setting on the assumption that the drivers know what they are
doing with the device DMA.
Calling iommu_device_use_default_domain() before {of,acpi}_dma_configure
is currently a problem. As things stand, the IOMMU driver ignores the
initial iommu_probe_device() call when the device is added, since at
that point it has no fwspec yet. In this situation,
{of,acpi}_iommu_configure() retrigger iommu_probe_device() after
the IOMMU driver has seen the firmware data via .of_xlate to learn that
it is actually responsible for the given device. As a result, until
that gets fixed, iommu_device_use_default_domain() goes at the end, and
arch_teardown_dma_ops() is called if it fails.
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Stuart Yoder <stuyoder@gmail.com>
Cc: Laurentiu Tudor <laurentiu.tudor@nxp.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Link: https://lore.kernel.org/r/20220418005000.897664-5-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
2022-04-18 08:49:53 +08:00
struct pci_driver * driver = to_pci_driver ( dev - > driver ) ;
2018-04-28 08:21:58 +05:30
struct device * bridge ;
int ret = 0 ;
bridge = pci_get_host_bridge_device ( to_pci_dev ( dev ) ) ;
if ( IS_ENABLED ( CONFIG_OF ) & & bridge - > parent & &
bridge - > parent - > of_node ) {
2018-05-03 16:25:08 +02:00
ret = of_dma_configure ( dev , bridge - > parent - > of_node , true ) ;
2018-04-28 08:21:58 +05:30
} else if ( has_acpi_companion ( bridge ) ) {
struct acpi_device * adev = to_acpi_device_node ( bridge - > fwnode ) ;
2018-12-06 13:20:49 -08:00
ret = acpi_dma_configure ( dev , acpi_get_dma_attr ( adev ) ) ;
2018-04-28 08:21:58 +05:30
}
pci_put_host_bridge_device ( bridge ) ;
bus: platform,amba,fsl-mc,PCI: Add device DMA ownership management
The devices on platform/amba/fsl-mc/PCI buses could be bound to drivers
with the device DMA managed by kernel drivers or user-space applications.
Unfortunately, multiple devices may be placed in the same IOMMU group
because they cannot be isolated from each other. The DMA on these devices
must either be entirely under kernel control or userspace control, never
a mixture. Otherwise the driver integrity is not guaranteed because they
could access each other through the peer-to-peer accesses which by-pass
the IOMMU protection.
This checks and sets the default DMA mode during driver binding, and
cleanups during driver unbinding. In the default mode, the device DMA is
managed by the device driver which handles DMA operations through the
kernel DMA APIs (see Documentation/core-api/dma-api.rst).
For cases where the devices are assigned for userspace control through the
userspace driver framework (i.e. VFIO), the drivers (for example, vfio_pci/
vfio_platform etc.) may set a new flag (driver_managed_dma) to skip this
default setting on the assumption that the drivers know what they are
doing with the device DMA.
Calling iommu_device_use_default_domain() before {of,acpi}_dma_configure
is currently a problem. As things stand, the IOMMU driver ignores the
initial iommu_probe_device() call when the device is added, since at
that point it has no fwspec yet. In this situation,
{of,acpi}_iommu_configure() retrigger iommu_probe_device() after
the IOMMU driver has seen the firmware data via .of_xlate to learn that
it is actually responsible for the given device. As a result, until
that gets fixed, iommu_device_use_default_domain() goes at the end, and
arch_teardown_dma_ops() is called if it fails.
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Stuart Yoder <stuyoder@gmail.com>
Cc: Laurentiu Tudor <laurentiu.tudor@nxp.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Link: https://lore.kernel.org/r/20220418005000.897664-5-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
2022-04-18 08:49:53 +08:00
if ( ! ret & & ! driver - > driver_managed_dma ) {
ret = iommu_device_use_default_domain ( dev ) ;
if ( ret )
arch_teardown_dma_ops ( dev ) ;
}
2018-04-28 08:21:58 +05:30
return ret ;
}
bus: platform,amba,fsl-mc,PCI: Add device DMA ownership management
The devices on platform/amba/fsl-mc/PCI buses could be bound to drivers
with the device DMA managed by kernel drivers or user-space applications.
Unfortunately, multiple devices may be placed in the same IOMMU group
because they cannot be isolated from each other. The DMA on these devices
must either be entirely under kernel control or userspace control, never
a mixture. Otherwise the driver integrity is not guaranteed because they
could access each other through the peer-to-peer accesses which by-pass
the IOMMU protection.
This checks and sets the default DMA mode during driver binding, and
cleanups during driver unbinding. In the default mode, the device DMA is
managed by the device driver which handles DMA operations through the
kernel DMA APIs (see Documentation/core-api/dma-api.rst).
For cases where the devices are assigned for userspace control through the
userspace driver framework (i.e. VFIO), the drivers (for example, vfio_pci/
vfio_platform etc.) may set a new flag (driver_managed_dma) to skip this
default setting on the assumption that the drivers know what they are
doing with the device DMA.
Calling iommu_device_use_default_domain() before {of,acpi}_dma_configure
is currently a problem. As things stand, the IOMMU driver ignores the
initial iommu_probe_device() call when the device is added, since at
that point it has no fwspec yet. In this situation,
{of,acpi}_iommu_configure() retrigger iommu_probe_device() after
the IOMMU driver has seen the firmware data via .of_xlate to learn that
it is actually responsible for the given device. As a result, until
that gets fixed, iommu_device_use_default_domain() goes at the end, and
arch_teardown_dma_ops() is called if it fails.
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Stuart Yoder <stuyoder@gmail.com>
Cc: Laurentiu Tudor <laurentiu.tudor@nxp.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Link: https://lore.kernel.org/r/20220418005000.897664-5-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
2022-04-18 08:49:53 +08:00
static void pci_dma_cleanup ( struct device * dev )
{
struct pci_driver * driver = to_pci_driver ( dev - > driver ) ;
if ( ! driver - > driver_managed_dma )
iommu_device_unuse_default_domain ( dev ) ;
}
2005-04-16 15:20:36 -07:00
struct bus_type pci_bus_type = {
. name = " pci " ,
. match = pci_bus_match ,
2005-11-16 09:00:00 +01:00
. uevent = pci_uevent ,
2006-01-05 14:30:22 +00:00
. probe = pci_device_probe ,
. remove = pci_device_remove ,
2006-06-24 14:50:29 -07:00
. shutdown = pci_device_shutdown ,
2013-10-06 23:55:40 -07:00
. dev_groups = pci_dev_groups ,
2013-10-07 14:51:02 -06:00
. bus_groups = pci_bus_groups ,
2013-10-07 14:51:20 -06:00
. drv_groups = pci_drv_groups ,
2008-05-20 00:49:04 +02:00
. pm = PCI_PM_OPS_PTR ,
2017-01-18 14:04:38 +01:00
. num_vf = pci_bus_num_vf ,
2018-04-28 08:21:58 +05:30
. dma_configure = pci_dma_configure ,
bus: platform,amba,fsl-mc,PCI: Add device DMA ownership management
The devices on platform/amba/fsl-mc/PCI buses could be bound to drivers
with the device DMA managed by kernel drivers or user-space applications.
Unfortunately, multiple devices may be placed in the same IOMMU group
because they cannot be isolated from each other. The DMA on these devices
must either be entirely under kernel control or userspace control, never
a mixture. Otherwise the driver integrity is not guaranteed because they
could access each other through the peer-to-peer accesses which by-pass
the IOMMU protection.
This checks and sets the default DMA mode during driver binding, and
cleanups during driver unbinding. In the default mode, the device DMA is
managed by the device driver which handles DMA operations through the
kernel DMA APIs (see Documentation/core-api/dma-api.rst).
For cases where the devices are assigned for userspace control through the
userspace driver framework (i.e. VFIO), the drivers (for example, vfio_pci/
vfio_platform etc.) may set a new flag (driver_managed_dma) to skip this
default setting on the assumption that the drivers know what they are
doing with the device DMA.
Calling iommu_device_use_default_domain() before {of,acpi}_dma_configure
is currently a problem. As things stand, the IOMMU driver ignores the
initial iommu_probe_device() call when the device is added, since at
that point it has no fwspec yet. In this situation,
{of,acpi}_iommu_configure() retrigger iommu_probe_device() after
the IOMMU driver has seen the firmware data via .of_xlate to learn that
it is actually responsible for the given device. As a result, until
that gets fixed, iommu_device_use_default_domain() goes at the end, and
arch_teardown_dma_ops() is called if it fails.
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Stuart Yoder <stuyoder@gmail.com>
Cc: Laurentiu Tudor <laurentiu.tudor@nxp.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Link: https://lore.kernel.org/r/20220418005000.897664-5-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
2022-04-18 08:49:53 +08:00
. dma_cleanup = pci_dma_cleanup ,
2005-04-16 15:20:36 -07:00
} ;
2014-04-25 14:32:25 -06:00
EXPORT_SYMBOL ( pci_bus_type ) ;
2005-04-16 15:20:36 -07:00
2018-03-09 11:06:56 -06:00
# ifdef CONFIG_PCIEPORTBUS
static int pcie_port_bus_match ( struct device * dev , struct device_driver * drv )
{
struct pcie_device * pciedev ;
struct pcie_port_service_driver * driver ;
if ( drv - > bus ! = & pcie_port_bus_type | | dev - > bus ! = & pcie_port_bus_type )
return 0 ;
pciedev = to_pcie_device ( dev ) ;
driver = to_service_driver ( drv ) ;
if ( driver - > service ! = pciedev - > service )
return 0 ;
if ( driver - > port_type ! = PCIE_ANY_PORT & &
driver - > port_type ! = pci_pcie_type ( pciedev - > port ) )
return 0 ;
return 1 ;
}
/*
* Bus type for PCIe port service devices. Only a name and a match
* callback are provided here; pci_driver_init() registers it when
* CONFIG_PCIEPORTBUS is defined.
*/
struct bus_type pcie_port_bus_type = {
. name = " pci_express " ,
. match = pcie_port_bus_match ,
} ;
# endif
2005-04-16 15:20:36 -07:00
/*
* pci_driver_init - register the PCI bus types
*
* Registers pci_bus_type and, when CONFIG_PCIEPORTBUS is defined,
* pcie_port_bus_type, then passes pci_bus_type to dma_debug_add_bus().
*
* Returns 0 on success or the non-zero value returned by the
* bus_register() call that failed.
*/
static int __init pci_driver_init ( void )
{
2018-03-09 11:06:56 -06:00
int ret ;
ret = bus_register ( & pci_bus_type ) ;
if ( ret )
return ret ;
# ifdef CONFIG_PCIEPORTBUS
ret = bus_register ( & pcie_port_bus_type ) ;
/*
* NOTE(review): this error path returns without unregistering
* pci_bus_type, so the PCI bus stays registered even though the
* initcall reports failure. Confirm that this is intentional.
*/
if ( ret )
return ret ;
# endif
2018-07-30 09:38:42 +02:00
dma_debug_add_bus ( & pci_bus_type ) ;
2018-03-09 11:06:56 -06:00
return 0 ;
2005-04-16 15:20:36 -07:00
}
postcore_initcall ( pci_driver_init ) ;