// SPDX-License-Identifier: GPL-2.0
/*
 * PCI detection and setup code
 */
# include <linux/kernel.h>
# include <linux/delay.h>
# include <linux/init.h>
# include <linux/pci.h>
2019-09-03 14:30:59 +03:00
# include <linux/msi.h>
2015-10-29 01:50:53 +03:00
# include <linux/of_device.h>
2015-03-03 20:52:13 +03:00
# include <linux/of_pci.h>
2014-09-13 06:02:00 +04:00
# include <linux/pci_hotplug.h>
2005-04-17 02:20:36 +04:00
# include <linux/slab.h>
# include <linux/module.h>
# include <linux/cpumask.h>
2015-09-17 18:09:37 +03:00
# include <linux/aer.h>
2015-10-29 01:50:54 +03:00
# include <linux/acpi.h>
2018-03-07 10:39:13 +03:00
# include <linux/hypervisor.h>
2016-02-17 00:56:22 +03:00
# include <linux/irqdomain.h>
2016-06-02 11:17:13 +03:00
# include <linux/pm_runtime.h>
2005-04-08 09:53:31 +04:00
# include "pci.h"
2005-04-17 02:20:36 +04:00
/* secondary latency timer */
#define CARDBUS_LATENCY_TIMER	176
/* NOTE(review): presumably the count of bus numbers reserved for CardBus -- confirm at the use site */
#define CARDBUS_RESERVE_BUSNR	3
2014-01-11 04:14:48 +04:00
static struct resource busn_resource = {
2012-05-18 05:51:12 +04:00
. name = " PCI busn " ,
. start = 0 ,
. end = 255 ,
. flags = IORESOURCE_BUS ,
} ;
2005-04-17 02:20:36 +04:00
/*
 * Global list head for the PCI root buses.
 * Ugh: this is still exported to modules, which needs to stop.
 */
LIST_HEAD(pci_root_buses);
EXPORT_SYMBOL(pci_root_buses);
2012-05-18 05:51:11 +04:00
/* Per-domain bus number resources; searched and extended by get_pci_domain_busn_res(). */
static LIST_HEAD(pci_domain_busn_res_list);

/* Bus number resource recorded for one PCI domain. */
struct pci_domain_busn_res {
	struct list_head list;	/* link in pci_domain_busn_res_list */
	struct resource res;	/* bus number range handed out for the domain */
	int domain_nr;		/* domain number used as the lookup key */
};
static struct resource * get_pci_domain_busn_res ( int domain_nr )
{
struct pci_domain_busn_res * r ;
list_for_each_entry ( r , & pci_domain_busn_res_list , list )
if ( r - > domain_nr = = domain_nr )
return & r - > res ;
r = kzalloc ( sizeof ( * r ) , GFP_KERNEL ) ;
if ( ! r )
return NULL ;
r - > domain_nr = domain_nr ;
r - > res . start = 0 ;
r - > res . end = 0xff ;
r - > res . flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED ;
list_add_tail ( & r - > list , & pci_domain_busn_res_list ) ;
return & r - > res ;
}
2007-07-16 10:39:39 +04:00
/*
2017-11-30 19:58:14 +03:00
* Some device drivers need know if PCI is initiated .
* Basically , we think PCI is not initiated when there
2008-02-14 09:30:39 +03:00
* is no device to be found on the pci_bus_type .
2007-07-16 10:39:39 +04:00
*/
int no_pci_devices ( void )
{
2008-02-14 09:30:39 +03:00
struct device * dev ;
int no_devices ;
2007-07-16 10:39:39 +04:00
2019-07-24 01:18:37 +03:00
dev = bus_find_next_device ( & pci_bus_type , NULL ) ;
2008-02-14 09:30:39 +03:00
no_devices = ( dev = = NULL ) ;
put_device ( dev ) ;
return no_devices ;
}
2007-07-16 10:39:39 +04:00
EXPORT_SYMBOL ( no_pci_devices ) ;
2005-04-17 02:20:36 +04:00
/*
* PCI Bus Class
*/
2007-05-23 06:47:54 +04:00
static void release_pcibus_dev ( struct device * dev )
2005-04-17 02:20:36 +04:00
{
2007-05-23 06:47:54 +04:00
struct pci_bus * pci_bus = to_pci_bus ( dev ) ;
2005-04-17 02:20:36 +04:00
2014-11-11 07:02:17 +03:00
put_device ( pci_bus - > bridge ) ;
2010-02-23 20:24:36 +03:00
pci_bus_remove_resources ( pci_bus ) ;
2011-04-11 05:37:07 +04:00
pci_release_bus_of_node ( pci_bus ) ;
2005-04-17 02:20:36 +04:00
kfree ( pci_bus ) ;
}
static struct class pcibus_class = {
. name = " pci_bus " ,
2007-05-23 06:47:54 +04:00
. dev_release = & release_pcibus_dev ,
2013-07-25 02:05:17 +04:00
. dev_groups = pcibus_groups ,
2005-04-17 02:20:36 +04:00
} ;
/* Register pcibus_class; hooked up as a postcore initcall. */
static int __init pcibus_class_init(void)
{
	int err = class_register(&pcibus_class);

	return err;
}
postcore_initcall(pcibus_class_init);
2008-07-28 21:38:59 +04:00
/*
 * Compute the decode size of a BAR from a sizing probe.
 *
 * @base:    BAR value before the probe (address bits only)
 * @maxbase: value read back after the probe write (address bits only)
 * @mask:    mask of the bits that can carry address information
 *
 * Returns the size in bytes, or 0 if no size can be derived.
 */
static u64 pci_size(u64 base, u64 maxbase, u64 mask)
{
	u64 size = mask & maxbase;	/* Find the significant bits */

	if (!size)
		return 0;

	/*
	 * Get the lowest of them to find the decode size, and from that
	 * the extent.
	 */
	size = size & ~(size - 1);

	/*
	 * base == maxbase can be valid only if the BAR has already been
	 * programmed with all 1s.
	 */
	if (base == maxbase && ((base | (size - 1)) & mask) != mask)
		return 0;

	return size;
}
2011-06-14 23:04:35 +04:00
/*
 * Translate the low bits of a raw BAR value into IORESOURCE_* flags.
 *
 * @dev: the PCI device (not used by the decoding itself)
 * @bar: raw BAR value read from config space
 *
 * For an I/O BAR the non-address bits are kept and IORESOURCE_IO is added.
 * For a memory BAR the non-address bits are kept, IORESOURCE_MEM is added,
 * and IORESOURCE_PREFETCH / IORESOURCE_MEM_64 are set as indicated.
 *
 * The low four bits of a memory BAR are "PTT0" where P=1 for prefetchable
 * BARs, and TT is as follows:
 *
 *   00  32-bit BAR, anywhere in lower 4GB
 *   01  anywhere below 1MB (reserved as of PCI 2.2)
 *   10  64-bit BAR
 *   11  reserved
 *
 * Only type "10" is treated as 64-bit; the 1M and reserved types are
 * handled like 32-bit BARs.
 */
static inline unsigned long decode_bar(struct pci_dev *dev, u32 bar)
{
	u32 mem_type;
	unsigned long flags;

	if ((bar & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) {
		flags = bar & ~PCI_BASE_ADDRESS_IO_MASK;
		flags |= IORESOURCE_IO;
		return flags;
	}

	flags = bar & ~PCI_BASE_ADDRESS_MEM_MASK;
	flags |= IORESOURCE_MEM;
	if (flags & PCI_BASE_ADDRESS_MEM_PREFETCH)
		flags |= IORESOURCE_PREFETCH;

	mem_type = bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK;
	switch (mem_type) {
	case PCI_BASE_ADDRESS_MEM_TYPE_32:
		break;
	case PCI_BASE_ADDRESS_MEM_TYPE_1M:
		/* 1M mem BAR treated as 32-bit BAR */
		break;
	case PCI_BASE_ADDRESS_MEM_TYPE_64:
		flags |= IORESOURCE_MEM_64;
		break;
	default:
		/* mem unknown type treated as 32-bit BAR */
		break;
	}
	return flags;
}
2013-08-23 02:19:18 +04:00
/* PCI_COMMAND bits that __pci_read_base() clears while it probes a BAR */
#define PCI_COMMAND_DECODE_ENABLE	(PCI_COMMAND_MEMORY | PCI_COMMAND_IO)
2008-11-21 21:40:40 +03:00
/**
2017-11-30 19:58:14 +03:00
* pci_read_base - Read a PCI BAR
2008-11-21 21:40:40 +03:00
* @ dev : the PCI device
* @ type : type of the BAR
* @ res : resource buffer to be filled in
* @ pos : BAR position in the config space
*
* Returns 1 if the BAR is 64 - bit , or 0 if 32 - bit .
2008-07-28 21:38:59 +04:00
*/
2008-11-21 21:40:40 +03:00
int __pci_read_base ( struct pci_dev * dev , enum pci_bar_type type ,
2014-04-19 04:13:49 +04:00
struct resource * res , unsigned int pos )
2006-11-30 00:53:10 +03:00
{
2017-04-10 20:46:54 +03:00
u32 l = 0 , sz = 0 , mask ;
2014-04-15 01:25:54 +04:00
u64 l64 , sz64 , mask64 ;
2010-07-16 21:19:22 +04:00
u16 orig_cmd ;
2013-05-25 15:36:27 +04:00
struct pci_bus_region region , inverted_region ;
2008-07-28 21:38:59 +04:00
2009-10-29 18:24:59 +03:00
mask = type ? PCI_ROM_ADDRESS_MASK : ~ 0 ;
2008-07-28 21:38:59 +04:00
2012-08-23 20:53:08 +04:00
/* No printks while decoding is disabled! */
2010-07-16 21:19:22 +04:00
if ( ! dev - > mmio_always_on ) {
pci_read_config_word ( dev , PCI_COMMAND , & orig_cmd ) ;
2013-08-23 02:19:18 +04:00
if ( orig_cmd & PCI_COMMAND_DECODE_ENABLE ) {
pci_write_config_word ( dev , PCI_COMMAND ,
orig_cmd & ~ PCI_COMMAND_DECODE_ENABLE ) ;
}
2010-07-16 21:19:22 +04:00
}
2008-07-28 21:38:59 +04:00
res - > name = pci_name ( dev ) ;
pci_read_config_dword ( dev , pos , & l ) ;
2009-10-29 18:24:59 +03:00
pci_write_config_dword ( dev , pos , l | mask ) ;
2008-07-28 21:38:59 +04:00
pci_read_config_dword ( dev , pos , & sz ) ;
pci_write_config_dword ( dev , pos , l ) ;
/*
* All bits set in sz means the device isn ' t working properly .
2010-04-22 19:02:43 +04:00
* If the BAR isn ' t implemented , all bits must be 0. If it ' s a
* memory BAR or a ROM , bit 0 must be clear ; if it ' s an io BAR , bit
* 1 must be clear .
2008-07-28 21:38:59 +04:00
*/
2014-10-30 20:54:43 +03:00
if ( sz = = 0xffffffff )
sz = 0 ;
2008-07-28 21:38:59 +04:00
/*
* I don ' t know how l can have all bits set . Copied from old code .
* Maybe it fixes a bug on some ancient platform .
*/
if ( l = = 0xffffffff )
l = 0 ;
if ( type = = pci_bar_unknown ) {
2011-06-14 23:04:35 +04:00
res - > flags = decode_bar ( dev , l ) ;
res - > flags | = IORESOURCE_SIZEALIGN ;
if ( res - > flags & IORESOURCE_IO ) {
2014-10-30 20:54:43 +03:00
l64 = l & PCI_BASE_ADDRESS_IO_MASK ;
sz64 = sz & PCI_BASE_ADDRESS_IO_MASK ;
mask64 = PCI_BASE_ADDRESS_IO_MASK & ( u32 ) IO_SPACE_LIMIT ;
2008-07-28 21:38:59 +04:00
} else {
2014-10-30 20:54:43 +03:00
l64 = l & PCI_BASE_ADDRESS_MEM_MASK ;
sz64 = sz & PCI_BASE_ADDRESS_MEM_MASK ;
mask64 = ( u32 ) PCI_BASE_ADDRESS_MEM_MASK ;
2008-07-28 21:38:59 +04:00
}
} else {
2016-11-29 02:21:02 +03:00
if ( l & PCI_ROM_ADDRESS_ENABLE )
res - > flags | = IORESOURCE_ROM_ENABLE ;
2014-10-30 20:54:43 +03:00
l64 = l & PCI_ROM_ADDRESS_MASK ;
sz64 = sz & PCI_ROM_ADDRESS_MASK ;
2017-04-14 23:38:02 +03:00
mask64 = PCI_ROM_ADDRESS_MASK ;
2008-07-28 21:38:59 +04:00
}
2011-06-14 23:04:35 +04:00
if ( res - > flags & IORESOURCE_MEM_64 ) {
2008-07-28 21:38:59 +04:00
pci_read_config_dword ( dev , pos + 4 , & l ) ;
pci_write_config_dword ( dev , pos + 4 , ~ 0 ) ;
pci_read_config_dword ( dev , pos + 4 , & sz ) ;
pci_write_config_dword ( dev , pos + 4 , l ) ;
l64 | = ( ( u64 ) l < < 32 ) ;
sz64 | = ( ( u64 ) sz < < 32 ) ;
2014-10-30 20:54:43 +03:00
mask64 | = ( ( u64 ) ~ 0 < < 32 ) ;
}
2008-07-28 21:38:59 +04:00
2014-10-30 20:54:43 +03:00
if ( ! dev - > mmio_always_on & & ( orig_cmd & PCI_COMMAND_DECODE_ENABLE ) )
pci_write_config_word ( dev , PCI_COMMAND , orig_cmd ) ;
2008-07-28 21:38:59 +04:00
2014-10-30 20:54:43 +03:00
if ( ! sz64 )
goto fail ;
2008-07-28 21:38:59 +04:00
2014-10-30 20:54:43 +03:00
sz64 = pci_size ( l64 , sz64 , mask64 ) ;
2014-10-30 20:54:50 +03:00
if ( ! sz64 ) {
2018-01-18 21:55:24 +03:00
pci_info ( dev , FW_BUG " reg 0x%x: invalid BAR (can't size) \n " ,
2014-10-30 20:54:50 +03:00
pos ) ;
2014-10-30 20:54:43 +03:00
goto fail ;
2014-10-30 20:54:50 +03:00
}
2014-10-30 20:54:43 +03:00
if ( res - > flags & IORESOURCE_MEM_64 ) {
PCI: Add pci_bus_addr_t
David Ahern reported that d63e2e1f3df9 ("sparc/PCI: Clip bridge windows
to fit in upstream windows") fails to boot on sparc/T5-8:
pci 0000:06:00.0: reg 0x184: can't handle BAR above 4GB (bus address 0x110204000)
The problem is that sparc64 assumed that dma_addr_t only needed to hold DMA
addresses, i.e., bus addresses returned via the DMA API (dma_map_single(),
etc.), while the PCI core assumed dma_addr_t could hold *any* bus address,
including raw BAR values. On sparc64, all DMA addresses fit in 32 bits, so
dma_addr_t is a 32-bit type. However, BAR values can be 64 bits wide, so
they don't fit in a dma_addr_t. d63e2e1f3df9 added new checking that
tripped over this mismatch.
Add pci_bus_addr_t, which is wide enough to hold any PCI bus address,
including both raw BAR values and DMA addresses. This will be 64 bits
on 64-bit platforms and on platforms with a 64-bit dma_addr_t. Then
dma_addr_t only needs to be wide enough to hold addresses from the DMA API.
[bhelgaas: changelog, bugzilla, Kconfig to ensure pci_bus_addr_t is at
least as wide as dma_addr_t, documentation]
Fixes: d63e2e1f3df9 ("sparc/PCI: Clip bridge windows to fit in upstream windows")
Fixes: 23b13bc76f35 ("PCI: Fail safely if we can't handle BARs larger than 4GB")
Link: http://lkml.kernel.org/r/CAE9FiQU1gJY1LYrxs+ma5LCTEEe4xmtjRG0aXJ9K_Tsu+m9Wuw@mail.gmail.com
Link: http://lkml.kernel.org/r/1427857069-6789-1-git-send-email-yinghai@kernel.org
Link: https://bugzilla.kernel.org/show_bug.cgi?id=96231
Reported-by: David Ahern <david.ahern@oracle.com>
Tested-by: David Ahern <david.ahern@oracle.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: David S. Miller <davem@davemloft.net>
CC: stable@vger.kernel.org # v3.19+
2015-05-28 03:23:51 +03:00
if ( ( sizeof ( pci_bus_addr_t ) < 8 | | sizeof ( resource_size_t ) < 8 )
& & sz64 > 0x100000000ULL ) {
2014-04-15 01:25:54 +04:00
res - > flags | = IORESOURCE_UNSET | IORESOURCE_DISABLED ;
res - > start = 0 ;
res - > end = 0 ;
2018-01-18 21:55:24 +03:00
pci_err ( dev , " reg 0x%x: can't handle BAR larger than 4GB (size %#010llx) \n " ,
2014-10-30 20:54:43 +03:00
pos , ( unsigned long long ) sz64 ) ;
2014-04-15 01:25:54 +04:00
goto out ;
2009-10-27 22:26:47 +03:00
}
PCI: Add pci_bus_addr_t
David Ahern reported that d63e2e1f3df9 ("sparc/PCI: Clip bridge windows
to fit in upstream windows") fails to boot on sparc/T5-8:
pci 0000:06:00.0: reg 0x184: can't handle BAR above 4GB (bus address 0x110204000)
The problem is that sparc64 assumed that dma_addr_t only needed to hold DMA
addresses, i.e., bus addresses returned via the DMA API (dma_map_single(),
etc.), while the PCI core assumed dma_addr_t could hold *any* bus address,
including raw BAR values. On sparc64, all DMA addresses fit in 32 bits, so
dma_addr_t is a 32-bit type. However, BAR values can be 64 bits wide, so
they don't fit in a dma_addr_t. d63e2e1f3df9 added new checking that
tripped over this mismatch.
Add pci_bus_addr_t, which is wide enough to hold any PCI bus address,
including both raw BAR values and DMA addresses. This will be 64 bits
on 64-bit platforms and on platforms with a 64-bit dma_addr_t. Then
dma_addr_t only needs to be wide enough to hold addresses from the DMA API.
[bhelgaas: changelog, bugzilla, Kconfig to ensure pci_bus_addr_t is at
least as wide as dma_addr_t, documentation]
Fixes: d63e2e1f3df9 ("sparc/PCI: Clip bridge windows to fit in upstream windows")
Fixes: 23b13bc76f35 ("PCI: Fail safely if we can't handle BARs larger than 4GB")
Link: http://lkml.kernel.org/r/CAE9FiQU1gJY1LYrxs+ma5LCTEEe4xmtjRG0aXJ9K_Tsu+m9Wuw@mail.gmail.com
Link: http://lkml.kernel.org/r/1427857069-6789-1-git-send-email-yinghai@kernel.org
Link: https://bugzilla.kernel.org/show_bug.cgi?id=96231
Reported-by: David Ahern <david.ahern@oracle.com>
Tested-by: David Ahern <david.ahern@oracle.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: David S. Miller <davem@davemloft.net>
CC: stable@vger.kernel.org # v3.19+
2015-05-28 03:23:51 +03:00
if ( ( sizeof ( pci_bus_addr_t ) < 8 ) & & l ) {
2014-04-30 04:37:47 +04:00
/* Above 32-bit boundary; try to reallocate */
2014-02-26 22:26:00 +04:00
res - > flags | = IORESOURCE_UNSET ;
2014-04-30 04:42:49 +04:00
res - > start = 0 ;
2018-10-13 03:49:19 +03:00
res - > end = sz64 - 1 ;
2018-01-18 21:55:24 +03:00
pci_info ( dev , " reg 0x%x: can't handle BAR above 4GB (bus address %#010llx) \n " ,
2014-10-30 20:54:43 +03:00
pos , ( unsigned long long ) l64 ) ;
2014-04-30 04:42:49 +04:00
goto out ;
2008-07-28 21:38:59 +04:00
}
}
2014-10-30 20:54:43 +03:00
region . start = l64 ;
2018-10-13 03:49:19 +03:00
region . end = l64 + sz64 - 1 ;
2014-10-30 20:54:43 +03:00
PCI: Convert pcibios_resource_to_bus() to take a pci_bus, not a pci_dev
These interfaces:
pcibios_resource_to_bus(struct pci_dev *dev, *bus_region, *resource)
pcibios_bus_to_resource(struct pci_dev *dev, *resource, *bus_region)
took a pci_dev, but they really depend only on the pci_bus. And we want to
use them in resource allocation paths where we have the bus but not a
device, so this patch converts them to take the pci_bus instead of the
pci_dev:
pcibios_resource_to_bus(struct pci_bus *bus, *bus_region, *resource)
pcibios_bus_to_resource(struct pci_bus *bus, *resource, *bus_region)
In fact, with standard PCI-PCI bridges, they only depend on the host
bridge, because that's the only place address translation occurs, but
we aren't going that far yet.
[bhelgaas: changelog]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2013-12-10 10:54:40 +04:00
pcibios_bus_to_resource ( dev - > bus , res , & region ) ;
pcibios_resource_to_bus ( dev - > bus , & inverted_region , res ) ;
2013-05-25 15:36:27 +04:00
/*
* If " A " is a BAR value ( a bus address ) , " bus_to_resource(A) " is
* the corresponding resource address ( the physical address used by
* the CPU . Converting that resource address back to a bus address
* should yield the original BAR value :
*
* resource_to_bus ( bus_to_resource ( A ) ) = = A
*
* If it doesn ' t , CPU accesses to " bus_to_resource(A) " will not
* be claimed by the device .
*/
if ( inverted_region . start ! = region . start ) {
res - > flags | = IORESOURCE_UNSET ;
res - > start = 0 ;
2014-04-15 01:26:50 +04:00
res - > end = region . end - region . start ;
2018-01-18 21:55:24 +03:00
pci_info ( dev , " reg 0x%x: initial BAR value %#010llx invalid \n " ,
2014-10-30 20:54:43 +03:00
pos , ( unsigned long long ) region . start ) ;
2013-05-25 15:36:27 +04:00
}
2013-05-25 15:36:26 +04:00
2012-08-23 20:53:08 +04:00
goto out ;
fail :
res - > flags = 0 ;
out :
2014-04-30 04:37:47 +04:00
if ( res - > flags )
2019-04-20 07:07:20 +03:00
pci_info ( dev , " reg 0x%x: %pR \n " , pos , res ) ;
2012-08-23 20:53:08 +04:00
2011-06-14 23:04:35 +04:00
return ( res - > flags & IORESOURCE_MEM_64 ) ? 1 : 0 ;
2006-11-30 00:53:10 +03:00
}
2005-04-17 02:20:36 +04:00
static void pci_read_bases ( struct pci_dev * dev , unsigned int howmany , int rom )
{
2008-07-28 21:38:59 +04:00
unsigned int pos , reg ;
2006-11-30 00:53:10 +03:00
2016-05-11 19:27:16 +03:00
if ( dev - > non_compliant_bars )
return ;
2018-03-03 07:33:10 +03:00
/* Per PCIe r4.0, sec 9.3.4.1.11, the VF BARs are all RO Zero */
if ( dev - > is_virtfn )
return ;
2008-07-28 21:38:59 +04:00
for ( pos = 0 ; pos < howmany ; pos + + ) {
struct resource * res = & dev - > resource [ pos ] ;
2005-04-17 02:20:36 +04:00
reg = PCI_BASE_ADDRESS_0 + ( pos < < 2 ) ;
2008-07-28 21:38:59 +04:00
pos + = __pci_read_base ( dev , pci_bar_unknown , res , reg ) ;
2005-04-17 02:20:36 +04:00
}
2008-07-28 21:38:59 +04:00
2005-04-17 02:20:36 +04:00
if ( rom ) {
2008-07-28 21:38:59 +04:00
struct resource * res = & dev - > resource [ PCI_ROM_RESOURCE ] ;
2005-04-17 02:20:36 +04:00
dev - > rom_base_reg = rom ;
2008-07-28 21:38:59 +04:00
res - > flags = IORESOURCE_MEM | IORESOURCE_PREFETCH |
2015-08-11 06:07:06 +03:00
IORESOURCE_READONLY | IORESOURCE_SIZEALIGN ;
2008-07-28 21:38:59 +04:00
__pci_read_base ( dev , pci_bar_mem32 , res , rom ) ;
2005-04-17 02:20:36 +04:00
}
}
2019-01-19 20:35:04 +03:00
static void pci_read_bridge_windows ( struct pci_dev * bridge )
{
u16 io ;
u32 pmem , tmp ;
pci_read_config_word ( bridge , PCI_IO_BASE , & io ) ;
if ( ! io ) {
pci_write_config_word ( bridge , PCI_IO_BASE , 0xe0f0 ) ;
pci_read_config_word ( bridge , PCI_IO_BASE , & io ) ;
pci_write_config_word ( bridge , PCI_IO_BASE , 0x0 ) ;
}
if ( io )
bridge - > io_window = 1 ;
/*
* DECchip 21050 pass 2 errata : the bridge may miss an address
* disconnect boundary by one PCI data phase . Workaround : do not
* use prefetching on this device .
*/
if ( bridge - > vendor = = PCI_VENDOR_ID_DEC & & bridge - > device = = 0x0001 )
return ;
pci_read_config_dword ( bridge , PCI_PREF_MEMORY_BASE , & pmem ) ;
if ( ! pmem ) {
pci_write_config_dword ( bridge , PCI_PREF_MEMORY_BASE ,
0xffe0fff0 ) ;
pci_read_config_dword ( bridge , PCI_PREF_MEMORY_BASE , & pmem ) ;
pci_write_config_dword ( bridge , PCI_PREF_MEMORY_BASE , 0x0 ) ;
}
if ( ! pmem )
return ;
bridge - > pref_window = 1 ;
if ( ( pmem & PCI_PREF_RANGE_TYPE_MASK ) = = PCI_PREF_RANGE_TYPE_64 ) {
/*
* Bridge claims to have a 64 - bit prefetchable memory
* window ; verify that the upper bits are actually
* writable .
*/
pci_read_config_dword ( bridge , PCI_PREF_BASE_UPPER32 , & pmem ) ;
pci_write_config_dword ( bridge , PCI_PREF_BASE_UPPER32 ,
0xffffffff ) ;
pci_read_config_dword ( bridge , PCI_PREF_BASE_UPPER32 , & tmp ) ;
pci_write_config_dword ( bridge , PCI_PREF_BASE_UPPER32 , pmem ) ;
if ( tmp )
bridge - > pref_64_window = 1 ;
}
}
2012-11-22 00:35:00 +04:00
static void pci_read_bridge_io ( struct pci_bus * child )
2005-04-17 02:20:36 +04:00
{
struct pci_dev * dev = child - > self ;
u8 io_base_lo , io_limit_lo ;
2012-07-09 23:38:57 +04:00
unsigned long io_mask , io_granularity , base , limit ;
2012-02-24 07:19:00 +04:00
struct pci_bus_region region ;
2012-07-09 23:38:57 +04:00
struct resource * res ;
io_mask = PCI_IO_RANGE_MASK ;
io_granularity = 0x1000 ;
if ( dev - > io_window_1k ) {
/* Support 1K I/O space granularity */
io_mask = PCI_IO_1K_RANGE_MASK ;
io_granularity = 0x400 ;
}
2005-04-17 02:20:36 +04:00
res = child - > resource [ 0 ] ;
pci_read_config_byte ( dev , PCI_IO_BASE , & io_base_lo ) ;
pci_read_config_byte ( dev , PCI_IO_LIMIT , & io_limit_lo ) ;
2012-07-09 23:38:57 +04:00
base = ( io_base_lo & io_mask ) < < 8 ;
limit = ( io_limit_lo & io_mask ) < < 8 ;
2005-04-17 02:20:36 +04:00
if ( ( io_base_lo & PCI_IO_RANGE_TYPE_MASK ) = = PCI_IO_RANGE_TYPE_32 ) {
u16 io_base_hi , io_limit_hi ;
2012-06-19 17:45:44 +04:00
2005-04-17 02:20:36 +04:00
pci_read_config_word ( dev , PCI_IO_BASE_UPPER16 , & io_base_hi ) ;
pci_read_config_word ( dev , PCI_IO_LIMIT_UPPER16 , & io_limit_hi ) ;
2012-06-19 17:45:44 +04:00
base | = ( ( unsigned long ) io_base_hi < < 16 ) ;
limit | = ( ( unsigned long ) io_limit_hi < < 16 ) ;
2005-04-17 02:20:36 +04:00
}
2012-07-09 23:38:41 +04:00
if ( base < = limit ) {
2005-04-17 02:20:36 +04:00
res - > flags = ( io_base_lo & PCI_IO_RANGE_TYPE_MASK ) | IORESOURCE_IO ;
2012-02-24 07:19:00 +04:00
region . start = base ;
2012-07-09 23:38:57 +04:00
region . end = limit + io_granularity - 1 ;
PCI: Convert pcibios_resource_to_bus() to take a pci_bus, not a pci_dev
These interfaces:
pcibios_resource_to_bus(struct pci_dev *dev, *bus_region, *resource)
pcibios_bus_to_resource(struct pci_dev *dev, *resource, *bus_region)
took a pci_dev, but they really depend only on the pci_bus. And we want to
use them in resource allocation paths where we have the bus but not a
device, so this patch converts them to take the pci_bus instead of the
pci_dev:
pcibios_resource_to_bus(struct pci_bus *bus, *bus_region, *resource)
pcibios_bus_to_resource(struct pci_bus *bus, *resource, *bus_region)
In fact, with standard PCI-PCI bridges, they only depend on the host
bridge, because that's the only place address translation occurs, but
we aren't going that far yet.
[bhelgaas: changelog]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2013-12-10 10:54:40 +04:00
pcibios_bus_to_resource ( dev - > bus , res , & region ) ;
2019-04-20 07:07:20 +03:00
pci_info ( dev , " bridge window %pR \n " , res ) ;
2005-04-17 02:20:36 +04:00
}
2010-02-23 20:24:21 +03:00
}
2012-11-22 00:35:00 +04:00
static void pci_read_bridge_mmio ( struct pci_bus * child )
2010-02-23 20:24:21 +03:00
{
struct pci_dev * dev = child - > self ;
u16 mem_base_lo , mem_limit_lo ;
unsigned long base , limit ;
2012-02-24 07:19:00 +04:00
struct pci_bus_region region ;
2010-02-23 20:24:21 +03:00
struct resource * res ;
2005-04-17 02:20:36 +04:00
res = child - > resource [ 1 ] ;
pci_read_config_word ( dev , PCI_MEMORY_BASE , & mem_base_lo ) ;
pci_read_config_word ( dev , PCI_MEMORY_LIMIT , & mem_limit_lo ) ;
2012-06-19 17:45:44 +04:00
base = ( ( unsigned long ) mem_base_lo & PCI_MEMORY_RANGE_MASK ) < < 16 ;
limit = ( ( unsigned long ) mem_limit_lo & PCI_MEMORY_RANGE_MASK ) < < 16 ;
2012-07-09 23:38:41 +04:00
if ( base < = limit ) {
2005-04-17 02:20:36 +04:00
res - > flags = ( mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK ) | IORESOURCE_MEM ;
2012-02-24 07:19:00 +04:00
region . start = base ;
region . end = limit + 0xfffff ;
PCI: Convert pcibios_resource_to_bus() to take a pci_bus, not a pci_dev
These interfaces:
pcibios_resource_to_bus(struct pci_dev *dev, *bus_region, *resource)
pcibios_bus_to_resource(struct pci_dev *dev, *resource, *bus_region)
took a pci_dev, but they really depend only on the pci_bus. And we want to
use them in resource allocation paths where we have the bus but not a
device, so this patch converts them to take the pci_bus instead of the
pci_dev:
pcibios_resource_to_bus(struct pci_bus *bus, *bus_region, *resource)
pcibios_bus_to_resource(struct pci_bus *bus, *resource, *bus_region)
In fact, with standard PCI-PCI bridges, they only depend on the host
bridge, because that's the only place address translation occurs, but
we aren't going that far yet.
[bhelgaas: changelog]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2013-12-10 10:54:40 +04:00
pcibios_bus_to_resource ( dev - > bus , res , & region ) ;
2019-04-20 07:07:20 +03:00
pci_info ( dev , " bridge window %pR \n " , res ) ;
2005-04-17 02:20:36 +04:00
}
2010-02-23 20:24:21 +03:00
}
2012-11-22 00:35:00 +04:00
static void pci_read_bridge_mmio_pref ( struct pci_bus * child )
2010-02-23 20:24:21 +03:00
{
struct pci_dev * dev = child - > self ;
u16 mem_base_lo , mem_limit_lo ;
2014-11-20 00:30:32 +03:00
u64 base64 , limit64 ;
PCI: Add pci_bus_addr_t
David Ahern reported that d63e2e1f3df9 ("sparc/PCI: Clip bridge windows
to fit in upstream windows") fails to boot on sparc/T5-8:
pci 0000:06:00.0: reg 0x184: can't handle BAR above 4GB (bus address 0x110204000)
The problem is that sparc64 assumed that dma_addr_t only needed to hold DMA
addresses, i.e., bus addresses returned via the DMA API (dma_map_single(),
etc.), while the PCI core assumed dma_addr_t could hold *any* bus address,
including raw BAR values. On sparc64, all DMA addresses fit in 32 bits, so
dma_addr_t is a 32-bit type. However, BAR values can be 64 bits wide, so
they don't fit in a dma_addr_t. d63e2e1f3df9 added new checking that
tripped over this mismatch.
Add pci_bus_addr_t, which is wide enough to hold any PCI bus address,
including both raw BAR values and DMA addresses. This will be 64 bits
on 64-bit platforms and on platforms with a 64-bit dma_addr_t. Then
dma_addr_t only needs to be wide enough to hold addresses from the DMA API.
[bhelgaas: changelog, bugzilla, Kconfig to ensure pci_bus_addr_t is at
least as wide as dma_addr_t, documentation]
Fixes: d63e2e1f3df9 ("sparc/PCI: Clip bridge windows to fit in upstream windows")
Fixes: 23b13bc76f35 ("PCI: Fail safely if we can't handle BARs larger than 4GB")
Link: http://lkml.kernel.org/r/CAE9FiQU1gJY1LYrxs+ma5LCTEEe4xmtjRG0aXJ9K_Tsu+m9Wuw@mail.gmail.com
Link: http://lkml.kernel.org/r/1427857069-6789-1-git-send-email-yinghai@kernel.org
Link: https://bugzilla.kernel.org/show_bug.cgi?id=96231
Reported-by: David Ahern <david.ahern@oracle.com>
Tested-by: David Ahern <david.ahern@oracle.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: David S. Miller <davem@davemloft.net>
CC: stable@vger.kernel.org # v3.19+
2015-05-28 03:23:51 +03:00
pci_bus_addr_t base , limit ;
2012-02-24 07:19:00 +04:00
struct pci_bus_region region ;
2010-02-23 20:24:21 +03:00
struct resource * res ;
2005-04-17 02:20:36 +04:00
res = child - > resource [ 2 ] ;
pci_read_config_word ( dev , PCI_PREF_MEMORY_BASE , & mem_base_lo ) ;
pci_read_config_word ( dev , PCI_PREF_MEMORY_LIMIT , & mem_limit_lo ) ;
2014-11-20 00:30:32 +03:00
base64 = ( mem_base_lo & PCI_PREF_RANGE_MASK ) < < 16 ;
limit64 = ( mem_limit_lo & PCI_PREF_RANGE_MASK ) < < 16 ;
2005-04-17 02:20:36 +04:00
if ( ( mem_base_lo & PCI_PREF_RANGE_TYPE_MASK ) = = PCI_PREF_RANGE_TYPE_64 ) {
u32 mem_base_hi , mem_limit_hi ;
2012-06-19 17:45:44 +04:00
2005-04-17 02:20:36 +04:00
pci_read_config_dword ( dev , PCI_PREF_BASE_UPPER32 , & mem_base_hi ) ;
pci_read_config_dword ( dev , PCI_PREF_LIMIT_UPPER32 , & mem_limit_hi ) ;
/*
* Some bridges set the base > limit by default , and some
* ( broken ) BIOSes do not initialize them . If we find
* this , just assume they are not being used .
*/
if ( mem_base_hi < = mem_limit_hi ) {
2014-11-20 00:30:32 +03:00
base64 | = ( u64 ) mem_base_hi < < 32 ;
limit64 | = ( u64 ) mem_limit_hi < < 32 ;
2005-04-17 02:20:36 +04:00
}
}
2014-11-20 00:30:32 +03:00
PCI: Add pci_bus_addr_t
David Ahern reported that d63e2e1f3df9 ("sparc/PCI: Clip bridge windows
to fit in upstream windows") fails to boot on sparc/T5-8:
pci 0000:06:00.0: reg 0x184: can't handle BAR above 4GB (bus address 0x110204000)
The problem is that sparc64 assumed that dma_addr_t only needed to hold DMA
addresses, i.e., bus addresses returned via the DMA API (dma_map_single(),
etc.), while the PCI core assumed dma_addr_t could hold *any* bus address,
including raw BAR values. On sparc64, all DMA addresses fit in 32 bits, so
dma_addr_t is a 32-bit type. However, BAR values can be 64 bits wide, so
they don't fit in a dma_addr_t. d63e2e1f3df9 added new checking that
tripped over this mismatch.
Add pci_bus_addr_t, which is wide enough to hold any PCI bus address,
including both raw BAR values and DMA addresses. This will be 64 bits
on 64-bit platforms and on platforms with a 64-bit dma_addr_t. Then
dma_addr_t only needs to be wide enough to hold addresses from the DMA API.
[bhelgaas: changelog, bugzilla, Kconfig to ensure pci_bus_addr_t is at
least as wide as dma_addr_t, documentation]
Fixes: d63e2e1f3df9 ("sparc/PCI: Clip bridge windows to fit in upstream windows")
Fixes: 23b13bc76f35 ("PCI: Fail safely if we can't handle BARs larger than 4GB")
Link: http://lkml.kernel.org/r/CAE9FiQU1gJY1LYrxs+ma5LCTEEe4xmtjRG0aXJ9K_Tsu+m9Wuw@mail.gmail.com
Link: http://lkml.kernel.org/r/1427857069-6789-1-git-send-email-yinghai@kernel.org
Link: https://bugzilla.kernel.org/show_bug.cgi?id=96231
Reported-by: David Ahern <david.ahern@oracle.com>
Tested-by: David Ahern <david.ahern@oracle.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: David S. Miller <davem@davemloft.net>
CC: stable@vger.kernel.org # v3.19+
2015-05-28 03:23:51 +03:00
base = ( pci_bus_addr_t ) base64 ;
limit = ( pci_bus_addr_t ) limit64 ;
2014-11-20 00:30:32 +03:00
if ( base ! = base64 ) {
2018-01-18 21:55:24 +03:00
pci_err ( dev , " can't handle bridge window above 4GB (bus address %#010llx) \n " ,
2014-11-20 00:30:32 +03:00
( unsigned long long ) base64 ) ;
return ;
}
2012-07-09 23:38:41 +04:00
if ( base < = limit ) {
2009-04-24 07:48:32 +04:00
res - > flags = ( mem_base_lo & PCI_PREF_RANGE_TYPE_MASK ) |
IORESOURCE_MEM | IORESOURCE_PREFETCH ;
if ( res - > flags & PCI_PREF_RANGE_TYPE_64 )
res - > flags | = IORESOURCE_MEM_64 ;
2012-02-24 07:19:00 +04:00
region . start = base ;
region . end = limit + 0xfffff ;
PCI: Convert pcibios_resource_to_bus() to take a pci_bus, not a pci_dev
These interfaces:
pcibios_resource_to_bus(struct pci_dev *dev, *bus_region, *resource)
pcibios_bus_to_resource(struct pci_dev *dev, *resource, *bus_region)
took a pci_dev, but they really depend only on the pci_bus. And we want to
use them in resource allocation paths where we have the bus but not a
device, so this patch converts them to take the pci_bus instead of the
pci_dev:
pcibios_resource_to_bus(struct pci_bus *bus, *bus_region, *resource)
pcibios_bus_to_resource(struct pci_bus *bus, *resource, *bus_region)
In fact, with standard PCI-PCI bridges, they only depend on the host
bridge, because that's the only place address translation occurs, but
we aren't going that far yet.
[bhelgaas: changelog]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2013-12-10 10:54:40 +04:00
pcibios_bus_to_resource ( dev - > bus , res , & region ) ;
2019-04-20 07:07:20 +03:00
pci_info ( dev , " bridge window %pR \n " , res ) ;
2005-04-17 02:20:36 +04:00
}
}
2012-11-22 00:35:00 +04:00
void pci_read_bridge_bases ( struct pci_bus * child )
2010-02-23 20:24:21 +03:00
{
struct pci_dev * dev = child - > self ;
2010-02-23 20:24:36 +03:00
struct resource * res ;
2010-02-23 20:24:21 +03:00
int i ;
if ( pci_is_root_bus ( child ) ) /* It's a host bus, nothing to read */
return ;
2018-01-18 21:55:24 +03:00
pci_info ( dev , " PCI bridge to %pR%s \n " ,
2012-05-18 05:51:11 +04:00
& child - > busn_res ,
2010-02-23 20:24:21 +03:00
dev - > transparent ? " (subtractive decode) " : " " ) ;
2010-02-23 20:24:36 +03:00
pci_bus_remove_resources ( child ) ;
for ( i = 0 ; i < PCI_BRIDGE_RESOURCE_NUM ; i + + )
child - > resource [ i ] = & dev - > resource [ PCI_BRIDGE_RESOURCES + i ] ;
2010-02-23 20:24:21 +03:00
pci_read_bridge_io ( child ) ;
pci_read_bridge_mmio ( child ) ;
pci_read_bridge_mmio_pref ( child ) ;
2010-02-23 20:24:26 +03:00
if ( dev - > transparent ) {
2010-02-23 20:24:36 +03:00
pci_bus_for_each_resource ( child - > parent , res , i ) {
2014-04-15 02:10:54 +04:00
if ( res & & res - > flags ) {
2010-02-23 20:24:36 +03:00
pci_bus_add_resource ( child , res ,
PCI_SUBTRACTIVE_DECODE ) ;
2019-04-20 07:07:20 +03:00
pci_info ( dev , " bridge window %pR (subtractive decode) \n " ,
2010-02-23 20:24:36 +03:00
res ) ;
}
2010-02-23 20:24:26 +03:00
}
}
2010-02-23 20:24:21 +03:00
}
2014-09-29 18:29:26 +04:00
static struct pci_bus * pci_alloc_bus ( struct pci_bus * parent )
2005-04-17 02:20:36 +04:00
{
struct pci_bus * b ;
2006-02-28 17:34:49 +03:00
b = kzalloc ( sizeof ( * b ) , GFP_KERNEL ) ;
2013-06-06 00:22:11 +04:00
if ( ! b )
return NULL ;
INIT_LIST_HEAD ( & b - > node ) ;
INIT_LIST_HEAD ( & b - > children ) ;
INIT_LIST_HEAD ( & b - > devices ) ;
INIT_LIST_HEAD ( & b - > slots ) ;
INIT_LIST_HEAD ( & b - > resources ) ;
b - > max_bus_speed = PCI_SPEED_UNKNOWN ;
b - > cur_bus_speed = PCI_SPEED_UNKNOWN ;
2014-09-29 18:29:26 +04:00
# ifdef CONFIG_PCI_DOMAINS_GENERIC
if ( parent )
b - > domain_nr = parent - > domain_nr ;
# endif
2005-04-17 02:20:36 +04:00
return b ;
}
2017-06-28 23:13:53 +03:00
static void devm_pci_release_host_bridge_dev ( struct device * dev )
2013-06-08 02:16:51 +04:00
{
struct pci_host_bridge * bridge = to_pci_host_bridge ( dev ) ;
if ( bridge - > release_fn )
bridge - > release_fn ( bridge ) ;
2018-05-15 12:07:01 +03:00
pci_free_resource_list ( & bridge - > windows ) ;
2019-10-08 04:23:25 +03:00
pci_free_resource_list ( & bridge - > dma_ranges ) ;
2017-06-28 23:13:53 +03:00
}
2013-06-08 02:16:51 +04:00
2017-06-28 23:13:53 +03:00
/*
 * Device release callback for host bridges obtained from
 * pci_alloc_host_bridge(): performs the common teardown and then frees
 * the bridge structure itself.
 */
static void pci_release_host_bridge_dev(struct device *dev)
{
	struct pci_host_bridge *hb = to_pci_host_bridge(dev);

	devm_pci_release_host_bridge_dev(dev);
	kfree(hb);
}
2019-03-18 19:07:18 +03:00
static void pci_init_host_bridge ( struct pci_host_bridge * bridge )
2012-04-03 05:31:53 +04:00
{
2013-06-06 00:22:11 +04:00
INIT_LIST_HEAD ( & bridge - > windows ) ;
2019-05-03 17:05:32 +03:00
INIT_LIST_HEAD ( & bridge - > dma_ranges ) ;
2016-11-25 13:57:09 +03:00
2018-03-09 20:21:25 +03:00
/*
* We assume we can manage these PCIe features . Some systems may
* reserve these for use by the platform itself , e . g . , an ACPI BIOS
* may implement its own AER handling and use _OSC to prevent the
* OS from interfering .
*/
bridge - > native_aer = 1 ;
2018-05-24 01:22:19 +03:00
bridge - > native_pcie_hotplug = 1 ;
2018-05-24 01:40:23 +03:00
bridge - > native_shpc_hotplug = 1 ;
2018-03-09 20:21:25 +03:00
bridge - > native_pme = 1 ;
2018-04-17 18:58:09 +03:00
bridge - > native_ltr = 1 ;
2019-03-18 19:07:18 +03:00
}
/*
 * pci_alloc_host_bridge() - allocate a host bridge with kzalloc()
 * @priv: number of extra bytes to allocate behind the structure
 *
 * The bridge's device release callback frees the structure.
 *
 * Return: the initialized bridge, or NULL if the allocation failed.
 */
struct pci_host_bridge *pci_alloc_host_bridge(size_t priv)
{
	struct pci_host_bridge *hb;

	hb = kzalloc(sizeof(*hb) + priv, GFP_KERNEL);
	if (hb) {
		pci_init_host_bridge(hb);
		hb->dev.release = pci_release_host_bridge_dev;
	}

	return hb;
}
2016-11-25 13:57:11 +03:00
EXPORT_SYMBOL ( pci_alloc_host_bridge ) ;
2012-04-03 05:31:53 +04:00
2017-06-28 23:13:53 +03:00
/*
 * devm_pci_alloc_host_bridge() - allocate a host bridge with devm_kzalloc()
 * @dev: device the allocation is tied to
 * @priv: number of extra bytes to allocate behind the structure
 *
 * Unlike pci_alloc_host_bridge(), the release callback installed here does
 * not free the bridge structure.
 *
 * Return: the initialized bridge, or NULL if the allocation failed.
 */
struct pci_host_bridge *devm_pci_alloc_host_bridge(struct device *dev,
						   size_t priv)
{
	struct pci_host_bridge *hb;

	hb = devm_kzalloc(dev, sizeof(*hb) + priv, GFP_KERNEL);
	if (hb) {
		pci_init_host_bridge(hb);
		hb->dev.release = devm_pci_release_host_bridge_dev;
	}

	return hb;
}
EXPORT_SYMBOL ( devm_pci_alloc_host_bridge ) ;
2017-06-28 23:13:52 +03:00
void pci_free_host_bridge ( struct pci_host_bridge * bridge )
{
pci_free_resource_list ( & bridge - > windows ) ;
2019-05-03 17:05:32 +03:00
pci_free_resource_list ( & bridge - > dma_ranges ) ;
2017-06-28 23:13:52 +03:00
kfree ( bridge ) ;
}
EXPORT_SYMBOL ( pci_free_host_bridge ) ;
2020-02-29 00:02:03 +03:00
/* Indexed by PCI_X_SSTATUS_FREQ (secondary bus mode and frequency) */
2014-01-11 04:14:48 +04:00
static const unsigned char pcix_bus_speed [ ] = {
2009-12-13 16:11:33 +03:00
PCI_SPEED_UNKNOWN , /* 0 */
PCI_SPEED_66MHz_PCIX , /* 1 */
PCI_SPEED_100MHz_PCIX , /* 2 */
PCI_SPEED_133MHz_PCIX , /* 3 */
PCI_SPEED_UNKNOWN , /* 4 */
PCI_SPEED_66MHz_PCIX_ECC , /* 5 */
PCI_SPEED_100MHz_PCIX_ECC , /* 6 */
PCI_SPEED_133MHz_PCIX_ECC , /* 7 */
PCI_SPEED_UNKNOWN , /* 8 */
PCI_SPEED_66MHz_PCIX_266 , /* 9 */
PCI_SPEED_100MHz_PCIX_266 , /* A */
PCI_SPEED_133MHz_PCIX_266 , /* B */
PCI_SPEED_UNKNOWN , /* C */
PCI_SPEED_66MHz_PCIX_533 , /* D */
PCI_SPEED_100MHz_PCIX_533 , /* E */
PCI_SPEED_133MHz_PCIX_533 /* F */
} ;
2020-02-29 00:02:03 +03:00
/* Indexed by PCI_EXP_LNKCAP_SLS, PCI_EXP_LNKSTA_CLS */
2013-07-31 10:53:16 +04:00
const unsigned char pcie_link_speed [ ] = {
2009-12-13 16:11:32 +03:00
PCI_SPEED_UNKNOWN , /* 0 */
PCIE_SPEED_2_5GT , /* 1 */
PCIE_SPEED_5_0GT , /* 2 */
2009-12-13 16:11:35 +03:00
PCIE_SPEED_8_0GT , /* 3 */
2018-03-12 12:13:32 +03:00
PCIE_SPEED_16_0GT , /* 4 */
2019-06-04 19:24:43 +03:00
PCIE_SPEED_32_0GT , /* 5 */
2009-12-13 16:11:32 +03:00
PCI_SPEED_UNKNOWN , /* 6 */
PCI_SPEED_UNKNOWN , /* 7 */
PCI_SPEED_UNKNOWN , /* 8 */
PCI_SPEED_UNKNOWN , /* 9 */
PCI_SPEED_UNKNOWN , /* A */
PCI_SPEED_UNKNOWN , /* B */
PCI_SPEED_UNKNOWN , /* C */
PCI_SPEED_UNKNOWN , /* D */
PCI_SPEED_UNKNOWN , /* E */
PCI_SPEED_UNKNOWN /* F */
} ;
2020-02-29 00:02:03 +03:00
EXPORT_SYMBOL_GPL ( pcie_link_speed ) ;
/*
 * pci_speed_string() - human-readable name of a bus speed
 * @speed: value of enum pci_bus_speed
 *
 * Return: the matching string from the table below.  Values beyond the end
 * of the table, and values whose table slot is empty (0x05-0x08), yield
 * the "Unknown" fallback, so the result is never NULL and is always safe
 * to print with %s.
 */
const char *pci_speed_string(enum pci_bus_speed speed)
{
	/* Indexed by the pci_bus_speed enum */
	static const char * const speed_strings[] = {
		" 33 MHz PCI ",			/* 0x00 */
		" 66 MHz PCI ",			/* 0x01 */
		" 66 MHz PCI-X ",		/* 0x02 */
		" 100 MHz PCI-X ",		/* 0x03 */
		" 133 MHz PCI-X ",		/* 0x04 */
		NULL,				/* 0x05 */
		NULL,				/* 0x06 */
		NULL,				/* 0x07 */
		NULL,				/* 0x08 */
		" 66 MHz PCI-X 266 ",		/* 0x09 */
		" 100 MHz PCI-X 266 ",		/* 0x0a */
		" 133 MHz PCI-X 266 ",		/* 0x0b */
		" Unknown AGP ",		/* 0x0c */
		" 1x AGP ",			/* 0x0d */
		" 2x AGP ",			/* 0x0e */
		" 4x AGP ",			/* 0x0f */
		" 8x AGP ",			/* 0x10 */
		" 66 MHz PCI-X 533 ",		/* 0x11 */
		" 100 MHz PCI-X 533 ",		/* 0x12 */
		" 133 MHz PCI-X 533 ",		/* 0x13 */
		" 2.5 GT/s PCIe ",		/* 0x14 */
		" 5.0 GT/s PCIe ",		/* 0x15 */
		" 8.0 GT/s PCIe ",		/* 0x16 */
		" 16.0 GT/s PCIe ",		/* 0x17 */
		" 32.0 GT/s PCIe ",		/* 0x18 */
	};

	/* Reject both out-of-range values and the unnamed holes in the table */
	if (speed < ARRAY_SIZE(speed_strings) && speed_strings[speed])
		return speed_strings[speed];

	return " Unknown ";
}
EXPORT_SYMBOL_GPL ( pci_speed_string ) ;
2009-12-13 16:11:32 +03:00
/*
 * pcie_update_link_speed() - refresh a bus's current speed
 * @bus: bus to update
 * @linksta: Link Status register value
 *
 * Translates the PCI_EXP_LNKSTA_CLS field of @linksta through
 * pcie_link_speed[] and stores the result in bus->cur_bus_speed.
 */
void pcie_update_link_speed(struct pci_bus *bus, u16 linksta)
{
	unsigned int cls = linksta & PCI_EXP_LNKSTA_CLS;

	bus->cur_bus_speed = pcie_link_speed[cls];
}
EXPORT_SYMBOL_GPL ( pcie_update_link_speed ) ;
2009-12-13 16:11:34 +03:00
/* AGP rates in ascending order; read-only lookup table used by agp_speed() */
static const unsigned char agp_speeds[] = {
	[0] = AGP_UNKNOWN,
	[1] = AGP_1X,
	[2] = AGP_2X,
	[3] = AGP_4X,
	[4] = AGP_8X,
};
static enum pci_bus_speed agp_speed ( int agp3 , int agpstat )
{
int index = 0 ;
if ( agpstat & 4 )
index = 3 ;
else if ( agpstat & 2 )
index = 2 ;
else if ( agpstat & 1 )
index = 1 ;
else
goto out ;
2013-11-14 22:28:18 +04:00
2009-12-13 16:11:34 +03:00
if ( agp3 ) {
index + = 2 ;
if ( index = = 5 )
index = 0 ;
}
out :
return agp_speeds [ index ] ;
}
2009-12-13 16:11:33 +03:00
static void pci_set_bus_speed ( struct pci_bus * bus )
{
struct pci_dev * bridge = bus - > self ;
int pos ;
2009-12-13 16:11:34 +03:00
pos = pci_find_capability ( bridge , PCI_CAP_ID_AGP ) ;
if ( ! pos )
pos = pci_find_capability ( bridge , PCI_CAP_ID_AGP3 ) ;
if ( pos ) {
u32 agpstat , agpcmd ;
pci_read_config_dword ( bridge , pos + PCI_AGP_STATUS , & agpstat ) ;
bus - > max_bus_speed = agp_speed ( agpstat & 8 , agpstat & 7 ) ;
pci_read_config_dword ( bridge , pos + PCI_AGP_COMMAND , & agpcmd ) ;
bus - > cur_bus_speed = agp_speed ( agpstat & 8 , agpcmd & 7 ) ;
}
2009-12-13 16:11:33 +03:00
pos = pci_find_capability ( bridge , PCI_CAP_ID_PCIX ) ;
if ( pos ) {
u16 status ;
enum pci_bus_speed max ;
2012-12-06 00:51:17 +04:00
pci_read_config_word ( bridge , pos + PCI_X_BRIDGE_SSTATUS ,
& status ) ;
if ( status & PCI_X_SSTATUS_533MHZ ) {
2009-12-13 16:11:33 +03:00
max = PCI_SPEED_133MHz_PCIX_533 ;
2012-12-06 00:51:17 +04:00
} else if ( status & PCI_X_SSTATUS_266MHZ ) {
2009-12-13 16:11:33 +03:00
max = PCI_SPEED_133MHz_PCIX_266 ;
2012-12-06 00:51:17 +04:00
} else if ( status & PCI_X_SSTATUS_133MHZ ) {
2014-04-19 04:13:49 +04:00
if ( ( status & PCI_X_SSTATUS_VERS ) = = PCI_X_SSTATUS_V2 )
2009-12-13 16:11:33 +03:00
max = PCI_SPEED_133MHz_PCIX_ECC ;
2014-04-19 04:13:49 +04:00
else
2009-12-13 16:11:33 +03:00
max = PCI_SPEED_133MHz_PCIX ;
} else {
max = PCI_SPEED_66MHz_PCIX ;
}
bus - > max_bus_speed = max ;
2012-12-06 00:51:17 +04:00
bus - > cur_bus_speed = pcix_bus_speed [
( status & PCI_X_SSTATUS_FREQ ) > > 6 ] ;
2009-12-13 16:11:33 +03:00
return ;
}
2013-09-05 11:55:29 +04:00
if ( pci_is_pcie ( bridge ) ) {
2009-12-13 16:11:33 +03:00
u32 linkcap ;
u16 linksta ;
2012-07-24 13:20:06 +04:00
pcie_capability_read_dword ( bridge , PCI_EXP_LNKCAP , & linkcap ) ;
2012-12-06 00:51:18 +04:00
bus - > max_bus_speed = pcie_link_speed [ linkcap & PCI_EXP_LNKCAP_SLS ] ;
2018-09-20 19:27:17 +03:00
bridge - > link_active_reporting = ! ! ( linkcap & PCI_EXP_LNKCAP_DLLLARC ) ;
2009-12-13 16:11:33 +03:00
2012-07-24 13:20:06 +04:00
pcie_capability_read_word ( bridge , PCI_EXP_LNKSTA , & linksta ) ;
2009-12-13 16:11:33 +03:00
pcie_update_link_speed ( bus , linksta ) ;
}
}
2015-07-28 16:46:11 +03:00
/*
 * pci_host_bridge_msi_domain() - find the MSI domain of a root bus
 * @bus: root bus
 *
 * Every firmware interface able to resolve the MSI domain is consulted
 * here, in order: OF, then ACPI, then (with CONFIG_PCI_MSI_IRQ_DOMAIN) a
 * direct lookup through the root bus fwnode.
 *
 * Return: the first domain found, or NULL.
 */
static struct irq_domain *pci_host_bridge_msi_domain(struct pci_bus *bus)
{
	struct irq_domain *domain;

	domain = pci_host_bridge_of_msi_domain(bus);
	if (domain)
		return domain;

	domain = pci_host_bridge_acpi_msi_domain(bus);

#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
	/*
	 * Neither firmware lookup produced a domain: try to match one
	 * directly on the root bus fwnode_handle.
	 */
	if (!domain) {
		struct fwnode_handle *fwnode = pci_root_bus_fwnode(bus);

		if (fwnode)
			domain = irq_find_matching_fwnode(fwnode,
							  DOMAIN_BUS_PCI_MSI);
	}
#endif

	return domain;
}
static void pci_set_bus_msi_domain ( struct pci_bus * bus )
{
struct irq_domain * d ;
2015-09-19 00:08:54 +03:00
struct pci_bus * b ;
2015-07-28 16:46:11 +03:00
/*
2015-09-19 00:08:54 +03:00
* The bus can be a root bus , a subordinate bus , or a virtual bus
* created by an SR - IOV device . Walk up to the first bridge device
* found or derive the domain from the host bridge .
2015-07-28 16:46:11 +03:00
*/
2015-09-19 00:08:54 +03:00
for ( b = bus , d = NULL ; ! d & & ! pci_is_root_bus ( b ) ; b = b - > parent ) {
if ( b - > self )
d = dev_get_msi_domain ( & b - > self - > dev ) ;
}
if ( ! d )
d = pci_host_bridge_msi_domain ( b ) ;
2015-07-28 16:46:11 +03:00
dev_set_msi_domain ( & bus - > dev , d ) ;
}
2017-06-28 23:13:55 +03:00
static int pci_register_host_bridge ( struct pci_host_bridge * bridge )
2016-11-25 13:57:09 +03:00
{
struct device * parent = bridge - > dev . parent ;
struct resource_entry * window , * n ;
struct pci_bus * bus , * b ;
resource_size_t offset ;
LIST_HEAD ( resources ) ;
struct resource * res ;
char addr [ 64 ] , * fmt ;
const char * name ;
int err ;
bus = pci_alloc_bus ( NULL ) ;
if ( ! bus )
return - ENOMEM ;
bridge - > bus = bus ;
2017-11-30 19:58:14 +03:00
/* Temporarily move resources off the list */
2016-11-25 13:57:09 +03:00
list_splice_init ( & bridge - > windows , & resources ) ;
bus - > sysdata = bridge - > sysdata ;
bus - > msi = bridge - > msi ;
bus - > ops = bridge - > ops ;
bus - > number = bus - > busn_res . start = bridge - > busnr ;
# ifdef CONFIG_PCI_DOMAINS_GENERIC
bus - > domain_nr = pci_bus_find_domain_nr ( bus , parent ) ;
# endif
b = pci_find_bus ( pci_domain_nr ( bus ) , bridge - > busnr ) ;
if ( b ) {
2017-11-30 19:58:14 +03:00
/* Ignore it if we already got here via a different bridge */
2016-11-25 13:57:09 +03:00
dev_dbg ( & b - > dev , " bus already known \n " ) ;
err = - EEXIST ;
goto free ;
}
dev_set_name ( & bridge - > dev , " pci%04x:%02x " , pci_domain_nr ( bus ) ,
bridge - > busnr ) ;
err = pcibios_root_bridge_prepare ( bridge ) ;
if ( err )
goto free ;
err = device_register ( & bridge - > dev ) ;
if ( err )
put_device ( & bridge - > dev ) ;
bus - > bridge = get_device ( & bridge - > dev ) ;
device_enable_async_suspend ( bus - > bridge ) ;
pci_set_bus_of_node ( bus ) ;
pci_set_bus_msi_domain ( bus ) ;
if ( ! parent )
set_dev_node ( bus - > bridge , pcibus_to_node ( bus ) ) ;
bus - > dev . class = & pcibus_class ;
bus - > dev . parent = bus - > bridge ;
dev_set_name ( & bus - > dev , " %04x:%02x " , pci_domain_nr ( bus ) , bus - > number ) ;
name = dev_name ( & bus - > dev ) ;
err = device_register ( & bus - > dev ) ;
if ( err )
goto unregister ;
pcibios_add_bus ( bus ) ;
/* Create legacy_io and legacy_mem files for this bus */
pci_create_legacy_files ( bus ) ;
if ( parent )
dev_info ( parent , " PCI host bridge to bus %s \n " , name ) ;
else
pr_info ( " PCI host bridge to bus %s \n " , name ) ;
2019-10-19 09:45:43 +03:00
if ( nr_node_ids > 1 & & pcibus_to_node ( bus ) = = NUMA_NO_NODE )
dev_warn ( & bus - > dev , " Unknown NUMA node; performance will be reduced \n " ) ;
2016-11-25 13:57:09 +03:00
/* Add initial resources to the bus */
resource_list_for_each_entry_safe ( window , n , & resources ) {
list_move_tail ( & window - > node , & bridge - > windows ) ;
offset = window - > offset ;
res = window - > res ;
if ( res - > flags & IORESOURCE_BUS )
pci_bus_insert_busn_res ( bus , bus - > number , res - > end ) ;
else
pci_bus_add_resource ( bus , res , 0 ) ;
if ( offset ) {
if ( resource_type ( res ) = = IORESOURCE_IO )
fmt = " (bus address [%#06llx-%#06llx]) " ;
else
fmt = " (bus address [%#010llx-%#010llx]) " ;
snprintf ( addr , sizeof ( addr ) , fmt ,
( unsigned long long ) ( res - > start - offset ) ,
( unsigned long long ) ( res - > end - offset ) ) ;
} else
addr [ 0 ] = ' \0 ' ;
dev_info ( & bus - > dev , " root bus resource %pR%s \n " , res , addr ) ;
}
down_write ( & pci_bus_sem ) ;
list_add_tail ( & bus - > node , & pci_root_buses ) ;
up_write ( & pci_bus_sem ) ;
return 0 ;
unregister :
put_device ( & bridge - > dev ) ;
device_unregister ( & bridge - > dev ) ;
free :
kfree ( bus ) ;
return err ;
}
PCI: Check whether bridges allow access to extended config space
Even if a device supports extended config space, i.e., it is a PCI-X Mode 2
or a PCI Express device, the extended space may not be accessible if
there's a conventional PCI bus in the path to it.
We currently figure that out in pci_cfg_space_size() by reading the first
dword of extended config space. On most platforms that returns ~0 data if
the space is inaccessible, but it may set error bits in PCI status
registers, and on some platforms it causes exceptions that we currently
don't recover from.
For example, a PCIe-to-conventional PCI bridge treats config transactions
with a non-zero Extended Register Address as an Unsupported Request on PCIe
and a received Master-Abort on the destination bus (see PCI Express to
PCI/PCI-X Bridge spec, r1.0, sec 4.1.3).
A sample case is a LS1043A CPU (NXP QorIQ Layerscape) platform with the
following bus topology:
LS1043 PCIe Root Port
-> PEX8112 PCIe-to-PCI bridge (doesn't support ext cfg on PCI side)
-> PMC slot connector (for legacy PMC modules)
With a PMC module topology as follows:
PMC connector
-> PCI-to-PCIe bridge
-> PCIe switch (4 ports)
-> 4 PCIe devices (one on each port)
The PCIe devices on the PMC module support extended config space, but we
can't reach it because the PEX8112 can't generate accesses to the extended
space on its secondary bus. Attempts to access it cause Unsupported
Request errors, which result in synchronous aborts on this platform.
To avoid these errors, check whether bridges are capable of generating
extended config space addresses on their secondary interfaces. If they
can't, we restrict devices below the bridge to only the 256-byte
PCI-compatible config space.
Signed-off-by: Gilles Buloz <gilles.buloz@kontron.com>
[bhelgaas: changelog, rework patch so bus_flags testing is all in
pci_bridge_child_ext_cfg_accessible()]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2018-05-03 23:21:44 +03:00
static bool pci_bridge_child_ext_cfg_accessible ( struct pci_dev * bridge )
{
int pos ;
u32 status ;
/*
* If extended config space isn ' t accessible on a bridge ' s primary
* bus , we certainly can ' t access it on the secondary bus .
*/
if ( bridge - > bus - > bus_flags & PCI_BUS_FLAGS_NO_EXTCFG )
return false ;
/*
* PCIe Root Ports and switch ports are PCIe on both sides , so if
* extended config space is accessible on the primary , it ' s also
* accessible on the secondary .
*/
if ( pci_is_pcie ( bridge ) & &
( pci_pcie_type ( bridge ) = = PCI_EXP_TYPE_ROOT_PORT | |
pci_pcie_type ( bridge ) = = PCI_EXP_TYPE_UPSTREAM | |
pci_pcie_type ( bridge ) = = PCI_EXP_TYPE_DOWNSTREAM ) )
return true ;
/*
* For the other bridge types :
* - PCI - to - PCI bridges
* - PCIe - to - PCI / PCI - X forward bridges
* - PCI / PCI - X - to - PCIe reverse bridges
* extended config space on the secondary side is only accessible
* if the bridge supports PCI - X Mode 2.
*/
pos = pci_find_capability ( bridge , PCI_CAP_ID_PCIX ) ;
if ( ! pos )
return false ;
pci_read_config_dword ( bridge , pos + PCI_X_STATUS , & status ) ;
return status & ( PCI_X_STATUS_266MHZ | PCI_X_STATUS_533MHZ ) ;
}
2008-04-19 00:53:55 +04:00
static struct pci_bus * pci_alloc_child_bus ( struct pci_bus * parent ,
struct pci_dev * bridge , int busnr )
2005-04-17 02:20:36 +04:00
{
struct pci_bus * child ;
int i ;
2013-01-22 01:20:52 +04:00
int ret ;
2005-04-17 02:20:36 +04:00
2017-11-30 19:58:14 +03:00
/* Allocate a new bus and inherit stuff from the parent */
2014-09-29 18:29:26 +04:00
child = pci_alloc_bus ( parent ) ;
2005-04-17 02:20:36 +04:00
if ( ! child )
return NULL ;
child - > parent = parent ;
child - > ops = parent - > ops ;
2013-08-10 00:27:08 +04:00
child - > msi = parent - > msi ;
2005-04-17 02:20:36 +04:00
child - > sysdata = parent - > sysdata ;
2006-02-14 19:52:22 +03:00
child - > bus_flags = parent - > bus_flags ;
2005-04-17 02:20:36 +04:00
2017-11-30 19:58:14 +03:00
/*
* Initialize some portions of the bus device , but don ' t register
* it now as the parent is not properly set up yet .
2007-05-23 06:47:54 +04:00
*/
child - > dev . class = & pcibus_class ;
2008-10-30 04:17:49 +03:00
dev_set_name ( & child - > dev , " %04x:%02x " , pci_domain_nr ( child ) , busnr ) ;
2005-04-17 02:20:36 +04:00
2017-11-30 19:58:14 +03:00
/* Set up the primary, secondary and subordinate bus numbers */
2012-05-18 05:51:11 +04:00
child - > number = child - > busn_res . start = busnr ;
child - > primary = parent - > busn_res . start ;
child - > busn_res . end = 0xff ;
2005-04-17 02:20:36 +04:00
2013-01-22 01:20:52 +04:00
if ( ! bridge ) {
child - > dev . parent = parent - > bridge ;
goto add_dev ;
}
2008-11-21 21:41:07 +03:00
child - > self = bridge ;
child - > bridge = get_device ( & bridge - > dev ) ;
2013-01-22 01:20:52 +04:00
child - > dev . parent = child - > bridge ;
2011-04-11 05:37:07 +04:00
pci_set_bus_of_node ( child ) ;
2009-12-13 16:11:33 +03:00
pci_set_bus_speed ( child ) ;
PCI: Check whether bridges allow access to extended config space
Even if a device supports extended config space, i.e., it is a PCI-X Mode 2
or a PCI Express device, the extended space may not be accessible if
there's a conventional PCI bus in the path to it.
We currently figure that out in pci_cfg_space_size() by reading the first
dword of extended config space. On most platforms that returns ~0 data if
the space is inaccessible, but it may set error bits in PCI status
registers, and on some platforms it causes exceptions that we currently
don't recover from.
For example, a PCIe-to-conventional PCI bridge treats config transactions
with a non-zero Extended Register Address as an Unsupported Request on PCIe
and a received Master-Abort on the destination bus (see PCI Express to
PCI/PCI-X Bridge spec, r1.0, sec 4.1.3).
A sample case is a LS1043A CPU (NXP QorIQ Layerscape) platform with the
following bus topology:
LS1043 PCIe Root Port
-> PEX8112 PCIe-to-PCI bridge (doesn't support ext cfg on PCI side)
-> PMC slot connector (for legacy PMC modules)
With a PMC module topology as follows:
PMC connector
-> PCI-to-PCIe bridge
-> PCIe switch (4 ports)
-> 4 PCIe devices (one on each port)
The PCIe devices on the PMC module support extended config space, but we
can't reach it because the PEX8112 can't generate accesses to the extended
space on its secondary bus. Attempts to access it cause Unsupported
Request errors, which result in synchronous aborts on this platform.
To avoid these errors, check whether bridges are capable of generating
extended config space addresses on their secondary interfaces. If they
can't, we restrict devices below the bridge to only the 256-byte
PCI-compatible config space.
Signed-off-by: Gilles Buloz <gilles.buloz@kontron.com>
[bhelgaas: changelog, rework patch so bus_flags testing is all in
pci_bridge_child_ext_cfg_accessible()]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2018-05-03 23:21:44 +03:00
/*
* Check whether extended config space is accessible on the child
* bus . Note that we currently assume it is always accessible on
* the root bus .
*/
if ( ! pci_bridge_child_ext_cfg_accessible ( bridge ) ) {
child - > bus_flags | = PCI_BUS_FLAGS_NO_EXTCFG ;
pci_info ( child , " extended config space not accessible \n " ) ;
}
2017-11-30 19:58:14 +03:00
/* Set up default resource pointers and names */
2008-11-21 21:39:32 +03:00
for ( i = 0 ; i < PCI_BRIDGE_RESOURCE_NUM ; i + + ) {
2005-04-17 02:20:36 +04:00
child - > resource [ i ] = & bridge - > resource [ PCI_BRIDGE_RESOURCES + i ] ;
child - > resource [ i ] - > name = child - > name ;
}
bridge - > subordinate = child ;
2013-01-22 01:20:52 +04:00
add_dev :
2015-07-28 16:46:11 +03:00
pci_set_bus_msi_domain ( child ) ;
2013-01-22 01:20:52 +04:00
ret = device_register ( & child - > dev ) ;
WARN_ON ( ret < 0 ) ;
2013-04-12 09:44:20 +04:00
pcibios_add_bus ( child ) ;
2016-02-09 17:30:47 +03:00
if ( child - > ops - > add_bus ) {
ret = child - > ops - > add_bus ( child ) ;
if ( WARN_ON ( ret < 0 ) )
dev_err ( & child - > dev , " failed to add bus: %d \n " , ret ) ;
}
2013-01-22 01:20:52 +04:00
/* Create legacy_io and legacy_mem files for this bus */
pci_create_legacy_files ( child ) ;
2005-04-17 02:20:36 +04:00
return child ;
}
2014-04-19 04:13:49 +04:00
struct pci_bus * pci_add_new_bus ( struct pci_bus * parent , struct pci_dev * dev ,
int busnr )
2005-04-17 02:20:36 +04:00
{
struct pci_bus * child ;
child = pci_alloc_child_bus ( parent , dev , busnr ) ;
2005-04-28 11:25:48 +04:00
if ( child ) {
2006-06-02 08:35:43 +04:00
down_write ( & pci_bus_sem ) ;
2005-04-17 02:20:36 +04:00
list_add_tail ( & child - > node , & parent - > children ) ;
2006-06-02 08:35:43 +04:00
up_write ( & pci_bus_sem ) ;
2005-04-28 11:25:48 +04:00
}
2005-04-17 02:20:36 +04:00
return child ;
}
2014-04-26 00:32:25 +04:00
EXPORT_SYMBOL ( pci_add_new_bus ) ;
2005-04-17 02:20:36 +04:00
2014-09-03 03:26:00 +04:00
/*
 * pci_enable_crs() - turn on CRS Software Visibility when available
 * @pdev: device whose Root Capabilities / Root Control registers are used
 *
 * Sets PCI_EXP_RTCTL_CRSSVE only if the Root Capabilities register
 * advertises PCI_EXP_RTCAP_CRSVIS.
 */
static void pci_enable_crs(struct pci_dev *pdev)
{
	u16 rtcap = 0;

	pcie_capability_read_word(pdev, PCI_EXP_RTCAP, &rtcap);
	if (!(rtcap & PCI_EXP_RTCAP_CRSVIS))
		return;

	pcie_capability_set_word(pdev, PCI_EXP_RTCTL, PCI_EXP_RTCTL_CRSSVE);
}
2017-10-13 21:35:44 +03:00
/*
 * Forward declaration.  The function is static, so its definition is
 * expected to appear later in this file.
 */
static unsigned int pci_scan_child_bus_extend ( struct pci_bus * bus ,
unsigned int available_buses ) ;
2018-11-19 16:14:32 +03:00
/**
* pci_ea_fixed_busnrs ( ) - Read fixed Secondary and Subordinate bus
* numbers from EA capability .
* @ dev : Bridge
* @ sec : updated with secondary bus number from EA
* @ sub : updated with subordinate bus number from EA
*
2019-11-04 09:57:44 +03:00
* If @ dev is a bridge with EA capability that specifies valid secondary
* and subordinate bus numbers , return true with the bus numbers in @ sec
* and @ sub . Otherwise return false .
2018-11-19 16:14:32 +03:00
*/
static bool pci_ea_fixed_busnrs(struct pci_dev *dev, u8 *sec, u8 *sub)
{
	u8 fixed_sec, fixed_sub;
	u32 entry;
	int cap;

	/* Only bridges carry secondary/subordinate bus numbers */
	if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
		return false;

	/* Locate the EA capability in the capability list */
	cap = pci_find_capability(dev, PCI_CAP_ID_EA);
	if (!cap)
		return false;

	pci_read_config_dword(dev, cap + PCI_EA_FIRST_ENT, &entry);
	fixed_sec = entry & PCI_EA_SEC_BUS_MASK;
	fixed_sub = (entry & PCI_EA_SUB_BUS_MASK) >> PCI_EA_SUB_BUS_SHIFT;

	/* Reject an unset secondary number or an inverted range */
	if (fixed_sec == 0 || fixed_sub < fixed_sec)
		return false;

	/* The outputs are written only once the range is known to be valid */
	*sec = fixed_sec;
	*sub = fixed_sub;

	return true;
}
2017-10-13 21:35:44 +03:00
2005-04-17 02:20:36 +04:00
/*
2017-10-13 21:35:44 +03:00
* pci_scan_bridge_extend ( ) - Scan buses behind a bridge
* @ bus : Parent bus the bridge is on
* @ dev : Bridge itself
* @ max : Starting subordinate number of buses behind this bridge
* @ available_buses : Total number of buses available for this bridge and
* the devices below . After the minimal bus space has
* been allocated the remaining buses will be
* distributed equally between hotplug - capable bridges .
* @ pass : Either % 0 ( scan already configured bridges ) or % 1 ( scan bridges
* that need to be reconfigured .
*
2005-04-17 02:20:36 +04:00
* If it ' s a bridge , configure it and scan the bus behind it .
* For CardBus bridges , we don ' t scan behind as the devices will
* be handled by the bridge driver itself .
*
* We need to process bridges in two passes - - first we scan those
* already configured by the BIOS and after we are done with all of
* them , we proceed to assigning numbers to the remaining buses in
* order to avoid overlaps between old and new bus numbers .
2018-05-28 15:47:56 +03:00
*
* Return : New subordinate number covering all buses behind this bridge .
2005-04-17 02:20:36 +04:00
*/
2017-10-13 21:35:44 +03:00
static int pci_scan_bridge_extend ( struct pci_bus * bus , struct pci_dev * dev ,
int max , unsigned int available_buses ,
int pass )
2005-04-17 02:20:36 +04:00
{
struct pci_bus * child ;
int is_cardbus = ( dev - > hdr_type = = PCI_HEADER_TYPE_CARDBUS ) ;
2005-12-08 18:53:12 +03:00
u32 buses , i , j = 0 ;
2005-04-17 02:20:36 +04:00
u16 bctl ;
2010-03-17 00:52:58 +03:00
u8 primary , secondary , subordinate ;
2008-10-21 03:06:29 +04:00
int broken = 0 ;
2018-11-19 16:14:32 +03:00
bool fixed_buses ;
u8 fixed_sec , fixed_sub ;
int next_busnr ;
2005-04-17 02:20:36 +04:00
2016-06-02 11:17:13 +03:00
/*
* Make sure the bridge is powered on to be able to access config
* space of devices below it .
*/
pm_runtime_get_sync ( & dev - > dev ) ;
2005-04-17 02:20:36 +04:00
pci_read_config_dword ( dev , PCI_PRIMARY_BUS , & buses ) ;
2010-03-17 00:52:58 +03:00
primary = buses & 0xFF ;
secondary = ( buses > > 8 ) & 0xFF ;
subordinate = ( buses > > 16 ) & 0xFF ;
2005-04-17 02:20:36 +04:00
2018-01-18 21:55:24 +03:00
pci_dbg ( dev , " scanning [bus %02x-%02x] behind bridge, pass %d \n " ,
2010-03-17 00:52:58 +03:00
secondary , subordinate , pass ) ;
2005-04-17 02:20:36 +04:00
2012-01-30 15:25:24 +04:00
if ( ! primary & & ( primary ! = bus - > number ) & & secondary & & subordinate ) {
2018-01-18 21:55:24 +03:00
pci_warn ( dev , " Primary bus is hard wired to 0 \n " ) ;
2012-01-30 15:25:24 +04:00
primary = bus - > number ;
}
2008-10-21 03:06:29 +04:00
/* Check if setup is sensible at all */
if ( ! pass & &
2012-09-11 04:19:33 +04:00
( primary ! = bus - > number | | secondary < = bus - > number | |
Revert "PCI: Make sure bus number resources stay within their parents bounds"
This reverts commit 1820ffdccb9b ("PCI: Make sure bus number resources stay
within their parents bounds") because it breaks some systems with LSI Logic
FC949ES Fibre Channel Adapters, apparently by exposing a defect in those
adapters.
Dirk tested a Tyan VX50 (B4985) with this device that worked like this
prior to 1820ffdccb9b:
bus: [bus 00-7f] on node 0 link 1
ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-07])
pci 0000:00:0e.0: PCI bridge to [bus 0a]
pci_bus 0000:0a: busn_res: can not insert [bus 0a] under [bus 00-07] (conflicts with (null) [bus 00-07])
pci 0000:0a:00.0: [1000:0646] type 00 class 0x0c0400 (FC adapter)
Note that the root bridge [bus 00-07] aperture is wrong; this is a BIOS
defect in the PCI0 _CRS method. But prior to 1820ffdccb9b, we didn't
enforce that aperture, and the FC adapter worked fine at 0a:00.0.
After 1820ffdccb9b, we notice that 00:0e.0's aperture is not contained in
the root bridge's aperture, so we reconfigure it so it *is* contained:
pci 0000:00:0e.0: bridge configuration invalid ([bus 0a-0a]), reconfiguring
pci 0000:00:0e.0: PCI bridge to [bus 06-07]
This effectively moves the FC device from 0a:00.0 to 07:00.0, which should
be legal. But when we enumerate bus 06, the FC device doesn't respond, so
we don't find anything. This is probably a defect in the FC device.
Possible fixes (due to Yinghai):
1) Add a quirk to fix the _CRS information based on what amd_bus.c read
from the hardware
2) Reset the FC device after we change its bus number
3) Revert 1820ffdccb9b
Fix 1 would be relatively easy, but it does sweep the LSI FC issue under
the rug. We might want to reconfigure bus numbers in the future for some
other reason, e.g., hotplug, and then we could trip over this again.
For that reason, I like fix 2, but we don't know whether it actually works,
and we don't have a patch for it yet.
This revert is fix 3, which also sweeps the LSI FC issue under the rug.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=84281
Reported-by: Dirk Gouders <dirk@gouders.net>
Tested-by: Dirk Gouders <dirk@gouders.net>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: stable@vger.kernel.org # v3.15+
CC: Yinghai Lu <yinghai@kernel.org>
2014-09-19 21:08:40 +04:00
secondary > subordinate ) ) {
2018-01-18 21:55:24 +03:00
pci_info ( dev , " bridge configuration invalid ([bus %02x-%02x]), reconfiguring \n " ,
2012-09-11 04:19:33 +04:00
secondary , subordinate ) ;
2008-10-21 03:06:29 +04:00
broken = 1 ;
}
2017-11-30 19:58:14 +03:00
/*
* Disable Master - Abort Mode during probing to avoid reporting of
* bus errors in some architectures .
*/
2005-04-17 02:20:36 +04:00
pci_read_config_word ( dev , PCI_BRIDGE_CONTROL , & bctl ) ;
pci_write_config_word ( dev , PCI_BRIDGE_CONTROL ,
bctl & ~ PCI_BRIDGE_CTL_MASTER_ABORT ) ;
2014-09-03 03:26:00 +04:00
pci_enable_crs ( dev ) ;
2010-03-17 00:52:58 +03:00
if ( ( secondary | | subordinate ) & & ! pcibios_assign_all_busses ( ) & &
! is_cardbus & & ! broken ) {
unsigned int cmax ;
2017-11-30 19:58:14 +03:00
2005-04-17 02:20:36 +04:00
/*
2017-11-30 19:58:14 +03:00
* Bus already configured by firmware , process it in the
* first pass and just note the configuration .
2005-04-17 02:20:36 +04:00
*/
if ( pass )
[PATCH] PCI: Avoid leaving MASTER_ABORT disabled permanently when returning from pci_scan_bridge.
> On Mon, Feb 13, 2006 at 05:13:21PM -0800, David S. Miller wrote:
> >
> > In drivers/pci/probe.c:pci_scan_bridge(), if this is not the first
> > pass (pass != 0) we don't restore the PCI_BRIDGE_CONTROL_REGISTER and
> > thus leave PCI_BRIDGE_CTL_MASTER_ABORT off:
> >
> > int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev * dev, int max, int pass)
> > {
> > ...
> > /* Disable MasterAbortMode during probing to avoid reporting
> > of bus errors (in some architectures) */
> > pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &bctl);
> > pci_write_config_word(dev, PCI_BRIDGE_CONTROL,
> > bctl & ~PCI_BRIDGE_CTL_MASTER_ABORT);
> > ...
> > if ((buses & 0xffff00) && !pcibios_assign_all_busses() && !is_cardbus) {
> > unsigned int cmax, busnr;
> > /*
> > * Bus already configured by firmware, process it in the first
> > * pass and just note the configuration.
> > */
> > if (pass)
> > return max;
> > ...
> > }
> >
> > pci_write_config_word(dev, PCI_BRIDGE_CONTROL, bctl);
> > ...
> >
> > This doesn't seem intentional.
Agreed, looks like an accident. The patch [1] originally came from Kip
Walker (Broadcom back then) between 2.6.0-test3 and 2.6.0-test4. As I
recall it was supposed to fix an issue with with PCI aborts being
signalled by the PCI bridge of the Broadcom BCM1250 family of SOCs when
probing behind pci_scan_bridge. It is undeseriable to disable
PCI_BRIDGE_CTL_MASTER_ABORT in pci_{read,write)_config_* and the
behaviour wasn't considered a bug in need of a workaround, so this was
put in probe.c.
I don't have an affected system at hand, so can't really test but I
propose something like the below patch.
[1] http://www.linux-mips.org/git?p=linux.git;a=commit;h=599457e0cb702a31a3247ea6a5d9c6c99c4cf195
[PCI] Avoid leaving MASTER_ABORT disabled permanently when returning from pci_scan_bridge.
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
2006-02-14 19:23:57 +03:00
goto out ;
2005-04-17 02:20:36 +04:00
/*
2017-11-30 19:58:14 +03:00
* The bus might already exist for two reasons : Either we
* are rescanning the bus or the bus is reachable through
* more than one bridge . The second case can happen with
* the i450NX chipset .
2005-04-17 02:20:36 +04:00
*/
2010-03-17 00:52:58 +03:00
child = pci_find_bus ( pci_domain_nr ( bus ) , secondary ) ;
2009-03-20 23:56:10 +03:00
if ( ! child ) {
2010-03-17 00:52:58 +03:00
child = pci_add_new_bus ( bus , dev , secondary ) ;
2009-03-20 23:56:10 +03:00
if ( ! child )
goto out ;
2010-03-17 00:52:58 +03:00
child - > primary = primary ;
2012-05-18 05:51:13 +04:00
pci_bus_insert_busn_res ( child , secondary , subordinate ) ;
2009-03-20 23:56:10 +03:00
child - > bridge_ctl = bctl ;
2005-04-17 02:20:36 +04:00
}
cmax = pci_scan_child_bus ( child ) ;
2014-01-24 00:59:27 +04:00
if ( cmax > subordinate )
2018-01-18 21:55:24 +03:00
pci_warn ( dev , " bridge has subordinate %02x but max busn %02x \n " ,
2014-01-24 00:59:27 +04:00
subordinate , cmax ) ;
2017-11-30 19:58:14 +03:00
/* Subordinate should equal child->busn_res.end */
2014-01-24 00:59:27 +04:00
if ( subordinate > max )
max = subordinate ;
2005-04-17 02:20:36 +04:00
} else {
2017-11-30 19:58:14 +03:00
2005-04-17 02:20:36 +04:00
/*
* We need to assign a number to this bus which we always
* do in the second pass .
*/
2005-09-23 08:06:31 +04:00
if ( ! pass ) {
2014-01-24 00:59:23 +04:00
if ( pcibios_assign_all_busses ( ) | | broken | | is_cardbus )
2017-11-30 19:58:14 +03:00
/*
* Temporarily disable forwarding of the
* configuration cycles on all bridges in
* this bus segment to avoid possible
* conflicts in the second pass between two
* bridges programmed with overlapping bus
* ranges .
*/
2005-09-23 08:06:31 +04:00
pci_write_config_dword ( dev , PCI_PRIMARY_BUS ,
buses & ~ 0xffffff ) ;
[PATCH] PCI: Avoid leaving MASTER_ABORT disabled permanently when returning from pci_scan_bridge.
> On Mon, Feb 13, 2006 at 05:13:21PM -0800, David S. Miller wrote:
> >
> > In drivers/pci/probe.c:pci_scan_bridge(), if this is not the first
> > pass (pass != 0) we don't restore the PCI_BRIDGE_CONTROL_REGISTER and
> > thus leave PCI_BRIDGE_CTL_MASTER_ABORT off:
> >
> > int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev * dev, int max, int pass)
> > {
> > ...
> > /* Disable MasterAbortMode during probing to avoid reporting
> > of bus errors (in some architectures) */
> > pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &bctl);
> > pci_write_config_word(dev, PCI_BRIDGE_CONTROL,
> > bctl & ~PCI_BRIDGE_CTL_MASTER_ABORT);
> > ...
> > if ((buses & 0xffff00) && !pcibios_assign_all_busses() && !is_cardbus) {
> > unsigned int cmax, busnr;
> > /*
> > * Bus already configured by firmware, process it in the first
> > * pass and just note the configuration.
> > */
> > if (pass)
> > return max;
> > ...
> > }
> >
> > pci_write_config_word(dev, PCI_BRIDGE_CONTROL, bctl);
> > ...
> >
> > This doesn't seem intentional.
Agreed, looks like an accident. The patch [1] originally came from Kip
Walker (Broadcom back then) between 2.6.0-test3 and 2.6.0-test4. As I
recall it was supposed to fix an issue with with PCI aborts being
signalled by the PCI bridge of the Broadcom BCM1250 family of SOCs when
probing behind pci_scan_bridge. It is undeseriable to disable
PCI_BRIDGE_CTL_MASTER_ABORT in pci_{read,write)_config_* and the
behaviour wasn't considered a bug in need of a workaround, so this was
put in probe.c.
I don't have an affected system at hand, so can't really test but I
propose something like the below patch.
[1] http://www.linux-mips.org/git?p=linux.git;a=commit;h=599457e0cb702a31a3247ea6a5d9c6c99c4cf195
[PCI] Avoid leaving MASTER_ABORT disabled permanently when returning from pci_scan_bridge.
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
2006-02-14 19:23:57 +03:00
goto out ;
2005-09-23 08:06:31 +04:00
}
2005-04-17 02:20:36 +04:00
/* Clear errors */
pci_write_config_word ( dev , PCI_STATUS , 0xffff ) ;
2018-11-19 16:14:32 +03:00
/* Read bus numbers from EA Capability (if present) */
fixed_buses = pci_ea_fixed_busnrs ( dev , & fixed_sec , & fixed_sub ) ;
if ( fixed_buses )
next_busnr = fixed_sec ;
else
next_busnr = max + 1 ;
2017-11-30 19:58:14 +03:00
/*
* Prevent assigning a bus number that already exists .
* This can happen when a bridge is hot - plugged , so in this
* case we only re - scan this bus .
*/
2018-11-19 16:14:32 +03:00
child = pci_find_bus ( pci_domain_nr ( bus ) , next_busnr ) ;
2011-06-02 07:02:50 +04:00
if ( ! child ) {
2018-11-19 16:14:32 +03:00
child = pci_add_new_bus ( bus , dev , next_busnr ) ;
2011-06-02 07:02:50 +04:00
if ( ! child )
goto out ;
2018-11-19 16:14:32 +03:00
pci_bus_insert_busn_res ( child , next_busnr ,
2017-10-13 21:35:43 +03:00
bus - > busn_res . end ) ;
2011-06-02 07:02:50 +04:00
}
2014-01-24 00:59:21 +04:00
max + + ;
2017-10-13 21:35:44 +03:00
if ( available_buses )
available_buses - - ;
2005-04-17 02:20:36 +04:00
buses = ( buses & 0xff000000 )
| ( ( unsigned int ) ( child - > primary ) < < 0 )
2012-05-18 05:51:11 +04:00
| ( ( unsigned int ) ( child - > busn_res . start ) < < 8 )
| ( ( unsigned int ) ( child - > busn_res . end ) < < 16 ) ;
2005-04-17 02:20:36 +04:00
/*
* yenta . c forces a secondary latency timer of 176.
* Copy that behaviour here .
*/
if ( is_cardbus ) {
buses & = ~ 0xff000000 ;
buses | = CARDBUS_LATENCY_TIMER < < 24 ;
}
2011-01-24 23:14:33 +03:00
2017-11-30 19:58:14 +03:00
/* We need to blast all three values with a single write */
2005-04-17 02:20:36 +04:00
pci_write_config_dword ( dev , PCI_PRIMARY_BUS , buses ) ;
if ( ! is_cardbus ) {
2007-10-09 03:24:16 +04:00
child - > bridge_ctl = bctl ;
2017-10-13 21:35:44 +03:00
max = pci_scan_child_bus_extend ( child , available_buses ) ;
2005-04-17 02:20:36 +04:00
} else {
2017-11-30 19:58:14 +03:00
2005-04-17 02:20:36 +04:00
/*
2017-11-30 19:58:14 +03:00
* For CardBus bridges , we leave 4 bus numbers as
* cards with a PCI - to - PCI bridge can be inserted
* later .
2005-04-17 02:20:36 +04:00
*/
2014-04-19 04:13:49 +04:00
for ( i = 0 ; i < CARDBUS_RESERVE_BUSNR ; i + + ) {
2005-12-08 18:53:12 +03:00
struct pci_bus * parent = bus ;
2005-04-28 11:25:47 +04:00
if ( pci_find_bus ( pci_domain_nr ( bus ) ,
max + i + 1 ) )
break ;
2005-12-08 18:53:12 +03:00
while ( parent - > parent ) {
if ( ( ! pcibios_assign_all_busses ( ) ) & &
2012-05-18 05:51:11 +04:00
( parent - > busn_res . end > max ) & &
( parent - > busn_res . end < = max + i ) ) {
2005-12-08 18:53:12 +03:00
j = 1 ;
}
parent = parent - > parent ;
}
if ( j ) {
2017-11-30 19:58:14 +03:00
2005-12-08 18:53:12 +03:00
/*
2017-11-30 19:58:14 +03:00
* Often , there are two CardBus
* bridges - - try to leave one
* valid bus number for each one .
2005-12-08 18:53:12 +03:00
*/
i / = 2 ;
break ;
}
}
2005-04-28 11:25:47 +04:00
max + = i ;
2005-04-17 02:20:36 +04:00
}
2017-11-30 19:58:14 +03:00
2018-11-19 16:14:32 +03:00
/*
* Set subordinate bus number to its real value .
* If fixed subordinate bus number exists from EA
* capability then use it .
*/
if ( fixed_buses )
max = fixed_sub ;
2012-05-18 05:51:13 +04:00
pci_bus_update_busn_res_end ( child , max ) ;
2005-04-17 02:20:36 +04:00
pci_write_config_byte ( dev , PCI_SUBORDINATE_BUS , max ) ;
}
2008-02-09 01:00:52 +03:00
sprintf ( child - > name ,
( is_cardbus ? " PCI CardBus %04x:%02x " : " PCI Bus %04x:%02x " ) ,
pci_domain_nr ( bus ) , child - > number ) ;
2005-04-17 02:20:36 +04:00
2018-05-24 21:23:52 +03:00
/* Check that all devices are accessible */
2005-12-08 18:53:12 +03:00
while ( bus - > parent ) {
2012-05-18 05:51:11 +04:00
if ( ( child - > busn_res . end > bus - > busn_res . end ) | |
( child - > number > bus - > busn_res . end ) | |
2005-12-08 18:53:12 +03:00
( child - > number < bus - > number ) | |
2012-05-18 05:51:11 +04:00
( child - > busn_res . end < bus - > number ) ) {
2018-05-24 21:23:52 +03:00
dev_info ( & dev - > dev , " devices behind bridge are unusable because %pR cannot be assigned for them \n " ,
& child - > busn_res ) ;
break ;
2005-12-08 18:53:12 +03:00
}
bus = bus - > parent ;
}
[PATCH] PCI: Avoid leaving MASTER_ABORT disabled permanently when returning from pci_scan_bridge.
> On Mon, Feb 13, 2006 at 05:13:21PM -0800, David S. Miller wrote:
> >
> > In drivers/pci/probe.c:pci_scan_bridge(), if this is not the first
> > pass (pass != 0) we don't restore the PCI_BRIDGE_CONTROL_REGISTER and
> > thus leave PCI_BRIDGE_CTL_MASTER_ABORT off:
> >
> > int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev * dev, int max, int pass)
> > {
> > ...
> > /* Disable MasterAbortMode during probing to avoid reporting
> > of bus errors (in some architectures) */
> > pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &bctl);
> > pci_write_config_word(dev, PCI_BRIDGE_CONTROL,
> > bctl & ~PCI_BRIDGE_CTL_MASTER_ABORT);
> > ...
> > if ((buses & 0xffff00) && !pcibios_assign_all_busses() && !is_cardbus) {
> > unsigned int cmax, busnr;
> > /*
> > * Bus already configured by firmware, process it in the first
> > * pass and just note the configuration.
> > */
> > if (pass)
> > return max;
> > ...
> > }
> >
> > pci_write_config_word(dev, PCI_BRIDGE_CONTROL, bctl);
> > ...
> >
> > This doesn't seem intentional.
Agreed, looks like an accident. The patch [1] originally came from Kip
Walker (Broadcom back then) between 2.6.0-test3 and 2.6.0-test4. As I
recall it was supposed to fix an issue with with PCI aborts being
signalled by the PCI bridge of the Broadcom BCM1250 family of SOCs when
probing behind pci_scan_bridge. It is undeseriable to disable
PCI_BRIDGE_CTL_MASTER_ABORT in pci_{read,write)_config_* and the
behaviour wasn't considered a bug in need of a workaround, so this was
put in probe.c.
I don't have an affected system at hand, so can't really test but I
propose something like the below patch.
[1] http://www.linux-mips.org/git?p=linux.git;a=commit;h=599457e0cb702a31a3247ea6a5d9c6c99c4cf195
[PCI] Avoid leaving MASTER_ABORT disabled permanently when returning from pci_scan_bridge.
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
2006-02-14 19:23:57 +03:00
out :
pci_write_config_word ( dev , PCI_BRIDGE_CONTROL , bctl ) ;
2016-06-02 11:17:13 +03:00
pm_runtime_put ( & dev - > dev ) ;
2005-04-17 02:20:36 +04:00
return max ;
}
2017-10-13 21:35:44 +03:00
/*
 * pci_scan_bridge() - Scan buses behind a bridge
 * @bus: Parent bus the bridge is on
 * @dev: Bridge itself
 * @max: Starting subordinate number of buses behind this bridge
 * @pass: Either %0 (scan already configured bridges) or %1 (scan bridges
 *	  that need to be reconfigured).
 *
 * Forwards to pci_scan_bridge_extend() with zero available buses.
 *
 * If it's a bridge, configure it and scan the bus behind it.
 * For CardBus bridges, we don't scan behind as the devices will
 * be handled by the bridge driver itself.
 *
 * Bridges are processed in two passes: those already configured by the
 * BIOS are scanned first, and only afterwards are numbers assigned to the
 * remaining buses, so that old and new bus numbers do not overlap.
 *
 * Return: New subordinate number covering all buses behind this bridge.
 */
int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, int pass)
{
	const unsigned int no_extra_buses = 0;

	return pci_scan_bridge_extend(bus, dev, max, no_extra_buses, pass);
}
2014-04-26 00:32:25 +04:00
EXPORT_SYMBOL ( pci_scan_bridge ) ;
2005-04-17 02:20:36 +04:00
/*
* Read interrupt line and base address registers .
* The architecture - dependent code can tweak these , of course .
*/
static void pci_read_irq ( struct pci_dev * dev )
{
unsigned char irq ;
2018-01-17 21:30:29 +03:00
/* VFs are not allowed to use INTx, so skip the config reads */
if ( dev - > is_virtfn ) {
dev - > pin = 0 ;
dev - > irq = 0 ;
return ;
}
2005-04-17 02:20:36 +04:00
pci_read_config_byte ( dev , PCI_INTERRUPT_PIN , & irq ) ;
2005-11-03 03:24:32 +03:00
dev - > pin = irq ;
2005-04-17 02:20:36 +04:00
if ( irq )
pci_read_config_byte ( dev , PCI_INTERRUPT_LINE , & irq ) ;
dev - > irq = irq ;
}
2010-01-26 20:10:03 +03:00
void set_pcie_port_type ( struct pci_dev * pdev )
2009-03-20 06:25:14 +03:00
{
int pos ;
u16 reg16 ;
PCI: Add dev->has_secondary_link to track downstream PCIe links
A PCIe Port is an interface to a Link. A Root Port is a PCI-PCI bridge in
a Root Complex and has a Link on its secondary (downstream) side. For
other Ports, the Link may be on either the upstream (closer to the Root
Complex) or downstream side of the Port.
The usual topology has a Root Port connected to an Upstream Port. We
previously assumed this was the only possible topology, and that a
Downstream Port's Link was always on its downstream side, like this:
+---------------------+
+------+ | Downstream |
| Root | | Upstream Port +--Link--
| Port +--Link--+ Port |
+------+ | Downstream |
| Port +--Link--
+---------------------+
But systems do exist (see URL below) where the Root Port is connected to a
Downstream Port. In this case, a Downstream Port's Link may be on either
the upstream or downstream side:
+---------------------+
+------+ | Upstream |
| Root | | Downstream Port +--Link--
| Port +--Link--+ Port |
+------+ | Downstream |
| Port +--Link--
+---------------------+
We can't use the Port type to determine which side the Link is on, so add a
bit in struct pci_dev to keep track.
A Root Port's Link is always on the Port's secondary side. A component
(Endpoint or Port) on the other end of the Link obviously has the Link on
its upstream side. If that component is a Port, it is part of a Switch or
a Bridge. A Bridge has a PCI or PCI-X bus on its secondary side, not a
Link. The internal bus of a Switch connects the Port to another Port whose
Link is on the downstream side.
[bhelgaas: changelog, comment, cache "type", use if/else]
Link: http://lkml.kernel.org/r/54EB81B2.4050904@pobox.com
Link: https://bugzilla.kernel.org/show_bug.cgi?id=94361
Suggested-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2015-05-21 10:05:02 +03:00
int type ;
struct pci_dev * parent ;
2009-03-20 06:25:14 +03:00
pos = pci_find_capability ( pdev , PCI_CAP_ID_EXP ) ;
if ( ! pos )
return ;
PCI: Enumerate switches below PCI-to-PCIe bridges
A PCI-to-PCIe bridge (a "reverse bridge") has a PCI or PCI-X primary
interface and a PCI Express secondary interface. The PCIe interface is a
Downstream Port that originates a Link. See the "PCI Express to PCI/PCI-X
Bridge Specification", rev 1.0, sections 1.2 and A.6.
The bug report below involves a PCI-to-PCIe bridge and a PCIe switch below
the bridge:
00:1e.0 Intel 82801 PCI Bridge to [bus 01-0a]
01:00.0 Pericom PI7C9X111SL PCIe-to-PCI Reversible Bridge to [bus 02-0a]
02:00.0 Pericom Device 8608 [PCIe Upstream Port] to [bus 03-0a]
03:01.0 Pericom Device 8608 [PCIe Downstream Port] to [bus 0a]
01:00.0 is configured as a PCI-to-PCIe bridge (despite the name printed by
lspci). As we traverse a PCIe hierarchy, device connections alternate
between PCIe Links and internal Switch logic. Previously we did not
recognize that 01:00.0 had a secondary link, so we thought the 02:00.0
Upstream Port *did* have a secondary link. In fact, it's the other way
around: 01:00.0 has a secondary link, and 02:00.0 has internal Switch logic
on its secondary side.
When we thought 02:00.0 had a secondary link, the pci_scan_slot() ->
only_one_child() path assumed 02:00.0 could have only one child, so 03:00.0
was the only possible downstream device. But 03:00.0 doesn't exist, so we
didn't look for any other devices on bus 03.
Booting with "pci=pcie_scan_all" is a workaround, but we don't want users
to have to do that.
Recognize that PCI-to-PCIe bridges originate links on their secondary
interfaces.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=189361
Fixes: d0751b98dfa3 ("PCI: Add dev->has_secondary_link to track downstream PCIe links")
Tested-by: Blake Moore <blake.moore@men.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: stable@vger.kernel.org # v4.2+
2017-01-11 18:11:53 +03:00
2009-11-05 06:05:11 +03:00
pdev - > pcie_cap = pos ;
2009-03-20 06:25:14 +03:00
pci_read_config_word ( pdev , pos + PCI_EXP_FLAGS , & reg16 ) ;
2012-07-24 13:20:02 +04:00
pdev - > pcie_flags_reg = reg16 ;
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sent by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce performances of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
pci_read_config_word ( pdev , pos + PCI_EXP_DEVCAP , & reg16 ) ;
pdev - > pcie_mpss = reg16 & PCI_EXP_DEVCAP_PAYLOAD ;
PCI: Add dev->has_secondary_link to track downstream PCIe links
A PCIe Port is an interface to a Link. A Root Port is a PCI-PCI bridge in
a Root Complex and has a Link on its secondary (downstream) side. For
other Ports, the Link may be on either the upstream (closer to the Root
Complex) or downstream side of the Port.
The usual topology has a Root Port connected to an Upstream Port. We
previously assumed this was the only possible topology, and that a
Downstream Port's Link was always on its downstream side, like this:
+---------------------+
+------+ | Downstream |
| Root | | Upstream Port +--Link--
| Port +--Link--+ Port |
+------+ | Downstream |
| Port +--Link--
+---------------------+
But systems do exist (see URL below) where the Root Port is connected to a
Downstream Port. In this case, a Downstream Port's Link may be on either
the upstream or downstream side:
+---------------------+
+------+ | Upstream |
| Root | | Downstream Port +--Link--
| Port +--Link--+ Port |
+------+ | Downstream |
| Port +--Link--
+---------------------+
We can't use the Port type to determine which side the Link is on, so add a
bit in struct pci_dev to keep track.
A Root Port's Link is always on the Port's secondary side. A component
(Endpoint or Port) on the other end of the Link obviously has the Link on
its upstream side. If that component is a Port, it is part of a Switch or
a Bridge. A Bridge has a PCI or PCI-X bus on its secondary side, not a
Link. The internal bus of a Switch connects the Port to another Port whose
Link is on the downstream side.
[bhelgaas: changelog, comment, cache "type", use if/else]
Link: http://lkml.kernel.org/r/54EB81B2.4050904@pobox.com
Link: https://bugzilla.kernel.org/show_bug.cgi?id=94361
Suggested-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2015-05-21 10:05:02 +03:00
2019-08-22 11:55:53 +03:00
parent = pci_upstream_bridge ( pdev ) ;
if ( ! parent )
return ;
PCI: Add dev->has_secondary_link to track downstream PCIe links
A PCIe Port is an interface to a Link. A Root Port is a PCI-PCI bridge in
a Root Complex and has a Link on its secondary (downstream) side. For
other Ports, the Link may be on either the upstream (closer to the Root
Complex) or downstream side of the Port.
The usual topology has a Root Port connected to an Upstream Port. We
previously assumed this was the only possible topology, and that a
Downstream Port's Link was always on its downstream side, like this:
+---------------------+
+------+ | Downstream |
| Root | | Upstream Port +--Link--
| Port +--Link--+ Port |
+------+ | Downstream |
| Port +--Link--
+---------------------+
But systems do exist (see URL below) where the Root Port is connected to a
Downstream Port. In this case, a Downstream Port's Link may be on either
the upstream or downstream side:
+---------------------+
+------+ | Upstream |
| Root | | Downstream Port +--Link--
| Port +--Link--+ Port |
+------+ | Downstream |
| Port +--Link--
+---------------------+
We can't use the Port type to determine which side the Link is on, so add a
bit in struct pci_dev to keep track.
A Root Port's Link is always on the Port's secondary side. A component
(Endpoint or Port) on the other end of the Link obviously has the Link on
its upstream side. If that component is a Port, it is part of a Switch or
a Bridge. A Bridge has a PCI or PCI-X bus on its secondary side, not a
Link. The internal bus of a Switch connects the Port to another Port whose
Link is on the downstream side.
[bhelgaas: changelog, comment, cache "type", use if/else]
Link: http://lkml.kernel.org/r/54EB81B2.4050904@pobox.com
Link: https://bugzilla.kernel.org/show_bug.cgi?id=94361
Suggested-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2015-05-21 10:05:02 +03:00
/*
2019-08-22 11:55:53 +03:00
* Some systems do not identify their upstream / downstream ports
* correctly so detect impossible configurations here and correct
* the port type accordingly .
PCI: Add dev->has_secondary_link to track downstream PCIe links
A PCIe Port is an interface to a Link. A Root Port is a PCI-PCI bridge in
a Root Complex and has a Link on its secondary (downstream) side. For
other Ports, the Link may be on either the upstream (closer to the Root
Complex) or downstream side of the Port.
The usual topology has a Root Port connected to an Upstream Port. We
previously assumed this was the only possible topology, and that a
Downstream Port's Link was always on its downstream side, like this:
+---------------------+
+------+ | Downstream |
| Root | | Upstream Port +--Link--
| Port +--Link--+ Port |
+------+ | Downstream |
| Port +--Link--
+---------------------+
But systems do exist (see URL below) where the Root Port is connected to a
Downstream Port. In this case, a Downstream Port's Link may be on either
the upstream or downstream side:
+---------------------+
+------+ | Upstream |
| Root | | Downstream Port +--Link--
| Port +--Link--+ Port |
+------+ | Downstream |
| Port +--Link--
+---------------------+
We can't use the Port type to determine which side the Link is on, so add a
bit in struct pci_dev to keep track.
A Root Port's Link is always on the Port's secondary side. A component
(Endpoint or Port) on the other end of the Link obviously has the Link on
its upstream side. If that component is a Port, it is part of a Switch or
a Bridge. A Bridge has a PCI or PCI-X bus on its secondary side, not a
Link. The internal bus of a Switch connects the Port to another Port whose
Link is on the downstream side.
[bhelgaas: changelog, comment, cache "type", use if/else]
Link: http://lkml.kernel.org/r/54EB81B2.4050904@pobox.com
Link: https://bugzilla.kernel.org/show_bug.cgi?id=94361
Suggested-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2015-05-21 10:05:02 +03:00
*/
type = pci_pcie_type ( pdev ) ;
2019-08-22 11:55:53 +03:00
if ( type = = PCI_EXP_TYPE_DOWNSTREAM ) {
PCI: Tolerate hierarchies with no Root Port
We should not assume any particular hardware topology. Commit d0751b98dfa3
("PCI: Add dev->has_secondary_link to track downstream PCIe links") relied
on the assumption that every PCIe hierarchy is rooted at a Root Port. But
we can't rely on any assumption about what hardware we will find; we just
have to deal with the world as it is.
On some platforms, PCIe devices (endpoints, switch upstream ports, etc.)
appear directly on the root bus, and there is no Root Port in the PCI bus
hierarchy. For example, Meelis observed these top-level devices on a
Sparc V245:
0000:02:00.0 PCI bridge to [bus 03-0d] Switch Upstream Port
0001:02:00.0 PCI bridge to [bus 03] PCIe to PCI/PCI-X Bridge
These devices *look* like they have links going upstream, but there really
are no upstream devices.
In set_pcie_port_type(), we used the parent device to figure out which side
of a switch port has a link, so if the parent device did not exist, we
dereferenced a NULL parent pointer.
Check whether the parent device exists before dereferencing it.
Meelis observed this oops on Sparc V245 and T2000. Ben Herrenschmidt says
this is also possible on IBM PowerVM guests on PowerPC.
[bhelgaas: changelog, comment]
Link: http://lkml.kernel.org/r/alpine.LRH.2.20.1508122118210.18637@math.ut.ee
Reported-by: Meelis Roos <mroos@linux.ee>
Tested-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: David S. Miller <davem@davemloft.net>
2015-08-17 13:47:58 +03:00
/*
2019-08-22 11:55:53 +03:00
* If pdev claims to be downstream port but the parent
* device is also downstream port assume pdev is actually
* upstream port .
PCI: Tolerate hierarchies with no Root Port
We should not assume any particular hardware topology. Commit d0751b98dfa3
("PCI: Add dev->has_secondary_link to track downstream PCIe links") relied
on the assumption that every PCIe hierarchy is rooted at a Root Port. But
we can't rely on any assumption about what hardware we will find; we just
have to deal with the world as it is.
On some platforms, PCIe devices (endpoints, switch upstream ports, etc.)
appear directly on the root bus, and there is no Root Port in the PCI bus
hierarchy. For example, Meelis observed these top-level devices on a
Sparc V245:
0000:02:00.0 PCI bridge to [bus 03-0d] Switch Upstream Port
0001:02:00.0 PCI bridge to [bus 03] PCIe to PCI/PCI-X Bridge
These devices *look* like they have links going upstream, but there really
are no upstream devices.
In set_pcie_port_type(), we used the parent device to figure out which side
of a switch port has a link, so if the parent device did not exist, we
dereferenced a NULL parent pointer.
Check whether the parent device exists before dereferencing it.
Meelis observed this oops on Sparc V245 and T2000. Ben Herrenschmidt says
this is also possible on IBM PowerVM guests on PowerPC.
[bhelgaas: changelog, comment]
Link: http://lkml.kernel.org/r/alpine.LRH.2.20.1508122118210.18637@math.ut.ee
Reported-by: Meelis Roos <mroos@linux.ee>
Tested-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: David S. Miller <davem@davemloft.net>
2015-08-17 13:47:58 +03:00
*/
2019-08-22 11:55:53 +03:00
if ( pcie_downstream_port ( parent ) ) {
pci_info ( pdev , " claims to be downstream port but is acting as upstream port, correcting type \n " ) ;
pdev - > pcie_flags_reg & = ~ PCI_EXP_FLAGS_TYPE ;
pdev - > pcie_flags_reg | = PCI_EXP_TYPE_UPSTREAM ;
}
} else if ( type = = PCI_EXP_TYPE_UPSTREAM ) {
/*
* If pdev claims to be upstream port but the parent
* device is also upstream port assume pdev is actually
* downstream port .
*/
if ( pci_pcie_type ( parent ) = = PCI_EXP_TYPE_UPSTREAM ) {
pci_info ( pdev , " claims to be upstream port but is acting as downstream port, correcting type \n " ) ;
pdev - > pcie_flags_reg & = ~ PCI_EXP_FLAGS_TYPE ;
pdev - > pcie_flags_reg | = PCI_EXP_TYPE_DOWNSTREAM ;
}
PCI: Add dev->has_secondary_link to track downstream PCIe links
A PCIe Port is an interface to a Link. A Root Port is a PCI-PCI bridge in
a Root Complex and has a Link on its secondary (downstream) side. For
other Ports, the Link may be on either the upstream (closer to the Root
Complex) or downstream side of the Port.
The usual topology has a Root Port connected to an Upstream Port. We
previously assumed this was the only possible topology, and that a
Downstream Port's Link was always on its downstream side, like this:
+---------------------+
+------+ | Downstream |
| Root | | Upstream Port +--Link--
| Port +--Link--+ Port |
+------+ | Downstream |
| Port +--Link--
+---------------------+
But systems do exist (see URL below) where the Root Port is connected to a
Downstream Port. In this case, a Downstream Port's Link may be on either
the upstream or downstream side:
+---------------------+
+------+ | Upstream |
| Root | | Downstream Port +--Link--
| Port +--Link--+ Port |
+------+ | Downstream |
| Port +--Link--
+---------------------+
We can't use the Port type to determine which side the Link is on, so add a
bit in struct pci_dev to keep track.
A Root Port's Link is always on the Port's secondary side. A component
(Endpoint or Port) on the other end of the Link obviously has the Link on
its upstream side. If that component is a Port, it is part of a Switch or
a Bridge. A Bridge has a PCI or PCI-X bus on its secondary side, not a
Link. The internal bus of a Switch connects the Port to another Port whose
Link is on the downstream side.
[bhelgaas: changelog, comment, cache "type", use if/else]
Link: http://lkml.kernel.org/r/54EB81B2.4050904@pobox.com
Link: https://bugzilla.kernel.org/show_bug.cgi?id=94361
Suggested-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2015-05-21 10:05:02 +03:00
}
2009-03-20 06:25:14 +03:00
}
2010-01-26 20:10:03 +03:00
void set_pcie_hotplug_bridge ( struct pci_dev * pdev )
2009-09-10 01:09:24 +04:00
{
u32 reg32 ;
2012-07-24 13:20:06 +04:00
pcie_capability_read_dword ( pdev , PCI_EXP_SLTCAP , & reg32 ) ;
2009-09-10 01:09:24 +04:00
if ( reg32 & PCI_EXP_SLTCAP_HPC )
pdev - > is_hotplug_bridge = 1 ;
}
PCI: Recognize Thunderbolt devices
Detect on probe whether a PCI device is part of a Thunderbolt controller.
Intel uses a Vendor-Specific Extended Capability (VSEC) with ID 0x1234
on such devices. Detect presence of this VSEC and cache it in a newly
added is_thunderbolt bit in struct pci_dev.
Also, add a helper to check whether a given PCI device is situated on a
Thunderbolt daisy chain (i.e., below a PCI device with is_thunderbolt
set).
The necessity arises from the following:
* If an external Thunderbolt GPU is connected to a dual GPU laptop,
that GPU is currently registered with vga_switcheroo even though it
can neither drive the laptop's panel nor be powered off by the
platform. To vga_switcheroo it will appear as if two discrete
GPUs are present. As a result, when the external GPU is runtime
suspended, vga_switcheroo will cut power to the internal discrete GPU
which may not be runtime suspended at all at this moment. The
solution is to not register external GPUs with vga_switcheroo, which
necessitates a way to recognize if they're on a Thunderbolt daisy
chain.
* Dual GPU MacBook Pros introduced 2011+ can no longer switch external
DisplayPort ports between GPUs. (They're no longer just used for DP
but have become combined DP/Thunderbolt ports.) The driver to switch
the ports, drivers/platform/x86/apple-gmux.c, needs to detect presence
of a Thunderbolt controller and, if found, keep external ports
permanently switched to the discrete GPU.
v2: Make kerneldoc for pci_is_thunderbolt_attached() more precise,
drop portion of commit message pertaining to separate series.
(Bjorn Helgaas)
Cc: Andreas Noever <andreas.noever@gmail.com>
Cc: Michael Jamet <michael.jamet@intel.com>
Cc: Tomas Winkler <tomas.winkler@intel.com>
Cc: Amir Levy <amir.jer.levy@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Link: http://patchwork.freedesktop.org/patch/msgid/0ab165a4a35c0b60f29d4c306c653ead14fcd8f9.1489145162.git.lukas@wunner.de
2017-03-10 23:23:45 +03:00
static void set_pcie_thunderbolt ( struct pci_dev * dev )
{
int vsec = 0 ;
u32 header ;
while ( ( vsec = pci_find_next_ext_capability ( dev , vsec ,
PCI_EXT_CAP_ID_VNDR ) ) ) {
pci_read_config_dword ( dev , vsec + PCI_VNDR_HEADER , & header ) ;
/* Is the device part of a Thunderbolt controller? */
if ( dev - > vendor = = PCI_VENDOR_ID_INTEL & &
PCI_VNDR_HEADER_ID ( header ) = = PCI_VSEC_ID_INTEL_TBT ) {
dev - > is_thunderbolt = 1 ;
return ;
}
}
}
2018-08-16 12:28:48 +03:00
static void set_pcie_untrusted ( struct pci_dev * dev )
{
struct pci_dev * parent ;
/*
* If the upstream bridge is untrusted we treat this device
* untrusted as well .
*/
parent = pci_upstream_bridge ( dev ) ;
if ( parent & & parent - > untrusted )
dev - > untrusted = true ;
}
2014-05-06 00:20:51 +04:00
/**
 * pci_ext_cfg_is_aliased - Is ext config space just an alias of std config?
 * @dev: PCI device
 *
 * PCI Express to PCI/PCI-X Bridge Specification, rev 1.0, 4.1.4 says that
 * when forwarding a type1 configuration request the bridge must check that
 * the extended register address field is zero.  The bridge is not permitted
 * to forward the transactions and must handle it as an Unsupported Request.
 * Some bridges do not follow this rule and simply drop the extended register
 * bits, resulting in the standard config space being aliased, every 256
 * bytes across the entire configuration space.  Test for this condition by
 * comparing the first dword of each potential alias to the vendor/device ID.
 * Known offenders:
 *   ASM1083/1085 PCIe-to-PCI Reversible Bridge (1b21:1080, rev 01 & 03)
 *   AMD/ATI SBx00 PCI to PCI Bridge (1002:4384, rev 40)
 *
 * Returns true only when every 256-byte window of extended config space
 * reads back the vendor/device dword; always false without
 * CONFIG_PCI_QUIRKS.
 */
static bool pci_ext_cfg_is_aliased(struct pci_dev *dev)
{
#ifdef CONFIG_PCI_QUIRKS
	u32 id, alias;
	int off = PCI_CFG_SPACE_SIZE;

	pci_read_config_dword(dev, PCI_VENDOR_ID, &id);

	while (off < PCI_CFG_SPACE_EXP_SIZE) {
		/* A failed read or a differing dword means "not an alias" */
		if (pci_read_config_dword(dev, off, &alias) != PCIBIOS_SUCCESSFUL)
			return false;
		if (alias != id)
			return false;

		off += PCI_CFG_SPACE_SIZE;
	}

	return true;
#else
	return false;
#endif
}
2014-01-11 04:14:48 +04:00
/**
2017-11-30 19:58:14 +03:00
* pci_cfg_space_size - Get the configuration space size of the PCI device
2014-01-11 04:14:48 +04:00
* @ dev : PCI device
*
* Regular PCI devices have 256 bytes , but PCI - X 2 and PCI Express devices
* have 4096 bytes . Even if the device is capable , that doesn ' t mean we can
* access it . Maybe we don ' t have a way to generate extended config space
* accesses , or the device is behind a reverse Express bridge . So we try
* reading the dword at 0x100 which must either be 0 or a valid extended
* capability header .
*/
static int pci_cfg_space_size_ext ( struct pci_dev * dev )
{
u32 status ;
int pos = PCI_CFG_SPACE_SIZE ;
if ( pci_read_config_dword ( dev , pos , & status ) ! = PCIBIOS_SUCCESSFUL )
2015-12-08 03:21:10 +03:00
return PCI_CFG_SPACE_SIZE ;
2014-05-06 00:20:51 +04:00
if ( status = = 0xffffffff | | pci_ext_cfg_is_aliased ( dev ) )
2015-12-08 03:21:10 +03:00
return PCI_CFG_SPACE_SIZE ;
2014-01-11 04:14:48 +04:00
return PCI_CFG_SPACE_EXP_SIZE ;
}
int pci_cfg_space_size ( struct pci_dev * dev )
{
int pos ;
u32 status ;
u16 class ;
2018-10-11 19:49:58 +03:00
# ifdef CONFIG_PCI_IOV
2019-06-14 01:57:20 +03:00
/*
* Per the SR - IOV specification ( rev 1.1 , sec 3.5 ) , VFs are required to
* implement a PCIe capability and therefore must implement extended
* config space . We can skip the NO_EXTCFG test below and the
* reachability / aliasing test in pci_cfg_space_size_ext ( ) by virtue of
* the fact that the SR - IOV capability on the PF resides in extended
* config space and must be accessible and non - aliased to have enabled
* support for this VF . This is a micro performance optimization for
* systems supporting many VFs .
*/
if ( dev - > is_virtfn )
return PCI_CFG_SPACE_EXP_SIZE ;
2018-10-11 19:49:58 +03:00
# endif
PCI: Check whether bridges allow access to extended config space
Even if a device supports extended config space, i.e., it is a PCI-X Mode 2
or a PCI Express device, the extended space may not be accessible if
there's a conventional PCI bus in the path to it.
We currently figure that out in pci_cfg_space_size() by reading the first
dword of extended config space. On most platforms that returns ~0 data if
the space is inaccessible, but it may set error bits in PCI status
registers, and on some platforms it causes exceptions that we currently
don't recover from.
For example, a PCIe-to-conventional PCI bridge treats config transactions
with a non-zero Extended Register Address as an Unsupported Request on PCIe
and a received Master-Abort on the destination bus (see PCI Express to
PCI/PCI-X Bridge spec, r1.0, sec 4.1.3).
A sample case is a LS1043A CPU (NXP QorIQ Layerscape) platform with the
following bus topology:
LS1043 PCIe Root Port
-> PEX8112 PCIe-to-PCI bridge (doesn't support ext cfg on PCI side)
-> PMC slot connector (for legacy PMC modules)
With a PMC module topology as follows:
PMC connector
-> PCI-to-PCIe bridge
-> PCIe switch (4 ports)
-> 4 PCIe devices (one on each port)
The PCIe devices on the PMC module support extended config space, but we
can't reach it because the PEX8112 can't generate accesses to the extended
space on its secondary bus. Attempts to access it cause Unsupported
Request errors, which result in synchronous aborts on this platform.
To avoid these errors, check whether bridges are capable of generating
extended config space addresses on their secondary interfaces. If they
can't, we restrict devices below the bridge to only the 256-byte
PCI-compatible config space.
Signed-off-by: Gilles Buloz <gilles.buloz@kontron.com>
[bhelgaas: changelog, rework patch so bus_flags testing is all in
pci_bridge_child_ext_cfg_accessible()]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2018-05-03 23:21:44 +03:00
if ( dev - > bus - > bus_flags & PCI_BUS_FLAGS_NO_EXTCFG )
return PCI_CFG_SPACE_SIZE ;
2014-01-11 04:14:48 +04:00
class = dev - > class > > 8 ;
if ( class = = PCI_CLASS_BRIDGE_HOST )
return pci_cfg_space_size_ext ( dev ) ;
2015-12-08 03:21:10 +03:00
if ( pci_is_pcie ( dev ) )
return pci_cfg_space_size_ext ( dev ) ;
2014-01-11 04:14:48 +04:00
2015-12-08 03:21:10 +03:00
pos = pci_find_capability ( dev , PCI_CAP_ID_PCIX ) ;
if ( ! pos )
return PCI_CFG_SPACE_SIZE ;
2014-01-11 04:14:48 +04:00
2015-12-08 03:21:10 +03:00
pci_read_config_dword ( dev , pos + PCI_X_STATUS , & status ) ;
if ( status & ( PCI_X_STATUS_266MHZ | PCI_X_STATUS_533MHZ ) )
return pci_cfg_space_size_ext ( dev ) ;
2014-01-11 04:14:48 +04:00
return PCI_CFG_SPACE_SIZE ;
}
2018-03-19 23:06:00 +03:00
/*
 * Return the raw PCI_CLASS_REVISION dword for @dev.  For an SR-IOV
 * virtual function the value cached in the physical function's SR-IOV
 * state is returned instead of reading config space.
 */
static u32 pci_class(struct pci_dev *dev)
{
	u32 class_rev;

#ifdef CONFIG_PCI_IOV
	if (dev->is_virtfn) {
		struct pci_dev *pf = dev->physfn;

		return pf->sriov->class;
	}
#endif

	pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev);

	return class_rev;
}
/*
 * Store the subsystem vendor and device IDs of @dev in @vendor and
 * @device.  For an SR-IOV virtual function they are copied from the
 * physical function's SR-IOV state; otherwise they are read from config
 * space.
 */
static void pci_subsystem_ids(struct pci_dev *dev, u16 *vendor, u16 *device)
{
#ifdef CONFIG_PCI_IOV
	if (dev->is_virtfn) {
		struct pci_dev *pf = dev->physfn;

		*vendor = pf->sriov->subsystem_vendor;
		*device = pf->sriov->subsystem_device;
		return;
	}
#endif

	pci_read_config_word(dev, PCI_SUBSYSTEM_VENDOR_ID, vendor);
	pci_read_config_word(dev, PCI_SUBSYSTEM_ID, device);
}
/*
 * Return the PCI_HEADER_TYPE byte for @dev.  For an SR-IOV virtual
 * function the value cached in the physical function's SR-IOV state is
 * returned instead of reading config space.
 */
static u8 pci_hdr_type(struct pci_dev *dev)
{
	u8 type;

#ifdef CONFIG_PCI_IOV
	if (dev->is_virtfn) {
		struct pci_dev *pf = dev->physfn;

		return pf->sriov->hdr_type;
	}
#endif

	pci_read_config_byte(dev, PCI_HEADER_TYPE, &type);

	return type;
}
2007-04-24 01:19:36 +04:00
/* Resource flags for the fixed legacy IDE port ranges set up in pci_setup_device() */
# define LEGACY_IO_RESOURCE (IORESOURCE_IO | IORESOURCE_PCI_FIXED)
2006-12-30 03:47:29 +03:00
2015-10-21 17:17:35 +03:00
static void pci_msi_setup_pci_dev ( struct pci_dev * dev )
2015-05-07 17:52:21 +03:00
{
/*
* Disable the MSI hardware to avoid screaming interrupts
* during boot . This is the power on reset default so
* usually this should be a noop .
*/
dev - > msi_cap = pci_find_capability ( dev , PCI_CAP_ID_MSI ) ;
if ( dev - > msi_cap )
pci_msi_set_enable ( dev , 0 ) ;
dev - > msix_cap = pci_find_capability ( dev , PCI_CAP_ID_MSIX ) ;
if ( dev - > msix_cap )
pci_msix_clear_and_set_ctrl ( dev , PCI_MSIX_FLAGS_ENABLE , 0 ) ;
}
2017-05-27 00:02:25 +03:00
/**
 * pci_intx_mask_broken - Test PCI_COMMAND_INTX_DISABLE writability
 * @dev: PCI device
 *
 * Test whether PCI_COMMAND_INTX_DISABLE is writable for @dev.  Check this
 * at enumeration-time to avoid modifying PCI_COMMAND at run-time.
 *
 * The bit is flipped, read back, and the original command word is then
 * restored.  Returns 1 if the flipped value did not stick, 0 otherwise.
 */
static int pci_intx_mask_broken(struct pci_dev *dev)
{
	u16 saved, flipped, readback;

	pci_read_config_word(dev, PCI_COMMAND, &saved);

	flipped = saved ^ PCI_COMMAND_INTX_DISABLE;
	pci_write_config_word(dev, PCI_COMMAND, flipped);
	pci_read_config_word(dev, PCI_COMMAND, &readback);

	/* Put the command register back the way we found it */
	pci_write_config_word(dev, PCI_COMMAND, saved);

	/*
	 * PCI_COMMAND_INTX_DISABLE was reserved and read-only prior to PCI
	 * r2.3, so strictly speaking, a device is not *broken* if it's not
	 * writable.  But we'll live with the misnomer for now.
	 */
	return readback != flipped;
}
2018-06-05 05:16:09 +03:00
/*
 * Log the first 256 bytes of @pdev's config space as a hex dump, read
 * one dword at a time.
 */
static void early_dump_pci_device(struct pci_dev *pdev)
{
	u32 value[256 / 4];
	unsigned int i;

	pci_info(pdev, " config space: \n ");

	for (i = 0; i < ARRAY_SIZE(value); i++)
		pci_read_config_dword(pdev, i * 4, &value[i]);

	print_hex_dump(KERN_INFO, " ", DUMP_PREFIX_OFFSET, 16, 1,
		       value, sizeof(value), false);
}
2005-04-17 02:20:36 +04:00
/**
2017-11-30 19:58:14 +03:00
* pci_setup_device - Fill in class and map information of a device
2005-04-17 02:20:36 +04:00
* @ dev : the device structure to fill
*
2013-11-14 22:28:18 +04:00
* Initialize the device structure with information about the device ' s
2017-11-30 19:58:14 +03:00
* vendor , class , memory and IO - space addresses , IRQ lines etc .
2005-04-17 02:20:36 +04:00
* Called at initialisation of the PCI subsystem and by CardBus services .
2009-03-20 06:25:14 +03:00
* Returns 0 on success and negative if unknown type of device ( not normal ,
* bridge or CardBus ) .
2005-04-17 02:20:36 +04:00
*/
2009-03-20 06:25:14 +03:00
int pci_setup_device ( struct pci_dev * dev )
2005-04-17 02:20:36 +04:00
{
u32 class ;
2016-02-25 23:35:57 +03:00
u16 cmd ;
2009-03-20 06:25:14 +03:00
u8 hdr_type ;
2009-10-06 19:45:19 +04:00
int pos = 0 ;
2012-02-24 07:19:00 +04:00
struct pci_bus_region region ;
struct resource * res ;
2009-03-20 06:25:14 +03:00
2018-03-19 23:06:00 +03:00
hdr_type = pci_hdr_type ( dev ) ;
2009-03-20 06:25:14 +03:00
dev - > sysdata = dev - > bus - > sysdata ;
dev - > dev . parent = dev - > bus - > bridge ;
dev - > dev . bus = & pci_bus_type ;
dev - > hdr_type = hdr_type & 0x7f ;
dev - > multifunction = ! ! ( hdr_type & 0x80 ) ;
dev - > error_state = pci_channel_io_normal ;
set_pcie_port_type ( dev ) ;
2015-07-17 12:16:32 +03:00
pci_dev_assign_slot ( dev ) ;
2017-11-30 19:58:14 +03:00
/*
* Assume 32 - bit PCI ; let 64 - bit PCI cards ( which are far rarer )
* set this higher , assuming the system even supports it .
*/
2009-03-20 06:25:14 +03:00
dev - > dma_mask = 0xffffffff ;
2005-04-17 02:20:36 +04:00
2008-07-03 00:24:49 +04:00
dev_set_name ( & dev - > dev , " %04x:%02x:%02x.%d " , pci_domain_nr ( dev - > bus ) ,
dev - > bus - > number , PCI_SLOT ( dev - > devfn ) ,
PCI_FUNC ( dev - > devfn ) ) ;
2005-04-17 02:20:36 +04:00
2018-03-19 23:06:00 +03:00
class = pci_class ( dev ) ;
2007-06-09 02:46:30 +04:00
dev - > revision = class & 0xff ;
2012-02-20 02:50:12 +04:00
dev - > class = class > > 8 ; /* upper 3 bytes */
2005-04-17 02:20:36 +04:00
2019-04-20 07:07:20 +03:00
pci_info ( dev , " [%04x:%04x] type %02x class %#08x \n " ,
2012-02-20 02:50:12 +04:00
dev - > vendor , dev - > device , dev - > hdr_type , dev - > class ) ;
2005-04-17 02:20:36 +04:00
2018-06-05 05:16:09 +03:00
if ( pci_early_dump )
early_dump_pci_device ( dev ) ;
2017-11-30 19:58:14 +03:00
/* Need to have dev->class ready */
2009-03-21 17:05:11 +03:00
dev - > cfg_size = pci_cfg_space_size ( dev ) ;
2017-11-30 19:58:14 +03:00
/* Need to have dev->cfg_size ready */
PCI: Recognize Thunderbolt devices
Detect on probe whether a PCI device is part of a Thunderbolt controller.
Intel uses a Vendor-Specific Extended Capability (VSEC) with ID 0x1234
on such devices. Detect presence of this VSEC and cache it in a newly
added is_thunderbolt bit in struct pci_dev.
Also, add a helper to check whether a given PCI device is situated on a
Thunderbolt daisy chain (i.e., below a PCI device with is_thunderbolt
set).
The necessity arises from the following:
* If an external Thunderbolt GPU is connected to a dual GPU laptop,
that GPU is currently registered with vga_switcheroo even though it
can neither drive the laptop's panel nor be powered off by the
platform. To vga_switcheroo it will appear as if two discrete
GPUs are present. As a result, when the external GPU is runtime
suspended, vga_switcheroo will cut power to the internal discrete GPU
which may not be runtime suspended at all at this moment. The
solution is to not register external GPUs with vga_switcheroo, which
necessitates a way to recognize if they're on a Thunderbolt daisy
chain.
* Dual GPU MacBook Pros introduced 2011+ can no longer switch external
DisplayPort ports between GPUs. (They're no longer just used for DP
but have become combined DP/Thunderbolt ports.) The driver to switch
the ports, drivers/platform/x86/apple-gmux.c, needs to detect presence
of a Thunderbolt controller and, if found, keep external ports
permanently switched to the discrete GPU.
v2: Make kerneldoc for pci_is_thunderbolt_attached() more precise,
drop portion of commit message pertaining to separate series.
(Bjorn Helgaas)
Cc: Andreas Noever <andreas.noever@gmail.com>
Cc: Michael Jamet <michael.jamet@intel.com>
Cc: Tomas Winkler <tomas.winkler@intel.com>
Cc: Amir Levy <amir.jer.levy@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Link: http://patchwork.freedesktop.org/patch/msgid/0ab165a4a35c0b60f29d4c306c653ead14fcd8f9.1489145162.git.lukas@wunner.de
2017-03-10 23:23:45 +03:00
set_pcie_thunderbolt ( dev ) ;
2018-08-16 12:28:48 +03:00
set_pcie_untrusted ( dev ) ;
2005-04-17 02:20:36 +04:00
/* "Unknown power state" */
2005-08-18 02:32:19 +04:00
dev - > current_state = PCI_UNKNOWN ;
2005-04-17 02:20:36 +04:00
/* Early fixups, before probing the BARs */
pci_fixup_device ( pci_fixup_early , dev ) ;
2017-11-30 19:58:14 +03:00
/* Device class may be changed after fixup */
2009-05-27 20:25:05 +04:00
class = dev - > class > > 8 ;
2005-04-17 02:20:36 +04:00
2016-02-25 23:35:57 +03:00
if ( dev - > non_compliant_bars ) {
pci_read_config_word ( dev , PCI_COMMAND , & cmd ) ;
if ( cmd & ( PCI_COMMAND_IO | PCI_COMMAND_MEMORY ) ) {
2018-01-18 21:55:24 +03:00
pci_info ( dev , " device has non-compliant BARs; disabling IO/MEM decoding \n " ) ;
2016-02-25 23:35:57 +03:00
cmd & = ~ PCI_COMMAND_IO ;
cmd & = ~ PCI_COMMAND_MEMORY ;
pci_write_config_word ( dev , PCI_COMMAND , cmd ) ;
}
}
2017-05-27 00:02:25 +03:00
dev - > broken_intx_masking = pci_intx_mask_broken ( dev ) ;
2005-04-17 02:20:36 +04:00
switch ( dev - > hdr_type ) { /* header type */
case PCI_HEADER_TYPE_NORMAL : /* standard header */
if ( class = = PCI_CLASS_BRIDGE_PCI )
goto bad ;
pci_read_irq ( dev ) ;
pci_read_bases ( dev , 6 , PCI_ROM_ADDRESS ) ;
2018-03-19 23:06:00 +03:00
pci_subsystem_ids ( dev , & dev - > subsystem_vendor , & dev - > subsystem_device ) ;
2006-10-04 03:41:26 +04:00
/*
2014-03-06 01:07:03 +04:00
* Do the ugly legacy mode stuff here rather than broken chip
* quirk code . Legacy mode ATA controllers have fixed
* addresses . These are not always echoed in BAR0 - 3 , and
* BAR0 - 3 in a few cases contain junk !
2006-10-04 03:41:26 +04:00
*/
if ( class = = PCI_CLASS_STORAGE_IDE ) {
u8 progif ;
pci_read_config_byte ( dev , PCI_CLASS_PROG , & progif ) ;
if ( ( progif & 1 ) = = 0 ) {
2012-02-24 07:19:00 +04:00
region . start = 0x1F0 ;
region . end = 0x1F7 ;
res = & dev - > resource [ 0 ] ;
res - > flags = LEGACY_IO_RESOURCE ;
PCI: Convert pcibios_resource_to_bus() to take a pci_bus, not a pci_dev
These interfaces:
pcibios_resource_to_bus(struct pci_dev *dev, *bus_region, *resource)
pcibios_bus_to_resource(struct pci_dev *dev, *resource, *bus_region)
took a pci_dev, but they really depend only on the pci_bus. And we want to
use them in resource allocation paths where we have the bus but not a
device, so this patch converts them to take the pci_bus instead of the
pci_dev:
pcibios_resource_to_bus(struct pci_bus *bus, *bus_region, *resource)
pcibios_bus_to_resource(struct pci_bus *bus, *resource, *bus_region)
In fact, with standard PCI-PCI bridges, they only depend on the host
bridge, because that's the only place address translation occurs, but
we aren't going that far yet.
[bhelgaas: changelog]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2013-12-10 10:54:40 +04:00
pcibios_bus_to_resource ( dev - > bus , res , & region ) ;
2018-01-18 21:55:24 +03:00
pci_info ( dev , " legacy IDE quirk: reg 0x10: %pR \n " ,
2014-03-06 01:07:03 +04:00
res ) ;
2012-02-24 07:19:00 +04:00
region . start = 0x3F6 ;
region . end = 0x3F6 ;
res = & dev - > resource [ 1 ] ;
res - > flags = LEGACY_IO_RESOURCE ;
PCI: Convert pcibios_resource_to_bus() to take a pci_bus, not a pci_dev
These interfaces:
pcibios_resource_to_bus(struct pci_dev *dev, *bus_region, *resource)
pcibios_bus_to_resource(struct pci_dev *dev, *resource, *bus_region)
took a pci_dev, but they really depend only on the pci_bus. And we want to
use them in resource allocation paths where we have the bus but not a
device, so this patch converts them to take the pci_bus instead of the
pci_dev:
pcibios_resource_to_bus(struct pci_bus *bus, *bus_region, *resource)
pcibios_bus_to_resource(struct pci_bus *bus, *resource, *bus_region)
In fact, with standard PCI-PCI bridges, they only depend on the host
bridge, because that's the only place address translation occurs, but
we aren't going that far yet.
[bhelgaas: changelog]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2013-12-10 10:54:40 +04:00
pcibios_bus_to_resource ( dev - > bus , res , & region ) ;
2018-01-18 21:55:24 +03:00
pci_info ( dev , " legacy IDE quirk: reg 0x14: %pR \n " ,
2014-03-06 01:07:03 +04:00
res ) ;
2006-10-04 03:41:26 +04:00
}
if ( ( progif & 4 ) = = 0 ) {
2012-02-24 07:19:00 +04:00
region . start = 0x170 ;
region . end = 0x177 ;
res = & dev - > resource [ 2 ] ;
res - > flags = LEGACY_IO_RESOURCE ;
PCI: Convert pcibios_resource_to_bus() to take a pci_bus, not a pci_dev
These interfaces:
pcibios_resource_to_bus(struct pci_dev *dev, *bus_region, *resource)
pcibios_bus_to_resource(struct pci_dev *dev, *resource, *bus_region)
took a pci_dev, but they really depend only on the pci_bus. And we want to
use them in resource allocation paths where we have the bus but not a
device, so this patch converts them to take the pci_bus instead of the
pci_dev:
pcibios_resource_to_bus(struct pci_bus *bus, *bus_region, *resource)
pcibios_bus_to_resource(struct pci_bus *bus, *resource, *bus_region)
In fact, with standard PCI-PCI bridges, they only depend on the host
bridge, because that's the only place address translation occurs, but
we aren't going that far yet.
[bhelgaas: changelog]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2013-12-10 10:54:40 +04:00
pcibios_bus_to_resource ( dev - > bus , res , & region ) ;
2018-01-18 21:55:24 +03:00
pci_info ( dev , " legacy IDE quirk: reg 0x18: %pR \n " ,
2014-03-06 01:07:03 +04:00
res ) ;
2012-02-24 07:19:00 +04:00
region . start = 0x376 ;
region . end = 0x376 ;
res = & dev - > resource [ 3 ] ;
res - > flags = LEGACY_IO_RESOURCE ;
PCI: Convert pcibios_resource_to_bus() to take a pci_bus, not a pci_dev
These interfaces:
pcibios_resource_to_bus(struct pci_dev *dev, *bus_region, *resource)
pcibios_bus_to_resource(struct pci_dev *dev, *resource, *bus_region)
took a pci_dev, but they really depend only on the pci_bus. And we want to
use them in resource allocation paths where we have the bus but not a
device, so this patch converts them to take the pci_bus instead of the
pci_dev:
pcibios_resource_to_bus(struct pci_bus *bus, *bus_region, *resource)
pcibios_bus_to_resource(struct pci_bus *bus, *resource, *bus_region)
In fact, with standard PCI-PCI bridges, they only depend on the host
bridge, because that's the only place address translation occurs, but
we aren't going that far yet.
[bhelgaas: changelog]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2013-12-10 10:54:40 +04:00
pcibios_bus_to_resource ( dev - > bus , res , & region ) ;
2018-01-18 21:55:24 +03:00
pci_info ( dev , " legacy IDE quirk: reg 0x1c: %pR \n " ,
2014-03-06 01:07:03 +04:00
res ) ;
2006-10-04 03:41:26 +04:00
}
}
2005-04-17 02:20:36 +04:00
break ;
case PCI_HEADER_TYPE_BRIDGE : /* bridge header */
2017-11-30 19:58:14 +03:00
/*
* The PCI - to - PCI bridge spec requires that subtractive
* decoding ( i . e . transparent ) bridge must have programming
* interface code of 0x01 .
*/
2005-11-03 03:55:49 +03:00
pci_read_irq ( dev ) ;
2005-04-17 02:20:36 +04:00
dev - > transparent = ( ( dev - > class & 0xff ) = = 1 ) ;
pci_read_bases ( dev , 2 , PCI_ROM_ADDRESS1 ) ;
2019-01-19 20:35:04 +03:00
pci_read_bridge_windows ( dev ) ;
2009-09-10 01:09:24 +04:00
set_pcie_hotplug_bridge ( dev ) ;
2009-10-06 19:45:19 +04:00
pos = pci_find_capability ( dev , PCI_CAP_ID_SSVID ) ;
if ( pos ) {
pci_read_config_word ( dev , pos + PCI_SSVID_VENDOR_ID , & dev - > subsystem_vendor ) ;
pci_read_config_word ( dev , pos + PCI_SSVID_DEVICE_ID , & dev - > subsystem_device ) ;
}
2005-04-17 02:20:36 +04:00
break ;
case PCI_HEADER_TYPE_CARDBUS : /* CardBus bridge header */
if ( class ! = PCI_CLASS_BRIDGE_CARDBUS )
goto bad ;
pci_read_irq ( dev ) ;
pci_read_bases ( dev , 1 , 0 ) ;
pci_read_config_word ( dev , PCI_CB_SUBSYSTEM_VENDOR_ID , & dev - > subsystem_vendor ) ;
pci_read_config_word ( dev , PCI_CB_SUBSYSTEM_ID , & dev - > subsystem_device ) ;
break ;
default : /* unknown header */
2018-01-18 21:55:24 +03:00
pci_err ( dev , " unknown header type %02x, ignoring device \n " ,
2014-04-19 04:13:50 +04:00
dev - > hdr_type ) ;
2009-03-20 06:25:14 +03:00
return - EIO ;
2005-04-17 02:20:36 +04:00
bad :
2018-01-18 21:55:24 +03:00
pci_err ( dev , " ignoring class %#08x (doesn't match header type %02x) \n " ,
2014-04-19 04:13:50 +04:00
dev - > class , dev - > hdr_type ) ;
2015-06-20 00:20:58 +03:00
dev - > class = PCI_CLASS_NOT_DEFINED < < 8 ;
2005-04-17 02:20:36 +04:00
}
/* We found a fine healthy device, go go go... */
return 0 ;
}
2015-08-21 00:08:27 +03:00
static void pci_configure_mps ( struct pci_dev * dev )
{
struct pci_dev * bridge = pci_upstream_bridge ( dev ) ;
2018-08-13 21:19:46 +03:00
int mps , mpss , p_mps , rc ;
2015-08-21 00:08:27 +03:00
if ( ! pci_is_pcie ( dev ) | | ! bridge | | ! pci_is_pcie ( bridge ) )
return ;
2018-08-13 21:19:39 +03:00
/* MPS and MRRS fields are of type 'RsvdP' for VFs, short-circuit out */
if ( dev - > is_virtfn )
return ;
2015-08-21 00:08:27 +03:00
mps = pcie_get_mps ( dev ) ;
p_mps = pcie_get_mps ( bridge ) ;
if ( mps = = p_mps )
return ;
if ( pcie_bus_config = = PCIE_BUS_TUNE_OFF ) {
2018-01-18 21:55:24 +03:00
pci_warn ( dev , " Max Payload Size %d, but upstream %s set to %d; if necessary, use \" pci=pcie_bus_safe \" and report a bug \n " ,
2015-08-21 00:08:27 +03:00
mps , pci_name ( bridge ) , p_mps ) ;
return ;
}
PCI: Set MPS to match upstream bridge
Firmware typically configures the PCIe fabric with a consistent Max Payload
Size setting based on the devices present at boot. A hot-added device
typically has the power-on default MPS setting (128 bytes), which may not
match the fabric.
The previous Linux default, in the absence of any "pci=pcie_bus_*" options,
was PCIE_BUS_TUNE_OFF, in which we never touch MPS, even for hot-added
devices.
Add a new default setting, PCIE_BUS_DEFAULT, in which we make sure every
device's MPS setting matches the upstream bridge. This makes it more
likely that a hot-added device will work in a system with optimized MPS
configuration.
Note that if we hot-add a device that only supports 128-byte MPS, it still
likely won't work because we don't reconfigure the rest of the fabric.
Booting with "pci=pcie_bus_peer2peer" is a workaround for this because it
sets MPS to 128 for everything.
[bhelgaas: changelog, new default, rework for pci_configure_device() path]
Tested-by: Keith Busch <keith.busch@intel.com>
Tested-by: Jordan Hargrave <jharg93@gmail.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
2015-08-24 16:48:16 +03:00
/*
* Fancier MPS configuration is done later by
* pcie_bus_configure_settings ( )
*/
if ( pcie_bus_config ! = PCIE_BUS_DEFAULT )
return ;
2018-08-13 21:19:46 +03:00
mpss = 128 < < dev - > pcie_mpss ;
if ( mpss < p_mps & & pci_pcie_type ( bridge ) = = PCI_EXP_TYPE_ROOT_PORT ) {
pcie_set_mps ( bridge , mpss ) ;
pci_info ( dev , " Upstream bridge's Max Payload Size set to %d (was %d, max %d) \n " ,
mpss , p_mps , 128 < < bridge - > pcie_mpss ) ;
p_mps = pcie_get_mps ( bridge ) ;
}
PCI: Set MPS to match upstream bridge
Firmware typically configures the PCIe fabric with a consistent Max Payload
Size setting based on the devices present at boot. A hot-added device
typically has the power-on default MPS setting (128 bytes), which may not
match the fabric.
The previous Linux default, in the absence of any "pci=pcie_bus_*" options,
was PCIE_BUS_TUNE_OFF, in which we never touch MPS, even for hot-added
devices.
Add a new default setting, PCIE_BUS_DEFAULT, in which we make sure every
device's MPS setting matches the upstream bridge. This makes it more
likely that a hot-added device will work in a system with optimized MPS
configuration.
Note that if we hot-add a device that only supports 128-byte MPS, it still
likely won't work because we don't reconfigure the rest of the fabric.
Booting with "pci=pcie_bus_peer2peer" is a workaround for this because it
sets MPS to 128 for everything.
[bhelgaas: changelog, new default, rework for pci_configure_device() path]
Tested-by: Keith Busch <keith.busch@intel.com>
Tested-by: Jordan Hargrave <jharg93@gmail.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
2015-08-24 16:48:16 +03:00
rc = pcie_set_mps ( dev , p_mps ) ;
if ( rc ) {
2018-01-18 21:55:24 +03:00
pci_warn ( dev , " can't set Max Payload Size to %d; if necessary, use \" pci=pcie_bus_safe \" and report a bug \n " ,
PCI: Set MPS to match upstream bridge
Firmware typically configures the PCIe fabric with a consistent Max Payload
Size setting based on the devices present at boot. A hot-added device
typically has the power-on default MPS setting (128 bytes), which may not
match the fabric.
The previous Linux default, in the absence of any "pci=pcie_bus_*" options,
was PCIE_BUS_TUNE_OFF, in which we never touch MPS, even for hot-added
devices.
Add a new default setting, PCIE_BUS_DEFAULT, in which we make sure every
device's MPS setting matches the upstream bridge. This makes it more
likely that a hot-added device will work in a system with optimized MPS
configuration.
Note that if we hot-add a device that only supports 128-byte MPS, it still
likely won't work because we don't reconfigure the rest of the fabric.
Booting with "pci=pcie_bus_peer2peer" is a workaround for this because it
sets MPS to 128 for everything.
[bhelgaas: changelog, new default, rework for pci_configure_device() path]
Tested-by: Keith Busch <keith.busch@intel.com>
Tested-by: Jordan Hargrave <jharg93@gmail.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
2015-08-24 16:48:16 +03:00
p_mps ) ;
return ;
}
2018-01-18 21:55:24 +03:00
pci_info ( dev , " Max Payload Size set to %d (was %d, max %d) \n " ,
2018-08-13 21:19:46 +03:00
p_mps , mps , mpss ) ;
2015-08-21 00:08:27 +03:00
}
2017-07-12 07:04:14 +03:00
int pci_configure_extended_tags ( struct pci_dev * dev , void * ign )
2017-01-20 17:16:51 +03:00
{
2017-07-12 07:04:14 +03:00
struct pci_host_bridge * host ;
u32 cap ;
u16 ctl ;
2017-01-20 17:16:51 +03:00
int ret ;
if ( ! pci_is_pcie ( dev ) )
2017-07-12 07:04:14 +03:00
return 0 ;
2017-01-20 17:16:51 +03:00
2017-07-12 07:04:14 +03:00
ret = pcie_capability_read_dword ( dev , PCI_EXP_DEVCAP , & cap ) ;
2017-01-20 17:16:51 +03:00
if ( ret )
2017-07-12 07:04:14 +03:00
return 0 ;
if ( ! ( cap & PCI_EXP_DEVCAP_EXT_TAG ) )
return 0 ;
2017-01-20 17:16:51 +03:00
2017-07-12 07:04:14 +03:00
ret = pcie_capability_read_word ( dev , PCI_EXP_DEVCTL , & ctl ) ;
if ( ret )
return 0 ;
host = pci_find_host_bridge ( dev - > bus ) ;
if ( ! host )
return 0 ;
2017-01-20 17:16:51 +03:00
2017-07-12 07:04:14 +03:00
/*
* If some device in the hierarchy doesn ' t handle Extended Tags
* correctly , make sure they ' re disabled .
*/
if ( host - > no_ext_tags ) {
if ( ctl & PCI_EXP_DEVCTL_EXT_TAG ) {
2018-01-18 21:55:24 +03:00
pci_info ( dev , " disabling Extended Tags \n " ) ;
2017-07-12 07:04:14 +03:00
pcie_capability_clear_word ( dev , PCI_EXP_DEVCTL ,
PCI_EXP_DEVCTL_EXT_TAG ) ;
}
return 0 ;
}
if ( ! ( ctl & PCI_EXP_DEVCTL_EXT_TAG ) ) {
2018-01-18 21:55:24 +03:00
pci_info ( dev , " enabling Extended Tags \n " ) ;
2017-01-20 17:16:51 +03:00
pcie_capability_set_word ( dev , PCI_EXP_DEVCTL ,
PCI_EXP_DEVCTL_EXT_TAG ) ;
2017-07-12 07:04:14 +03:00
}
return 0 ;
2017-01-20 17:16:51 +03:00
}
2017-08-15 06:23:23 +03:00
/**
 * pcie_relaxed_ordering_enabled - Check whether Relaxed Ordering is enabled
 * @dev: PCI device to query
 *
 * Reads the PCI_EXP_DEVCTL word of @dev and tests PCI_EXP_DEVCTL_RELAX_EN.
 *
 * Returns true if the bit is set, false otherwise.
 */
bool pcie_relaxed_ordering_enabled(struct pci_dev *dev)
{
	u16 devctl;

	pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &devctl);

	return (devctl & PCI_EXP_DEVCTL_RELAX_EN) != 0;
}
EXPORT_SYMBOL(pcie_relaxed_ordering_enabled);
static void pci_configure_relaxed_ordering ( struct pci_dev * dev )
{
struct pci_dev * root ;
/* PCI_EXP_DEVICE_RELAX_EN is RsvdP in VFs */
if ( dev - > is_virtfn )
return ;
if ( ! pcie_relaxed_ordering_enabled ( dev ) )
return ;
/*
* For now , we only deal with Relaxed Ordering issues with Root
* Ports . Peer - to - Peer DMA is another can of worms .
*/
root = pci_find_pcie_root_port ( dev ) ;
if ( ! root )
return ;
if ( root - > dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING ) {
pcie_capability_clear_word ( dev , PCI_EXP_DEVCTL ,
PCI_EXP_DEVCTL_RELAX_EN ) ;
2018-01-18 21:55:24 +03:00
pci_info ( dev , " Relaxed Ordering disabled because the Root Port didn't support it \n " ) ;
2017-08-15 06:23:23 +03:00
}
}
2017-11-29 01:43:50 +03:00
/*
 * Record (and, where permitted, enable) LTR for @dev.
 *
 * Compiled to an empty function unless CONFIG_PCIEASPM is set.  For a PCIe
 * device advertising PCI_EXP_DEVCAP2_LTR:
 *   - if LTR is already enabled, dev->ltr_path is set when the device is a
 *     Root Port or its upstream bridge has ltr_path set;
 *   - otherwise LTR is enabled (and ltr_path set) under the same path
 *     condition, but only when the host bridge has native_ltr set.
 */
static void pci_configure_ltr(struct pci_dev *dev)
{
#ifdef CONFIG_PCIEASPM
	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
	struct pci_dev *upstream;
	bool path_ok;
	u32 devcap2, devctl2;

	if (!pci_is_pcie(dev))
		return;

	pcie_capability_read_dword(dev, PCI_EXP_DEVCAP2, &devcap2);
	if (!(devcap2 & PCI_EXP_DEVCAP2_LTR))
		return;

	/*
	 * The path is usable if this is a Root Port, or if the bridge
	 * directly above us has already been marked as being on an LTR path.
	 */
	path_ok = pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT;
	if (!path_ok) {
		upstream = pci_upstream_bridge(dev);
		path_ok = upstream && upstream->ltr_path;
	}

	pcie_capability_read_dword(dev, PCI_EXP_DEVCTL2, &devctl2);
	if (devctl2 & PCI_EXP_DEVCTL2_LTR_EN) {
		/* Already enabled: just record the path state */
		if (path_ok)
			dev->ltr_path = 1;
		return;
	}

	if (!host->native_ltr)
		return;

	/*
	 * Software must not enable LTR in an Endpoint unless the Root
	 * Complex and all intermediate Switches indicate support for LTR.
	 * PCIe r4.0, sec 6.18.
	 */
	if (!path_ok)
		return;

	pcie_capability_set_word(dev, PCI_EXP_DEVCTL2,
				 PCI_EXP_DEVCTL2_LTR_EN);
	dev->ltr_path = 1;
#endif
}
2018-06-30 18:24:24 +03:00
/*
 * Set dev->eetlp_prefix_path for a PCIe device that advertises
 * PCI_EXP_DEVCAP2_EE_PREFIX.
 *
 * Compiled to an empty function unless CONFIG_PCI_PASID is set.  Root Ports
 * and Root Complex integrated endpoints are marked unconditionally; any
 * other device is marked only if its upstream bridge is already marked.
 */
static void pci_configure_eetlp_prefix(struct pci_dev *dev)
{
#ifdef CONFIG_PCI_PASID
	struct pci_dev *upstream;
	u32 devcap2;

	if (!pci_is_pcie(dev))
		return;

	pcie_capability_read_dword(dev, PCI_EXP_DEVCAP2, &devcap2);
	if (!(devcap2 & PCI_EXP_DEVCAP2_EE_PREFIX))
		return;

	switch (pci_pcie_type(dev)) {
	case PCI_EXP_TYPE_ROOT_PORT:
	case PCI_EXP_TYPE_RC_END:
		dev->eetlp_prefix_path = 1;
		break;
	default:
		/* Inherit the marking from the bridge directly above us */
		upstream = pci_upstream_bridge(dev);
		if (upstream && upstream->eetlp_prefix_path)
			dev->eetlp_prefix_path = 1;
		break;
	}
#endif
}
2018-11-14 17:47:01 +03:00
static void pci_configure_serr ( struct pci_dev * dev )
{
u16 control ;
if ( dev - > hdr_type = = PCI_HEADER_TYPE_BRIDGE ) {
/*
* A bridge will not forward ERR_ messages coming from an
* endpoint unless SERR # forwarding is enabled .
*/
pci_read_config_word ( dev , PCI_BRIDGE_CONTROL , & control ) ;
if ( ! ( control & PCI_BRIDGE_CTL_SERR ) ) {
control | = PCI_BRIDGE_CTL_SERR ;
pci_write_config_word ( dev , PCI_BRIDGE_CONTROL , control ) ;
}
}
}
PCI: Add pci_configure_device() during enumeration
Some platforms can tell the OS how to configure PCI devices, e.g., how to
set cache line size, error reporting enables, etc. ACPI defines _HPP and
_HPX methods for this purpose.
This configuration was previously done by some of the hotplug drivers using
pci_configure_slot(). But not all hotplug drivers did this, and per the
spec (ACPI rev 5.0, sec 6.2.7), we can also do it for "devices not
configured by the BIOS at system boot."
Move this configuration into the PCI core by adding pci_configure_device()
and calling it from pci_device_add(), so we do this for all devices as we
enumerate them.
This is based on pci_configure_slot(), which is used by hotplug drivers.
I omitted:
- pcie_bus_configure_settings() because it configures MPS and MRRS, which
requires global knowledge of the fabric and must be done later, and
- configuration of subordinate devices; that will happen when we call
pci_device_add() for those devices.
Because pci_configure_slot() was only done by hotplug drivers, this initial
version of pci_configure_device() only configures hot-added devices,
ignoring anything added during boot.
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
2014-08-28 00:29:47 +04:00
static void pci_configure_device ( struct pci_dev * dev )
{
2015-08-21 00:08:27 +03:00
pci_configure_mps ( dev ) ;
2017-07-12 07:04:14 +03:00
pci_configure_extended_tags ( dev , NULL ) ;
2017-08-15 06:23:23 +03:00
pci_configure_relaxed_ordering ( dev ) ;
2017-11-29 01:43:50 +03:00
pci_configure_ltr ( dev ) ;
2018-06-30 18:24:24 +03:00
pci_configure_eetlp_prefix ( dev ) ;
2018-11-14 17:47:01 +03:00
pci_configure_serr ( dev ) ;
2015-08-21 00:08:27 +03:00
PCI/ACPI: Remove unnecessary struct hotplug_program_ops
Move the ACPI-specific structs hpx_type0, hpx_type1, hpx_type2 and
hpx_type3 to drivers/pci/pci-acpi.c as they are not used anywhere else.
Then remove the struct hotplug_program_ops that has been shared between
drivers/pci/probe.c and drivers/pci/pci-acpi.c from drivers/pci/pci.h as it
is no longer needed.
The struct hotplug_program_ops was added by 87fcf12e846a ("PCI/ACPI: Remove
the need for 'struct hotplug_params'") and replaced previously used struct
hotplug_params enabling the support for the _HPX Type 3 Setting Record that
was added by f873c51a155a ("PCI/ACPI: Implement _HPX Type 3 Setting
Record").
The new struct allowed for the static functions such program_hpx_type0(),
program_hpx_type1(), etc., from the drivers/pci/probe.c to be called from
the function pci_acpi_program_hp_params() in the drivers/pci/pci-acpi.c.
Previously a programming of _HPX Type 0 was as follows:
drivers/pci/probe.c:
program_hpx_type0()
...
pci_configure_device()
hp_ops = {
.program_type0 = program_hpx_type0,
...
}
pci_acpi_program_hp_params(&hp_ops)
drivers/pci/pci-acpi.c:
pci_acpi_program_hp_params(&hp_ops)
acpi_run_hpx(hp_ops)
decode_type0_hpx_record()
hp_ops->program_type0 # program_hpx_type0() called via hp_ops
After the ACPI-specific functions, structs, enums, etc., have been moved to
drivers/pci/pci-acpi.c there is no need for the hotplug_program_ops as all
of the _HPX Type 0, 1, 2 and 3 are directly accessible.
Link: https://lore.kernel.org/r/20190827094951.10613-4-kw@linux.com
Signed-off-by: Krzysztof Wilczynski <kw@linux.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2019-08-27 12:49:51 +03:00
pci_acpi_program_hp_params ( dev ) ;
PCI: Add pci_configure_device() during enumeration
Some platforms can tell the OS how to configure PCI devices, e.g., how to
set cache line size, error reporting enables, etc. ACPI defines _HPP and
_HPX methods for this purpose.
This configuration was previously done by some of the hotplug drivers using
pci_configure_slot(). But not all hotplug drivers did this, and per the
spec (ACPI rev 5.0, sec 6.2.7), we can also do it for "devices not
configured by the BIOS at system boot."
Move this configuration into the PCI core by adding pci_configure_device()
and calling it from pci_device_add(), so we do this for all devices as we
enumerate them.
This is based on pci_configure_slot(), which is used by hotplug drivers.
I omitted:
- pcie_bus_configure_settings() because it configures MPS and MRRS, which
requires global knowledge of the fabric and must be done later, and
- configuration of subordinate devices; that will happen when we call
pci_device_add() for those devices.
Because pci_configure_slot() was only done by hotplug drivers, this initial
version of pci_configure_device() only configures hot-added devices,
ignoring anything added during boot.
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
2014-08-28 00:29:47 +04:00
}
2008-10-13 15:49:55 +04:00
/*
 * Release per-capability state held for @dev: AER, VPD, IOV and the saved
 * capability buffers, in that order.
 */
static void pci_release_capabilities(struct pci_dev *dev)
{
	pci_aer_exit(dev);
	pci_vpd_release(dev);
	pci_iov_release(dev);
	pci_free_cap_save_buffers(dev);
}
2005-04-17 02:20:36 +04:00
/**
2017-11-30 19:58:14 +03:00
* pci_release_dev - Free a PCI device structure when all users of it are
* finished
2005-04-17 02:20:36 +04:00
* @ dev : device that ' s been disconnected
*
2017-11-30 19:58:14 +03:00
* Will be called only by the device core when all users of this PCI device are
2005-04-17 02:20:36 +04:00
* done .
*/
static void pci_release_dev ( struct device * dev )
{
2014-02-01 18:38:29 +04:00
struct pci_dev * pci_dev ;
2005-04-17 02:20:36 +04:00
2014-02-01 18:38:29 +04:00
pci_dev = to_pci_dev ( dev ) ;
2008-10-13 15:49:55 +04:00
pci_release_capabilities ( pci_dev ) ;
2011-04-11 05:37:07 +04:00
pci_release_of_node ( pci_dev ) ;
2013-06-04 21:18:14 +04:00
pcibios_release_device ( pci_dev ) ;
2013-05-25 17:48:31 +04:00
pci_bus_put ( pci_dev - > bus ) ;
PCI: Introduce new device binding path using pci_dev.driver_override
The driver_override field allows us to specify the driver for a device
rather than relying on the driver to provide a positive match of the
device. This shortcuts the existing process of looking up the vendor and
device ID, adding them to the driver new_id, binding the device, then
removing the ID, but it also provides a couple advantages.
First, the above existing process allows the driver to bind to any device
matching the new_id for the window where it's enabled. This is often not
desired, such as the case of trying to bind a single device to a meta
driver like pci-stub or vfio-pci. Using driver_override we can do this
deterministically using:
echo pci-stub > /sys/bus/pci/devices/0000:03:00.0/driver_override
echo 0000:03:00.0 > /sys/bus/pci/devices/0000:03:00.0/driver/unbind
echo 0000:03:00.0 > /sys/bus/pci/drivers_probe
Previously we could not invoke drivers_probe after adding a device to
new_id for a driver as we get non-deterministic behavior whether the driver
we intend or the standard driver will claim the device. Now it becomes a
deterministic process, only the driver matching driver_override will probe
the device.
To return the device to the standard driver, we simply clear the
driver_override and reprobe the device:
echo > /sys/bus/pci/devices/0000:03:00.0/driver_override
echo 0000:03:00.0 > /sys/bus/pci/devices/0000:03:00.0/driver/unbind
echo 0000:03:00.0 > /sys/bus/pci/drivers_probe
Another advantage to this approach is that we can specify a driver override
to force a specific binding or prevent any binding. For instance when an
IOMMU group is exposed to userspace through VFIO we require that all
devices within that group are owned by VFIO. However, devices can be
hot-added into an IOMMU group, in which case we want to prevent the device
from binding to any driver (override driver = "none") or perhaps have it
automatically bind to vfio-pci. With driver_override it's a simple matter
for this field to be set internally when the device is first discovered to
prevent driver matches.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Alexander Graf <agraf@suse.de>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-05-20 18:53:21 +04:00
kfree ( pci_dev - > driver_override ) ;
2018-08-30 13:32:36 +03:00
bitmap_free ( pci_dev - > dma_alias_mask ) ;
2005-04-17 02:20:36 +04:00
kfree ( pci_dev ) ;
}
2013-05-25 17:48:30 +04:00
struct pci_dev * pci_alloc_dev ( struct pci_bus * bus )
2007-04-05 11:19:08 +04:00
{
struct pci_dev * dev ;
dev = kzalloc ( sizeof ( struct pci_dev ) , GFP_KERNEL ) ;
if ( ! dev )
return NULL ;
INIT_LIST_HEAD ( & dev - > bus_list ) ;
2013-04-08 07:05:07 +04:00
dev - > dev . type = & pci_dev_type ;
2013-05-25 17:48:30 +04:00
dev - > bus = pci_bus_get ( bus ) ;
2007-04-05 11:19:08 +04:00
return dev ;
}
2013-05-25 17:48:30 +04:00
EXPORT_SYMBOL ( pci_alloc_dev ) ;
2017-08-29 22:45:44 +03:00
/*
 * Return true if the low 16 bits of @l (the Vendor ID half of the dword)
 * equal 0x0001, the value treated elsewhere in this file as a Configuration
 * Request Retry Status (CRS) completion.
 */
static bool pci_bus_crs_vendor_id(u32 l)
{
	u32 vendor = l & 0xffff;

	return vendor == 0x0001;
}
2017-08-29 22:45:44 +03:00
/*
 * Wait for a device that answered with a CRS completion to become ready.
 *
 * @bus, @devfn: location of the device being probed
 * @l:           in: the Vendor/Device ID dword already read;
 *               out: the most recently read value
 * @timeout:     wait limit compared against the per-step delay in ms;
 *               0 means do not wait
 *
 * Re-reads PCI_VENDOR_ID with a delay that doubles after every attempt
 * (1, 2, 4, ... ms), so "delay - 1" is the total time slept so far.
 *
 * Returns true if *@l is not (or is no longer) a CRS value; false if the
 * caller asked not to wait, the delay exceeded @timeout, or a config read
 * failed.
 *
 * NOTE(review): message strings are kept exactly as they appear in this
 * copy of the file; the padding inside the quotes looks like an extraction
 * artifact -- confirm against the canonical source.
 */
static bool pci_bus_wait_crs(struct pci_bus *bus, int devfn, u32 *l,
			     int timeout)
{
	int delay;

	if (!pci_bus_crs_vendor_id(*l))
		return true;	/* not a CRS completion */

	if (!timeout)
		return false;	/* CRS, but caller doesn't want to wait */

	/*
	 * We got the reserved Vendor ID that indicates a completion with
	 * Configuration Request Retry Status (CRS).  Retry until we get a
	 * valid Vendor ID or we time out.
	 */
	for (delay = 1; pci_bus_crs_vendor_id(*l); delay *= 2) {
		if (delay > timeout) {
			pr_warn(" pci %04x:%02x:%02x.%d: not ready after %dms; giving up \n ",
				pci_domain_nr(bus), bus->number,
				PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1);
			return false;
		}

		/* Only start logging once the wait has become noticeable */
		if (delay >= 1000)
			pr_info(" pci %04x:%02x:%02x.%d: not ready after %dms; waiting \n ",
				pci_domain_nr(bus), bus->number,
				PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1);

		msleep(delay);

		if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l))
			return false;
	}

	if (delay >= 1000)
		pr_info(" pci %04x:%02x:%02x.%d: ready after %dms \n ",
			pci_domain_nr(bus), bus->number,
			PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1);

	return true;
}
2017-08-29 22:45:44 +03:00
PCI: Workaround IDT switch ACS Source Validation erratum
Some IDT switches incorrectly flag an ACS Source Validation error on
completions for config read requests even though PCIe r4.0, sec 6.12.1.1,
says that completions are never affected by ACS Source Validation. Here's
the text of IDT 89H32H8G3-YC, erratum #36:
Item #36 - Downstream port applies ACS Source Validation to Completions
Section 6.12.1.1 of the PCI Express Base Specification 3.1 states that
completions are never affected by ACS Source Validation. However,
completions received by a downstream port of the PCIe switch from a
device that has not yet captured a PCIe bus number are incorrectly
dropped by ACS Source Validation by the switch downstream port.
Workaround: Issue a CfgWr1 to the downstream device before issuing the
first CfgRd1 to the device. This allows the downstream device to capture
its bus number; ACS Source Validation no longer stops completions from
being forwarded by the downstream port. It has been observed that
Microsoft Windows implements this workaround already; however, some
versions of Linux and other operating systems may not.
When doing the first config read to probe for a device, if the device is
behind an IDT switch with this erratum:
1. Disable ACS Source Validation if enabled
2. Wait for device to become ready to accept config accesses (by using
the Config Request Retry Status mechanism)
3. Do a config write to the endpoint
4. Enable ACS Source Validation (if it was enabled to begin with)
The workaround suggested by IDT is basically only step 3, but we don't know
when the device is ready to accept config requests. That means we need to
do config reads until we receive a non-Config Request Retry Status, which
means we need to disable ACS SV temporarily.
Signed-off-by: James Puthukattukaran <james.puthukattukaran@oracle.com>
[bhelgaas: changelog, clean up whitespace, fold in unused variable fix
from Anders Roxell <anders.roxell@linaro.org>]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
2018-07-09 18:31:25 +03:00
/*
 * Read the Vendor/Device ID dword of the function at @bus/@devfn into *@l.
 *
 * Returns false if the config read fails or the value is one of the
 * patterns treated as "empty slot".  If the value indicates a CRS
 * completion, the result of pci_bus_wait_crs() with @timeout is returned.
 * Otherwise returns true.
 */
bool pci_bus_generic_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
					int timeout)
{
	if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l))
		return false;

	/* Some broken boards return 0 or ~0 if a slot is empty: */
	switch (*l) {
	case 0xffffffff:
	case 0x00000000:
	case 0x0000ffff:
	case 0xffff0000:
		return false;
	default:
		break;
	}

	if (!pci_bus_crs_vendor_id(*l))
		return true;

	return pci_bus_wait_crs(bus, devfn, l, timeout);
}
PCI: Workaround IDT switch ACS Source Validation erratum
Some IDT switches incorrectly flag an ACS Source Validation error on
completions for config read requests even though PCIe r4.0, sec 6.12.1.1,
says that completions are never affected by ACS Source Validation. Here's
the text of IDT 89H32H8G3-YC, erratum #36:
Item #36 - Downstream port applies ACS Source Validation to Completions
Section 6.12.1.1 of the PCI Express Base Specification 3.1 states that
completions are never affected by ACS Source Validation. However,
completions received by a downstream port of the PCIe switch from a
device that has not yet captured a PCIe bus number are incorrectly
dropped by ACS Source Validation by the switch downstream port.
Workaround: Issue a CfgWr1 to the downstream device before issuing the
first CfgRd1 to the device. This allows the downstream device to capture
its bus number; ACS Source Validation no longer stops completions from
being forwarded by the downstream port. It has been observed that
Microsoft Windows implements this workaround already; however, some
versions of Linux and other operating systems may not.
When doing the first config read to probe for a device, if the device is
behind an IDT switch with this erratum:
1. Disable ACS Source Validation if enabled
2. Wait for device to become ready to accept config accesses (by using
the Config Request Retry Status mechanism)
3. Do a config write to the endpoint
4. Enable ACS Source Validation (if it was enabled to begin with)
The workaround suggested by IDT is basically only step 3, but we don't know
when the device is ready to accept config requests. That means we need to
do config reads until we receive a non-Config Request Retry Status, which
means we need to disable ACS SV temporarily.
Signed-off-by: James Puthukattukaran <james.puthukattukaran@oracle.com>
[bhelgaas: changelog, clean up whitespace, fold in unused variable fix
from Anders Roxell <anders.roxell@linaro.org>]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
2018-07-09 18:31:25 +03:00
/*
 * Read the Vendor/Device ID dword of the function at @bus/@devfn into *@l,
 * waiting up to @timeout on CRS completions.
 *
 * With CONFIG_PCI_QUIRKS, buses behind an IDT bridge with device ID 0x80b5
 * are routed through pci_idt_bus_quirk(); everything else uses
 * pci_bus_generic_read_dev_vendor_id().  Returns that helper's result.
 */
bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
				int timeout)
{
#ifdef CONFIG_PCI_QUIRKS
	struct pci_dev *bridge = bus->self;

	/*
	 * Certain IDT switches have an issue where they improperly trigger
	 * ACS Source Validation errors on completions for config reads.
	 */
	if (bridge != NULL &&
	    bridge->vendor == PCI_VENDOR_ID_IDT &&
	    bridge->device == 0x80b5)
		return pci_idt_bus_quirk(bus, devfn, l, timeout);
#endif

	return pci_bus_generic_read_dev_vendor_id(bus, devfn, l, timeout);
}
EXPORT_SYMBOL(pci_bus_read_dev_vendor_id);
/*
2017-11-30 19:58:14 +03:00
* Read the config data for a PCI device , sanity - check it ,
* and fill in the dev structure .
2012-01-27 22:55:10 +04:00
*/
static struct pci_dev * pci_scan_device ( struct pci_bus * bus , int devfn )
{
struct pci_dev * dev ;
u32 l ;
if ( ! pci_bus_read_dev_vendor_id ( bus , devfn , & l , 60 * 1000 ) )
return NULL ;
2013-05-25 17:48:31 +04:00
dev = pci_alloc_dev ( bus ) ;
2005-04-17 02:20:36 +04:00
if ( ! dev )
return NULL ;
dev - > devfn = devfn ;
dev - > vendor = l & 0xffff ;
dev - > device = ( l > > 16 ) & 0xffff ;
2008-09-02 19:40:51 +04:00
2011-04-11 05:37:07 +04:00
pci_set_of_node ( dev ) ;
2009-03-20 06:25:14 +03:00
if ( pci_setup_device ( dev ) ) {
2013-05-25 17:48:31 +04:00
pci_bus_put ( dev - > bus ) ;
2005-04-17 02:20:36 +04:00
kfree ( dev ) ;
return NULL ;
}
return dev ;
}
2019-03-20 14:05:30 +03:00
void pcie_report_downtraining ( struct pci_dev * dev )
2018-08-07 02:25:35 +03:00
{
if ( ! pci_is_pcie ( dev ) )
return ;
/* Look from the device up to avoid downstream ports with no devices */
if ( ( pci_pcie_type ( dev ) ! = PCI_EXP_TYPE_ENDPOINT ) & &
( pci_pcie_type ( dev ) ! = PCI_EXP_TYPE_LEG_END ) & &
( pci_pcie_type ( dev ) ! = PCI_EXP_TYPE_UPSTREAM ) )
return ;
/* Multi-function PCIe devices share the same link/status */
if ( PCI_FUNC ( dev - > devfn ) ! = 0 | | dev - > is_virtfn )
return ;
/* Print link status only if the device is constrained by the fabric */
__pcie_print_link_status ( dev , false ) ;
}
2008-10-13 15:49:55 +04:00
static void pci_init_capabilities ( struct pci_dev * dev )
{
2019-10-04 00:28:26 +03:00
pci_ea_init ( dev ) ; /* Enhanced Allocation */
2015-10-30 01:35:39 +03:00
2015-10-21 17:17:35 +03:00
/* Setup MSI caps & disable MSI/MSI-X interrupts */
pci_msi_setup_pci_dev ( dev ) ;
2008-10-13 15:49:55 +04:00
2008-12-08 00:02:58 +03:00
/* Buffers for saving PCIe and PCI-X capabilities */
pci_allocate_cap_save_buffers ( dev ) ;
2019-10-04 00:28:26 +03:00
pci_pm_init ( dev ) ; /* Power Management */
pci_vpd_init ( dev ) ; /* Vital Product Data */
pci_configure_ari ( dev ) ; /* Alternative Routing-ID Forwarding */
pci_iov_init ( dev ) ; /* Single Root I/O Virtualization */
pci_ats_init ( dev ) ; /* Address Translation Services */
2019-11-28 17:54:55 +03:00
pci_pri_init ( dev ) ; /* Page Request Interface */
pci_pasid_init ( dev ) ; /* Process Address Space ID */
2019-10-04 00:28:26 +03:00
pci_enable_acs ( dev ) ; /* Enable ACS P2P upstream forwarding */
pci_ptm_init ( dev ) ; /* Precision Time Measurement */
pci_aer_init ( dev ) ; /* Advanced Error Reporting */
2018-02-16 19:55:38 +03:00
2018-08-07 02:25:35 +03:00
pcie_report_downtraining ( dev ) ;
2018-02-16 19:55:38 +03:00
if ( pci_probe_reset_function ( dev ) = = 0 )
dev - > reset_fn = 1 ;
2008-10-13 15:49:55 +04:00
}
2015-10-02 12:19:32 +03:00
/*
2017-11-30 19:58:14 +03:00
* This is the equivalent of pci_host_bridge_msi_domain ( ) that acts on
2015-10-02 12:19:32 +03:00
* devices . Firmware interfaces that can select the MSI domain on a
* per - device basis should be called from here .
*/
static struct irq_domain * pci_dev_msi_domain ( struct pci_dev * dev )
{
struct irq_domain * d ;
/*
2017-11-30 19:58:14 +03:00
* If a domain has been set through the pcibios_add_device ( )
2015-10-02 12:19:32 +03:00
* callback , then this is the one ( platform code knows best ) .
*/
d = dev_get_msi_domain ( & dev - > dev ) ;
if ( d )
return d ;
2015-10-02 16:43:06 +03:00
/*
* Let ' s see if we have a firmware interface able to provide
* the domain .
*/
d = pci_msi_get_device_domain ( dev ) ;
if ( d )
return d ;
2015-10-02 12:19:32 +03:00
return NULL ;
}
2015-07-28 16:46:11 +03:00
static void pci_set_msi_domain ( struct pci_dev * dev )
{
2015-10-02 12:19:32 +03:00
struct irq_domain * d ;
2015-07-28 16:46:11 +03:00
/*
2015-10-02 12:19:32 +03:00
* If the platform or firmware interfaces cannot supply a
* device - specific MSI domain , then inherit the default domain
* from the host bridge itself .
2015-07-28 16:46:11 +03:00
*/
2015-10-02 12:19:32 +03:00
d = pci_dev_msi_domain ( dev ) ;
if ( ! d )
d = dev_get_msi_domain ( & dev - > bus - > dev ) ;
dev_set_msi_domain ( & dev - > dev , d ) ;
2015-07-28 16:46:11 +03:00
}
2007-03-27 09:53:30 +04:00
void pci_device_add ( struct pci_dev * dev , struct pci_bus * bus )
2005-04-17 02:20:36 +04:00
{
2013-01-22 01:20:52 +04:00
int ret ;
PCI: Add pci_configure_device() during enumeration
Some platforms can tell the OS how to configure PCI devices, e.g., how to
set cache line size, error reporting enables, etc. ACPI defines _HPP and
_HPX methods for this purpose.
This configuration was previously done by some of the hotplug drivers using
pci_configure_slot(). But not all hotplug drivers did this, and per the
spec (ACPI rev 5.0, sec 6.2.7), we can also do it for "devices not
configured by the BIOS at system boot."
Move this configuration into the PCI core by adding pci_configure_device()
and calling it from pci_device_add(), so we do this for all devices as we
enumerate them.
This is based on pci_configure_slot(), which is used by hotplug drivers.
I omitted:
- pcie_bus_configure_settings() because it configures MPS and MRRS, which
requires global knowledge of the fabric and must be done later, and
- configuration of subordinate devices; that will happen when we call
pci_device_add() for those devices.
Because pci_configure_slot() was only done by hotplug drivers, this initial
version of pci_configure_device() only configures hot-added devices,
ignoring anything added during boot.
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
2014-08-28 00:29:47 +04:00
pci_configure_device ( dev ) ;
2005-09-06 03:31:03 +04:00
device_initialize ( & dev - > dev ) ;
dev - > dev . release = pci_release_dev ;
2005-04-17 02:20:36 +04:00
2013-01-22 01:20:44 +04:00
set_dev_node ( & dev - > dev , pcibus_to_node ( bus ) ) ;
2005-09-06 03:31:03 +04:00
dev - > dev . dma_mask = & dev - > dma_mask ;
2008-02-05 09:27:55 +03:00
dev - > dev . dma_parms = & dev - > dma_parms ;
2005-09-06 03:31:03 +04:00
dev - > dev . coherent_dma_mask = 0xffffffffull ;
2005-04-17 02:20:36 +04:00
2018-10-09 17:08:24 +03:00
dma_set_max_seg_size ( & dev - > dev , 65536 ) ;
2018-10-09 17:08:23 +03:00
dma_set_seg_boundary ( & dev - > dev , 0xffffffff ) ;
2008-02-05 09:27:55 +03:00
2005-04-17 02:20:36 +04:00
/* Fix up broken headers */
pci_fixup_device ( pci_fixup_header , dev ) ;
2012-02-16 09:40:31 +04:00
pci_reassigndev_resource_alignment ( dev ) ;
2009-09-10 01:49:59 +04:00
dev - > state_saved = false ;
2008-10-13 15:49:55 +04:00
pci_init_capabilities ( dev ) ;
2008-07-07 05:34:48 +04:00
2005-04-17 02:20:36 +04:00
/*
* Add the device to our list of discovered devices
* and the bus list for fixup functions , etc .
*/
2006-06-02 08:35:43 +04:00
down_write ( & pci_bus_sem ) ;
2005-04-17 02:20:36 +04:00
list_add_tail ( & dev - > bus_list , & bus - > devices ) ;
2006-06-02 08:35:43 +04:00
up_write ( & pci_bus_sem ) ;
2013-01-22 01:20:52 +04:00
ret = pcibios_add_device ( dev ) ;
WARN_ON ( ret < 0 ) ;
2017-11-30 19:58:14 +03:00
/* Set up MSI IRQ domain */
2015-07-28 16:46:11 +03:00
pci_set_msi_domain ( dev ) ;
2013-01-22 01:20:52 +04:00
/* Notifier could use PCI capabilities */
dev - > match_driver = false ;
ret = device_add ( & dev - > dev ) ;
WARN_ON ( ret < 0 ) ;
2005-09-06 03:31:03 +04:00
}
2014-04-15 02:11:40 +04:00
/*
 * Return the PCI device at @devfn on @bus, probing for it if necessary.
 *
 * If the device is already known, the existing pci_dev is returned.
 * Otherwise the slot is scanned and any device found is set up with
 * pci_device_add().  Returns NULL when no device is found.
 */
struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn)
{
	struct pci_dev *known = pci_get_slot(bus, devfn);
	struct pci_dev *found;

	if (known) {
		/* Balance the pci_get_slot() lookup before handing it back */
		pci_dev_put(known);
		return known;
	}

	found = pci_scan_device(bus, devfn);
	if (found)
		pci_device_add(found, bus);

	return found;
}
2007-11-22 02:07:11 +03:00
EXPORT_SYMBOL ( pci_scan_single_device ) ;
2005-04-17 02:20:36 +04:00
2013-01-25 20:12:31 +04:00
static unsigned next_fn ( struct pci_bus * bus , struct pci_dev * dev , unsigned fn )
2009-12-13 16:10:02 +03:00
{
2013-01-25 20:12:31 +04:00
int pos ;
u16 cap = 0 ;
unsigned next_fn ;
2010-01-18 00:01:41 +03:00
2013-01-25 20:12:31 +04:00
if ( pci_ari_enabled ( bus ) ) {
if ( ! dev )
return 0 ;
pos = pci_find_ext_capability ( dev , PCI_EXT_CAP_ID_ARI ) ;
if ( ! pos )
return 0 ;
2010-01-18 00:01:41 +03:00
2013-01-25 20:12:31 +04:00
pci_read_config_word ( dev , pos + PCI_ARI_CAP , & cap ) ;
next_fn = PCI_ARI_CAP_NFN ( cap ) ;
if ( next_fn < = fn )
return 0 ; /* protect against malformed list */
2009-12-13 16:10:02 +03:00
2013-01-25 20:12:31 +04:00
return next_fn ;
}
/* dev may be NULL for non-contiguous multifunction devices */
if ( ! dev | | dev - > multifunction )
return ( fn + 1 ) % 8 ;
2009-12-13 16:10:02 +03:00
return 0 ;
}
static int only_one_child ( struct pci_bus * bus )
{
2017-12-01 00:22:39 +03:00
struct pci_dev * bridge = bus - > self ;
2012-05-01 01:21:02 +04:00
2017-12-01 00:22:39 +03:00
/*
* Systems with unusual topologies set PCI_SCAN_ALL_PCIE_DEVS so
* we scan for all possible devices , not just Device 0.
*/
if ( pci_has_flag ( PCI_SCAN_ALL_PCIE_DEVS ) )
2009-12-13 16:10:02 +03:00
return 0 ;
2016-02-05 23:57:47 +03:00
/*
2017-12-01 00:22:39 +03:00
* A PCIe Downstream Port normally leads to a Link with only Device
* 0 on it ( PCIe spec r3 .1 , sec 7.3 .1 ) . As an optimization , scan
* only for Device 0 in that situation .
2016-02-05 23:57:47 +03:00
*/
2019-08-22 11:55:53 +03:00
if ( bridge & & pci_is_pcie ( bridge ) & & pcie_downstream_port ( bridge ) )
2009-12-13 16:10:02 +03:00
return 1 ;
2017-12-01 00:22:39 +03:00
2009-12-13 16:10:02 +03:00
return 0 ;
}
2005-04-17 02:20:36 +04:00
/**
2017-11-30 19:58:14 +03:00
* pci_scan_slot - Scan a PCI slot on a bus for devices
2005-04-17 02:20:36 +04:00
* @ bus : PCI bus to scan
2017-11-30 19:58:14 +03:00
* @ devfn : slot number to scan ( must have zero function )
2005-04-17 02:20:36 +04:00
*
* Scan a PCI slot on the specified PCI bus for devices , adding
* discovered devices to the @ bus - > devices list . New devices
2008-02-15 01:56:56 +03:00
* will not have is_added set .
2009-03-20 23:56:05 +03:00
*
* Returns the number of new devices found .
2005-04-17 02:20:36 +04:00
*/
2007-03-27 09:53:30 +04:00
int pci_scan_slot ( struct pci_bus * bus , int devfn )
2005-04-17 02:20:36 +04:00
{
2009-12-13 16:10:02 +03:00
unsigned fn , nr = 0 ;
2009-03-20 23:56:05 +03:00
struct pci_dev * dev ;
2009-12-13 16:10:02 +03:00
if ( only_one_child ( bus ) & & ( devfn > 0 ) )
return 0 ; /* Already scanned the entire slot */
2005-04-17 02:20:36 +04:00
2009-03-20 23:56:05 +03:00
dev = pci_scan_single_device ( bus , devfn ) ;
2010-01-18 00:01:41 +03:00
if ( ! dev )
return 0 ;
2018-07-03 12:05:41 +03:00
if ( ! pci_dev_is_added ( dev ) )
2009-03-20 23:56:05 +03:00
nr + + ;
2013-01-25 20:12:31 +04:00
for ( fn = next_fn ( bus , dev , 0 ) ; fn > 0 ; fn = next_fn ( bus , dev , fn ) ) {
2009-12-13 16:10:02 +03:00
dev = pci_scan_single_device ( bus , devfn + fn ) ;
if ( dev ) {
2018-07-03 12:05:41 +03:00
if ( ! pci_dev_is_added ( dev ) )
2009-12-13 16:10:02 +03:00
nr + + ;
dev - > multifunction = 1 ;
2005-04-17 02:20:36 +04:00
}
}
PCI: add PCI Express ASPM support
PCI Express ASPM defines a protocol for PCI Express components in the D0
state to reduce Link power by placing their Links into a low power state
and instructing the other end of the Link to do likewise. This
capability allows hardware-autonomous, dynamic Link power reduction
beyond what is achievable by software-only controlled power management.
However, The device should be configured by software appropriately.
Enabling ASPM will save power, but will introduce device latency.
This patch adds ASPM support in Linux. It introduces a global policy for
ASPM, a sysfs file /sys/module/pcie_aspm/parameters/policy can control
it. The interface can be used as a boot option too. Currently we have
below setting:
-default, BIOS default setting
-powersave, highest power saving mode, enable all available ASPM
state and clock power management
-performance, highest performance, disable ASPM and clock power
management
By default, the 'default' policy is used currently.
In my test, power difference between powersave mode and performance mode
is about 1.3w in a system with 3 PCIE links.
Note: some devices might not work well with aspm, either because chipset
issue or device issue. The patch provide API (pci_disable_link_state),
driver can disable ASPM for specific device.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
2008-02-25 04:46:41 +03:00
2017-11-30 19:58:14 +03:00
/* Only one slot has PCIe device */
2008-07-23 06:32:31 +04:00
if ( bus - > self & & nr )
PCI: add PCI Express ASPM support
PCI Express ASPM defines a protocol for PCI Express components in the D0
state to reduce Link power by placing their Links into a low power state
and instructing the other end of the Link to do likewise. This
capability allows hardware-autonomous, dynamic Link power reduction
beyond what is achievable by software-only controlled power management.
However, The device should be configured by software appropriately.
Enabling ASPM will save power, but will introduce device latency.
This patch adds ASPM support in Linux. It introduces a global policy for
ASPM, a sysfs file /sys/module/pcie_aspm/parameters/policy can control
it. The interface can be used as a boot option too. Currently we have
below setting:
-default, BIOS default setting
-powersave, highest power saving mode, enable all available ASPM
state and clock power management
-performance, highest performance, disable ASPM and clock power
management
By default, the 'default' policy is used currently.
In my test, power difference between powersave mode and performance mode
is about 1.3w in a system with 3 PCIE links.
Note: some devices might not work well with aspm, either because chipset
issue or device issue. The patch provide API (pci_disable_link_state),
driver can disable ASPM for specific device.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
2008-02-25 04:46:41 +03:00
pcie_aspm_init_link_state ( bus - > self ) ;
2005-04-17 02:20:36 +04:00
return nr ;
}
2014-04-26 00:32:25 +04:00
EXPORT_SYMBOL ( pci_scan_slot ) ;
2005-04-17 02:20:36 +04:00
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sent by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means, for example, that having a 128-byte MPS USB controller on one
leg of a switch will dramatically reduce the performance of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
static int pcie_find_smpss ( struct pci_dev * dev , void * data )
{
u8 * smpss = data ;
if ( ! pci_is_pcie ( dev ) )
return 0 ;
PCI: Don't restrict MPS for slots below Root Ports
When booting with "pci=pcie_bus_safe", we previously limited the
fabric MPS to 128 when we found:
(1) A hotplug-capable Downstream Port ("dev->is_hotplug_bridge &&
pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT"), or
(2) A hotplug-capable Root Port with a slot that was either empty or
contained a multi-function device ("dev->is_hotplug_bridge &&
!list_is_singular(&dev->bus->devices)")
Part (1) is valid, but part (2) is not.
After a hot-add in the slot below a Root Port, we can reconfigure all
MPS values in the fabric below the Root Port because the new device is
the only thing below the Root Port and there are no active drivers.
Therefore, there's no reason to limit the MPS for Root Ports, no
matter what's in the slot.
Test info:
-+-[0000:40]-+-07.0-[0000:46]--+-00.0 Intel 82576 NIC
\-00.1 Intel 82576 NIC
0000:40:07.0 Root Port bridge to [bus 46] (MPS supported=256)
0000:46:00.0 Endpoint (MPS supported=512)
0000:46:00.1 Endpoint (MPS supported=512)
# echo 0 > /sys/bus/pci/slots/7/power
# echo 1 > /sys/bus/pci/slots/7/power
pcieport 0000:40:07.0: PCI-E Max Payload Size set to 256/ 256 (was 256)
pci 0000:46:00.0: PCI-E Max Payload Size set to 256/ 512 (was 128)
pci 0000:46:00.1: PCI-E Max Payload Size set to 256/ 512 (was 128)
Before this change, we set MPS to 128 for the Root Port and both NICs
because the slot contained a multi-function device and
dev->is_hotplug_bridge && !list_is_singular(&dev->bus->devices)
was true. After this change, we set it to 256.
[bhelgaas: changelog, comments, split out upstream bridge check]
Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Jon Mason <jdmason@kudzu.us>
2013-08-22 07:24:47 +04:00
/*
* We don ' t have a way to change MPS settings on devices that have
* drivers attached . A hot - added device might support only the minimum
* MPS setting ( MPS = 128 ) . Therefore , if the fabric contains a bridge
* where devices may be hot - added , we limit the fabric MPS to 128 so
* hot - added devices will work correctly .
*
* However , if we hot - add a device to a slot directly below a Root
* Port , it ' s impossible for there to be other existing devices below
* the port . We don ' t limit the MPS in this case because we can
* reconfigure MPS on both the Root Port and the hot - added device ,
* and there are no other devices involved .
*
* Note that this PCIE_BUS_SAFE path assumes no peer - to - peer DMA .
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sends by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce performances of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
*/
PCI: Don't restrict MPS for slots below Root Ports
When booting with "pci=pcie_bus_safe", we previously limited the
fabric MPS to 128 when we found:
(1) A hotplug-capable Downstream Port ("dev->is_hotplug_bridge &&
pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT"), or
(2) A hotplug-capable Root Port with a slot that was either empty or
contained a multi-function device ("dev->is_hotplug_bridge &&
!list_is_singular(&dev->bus->devices)")
Part (1) is valid, but part (2) is not.
After a hot-add in the slot below a Root Port, we can reconfigure all
MPS values in the fabric below the Root Port because the new device is
the only thing below the Root Port and there are no active drivers.
Therefore, there's no reason to limit the MPS for Root Ports, no
matter what's in the slot.
Test info:
-+-[0000:40]-+-07.0-[0000:46]--+-00.0 Intel 82576 NIC
\-00.1 Intel 82576 NIC
0000:40:07.0 Root Port bridge to [bus 46] (MPS supported=256)
0000:46:00.0 Endpoint (MPS supported=512)
0000:46:00.1 Endpoint (MPS supported=512)
# echo 0 > /sys/bus/pci/slots/7/power
# echo 1 > /sys/bus/pci/slots/7/power
pcieport 0000:40:07.0: PCI-E Max Payload Size set to 256/ 256 (was 256)
pci 0000:46:00.0: PCI-E Max Payload Size set to 256/ 512 (was 128)
pci 0000:46:00.1: PCI-E Max Payload Size set to 256/ 512 (was 128)
Before this change, we set MPS to 128 for the Root Port and both NICs
because the slot contained a multi-function device and
dev->is_hotplug_bridge && !list_is_singular(&dev->bus->devices)
was true. After this change, we set it to 256.
[bhelgaas: changelog, comments, split out upstream bridge check]
Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Jon Mason <jdmason@kudzu.us>
2013-08-22 07:24:47 +04:00
if ( dev - > is_hotplug_bridge & &
pci_pcie_type ( dev ) ! = PCI_EXP_TYPE_ROOT_PORT )
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sends by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce performances of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
* smpss = 0 ;
if ( * smpss > dev - > pcie_mpss )
* smpss = dev - > pcie_mpss ;
return 0 ;
}
static void pcie_write_mps ( struct pci_dev * dev , int mps )
{
2011-10-14 23:56:14 +04:00
int rc ;
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sends by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce performances of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
if ( pcie_bus_config = = PCIE_BUS_PERFORMANCE ) {
2011-10-14 23:56:14 +04:00
mps = 128 < < dev - > pcie_mpss ;
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sends by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce performances of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
2012-07-24 13:20:03 +04:00
if ( pci_pcie_type ( dev ) ! = PCI_EXP_TYPE_ROOT_PORT & &
dev - > bus - > self )
2017-11-30 19:58:14 +03:00
/*
* For " Performance " , the assumption is made that
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sends by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce performances of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
* downstream communication will never be larger than
* the MRRS . So , the MPS only needs to be configured
* for the upstream communication . This being the case ,
* walk from the top down and set the MPS of the child
* to that of the parent bus .
2011-10-14 23:56:14 +04:00
*
* Configure the device MPS with the smaller of the
* device MPSS or the bridge MPS ( which is assumed to be
* properly configured at this point to the largest
* allowable MPS based on its parent bus ) .
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sent by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce the performance of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
*/
2011-10-14 23:56:14 +04:00
mps = min ( mps , pcie_get_mps ( dev - > bus - > self ) ) ;
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sent by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce the performance of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
}
rc = pcie_set_mps ( dev , mps ) ;
if ( rc )
2018-01-18 21:55:24 +03:00
pci_err ( dev , " Failed attempting to set the MPS \n " ) ;
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sent by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce the performance of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
}
2011-10-14 23:56:14 +04:00
static void pcie_write_mrrs ( struct pci_dev * dev )
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sent by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce the performance of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
{
2011-10-14 23:56:14 +04:00
int rc , mrrs ;
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sent by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce the performance of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
2017-11-30 19:58:14 +03:00
/*
* In the " safe " case , do not configure the MRRS . There appear to be
2011-09-09 01:41:18 +04:00
* issues with setting MRRS to 0 on a number of devices .
*/
if ( pcie_bus_config ! = PCIE_BUS_PERFORMANCE )
return ;
2017-11-30 19:58:14 +03:00
/*
* For max performance , the MRRS must be set to the largest supported
2011-09-09 01:41:18 +04:00
* value . However , it cannot be configured larger than the MPS the
2011-10-14 23:56:14 +04:00
* device or the bus can support . This should already be properly
2017-11-30 19:58:14 +03:00
* configured by a prior call to pcie_write_mps ( ) .
2011-09-09 01:41:18 +04:00
*/
2011-10-14 23:56:14 +04:00
mrrs = pcie_get_mps ( dev ) ;
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sent by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce the performance of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
2017-11-30 19:58:14 +03:00
/*
* MRRS is a R / W register . Invalid values can be written , but a
2011-09-09 01:41:18 +04:00
* subsequent read will verify if the value is acceptable or not .
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sent by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce the performance of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
* If the MRRS value provided is not acceptable ( e . g . , too large ) ,
* shrink the value until it is acceptable to the HW .
2013-11-14 22:28:18 +04:00
*/
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sent by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce the performance of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
while ( mrrs ! = pcie_get_readrq ( dev ) & & mrrs > = 128 ) {
rc = pcie_set_readrq ( dev , mrrs ) ;
2011-10-14 23:56:14 +04:00
if ( ! rc )
break ;
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sent by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce the performance of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
2018-01-18 21:55:24 +03:00
pci_warn ( dev , " Failed attempting to set the MRRS \n " ) ;
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sent by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce the performance of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
mrrs / = 2 ;
}
2011-10-14 23:56:14 +04:00
if ( mrrs < 128 )
2018-01-18 21:55:24 +03:00
pci_err ( dev , " MRRS was unable to be configured with a safe value. If problems are experienced, try running with pci=pcie_bus_safe \n " ) ;
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sent by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce the performance of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
}
static int pcie_bus_configure_set ( struct pci_dev * dev , void * data )
{
2011-10-14 23:56:16 +04:00
int mps , orig_mps ;
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sends by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce performances of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
if ( ! pci_is_pcie ( dev ) )
return 0 ;
PCI: Set MPS to match upstream bridge
Firmware typically configures the PCIe fabric with a consistent Max Payload
Size setting based on the devices present at boot. A hot-added device
typically has the power-on default MPS setting (128 bytes), which may not
match the fabric.
The previous Linux default, in the absence of any "pci=pcie_bus_*" options,
was PCIE_BUS_TUNE_OFF, in which we never touch MPS, even for hot-added
devices.
Add a new default setting, PCIE_BUS_DEFAULT, in which we make sure every
device's MPS setting matches the upstream bridge. This makes it more
likely that a hot-added device will work in a system with optimized MPS
configuration.
Note that if we hot-add a device that only supports 128-byte MPS, it still
likely won't work because we don't reconfigure the rest of the fabric.
Booting with "pci=pcie_bus_peer2peer" is a workaround for this because it
sets MPS to 128 for everything.
[bhelgaas: changelog, new default, rework for pci_configure_device() path]
Tested-by: Keith Busch <keith.busch@intel.com>
Tested-by: Jordan Hargrave <jharg93@gmail.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
2015-08-24 16:48:16 +03:00
if ( pcie_bus_config = = PCIE_BUS_TUNE_OFF | |
pcie_bus_config = = PCIE_BUS_DEFAULT )
2013-08-26 12:33:06 +04:00
return 0 ;
2011-10-14 23:56:16 +04:00
mps = 128 < < * ( u8 * ) data ;
orig_mps = pcie_get_mps ( dev ) ;
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sends by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce performances of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
pcie_write_mps ( dev , mps ) ;
2011-10-14 23:56:14 +04:00
pcie_write_mrrs ( dev ) ;
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sends by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce performances of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
2018-01-18 21:55:24 +03:00
pci_info ( dev , " Max Payload Size set to %4d/%4d (was %4d), Max Read Rq %4d \n " ,
2014-04-19 04:13:50 +04:00
pcie_get_mps ( dev ) , 128 < < dev - > pcie_mpss ,
2011-10-14 23:56:16 +04:00
orig_mps , pcie_get_readrq ( dev ) ) ;
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sends by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce performances of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
return 0 ;
}
2017-11-30 19:58:14 +03:00
/*
* pcie_bus_configure_settings ( ) requires that pci_walk_bus work in a top - down ,
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sends by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce performances of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
* parents then children fashion . If this changes , then this code will not
* work as designed .
*/
2013-08-22 07:24:44 +04:00
void pcie_bus_configure_settings ( struct pci_bus * bus )
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sends by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce performances of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
{
2014-04-29 22:51:55 +04:00
u8 smpss = 0 ;
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sends by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce performances of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
2013-08-22 07:24:44 +04:00
if ( ! bus - > self )
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sends by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce performances of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
return ;
if ( ! pci_is_pcie ( bus - > self ) )
2011-10-03 18:50:20 +04:00
return ;
2017-11-30 19:58:14 +03:00
/*
* FIXME - Peer to peer DMA is possible , though the endpoint would need
2013-08-26 12:33:05 +04:00
* to be aware of the MPS of the destination . To work around this ,
2011-10-03 18:50:20 +04:00
* simply force the MPS of the entire system to the smallest possible .
*/
if ( pcie_bus_config = = PCIE_BUS_PEER2PEER )
smpss = 0 ;
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sends by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce performances of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
if ( pcie_bus_config = = PCIE_BUS_SAFE ) {
2013-08-22 07:24:44 +04:00
smpss = bus - > self - > pcie_mpss ;
2011-10-03 18:50:20 +04:00
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sends by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce performances of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
pcie_find_smpss ( bus - > self , & smpss ) ;
pci_walk_bus ( bus , pcie_find_smpss , & smpss ) ;
}
pcie_bus_configure_set ( bus - > self , & smpss ) ;
pci_walk_bus ( bus , pcie_bus_configure_set , & smpss ) ;
}
2011-08-02 09:01:18 +04:00
EXPORT_SYMBOL_GPL ( pcie_bus_configure_settings ) ;
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sent by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means, for example, that having a 128-byte MPS USB controller on one
leg of a switch will dramatically reduce the performance of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-07-21 00:20:54 +04:00
2017-06-24 04:50:42 +03:00
/*
 * Default bus fixup hook.  It is called after each bus has been probed but
 * before its children are examined.  The definition is __weak because
 * multiple architectures provide their own.
 */
void __weak pcibios_fixup_bus(struct pci_bus *bus)
{
	/* nothing to do, expected to be removed in the future */
}
2017-10-13 21:35:44 +03:00
/**
* pci_scan_child_bus_extend ( ) - Scan devices below a bus
* @ bus : Bus to scan for devices
* @ available_buses : Total number of buses available ( % 0 does not try to
* extend beyond the minimal )
*
* Scans devices below @ bus including subordinate buses . Returns new
* subordinate number including all the found devices . Passing
* @ available_buses causes the remaining bus space to be distributed
* equally between hotplug - capable bridges to allow future extension of the
* hierarchy .
*/
static unsigned int pci_scan_child_bus_extend ( struct pci_bus * bus ,
unsigned int available_buses )
2005-04-17 02:20:36 +04:00
{
2017-10-13 21:35:44 +03:00
unsigned int used_buses , normal_bridges = 0 , hotplug_bridges = 0 ;
unsigned int start = bus - > busn_res . start ;
2018-03-07 10:39:13 +03:00
unsigned int devfn , fn , cmax , max = start ;
2005-04-17 02:20:36 +04:00
struct pci_dev * dev ;
2018-03-07 10:39:13 +03:00
int nr_devs ;
2005-04-17 02:20:36 +04:00
2009-11-04 20:32:52 +03:00
dev_dbg ( & bus - > dev , " scanning bus \n " ) ;
2005-04-17 02:20:36 +04:00
/* Go find them, Rover! */
2018-03-07 10:39:13 +03:00
for ( devfn = 0 ; devfn < 256 ; devfn + = 8 ) {
nr_devs = pci_scan_slot ( bus , devfn ) ;
/*
* The Jailhouse hypervisor may pass individual functions of a
* multi - function device to a guest without passing function 0.
* Look for them as well .
*/
if ( jailhouse_paravirt ( ) & & nr_devs = = 0 ) {
for ( fn = 1 ; fn < 8 ; fn + + ) {
dev = pci_scan_single_device ( bus , devfn + fn ) ;
if ( dev )
dev - > multifunction = 1 ;
}
}
}
2005-04-17 02:20:36 +04:00
2017-11-30 19:58:14 +03:00
/* Reserve buses for SR-IOV capability */
2017-10-13 21:35:44 +03:00
used_buses = pci_iov_bus_range ( bus ) ;
max + = used_buses ;
2009-03-20 06:25:13 +03:00
2005-04-17 02:20:36 +04:00
/*
* After performing arch - dependent fixup of the bus , look behind
* all PCI - to - PCI bridges on this bus .
*/
2009-03-20 23:56:10 +03:00
if ( ! bus - > is_added ) {
2009-11-04 20:32:52 +03:00
dev_dbg ( & bus - > dev , " fixups for bus \n " ) ;
2009-03-20 23:56:10 +03:00
pcibios_fixup_bus ( bus ) ;
2013-04-12 09:44:16 +04:00
bus - > is_added = 1 ;
2009-03-20 23:56:10 +03:00
}
2017-10-13 21:35:44 +03:00
/*
* Calculate how many hotplug bridges and normal bridges there
* are on this bus . We will distribute the additional available
* buses between hotplug bridges .
*/
for_each_pci_bridge ( dev , bus ) {
if ( dev - > is_hotplug_bridge )
hotplug_bridges + + ;
else
normal_bridges + + ;
}
2017-10-13 21:35:42 +03:00
/*
* Scan bridges that are already configured . We don ' t touch them
* unless they are misconfigured ( which will be done in the second
* scan below ) .
*/
2017-10-13 21:35:44 +03:00
for_each_pci_bridge ( dev , bus ) {
cmax = max ;
max = pci_scan_bridge_extend ( bus , dev , max , 0 , 0 ) ;
2018-05-28 15:47:50 +03:00
/*
* Reserve one bus for each bridge now to avoid extending
* hotplug bridges too much during the second scan below .
*/
used_buses + + ;
if ( cmax - max > 1 )
used_buses + = cmax - max - 1 ;
2017-10-13 21:35:44 +03:00
}
2017-10-13 21:35:42 +03:00
/* Scan bridges that need to be reconfigured */
2017-10-13 21:35:44 +03:00
for_each_pci_bridge ( dev , bus ) {
unsigned int buses = 0 ;
if ( ! hotplug_bridges & & normal_bridges = = 1 ) {
2017-11-30 19:58:14 +03:00
2017-10-13 21:35:44 +03:00
/*
* There is only one bridge on the bus ( upstream
* port ) so it gets all available buses which it
* can then distribute to the possible hotplug
* bridges below .
*/
buses = available_buses ;
} else if ( dev - > is_hotplug_bridge ) {
2017-11-30 19:58:14 +03:00
2017-10-13 21:35:44 +03:00
/*
* Distribute the extra buses between hotplug
* bridges if any .
*/
buses = available_buses / hotplug_bridges ;
2018-05-28 15:47:50 +03:00
buses = min ( buses , available_buses - used_buses + 1 ) ;
2017-10-13 21:35:44 +03:00
}
cmax = max ;
max = pci_scan_bridge_extend ( bus , dev , cmax , buses , 1 ) ;
2018-05-28 15:47:50 +03:00
/* One bus is already accounted so don't add it again */
if ( max - cmax > 1 )
used_buses + = max - cmax - 1 ;
2017-10-13 21:35:44 +03:00
}
2005-04-17 02:20:36 +04:00
2016-07-22 06:40:28 +03:00
/*
* Make sure a hotplug bridge has at least the minimum requested
2017-10-13 21:35:44 +03:00
* number of buses but allow it to grow up to the maximum available
* bus number of there is room .
2016-07-22 06:40:28 +03:00
*/
2017-10-13 21:35:44 +03:00
if ( bus - > self & & bus - > self - > is_hotplug_bridge ) {
used_buses = max_t ( unsigned int , available_buses ,
pci_hotplug_bus_size - 1 ) ;
if ( max - start < used_buses ) {
max = start + used_buses ;
/* Do not allocate more buses than we have room left */
if ( max > bus - > busn_res . end )
max = bus - > busn_res . end ;
dev_dbg ( & bus - > dev , " %pR extended by %#02x \n " ,
& bus - > busn_res , max - start ) ;
}
2016-07-22 06:40:28 +03:00
}
2005-04-17 02:20:36 +04:00
/*
* We ' ve scanned the bus and so we know all about what ' s on
* the other side of any bridges that may be on this bus plus
* any devices .
*
* Return how far we ' ve got finding sub - buses .
*/
2009-11-04 20:32:52 +03:00
dev_dbg ( & bus - > dev , " bus scan returning with max=%02x \n " , max ) ;
2005-04-17 02:20:36 +04:00
return max ;
}
2017-10-13 21:35:44 +03:00
/**
 * pci_scan_child_bus() - Scan devices below a bus
 * @bus: Bus to scan for devices
 *
 * Thin wrapper around pci_scan_child_bus_extend() that requests no extra
 * bus numbers (available_buses == 0).
 *
 * Return: the new subordinate bus number covering everything found below
 * @bus.
 */
unsigned int pci_scan_child_bus(struct pci_bus *bus)
{
	unsigned int subordinate = pci_scan_child_bus_extend(bus, 0);

	return subordinate;
}
EXPORT_SYMBOL_GPL(pci_scan_child_bus);
2005-04-17 02:20:36 +04:00
ACPI / PCI: Set root bridge ACPI handle in advance
The ACPI handles of PCI root bridges need to be known to
acpi_bind_one(), so that it can create the appropriate
"firmware_node" and "physical_node" files for them, but currently
the way it gets to know those handles is not exactly straightforward
(to put it lightly).
This is how it works, roughly:
1. acpi_bus_scan() finds the handle of a PCI root bridge,
creates a struct acpi_device object for it and passes that
object to acpi_pci_root_add().
2. acpi_pci_root_add() creates a struct acpi_pci_root object,
populates its "device" field with its argument's address
(device->handle is the ACPI handle found in step 1).
3. The struct acpi_pci_root object created in step 2 is passed
to pci_acpi_scan_root() and used to get resources that are
passed to pci_create_root_bus().
4. pci_create_root_bus() creates a struct pci_host_bridge object
and passes its "dev" member to device_register().
5. platform_notify(), which for systems with ACPI is set to
acpi_platform_notify(), is called.
So far, so good. Now it starts to be "interesting".
6. acpi_find_bridge_device() is used to find the ACPI handle of
the given device (which is the PCI root bridge) and executes
acpi_pci_find_root_bridge(), among other things, for the
given device object.
7. acpi_pci_find_root_bridge() uses the name (sic!) of the given
device object to extract the segment and bus numbers of the PCI
root bridge and passes them to acpi_get_pci_rootbridge_handle().
8. acpi_get_pci_rootbridge_handle() browses the list of ACPI PCI
root bridges and finds the one that matches the given segment
and bus numbers. Its handle is then used to initialize the
ACPI handle of the PCI root bridge's device object by
acpi_bind_one(). However, this is *exactly* the ACPI handle we
started with in step 1.
Needless to say, this is quite embarrassing, but it may be avoided
thanks to commit f3fd0c8 (ACPI: Allow ACPI handles of devices to be
initialized in advance), which makes it possible to initialize the
ACPI handle of a device before passing it to device_register().
Accordingly, add a new __weak routine, pcibios_root_bridge_prepare(),
defaulting to an empty implementation that can be replaced by the
interested architectures (x86 and ia64 at the moment) with functions
that will set the root bridge's ACPI handle before its dev member is
passed to device_register(). Make both x86 and ia64 provide such
implementations of pcibios_root_bridge_prepare() and remove
acpi_pci_find_root_bridge() and acpi_get_pci_rootbridge_handle() that
aren't necessary any more.
Included is a fix for breakage on systems with non-ACPI PCI host
bridges from Bjorn Helgaas.
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2013-01-10 01:33:37 +04:00
/**
 * pcibios_root_bridge_prepare - Platform-specific host bridge setup
 * @bridge: Host bridge to set up
 *
 * Weak default that does nothing.  An architecture that needs its own
 * host bridge setup supplies a replacement for this routine.
 *
 * Return: always 0 in this default implementation.
 */
int __weak pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
{
	return 0;
}
2013-04-12 09:44:20 +04:00
/* Weak default with an empty body; an arch may provide its own version. */
void __weak pcibios_add_bus(struct pci_bus *bus)
{
}
/* Weak default with an empty body; an arch may provide its own version. */
void __weak pcibios_remove_bus(struct pci_bus *bus)
{
}
2017-06-28 23:14:01 +03:00
struct pci_bus * pci_create_root_bus ( struct device * parent , int bus ,
struct pci_ops * ops , void * sysdata , struct list_head * resources )
2005-04-17 02:20:36 +04:00
{
2012-02-24 07:19:00 +04:00
int error ;
2012-02-24 07:18:59 +04:00
struct pci_host_bridge * bridge ;
2005-04-17 02:20:36 +04:00
2016-11-25 13:57:10 +03:00
bridge = pci_alloc_host_bridge ( 0 ) ;
2012-04-03 05:31:53 +04:00
if ( ! bridge )
2016-11-25 13:57:09 +03:00
return NULL ;
2012-04-03 05:31:53 +04:00
bridge - > dev . parent = parent ;
2011-10-29 02:25:40 +04:00
2016-11-25 13:57:09 +03:00
list_splice_init ( resources , & bridge - > windows ) ;
bridge - > sysdata = sysdata ;
bridge - > busnr = bus ;
bridge - > ops = ops ;
2011-10-29 02:25:40 +04:00
2016-11-25 13:57:09 +03:00
error = pci_register_host_bridge ( bridge ) ;
if ( error < 0 )
goto err_out ;
2012-02-24 07:18:59 +04:00
2016-11-25 13:57:09 +03:00
return bridge - > bus ;
2005-04-17 02:20:36 +04:00
err_out :
2016-11-25 13:57:09 +03:00
kfree ( bridge ) ;
2005-04-17 02:20:36 +04:00
return NULL ;
}
2015-04-08 21:21:33 +03:00
EXPORT_SYMBOL_GPL ( pci_create_root_bus ) ;
2005-09-06 03:31:03 +04:00
2018-01-30 23:56:52 +03:00
int pci_host_probe ( struct pci_host_bridge * bridge )
{
struct pci_bus * bus , * child ;
int ret ;
ret = pci_scan_root_bus_bridge ( bridge ) ;
if ( ret < 0 ) {
dev_err ( bridge - > dev . parent , " Scanning root bridge failed " ) ;
return ret ;
}
bus = bridge - > bus ;
/*
* We insert PCI resources into the iomem_resource and
* ioport_resource trees in either pci_bus_claim_resources ( )
* or pci_bus_assign_resources ( ) .
*/
if ( pci_has_flag ( PCI_PROBE_ONLY ) ) {
pci_bus_claim_resources ( bus ) ;
} else {
pci_bus_size_bridges ( bus ) ;
pci_bus_assign_resources ( bus ) ;
list_for_each_entry ( child , & bus - > children , node )
pcie_bus_configure_settings ( child ) ;
}
pci_bus_add_devices ( bus ) ;
return 0 ;
}
EXPORT_SYMBOL_GPL ( pci_host_probe ) ;
2012-05-18 21:35:50 +04:00
int pci_bus_insert_busn_res ( struct pci_bus * b , int bus , int bus_max )
{
struct resource * res = & b - > busn_res ;
struct resource * parent_res , * conflict ;
res - > start = bus ;
res - > end = bus_max ;
res - > flags = IORESOURCE_BUS ;
if ( ! pci_is_root_bus ( b ) )
parent_res = & b - > parent - > busn_res ;
else {
parent_res = get_pci_domain_busn_res ( pci_domain_nr ( b ) ) ;
res - > flags | = IORESOURCE_PCI_FIXED ;
}
2014-01-24 00:59:24 +04:00
conflict = request_resource_conflict ( parent_res , res ) ;
2012-05-18 21:35:50 +04:00
if ( conflict )
2019-04-20 07:07:20 +03:00
dev_info ( & b - > dev ,
2012-05-18 21:35:50 +04:00
" busn_res: can not insert %pR under %s%pR (conflicts with %s %pR) \n " ,
res , pci_is_root_bus ( b ) ? " domain " : " " ,
parent_res , conflict - > name , conflict ) ;
return conflict = = NULL ;
}
int pci_bus_update_busn_res_end ( struct pci_bus * b , int bus_max )
{
struct resource * res = & b - > busn_res ;
struct resource old_res = * res ;
resource_size_t size ;
int ret ;
if ( res - > start > bus_max )
return - EINVAL ;
size = bus_max - res - > start + 1 ;
ret = adjust_resource ( res , res - > start , size ) ;
2019-04-20 07:07:20 +03:00
dev_info ( & b - > dev , " busn_res: %pR end %s updated to %02x \n " ,
2012-05-18 21:35:50 +04:00
& old_res , ret ? " can not be " : " is " , bus_max ) ;
if ( ! ret & & ! res - > parent )
pci_bus_insert_busn_res ( b , res - > start , res - > end ) ;
return ret ;
}
void pci_bus_release_busn_res ( struct pci_bus * b )
{
struct resource * res = & b - > busn_res ;
int ret ;
if ( ! res - > flags | | ! res - > parent )
return ;
ret = release_resource ( res ) ;
2019-04-20 07:07:20 +03:00
dev_info ( & b - > dev , " busn_res: %pR %s released \n " ,
2012-05-18 21:35:50 +04:00
res , ret ? " can not be " : " is " ) ;
}
2017-06-28 23:13:55 +03:00
int pci_scan_root_bus_bridge ( struct pci_host_bridge * bridge )
2011-10-29 02:25:50 +04:00
{
2015-02-05 08:44:44 +03:00
struct resource_entry * window ;
2012-05-18 05:51:12 +04:00
bool found = false ;
2011-10-29 02:25:50 +04:00
struct pci_bus * b ;
2017-06-28 23:13:55 +03:00
int max , bus , ret ;
2012-05-18 05:51:12 +04:00
2017-06-28 23:13:55 +03:00
if ( ! bridge )
return - EINVAL ;
resource_list_for_each_entry ( window , & bridge - > windows )
2012-05-18 05:51:12 +04:00
if ( window - > res - > flags & IORESOURCE_BUS ) {
found = true ;
break ;
}
2011-10-29 02:25:50 +04:00
2017-06-28 23:13:55 +03:00
ret = pci_register_host_bridge ( bridge ) ;
if ( ret < 0 )
return ret ;
b = bridge - > bus ;
bus = bridge - > busnr ;
2011-10-29 02:25:50 +04:00
2012-05-18 05:51:12 +04:00
if ( ! found ) {
dev_info ( & b - > dev ,
" No busn resource found for root bus, will use [bus %02x-ff] \n " ,
bus ) ;
pci_bus_insert_busn_res ( b , bus , 255 ) ;
}
max = pci_scan_child_bus ( b ) ;
if ( ! found )
pci_bus_update_busn_res_end ( b , max ) ;
2017-06-28 23:13:55 +03:00
return 0 ;
2011-10-29 02:25:50 +04:00
}
2017-06-28 23:13:55 +03:00
EXPORT_SYMBOL ( pci_scan_root_bus_bridge ) ;
2015-08-04 05:27:10 +03:00
struct pci_bus * pci_scan_root_bus ( struct device * parent , int bus ,
struct pci_ops * ops , void * sysdata , struct list_head * resources )
{
2015-02-05 08:44:44 +03:00
struct resource_entry * window ;
2012-05-18 05:51:12 +04:00
bool found = false ;
2011-10-29 02:25:50 +04:00
struct pci_bus * b ;
2012-05-18 05:51:12 +04:00
int max ;
2015-02-05 08:44:44 +03:00
resource_list_for_each_entry ( window , resources )
2012-05-18 05:51:12 +04:00
if ( window - > res - > flags & IORESOURCE_BUS ) {
found = true ;
break ;
}
2011-10-29 02:25:50 +04:00
2017-06-28 23:14:01 +03:00
b = pci_create_root_bus ( parent , bus , ops , sysdata , resources ) ;
2011-10-29 02:25:50 +04:00
if ( ! b )
return NULL ;
2012-05-18 05:51:12 +04:00
if ( ! found ) {
dev_info ( & b - > dev ,
" No busn resource found for root bus, will use [bus %02x-ff] \n " ,
bus ) ;
pci_bus_insert_busn_res ( b , bus , 255 ) ;
}
max = pci_scan_child_bus ( b ) ;
if ( ! found )
pci_bus_update_busn_res_end ( b , max ) ;
2011-10-29 02:25:50 +04:00
return b ;
2015-08-04 05:27:10 +03:00
}
2011-10-29 02:25:50 +04:00
EXPORT_SYMBOL ( pci_scan_root_bus ) ;
2012-11-22 00:35:00 +04:00
struct pci_bus * pci_scan_bus ( int bus , struct pci_ops * ops ,
2011-10-29 02:25:55 +04:00
void * sysdata )
{
LIST_HEAD ( resources ) ;
struct pci_bus * b ;
pci_add_resource ( & resources , & ioport_resource ) ;
pci_add_resource ( & resources , & iomem_resource ) ;
2012-05-18 05:51:12 +04:00
pci_add_resource ( & resources , & busn_resource ) ;
2011-10-29 02:25:55 +04:00
b = pci_create_root_bus ( NULL , bus , ops , sysdata , & resources ) ;
if ( b ) {
2012-05-18 05:51:12 +04:00
pci_scan_child_bus ( b ) ;
2011-10-29 02:25:55 +04:00
} else {
pci_free_resource_list ( & resources ) ;
}
return b ;
}
EXPORT_SYMBOL ( pci_scan_bus ) ;
2012-01-21 14:08:22 +04:00
/**
2017-11-30 19:58:14 +03:00
* pci_rescan_bus_bridge_resize - Scan a PCI bus for devices
2012-01-21 14:08:22 +04:00
* @ bridge : PCI bridge for the bus to scan
*
* Scan a PCI bus and child buses for new devices , add them ,
* and enable them , resizing bridge mmio / io resource if necessary
* and possible . The caller must ensure the child devices are already
* removed for resizing to occur .
*
* Returns the max number of subordinate bus discovered .
*/
2014-04-15 02:11:40 +04:00
unsigned int pci_rescan_bus_bridge_resize ( struct pci_dev * bridge )
2012-01-21 14:08:22 +04:00
{
unsigned int max ;
struct pci_bus * bus = bridge - > subordinate ;
max = pci_scan_child_bus ( bus ) ;
pci_assign_unassigned_bridge_resources ( bridge ) ;
pci_bus_add_devices ( bus ) ;
return max ;
}
2012-10-31 00:31:21 +04:00
/**
 * pci_rescan_bus - Scan a PCI bus for devices
 * @bus: PCI bus to scan
 *
 * Rescans @bus together with its child buses, assigns the still-unassigned
 * bus resources and adds the new devices.
 *
 * Return: the highest subordinate bus number reported by the scan.
 */
unsigned int pci_rescan_bus(struct pci_bus *bus)
{
	unsigned int highest = pci_scan_child_bus(bus);

	pci_assign_unassigned_bus_resources(bus);
	pci_bus_add_devices(bus);

	return highest;
}
EXPORT_SYMBOL_GPL(pci_rescan_bus);
2014-01-10 18:22:18 +04:00
/*
 * Mutex under which pci_rescan_bus(), pci_rescan_bus_bridge_resize() and
 * the PCI device removal routines should always be run.
 */
static DEFINE_MUTEX(pci_rescan_remove_lock);

/* Take the rescan/remove mutex. */
void pci_lock_rescan_remove(void)
{
	mutex_lock(&pci_rescan_remove_lock);
}
EXPORT_SYMBOL_GPL(pci_lock_rescan_remove);

/* Drop the rescan/remove mutex. */
void pci_unlock_rescan_remove(void)
{
	mutex_unlock(&pci_rescan_remove_lock);
}
EXPORT_SYMBOL_GPL(pci_unlock_rescan_remove);
2014-04-19 04:13:49 +04:00
/*
 * Ordering callback for the breadth-first device sort: compares two PCI
 * devices by domain number, then by bus number, then by devfn.
 *
 * Returns -1 if @d_a sorts before @d_b, 1 if it sorts after, 0 if all
 * three keys are equal.
 */
static int __init pci_sort_bf_cmp(const struct device *d_a,
				  const struct device *d_b)
{
	const struct pci_dev *lhs = to_pci_dev(d_a);
	const struct pci_dev *rhs = to_pci_dev(d_b);

	if (pci_domain_nr(lhs->bus) != pci_domain_nr(rhs->bus))
		return pci_domain_nr(lhs->bus) < pci_domain_nr(rhs->bus) ? -1 : 1;

	if (lhs->bus->number != rhs->bus->number)
		return lhs->bus->number < rhs->bus->number ? -1 : 1;

	if (lhs->devfn != rhs->devfn)
		return lhs->devfn < rhs->devfn ? -1 : 1;

	return 0;
}
2008-02-15 01:56:56 +03:00
/*
 * Re-sort the devices registered on pci_bus_type into breadth-first order.
 *
 * The ordering is delegated to bus_sort_breadthfirst(), with
 * pci_sort_bf_cmp() supplied as the comparison callback (it orders two
 * PCI devices by domain, then bus number, then devfn).
 */
void __init pci_sort_breadthfirst(void)
{
	/* A function name decays to a pointer, so no explicit '&' is needed. */
	bus_sort_breadthfirst(&pci_bus_type, pci_sort_bf_cmp);
}
2017-10-13 21:35:41 +03:00
int pci_hp_add_bridge ( struct pci_dev * dev )
{
struct pci_bus * parent = dev - > bus ;
2017-10-13 21:35:42 +03:00
int busnr , start = parent - > busn_res . start ;
2017-10-13 21:35:44 +03:00
unsigned int available_buses = 0 ;
2017-10-13 21:35:41 +03:00
int end = parent - > busn_res . end ;
for ( busnr = start ; busnr < = end ; busnr + + ) {
if ( ! pci_find_bus ( pci_domain_nr ( parent ) , busnr ) )
break ;
}
if ( busnr - - > end ) {
2018-01-18 21:55:24 +03:00
pci_err ( dev , " No bus number available for hot-added bridge \n " ) ;
2017-10-13 21:35:41 +03:00
return - 1 ;
}
2017-10-13 21:35:42 +03:00
/* Scan bridges that are already configured */
busnr = pci_scan_bridge ( parent , dev , busnr , 0 ) ;
2017-10-13 21:35:44 +03:00
/*
* Distribute the available bus numbers between hotplug - capable
* bridges to make extending the chain later possible .
*/
available_buses = end - busnr ;
2017-10-13 21:35:42 +03:00
/* Scan bridges that need to be reconfigured */
2017-10-13 21:35:44 +03:00
pci_scan_bridge_extend ( parent , dev , busnr , available_buses , 1 ) ;
2017-10-13 21:35:42 +03:00
2017-10-13 21:35:41 +03:00
if ( ! dev - > subordinate )
return - 1 ;
return 0 ;
}
EXPORT_SYMBOL_GPL ( pci_hp_add_bridge ) ;