8a61449941
When a PCIe card is hot-removed, the Presence Detect State and Data Link Layer Link Active bits often do not clear simultaneously. I've seen delays of up to 244 msec between the two events with Thunderbolt. After pciehp has brought down the slot in response to the first event, the other bit may still be set. It's not discernible whether it's set because a new card is already in the slot or if it will soon clear. So pciehp tries to bring up the slot and in the latter case fails with a bunch of messages, some of them at KERN_ERR severity. If the slot is no longer occupied, the messages are false positives and annoy users. Stuart Hayes reports the following splat on hot removal: KERN_INFO pcieport 0000:3c:06.0: pciehp: Slot(180): Link Up KERN_INFO pcieport 0000:3c:06.0: pciehp: Timeout waiting for Presence Detect KERN_ERR pcieport 0000:3c:06.0: pciehp: link training error: status 0x0001 KERN_ERR pcieport 0000:3c:06.0: pciehp: Failed to check link status Dongdong Liu complains about a similar splat: KERN_INFO pciehp 0000:80:10.0:pcie004: Slot(36): Link Down KERN_INFO iommu: Removing device 0000:87:00.0 from group 12 KERN_INFO pciehp 0000:80:10.0:pcie004: Slot(36): Card present KERN_INFO pcieport 0000:80:10.0: Data Link Layer Link Active not set in 1000 msec KERN_ERR pciehp 0000:80:10.0:pcie004: Failed to check link status Users are particularly irritated to see a bringup attempt even though the slot was explicitly brought down via sysfs. In a perfect world, we could avoid this by setting Link Disable on slot bringdown and re-enabling it upon a Presence Detect State change. In reality however, there are broken hotplug ports which hardwire Presence Detect to zero, see 80696f9914
("PCI: pciehp: Tolerate Presence Detect hardwired to zero"). Conversely, PCIe r1.0 hotplug ports hardwire Link Active to zero because Link Active Reporting wasn't specified before PCIe r1.1. On unplug, some ports first clear Presence then Link (see Stuart Hayes' splat) whereas others use the inverse order (see Dongdong Liu's splat). To top it off, there are hotplug ports which flap the Presence and Link bits on slot bringup, see 6c35a1ac3d
("PCI: pciehp: Tolerate initially unstable link"). pciehp is designed to work with all of these variants. Surplus attempts at slot bringup are a lesser evil than not being able to bring up slots at all. Although we could try to perfect the behavior for specific hotplug controllers, we'd risk breaking others or increasing code complexity. But we can certainly minimize annoyance by emitting only a single message with KERN_INFO severity if bringup is unsuccessful: * Drop the "Timeout waiting for Presence Detect" message in pcie_wait_for_presence(). The sole caller of that function, pciehp_check_link_status(), ignores the timeout and carries on. It emits error messages of its own and I don't think this particular message adds much value. * There's a single error condition in pciehp_check_link_status() which does not emit a message. Adding one allows dropping the "Failed to check link status" message emitted by board_added() if pciehp_check_link_status() returns a non-zero integer. * Tone down all messages in pciehp_check_link_status() to KERN_INFO severity and rephrase them to look as innocuous as possible. To this end, move the message emitted by pcie_wait_for_link_delay() to its callers. As a result, Stuart Hayes' splat becomes: KERN_INFO pcieport 0000:3c:06.0: pciehp: Slot(180): Link Up KERN_INFO pcieport 0000:3c:06.0: pciehp: Slot(180): Cannot train link: status 0x0001 Dongdong Liu's splat becomes: KERN_INFO pciehp 0000:80:10.0:pcie004: Slot(36): Card present KERN_INFO pciehp 0000:80:10.0:pcie004: Slot(36): No link The messages now merely serve as information that presence or link bits were set a little longer than expected. Bringup failures which are not false positives are still reported, albeit no longer at KERN_ERR severity. 
Link: https://lore.kernel.org/linux-pci/20200310182100.102987-1-stuart.w.hayes@gmail.com/ Link: https://lore.kernel.org/linux-pci/1547649064-19019-1-git-send-email-liudongdong3@huawei.com/ Link: https://lore.kernel.org/r/b45e46fd8a6aa6930aaac9d7718c2e4b787a4e5e.1595935071.git.lukas@wunner.de Reported-by: Stuart Hayes <stuart.w.hayes@gmail.com> Reported-by: Dongdong Liu <liudongdong3@huawei.com> Signed-off-by: Lukas Wunner <lukas@wunner.de> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
432 lines
11 KiB
C
432 lines
11 KiB
C
// SPDX-License-Identifier: GPL-2.0+
|
|
/*
|
|
* PCI Express Hot Plug Controller Driver
|
|
*
|
|
* Copyright (C) 1995,2001 Compaq Computer Corporation
|
|
* Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
|
|
* Copyright (C) 2001 IBM Corp.
|
|
* Copyright (C) 2003-2004 Intel Corporation
|
|
*
|
|
* All rights reserved.
|
|
*
|
|
* Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
|
|
*
|
|
*/
|
|
|
|
#define dev_fmt(fmt) "pciehp: " fmt
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/types.h>
|
|
#include <linux/pm_runtime.h>
|
|
#include <linux/pci.h>
|
|
#include "pciehp.h"
|
|
|
|
/* The following routines constitute the bulk of the
|
|
hotplug controller logic
|
|
*/
|
|
|
|
#define SAFE_REMOVAL true
|
|
#define SURPRISE_REMOVAL false
|
|
|
|
static void set_slot_off(struct controller *ctrl)
|
|
{
|
|
/*
|
|
* Turn off slot, turn on attention indicator, turn off power
|
|
* indicator
|
|
*/
|
|
if (POWER_CTRL(ctrl)) {
|
|
pciehp_power_off_slot(ctrl);
|
|
|
|
/*
|
|
* After turning power off, we must wait for at least 1 second
|
|
* before taking any action that relies on power having been
|
|
* removed from the slot/adapter.
|
|
*/
|
|
msleep(1000);
|
|
}
|
|
|
|
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
|
|
PCI_EXP_SLTCTL_ATTN_IND_ON);
|
|
}
|
|
|
|
/**
|
|
* board_added - Called after a board has been added to the system.
|
|
* @ctrl: PCIe hotplug controller where board is added
|
|
*
|
|
* Turns power on for the board.
|
|
* Configures board.
|
|
*/
|
|
static int board_added(struct controller *ctrl)
|
|
{
|
|
int retval = 0;
|
|
struct pci_bus *parent = ctrl->pcie->port->subordinate;
|
|
|
|
if (POWER_CTRL(ctrl)) {
|
|
/* Power on slot */
|
|
retval = pciehp_power_on_slot(ctrl);
|
|
if (retval)
|
|
return retval;
|
|
}
|
|
|
|
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK,
|
|
INDICATOR_NOOP);
|
|
|
|
/* Check link training status */
|
|
retval = pciehp_check_link_status(ctrl);
|
|
if (retval)
|
|
goto err_exit;
|
|
|
|
/* Check for a power fault */
|
|
if (ctrl->power_fault_detected || pciehp_query_power_fault(ctrl)) {
|
|
ctrl_err(ctrl, "Slot(%s): Power fault\n", slot_name(ctrl));
|
|
retval = -EIO;
|
|
goto err_exit;
|
|
}
|
|
|
|
retval = pciehp_configure_device(ctrl);
|
|
if (retval) {
|
|
if (retval != -EEXIST) {
|
|
ctrl_err(ctrl, "Cannot add device at %04x:%02x:00\n",
|
|
pci_domain_nr(parent), parent->number);
|
|
goto err_exit;
|
|
}
|
|
}
|
|
|
|
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON,
|
|
PCI_EXP_SLTCTL_ATTN_IND_OFF);
|
|
return 0;
|
|
|
|
err_exit:
|
|
set_slot_off(ctrl);
|
|
return retval;
|
|
}
|
|
|
|
/**
|
|
* remove_board - Turn off slot and Power Indicator
|
|
* @ctrl: PCIe hotplug controller where board is being removed
|
|
* @safe_removal: whether the board is safely removed (versus surprise removed)
|
|
*/
|
|
static void remove_board(struct controller *ctrl, bool safe_removal)
|
|
{
|
|
pciehp_unconfigure_device(ctrl, safe_removal);
|
|
|
|
if (POWER_CTRL(ctrl)) {
|
|
pciehp_power_off_slot(ctrl);
|
|
|
|
/*
|
|
* After turning power off, we must wait for at least 1 second
|
|
* before taking any action that relies on power having been
|
|
* removed from the slot/adapter.
|
|
*/
|
|
msleep(1000);
|
|
|
|
/* Ignore link or presence changes caused by power off */
|
|
atomic_and(~(PCI_EXP_SLTSTA_DLLSC | PCI_EXP_SLTSTA_PDC),
|
|
&ctrl->pending_events);
|
|
}
|
|
|
|
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
|
|
INDICATOR_NOOP);
|
|
}
|
|
|
|
/*
 * Forward declarations: both functions are defined further down in this
 * file but are called by the event handlers that precede them.
 */
static int pciehp_enable_slot(struct controller *ctrl);
|
|
static int pciehp_disable_slot(struct controller *ctrl, bool safe_removal);
|
|
|
|
void pciehp_request(struct controller *ctrl, int action)
|
|
{
|
|
atomic_or(action, &ctrl->pending_events);
|
|
if (!pciehp_poll_mode)
|
|
irq_wake_thread(ctrl->pcie->irq, ctrl);
|
|
}
|
|
|
|
void pciehp_queue_pushbutton_work(struct work_struct *work)
|
|
{
|
|
struct controller *ctrl = container_of(work, struct controller,
|
|
button_work.work);
|
|
|
|
mutex_lock(&ctrl->state_lock);
|
|
switch (ctrl->state) {
|
|
case BLINKINGOFF_STATE:
|
|
pciehp_request(ctrl, DISABLE_SLOT);
|
|
break;
|
|
case BLINKINGON_STATE:
|
|
pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
mutex_unlock(&ctrl->state_lock);
|
|
}
|
|
|
|
void pciehp_handle_button_press(struct controller *ctrl)
|
|
{
|
|
mutex_lock(&ctrl->state_lock);
|
|
switch (ctrl->state) {
|
|
case OFF_STATE:
|
|
case ON_STATE:
|
|
if (ctrl->state == ON_STATE) {
|
|
ctrl->state = BLINKINGOFF_STATE;
|
|
ctrl_info(ctrl, "Slot(%s): Powering off due to button press\n",
|
|
slot_name(ctrl));
|
|
} else {
|
|
ctrl->state = BLINKINGON_STATE;
|
|
ctrl_info(ctrl, "Slot(%s) Powering on due to button press\n",
|
|
slot_name(ctrl));
|
|
}
|
|
/* blink power indicator and turn off attention */
|
|
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK,
|
|
PCI_EXP_SLTCTL_ATTN_IND_OFF);
|
|
schedule_delayed_work(&ctrl->button_work, 5 * HZ);
|
|
break;
|
|
case BLINKINGOFF_STATE:
|
|
case BLINKINGON_STATE:
|
|
/*
|
|
* Cancel if we are still blinking; this means that we
|
|
* press the attention again before the 5 sec. limit
|
|
* expires to cancel hot-add or hot-remove
|
|
*/
|
|
ctrl_info(ctrl, "Slot(%s): Button cancel\n", slot_name(ctrl));
|
|
cancel_delayed_work(&ctrl->button_work);
|
|
if (ctrl->state == BLINKINGOFF_STATE) {
|
|
ctrl->state = ON_STATE;
|
|
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON,
|
|
PCI_EXP_SLTCTL_ATTN_IND_OFF);
|
|
} else {
|
|
ctrl->state = OFF_STATE;
|
|
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
|
|
PCI_EXP_SLTCTL_ATTN_IND_OFF);
|
|
}
|
|
ctrl_info(ctrl, "Slot(%s): Action canceled due to button press\n",
|
|
slot_name(ctrl));
|
|
break;
|
|
default:
|
|
ctrl_err(ctrl, "Slot(%s): Ignoring invalid state %#x\n",
|
|
slot_name(ctrl), ctrl->state);
|
|
break;
|
|
}
|
|
mutex_unlock(&ctrl->state_lock);
|
|
}
|
|
|
|
void pciehp_handle_disable_request(struct controller *ctrl)
|
|
{
|
|
mutex_lock(&ctrl->state_lock);
|
|
switch (ctrl->state) {
|
|
case BLINKINGON_STATE:
|
|
case BLINKINGOFF_STATE:
|
|
cancel_delayed_work(&ctrl->button_work);
|
|
break;
|
|
}
|
|
ctrl->state = POWEROFF_STATE;
|
|
mutex_unlock(&ctrl->state_lock);
|
|
|
|
ctrl->request_result = pciehp_disable_slot(ctrl, SAFE_REMOVAL);
|
|
}
|
|
|
|
/**
 * pciehp_handle_presence_or_link_change - react to a presence or link event
 * @ctrl: PCIe hotplug controller that reported the event
 * @events: event bits; PCI_EXP_SLTSTA_DLLSC and PCI_EXP_SLTSTA_PDC select
 *	    which "down" messages are logged
 *
 * Works in two phases.  First, a slot that is on (or blinking towards off)
 * is brought down as a surprise removal.  Second, if the slot is then
 * occupied or its link is up, a slot that is off (or blinking towards on)
 * is brought up and the result is stored in ctrl->request_result.
 */
void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events)
{
	int present, link_active;

	/*
	 * If the slot is on and presence or link has changed, turn it off.
	 * Even if it's occupied again, we cannot assume the card is the same.
	 */
	mutex_lock(&ctrl->state_lock);
	switch (ctrl->state) {
	case BLINKINGOFF_STATE:
		cancel_delayed_work(&ctrl->button_work);
		/* fall through */
	case ON_STATE:
		ctrl->state = POWEROFF_STATE;
		mutex_unlock(&ctrl->state_lock);
		if (events & PCI_EXP_SLTSTA_DLLSC)
			ctrl_info(ctrl, "Slot(%s): Link Down\n",
				  slot_name(ctrl));
		if (events & PCI_EXP_SLTSTA_PDC)
			ctrl_info(ctrl, "Slot(%s): Card not present\n",
				  slot_name(ctrl));
		pciehp_disable_slot(ctrl, SURPRISE_REMOVAL);
		break;
	default:
		mutex_unlock(&ctrl->state_lock);
		break;
	}

	/* Turn the slot on if it's occupied or link is up */
	mutex_lock(&ctrl->state_lock);
	present = pciehp_card_present(ctrl);
	link_active = pciehp_check_link_active(ctrl);
	/* Zero and negative results both count as "no" here. */
	if (present <= 0 && link_active <= 0) {
		mutex_unlock(&ctrl->state_lock);
		return;
	}

	switch (ctrl->state) {
	case BLINKINGON_STATE:
		cancel_delayed_work(&ctrl->button_work);
		/* fall through */
	case OFF_STATE:
		ctrl->state = POWERON_STATE;
		mutex_unlock(&ctrl->state_lock);
		/*
		 * Only a positive result means "yes"; a negative value must
		 * not be logged as presence or link up.
		 */
		if (present > 0)
			ctrl_info(ctrl, "Slot(%s): Card present\n",
				  slot_name(ctrl));
		if (link_active > 0)
			ctrl_info(ctrl, "Slot(%s): Link Up\n",
				  slot_name(ctrl));
		ctrl->request_result = pciehp_enable_slot(ctrl);
		break;
	default:
		mutex_unlock(&ctrl->state_lock);
		break;
	}
}
|
|
|
|
static int __pciehp_enable_slot(struct controller *ctrl)
|
|
{
|
|
u8 getstatus = 0;
|
|
|
|
if (MRL_SENS(ctrl)) {
|
|
pciehp_get_latch_status(ctrl, &getstatus);
|
|
if (getstatus) {
|
|
ctrl_info(ctrl, "Slot(%s): Latch open\n",
|
|
slot_name(ctrl));
|
|
return -ENODEV;
|
|
}
|
|
}
|
|
|
|
if (POWER_CTRL(ctrl)) {
|
|
pciehp_get_power_status(ctrl, &getstatus);
|
|
if (getstatus) {
|
|
ctrl_info(ctrl, "Slot(%s): Already enabled\n",
|
|
slot_name(ctrl));
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
return board_added(ctrl);
|
|
}
|
|
|
|
static int pciehp_enable_slot(struct controller *ctrl)
|
|
{
|
|
int ret;
|
|
|
|
pm_runtime_get_sync(&ctrl->pcie->port->dev);
|
|
ret = __pciehp_enable_slot(ctrl);
|
|
if (ret && ATTN_BUTTN(ctrl))
|
|
/* may be blinking */
|
|
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
|
|
INDICATOR_NOOP);
|
|
pm_runtime_put(&ctrl->pcie->port->dev);
|
|
|
|
mutex_lock(&ctrl->state_lock);
|
|
ctrl->state = ret ? OFF_STATE : ON_STATE;
|
|
mutex_unlock(&ctrl->state_lock);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * __pciehp_disable_slot - check preconditions, then bring down the slot
 * @ctrl: PCIe hotplug controller owning the slot
 * @safe_removal: passed through to remove_board()
 *
 * Return: -EINVAL if POWER_CTRL(ctrl) is set and the power status reads
 * zero (slot already disabled); 0 after remove_board() has run.
 */
static int __pciehp_disable_slot(struct controller *ctrl, bool safe_removal)
{
	u8 powered = 0;

	if (POWER_CTRL(ctrl)) {
		pciehp_get_power_status(ctrl, &powered);
		if (!powered) {
			ctrl_info(ctrl, "Slot(%s): Already disabled\n",
				  slot_name(ctrl));
			return -EINVAL;
		}
	}

	remove_board(ctrl, safe_removal);

	return 0;
}
|
|
|
|
/*
 * pciehp_disable_slot - bring down the slot and record the resulting state
 * @ctrl: PCIe hotplug controller owning the slot
 * @safe_removal: passed through to __pciehp_disable_slot()
 *
 * Holds a runtime PM reference on the port while the slot is being brought
 * down.  ctrl->state is set to OFF_STATE afterwards regardless of the
 * outcome.
 *
 * Return: the result of __pciehp_disable_slot().
 */
static int pciehp_disable_slot(struct controller *ctrl, bool safe_removal)
{
	int rc;

	pm_runtime_get_sync(&ctrl->pcie->port->dev);
	rc = __pciehp_disable_slot(ctrl, safe_removal);
	pm_runtime_put(&ctrl->pcie->port->dev);

	/* The slot counts as off from here on, whatever rc says. */
	mutex_lock(&ctrl->state_lock);
	ctrl->state = OFF_STATE;
	mutex_unlock(&ctrl->state_lock);

	return rc;
}
|
|
|
|
int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot)
|
|
{
|
|
struct controller *ctrl = to_ctrl(hotplug_slot);
|
|
|
|
mutex_lock(&ctrl->state_lock);
|
|
switch (ctrl->state) {
|
|
case BLINKINGON_STATE:
|
|
case OFF_STATE:
|
|
mutex_unlock(&ctrl->state_lock);
|
|
/*
|
|
* The IRQ thread becomes a no-op if the user pulls out the
|
|
* card before the thread wakes up, so initialize to -ENODEV.
|
|
*/
|
|
ctrl->request_result = -ENODEV;
|
|
pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC);
|
|
wait_event(ctrl->requester,
|
|
!atomic_read(&ctrl->pending_events) &&
|
|
!ctrl->ist_running);
|
|
return ctrl->request_result;
|
|
case POWERON_STATE:
|
|
ctrl_info(ctrl, "Slot(%s): Already in powering on state\n",
|
|
slot_name(ctrl));
|
|
break;
|
|
case BLINKINGOFF_STATE:
|
|
case ON_STATE:
|
|
case POWEROFF_STATE:
|
|
ctrl_info(ctrl, "Slot(%s): Already enabled\n",
|
|
slot_name(ctrl));
|
|
break;
|
|
default:
|
|
ctrl_err(ctrl, "Slot(%s): Invalid state %#x\n",
|
|
slot_name(ctrl), ctrl->state);
|
|
break;
|
|
}
|
|
mutex_unlock(&ctrl->state_lock);
|
|
|
|
return -ENODEV;
|
|
}
|
|
|
|
int pciehp_sysfs_disable_slot(struct hotplug_slot *hotplug_slot)
|
|
{
|
|
struct controller *ctrl = to_ctrl(hotplug_slot);
|
|
|
|
mutex_lock(&ctrl->state_lock);
|
|
switch (ctrl->state) {
|
|
case BLINKINGOFF_STATE:
|
|
case ON_STATE:
|
|
mutex_unlock(&ctrl->state_lock);
|
|
pciehp_request(ctrl, DISABLE_SLOT);
|
|
wait_event(ctrl->requester,
|
|
!atomic_read(&ctrl->pending_events) &&
|
|
!ctrl->ist_running);
|
|
return ctrl->request_result;
|
|
case POWEROFF_STATE:
|
|
ctrl_info(ctrl, "Slot(%s): Already in powering off state\n",
|
|
slot_name(ctrl));
|
|
break;
|
|
case BLINKINGON_STATE:
|
|
case OFF_STATE:
|
|
case POWERON_STATE:
|
|
ctrl_info(ctrl, "Slot(%s): Already disabled\n",
|
|
slot_name(ctrl));
|
|
break;
|
|
default:
|
|
ctrl_err(ctrl, "Slot(%s): Invalid state %#x\n",
|
|
slot_name(ctrl), ctrl->state);
|
|
break;
|
|
}
|
|
mutex_unlock(&ctrl->state_lock);
|
|
|
|
return -ENODEV;
|
|
}
|